1/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 character codes.
9 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10 *                Worldwide Character Encoding -- Version 1.0", Addison-
11 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12 *                described in Unicode Technical Report #4.
13 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14 *                Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * daniel@veillard.com
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21 */
22
23#define IN_LIBXML
24#include "libxml.h"
25
26#include <string.h>
27#include <limits.h>
28
29#ifdef HAVE_CTYPE_H
30#include <ctype.h>
31#endif
32#ifdef HAVE_STDLIB_H
33#include <stdlib.h>
34#endif
35#ifdef LIBXML_ICONV_ENABLED
36#ifdef HAVE_ERRNO_H
37#include <errno.h>
38#endif
39#endif
40#include <libxml/encoding.h>
41#include <libxml/xmlmemory.h>
42#ifdef LIBXML_HTML_ENABLED
43#include <libxml/HTMLparser.h>
44#endif
45#include <libxml/globals.h>
46#include <libxml/xmlerror.h>
47
48#ifdef LIBXML_ICU_ENABLED
49#include <unicode/ucnv.h>
50#endif
51
52#include "buf.h"
53#include "enc.h"
54
/*
 * File-scope pointers for the UTF-16LE and UTF-16BE handlers. They start
 * out NULL; nothing in this part of the file assigns them (presumably done
 * when the default handlers are registered -- TODO confirm).
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/*
 * One entry of the encoding alias table.
 *   name:  the encoding name handed back by xmlGetEncodingAlias()
 *   alias: the alias string, stored upper-cased by xmlAddEncodingAlias()
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/*
 * The alias table itself: a heap array (NULL until the first alias is
 * added), the number of entries in use, and the allocated capacity.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

/*
 * With iconv or ICU support compiled in, DEBUG_ENCODING can be switched on
 * by changing the "#if 0" below. Without either of them, the ISO-8859-x
 * registration routine is forward-declared when LIBXML_ISO8859X_ENABLED
 * is defined.
 */
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Host byte-order flag: nonzero means the machine is treated as
 * little-endian. Initialised to 1 here; presumably corrected at library
 * initialisation, which is not visible in this part of the file
 * (TODO confirm).
 */
static int xmlLittleEndian = 1;
80
/**
 * xmlEncodingErrMemory:
 * @extra:  extra information, forwarded unchanged to the error routine
 *
 * Handle an out of memory condition: reports XML_ERR_NO_MEMORY in the
 * XML_FROM_I18N domain through __xmlSimpleError().
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
92
/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message (presumably a printf-style format taking @val
 *        as its single argument -- TODO confirm against __xmlRaiseError)
 * @val:  a string value associated with the error; passed both as the
 *        first string field of the error and as the argument after @msg
 *
 * Raise an encoding error: reported through __xmlRaiseError() in the
 * XML_FROM_I18N domain with level XML_ERR_FATAL and no context or node.
 */
static void
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}
107
108#ifdef LIBXML_ICU_ENABLED
109static uconv_t*
110openIcuConverter(const char* name, int toUnicode)
111{
112  UErrorCode status = U_ZERO_ERROR;
113  uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
114  if (conv == NULL)
115    return NULL;
116
117  conv->uconv = ucnv_open(name, &status);
118  if (U_FAILURE(status))
119    goto error;
120
121  status = U_ZERO_ERROR;
122  if (toUnicode) {
123    ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
124                        NULL, NULL, NULL, &status);
125  }
126  else {
127    ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
128                        NULL, NULL, NULL, &status);
129  }
130  if (U_FAILURE(status))
131    goto error;
132
133  status = U_ZERO_ERROR;
134  conv->utf8 = ucnv_open("UTF-8", &status);
135  if (U_SUCCESS(status))
136    return conv;
137
138error:
139  if (conv->uconv)
140    ucnv_close(conv->uconv);
141  xmlFree(conv);
142  return NULL;
143}
144
145static void
146closeIcuConverter(uconv_t *conv)
147{
148  if (conv != NULL) {
149    ucnv_close(conv->uconv);
150    ucnv_close(conv->utf8);
151    xmlFree(conv);
152  }
153}
154#endif /* LIBXML_ICU_ENABLED */
155
156/************************************************************************
157 *									*
158 *		Conversions To/From UTF8 encoding			*
159 *									*
160 ************************************************************************/
161
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a subset of UTF-8, so this is a checked
 * byte copy.
 *
 * Conversion stops while at least 6 bytes of @out are still free; this
 * headroom is kept from the original implementation so that existing
 * callers see unchanged buffer usage.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed
 *     (up to, but excluding, an offending byte).
 * The value of @outlen after return is the number of octets produced.
 * Neither length is touched when an argument is NULL.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    const unsigned char* inend;

    /* Same argument validation as isolat1ToUTF8() and UTF8ToUTF8(). */
    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    inend = in + (*inlen);
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
206
207#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * A NULL @in is an initialization call: both lengths are set to 0 and
 * 0 is returned. A multi-byte sequence that is cut off at the end of
 * @in is left unconsumed so that the caller can supply the rest later.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on -2 it covers only the characters converted before the failure.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto failed; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto failed; /* no chance for this in Ascii */

        /* sequence cut off by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not 10xxxxxx here means malformed UTF-8.
             * Fail like UTF8Toisolat1() does rather than emitting a
             * bogus character built from the partial sequence.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto failed;                 /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
291#endif /* LIBXML_OUTPUT_ENABLED */
292
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as they are; every
 * other byte becomes a two-byte UTF-8 sequence.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    const unsigned char *src = in;
    unsigned char *dstLimit;
    const unsigned char *srcLimit;
    const unsigned char *runLimit;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dstLimit = out + *outlen;
    srcLimit = in + (*inlen);
    runLimit = srcLimit;

    /* Main loop: needs room for a two-byte sequence on every pass. */
    while ((src < srcLimit) && (dst < dstLimit - 1)) {
        if (*src >= 0x80) {
            *dst++ = (((*src) >> 6) & 0x1F) | 0xC0;
            *dst++ = ((*src) & 0x3F) | 0x80;
            ++src;
        }
        /* Never copy a longer ASCII run than the output can still hold. */
        if ((runLimit - src) > (dstLimit - dst))
            runLimit = src + (dstLimit - dst);
        while ((src < runLimit) && (*src < 0x80))
            *dst++ = *src++;
    }

    /* One byte of output may remain: enough for a single ASCII char. */
    if ((src < srcLimit) && (dst < dstLimit) && (*src < 0x80))
        *dst++ = *src++;

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
341
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@inlenb and *@outlen, without inspecting them.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the number of bytes to copy would be negative.
 *     On success both *@outlen and *@inlenb are set to the number of
 *     bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
377
378
379#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * A NULL @in is an initialization call: both lengths are set to 0 and
 * 0 is returned. A sequence cut off by the end of @in, or a character
 * for which @out has no room left, is left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character above 0xFF), or -1 if @out,
 *     @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;
    const unsigned char *committed = in;  /* end of last converted char */
    const unsigned char *srcEnd;
    unsigned char *dst = out;
    const unsigned char *dstEnd;
    int rejected = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization call: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srcEnd = in + (*inlen);
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        unsigned int lead = *src++;
        unsigned int value;
        int extra;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* continuation byte where a lead byte is expected */
            rejected = 1;
            break;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            value = lead & 0x07;
            extra = 3;
        } else {
            /* cannot be represented in ISO Latin 1 */
            rejected = 1;
            break;
        }

        /* incomplete sequence at the end of the input */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                rejected = 1;
                break;
            }
            value = (value << 6) | (cont & 0x3F);
            extra--;
        }
        if (rejected)
            break;

        if (value > 0xFF) {
            /* cannot be represented in ISO Latin 1 */
            rejected = 1;
            break;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = value;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return(rejected ? -2 : *outlen);
}
469#endif /* LIBXML_OUTPUT_ENABLED */
470
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.
 *
 * The input is read one byte at a time (low byte first), so the result
 * does not depend on the byte order of the host and @inb does not need
 * to be aligned for 16-bit access.
 *
 * An odd trailing byte is ignored. A high surrogate at the very end of
 * the input is left unconsumed so the pair can be completed by a later
 * call. Conversion stops while at least 6 bytes of @out are still free;
 * this headroom is kept from the original implementation.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate not followed by a low
 *     surrogate).
 *     The value of *@inlenb after return is the number of octets consumed.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while ((inend - in >= 2) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2)
                break;                   /* low half not available yet */
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /*
         * c is a single UCS-4 value; the loop condition guarantees room
         * for the longest (4 byte) UTF-8 sequence.
         */
        if      (c <    0x80) {  *out++=  c;                         bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
558
559#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced.
 *
 * The output is written one byte at a time (low byte first), so the
 * result does not depend on the byte order of the host and @outb does
 * not need to be aligned for 16-bit access.
 *
 * A NULL @in is an initialization call: both lengths are set to 0 and
 * 0 is returned. A sequence cut off by the end of @in, or a character
 * for which @outb has no room left, is left unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8 or a
 *     value above 0x10FFFF).
 *     The value of *@inlen after return is the number of octets consumed.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are usable */
    outend = out + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto failed; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto failed; /* no chance for this in UTF-16 */

        /* sequence cut off by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* malformed sequence: do not emit a half-decoded character */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi & 0xFF);
            out[1] = (unsigned char) (hi >> 8);
            out[2] = (unsigned char) (lo & 0xFF);
            out[3] = (unsigned char) (lo >> 8);
            out += 4;
        } else {
            /* beyond U+10FFFF: not representable in UTF-16 */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
668
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
 *
 * A NULL @in is an initialization call: if @outb has room for it, the
 * two byte UTF-16LE Byte Order Mark (0xFF 0xFE) is written. Any other
 * input is handed to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /*
     * The initialization path below dereferences these directly, so
     * validate them here the same way UTF8ToUTF16LE() does.
     */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
		    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
707#endif /* LIBXML_OUTPUT_ENABLED */
708
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.
 *
 * The input is read one byte at a time (high byte first), so the result
 * does not depend on the byte order of the host and @inb does not need
 * to be aligned for 16-bit access.
 *
 * An odd trailing byte is ignored. A high surrogate at the very end of
 * the input is left unconsumed so the pair can be completed by a later
 * call (the same behaviour as UTF16LEToUTF8()). A character is only
 * written, and its input only counted as consumed, when its whole UTF-8
 * sequence fits in the remaining output space.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned char* outend;
    unsigned int c, d;
    int bits;
    int need;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2)
                break;                   /* low half not available yet */
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80)  need = 1;
        else if (c <   0x800)  need = 2;
        else if (c < 0x10000)  need = 3;
        else                   need = 4;

        /* never emit a truncated UTF-8 sequence */
        if (outend - out < need)
            break;

        if      (c <    0x80) {  *out++=  c;                         bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
800
801#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced.
 *
 * The output is written one byte at a time (high byte first), so the
 * result does not depend on the byte order of the host and @outb does
 * not need to be aligned for 16-bit access.
 *
 * A NULL @in is an initialization call: both lengths are set to 0 and
 * 0 is returned. A sequence cut off by the end of @in, or a character
 * for which @outb has no room left, is left unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (malformed UTF-8 or a
 *     value above 0x10FFFF).
 *     The value of *@inlen after return is the number of octets consumed.
 *     The value of *@outlen after return is the number of octets
 *     produced, on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are usable */
    outend = out + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  goto failed; /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else                goto failed; /* no chance for this in UTF-16 */

        /* sequence cut off by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* malformed sequence: do not emit a half-decoded character */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (hi >> 8);
            out[1] = (unsigned char) (hi & 0xFF);
            out[2] = (unsigned char) (lo >> 8);
            out[3] = (unsigned char) (lo & 0xFF);
            out += 4;
        } else {
            /* beyond U+10FFFF: not representable in UTF-16 */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* byte count, matching the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
907#endif /* LIBXML_OUTPUT_ENABLED */
908
909/************************************************************************
910 *									*
911 *		Generic encoding handling routines			*
912 *									*
913 ************************************************************************/
914
915/**
916 * xmlDetectCharEncoding:
917 * @in:  a pointer to the first bytes of the XML entity, must be at least
918 *       2 bytes long (at least 4 if encoding is UTF4 variant).
919 * @len:  pointer to the length of the buffer
920 *
921 * Guess the encoding of the entity using the first bytes of the entity content
922 * according to the non-normative appendix F of the XML-1.0 recommendation.
923 *
924 * Returns one of the XML_CHAR_ENCODING_... values.
925 */
926xmlCharEncoding
927xmlDetectCharEncoding(const unsigned char* in, int len)
928{
929    if (in == NULL)
930        return(XML_CHAR_ENCODING_NONE);
931    if (len >= 4) {
932	if ((in[0] == 0x00) && (in[1] == 0x00) &&
933	    (in[2] == 0x00) && (in[3] == 0x3C))
934	    return(XML_CHAR_ENCODING_UCS4BE);
935	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
936	    (in[2] == 0x00) && (in[3] == 0x00))
937	    return(XML_CHAR_ENCODING_UCS4LE);
938	if ((in[0] == 0x00) && (in[1] == 0x00) &&
939	    (in[2] == 0x3C) && (in[3] == 0x00))
940	    return(XML_CHAR_ENCODING_UCS4_2143);
941	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
942	    (in[2] == 0x00) && (in[3] == 0x00))
943	    return(XML_CHAR_ENCODING_UCS4_3412);
944	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
945	    (in[2] == 0xA7) && (in[3] == 0x94))
946	    return(XML_CHAR_ENCODING_EBCDIC);
947	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
948	    (in[2] == 0x78) && (in[3] == 0x6D))
949	    return(XML_CHAR_ENCODING_UTF8);
950	/*
951	 * Although not part of the recommendation, we also
952	 * attempt an "auto-recognition" of UTF-16LE and
953	 * UTF-16BE encodings.
954	 */
955	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
956	    (in[2] == 0x3F) && (in[3] == 0x00))
957	    return(XML_CHAR_ENCODING_UTF16LE);
958	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
959	    (in[2] == 0x00) && (in[3] == 0x3F))
960	    return(XML_CHAR_ENCODING_UTF16BE);
961    }
962    if (len >= 3) {
963	/*
964	 * Errata on XML-1.0 June 20 2001
965	 * We now allow an UTF8 encoded BOM
966	 */
967	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
968	    (in[2] == 0xBF))
969	    return(XML_CHAR_ENCODING_UTF8);
970    }
971    /* For UTF-16 we can recognize by the BOM */
972    if (len >= 2) {
973	if ((in[0] == 0xFE) && (in[1] == 0xFF))
974	    return(XML_CHAR_ENCODING_UTF16BE);
975	if ((in[0] == 0xFF) && (in[1] == 0xFE))
976	    return(XML_CHAR_ENCODING_UTF16LE);
977    }
978    return(XML_CHAR_ENCODING_NONE);
979}
980
981/**
982 * xmlCleanupEncodingAliases:
983 *
984 * Unregisters all aliases
985 */
986void
987xmlCleanupEncodingAliases(void) {
988    int i;
989
990    if (xmlCharEncodingAliases == NULL)
991	return;
992
993    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
994	if (xmlCharEncodingAliases[i].name != NULL)
995	    xmlFree((char *) xmlCharEncodingAliases[i].name);
996	if (xmlCharEncodingAliases[i].alias != NULL)
997	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
998    }
999    xmlCharEncodingAliasesNb = 0;
1000    xmlCharEncodingAliasesMax = 0;
1001    xmlFree(xmlCharEncodingAliases);
1002    xmlCharEncodingAliases = NULL;
1003}
1004
1005/**
1006 * xmlGetEncodingAlias:
1007 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1008 *
1009 * Lookup an encoding name for the given alias.
1010 *
1011 * Returns NULL if not found, otherwise the original name
1012 */
1013const char *
1014xmlGetEncodingAlias(const char *alias) {
1015    int i;
1016    char upper[100];
1017
1018    if (alias == NULL)
1019	return(NULL);
1020
1021    if (xmlCharEncodingAliases == NULL)
1022	return(NULL);
1023
1024    for (i = 0;i < 99;i++) {
1025        upper[i] = toupper(alias[i]);
1026	if (upper[i] == 0) break;
1027    }
1028    upper[i] = 0;
1029
1030    /*
1031     * Walk down the list looking for a definition of the alias
1032     */
1033    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1034	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1035	    return(xmlCharEncodingAliases[i].name);
1036	}
1037    }
1038    return(NULL);
1039}
1040
1041/**
1042 * xmlAddEncodingAlias:
1043 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1044 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1045 *
1046 * Registers an alias @alias for an encoding named @name. Existing alias
1047 * will be overwritten.
1048 *
1049 * Returns 0 in case of success, -1 in case of error
1050 */
1051int
1052xmlAddEncodingAlias(const char *name, const char *alias) {
1053    int i;
1054    char upper[100];
1055
1056    if ((name == NULL) || (alias == NULL))
1057	return(-1);
1058
1059    for (i = 0;i < 99;i++) {
1060        upper[i] = toupper(alias[i]);
1061	if (upper[i] == 0) break;
1062    }
1063    upper[i] = 0;
1064
1065    if (xmlCharEncodingAliases == NULL) {
1066	xmlCharEncodingAliasesNb = 0;
1067	xmlCharEncodingAliasesMax = 20;
1068	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1069	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1070	if (xmlCharEncodingAliases == NULL)
1071	    return(-1);
1072    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1073	xmlCharEncodingAliasesMax *= 2;
1074	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1075	      xmlRealloc(xmlCharEncodingAliases,
1076		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1077    }
1078    /*
1079     * Walk down the list looking for a definition of the alias
1080     */
1081    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1082	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1083	    /*
1084	     * Replace the definition.
1085	     */
1086	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1087	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1088	    return(0);
1089	}
1090    }
1091    /*
1092     * Add the definition
1093     */
1094    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1095    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1096    xmlCharEncodingAliasesNb++;
1097    return(0);
1098}
1099
1100/**
1101 * xmlDelEncodingAlias:
1102 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1103 *
1104 * Unregisters an encoding alias @alias
1105 *
1106 * Returns 0 in case of success, -1 in case of error
1107 */
1108int
1109xmlDelEncodingAlias(const char *alias) {
1110    int i;
1111
1112    if (alias == NULL)
1113	return(-1);
1114
1115    if (xmlCharEncodingAliases == NULL)
1116	return(-1);
1117    /*
1118     * Walk down the list looking for a definition of the alias
1119     */
1120    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1121	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1122	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1123	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1124	    xmlCharEncodingAliasesNb--;
1125	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1126		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1127	    return(0);
1128	}
1129    }
1130    return(-1);
1131}
1132
1133/**
1134 * xmlParseCharEncoding:
1135 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1136 *
1137 * Compare the string to the encoding schemes already known. Note
1138 * that the comparison is case insensitive accordingly to the section
1139 * [XML] 4.3.3 Character Encoding in Entities.
1140 *
1141 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1142 * if not recognized.
1143 */
1144xmlCharEncoding
1145xmlParseCharEncoding(const char* name)
1146{
1147    const char *alias;
1148    char upper[500];
1149    int i;
1150
1151    if (name == NULL)
1152	return(XML_CHAR_ENCODING_NONE);
1153
1154    /*
1155     * Do the alias resolution
1156     */
1157    alias = xmlGetEncodingAlias(name);
1158    if (alias != NULL)
1159	name = alias;
1160
1161    for (i = 0;i < 499;i++) {
1162        upper[i] = toupper(name[i]);
1163	if (upper[i] == 0) break;
1164    }
1165    upper[i] = 0;
1166
1167    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1168    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1169    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1170
1171    /*
1172     * NOTE: if we were able to parse this, the endianness of UTF16 is
1173     *       already found and in use
1174     */
1175    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1176    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1177
1178    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1179    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1180    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1181
1182    /*
1183     * NOTE: if we were able to parse this, the endianness of UCS4 is
1184     *       already found and in use
1185     */
1186    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1187    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1188    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1189
1190
1191    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1192    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1193    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1194
1195    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1196    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1197    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1198
1199    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1200    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1201    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1202    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1203    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1204    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1205    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1206
1207    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1208    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1209    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1210
1211#ifdef DEBUG_ENCODING
1212    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1213#endif
1214    return(XML_CHAR_ENCODING_ERROR);
1215}
1216
1217/**
1218 * xmlGetCharEncodingName:
1219 * @enc:  the encoding
1220 *
1221 * The "canonical" name for XML encoding.
1222 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1223 * Section 4.3.3  Character Encoding in Entities
1224 *
1225 * Returns the canonical name for the given encoding
1226 */
1227
1228const char*
1229xmlGetCharEncodingName(xmlCharEncoding enc) {
1230    switch (enc) {
1231        case XML_CHAR_ENCODING_ERROR:
1232	    return(NULL);
1233        case XML_CHAR_ENCODING_NONE:
1234	    return(NULL);
1235        case XML_CHAR_ENCODING_UTF8:
1236	    return("UTF-8");
1237        case XML_CHAR_ENCODING_UTF16LE:
1238	    return("UTF-16");
1239        case XML_CHAR_ENCODING_UTF16BE:
1240	    return("UTF-16");
1241        case XML_CHAR_ENCODING_EBCDIC:
1242            return("EBCDIC");
1243        case XML_CHAR_ENCODING_UCS4LE:
1244            return("ISO-10646-UCS-4");
1245        case XML_CHAR_ENCODING_UCS4BE:
1246            return("ISO-10646-UCS-4");
1247        case XML_CHAR_ENCODING_UCS4_2143:
1248            return("ISO-10646-UCS-4");
1249        case XML_CHAR_ENCODING_UCS4_3412:
1250            return("ISO-10646-UCS-4");
1251        case XML_CHAR_ENCODING_UCS2:
1252            return("ISO-10646-UCS-2");
1253        case XML_CHAR_ENCODING_8859_1:
1254	    return("ISO-8859-1");
1255        case XML_CHAR_ENCODING_8859_2:
1256	    return("ISO-8859-2");
1257        case XML_CHAR_ENCODING_8859_3:
1258	    return("ISO-8859-3");
1259        case XML_CHAR_ENCODING_8859_4:
1260	    return("ISO-8859-4");
1261        case XML_CHAR_ENCODING_8859_5:
1262	    return("ISO-8859-5");
1263        case XML_CHAR_ENCODING_8859_6:
1264	    return("ISO-8859-6");
1265        case XML_CHAR_ENCODING_8859_7:
1266	    return("ISO-8859-7");
1267        case XML_CHAR_ENCODING_8859_8:
1268	    return("ISO-8859-8");
1269        case XML_CHAR_ENCODING_8859_9:
1270	    return("ISO-8859-9");
1271        case XML_CHAR_ENCODING_2022_JP:
1272            return("ISO-2022-JP");
1273        case XML_CHAR_ENCODING_SHIFT_JIS:
1274            return("Shift-JIS");
1275        case XML_CHAR_ENCODING_EUC_JP:
1276            return("EUC-JP");
1277	case XML_CHAR_ENCODING_ASCII:
1278	    return(NULL);
1279    }
1280    return(NULL);
1281}
1282
1283/************************************************************************
1284 *									*
1285 *			Char encoding handlers				*
1286 *									*
1287 ************************************************************************/
1288
1289
/*
 * Capacity of the table of registered encoding handlers.
 * the size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
/*
 * Table of registered handlers, allocated with MAX_ENCODING_HANDLERS
 * slots by xmlInitCharEncodingHandlers() and released by
 * xmlCleanupCharEncodingHandlers(). NULL until initialized.
 */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of slots of the handlers table currently in use. */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is the value xmlFindCharEncodingHandler() returns for a NULL
 * or empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1301
1302/**
1303 * xmlNewCharEncodingHandler:
1304 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1305 * @input:  the xmlCharEncodingInputFunc to read that encoding
1306 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1307 *
1308 * Create and registers an xmlCharEncodingHandler.
1309 *
1310 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1311 */
1312xmlCharEncodingHandlerPtr
1313xmlNewCharEncodingHandler(const char *name,
1314                          xmlCharEncodingInputFunc input,
1315                          xmlCharEncodingOutputFunc output) {
1316    xmlCharEncodingHandlerPtr handler;
1317    const char *alias;
1318    char upper[500];
1319    int i;
1320    char *up = NULL;
1321
1322    /*
1323     * Do the alias resolution
1324     */
1325    alias = xmlGetEncodingAlias(name);
1326    if (alias != NULL)
1327	name = alias;
1328
1329    /*
1330     * Keep only the uppercase version of the encoding.
1331     */
1332    if (name == NULL) {
1333        xmlEncodingErr(XML_I18N_NO_NAME,
1334		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1335	return(NULL);
1336    }
1337    for (i = 0;i < 499;i++) {
1338        upper[i] = toupper(name[i]);
1339	if (upper[i] == 0) break;
1340    }
1341    upper[i] = 0;
1342    up = xmlMemStrdup(upper);
1343    if (up == NULL) {
1344        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1345	return(NULL);
1346    }
1347
1348    /*
1349     * allocate and fill-up an handler block.
1350     */
1351    handler = (xmlCharEncodingHandlerPtr)
1352              xmlMalloc(sizeof(xmlCharEncodingHandler));
1353    if (handler == NULL) {
1354        xmlFree(up);
1355        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1356	return(NULL);
1357    }
1358    memset(handler, 0, sizeof(xmlCharEncodingHandler));
1359    handler->input = input;
1360    handler->output = output;
1361    handler->name = up;
1362
1363#ifdef LIBXML_ICONV_ENABLED
1364    handler->iconv_in = NULL;
1365    handler->iconv_out = NULL;
1366#endif
1367#ifdef LIBXML_ICU_ENABLED
1368    handler->uconv_in = NULL;
1369    handler->uconv_out = NULL;
1370#endif
1371
1372    /*
1373     * registers and returns the handler.
1374     */
1375    xmlRegisterCharEncodingHandler(handler);
1376#ifdef DEBUG_ENCODING
1377    xmlGenericError(xmlGenericErrorContext,
1378	    "Registered encoding handler for %s\n", name);
1379#endif
1380    return(handler);
1381}
1382
1383/**
1384 * xmlInitCharEncodingHandlers:
1385 *
1386 * Initialize the char encoding support, it registers the default
1387 * encoding supported.
1388 * NOTE: while public, this function usually doesn't need to be called
1389 *       in normal processing.
1390 */
1391void
1392xmlInitCharEncodingHandlers(void) {
1393    unsigned short int tst = 0x1234;
1394    unsigned char *ptr = (unsigned char *) &tst;
1395
1396    if (handlers != NULL) return;
1397
1398    handlers = (xmlCharEncodingHandlerPtr *)
1399        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1400
1401    if (*ptr == 0x12) xmlLittleEndian = 0;
1402    else if (*ptr == 0x34) xmlLittleEndian = 1;
1403    else {
1404        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1405	               "Odd problem at endianness detection\n", NULL);
1406    }
1407
1408    if (handlers == NULL) {
1409        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1410	return;
1411    }
1412    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1413#ifdef LIBXML_OUTPUT_ENABLED
1414    xmlUTF16LEHandler =
1415          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1416    xmlUTF16BEHandler =
1417          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1418    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1419    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1420    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1421    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1422#ifdef LIBXML_HTML_ENABLED
1423    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1424#endif
1425#else
1426    xmlUTF16LEHandler =
1427          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1428    xmlUTF16BEHandler =
1429          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1430    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1431    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1432    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1433    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1434#endif /* LIBXML_OUTPUT_ENABLED */
1435#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1436#ifdef LIBXML_ISO8859X_ENABLED
1437    xmlRegisterCharEncodingHandlersISO8859x ();
1438#endif
1439#endif
1440
1441}
1442
1443/**
1444 * xmlCleanupCharEncodingHandlers:
1445 *
1446 * Cleanup the memory allocated for the char encoding support, it
1447 * unregisters all the encoding handlers and the aliases.
1448 */
1449void
1450xmlCleanupCharEncodingHandlers(void) {
1451    xmlCleanupEncodingAliases();
1452
1453    if (handlers == NULL) return;
1454
1455    for (;nbCharEncodingHandler > 0;) {
1456        nbCharEncodingHandler--;
1457	if (handlers[nbCharEncodingHandler] != NULL) {
1458	    if (handlers[nbCharEncodingHandler]->name != NULL)
1459		xmlFree(handlers[nbCharEncodingHandler]->name);
1460	    xmlFree(handlers[nbCharEncodingHandler]);
1461	}
1462    }
1463    xmlFree(handlers);
1464    handlers = NULL;
1465    nbCharEncodingHandler = 0;
1466    xmlDefaultCharEncodingHandler = NULL;
1467}
1468
1469/**
1470 * xmlRegisterCharEncodingHandler:
1471 * @handler:  the xmlCharEncodingHandlerPtr handler block
1472 *
1473 * Register the char encoding handler, surprising, isn't it ?
1474 */
1475void
1476xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1477    if (handlers == NULL) xmlInitCharEncodingHandlers();
1478    if ((handler == NULL) || (handlers == NULL)) {
1479        xmlEncodingErr(XML_I18N_NO_HANDLER,
1480		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1481	return;
1482    }
1483
1484    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1485        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1486	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1487	               "MAX_ENCODING_HANDLERS");
1488	return;
1489    }
1490    handlers[nbCharEncodingHandler++] = handler;
1491}
1492
1493/**
1494 * xmlGetCharEncodingHandler:
1495 * @enc:  an xmlCharEncoding value.
1496 *
1497 * Search in the registered set the handler able to read/write that encoding.
1498 *
1499 * Returns the handler or NULL if not found
1500 */
1501xmlCharEncodingHandlerPtr
1502xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1503    xmlCharEncodingHandlerPtr handler;
1504
1505    if (handlers == NULL) xmlInitCharEncodingHandlers();
1506    switch (enc) {
1507        case XML_CHAR_ENCODING_ERROR:
1508	    return(NULL);
1509        case XML_CHAR_ENCODING_NONE:
1510	    return(NULL);
1511        case XML_CHAR_ENCODING_UTF8:
1512	    return(NULL);
1513        case XML_CHAR_ENCODING_UTF16LE:
1514	    return(xmlUTF16LEHandler);
1515        case XML_CHAR_ENCODING_UTF16BE:
1516	    return(xmlUTF16BEHandler);
1517        case XML_CHAR_ENCODING_EBCDIC:
1518            handler = xmlFindCharEncodingHandler("EBCDIC");
1519            if (handler != NULL) return(handler);
1520            handler = xmlFindCharEncodingHandler("ebcdic");
1521            if (handler != NULL) return(handler);
1522            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1523            if (handler != NULL) return(handler);
1524            handler = xmlFindCharEncodingHandler("IBM-037");
1525            if (handler != NULL) return(handler);
1526	    break;
1527        case XML_CHAR_ENCODING_UCS4BE:
1528            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1529            if (handler != NULL) return(handler);
1530            handler = xmlFindCharEncodingHandler("UCS-4");
1531            if (handler != NULL) return(handler);
1532            handler = xmlFindCharEncodingHandler("UCS4");
1533            if (handler != NULL) return(handler);
1534	    break;
1535        case XML_CHAR_ENCODING_UCS4LE:
1536            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1537            if (handler != NULL) return(handler);
1538            handler = xmlFindCharEncodingHandler("UCS-4");
1539            if (handler != NULL) return(handler);
1540            handler = xmlFindCharEncodingHandler("UCS4");
1541            if (handler != NULL) return(handler);
1542	    break;
1543        case XML_CHAR_ENCODING_UCS4_2143:
1544	    break;
1545        case XML_CHAR_ENCODING_UCS4_3412:
1546	    break;
1547        case XML_CHAR_ENCODING_UCS2:
1548            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1549            if (handler != NULL) return(handler);
1550            handler = xmlFindCharEncodingHandler("UCS-2");
1551            if (handler != NULL) return(handler);
1552            handler = xmlFindCharEncodingHandler("UCS2");
1553            if (handler != NULL) return(handler);
1554	    break;
1555
1556	    /*
1557	     * We used to keep ISO Latin encodings native in the
1558	     * generated data. This led to so many problems that
1559	     * this has been removed. One can still change this
1560	     * back by registering no-ops encoders for those
1561	     */
1562        case XML_CHAR_ENCODING_8859_1:
1563	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1564	    if (handler != NULL) return(handler);
1565	    break;
1566        case XML_CHAR_ENCODING_8859_2:
1567	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1568	    if (handler != NULL) return(handler);
1569	    break;
1570        case XML_CHAR_ENCODING_8859_3:
1571	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1572	    if (handler != NULL) return(handler);
1573	    break;
1574        case XML_CHAR_ENCODING_8859_4:
1575	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1576	    if (handler != NULL) return(handler);
1577	    break;
1578        case XML_CHAR_ENCODING_8859_5:
1579	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1580	    if (handler != NULL) return(handler);
1581	    break;
1582        case XML_CHAR_ENCODING_8859_6:
1583	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1584	    if (handler != NULL) return(handler);
1585	    break;
1586        case XML_CHAR_ENCODING_8859_7:
1587	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1588	    if (handler != NULL) return(handler);
1589	    break;
1590        case XML_CHAR_ENCODING_8859_8:
1591	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1592	    if (handler != NULL) return(handler);
1593	    break;
1594        case XML_CHAR_ENCODING_8859_9:
1595	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1596	    if (handler != NULL) return(handler);
1597	    break;
1598
1599
1600        case XML_CHAR_ENCODING_2022_JP:
1601            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1602            if (handler != NULL) return(handler);
1603	    break;
1604        case XML_CHAR_ENCODING_SHIFT_JIS:
1605            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1606            if (handler != NULL) return(handler);
1607            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1608            if (handler != NULL) return(handler);
1609            handler = xmlFindCharEncodingHandler("Shift_JIS");
1610            if (handler != NULL) return(handler);
1611	    break;
1612        case XML_CHAR_ENCODING_EUC_JP:
1613            handler = xmlFindCharEncodingHandler("EUC-JP");
1614            if (handler != NULL) return(handler);
1615	    break;
1616	default:
1617	    break;
1618    }
1619
1620#ifdef DEBUG_ENCODING
1621    xmlGenericError(xmlGenericErrorContext,
1622	    "No handler found for encoding %d\n", enc);
1623#endif
1624    return(NULL);
1625}
1626
1627/**
1628 * xmlFindCharEncodingHandler:
1629 * @name:  a string describing the char encoding.
1630 *
1631 * Search in the registered set the handler able to read/write that encoding.
1632 *
1633 * Returns the handler or NULL if not found
1634 */
1635xmlCharEncodingHandlerPtr
1636xmlFindCharEncodingHandler(const char *name) {
1637    const char *nalias;
1638    const char *norig;
1639    xmlCharEncoding alias;
1640#ifdef LIBXML_ICONV_ENABLED
1641    xmlCharEncodingHandlerPtr enc;
1642    iconv_t icv_in, icv_out;
1643#endif /* LIBXML_ICONV_ENABLED */
1644#ifdef LIBXML_ICU_ENABLED
1645    xmlCharEncodingHandlerPtr encu;
1646    uconv_t *ucv_in, *ucv_out;
1647#endif /* LIBXML_ICU_ENABLED */
1648    char upper[100];
1649    int i;
1650
1651    if (handlers == NULL) xmlInitCharEncodingHandlers();
1652    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1653    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1654
1655    /*
1656     * Do the alias resolution
1657     */
1658    norig = name;
1659    nalias = xmlGetEncodingAlias(name);
1660    if (nalias != NULL)
1661	name = nalias;
1662
1663    /*
1664     * Check first for directly registered encoding names
1665     */
1666    for (i = 0;i < 99;i++) {
1667        upper[i] = toupper(name[i]);
1668	if (upper[i] == 0) break;
1669    }
1670    upper[i] = 0;
1671
1672    if (handlers != NULL) {
1673        for (i = 0;i < nbCharEncodingHandler; i++) {
1674            if (!strcmp(upper, handlers[i]->name)) {
1675#ifdef DEBUG_ENCODING
1676                xmlGenericError(xmlGenericErrorContext,
1677                        "Found registered handler for encoding %s\n", name);
1678#endif
1679                return(handlers[i]);
1680            }
1681        }
1682    }
1683
1684#ifdef LIBXML_ICONV_ENABLED
1685    /* check whether iconv can handle this */
1686    icv_in = iconv_open("UTF-8", name);
1687    icv_out = iconv_open(name, "UTF-8");
1688    if (icv_in == (iconv_t) -1) {
1689        icv_in = iconv_open("UTF-8", upper);
1690    }
1691    if (icv_out == (iconv_t) -1) {
1692	icv_out = iconv_open(upper, "UTF-8");
1693    }
1694    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1695	    enc = (xmlCharEncodingHandlerPtr)
1696	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1697	    if (enc == NULL) {
1698	        iconv_close(icv_in);
1699	        iconv_close(icv_out);
1700		return(NULL);
1701	    }
1702            memset(enc, 0, sizeof(xmlCharEncodingHandler));
1703	    enc->name = xmlMemStrdup(name);
1704	    enc->input = NULL;
1705	    enc->output = NULL;
1706	    enc->iconv_in = icv_in;
1707	    enc->iconv_out = icv_out;
1708#ifdef DEBUG_ENCODING
1709            xmlGenericError(xmlGenericErrorContext,
1710		    "Found iconv handler for encoding %s\n", name);
1711#endif
1712	    return enc;
1713    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1714	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1715		    "iconv : problems with filters for '%s'\n", name);
1716    }
1717#endif /* LIBXML_ICONV_ENABLED */
1718#ifdef LIBXML_ICU_ENABLED
1719    /* check whether icu can handle this */
1720    ucv_in = openIcuConverter(name, 1);
1721    ucv_out = openIcuConverter(name, 0);
1722    if (ucv_in != NULL && ucv_out != NULL) {
1723	    encu = (xmlCharEncodingHandlerPtr)
1724	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1725	    if (encu == NULL) {
1726                closeIcuConverter(ucv_in);
1727                closeIcuConverter(ucv_out);
1728		return(NULL);
1729	    }
1730            memset(encu, 0, sizeof(xmlCharEncodingHandler));
1731	    encu->name = xmlMemStrdup(name);
1732	    encu->input = NULL;
1733	    encu->output = NULL;
1734	    encu->uconv_in = ucv_in;
1735	    encu->uconv_out = ucv_out;
1736#ifdef DEBUG_ENCODING
1737            xmlGenericError(xmlGenericErrorContext,
1738		    "Found ICU converter handler for encoding %s\n", name);
1739#endif
1740	    return encu;
1741    } else if (ucv_in != NULL || ucv_out != NULL) {
1742            closeIcuConverter(ucv_in);
1743            closeIcuConverter(ucv_out);
1744	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1745		    "ICU converter : problems with filters for '%s'\n", name);
1746    }
1747#endif /* LIBXML_ICU_ENABLED */
1748
1749#ifdef DEBUG_ENCODING
1750    xmlGenericError(xmlGenericErrorContext,
1751	    "No handler found for encoding %s\n", name);
1752#endif
1753
1754    /*
1755     * Fallback using the canonical names
1756     */
1757    alias = xmlParseCharEncoding(norig);
1758    if (alias != XML_CHAR_ENCODING_ERROR) {
1759        const char* canon;
1760        canon = xmlGetCharEncodingName(alias);
1761        if ((canon != NULL) && (strcmp(name, canon))) {
1762	    return(xmlFindCharEncodingHandler(canon));
1763        }
1764    }
1765
1766    /* If "none of the above", give up */
1767    return(NULL);
1768}
1769
1770/************************************************************************
1771 *									*
1772 *		ICONV based generic conversion functions		*
1773 *									*
1774 ************************************************************************/
1775
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Run one iconv() conversion from @in to @out and translate its
 * outcome to the return codes used by the encoding functions.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the arguments is NULL), or
 *     -2 if the transcoding fails (an input sequence is invalid or
 *        can't be represented in the target encoding), or
 *     -3 for an incomplete input sequence or any other failure.
 *
 * On return @inlen holds the number of input bytes consumed and
 * @outlen the number of bytes written to @out. When an argument is
 * NULL, @outlen (if not NULL) is set to 0 and @inlen is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    int rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    rc = iconv(cd, (ICONV_CONST char **) &src, &srcLeft, &dst, &dstLeft);
    /* iconv() reports what is left, callers expect what was used */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL (truncated input sequence) and anything else */
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
1835
1836/************************************************************************
1837 *									*
1838 *		ICU based generic conversion functions		*
1839 *									*
1840 ************************************************************************/
1841
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd: ICU uconverter data structure
 * @toUnicode : non-zero to convert from the converter's encoding to
 *              UTF-8, 0 to convert from UTF-8 to that encoding.
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Run one ucnv_convertEx() conversion from @in to @out and translate
 * the ICU status to the return codes used by the encoding functions.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the arguments is NULL), or
 *     -2 if the transcoding fails (invalid or illegal character found), or
 *     -3 for any other ICU error, e.g. a truncated character.
 *
 * On return @inlen holds the number of input bytes consumed and
 * @outlen the number of bytes written to @out. When an argument is
 * NULL, @outlen (if not NULL) is set to 0 and @inlen is left untouched.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    const char *srcEnd;
    char *dstEnd;
    UErrorCode status = U_ZERO_ERROR;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcEnd = src + *inlen;
    dstEnd = dst + *outlen;

    /*
     * TODO(jungshik)
     * 1. is ucnv_convert(To|From)Algorithmic better?
     * 2. had we better use an explicit pivot buffer?
     * 3. error returned comes from 'fromUnicode' only even
     *    when toUnicode is true !
     */
    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        ucnv_convertEx(cd->utf8, cd->uconv, &dst, dstEnd,
                       &src, srcEnd, NULL, NULL, NULL, NULL,
                       0, TRUE, &status);
    } else {
        /* UTF-8 => UTF-16 => encoding */
        ucnv_convertEx(cd->uconv, cd->utf8, &dst, dstEnd,
                       &src, srcEnd, NULL, NULL, NULL, NULL,
                       0, TRUE, &status);
    }

    /* the cursors were advanced by ICU, report the distances covered */
    *inlen = src - (const char*) in;
    *outlen = dst - (char *) out;

    if (U_SUCCESS(status))
        return 0;

    switch (status) {
        case U_BUFFER_OVERFLOW_ERROR:
            return -1;
        case U_INVALID_CHAR_FOUND:
        case U_ILLEGAL_CHAR_FOUND:
            return -2;
        default:
            /* U_TRUNCATED_CHAR_FOUND and everything else */
            return -3;
    }
}
#endif /* LIBXML_ICU_ENABLED */
1904
1905/************************************************************************
1906 *									*
1907 *		The real API used by libxml for on-the-fly conversion	*
1908 *									*
1909 ************************************************************************/
1910
1911/**
1912 * xmlCharEncFirstLineInt:
 * @handler:	char encoding transformation data structure
1914 * @out:  an xmlBuffer for the output.
1915 * @in:  an xmlBuffer for the input
1916 * @len:  number of bytes to convert for the first line, or -1
1917 *
1918 * Front-end for the encoding handler input function, but handle only
1919 * the very first line, i.e. limit itself to 45 chars.
1920 *
1921 * Returns the number of byte written if success, or
1922 *     -1 general error
1923 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1924 *        the result of transformation can't fit into the encoding we want), or
1925 */
1926int
1927xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1928                       xmlBufferPtr in, int len) {
1929    int ret = -2;
1930    int written;
1931    int toconv;
1932
1933    if (handler == NULL) return(-1);
1934    if (out == NULL) return(-1);
1935    if (in == NULL) return(-1);
1936
1937    /* calculate space available */
1938    written = out->size - out->use - 1; /* count '\0' */
1939    toconv = in->use;
1940    /*
1941     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1942     * 45 chars should be sufficient to reach the end of the encoding
1943     * declaration without going too far inside the document content.
1944     * on UTF-16 this means 90bytes, on UCS4 this means 180
1945     * The actual value depending on guessed encoding is passed as @len
1946     * if provided
1947     */
1948    if (len >= 0) {
1949        if (toconv > len)
1950            toconv = len;
1951    } else {
1952        if (toconv > 180)
1953            toconv = 180;
1954    }
1955    if (toconv * 2 >= written) {
1956        xmlBufferGrow(out, toconv * 2);
1957	written = out->size - out->use - 1;
1958    }
1959
1960    if (handler->input != NULL) {
1961	ret = handler->input(&out->content[out->use], &written,
1962	                     in->content, &toconv);
1963	xmlBufferShrink(in, toconv);
1964	out->use += written;
1965	out->content[out->use] = 0;
1966    }
1967#ifdef LIBXML_ICONV_ENABLED
1968    else if (handler->iconv_in != NULL) {
1969	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1970	                      &written, in->content, &toconv);
1971	xmlBufferShrink(in, toconv);
1972	out->use += written;
1973	out->content[out->use] = 0;
1974	if (ret == -1) ret = -3;
1975    }
1976#endif /* LIBXML_ICONV_ENABLED */
1977#ifdef LIBXML_ICU_ENABLED
1978    else if (handler->uconv_in != NULL) {
1979	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1980	                      &written, in->content, &toconv);
1981	xmlBufferShrink(in, toconv);
1982	out->use += written;
1983	out->content[out->use] = 0;
1984	if (ret == -1) ret = -3;
1985    }
1986#endif /* LIBXML_ICU_ENABLED */
1987#ifdef DEBUG_ENCODING
1988    switch (ret) {
1989        case 0:
1990	    xmlGenericError(xmlGenericErrorContext,
1991		    "converted %d bytes to %d bytes of input\n",
1992	            toconv, written);
1993	    break;
1994        case -1:
1995	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1996	            toconv, written, in->use);
1997	    break;
1998        case -2:
1999	    xmlGenericError(xmlGenericErrorContext,
2000		    "input conversion failed due to input error\n");
2001	    break;
2002        case -3:
2003	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2004	            toconv, written, in->use);
2005	    break;
2006	default:
2007	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2008    }
2009#endif /* DEBUG_ENCODING */
2010    /*
2011     * Ignore when input buffer is not on a boundary
2012     */
2013    if (ret == -3) ret = 0;
2014    if (ret == -1) ret = 0;
2015    return(ret);
2016}
2017
2018/**
2019 * xmlCharEncFirstLine:
2020 * @handler:	char enconding transformation data structure
2021 * @out:  an xmlBuffer for the output.
2022 * @in:  an xmlBuffer for the input
2023 *
2024 * Front-end for the encoding handler input function, but handle only
2025 * the very first line, i.e. limit itself to 45 chars.
2026 *
2027 * Returns the number of byte written if success, or
2028 *     -1 general error
2029 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2030 *        the result of transformation can't fit into the encoding we want), or
2031 */
2032int
2033xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2034                 xmlBufferPtr in) {
2035    return(xmlCharEncFirstLineInt(handler, out, in, -1));
2036}
2037
2038/**
2039 * xmlCharEncFirstLineInput:
2040 * @input: a parser input buffer
2041 * @len:  number of bytes to convert for the first line, or -1
2042 *
2043 * Front-end for the encoding handler input function, but handle only
2044 * the very first line. Point is that this is based on autodetection
2045 * of the encoding and once that first line is converted we may find
2046 * out that a different decoder is needed to process the input.
2047 *
2048 * Returns the number of byte written if success, or
2049 *     -1 general error
2050 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2051 *        the result of transformation can't fit into the encoding we want), or
2052 */
2053int
2054xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2055{
2056    int ret = -2;
2057    size_t written;
2058    size_t toconv;
2059    int c_in;
2060    int c_out;
2061    xmlBufPtr in;
2062    xmlBufPtr out;
2063
2064    if ((input == NULL) || (input->encoder == NULL) ||
2065        (input->buffer == NULL) || (input->raw == NULL))
2066        return (-1);
2067    out = input->buffer;
2068    in = input->raw;
2069
2070    toconv = xmlBufUse(in);
2071    if (toconv == 0)
2072        return (0);
2073    written = xmlBufAvail(out) - 1; /* count '\0' */
2074    /*
2075     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2076     * 45 chars should be sufficient to reach the end of the encoding
2077     * declaration without going too far inside the document content.
2078     * on UTF-16 this means 90bytes, on UCS4 this means 180
2079     * The actual value depending on guessed encoding is passed as @len
2080     * if provided
2081     */
2082    if (len >= 0) {
2083        if (toconv > (unsigned int) len)
2084            toconv = len;
2085    } else {
2086        if (toconv > 180)
2087            toconv = 180;
2088    }
2089    if (toconv * 2 >= written) {
2090        xmlBufGrow(out, toconv * 2);
2091        written = xmlBufAvail(out) - 1;
2092    }
2093    if (written > 360)
2094        written = 360;
2095
2096    c_in = toconv;
2097    c_out = written;
2098    if (input->encoder->input != NULL) {
2099        ret = input->encoder->input(xmlBufEnd(out), &c_out,
2100                                    xmlBufContent(in), &c_in);
2101        xmlBufShrink(in, c_in);
2102        xmlBufAddLen(out, c_out);
2103    }
2104#ifdef LIBXML_ICONV_ENABLED
2105    else if (input->encoder->iconv_in != NULL) {
2106        ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
2107                              &c_out, xmlBufContent(in), &c_in);
2108        xmlBufShrink(in, c_in);
2109        xmlBufAddLen(out, c_out);
2110        if (ret == -1)
2111            ret = -3;
2112    }
2113#endif /* LIBXML_ICONV_ENABLED */
2114#ifdef LIBXML_ICU_ENABLED
2115    else if (input->encoder->uconv_in != NULL) {
2116        ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
2117                              &c_out, xmlBufContent(in), &c_in);
2118        xmlBufShrink(in, c_in);
2119        xmlBufAddLen(out, c_out);
2120        if (ret == -1)
2121            ret = -3;
2122    }
2123#endif /* LIBXML_ICU_ENABLED */
2124    switch (ret) {
2125        case 0:
2126#ifdef DEBUG_ENCODING
2127            xmlGenericError(xmlGenericErrorContext,
2128                            "converted %d bytes to %d bytes of input\n",
2129                            c_in, c_out);
2130#endif
2131            break;
2132        case -1:
2133#ifdef DEBUG_ENCODING
2134            xmlGenericError(xmlGenericErrorContext,
2135                         "converted %d bytes to %d bytes of input, %d left\n",
2136                            c_in, c_out, (int)xmlBufUse(in));
2137#endif
2138            break;
2139        case -3:
2140#ifdef DEBUG_ENCODING
2141            xmlGenericError(xmlGenericErrorContext,
2142                        "converted %d bytes to %d bytes of input, %d left\n",
2143                            c_in, c_out, (int)xmlBufUse(in));
2144#endif
2145            break;
2146        case -2: {
2147            char buf[50];
2148            const xmlChar *content = xmlBufContent(in);
2149
2150	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2151		     content[0], content[1],
2152		     content[2], content[3]);
2153	    buf[49] = 0;
2154	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2155		    "input conversion failed due to input error, bytes %s\n",
2156		           buf);
2157        }
2158    }
2159    /*
2160     * Ignore when input buffer is not on a boundary
2161     */
2162    if (ret == -3) ret = 0;
2163    if (ret == -1) ret = 0;
2164    return(ret);
2165}
2166
2167/**
2168 * xmlCharEncInput:
2169 * @input: a parser input buffer
2170 * @flush: try to flush all the raw buffer
2171 *
2172 * Generic front-end for the encoding handler on parser input
2173 *
2174 * Returns the number of byte written if success, or
2175 *     -1 general error
2176 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2177 *        the result of transformation can't fit into the encoding we want), or
2178 */
2179int
2180xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2181{
2182    int ret = -2;
2183    size_t written;
2184    size_t toconv;
2185    int c_in;
2186    int c_out;
2187    xmlBufPtr in;
2188    xmlBufPtr out;
2189
2190    if ((input == NULL) || (input->encoder == NULL) ||
2191        (input->buffer == NULL) || (input->raw == NULL))
2192        return (-1);
2193    out = input->buffer;
2194    in = input->raw;
2195
2196    toconv = xmlBufUse(in);
2197    if (toconv == 0)
2198        return (0);
2199    if ((toconv > 64 * 1024) && (flush == 0))
2200        toconv = 64 * 1024;
2201    written = xmlBufAvail(out);
2202    if (written > 0)
2203        written--; /* count '\0' */
2204    if (toconv * 2 >= written) {
2205        xmlBufGrow(out, toconv * 2);
2206        written = xmlBufAvail(out);
2207        if (written > 0)
2208            written--; /* count '\0' */
2209    }
2210    if ((written > 128 * 1024) && (flush == 0))
2211        written = 128 * 1024;
2212
2213    c_in = toconv;
2214    c_out = written;
2215    if (input->encoder->input != NULL) {
2216        ret = input->encoder->input(xmlBufEnd(out), &c_out,
2217                                    xmlBufContent(in), &c_in);
2218        xmlBufShrink(in, c_in);
2219        xmlBufAddLen(out, c_out);
2220    }
2221#ifdef LIBXML_ICONV_ENABLED
2222    else if (input->encoder->iconv_in != NULL) {
2223        ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
2224                              &c_out, xmlBufContent(in), &c_in);
2225        xmlBufShrink(in, c_in);
2226        xmlBufAddLen(out, c_out);
2227        if (ret == -1)
2228            ret = -3;
2229    }
2230#endif /* LIBXML_ICONV_ENABLED */
2231#ifdef LIBXML_ICU_ENABLED
2232    else if (input->encoder->uconv_in != NULL) {
2233        ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
2234                              &c_out, xmlBufContent(in), &c_in);
2235        xmlBufShrink(in, c_in);
2236        xmlBufAddLen(out, c_out);
2237        if (ret == -1)
2238            ret = -3;
2239    }
2240#endif /* LIBXML_ICU_ENABLED */
2241    switch (ret) {
2242        case 0:
2243#ifdef DEBUG_ENCODING
2244            xmlGenericError(xmlGenericErrorContext,
2245                            "converted %d bytes to %d bytes of input\n",
2246                            c_in, c_out);
2247#endif
2248            break;
2249        case -1:
2250#ifdef DEBUG_ENCODING
2251            xmlGenericError(xmlGenericErrorContext,
2252                         "converted %d bytes to %d bytes of input, %d left\n",
2253                            c_in, c_out, (int)xmlBufUse(in));
2254#endif
2255            break;
2256        case -3:
2257#ifdef DEBUG_ENCODING
2258            xmlGenericError(xmlGenericErrorContext,
2259                        "converted %d bytes to %d bytes of input, %d left\n",
2260                            c_in, c_out, (int)xmlBufUse(in));
2261#endif
2262            break;
2263        case -2: {
2264            char buf[50];
2265            const xmlChar *content = xmlBufContent(in);
2266
2267	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2268		     content[0], content[1],
2269		     content[2], content[3]);
2270	    buf[49] = 0;
2271	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2272		    "input conversion failed due to input error, bytes %s\n",
2273		           buf);
2274        }
2275    }
2276    /*
2277     * Ignore when input buffer is not on a boundary
2278     */
2279    if (ret == -3)
2280        ret = 0;
2281    return (c_out? c_out : ret);
2282}
2283
2284/**
2285 * xmlCharEncInFunc:
2286 * @handler:	char encoding transformation data structure
2287 * @out:  an xmlBuffer for the output.
2288 * @in:  an xmlBuffer for the input
2289 *
2290 * Generic front-end for the encoding handler input function
2291 *
2292 * Returns the number of byte written if success, or
2293 *     -1 general error
2294 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2295 *        the result of transformation can't fit into the encoding we want), or
2296 */
2297int
2298xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2299                 xmlBufferPtr in)
2300{
2301    int ret = -2;
2302    int written;
2303    int toconv;
2304
2305    if (handler == NULL)
2306        return (-1);
2307    if (out == NULL)
2308        return (-1);
2309    if (in == NULL)
2310        return (-1);
2311
2312    toconv = in->use;
2313    if (toconv == 0)
2314        return (0);
2315    written = out->size - out->use -1; /* count '\0' */
2316    if (toconv * 2 >= written) {
2317        xmlBufferGrow(out, out->size + toconv * 2);
2318        written = out->size - out->use - 1;
2319    }
2320    if (handler->input != NULL) {
2321        ret = handler->input(&out->content[out->use], &written,
2322                             in->content, &toconv);
2323        xmlBufferShrink(in, toconv);
2324        out->use += written;
2325        out->content[out->use] = 0;
2326    }
2327#ifdef LIBXML_ICONV_ENABLED
2328    else if (handler->iconv_in != NULL) {
2329        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2330                              &written, in->content, &toconv);
2331        xmlBufferShrink(in, toconv);
2332        out->use += written;
2333        out->content[out->use] = 0;
2334        if (ret == -1)
2335            ret = -3;
2336    }
2337#endif /* LIBXML_ICONV_ENABLED */
2338#ifdef LIBXML_ICU_ENABLED
2339    else if (handler->uconv_in != NULL) {
2340        ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2341                              &written, in->content, &toconv);
2342        xmlBufferShrink(in, toconv);
2343        out->use += written;
2344        out->content[out->use] = 0;
2345        if (ret == -1)
2346            ret = -3;
2347    }
2348#endif /* LIBXML_ICU_ENABLED */
2349    switch (ret) {
2350        case 0:
2351#ifdef DEBUG_ENCODING
2352            xmlGenericError(xmlGenericErrorContext,
2353                            "converted %d bytes to %d bytes of input\n",
2354                            toconv, written);
2355#endif
2356            break;
2357        case -1:
2358#ifdef DEBUG_ENCODING
2359            xmlGenericError(xmlGenericErrorContext,
2360                         "converted %d bytes to %d bytes of input, %d left\n",
2361                            toconv, written, in->use);
2362#endif
2363            break;
2364        case -3:
2365#ifdef DEBUG_ENCODING
2366            xmlGenericError(xmlGenericErrorContext,
2367                        "converted %d bytes to %d bytes of input, %d left\n",
2368                            toconv, written, in->use);
2369#endif
2370            break;
2371        case -2: {
2372            char buf[50];
2373
2374	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2375		     in->content[0], in->content[1],
2376		     in->content[2], in->content[3]);
2377	    buf[49] = 0;
2378	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2379		    "input conversion failed due to input error, bytes %s\n",
2380		           buf);
2381        }
2382    }
2383    /*
2384     * Ignore when input buffer is not on a boundary
2385     */
2386    if (ret == -3)
2387        ret = 0;
2388    return (written? written : ret);
2389}
2390
2391/**
2392 * xmlCharEncOutput:
2393 * @output: a parser output buffer
2394 * @init: is this an initialization call without data
2395 *
2396 * Generic front-end for the encoding handler on parser output
2397 * a first call with @init == 1 has to be made first to initiate the
2398 * output in case of non-stateless encoding needing to initiate their
2399 * state or the output (like the BOM in UTF16).
2400 * In case of UTF8 sequence conversion errors for the given encoder,
2401 * the content will be automatically remapped to a CharRef sequence.
2402 *
2403 * Returns the number of byte written if success, or
2404 *     -1 general error
2405 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2406 *        the result of transformation can't fit into the encoding we want), or
2407 */
2408int
2409xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2410{
2411    int ret = -2;
2412    size_t written;
2413    size_t writtentot = 0;
2414    size_t toconv;
2415    int c_in;
2416    int c_out;
2417    xmlBufPtr in;
2418    xmlBufPtr out;
2419    int charref_len = 0;
2420
2421    if ((output == NULL) || (output->encoder == NULL) ||
2422        (output->buffer == NULL) || (output->conv == NULL))
2423        return (-1);
2424    out = output->conv;
2425    in = output->buffer;
2426
2427retry:
2428
2429    written = xmlBufAvail(out);
2430    if (written > 0)
2431        written--; /* count '\0' */
2432
2433    /*
2434     * First specific handling of the initialization call
2435     */
2436    if (init) {
2437        c_in = 0;
2438        c_out = written;
2439        if (output->encoder->output != NULL) {
2440            ret = output->encoder->output(xmlBufEnd(out), &c_out,
2441                                          NULL, &c_in);
2442            if (ret > 0) /* Gennady: check return value */
2443                xmlBufAddLen(out, c_out);
2444        }
2445#ifdef LIBXML_ICONV_ENABLED
2446        else if (output->encoder->iconv_out != NULL) {
2447            ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
2448                                  &c_out, NULL, &c_in);
2449            xmlBufAddLen(out, c_out);
2450        }
2451#endif /* LIBXML_ICONV_ENABLED */
2452#ifdef LIBXML_ICU_ENABLED
2453        else if (output->encoder->uconv_out != NULL) {
2454            ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
2455                                  &c_out, NULL, &c_in);
2456            xmlBufAddLen(out, c_out);
2457        }
2458#endif /* LIBXML_ICU_ENABLED */
2459#ifdef DEBUG_ENCODING
2460	xmlGenericError(xmlGenericErrorContext,
2461		"initialized encoder\n");
2462#endif
2463        return(0);
2464    }
2465
2466    /*
2467     * Conversion itself.
2468     */
2469    toconv = xmlBufUse(in);
2470    if (toconv == 0)
2471        return (0);
2472    if (toconv > 64 * 1024)
2473        toconv = 64 * 1024;
2474    if (toconv * 4 >= written) {
2475        xmlBufGrow(out, toconv * 4);
2476        written = xmlBufAvail(out) - 1;
2477    }
2478    if (written > 256 * 1024)
2479        written = 256 * 1024;
2480
2481    c_in = toconv;
2482    c_out = written;
2483    if (output->encoder->output != NULL) {
2484        ret = output->encoder->output(xmlBufEnd(out), &c_out,
2485                                      xmlBufContent(in), &c_in);
2486        if (c_out > 0) {
2487            xmlBufShrink(in, c_in);
2488            xmlBufAddLen(out, c_out);
2489            writtentot += c_out;
2490        }
2491    }
2492#ifdef LIBXML_ICONV_ENABLED
2493    else if (output->encoder->iconv_out != NULL) {
2494        ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
2495                              &c_out, xmlBufContent(in), &c_in);
2496        xmlBufShrink(in, c_in);
2497        xmlBufAddLen(out, c_out);
2498        writtentot += c_out;
2499        if (ret == -1) {
2500            if (c_out > 0) {
2501                /*
2502                 * Can be a limitation of iconv
2503                 */
2504                charref_len = 0;
2505                goto retry;
2506            }
2507            ret = -3;
2508        }
2509    }
2510#endif /* LIBXML_ICONV_ENABLED */
2511#ifdef LIBXML_ICU_ENABLED
2512    else if (output->encoder->uconv_out != NULL) {
2513        ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
2514                              &c_out, xmlBufContent(in), &c_in);
2515        xmlBufShrink(in, c_in);
2516        xmlBufAddLen(out, c_out);
2517        writtentot += c_out;
2518        if (ret == -1) {
2519            if (c_out > 0) {
2520                /*
2521                 * Can be a limitation of uconv
2522                 */
2523                charref_len = 0;
2524                goto retry;
2525            }
2526            ret = -3;
2527        }
2528    }
2529#endif /* LIBXML_ICU_ENABLED */
2530    else {
2531        xmlEncodingErr(XML_I18N_NO_OUTPUT,
2532                       "xmlCharEncOutFunc: no output function !\n", NULL);
2533        return(-1);
2534    }
2535
2536    if (ret >= 0) output += ret;
2537
2538    /*
2539     * Attempt to handle error cases
2540     */
2541    switch (ret) {
2542        case 0:
2543#ifdef DEBUG_ENCODING
2544	    xmlGenericError(xmlGenericErrorContext,
2545		    "converted %d bytes to %d bytes of output\n",
2546	            c_in, c_out);
2547#endif
2548	    break;
2549        case -1:
2550#ifdef DEBUG_ENCODING
2551	    xmlGenericError(xmlGenericErrorContext,
2552		    "output conversion failed by lack of space\n");
2553#endif
2554	    break;
2555        case -3:
2556#ifdef DEBUG_ENCODING
2557	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2558	            c_in, c_out, (int) xmlBufUse(in));
2559#endif
2560	    break;
2561        case -2: {
2562	    int len = (int) xmlBufUse(in);
2563            xmlChar *content = xmlBufContent(in);
2564	    int cur;
2565
2566	    cur = xmlGetUTF8Char(content, &len);
2567	    if ((charref_len != 0) && (c_out < charref_len)) {
2568		/*
2569		 * We attempted to insert a character reference and failed.
2570		 * Undo what was written and skip the remaining charref.
2571		 */
2572                xmlBufErase(out, c_out);
2573		writtentot -= c_out;
2574		xmlBufShrink(in, charref_len - c_out);
2575		charref_len = 0;
2576
2577		ret = -1;
2578                break;
2579	    } else if (cur > 0) {
2580		xmlChar charref[20];
2581
2582#ifdef DEBUG_ENCODING
2583		xmlGenericError(xmlGenericErrorContext,
2584			"handling output conversion error\n");
2585		xmlGenericError(xmlGenericErrorContext,
2586			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2587			content[0], content[1],
2588			content[2], content[3]);
2589#endif
2590		/*
2591		 * Removes the UTF8 sequence, and replace it by a charref
2592		 * and continue the transcoding phase, hoping the error
2593		 * did not mangle the encoder state.
2594		 */
2595		charref_len = snprintf((char *) &charref[0], sizeof(charref),
2596				 "&#%d;", cur);
2597		xmlBufShrink(in, len);
2598		xmlBufAddHead(in, charref, -1);
2599
2600		goto retry;
2601	    } else {
2602		char buf[50];
2603
2604		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2605			 content[0], content[1],
2606			 content[2], content[3]);
2607		buf[49] = 0;
2608		xmlEncodingErr(XML_I18N_CONV_FAILED,
2609		    "output conversion failed due to conv error, bytes %s\n",
2610			       buf);
2611		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2612		    content[0] = ' ';
2613	    }
2614	    break;
2615	}
2616    }
2617    return(ret);
2618}
2619
2620/**
2621 * xmlCharEncOutFunc:
2622 * @handler:	char enconding transformation data structure
2623 * @out:  an xmlBuffer for the output.
2624 * @in:  an xmlBuffer for the input
2625 *
2626 * Generic front-end for the encoding handler output function
2627 * a first call with @in == NULL has to be made firs to initiate the
2628 * output in case of non-stateless encoding needing to initiate their
2629 * state or the output (like the BOM in UTF16).
2630 * In case of UTF8 sequence conversion errors for the given encoder,
2631 * the content will be automatically remapped to a CharRef sequence.
2632 *
2633 * Returns the number of byte written if success, or
2634 *     -1 general error
2635 *     -2 if the transcoding fails (for *in is not valid utf8 string or
2636 *        the result of transformation can't fit into the encoding we want), or
2637 */
2638int
2639xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2640                  xmlBufferPtr in) {
2641    int ret = -2;
2642    int written;
2643    int writtentot = 0;
2644    int toconv;
2645    int output = 0;
2646    int charref_len = 0;
2647
2648    if (handler == NULL) return(-1);
2649    if (out == NULL) return(-1);
2650
2651retry:
2652
2653    written = out->size - out->use;
2654
2655    if (written > 0)
2656	written--; /* Gennady: count '/0' */
2657
2658    /*
2659     * First specific handling of in = NULL, i.e. the initialization call
2660     */
2661    if (in == NULL) {
2662        toconv = 0;
2663	if (handler->output != NULL) {
2664	    ret = handler->output(&out->content[out->use], &written,
2665				  NULL, &toconv);
2666	    if (ret >= 0) { /* Gennady: check return value */
2667		out->use += written;
2668		out->content[out->use] = 0;
2669	    }
2670	}
2671#ifdef LIBXML_ICONV_ENABLED
2672	else if (handler->iconv_out != NULL) {
2673	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2674				  &written, NULL, &toconv);
2675	    out->use += written;
2676	    out->content[out->use] = 0;
2677	}
2678#endif /* LIBXML_ICONV_ENABLED */
2679#ifdef LIBXML_ICU_ENABLED
2680	else if (handler->uconv_out != NULL) {
2681	    ret = xmlUconvWrapper(handler->uconv_out, 0,
2682                              &out->content[out->use],
2683				              &written, NULL, &toconv);
2684	    out->use += written;
2685	    out->content[out->use] = 0;
2686	}
2687#endif /* LIBXML_ICU_ENABLED */
2688#ifdef DEBUG_ENCODING
2689	xmlGenericError(xmlGenericErrorContext,
2690		"initialized encoder\n");
2691#endif
2692        return(0);
2693    }
2694
2695    /*
2696     * Conversion itself.
2697     */
2698    toconv = in->use;
2699    if (toconv == 0)
2700	return(0);
2701    if (toconv * 4 >= written) {
2702        xmlBufferGrow(out, toconv * 4);
2703	written = out->size - out->use - 1;
2704    }
2705    if (handler->output != NULL) {
2706	ret = handler->output(&out->content[out->use], &written,
2707	                      in->content, &toconv);
2708	if (written > 0) {
2709	    xmlBufferShrink(in, toconv);
2710	    out->use += written;
2711	    writtentot += written;
2712	}
2713	out->content[out->use] = 0;
2714    }
2715#ifdef LIBXML_ICONV_ENABLED
2716    else if (handler->iconv_out != NULL) {
2717	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2718	                      &written, in->content, &toconv);
2719	xmlBufferShrink(in, toconv);
2720	out->use += written;
2721	writtentot += written;
2722	out->content[out->use] = 0;
2723	if (ret == -1) {
2724	    if (written > 0) {
2725		/*
2726		 * Can be a limitation of iconv
2727		 */
2728                charref_len = 0;
2729		goto retry;
2730	    }
2731	    ret = -3;
2732	}
2733    }
2734#endif /* LIBXML_ICONV_ENABLED */
2735#ifdef LIBXML_ICU_ENABLED
2736    else if (handler->uconv_out != NULL) {
2737	ret = xmlUconvWrapper(handler->uconv_out, 0,
2738                              &out->content[out->use],
2739	                      &written, in->content, &toconv);
2740	xmlBufferShrink(in, toconv);
2741	out->use += written;
2742	writtentot += written;
2743	out->content[out->use] = 0;
2744	if (ret == -1) {
2745	    if (written > 0) {
2746		/*
2747		 * Can be a limitation of iconv
2748		 */
2749                charref_len = 0;
2750		goto retry;
2751	    }
2752	    ret = -3;
2753	}
2754    }
2755#endif /* LIBXML_ICU_ENABLED */
2756    else {
2757	xmlEncodingErr(XML_I18N_NO_OUTPUT,
2758		       "xmlCharEncOutFunc: no output function !\n", NULL);
2759	return(-1);
2760    }
2761
2762    if (ret >= 0) output += ret;
2763
2764    /*
2765     * Attempt to handle error cases
2766     */
2767    switch (ret) {
2768        case 0:
2769#ifdef DEBUG_ENCODING
2770	    xmlGenericError(xmlGenericErrorContext,
2771		    "converted %d bytes to %d bytes of output\n",
2772	            toconv, written);
2773#endif
2774	    break;
2775        case -1:
2776#ifdef DEBUG_ENCODING
2777	    xmlGenericError(xmlGenericErrorContext,
2778		    "output conversion failed by lack of space\n");
2779#endif
2780	    break;
2781        case -3:
2782#ifdef DEBUG_ENCODING
2783	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2784	            toconv, written, in->use);
2785#endif
2786	    break;
2787        case -2: {
2788	    int len = in->use;
2789	    const xmlChar *utf = (const xmlChar *) in->content;
2790	    int cur;
2791
2792	    cur = xmlGetUTF8Char(utf, &len);
2793	    if ((charref_len != 0) && (written < charref_len)) {
2794		/*
2795		 * We attempted to insert a character reference and failed.
2796		 * Undo what was written and skip the remaining charref.
2797		 */
2798		out->use -= written;
2799		writtentot -= written;
2800		xmlBufferShrink(in, charref_len - written);
2801		charref_len = 0;
2802
2803		ret = -1;
2804                break;
2805	    } else if (cur > 0) {
2806		xmlChar charref[20];
2807
2808#ifdef DEBUG_ENCODING
2809		xmlGenericError(xmlGenericErrorContext,
2810			"handling output conversion error\n");
2811		xmlGenericError(xmlGenericErrorContext,
2812			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2813			in->content[0], in->content[1],
2814			in->content[2], in->content[3]);
2815#endif
2816		/*
2817		 * Removes the UTF8 sequence, and replace it by a charref
2818		 * and continue the transcoding phase, hoping the error
2819		 * did not mangle the encoder state.
2820		 */
2821		charref_len = snprintf((char *) &charref[0], sizeof(charref),
2822				 "&#%d;", cur);
2823		xmlBufferShrink(in, len);
2824		xmlBufferAddHead(in, charref, -1);
2825
2826		goto retry;
2827	    } else {
2828		char buf[50];
2829
2830		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2831			 in->content[0], in->content[1],
2832			 in->content[2], in->content[3]);
2833		buf[49] = 0;
2834		xmlEncodingErr(XML_I18N_CONV_FAILED,
2835		    "output conversion failed due to conv error, bytes %s\n",
2836			       buf);
2837		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2838		    in->content[0] = ' ';
2839	    }
2840	    break;
2841	}
2842    }
2843    return(ret);
2844}
2845
2846/**
2847 * xmlCharEncCloseFunc:
2848 * @handler:	char enconding transformation data structure
2849 *
2850 * Generic front-end for encoding handler close function
2851 *
2852 * Returns 0 if success, or -1 in case of error
2853 */
2854int
2855xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2856    int ret = 0;
2857    int tofree = 0;
2858    if (handler == NULL) return(-1);
2859    if (handler->name == NULL) return(-1);
2860#ifdef LIBXML_ICONV_ENABLED
2861    /*
2862     * Iconv handlers can be used only once, free the whole block.
2863     * and the associated icon resources.
2864     */
2865    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2866        tofree = 1;
2867	if (handler->iconv_out != NULL) {
2868	    if (iconv_close(handler->iconv_out))
2869		ret = -1;
2870	    handler->iconv_out = NULL;
2871	}
2872	if (handler->iconv_in != NULL) {
2873	    if (iconv_close(handler->iconv_in))
2874		ret = -1;
2875	    handler->iconv_in = NULL;
2876	}
2877    }
2878#endif /* LIBXML_ICONV_ENABLED */
2879#ifdef LIBXML_ICU_ENABLED
2880    if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2881        tofree = 1;
2882	if (handler->uconv_out != NULL) {
2883	    closeIcuConverter(handler->uconv_out);
2884	    handler->uconv_out = NULL;
2885	}
2886	if (handler->uconv_in != NULL) {
2887	    closeIcuConverter(handler->uconv_in);
2888	    handler->uconv_in = NULL;
2889	}
2890    }
2891#endif
2892    if (tofree) {
2893        /* free up only dynamic handlers iconv/uconv */
2894        if (handler->name != NULL)
2895            xmlFree(handler->name);
2896        handler->name = NULL;
2897        xmlFree(handler);
2898    }
2899#ifdef DEBUG_ENCODING
2900    if (ret)
2901        xmlGenericError(xmlGenericErrorContext,
2902		"failed to close the encoding handler\n");
2903    else
2904        xmlGenericError(xmlGenericErrorContext,
2905		"closed the encoding handler\n");
2906#endif
2907
2908    return(ret);
2909}
2910
2911/**
2912 * xmlByteConsumed:
2913 * @ctxt: an XML parser context
2914 *
2915 * This function provides the current index of the parser relative
2916 * to the start of the current entity. This function is computed in
2917 * bytes from the beginning starting at zero and finishing at the
2918 * size in byte of the file if parsing a file. The function is
2919 * of constant cost if the input is UTF-8 but can be costly if run
2920 * on non-UTF-8 input.
2921 *
2922 * Returns the index in bytes from the beginning of the entity or -1
2923 *         in case the index could not be computed.
2924 */
2925long
2926xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2927    xmlParserInputPtr in;
2928
2929    if (ctxt == NULL) return(-1);
2930    in = ctxt->input;
2931    if (in == NULL)  return(-1);
2932    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2933        unsigned int unused = 0;
2934	xmlCharEncodingHandler * handler = in->buf->encoder;
2935        /*
2936	 * Encoding conversion, compute the number of unused original
2937	 * bytes from the input not consumed and substract that from
2938	 * the raw consumed value, this is not a cheap operation
2939	 */
2940        if (in->end - in->cur > 0) {
2941	    unsigned char convbuf[32000];
2942	    const unsigned char *cur = (const unsigned char *)in->cur;
2943	    int toconv = in->end - in->cur, written = 32000;
2944
2945	    int ret;
2946
2947	    if (handler->output != NULL) {
2948	        do {
2949		    toconv = in->end - cur;
2950		    written = 32000;
2951		    ret = handler->output(&convbuf[0], &written,
2952				      cur, &toconv);
2953		    if (ret == -1) return(-1);
2954		    unused += written;
2955		    cur += toconv;
2956		} while (ret == -2);
2957#ifdef LIBXML_ICONV_ENABLED
2958	    } else if (handler->iconv_out != NULL) {
2959	        do {
2960		    toconv = in->end - cur;
2961		    written = 32000;
2962		    ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2963	                      &written, cur, &toconv);
2964		    if (ret < 0) {
2965		        if (written > 0)
2966			    ret = -2;
2967			else
2968			    return(-1);
2969		    }
2970		    unused += written;
2971		    cur += toconv;
2972		} while (ret == -2);
2973#endif
2974#ifdef LIBXML_ICU_ENABLED
2975	    } else if (handler->uconv_out != NULL) {
2976	        do {
2977		    toconv = in->end - cur;
2978		    written = 32000;
2979		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2980	                      &written, cur, &toconv);
2981		    if (ret < 0) {
2982		        if (written > 0)
2983			    ret = -2;
2984			else
2985			    return(-1);
2986		    }
2987		    unused += written;
2988		    cur += toconv;
2989		} while (ret == -2);
2990#endif
2991            } else {
2992	        /* could not find a converter */
2993	        return(-1);
2994	    }
2995	}
2996	if (in->buf->rawconsumed < unused)
2997	    return(-1);
2998	return(in->buf->rawconsumed - unused);
2999    }
3000    return(in->consumed + (in->cur - in->base));
3001}
3002
3003#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
3004#ifdef LIBXML_ISO8859X_ENABLED
3005
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops early, without error, once @out
 * is full; at most *@outlen bytes are ever written.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed sequence or character not in the target set),
 *     -3 if the input ends inside a multi-byte sequence, or -1 if a
 *     required argument is NULL. A NULL @in is accepted and yields 0.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* every character yields exactly one byte: stop when full */
        if (out >= outend)
            break;

        d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* lead bits select a 64-entry block, trail bits the entry */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if ((inend - in) < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* three chained lookups: lead -> block, c1 -> block, c2 -> byte */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        /* only now is the character fully converted */
        processed = in;
    }
    ret = (int) (out - outstart);

done:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(ret);
}
3124
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: code points for bytes 0x80..0xFF, indexed by byte - 0x80;
 *                a 0 entry marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when the
 * remaining room in @out may not hold the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or an undefined input byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /* require room for a 3-byte sequence before each round */
    while ((in < inend) && ((outend - out) > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* cap the ASCII run so that it cannot overrun the output */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* test the bound first: in may already sit at the end of input */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* up to two output bytes may be left: fill them with ASCII if any */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3190
3191
3192/************************************************************************
3193 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3194 ************************************************************************/
3195
/*
 * ISO-8859-2 to Unicode table: 128 code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8 reads its @unicodetable argument as table[byte - 0x80]
 * and treats a 0 entry as an undefined byte.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3214
/*
 * Unicode to ISO-8859-2 table, in the layout UTF8ToISO8859x expects for
 * its @xlattable argument:
 *   [0..31]   block number, indexed by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, indexed by the low 4 bits of a 3-byte lead
 *   [48..]    64-entry blocks indexed by the low 6 bits of a trailing
 *             byte; a 3-byte sequence goes through two block lookups
 * A final value of 0 means the character is not in the character set.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3244
/*
 * ISO-8859-3 to Unicode table: 128 code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8 reads its @unicodetable argument as table[byte - 0x80]
 * and treats a 0 entry (several are present here) as an undefined byte.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3263
/*
 * Unicode to ISO-8859-3 table, in the layout UTF8ToISO8859x expects for
 * its @xlattable argument:
 *   [0..31]   block number, indexed by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, indexed by the low 4 bits of a 3-byte lead
 *   [48..]    64-entry blocks indexed by the low 6 bits of a trailing
 *             byte; a 3-byte sequence goes through two block lookups
 * A final value of 0 means the character is not in the character set.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3297
/*
 * ISO-8859-4 to Unicode table: 128 code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8 reads its @unicodetable argument as table[byte - 0x80]
 * and treats a 0 entry as an undefined byte.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3316
/*
 * Unicode to ISO-8859-4 table, in the layout UTF8ToISO8859x expects for
 * its @xlattable argument:
 *   [0..31]   block number, indexed by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, indexed by the low 4 bits of a 3-byte lead
 *   [48..]    64-entry blocks indexed by the low 6 bits of a trailing
 *             byte; a 3-byte sequence goes through two block lookups
 * A final value of 0 means the character is not in the character set.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3346
/*
 * ISO-8859-5 to Unicode table: 128 code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8 reads its @unicodetable argument as table[byte - 0x80]
 * and treats a 0 entry as an undefined byte.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
3365
/*
 * Unicode to ISO-8859-5 table, in the layout UTF8ToISO8859x expects for
 * its @xlattable argument:
 *   [0..31]   block number, indexed by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, indexed by the low 4 bits of a 3-byte lead
 *   [48..]    64-entry blocks indexed by the low 6 bits of a trailing
 *             byte; a 3-byte sequence goes through two block lookups
 * A final value of 0 means the character is not in the character set.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3395
/*
 * ISO-8859-6 to Unicode table: 128 code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8 reads its @unicodetable argument as table[byte - 0x80]
 * and treats a 0 entry (many are present here) as an undefined byte.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
3414
/*
 * Unicode to ISO-8859-6 table, in the layout UTF8ToISO8859x expects for
 * its @xlattable argument:
 *   [0..31]   block number, indexed by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, indexed by the low 4 bits of a 3-byte lead
 *   [48..]    64-entry blocks indexed by the low 6 bits of a trailing
 *             byte; a 3-byte sequence goes through two block lookups
 * A final value of 0 means the character is not in the character set.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3440
/*
 * ISO-8859-7 to Unicode table: 128 code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8 reads its @unicodetable argument as table[byte - 0x80]
 * and treats a 0 entry (several are present here) as an undefined byte.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
};
3459
/*
 * Unicode to ISO-8859-7 table, in the layout UTF8ToISO8859x expects for
 * its @xlattable argument:
 *   [0..31]   block number, indexed by the low 5 bits of a 2-byte lead
 *   [32..47]  block number, indexed by the low 4 bits of a 3-byte lead
 *   [48..]    64-entry blocks indexed by the low 6 bits of a trailing
 *             byte; a 3-byte sequence goes through two block lookups
 * A final value of 0 means the character is not in the character set.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3493
/*
 * ISO-8859-8 to Unicode table: 128 code points, one per byte 0x80..0xFF.
 * ISO8859xToUTF8 reads its @unicodetable argument as table[byte - 0x80]
 * and treats a 0 entry (many are present here) as an undefined byte.
 * NOTE(review): the call site passing this table is outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
3512
/*
 * UTF-8 to ISO-8859-8 lookup data; UTF8ToISO8859_8 passes it to
 * UTF8ToISO8859x. The declared size is 48 index bytes plus 7 runs of
 * 64 bytes each.
 * NOTE(review): UTF8ToISO8859x is not visible here. The run labels below
 * assume it picks a run through the index bytes using the UTF-8 lead byte
 * and then indexes the run with the low 6 bits of the last byte, with 0
 * meaning "no mapping". The ranges were cross-checked against
 * xmlunicodetable_ISO8859_8 -- confirm against the helper.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    /* index bytes (48) */
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    /*
     * run 2: referenced by index byte 0, holds a single \xff.
     * NOTE(review): possibly a generator artifact of the 0x0000 placeholder
     * in the last entry of xmlunicodetable_ISO8859_8 -- verify.
     */
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 3: U+00C0..U+00FF (only U+00D7 and U+00F7 are mapped) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 4: intermediate index for 3-byte sequences led by 0xE2 */
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 5: U+2000..U+203F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 6: U+05C0..U+05FF */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3546
/*
 * ISO-8859-9 to Unicode table; ISO8859_9ToUTF8 passes it to ISO8859xToUTF8.
 * It holds 128 code points. The first 32 entries are 0x0080..0x009f in
 * order, so entry i presumably describes byte 0x80 + i (confirm in
 * ISO8859xToUTF8).
 * Only six entries differ from their position + 0x80: 0x011e, 0x0130,
 * 0x015e, 0x011f, 0x0131 and 0x015f.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3565
/*
 * UTF-8 to ISO-8859-9 lookup data; UTF8ToISO8859_9 passes it to
 * UTF8ToISO8859x. The declared size is 48 index bytes plus 5 runs of
 * 64 bytes each.
 * NOTE(review): UTF8ToISO8859x is not visible here. The run labels below
 * assume it picks a run through the index bytes using the UTF-8 lead byte
 * and then indexes the run with the low 6 bits of the last byte, with 0
 * meaning "no mapping". The ranges were cross-checked against
 * xmlunicodetable_ISO8859_9 -- confirm against the helper.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    /* index bytes (48) */
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    /* run 2: U+00C0..U+00FF */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    /* run 3: U+0100..U+013F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 4: U+0140..U+017F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3591
/*
 * ISO-8859-10 to Unicode table; ISO8859_10ToUTF8 passes it to
 * ISO8859xToUTF8. It holds 128 code points. The first 32 entries are
 * 0x0080..0x009f in order, so entry i presumably describes byte 0x80 + i
 * (confirm in ISO8859xToUTF8). Every entry is non-zero.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3610
/*
 * UTF-8 to ISO-8859-10 lookup data; UTF8ToISO8859_10 passes it to
 * UTF8ToISO8859x. The declared size is 48 index bytes plus 7 runs of
 * 64 bytes each.
 * NOTE(review): UTF8ToISO8859x is not visible here. The run labels below
 * assume it picks a run through the index bytes using the UTF-8 lead byte
 * and then indexes the run with the low 6 bits of the last byte, with 0
 * meaning "no mapping". The ranges were cross-checked against
 * xmlunicodetable_ISO8859_10 -- confirm against the helper.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    /* index bytes (48) */
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 2: U+0100..U+013F */
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    /* run 3: U+0140..U+017F */
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    /* run 4: intermediate index for 3-byte sequences led by 0xE2 */
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 5: U+2000..U+203F (only U+2015 is mapped) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 6: U+00C0..U+00FF */
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3644
/*
 * ISO-8859-11 to Unicode table; ISO8859_11ToUTF8 passes it to
 * ISO8859xToUTF8. It holds 128 code points. The first 32 entries are
 * 0x0080..0x009f in order, so entry i presumably describes byte 0x80 + i
 * (confirm in ISO8859xToUTF8).
 * NOTE(review): the 0x0000 entries look like "byte not assigned" markers;
 * the consumer is not visible here, verify how it treats them.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    /* entries 0..31: each value equals its position + 0x80 */
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* entries 32..127: 0x00a0 followed by the 0x0e01..0x0e5b range */
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3663
/*
 * UTF-8 to ISO-8859-11 lookup data; UTF8ToISO8859_11 passes it to
 * UTF8ToISO8859x. The declared size is 48 index bytes plus 6 runs of
 * 64 bytes each.
 * NOTE(review): UTF8ToISO8859x is not visible here. The run labels below
 * assume it picks a run through the index bytes using the UTF-8 lead byte
 * and then indexes the run with the low 6 bits of the last byte, with 0
 * meaning "no mapping". The ranges were cross-checked against
 * xmlunicodetable_ISO8859_11 -- confirm against the helper.
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    /* index bytes (48) */
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 2: intermediate index for 3-byte sequences led by 0xE0 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    /* run 3: U+0E00..U+0E3F */
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    /*
     * run 4: referenced by index byte 0, holds a single \xff.
     * NOTE(review): possibly a generator artifact of the 0x0000 placeholder
     * in the last entry of xmlunicodetable_ISO8859_11 -- verify.
     */
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 5: U+0E40..U+0E7F */
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3693
/*
 * ISO-8859-13 to Unicode table; ISO8859_13ToUTF8 passes it to
 * ISO8859xToUTF8. It holds 128 code points. The first 32 entries are
 * 0x0080..0x009f in order, so entry i presumably describes byte 0x80 + i
 * (confirm in ISO8859xToUTF8). Every entry is non-zero.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3712
/*
 * UTF-8 to ISO-8859-13 lookup data; UTF8ToISO8859_13 passes it to
 * UTF8ToISO8859x. The declared size is 48 index bytes plus 7 runs of
 * 64 bytes each.
 * NOTE(review): UTF8ToISO8859x is not visible here. The run labels below
 * assume it picks a run through the index bytes using the UTF-8 lead byte
 * and then indexes the run with the low 6 bits of the last byte, with 0
 * meaning "no mapping". The ranges were cross-checked against
 * xmlunicodetable_ISO8859_13 -- confirm against the helper.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    /* index bytes (48) */
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    /* run 2: intermediate index for 3-byte sequences led by 0xE2 */
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 3: U+2000..U+203F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 4: U+00C0..U+00FF */
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    /* run 5: U+0140..U+017F */
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    /* run 6: U+0100..U+013F */
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3746
/*
 * ISO-8859-14 to Unicode table; ISO8859_14ToUTF8 passes it to
 * ISO8859xToUTF8. It holds 128 code points. The first 32 entries are
 * 0x0080..0x009f in order, so entry i presumably describes byte 0x80 + i
 * (confirm in ISO8859xToUTF8). Every entry is non-zero.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3765
/*
 * UTF-8 to ISO-8859-14 lookup data; UTF8ToISO8859_14 passes it to
 * UTF8ToISO8859x. The declared size is 48 index bytes plus 10 runs of
 * 64 bytes each.
 * NOTE(review): UTF8ToISO8859x is not visible here. The run labels below
 * assume it picks a run through the index bytes using the UTF-8 lead byte
 * and then indexes the run with the low 6 bits of the last byte, with 0
 * meaning "no mapping". The ranges were cross-checked against
 * xmlunicodetable_ISO8859_14 -- confirm against the helper.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    /* index bytes (48) */
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 2: intermediate index for 3-byte sequences led by 0xE1 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    /* run 3: U+1E00..U+1E3F */
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 4: U+0100..U+013F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 5: U+1E80..U+1EBF */
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 6: U+1EC0..U+1EFF */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 7: U+0140..U+017F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    /* run 8: U+1E40..U+1E7F */
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 9: U+00C0..U+00FF */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3811
/*
 * ISO-8859-15 to Unicode table; ISO8859_15ToUTF8 passes it to
 * ISO8859xToUTF8. It holds 128 code points. The first 32 entries are
 * 0x0080..0x009f in order, so entry i presumably describes byte 0x80 + i
 * (confirm in ISO8859xToUTF8).
 * Only eight entries differ from their position + 0x80: 0x20ac, 0x0160,
 * 0x0161, 0x017d, 0x017e, 0x0152, 0x0153 and 0x0178.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3830
/*
 * UTF-8 to ISO-8859-15 lookup data; UTF8ToISO8859_15 passes it to
 * UTF8ToISO8859x. The declared size is 48 index bytes plus 6 runs of
 * 64 bytes each.
 * NOTE(review): UTF8ToISO8859x is not visible here. The run labels below
 * assume it picks a run through the index bytes using the UTF-8 lead byte
 * and then indexes the run with the low 6 bits of the last byte, with 0
 * meaning "no mapping". The ranges were cross-checked against
 * xmlunicodetable_ISO8859_15 -- confirm against the helper.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    /* index bytes (48) */
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    /* run 2: intermediate index for 3-byte sequences led by 0xE2 */
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 3: U+2080..U+20BF (only U+20AC is mapped) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 4: U+0140..U+017F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    /* run 5: U+00C0..U+00FF */
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3860
/*
 * ISO-8859-16 to Unicode table; ISO8859_16ToUTF8 passes it to
 * ISO8859xToUTF8. It holds 128 code points. The first 32 entries are
 * 0x0080..0x009f in order, so entry i presumably describes byte 0x80 + i
 * (confirm in ISO8859xToUTF8). Every entry is non-zero.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3879
/*
 * UTF-8 to ISO-8859-16 lookup data; UTF8ToISO8859_16 passes it to
 * UTF8ToISO8859x. The declared size is 48 index bytes plus 9 runs of
 * 64 bytes each.
 * NOTE(review): UTF8ToISO8859x is not visible here. The run labels below
 * assume it picks a run through the index bytes using the UTF-8 lead byte
 * and then indexes the run with the low 6 bits of the last byte, with 0
 * meaning "no mapping". The ranges were cross-checked against
 * xmlunicodetable_ISO8859_16 -- confirm against the helper.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    /* index bytes (48) */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 0: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 1: U+0080..U+00BF */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* run 2: U+0100..U+013F */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 3: U+0140..U+017F */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* run 4: intermediate index for 3-byte sequences led by 0xE2 */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 5: U+2080..U+20BF (only U+20AC is mapped) */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 6: U+2000..U+203F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 7: U+0200..U+023F */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* run 8: U+00C0..U+00FF */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3921
3922
3923/*
3924 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3925 */
3926
3927static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3928    const unsigned char* in, int *inlen) {
3929    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3930}
3931static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3932    const unsigned char* in, int *inlen) {
3933    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3934}
3935
3936static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3937    const unsigned char* in, int *inlen) {
3938    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3939}
3940static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3941    const unsigned char* in, int *inlen) {
3942    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3943}
3944
3945static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3946    const unsigned char* in, int *inlen) {
3947    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3948}
3949static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3950    const unsigned char* in, int *inlen) {
3951    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3952}
3953
3954static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3955    const unsigned char* in, int *inlen) {
3956    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3957}
3958static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3959    const unsigned char* in, int *inlen) {
3960    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3961}
3962
3963static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3964    const unsigned char* in, int *inlen) {
3965    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3966}
3967static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3968    const unsigned char* in, int *inlen) {
3969    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3970}
3971
3972static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3973    const unsigned char* in, int *inlen) {
3974    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3975}
3976static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3977    const unsigned char* in, int *inlen) {
3978    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3979}
3980
3981static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3982    const unsigned char* in, int *inlen) {
3983    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3984}
3985static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3986    const unsigned char* in, int *inlen) {
3987    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3988}
3989
3990static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3991    const unsigned char* in, int *inlen) {
3992    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3993}
3994static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3995    const unsigned char* in, int *inlen) {
3996    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3997}
3998
3999static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
4000    const unsigned char* in, int *inlen) {
4001    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
4002}
4003static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
4004    const unsigned char* in, int *inlen) {
4005    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
4006}
4007
4008static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
4009    const unsigned char* in, int *inlen) {
4010    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
4011}
4012static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
4013    const unsigned char* in, int *inlen) {
4014    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
4015}
4016
4017static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
4018    const unsigned char* in, int *inlen) {
4019    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
4020}
4021static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
4022    const unsigned char* in, int *inlen) {
4023    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
4024}
4025
4026static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
4027    const unsigned char* in, int *inlen) {
4028    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
4029}
4030static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
4031    const unsigned char* in, int *inlen) {
4032    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
4033}
4034
4035static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
4036    const unsigned char* in, int *inlen) {
4037    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
4038}
4039static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
4040    const unsigned char* in, int *inlen) {
4041    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
4042}
4043
4044static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
4045    const unsigned char* in, int *inlen) {
4046    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
4047}
4048static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
4049    const unsigned char* in, int *inlen) {
4050    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
4051}
4052
4053static void
4054xmlRegisterCharEncodingHandlersISO8859x (void) {
4055    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
4056    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
4057    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
4058    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
4059    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
4060    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
4061    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
4062    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
4063    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
4064    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
4065    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
4066    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
4067    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
4068    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
4069}
4070
4071#endif
4072#endif
4073
4074#define bottom_encoding
4075#include "elfgcchack.h"
4076