1/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1]   ISO Latin-1 characters codes.
9 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10 *                Worldwide Character Encoding -- Version 1.0", Addison-
11 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12 *                described in Unicode Technical Report #4.
13 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14 *                Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * daniel@veillard.com
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21 */
22
23#define IN_LIBXML
24#include "libxml.h"
25
26#include <string.h>
27
28#ifdef HAVE_CTYPE_H
29#include <ctype.h>
30#endif
31#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
34#ifdef LIBXML_ICONV_ENABLED
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#endif
39#include <libxml/encoding.h>
40#include <libxml/xmlmemory.h>
41#ifdef LIBXML_HTML_ENABLED
42#include <libxml/HTMLparser.h>
43#endif
44#include <libxml/globals.h>
45#include <libxml/xmlerror.h>
46
/*
 * File-local handler pointers for the two UTF-16 byte orders.
 * NOTE(review): they are not assigned anywhere in this part of the file;
 * presumably they are filled in when the handlers are registered - confirm.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/*
 * One entry of the encoding alias table: @alias is the (upper-cased) alias
 * string and @name the encoding name it stands for. Both strings are
 * heap copies owned by the table (see xmlAddEncodingAlias and
 * xmlCleanupEncodingAliases).
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;   /* encoding name the alias resolves to */
    const char *alias;  /* alias string, stored upper-cased */
};

/* Dynamically grown alias table, number of entries in use, and capacity. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

#ifdef LIBXML_ICONV_ENABLED
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
/* Forward declaration, only needed when iconv is not available. */
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Non-zero when the host is treated as little-endian; defaults to 1.
 * NOTE(review): presumably probed and updated during initialization
 * elsewhere in the file - confirm.
 */
static int xmlLittleEndian = 1;
72
/**
 * xmlEncodingErrMemory:
 * @extra:  extra information, forwarded unchanged to the error routine
 *
 * Handle an out of memory condition by reporting XML_ERR_NO_MEMORY
 * in the XML_FROM_I18N error domain.
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
84
/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message
 * @val:  a string value associated with the error; it is passed both as
 *        an extra string and as the trailing argument following @msg
 *
 * Raise an encoding error in the XML_FROM_I18N domain with level
 * XML_ERR_FATAL.
 */
static void
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}
99
100/************************************************************************
101 *									*
102 *		Conversions To/From UTF8 encoding			*
103 *									*
104 ************************************************************************/
105
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every input
 * byte below 0x80 is copied as is and needs exactly one output byte.
 * Conversion stops when the input is exhausted or @out is full.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL or a byte >= 0x80 is met in the input.
 * The value of @inlen after return is the number of octets consumed
 *     (the offending byte, if any, is not counted).
 * The value of @outlen after return is the number of octets produced.
 * Neither length is modified when -1 is returned for a NULL argument.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    const unsigned char* inend;
    unsigned char* outend;

    if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * One output byte per input byte: only require a single free byte,
     * so that small output buffers still make progress.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not an ASCII character, report what was converted so far */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
150
151#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. If @in is NULL this is an initialization call and
 * nothing is produced.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside of ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only fully converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else {
            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }

        /* incomplete sequence at the end of the block: wait for more input */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte: the sequence is malformed.
                 * Fail instead of emitting the partially decoded value.
                 */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c <<= 6;
            c |= d & 0x3F;
        }

        /*
         * assertion: c is a single UCS-4 value
         * NOTE(review): overlong encodings of ASCII characters (for
         * instance 0xC1 0x81) still decode to a value below 0x80 and
         * are accepted here.
         */
        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            /* no chance for this in Ascii */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);
}
235#endif /* LIBXML_OUTPUT_ENABLED */
236
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * expanded to a two byte UTF-8 sequence. Conversion stops when the input
 * is exhausted or when the next character does not fit in @out anymore.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    /*
     * The remaining room is always computed as (outend - out), so no
     * pointer before the start of @out is ever formed, even when
     * *outlen is 0.
     */
    while (in < inend) {
        c = *in;
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = c;
        } else {
            if (outend - out < 2)
                break;
            *out++ = ((c >> 6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        }
        in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
285
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Identity "conversion" used for UTF-8 handling: copies as many bytes
 * as fit, i.e. the smaller of *@inlenb and *@outlen.
 *
 * Returns the number of bytes copied, or -1 if an argument is NULL or
 *     the number of bytes to copy would be negative.
 *     On success both *@outlen and *@inlenb are set to the number of
 *     bytes copied; they are left untouched when -1 is returned.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL))
        return(-1);
    if ((inb == NULL) || (inlenb == NULL))
        return(-1);

    /* copy whichever is smaller: the available input or the room left */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
321
322
323#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A NULL @in is an initialization call for which
 * nothing is produced.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (invalid lead or continuation byte, or a character above
 *     0xFF), or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only characters that were fully converted.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcEnd;
    const unsigned char *committed;
    unsigned char *dst;
    const unsigned char *dstEnd;
    unsigned int code, byte;
    int extra;
    int status = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization call: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    committed = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        /* classify the lead byte */
        byte = *src++;
        if (byte < 0x80) {
            code = byte;
            extra = 0;
        } else if ((byte < 0xC0) || (byte >= 0xF8)) {
            /* stray continuation byte, or a lead byte never valid here */
            status = -2;
            break;
        } else if (byte < 0xE0) {
            code = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            code = byte & 0x0F;
            extra = 2;
        } else {
            code = byte & 0x07;
            extra = 3;
        }

        /* sequence cut by the end of the block: keep it for later */
        if (srcEnd - src < extra)
            break;

        while (extra > 0) {
            byte = *src++;
            if ((byte & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            code = (code << 6) | (byte & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        /* only U+0000..U+00FF exist in ISO Latin 1 */
        if (code > 0xFF) {
            status = -2;
            break;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = code;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    return((status != 0) ? status : *outlen);
}
413#endif /* LIBXML_OUTPUT_ENABLED */
414
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so it needs
 * no particular alignment and the result does not depend on the byte
 * order of the host. A character is only emitted when it fits completely
 * in @out; a trailing odd byte or a high surrogate whose low surrogate
 * has not arrived yet is left unconsumed.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or a
 *     length is negative, or -2 if the transcoding fails (unpaired
 *     surrogate in the input).
 *     The value of *@inlenb after return is the number of octets
 *     consumed and *@outlen the number of octets produced; both are left
 *     untouched when -1 is returned.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    outend = out + *outlen;
    /* only whole 16 bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (in < inend) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0x0400) != 0) {
                /* low surrogate without a preceding high surrogate */
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            if (next >= inend)
                break;                   /* wait for the low surrogate */
            d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a partial character */
        if (outend - out < needed)
            break;

        if (needed == 1) {
            *out++ = c;
        } else if (needed == 2) {
            *out++ = ((c >>  6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else if (needed == 3) {
            *out++ = ((c >> 12) & 0x0F) | 0xE0;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            *out++ = ((c >> 18) & 0x07) | 0xF0;
            *out++ = ((c >> 12) & 0x3F) | 0x80;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
502
503#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No Byte Order Mark is produced. The output is
 * written byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host. A NULL @in is
 * an initialization call for which nothing is produced.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (invalid lead or
 *     continuation byte, or a value above 0x10FFFF).
 *     The value of *@inlen after return is the number of octets consumed
 *     (fully converted characters only) and *@outlen the number of
 *     octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int trailing;
    int ret = 0;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    /* @in always points to the first byte not converted yet */
    while (in < inend) {
        next = in;
        d = *next++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* incomplete sequence at the end of the block: wait for more */
        if (inend - next < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *next++;
            if ((d & 0xC0) != 0x80) {
                /* malformed sequence, do not emit a partial value */
                ret = -2;
                break;
            }
            c <<= 6;
            c |= d & 0x3F;
        }
        if (ret < 0)
            break;

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = c & 0xFF;
            *out++ = (c >> 8) & 0xFF;
        } else if (c < 0x110000) {
            /* needs a surrogate pair */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            d = 0xD800 | (c >> 10);
            *out++ = d & 0xFF;
            *out++ = (d >> 8) & 0xFF;
            d = 0xDC00 | (c & 0x03FF);
            *out++ = d & 0xFF;
            *out++ = (d >> 8) & 0xFF;
        } else {
            /* beyond the range UTF-16 can represent */
            ret = -2;
            break;
        }
        in = next;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    return((ret < 0) ? ret : *outlen);
}
612
613/**
614 * UTF8ToUTF16:
615 * @outb:  a pointer to an array of bytes to store the result
616 * @outlen:  the length of @outb
617 * @in:  a pointer to an array of UTF-8 chars
618 * @inlen:  the length of @in
619 *
620 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
621 * block of chars out.
622 *
623 * Returns the number of bytes written, or -1 if lack of space, or -2
624 *     if the transcoding failed.
625 */
626static int
627UTF8ToUTF16(unsigned char* outb, int *outlen,
628            const unsigned char* in, int *inlen)
629{
630    if (in == NULL) {
631	/*
632	 * initialization, add the Byte Order Mark for UTF-16LE
633	 */
634        if (*outlen >= 2) {
635	    outb[0] = 0xFF;
636	    outb[1] = 0xFE;
637	    *outlen = 2;
638	    *inlen = 0;
639#ifdef DEBUG_ENCODING
640            xmlGenericError(xmlGenericErrorContext,
641		    "Added FFFE Byte Order Mark\n");
642#endif
643	    return(2);
644	}
645	*outlen = 0;
646	*inlen = 0;
647	return(0);
648    }
649    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
650}
651#endif /* LIBXML_OUTPUT_ENABLED */
652
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so it needs
 * no particular alignment and the result does not depend on the byte
 * order of the host. A character is only emitted when it fits completely
 * in @out; a trailing odd byte or a high surrogate whose low surrogate
 * has not arrived yet is left unconsumed.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or a
 *     length is negative, or -2 if the transcoding fails (unpaired
 *     surrogate in the input).
 * The value of *@inlenb after return is the number of octets consumed
 *     and *@outlen the number of octets produced; both are left
 *     untouched when -1 is returned.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    outend = out + *outlen;
    /* only whole 16 bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        next = in + 2;

        if ((c & 0xF800) == 0xD800) {    /* surrogates */
            if ((c & 0x0400) != 0) {
                /* low surrogate without a preceding high surrogate */
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            if (next >= inend)
                break;                   /* wait for the low surrogate */
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /*
         * Never emit a partial character: a truncated UTF-8 sequence
         * in @out could not be completed by a later call.
         */
        if (outend - out < needed)
            break;

        if (needed == 1) {
            *out++ = c;
        } else if (needed == 2) {
            *out++ = ((c >>  6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else if (needed == 3) {
            *out++ = ((c >> 12) & 0x0F) | 0xE0;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            *out++ = ((c >> 18) & 0x07) | 0xF0;
            *out++ = ((c >> 12) & 0x3F) | 0x80;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
744
745#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No Byte Order Mark is produced. The output is
 * written byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the byte order of the host. A NULL @in is
 * an initialization call for which nothing is produced.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (invalid lead or
 *     continuation byte, or a value above 0x10FFFF).
 *     The value of *@inlen after return is the number of octets consumed
 *     (fully converted characters only) and *@outlen the number of
 *     octets produced, in bytes on every return path.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* next;
    unsigned int c, d;
    int trailing;
    int ret = 0;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    /* @in always points to the first byte not converted yet */
    while (in < inend) {
        next = in;
        d = *next++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* incomplete sequence at the end of the block: wait for more */
        if (inend - next < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *next++;
            if ((d & 0xC0) != 0x80) {
                /* malformed sequence, do not emit a partial value */
                ret = -2;
                break;
            }
            c <<= 6;
            c |= d & 0x3F;
        }
        if (ret < 0)
            break;

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (c >> 8) & 0xFF;
            *out++ = c & 0xFF;
        } else if (c < 0x110000) {
            /* needs a surrogate pair */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            d = 0xD800 | (c >> 10);
            *out++ = (d >> 8) & 0xFF;
            *out++ = d & 0xFF;
            d = 0xDC00 | (c & 0x03FF);
            *out++ = (d >> 8) & 0xFF;
            *out++ = d & 0xFF;
        } else {
            /* beyond the range UTF-16 can represent */
            ret = -2;
            break;
        }
        in = next;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    return((ret < 0) ? ret : *outlen);
}
851#endif /* LIBXML_OUTPUT_ENABLED */
852
853/************************************************************************
854 *									*
855 *		Generic encoding handling routines			*
856 *									*
857 ************************************************************************/
858
859/**
860 * xmlDetectCharEncoding:
861 * @in:  a pointer to the first bytes of the XML entity, must be at least
862 *       2 bytes long (at least 4 if encoding is UTF4 variant).
863 * @len:  pointer to the length of the buffer
864 *
865 * Guess the encoding of the entity using the first bytes of the entity content
866 * according to the non-normative appendix F of the XML-1.0 recommendation.
867 *
868 * Returns one of the XML_CHAR_ENCODING_... values.
869 */
870xmlCharEncoding
871xmlDetectCharEncoding(const unsigned char* in, int len)
872{
873    if (in == NULL)
874        return(XML_CHAR_ENCODING_NONE);
875    if (len >= 4) {
876	if ((in[0] == 0x00) && (in[1] == 0x00) &&
877	    (in[2] == 0x00) && (in[3] == 0x3C))
878	    return(XML_CHAR_ENCODING_UCS4BE);
879	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
880	    (in[2] == 0x00) && (in[3] == 0x00))
881	    return(XML_CHAR_ENCODING_UCS4LE);
882	if ((in[0] == 0x00) && (in[1] == 0x00) &&
883	    (in[2] == 0x3C) && (in[3] == 0x00))
884	    return(XML_CHAR_ENCODING_UCS4_2143);
885	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
886	    (in[2] == 0x00) && (in[3] == 0x00))
887	    return(XML_CHAR_ENCODING_UCS4_3412);
888	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
889	    (in[2] == 0xA7) && (in[3] == 0x94))
890	    return(XML_CHAR_ENCODING_EBCDIC);
891	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
892	    (in[2] == 0x78) && (in[3] == 0x6D))
893	    return(XML_CHAR_ENCODING_UTF8);
894	/*
895	 * Although not part of the recommendation, we also
896	 * attempt an "auto-recognition" of UTF-16LE and
897	 * UTF-16BE encodings.
898	 */
899	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
900	    (in[2] == 0x3F) && (in[3] == 0x00))
901	    return(XML_CHAR_ENCODING_UTF16LE);
902	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
903	    (in[2] == 0x00) && (in[3] == 0x3F))
904	    return(XML_CHAR_ENCODING_UTF16BE);
905    }
906    if (len >= 3) {
907	/*
908	 * Errata on XML-1.0 June 20 2001
909	 * We now allow an UTF8 encoded BOM
910	 */
911	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
912	    (in[2] == 0xBF))
913	    return(XML_CHAR_ENCODING_UTF8);
914    }
915    /* For UTF-16 we can recognize by the BOM */
916    if (len >= 2) {
917	if ((in[0] == 0xFE) && (in[1] == 0xFF))
918	    return(XML_CHAR_ENCODING_UTF16BE);
919	if ((in[0] == 0xFF) && (in[1] == 0xFE))
920	    return(XML_CHAR_ENCODING_UTF16LE);
921    }
922    return(XML_CHAR_ENCODING_NONE);
923}
924
925/**
926 * xmlCleanupEncodingAliases:
927 *
928 * Unregisters all aliases
929 */
930void
931xmlCleanupEncodingAliases(void) {
932    int i;
933
934    if (xmlCharEncodingAliases == NULL)
935	return;
936
937    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
938	if (xmlCharEncodingAliases[i].name != NULL)
939	    xmlFree((char *) xmlCharEncodingAliases[i].name);
940	if (xmlCharEncodingAliases[i].alias != NULL)
941	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
942    }
943    xmlCharEncodingAliasesNb = 0;
944    xmlCharEncodingAliasesMax = 0;
945    xmlFree(xmlCharEncodingAliases);
946    xmlCharEncodingAliases = NULL;
947}
948
949/**
950 * xmlGetEncodingAlias:
951 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
952 *
953 * Lookup an encoding name for the given alias.
954 *
955 * Returns NULL if not found, otherwise the original name
956 */
957const char *
958xmlGetEncodingAlias(const char *alias) {
959    int i;
960    char upper[100];
961
962    if (alias == NULL)
963	return(NULL);
964
965    if (xmlCharEncodingAliases == NULL)
966	return(NULL);
967
968    for (i = 0;i < 99;i++) {
969        upper[i] = toupper(alias[i]);
970	if (upper[i] == 0) break;
971    }
972    upper[i] = 0;
973
974    /*
975     * Walk down the list looking for a definition of the alias
976     */
977    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
978	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
979	    return(xmlCharEncodingAliases[i].name);
980	}
981    }
982    return(NULL);
983}
984
985/**
986 * xmlAddEncodingAlias:
987 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
988 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
989 *
990 * Registers an alias @alias for an encoding named @name. Existing alias
991 * will be overwritten.
992 *
993 * Returns 0 in case of success, -1 in case of error
994 */
995int
996xmlAddEncodingAlias(const char *name, const char *alias) {
997    int i;
998    char upper[100];
999
1000    if ((name == NULL) || (alias == NULL))
1001	return(-1);
1002
1003    for (i = 0;i < 99;i++) {
1004        upper[i] = toupper(alias[i]);
1005	if (upper[i] == 0) break;
1006    }
1007    upper[i] = 0;
1008
1009    if (xmlCharEncodingAliases == NULL) {
1010	xmlCharEncodingAliasesNb = 0;
1011	xmlCharEncodingAliasesMax = 20;
1012	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1013	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1014	if (xmlCharEncodingAliases == NULL)
1015	    return(-1);
1016    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1017	xmlCharEncodingAliasesMax *= 2;
1018	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1019	      xmlRealloc(xmlCharEncodingAliases,
1020		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1021    }
1022    /*
1023     * Walk down the list looking for a definition of the alias
1024     */
1025    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1027	    /*
1028	     * Replace the definition.
1029	     */
1030	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1031	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1032	    return(0);
1033	}
1034    }
1035    /*
1036     * Add the definition
1037     */
1038    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1039    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1040    xmlCharEncodingAliasesNb++;
1041    return(0);
1042}
1043
1044/**
1045 * xmlDelEncodingAlias:
1046 * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1047 *
1048 * Unregisters an encoding alias @alias
1049 *
1050 * Returns 0 in case of success, -1 in case of error
1051 */
1052int
1053xmlDelEncodingAlias(const char *alias) {
1054    int i;
1055
1056    if (alias == NULL)
1057	return(-1);
1058
1059    if (xmlCharEncodingAliases == NULL)
1060	return(-1);
1061    /*
1062     * Walk down the list looking for a definition of the alias
1063     */
1064    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1065	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1066	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1067	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1068	    xmlCharEncodingAliasesNb--;
1069	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1070		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1071	    return(0);
1072	}
1073    }
1074    return(-1);
1075}
1076
1077/**
1078 * xmlParseCharEncoding:
1079 * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1080 *
1081 * Compare the string to the encoding schemes already known. Note
1082 * that the comparison is case insensitive accordingly to the section
1083 * [XML] 4.3.3 Character Encoding in Entities.
1084 *
1085 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1086 * if not recognized.
1087 */
1088xmlCharEncoding
1089xmlParseCharEncoding(const char* name)
1090{
1091    const char *alias;
1092    char upper[500];
1093    int i;
1094
1095    if (name == NULL)
1096	return(XML_CHAR_ENCODING_NONE);
1097
1098    /*
1099     * Do the alias resolution
1100     */
1101    alias = xmlGetEncodingAlias(name);
1102    if (alias != NULL)
1103	name = alias;
1104
1105    for (i = 0;i < 499;i++) {
1106        upper[i] = toupper(name[i]);
1107	if (upper[i] == 0) break;
1108    }
1109    upper[i] = 0;
1110
1111    if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1112    if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1113    if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1114
1115    /*
1116     * NOTE: if we were able to parse this, the endianness of UTF16 is
1117     *       already found and in use
1118     */
1119    if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1120    if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1121
1122    if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1123    if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1124    if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1125
1126    /*
1127     * NOTE: if we were able to parse this, the endianness of UCS4 is
1128     *       already found and in use
1129     */
1130    if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1131    if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1132    if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1133
1134
1135    if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1136    if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1137    if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1138
1139    if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1140    if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1141    if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1142
1143    if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1144    if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1145    if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1146    if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1147    if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1148    if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1149    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1150
1151    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1152    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1153    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1154
1155#ifdef DEBUG_ENCODING
1156    xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1157#endif
1158    return(XML_CHAR_ENCODING_ERROR);
1159}
1160
1161/**
1162 * xmlGetCharEncodingName:
1163 * @enc:  the encoding
1164 *
1165 * The "canonical" name for XML encoding.
1166 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1167 * Section 4.3.3  Character Encoding in Entities
1168 *
1169 * Returns the canonical name for the given encoding
1170 */
1171
1172const char*
1173xmlGetCharEncodingName(xmlCharEncoding enc) {
1174    switch (enc) {
1175        case XML_CHAR_ENCODING_ERROR:
1176	    return(NULL);
1177        case XML_CHAR_ENCODING_NONE:
1178	    return(NULL);
1179        case XML_CHAR_ENCODING_UTF8:
1180	    return("UTF-8");
1181        case XML_CHAR_ENCODING_UTF16LE:
1182	    return("UTF-16");
1183        case XML_CHAR_ENCODING_UTF16BE:
1184	    return("UTF-16");
1185        case XML_CHAR_ENCODING_EBCDIC:
1186            return("EBCDIC");
1187        case XML_CHAR_ENCODING_UCS4LE:
1188            return("ISO-10646-UCS-4");
1189        case XML_CHAR_ENCODING_UCS4BE:
1190            return("ISO-10646-UCS-4");
1191        case XML_CHAR_ENCODING_UCS4_2143:
1192            return("ISO-10646-UCS-4");
1193        case XML_CHAR_ENCODING_UCS4_3412:
1194            return("ISO-10646-UCS-4");
1195        case XML_CHAR_ENCODING_UCS2:
1196            return("ISO-10646-UCS-2");
1197        case XML_CHAR_ENCODING_8859_1:
1198	    return("ISO-8859-1");
1199        case XML_CHAR_ENCODING_8859_2:
1200	    return("ISO-8859-2");
1201        case XML_CHAR_ENCODING_8859_3:
1202	    return("ISO-8859-3");
1203        case XML_CHAR_ENCODING_8859_4:
1204	    return("ISO-8859-4");
1205        case XML_CHAR_ENCODING_8859_5:
1206	    return("ISO-8859-5");
1207        case XML_CHAR_ENCODING_8859_6:
1208	    return("ISO-8859-6");
1209        case XML_CHAR_ENCODING_8859_7:
1210	    return("ISO-8859-7");
1211        case XML_CHAR_ENCODING_8859_8:
1212	    return("ISO-8859-8");
1213        case XML_CHAR_ENCODING_8859_9:
1214	    return("ISO-8859-9");
1215        case XML_CHAR_ENCODING_2022_JP:
1216            return("ISO-2022-JP");
1217        case XML_CHAR_ENCODING_SHIFT_JIS:
1218            return("Shift-JIS");
1219        case XML_CHAR_ENCODING_EUC_JP:
1220            return("EUC-JP");
1221	case XML_CHAR_ENCODING_ASCII:
1222	    return(NULL);
1223    }
1224    return(NULL);
1225}
1226
1227/************************************************************************
1228 *									*
1229 *			Char encoding handlers				*
1230 *									*
1231 ************************************************************************/
1232
1233
/*
 * Capacity of the table of registered handlers: the table allocated by
 * xmlInitCharEncodingHandlers() has this many slots and
 * xmlRegisterCharEncodingHandler() refuses to go beyond it.
 * The size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
/* table of registered handlers, NULL until it has been allocated */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* number of entries of the table currently in use */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is what xmlFindCharEncodingHandler() hands back for a NULL or
 * empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1245
1246/**
1247 * xmlNewCharEncodingHandler:
1248 * @name:  the encoding name, in UTF-8 format (ASCII actually)
1249 * @input:  the xmlCharEncodingInputFunc to read that encoding
1250 * @output:  the xmlCharEncodingOutputFunc to write that encoding
1251 *
1252 * Create and registers an xmlCharEncodingHandler.
1253 *
1254 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1255 */
1256xmlCharEncodingHandlerPtr
1257xmlNewCharEncodingHandler(const char *name,
1258                          xmlCharEncodingInputFunc input,
1259                          xmlCharEncodingOutputFunc output) {
1260    xmlCharEncodingHandlerPtr handler;
1261    const char *alias;
1262    char upper[500];
1263    int i;
1264    char *up = NULL;
1265
1266    /*
1267     * Do the alias resolution
1268     */
1269    alias = xmlGetEncodingAlias(name);
1270    if (alias != NULL)
1271	name = alias;
1272
1273    /*
1274     * Keep only the uppercase version of the encoding.
1275     */
1276    if (name == NULL) {
1277        xmlEncodingErr(XML_I18N_NO_NAME,
1278		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1279	return(NULL);
1280    }
1281    for (i = 0;i < 499;i++) {
1282        upper[i] = toupper(name[i]);
1283	if (upper[i] == 0) break;
1284    }
1285    upper[i] = 0;
1286    up = xmlMemStrdup(upper);
1287    if (up == NULL) {
1288        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1289	return(NULL);
1290    }
1291
1292    /*
1293     * allocate and fill-up an handler block.
1294     */
1295    handler = (xmlCharEncodingHandlerPtr)
1296              xmlMalloc(sizeof(xmlCharEncodingHandler));
1297    if (handler == NULL) {
1298        xmlFree(up);
1299        xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1300	return(NULL);
1301    }
1302    handler->input = input;
1303    handler->output = output;
1304    handler->name = up;
1305
1306#ifdef LIBXML_ICONV_ENABLED
1307    handler->iconv_in = NULL;
1308    handler->iconv_out = NULL;
1309#endif /* LIBXML_ICONV_ENABLED */
1310
1311    /*
1312     * registers and returns the handler.
1313     */
1314    xmlRegisterCharEncodingHandler(handler);
1315#ifdef DEBUG_ENCODING
1316    xmlGenericError(xmlGenericErrorContext,
1317	    "Registered encoding handler for %s\n", name);
1318#endif
1319    return(handler);
1320}
1321
1322/**
1323 * xmlInitCharEncodingHandlers:
1324 *
1325 * Initialize the char encoding support, it registers the default
1326 * encoding supported.
1327 * NOTE: while public, this function usually doesn't need to be called
1328 *       in normal processing.
1329 */
1330void
1331xmlInitCharEncodingHandlers(void) {
1332    unsigned short int tst = 0x1234;
1333    unsigned char *ptr = (unsigned char *) &tst;
1334
1335    if (handlers != NULL) return;
1336
1337    handlers = (xmlCharEncodingHandlerPtr *)
1338        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1339
1340    if (*ptr == 0x12) xmlLittleEndian = 0;
1341    else if (*ptr == 0x34) xmlLittleEndian = 1;
1342    else {
1343        xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1344	               "Odd problem at endianness detection\n", NULL);
1345    }
1346
1347    if (handlers == NULL) {
1348        xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1349	return;
1350    }
1351    xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1352#ifdef LIBXML_OUTPUT_ENABLED
1353    xmlUTF16LEHandler =
1354          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1355    xmlUTF16BEHandler =
1356          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1357    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1358    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1359    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1360    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1361#ifdef LIBXML_HTML_ENABLED
1362    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1363#endif
1364#else
1365    xmlUTF16LEHandler =
1366          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1367    xmlUTF16BEHandler =
1368          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1369    xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1370    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1371    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1372    xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1373#endif /* LIBXML_OUTPUT_ENABLED */
1374#ifndef LIBXML_ICONV_ENABLED
1375#ifdef LIBXML_ISO8859X_ENABLED
1376    xmlRegisterCharEncodingHandlersISO8859x ();
1377#endif
1378#endif
1379
1380}
1381
1382/**
1383 * xmlCleanupCharEncodingHandlers:
1384 *
1385 * Cleanup the memory allocated for the char encoding support, it
1386 * unregisters all the encoding handlers and the aliases.
1387 */
1388void
1389xmlCleanupCharEncodingHandlers(void) {
1390    xmlCleanupEncodingAliases();
1391
1392    if (handlers == NULL) return;
1393
1394    for (;nbCharEncodingHandler > 0;) {
1395        nbCharEncodingHandler--;
1396	if (handlers[nbCharEncodingHandler] != NULL) {
1397	    if (handlers[nbCharEncodingHandler]->name != NULL)
1398		xmlFree(handlers[nbCharEncodingHandler]->name);
1399	    xmlFree(handlers[nbCharEncodingHandler]);
1400	}
1401    }
1402    xmlFree(handlers);
1403    handlers = NULL;
1404    nbCharEncodingHandler = 0;
1405    xmlDefaultCharEncodingHandler = NULL;
1406}
1407
1408/**
1409 * xmlRegisterCharEncodingHandler:
1410 * @handler:  the xmlCharEncodingHandlerPtr handler block
1411 *
1412 * Register the char encoding handler, surprising, isn't it ?
1413 */
1414void
1415xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1416    if (handlers == NULL) xmlInitCharEncodingHandlers();
1417    if ((handler == NULL) || (handlers == NULL)) {
1418        xmlEncodingErr(XML_I18N_NO_HANDLER,
1419		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1420	return;
1421    }
1422
1423    if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1424        xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1425	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1426	               "MAX_ENCODING_HANDLERS");
1427	return;
1428    }
1429    handlers[nbCharEncodingHandler++] = handler;
1430}
1431
1432/**
1433 * xmlGetCharEncodingHandler:
1434 * @enc:  an xmlCharEncoding value.
1435 *
1436 * Search in the registered set the handler able to read/write that encoding.
1437 *
1438 * Returns the handler or NULL if not found
1439 */
1440xmlCharEncodingHandlerPtr
1441xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1442    xmlCharEncodingHandlerPtr handler;
1443
1444    if (handlers == NULL) xmlInitCharEncodingHandlers();
1445    switch (enc) {
1446        case XML_CHAR_ENCODING_ERROR:
1447	    return(NULL);
1448        case XML_CHAR_ENCODING_NONE:
1449	    return(NULL);
1450        case XML_CHAR_ENCODING_UTF8:
1451	    return(NULL);
1452        case XML_CHAR_ENCODING_UTF16LE:
1453	    return(xmlUTF16LEHandler);
1454        case XML_CHAR_ENCODING_UTF16BE:
1455	    return(xmlUTF16BEHandler);
1456        case XML_CHAR_ENCODING_EBCDIC:
1457            handler = xmlFindCharEncodingHandler("EBCDIC");
1458            if (handler != NULL) return(handler);
1459            handler = xmlFindCharEncodingHandler("ebcdic");
1460            if (handler != NULL) return(handler);
1461            handler = xmlFindCharEncodingHandler("EBCDIC-US");
1462            if (handler != NULL) return(handler);
1463	    break;
1464        case XML_CHAR_ENCODING_UCS4BE:
1465            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1466            if (handler != NULL) return(handler);
1467            handler = xmlFindCharEncodingHandler("UCS-4");
1468            if (handler != NULL) return(handler);
1469            handler = xmlFindCharEncodingHandler("UCS4");
1470            if (handler != NULL) return(handler);
1471	    break;
1472        case XML_CHAR_ENCODING_UCS4LE:
1473            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1474            if (handler != NULL) return(handler);
1475            handler = xmlFindCharEncodingHandler("UCS-4");
1476            if (handler != NULL) return(handler);
1477            handler = xmlFindCharEncodingHandler("UCS4");
1478            if (handler != NULL) return(handler);
1479	    break;
1480        case XML_CHAR_ENCODING_UCS4_2143:
1481	    break;
1482        case XML_CHAR_ENCODING_UCS4_3412:
1483	    break;
1484        case XML_CHAR_ENCODING_UCS2:
1485            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1486            if (handler != NULL) return(handler);
1487            handler = xmlFindCharEncodingHandler("UCS-2");
1488            if (handler != NULL) return(handler);
1489            handler = xmlFindCharEncodingHandler("UCS2");
1490            if (handler != NULL) return(handler);
1491	    break;
1492
1493	    /*
1494	     * We used to keep ISO Latin encodings native in the
1495	     * generated data. This led to so many problems that
1496	     * this has been removed. One can still change this
1497	     * back by registering no-ops encoders for those
1498	     */
1499        case XML_CHAR_ENCODING_8859_1:
1500	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1501	    if (handler != NULL) return(handler);
1502	    break;
1503        case XML_CHAR_ENCODING_8859_2:
1504	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1505	    if (handler != NULL) return(handler);
1506	    break;
1507        case XML_CHAR_ENCODING_8859_3:
1508	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1509	    if (handler != NULL) return(handler);
1510	    break;
1511        case XML_CHAR_ENCODING_8859_4:
1512	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1513	    if (handler != NULL) return(handler);
1514	    break;
1515        case XML_CHAR_ENCODING_8859_5:
1516	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1517	    if (handler != NULL) return(handler);
1518	    break;
1519        case XML_CHAR_ENCODING_8859_6:
1520	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1521	    if (handler != NULL) return(handler);
1522	    break;
1523        case XML_CHAR_ENCODING_8859_7:
1524	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1525	    if (handler != NULL) return(handler);
1526	    break;
1527        case XML_CHAR_ENCODING_8859_8:
1528	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1529	    if (handler != NULL) return(handler);
1530	    break;
1531        case XML_CHAR_ENCODING_8859_9:
1532	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1533	    if (handler != NULL) return(handler);
1534	    break;
1535
1536
1537        case XML_CHAR_ENCODING_2022_JP:
1538            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1539            if (handler != NULL) return(handler);
1540	    break;
1541        case XML_CHAR_ENCODING_SHIFT_JIS:
1542            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1543            if (handler != NULL) return(handler);
1544            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1545            if (handler != NULL) return(handler);
1546            handler = xmlFindCharEncodingHandler("Shift_JIS");
1547            if (handler != NULL) return(handler);
1548	    break;
1549        case XML_CHAR_ENCODING_EUC_JP:
1550            handler = xmlFindCharEncodingHandler("EUC-JP");
1551            if (handler != NULL) return(handler);
1552	    break;
1553	default:
1554	    break;
1555    }
1556
1557#ifdef DEBUG_ENCODING
1558    xmlGenericError(xmlGenericErrorContext,
1559	    "No handler found for encoding %d\n", enc);
1560#endif
1561    return(NULL);
1562}
1563
1564/**
1565 * xmlFindCharEncodingHandler:
1566 * @name:  a string describing the char encoding.
1567 *
1568 * Search in the registered set the handler able to read/write that encoding.
1569 *
1570 * Returns the handler or NULL if not found
1571 */
1572xmlCharEncodingHandlerPtr
1573xmlFindCharEncodingHandler(const char *name) {
1574    const char *nalias;
1575    const char *norig;
1576    xmlCharEncoding alias;
1577#ifdef LIBXML_ICONV_ENABLED
1578    xmlCharEncodingHandlerPtr enc;
1579    iconv_t icv_in, icv_out;
1580#endif /* LIBXML_ICONV_ENABLED */
1581    char upper[100];
1582    int i;
1583
1584    if (handlers == NULL) xmlInitCharEncodingHandlers();
1585    if (name == NULL) return(xmlDefaultCharEncodingHandler);
1586    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1587
1588    /*
1589     * Do the alias resolution
1590     */
1591    norig = name;
1592    nalias = xmlGetEncodingAlias(name);
1593    if (nalias != NULL)
1594	name = nalias;
1595
1596    /*
1597     * Check first for directly registered encoding names
1598     */
1599    for (i = 0;i < 99;i++) {
1600        upper[i] = toupper(name[i]);
1601	if (upper[i] == 0) break;
1602    }
1603    upper[i] = 0;
1604
1605    if (handlers != NULL) {
1606        for (i = 0;i < nbCharEncodingHandler; i++) {
1607            if (!strcmp(upper, handlers[i]->name)) {
1608#ifdef DEBUG_ENCODING
1609                xmlGenericError(xmlGenericErrorContext,
1610                        "Found registered handler for encoding %s\n", name);
1611#endif
1612                return(handlers[i]);
1613            }
1614        }
1615    }
1616
1617#ifdef LIBXML_ICONV_ENABLED
1618    /* check whether iconv can handle this */
1619    icv_in = iconv_open("UTF-8", name);
1620    icv_out = iconv_open(name, "UTF-8");
1621    if (icv_in == (iconv_t) -1) {
1622        icv_in = iconv_open("UTF-8", upper);
1623    }
1624    if (icv_out == (iconv_t) -1) {
1625	icv_out = iconv_open(upper, "UTF-8");
1626    }
1627    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1628	    enc = (xmlCharEncodingHandlerPtr)
1629	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1630	    if (enc == NULL) {
1631	        iconv_close(icv_in);
1632	        iconv_close(icv_out);
1633		return(NULL);
1634	    }
1635	    enc->name = xmlMemStrdup(name);
1636	    enc->input = NULL;
1637	    enc->output = NULL;
1638	    enc->iconv_in = icv_in;
1639	    enc->iconv_out = icv_out;
1640#ifdef DEBUG_ENCODING
1641            xmlGenericError(xmlGenericErrorContext,
1642		    "Found iconv handler for encoding %s\n", name);
1643#endif
1644	    return enc;
1645    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1646	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1647		    "iconv : problems with filters for '%s'\n", name);
1648    }
1649#endif /* LIBXML_ICONV_ENABLED */
1650
1651#ifdef DEBUG_ENCODING
1652    xmlGenericError(xmlGenericErrorContext,
1653	    "No handler found for encoding %s\n", name);
1654#endif
1655
1656    /*
1657     * Fallback using the canonical names
1658     */
1659    alias = xmlParseCharEncoding(norig);
1660    if (alias != XML_CHAR_ENCODING_ERROR) {
1661        const char* canon;
1662        canon = xmlGetCharEncodingName(alias);
1663        if ((canon != NULL) && (strcmp(name, canon))) {
1664	    return(xmlFindCharEncodingHandler(canon));
1665        }
1666    }
1667
1668    /* If "none of the above", give up */
1669    return(NULL);
1670}
1671
1672/************************************************************************
1673 *									*
1674 *		ICONV based generic conversion functions		*
1675 *									*
1676 ************************************************************************/
1677
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Runs a single iconv() conversion from @in to @out and maps its
 * outcome to the error codes used by the encoding functions.
 *
 * Returns 0 if success, or
 *     -1 by lack of space or if one of the pointers is NULL, or
 *     -2 if the transcoding fails (invalid input sequence, or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 in the other cases, e.g. if the last bytes of the input don't
 *        form a complete character.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced;
 *     it is set to 0 when the call is rejected for a NULL pointer.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    int ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    ret = iconv(cd, (ICONV_CONST char **) &src, &srcLeft, &dst, &dstLeft);

    /* turn the remaining byte counts into consumed/produced counts */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (ret != -1))
        return(0);

    /* something went wrong or input is left: classify using errno */
#ifdef EILSEQ
    if (errno == EILSEQ)
        return(-2);
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return(-1);
#endif
    /* EINVAL and anything else */
    return(-3);
}
#endif /* LIBXML_ICONV_ENABLED */
1737
1738/************************************************************************
1739 *									*
1740 *		The real API used by libxml for on-the-fly conversion	*
1741 *									*
1742 ************************************************************************/
1743int
1744xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1745                       xmlBufferPtr in, int len);
1746
1747/**
1748 * xmlCharEncFirstLineInt:
1749 * @handler:	char enconding transformation data structure
1750 * @out:  an xmlBuffer for the output.
1751 * @in:  an xmlBuffer for the input
1752 * @len:  number of bytes to convert for the first line, or -1
1753 *
1754 * Front-end for the encoding handler input function, but handle only
1755 * the very first line, i.e. limit itself to 45 chars.
1756 *
1757 * Returns the number of byte written if success, or
1758 *     -1 general error
1759 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1760 *        the result of transformation can't fit into the encoding we want), or
1761 */
1762int
1763xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1764                       xmlBufferPtr in, int len) {
1765    int ret = -2;
1766    int written;
1767    int toconv;
1768
1769    if (handler == NULL) return(-1);
1770    if (out == NULL) return(-1);
1771    if (in == NULL) return(-1);
1772
1773    /* calculate space available */
1774    written = out->size - out->use;
1775    toconv = in->use;
1776    /*
1777     * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1778     * 45 chars should be sufficient to reach the end of the encoding
1779     * declaration without going too far inside the document content.
1780     * on UTF-16 this means 90bytes, on UCS4 this means 180
1781     * The actual value depending on guessed encoding is passed as @len
1782     * if provided
1783     */
1784    if (len >= 0) {
1785        if (toconv > len)
1786            toconv = len;
1787    } else {
1788        if (toconv > 180)
1789            toconv = 180;
1790    }
1791    if (toconv * 2 >= written) {
1792        xmlBufferGrow(out, toconv);
1793	written = out->size - out->use - 1;
1794    }
1795
1796    if (handler->input != NULL) {
1797	ret = handler->input(&out->content[out->use], &written,
1798	                     in->content, &toconv);
1799	xmlBufferShrink(in, toconv);
1800	out->use += written;
1801	out->content[out->use] = 0;
1802    }
1803#ifdef LIBXML_ICONV_ENABLED
1804    else if (handler->iconv_in != NULL) {
1805	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1806	                      &written, in->content, &toconv);
1807	xmlBufferShrink(in, toconv);
1808	out->use += written;
1809	out->content[out->use] = 0;
1810	if (ret == -1) ret = -3;
1811    }
1812#endif /* LIBXML_ICONV_ENABLED */
1813#ifdef DEBUG_ENCODING
1814    switch (ret) {
1815        case 0:
1816	    xmlGenericError(xmlGenericErrorContext,
1817		    "converted %d bytes to %d bytes of input\n",
1818	            toconv, written);
1819	    break;
1820        case -1:
1821	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1822	            toconv, written, in->use);
1823	    break;
1824        case -2:
1825	    xmlGenericError(xmlGenericErrorContext,
1826		    "input conversion failed due to input error\n");
1827	    break;
1828        case -3:
1829	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1830	            toconv, written, in->use);
1831	    break;
1832	default:
1833	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1834    }
1835#endif /* DEBUG_ENCODING */
1836    /*
1837     * Ignore when input buffer is not on a boundary
1838     */
1839    if (ret == -3) ret = 0;
1840    if (ret == -1) ret = 0;
1841    return(ret);
1842}
1843
1844/**
1845 * xmlCharEncFirstLine:
1846 * @handler:	char enconding transformation data structure
1847 * @out:  an xmlBuffer for the output.
1848 * @in:  an xmlBuffer for the input
1849 *
1850 * Front-end for the encoding handler input function, but handle only
1851 * the very first line, i.e. limit itself to 45 chars.
1852 *
1853 * Returns the number of byte written if success, or
1854 *     -1 general error
1855 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1856 *        the result of transformation can't fit into the encoding we want), or
1857 */
1858int
1859xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1860                 xmlBufferPtr in) {
1861    return(xmlCharEncFirstLineInt(handler, out, in, -1));
1862}
1863
1864/**
1865 * xmlCharEncInFunc:
1866 * @handler:	char encoding transformation data structure
1867 * @out:  an xmlBuffer for the output.
1868 * @in:  an xmlBuffer for the input
1869 *
1870 * Generic front-end for the encoding handler input function
1871 *
1872 * Returns the number of byte written if success, or
1873 *     -1 general error
1874 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1875 *        the result of transformation can't fit into the encoding we want), or
1876 */
1877int
1878xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
1879                 xmlBufferPtr in)
1880{
1881    int ret = -2;
1882    int written;
1883    int toconv;
1884
1885    if (handler == NULL)
1886        return (-1);
1887    if (out == NULL)
1888        return (-1);
1889    if (in == NULL)
1890        return (-1);
1891
1892    toconv = in->use;
1893    if (toconv == 0)
1894        return (0);
1895    written = out->size - out->use;
1896    if (toconv * 2 >= written) {
1897        xmlBufferGrow(out, out->size + toconv * 2);
1898        written = out->size - out->use - 1;
1899    }
1900    if (handler->input != NULL) {
1901        ret = handler->input(&out->content[out->use], &written,
1902                             in->content, &toconv);
1903        xmlBufferShrink(in, toconv);
1904        out->use += written;
1905        out->content[out->use] = 0;
1906    }
1907#ifdef LIBXML_ICONV_ENABLED
1908    else if (handler->iconv_in != NULL) {
1909        ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1910                              &written, in->content, &toconv);
1911        xmlBufferShrink(in, toconv);
1912        out->use += written;
1913        out->content[out->use] = 0;
1914        if (ret == -1)
1915            ret = -3;
1916    }
1917#endif /* LIBXML_ICONV_ENABLED */
1918    switch (ret) {
1919        case 0:
1920#ifdef DEBUG_ENCODING
1921            xmlGenericError(xmlGenericErrorContext,
1922                            "converted %d bytes to %d bytes of input\n",
1923                            toconv, written);
1924#endif
1925            break;
1926        case -1:
1927#ifdef DEBUG_ENCODING
1928            xmlGenericError(xmlGenericErrorContext,
1929                         "converted %d bytes to %d bytes of input, %d left\n",
1930                            toconv, written, in->use);
1931#endif
1932            break;
1933        case -3:
1934#ifdef DEBUG_ENCODING
1935            xmlGenericError(xmlGenericErrorContext,
1936                        "converted %d bytes to %d bytes of input, %d left\n",
1937                            toconv, written, in->use);
1938#endif
1939            break;
1940        case -2: {
1941            char buf[50];
1942
1943	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
1944		     in->content[0], in->content[1],
1945		     in->content[2], in->content[3]);
1946	    buf[49] = 0;
1947	    xmlEncodingErr(XML_I18N_CONV_FAILED,
1948		    "input conversion failed due to input error, bytes %s\n",
1949		           buf);
1950        }
1951    }
1952    /*
1953     * Ignore when input buffer is not on a boundary
1954     */
1955    if (ret == -3)
1956        ret = 0;
1957    return (written? written : ret);
1958}
1959
1960/**
1961 * xmlCharEncOutFunc:
1962 * @handler:	char enconding transformation data structure
1963 * @out:  an xmlBuffer for the output.
1964 * @in:  an xmlBuffer for the input
1965 *
1966 * Generic front-end for the encoding handler output function
1967 * a first call with @in == NULL has to be made firs to initiate the
1968 * output in case of non-stateless encoding needing to initiate their
1969 * state or the output (like the BOM in UTF16).
1970 * In case of UTF8 sequence conversion errors for the given encoder,
1971 * the content will be automatically remapped to a CharRef sequence.
1972 *
1973 * Returns the number of byte written if success, or
1974 *     -1 general error
1975 *     -2 if the transcoding fails (for *in is not valid utf8 string or
1976 *        the result of transformation can't fit into the encoding we want), or
1977 */
1978int
1979xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1980                  xmlBufferPtr in) {
1981    int ret = -2;
1982    int written;
1983    int writtentot = 0;
1984    int toconv;
1985    int output = 0;
1986
1987    if (handler == NULL) return(-1);
1988    if (out == NULL) return(-1);
1989
1990retry:
1991
1992    written = out->size - out->use;
1993
1994    if (written > 0)
1995	written--; /* Gennady: count '/0' */
1996
1997    /*
1998     * First specific handling of in = NULL, i.e. the initialization call
1999     */
2000    if (in == NULL) {
2001        toconv = 0;
2002	if (handler->output != NULL) {
2003	    ret = handler->output(&out->content[out->use], &written,
2004				  NULL, &toconv);
2005	    if (ret >= 0) { /* Gennady: check return value */
2006		out->use += written;
2007		out->content[out->use] = 0;
2008	    }
2009	}
2010#ifdef LIBXML_ICONV_ENABLED
2011	else if (handler->iconv_out != NULL) {
2012	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2013				  &written, NULL, &toconv);
2014	    out->use += written;
2015	    out->content[out->use] = 0;
2016	}
2017#endif /* LIBXML_ICONV_ENABLED */
2018#ifdef DEBUG_ENCODING
2019	xmlGenericError(xmlGenericErrorContext,
2020		"initialized encoder\n");
2021#endif
2022        return(0);
2023    }
2024
2025    /*
2026     * Conversion itself.
2027     */
2028    toconv = in->use;
2029    if (toconv == 0)
2030	return(0);
2031    if (toconv * 4 >= written) {
2032        xmlBufferGrow(out, toconv * 4);
2033	written = out->size - out->use - 1;
2034    }
2035    if (handler->output != NULL) {
2036	ret = handler->output(&out->content[out->use], &written,
2037	                      in->content, &toconv);
2038	if (written > 0) {
2039	    xmlBufferShrink(in, toconv);
2040	    out->use += written;
2041	    writtentot += written;
2042	}
2043	out->content[out->use] = 0;
2044    }
2045#ifdef LIBXML_ICONV_ENABLED
2046    else if (handler->iconv_out != NULL) {
2047	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2048	                      &written, in->content, &toconv);
2049	xmlBufferShrink(in, toconv);
2050	out->use += written;
2051	writtentot += written;
2052	out->content[out->use] = 0;
2053	if (ret == -1) {
2054	    if (written > 0) {
2055		/*
2056		 * Can be a limitation of iconv
2057		 */
2058		goto retry;
2059	    }
2060	    ret = -3;
2061	}
2062    }
2063#endif /* LIBXML_ICONV_ENABLED */
2064    else {
2065	xmlEncodingErr(XML_I18N_NO_OUTPUT,
2066		       "xmlCharEncOutFunc: no output function !\n", NULL);
2067	return(-1);
2068    }
2069
2070    if (ret >= 0) output += ret;
2071
2072    /*
2073     * Attempt to handle error cases
2074     */
2075    switch (ret) {
2076        case 0:
2077#ifdef DEBUG_ENCODING
2078	    xmlGenericError(xmlGenericErrorContext,
2079		    "converted %d bytes to %d bytes of output\n",
2080	            toconv, written);
2081#endif
2082	    break;
2083        case -1:
2084#ifdef DEBUG_ENCODING
2085	    xmlGenericError(xmlGenericErrorContext,
2086		    "output conversion failed by lack of space\n");
2087#endif
2088	    break;
2089        case -3:
2090#ifdef DEBUG_ENCODING
2091	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2092	            toconv, written, in->use);
2093#endif
2094	    break;
2095        case -2: {
2096	    int len = in->use;
2097	    const xmlChar *utf = (const xmlChar *) in->content;
2098	    int cur;
2099
2100	    cur = xmlGetUTF8Char(utf, &len);
2101	    if (cur > 0) {
2102		xmlChar charref[20];
2103
2104#ifdef DEBUG_ENCODING
2105		xmlGenericError(xmlGenericErrorContext,
2106			"handling output conversion error\n");
2107		xmlGenericError(xmlGenericErrorContext,
2108			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2109			in->content[0], in->content[1],
2110			in->content[2], in->content[3]);
2111#endif
2112		/*
2113		 * Removes the UTF8 sequence, and replace it by a charref
2114		 * and continue the transcoding phase, hoping the error
2115		 * did not mangle the encoder state.
2116		 */
2117		snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
2118		xmlBufferShrink(in, len);
2119		xmlBufferAddHead(in, charref, -1);
2120
2121		goto retry;
2122	    } else {
2123		char buf[50];
2124
2125		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2126			 in->content[0], in->content[1],
2127			 in->content[2], in->content[3]);
2128		buf[49] = 0;
2129		xmlEncodingErr(XML_I18N_CONV_FAILED,
2130		    "output conversion failed due to conv error, bytes %s\n",
2131			       buf);
2132		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2133		    in->content[0] = ' ';
2134	    }
2135	    break;
2136	}
2137    }
2138    return(ret);
2139}
2140
2141/**
2142 * xmlCharEncCloseFunc:
2143 * @handler:	char enconding transformation data structure
2144 *
2145 * Generic front-end for encoding handler close function
2146 *
2147 * Returns 0 if success, or -1 in case of error
2148 */
2149int
2150xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2151    int ret = 0;
2152    if (handler == NULL) return(-1);
2153    if (handler->name == NULL) return(-1);
2154#ifdef LIBXML_ICONV_ENABLED
2155    /*
2156     * Iconv handlers can be used only once, free the whole block.
2157     * and the associated icon resources.
2158     */
2159    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2160	if (handler->name != NULL)
2161	    xmlFree(handler->name);
2162	handler->name = NULL;
2163	if (handler->iconv_out != NULL) {
2164	    if (iconv_close(handler->iconv_out))
2165		ret = -1;
2166	    handler->iconv_out = NULL;
2167	}
2168	if (handler->iconv_in != NULL) {
2169	    if (iconv_close(handler->iconv_in))
2170		ret = -1;
2171	    handler->iconv_in = NULL;
2172	}
2173	xmlFree(handler);
2174    }
2175#endif /* LIBXML_ICONV_ENABLED */
2176#ifdef DEBUG_ENCODING
2177    if (ret)
2178        xmlGenericError(xmlGenericErrorContext,
2179		"failed to close the encoding handler\n");
2180    else
2181        xmlGenericError(xmlGenericErrorContext,
2182		"closed the encoding handler\n");
2183#endif
2184
2185    return(ret);
2186}
2187
2188/**
2189 * xmlByteConsumed:
2190 * @ctxt: an XML parser context
2191 *
2192 * This function provides the current index of the parser relative
2193 * to the start of the current entity. This function is computed in
2194 * bytes from the beginning starting at zero and finishing at the
2195 * size in byte of the file if parsing a file. The function is
2196 * of constant cost if the input is UTF-8 but can be costly if run
2197 * on non-UTF-8 input.
2198 *
2199 * Returns the index in bytes from the beginning of the entity or -1
2200 *         in case the index could not be computed.
2201 */
2202long
2203xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2204    xmlParserInputPtr in;
2205
2206    if (ctxt == NULL) return(-1);
2207    in = ctxt->input;
2208    if (in == NULL)  return(-1);
2209    if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2210        unsigned int unused = 0;
2211	xmlCharEncodingHandler * handler = in->buf->encoder;
2212        /*
2213	 * Encoding conversion, compute the number of unused original
2214	 * bytes from the input not consumed and substract that from
2215	 * the raw consumed value, this is not a cheap operation
2216	 */
2217        if (in->end - in->cur > 0) {
2218	    unsigned char convbuf[32000];
2219	    const unsigned char *cur = (const unsigned char *)in->cur;
2220	    int toconv = in->end - in->cur, written = 32000;
2221
2222	    int ret;
2223
2224	    if (handler->output != NULL) {
2225	        do {
2226		    toconv = in->end - cur;
2227		    written = 32000;
2228		    ret = handler->output(&convbuf[0], &written,
2229				      cur, &toconv);
2230		    if (ret == -1) return(-1);
2231		    unused += written;
2232		    cur += toconv;
2233		} while (ret == -2);
2234#ifdef LIBXML_ICONV_ENABLED
2235	    } else if (handler->iconv_out != NULL) {
2236	        do {
2237		    toconv = in->end - cur;
2238		    written = 32000;
2239		    ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2240	                      &written, cur, &toconv);
2241		    if (ret < 0) {
2242		        if (written > 0)
2243			    ret = -2;
2244			else
2245			    return(-1);
2246		    }
2247		    unused += written;
2248		    cur += toconv;
2249		} while (ret == -2);
2250#endif
2251            } else {
2252	        /* could not find a converter */
2253	        return(-1);
2254	    }
2255	}
2256	if (in->buf->rawconsumed < unused)
2257	    return(-1);
2258	return(in->buf->rawconsumed - unused);
2259    }
2260    return(in->consumed + (in->cur - in->base));
2261}
2262
2263#ifndef LIBXML_ICONV_ENABLED
2264#ifdef LIBXML_ISO8859X_ENABLED
2265
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. At most @outlen bytes are stored in @out; the
 * conversion stops early, without error, once @out is full.
 * A call with @in == NULL is an initialization call and does nothing.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed or truncated UTF-8 sequence, or a character which
 *     is not part of the target character set), or -1 otherwise
 * The value of @inlen after return is the number of octets consumed,
 *     in case of -2 up to the start of the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    /* end of the last character fully converted */
    const unsigned char* processed = in;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* every character yields exactly one byte, check the room first */
        if (out >= outend)
            break;
        d = *in++;
        if  (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto failed;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto failed;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* lead byte selects a 64 entries block, trailing byte the entry */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                goto failed;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto failed;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto failed;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* the first lookup gives the block used for the second one */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report only what was converted before the offending sequence */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
2382
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  table of 128 entries giving the Unicode value of the
 *                 bytes 0x80..0xFF, 0 for an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. The conversion stops, without error, in front of
 * the first character whose UTF-8 form does not fit in the room left
 * in @out.
 *
 * Returns the number of bytes written if success, or -1 if a parameter
 *     is NULL or an undefined byte is found in @in
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    while (in < inend) {
        if (out >= outend)
            break;
        /* only read bytes which are really part of the input */
        c = *in;
        if (c < 0x80) {
            *out++ = c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                /* 2 bytes sequence */
                if (outend - out < 2)
                    break;
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* 3 bytes sequence, the table entries are 16 bits wide */
                if (outend - out < 3)
                    break;
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2449
2450
2451/************************************************************************
2452 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2453 ************************************************************************/
2454
/*
 * Unicode values of the ISO-8859-2 bytes 0x80..0xFF, one 16 bit entry per
 * byte. The shape is the one ISO8859xToUTF8() expects for @unicodetable
 * (index = byte - 0x80, 0x0000 = undefined byte); presumably this table
 * is handed to it by a wrapper not visible in this part of the file.
 */
2455static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2456    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2457    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2458    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2459    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2460    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2461    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2462    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2463    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2464    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2465    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2466    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2467    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2468    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2469    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2470    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2471    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2472};
2473
/*
 * Two-level UTF-8 to ISO-8859-2 lookup table, laid out the way
 * UTF8ToISO8859x() indexes its @xlattable argument (presumably this table
 * is handed to it by a wrapper not visible in this part of the file):
 *   [0..31]   block number for the lead byte of a 2 bytes sequence
 *             (index = lead & 0x1F)
 *   [32..47]  block number for the lead byte of a 3 bytes sequence
 *             (index = lead & 0x0F)
 *   [48..]    6 blocks of 64 entries indexed by (trailing byte & 0x3F).
 * For a 3 bytes sequence the entry found with the first trailing byte is
 * the block number used with the second one. A final entry of 0 means
 * that the character is not part of the character set.
 * The string literal is exactly as long as the array.
 */
2474static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2475    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2476    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2477    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2478    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2479    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2480    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2481    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2482    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2483    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2484    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2485    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2486    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2487    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2488    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2489    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2490    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2491    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2492    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2493    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2494    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2495    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2496    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2497    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2498    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2499    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2500    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2501    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2502};
2503
/*
 * Unicode values of the ISO-8859-3 bytes 0x80..0xFF, one 16 bit entry per
 * byte. The shape is the one ISO8859xToUTF8() expects for @unicodetable
 * (index = byte - 0x80, 0x0000 = undefined byte); presumably this table
 * is handed to it by a wrapper not visible in this part of the file.
 */
2504static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2505    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2506    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2507    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2508    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2509    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2510    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2511    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2512    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2513    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2514    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2515    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2516    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2517    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2518    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2519    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2520    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2521};
2522
/*
 * Two-level UTF-8 to ISO-8859-3 lookup table, laid out the way
 * UTF8ToISO8859x() indexes its @xlattable argument (presumably this table
 * is handed to it by a wrapper not visible in this part of the file):
 *   [0..31]   block number for the lead byte of a 2 bytes sequence
 *             (index = lead & 0x1F)
 *   [32..47]  block number for the lead byte of a 3 bytes sequence
 *             (index = lead & 0x0F)
 *   [48..]    7 blocks of 64 entries indexed by (trailing byte & 0x3F).
 * For a 3 bytes sequence the entry found with the first trailing byte is
 * the block number used with the second one. A final entry of 0 means
 * that the character is not part of the character set.
 * The string literal is exactly as long as the array.
 */
2523static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2524    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2525    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2526    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2527    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2528    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2529    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2530    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2531    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2532    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2533    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2534    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2535    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2536    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2537    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2538    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2539    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2540    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2541    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2542    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2543    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2544    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2545    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2546    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2547    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2548    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2549    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2550    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2551    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2552    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2553    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2554    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2555};
2556
/*
 * Unicode values of the ISO-8859-4 bytes 0x80..0xFF, one 16 bit entry per
 * byte. The shape is the one ISO8859xToUTF8() expects for @unicodetable
 * (index = byte - 0x80, 0x0000 = undefined byte); presumably this table
 * is handed to it by a wrapper not visible in this part of the file.
 */
2557static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2558    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2559    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2560    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2561    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2562    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2563    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2564    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2565    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2566    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2567    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2568    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2569    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2570    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2571    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2572    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2573    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2574};
2575
/*
 * Two-level UTF-8 to ISO-8859-4 lookup table, laid out the way
 * UTF8ToISO8859x() indexes its @xlattable argument (presumably this table
 * is handed to it by a wrapper not visible in this part of the file):
 *   [0..31]   block number for the lead byte of a 2 bytes sequence
 *             (index = lead & 0x1F)
 *   [32..47]  block number for the lead byte of a 3 bytes sequence
 *             (index = lead & 0x0F)
 *   [48..]    6 blocks of 64 entries indexed by (trailing byte & 0x3F).
 * For a 3 bytes sequence the entry found with the first trailing byte is
 * the block number used with the second one. A final entry of 0 means
 * that the character is not part of the character set.
 * The string literal is exactly as long as the array.
 */
2576static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2577    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2578    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2579    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2580    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2581    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2582    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2583    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2584    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2585    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2586    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2587    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2588    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2589    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2590    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2591    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2592    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2593    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2594    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2595    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2596    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2597    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2598    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2599    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2600    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2601    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2602    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2603    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2604};
2605
/*
 * Unicode values of the ISO-8859-5 bytes 0x80..0xFF, one 16 bit entry per
 * byte. The shape is the one ISO8859xToUTF8() expects for @unicodetable
 * (index = byte - 0x80, 0x0000 = undefined byte); presumably this table
 * is handed to it by a wrapper not visible in this part of the file.
 */
2606static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2607    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2608    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2609    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2610    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2611    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2612    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2613    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2614    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2615    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2616    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2617    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2618    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2619    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2620    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2621    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2622    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2623};
2624
/*
 * Two-level UTF-8 to ISO-8859-5 lookup table, laid out the way
 * UTF8ToISO8859x() indexes its @xlattable argument (presumably this table
 * is handed to it by a wrapper not visible in this part of the file):
 *   [0..31]   block number for the lead byte of a 2 bytes sequence
 *             (index = lead & 0x1F)
 *   [32..47]  block number for the lead byte of a 3 bytes sequence
 *             (index = lead & 0x0F)
 *   [48..]    6 blocks of 64 entries indexed by (trailing byte & 0x3F).
 * For a 3 bytes sequence the entry found with the first trailing byte is
 * the block number used with the second one. A final entry of 0 means
 * that the character is not part of the character set.
 * The string literal is exactly as long as the array.
 */
2625static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2626    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2627    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2628    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2629    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2630    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2631    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2632    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2633    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2634    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2635    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2636    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2637    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2638    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2639    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2640    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2641    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2642    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2643    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2644    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2645    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2646    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2647    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2648    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2649    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2650    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2651    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2652    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2653};
2654
/*
 * Unicode values of the ISO-8859-6 bytes 0x80..0xFF, one 16 bit entry per
 * byte. The shape is the one ISO8859xToUTF8() expects for @unicodetable
 * (index = byte - 0x80, 0x0000 = undefined byte); presumably this table
 * is handed to it by a wrapper not visible in this part of the file.
 */
2655static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2656    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2657    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2658    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2659    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2660    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2661    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2662    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2663    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2664    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2665    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2666    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2667    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2668    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2669    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2670    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2671    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2672};
2673
/*
 * Two-level UTF-8 to ISO-8859-6 lookup table, laid out the way
 * UTF8ToISO8859x() indexes its @xlattable argument (presumably this table
 * is handed to it by a wrapper not visible in this part of the file):
 *   [0..31]   block number for the lead byte of a 2 bytes sequence
 *             (index = lead & 0x1F)
 *   [32..47]  block number for the lead byte of a 3 bytes sequence
 *             (index = lead & 0x0F)
 *   [48..]    5 blocks of 64 entries indexed by (trailing byte & 0x3F).
 * For a 3 bytes sequence the entry found with the first trailing byte is
 * the block number used with the second one. A final entry of 0 means
 * that the character is not part of the character set.
 * The string literal is exactly as long as the array.
 */
2674static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2675    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2676    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2677    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2678    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2679    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2680    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2681    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2682    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2683    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2684    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2685    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2686    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2687    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2688    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2689    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2690    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2691    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2692    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2693    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2694    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2695    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2696    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2697    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2698};
2699
/*
 * ISO-8859-7 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * NOTE(review): 0x0000 slots are presumably "byte not assigned"; the
 * consumer of this table is outside this chunk -- confirm there.
 */
static const unsigned short xmlunicodetable_ISO8859_7[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x2018, 0x2019, 0x00A3, 0x0000, 0x0000, 0x00A6, 0x00A7,
    /* 0xA8 */ 0x00A8, 0x00A9, 0x0000, 0x00AB, 0x00AC, 0x00AD, 0x0000, 0x2015,
    /* 0xB0 */ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
    /* 0xB8 */ 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
    /* 0xC0 */ 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    /* 0xC8 */ 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
    /* 0xD0 */ 0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
    /* 0xD8 */ 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
    /* 0xE0 */ 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
    /* 0xE8 */ 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
    /* 0xF0 */ 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
    /* 0xF8 */ 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000,
};
2718
/*
 * Packed byte table that accompanies xmlunicodetable_ISO8859_7.
 * The string-literal initializer supplies exactly 48 + 7 * 64 bytes, so
 * the array is completely filled and holds no terminating NUL.
 *
 * NOTE(review): the code that reads this table is outside this chunk.
 * The data looks like a 48-byte index area of small block numbers
 * followed by 7 blocks of 64 ISO-8859-7 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against the UTF-8 -> ISO-8859-x
 * converter before relying on this description.
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2752
/*
 * ISO-8859-8 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * NOTE(review): 0x0000 slots are presumably "byte not assigned"; the
 * consumer of this table is outside this chunk -- confirm there.
 */
static const unsigned short xmlunicodetable_ISO8859_8[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x0000, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    /* 0xA8 */ 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    /* 0xB0 */ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    /* 0xB8 */ 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x0000,
    /* 0xC0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xC8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xD0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* 0xD8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    /* 0xE0 */ 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
    /* 0xE8 */ 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
    /* 0xF0 */ 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
    /* 0xF8 */ 0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000,
};
2771
/*
 * Packed byte table that accompanies xmlunicodetable_ISO8859_8.
 * The string-literal initializer supplies exactly 48 + 7 * 64 bytes, so
 * the array is completely filled and holds no terminating NUL.
 *
 * NOTE(review): the code that reads this table is outside this chunk.
 * The data looks like a 48-byte index area of small block numbers
 * followed by 7 blocks of 64 ISO-8859-8 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against the UTF-8 -> ISO-8859-x
 * converter before relying on this description.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2805
/*
 * ISO-8859-9 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * Every slot in this table is non-zero; only six slots (in the 0xD0 and
 * 0xF0 rows) differ from the identity value 0x0080 + i.
 */
static const unsigned short xmlunicodetable_ISO8859_9[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    /* 0xA8 */ 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    /* 0xB0 */ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    /* 0xB8 */ 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    /* 0xC0 */ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    /* 0xC8 */ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    /* 0xD0 */ 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    /* 0xD8 */ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
    /* 0xE0 */ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    /* 0xE8 */ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    /* 0xF0 */ 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    /* 0xF8 */ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF,
};
2824
/*
 * Packed byte table that accompanies xmlunicodetable_ISO8859_9.
 * The string-literal initializer supplies exactly 48 + 5 * 64 bytes, so
 * the array is completely filled and holds no terminating NUL.
 *
 * NOTE(review): the code that reads this table is outside this chunk.
 * The data looks like a 48-byte index area of small block numbers
 * followed by 5 blocks of 64 ISO-8859-9 byte values, with 0x00 presumably
 * meaning "no mapping" -- confirm against the UTF-8 -> ISO-8859-x
 * converter before relying on this description.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2850
/*
 * ISO-8859-10 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * Every slot in this table is non-zero.
 */
static const unsigned short xmlunicodetable_ISO8859_10[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
    /* 0xA8 */ 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
    /* 0xB0 */ 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
    /* 0xB8 */ 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
    /* 0xC0 */ 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
    /* 0xC8 */ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
    /* 0xD0 */ 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
    /* 0xD8 */ 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    /* 0xE0 */ 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
    /* 0xE8 */ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
    /* 0xF0 */ 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
    /* 0xF8 */ 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138,
};
2869
/*
 * Packed byte table that accompanies xmlunicodetable_ISO8859_10.
 * The string-literal initializer supplies exactly 48 + 7 * 64 bytes, so
 * the array is completely filled and holds no terminating NUL.
 *
 * NOTE(review): the code that reads this table is outside this chunk.
 * The data looks like a 48-byte index area of small block numbers
 * followed by 7 blocks of 64 ISO-8859-10 byte values, with 0x00
 * presumably meaning "no mapping" -- confirm against the
 * UTF-8 -> ISO-8859-x converter before relying on this description.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
2903
/*
 * ISO-8859-11 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * NOTE(review): 0x0000 slots are presumably "byte not assigned"; the
 * consumer of this table is outside this chunk -- confirm there.
 */
static const unsigned short xmlunicodetable_ISO8859_11[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
    /* 0xA8 */ 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
    /* 0xB0 */ 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
    /* 0xB8 */ 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
    /* 0xC0 */ 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
    /* 0xC8 */ 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
    /* 0xD0 */ 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
    /* 0xD8 */ 0x0E38, 0x0E39, 0x0E3A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0E3F,
    /* 0xE0 */ 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
    /* 0xE8 */ 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
    /* 0xF0 */ 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
    /* 0xF8 */ 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0x0000, 0x0000, 0x0000, 0x0000,
};
2922
/*
 * Packed byte table that accompanies xmlunicodetable_ISO8859_11.
 * The string-literal initializer supplies exactly 48 + 6 * 64 bytes, so
 * the array is completely filled and holds no terminating NUL.
 *
 * NOTE(review): the code that reads this table is outside this chunk.
 * The data looks like a 48-byte index area of small block numbers
 * followed by 6 blocks of 64 ISO-8859-11 byte values, with 0x00
 * presumably meaning "no mapping" -- confirm against the
 * UTF-8 -> ISO-8859-x converter before relying on this description.
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2952
/*
 * ISO-8859-13 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * Every slot in this table is non-zero.
 */
static const unsigned short xmlunicodetable_ISO8859_13[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
    /* 0xA8 */ 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
    /* 0xB0 */ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
    /* 0xB8 */ 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
    /* 0xC0 */ 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
    /* 0xC8 */ 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
    /* 0xD0 */ 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
    /* 0xD8 */ 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
    /* 0xE0 */ 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
    /* 0xE8 */ 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
    /* 0xF0 */ 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
    /* 0xF8 */ 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019,
};
2971
/*
 * Packed byte table that accompanies xmlunicodetable_ISO8859_13.
 * The string-literal initializer supplies exactly 48 + 7 * 64 bytes, so
 * the array is completely filled and holds no terminating NUL.
 *
 * NOTE(review): the code that reads this table is outside this chunk.
 * The data looks like a 48-byte index area of small block numbers
 * followed by 7 blocks of 64 ISO-8859-13 byte values, with 0x00
 * presumably meaning "no mapping" -- confirm against the
 * UTF-8 -> ISO-8859-x converter before relying on this description.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3005
/*
 * ISO-8859-14 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * Every slot in this table is non-zero.
 */
static const unsigned short xmlunicodetable_ISO8859_14[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
    /* 0xA8 */ 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
    /* 0xB0 */ 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
    /* 0xB8 */ 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
    /* 0xC0 */ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    /* 0xC8 */ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    /* 0xD0 */ 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
    /* 0xD8 */ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
    /* 0xE0 */ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    /* 0xE8 */ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    /* 0xF0 */ 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
    /* 0xF8 */ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF,
};
3024
/*
 * Packed byte table that accompanies xmlunicodetable_ISO8859_14.
 * The string-literal initializer supplies exactly 48 + 10 * 64 bytes, so
 * the array is completely filled and holds no terminating NUL.
 *
 * NOTE(review): the code that reads this table is outside this chunk.
 * The data looks like a 48-byte index area of small block numbers
 * followed by 10 blocks of 64 ISO-8859-14 byte values, with 0x00
 * presumably meaning "no mapping" -- confirm against the
 * UTF-8 -> ISO-8859-x converter before relying on this description.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3070
/*
 * ISO-8859-15 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * Every slot in this table is non-zero; only eight slots (in the rows
 * 0xA0 through 0xB8) differ from the identity value 0x0080 + i.
 */
static const unsigned short xmlunicodetable_ISO8859_15[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
    /* 0xA8 */ 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    /* 0xB0 */ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
    /* 0xB8 */ 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
    /* 0xC0 */ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    /* 0xC8 */ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    /* 0xD0 */ 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    /* 0xD8 */ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    /* 0xE0 */ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    /* 0xE8 */ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    /* 0xF0 */ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    /* 0xF8 */ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
};
3089
/*
 * Packed byte table that accompanies xmlunicodetable_ISO8859_15.
 * The string-literal initializer supplies exactly 48 + 6 * 64 bytes, so
 * the array is completely filled and holds no terminating NUL.
 *
 * NOTE(review): the code that reads this table is outside this chunk.
 * The data looks like a 48-byte index area of small block numbers
 * followed by 6 blocks of 64 ISO-8859-15 byte values, with 0x00
 * presumably meaning "no mapping" -- confirm against the
 * UTF-8 -> ISO-8859-x converter before relying on this description.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3119
/*
 * ISO-8859-16 high-half table: 128 Unicode code points, one per slot.
 * The leading 32 slots are the identity values 0x0080..0x009F, so slot i
 * lines up with input byte 0x80 + i (row comments give that byte).
 * Every slot in this table is non-zero.
 */
static const unsigned short xmlunicodetable_ISO8859_16[128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
    /* 0xA8 */ 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
    /* 0xB0 */ 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
    /* 0xB8 */ 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
    /* 0xC0 */ 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
    /* 0xC8 */ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    /* 0xD0 */ 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
    /* 0xD8 */ 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
    /* 0xE0 */ 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
    /* 0xE8 */ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    /* 0xF0 */ 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
    /* 0xF8 */ 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF,
};
3138
/*
 * xmltranscodetable_ISO8859_16:
 *
 * 624-byte (48 + 9 * 64) lookup table that UTF8ToISO8859_16 passes to
 * UTF8ToISO8859x.  The declared size suggests a 48-byte index header
 * followed by nine 64-byte blocks; the offset comments below follow that
 * split.  Non-zero bytes past the header are ISO-8859-16 byte values
 * (0x80..0xff).
 * NOTE(review): the actual indexing scheme is implemented in
 * UTF8ToISO8859x, which is not visible from here -- confirm there before
 * editing any entry.
 *
 * The concatenated string literals supply exactly 624 bytes, so the array
 * is filled completely and no terminating NUL is stored; do not add or
 * remove bytes.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    /* offsets 0..47: small index values (0..8) */
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offsets 48..111: all zero */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offsets 112..175 */
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    /* offsets 176..239 */
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offsets 240..303 */
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    /* offsets 304..367: small index values again (5, 6) */
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offsets 368..431 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offsets 432..495 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offsets 496..559 */
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    /* offsets 560..623 */
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3180
3181
3182/*
3183 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3184 */
3185
3186static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3187    const unsigned char* in, int *inlen) {
3188    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3189}
3190static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3191    const unsigned char* in, int *inlen) {
3192    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3193}
3194
3195static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3196    const unsigned char* in, int *inlen) {
3197    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3198}
3199static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3200    const unsigned char* in, int *inlen) {
3201    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3202}
3203
3204static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3205    const unsigned char* in, int *inlen) {
3206    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3207}
3208static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3209    const unsigned char* in, int *inlen) {
3210    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3211}
3212
3213static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3214    const unsigned char* in, int *inlen) {
3215    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3216}
3217static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3218    const unsigned char* in, int *inlen) {
3219    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3220}
3221
3222static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3223    const unsigned char* in, int *inlen) {
3224    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3225}
3226static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3227    const unsigned char* in, int *inlen) {
3228    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3229}
3230
3231static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3232    const unsigned char* in, int *inlen) {
3233    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3234}
3235static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3236    const unsigned char* in, int *inlen) {
3237    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3238}
3239
3240static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3241    const unsigned char* in, int *inlen) {
3242    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3243}
3244static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3245    const unsigned char* in, int *inlen) {
3246    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3247}
3248
3249static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3250    const unsigned char* in, int *inlen) {
3251    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3252}
3253static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3254    const unsigned char* in, int *inlen) {
3255    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3256}
3257
3258static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3259    const unsigned char* in, int *inlen) {
3260    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3261}
3262static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3263    const unsigned char* in, int *inlen) {
3264    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3265}
3266
3267static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3268    const unsigned char* in, int *inlen) {
3269    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3270}
3271static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3272    const unsigned char* in, int *inlen) {
3273    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3274}
3275
3276static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3277    const unsigned char* in, int *inlen) {
3278    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3279}
3280static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3281    const unsigned char* in, int *inlen) {
3282    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3283}
3284
3285static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3286    const unsigned char* in, int *inlen) {
3287    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3288}
3289static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3290    const unsigned char* in, int *inlen) {
3291    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3292}
3293
3294static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3295    const unsigned char* in, int *inlen) {
3296    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3297}
3298static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3299    const unsigned char* in, int *inlen) {
3300    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3301}
3302
3303static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3304    const unsigned char* in, int *inlen) {
3305    return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3306}
3307static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3308    const unsigned char* in, int *inlen) {
3309    return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3310}
3311
3312static void
3313xmlRegisterCharEncodingHandlersISO8859x (void) {
3314    xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3315    xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3316    xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3317    xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3318    xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3319    xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3320    xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3321    xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3322    xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3323    xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3324    xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3325    xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3326    xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3327    xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3328}
3329
3330#endif
3331#endif
3332
3333#define bottom_encoding
3334#include "elfgcchack.h"
3335
3336