/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */

#define IN_LIBXML
#include "libxml.h"

#include <string.h>

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef LIBXML_ICONV_ENABLED
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#endif
#include <libxml/encoding.h>
#include <libxml/xmlmemory.h>
#ifdef LIBXML_HTML_ENABLED
#include <libxml/HTMLparser.h>
#endif
#include <libxml/globals.h>
#include <libxml/xmlerror.h>

static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/* One registered alias: @alias is the alternate spelling, @name the target. */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/* Alias table, number of entries in use, and allocated capacity. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

#ifdef LIBXML_ICONV_ENABLED
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Byte-order flag consulted by the UTF-16 converters below; non-zero
 * selects the "host is little endian" code paths.
 * NOTE(review): presumably refreshed during handler initialisation
 * elsewhere in the file -- confirm.
 */
static int xmlLittleEndian = 1;

/**
 * xmlEncodingErrMemory:
 * @extra:  extra information
 *
 * Handle an out of memory condition by reporting XML_ERR_NO_MEMORY
 * in the XML_FROM_I18N domain.
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}

/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message
 * @val:  a string value forwarded together with the message
 *
 * Handle an encoding error: raises @error with level XML_ERR_FATAL in
 * the XML_FROM_I18N domain.
 */
static void
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}

/************************************************************************
 *                                                                      *
 *              Conversions To/From UTF8 encoding                       *
 *                                                                      *
 ************************************************************************/

/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out.
 * Returns the number of bytes written if success, or -1 if a byte
 *     outside the ASCII range (>= 0x80) is met
 * The value of @inlen after return is the number of octets consumed
 *     if the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets written.
119 */ 120static int 121asciiToUTF8(unsigned char* out, int *outlen, 122 const unsigned char* in, int *inlen) { 123 unsigned char* outstart = out; 124 const unsigned char* base = in; 125 const unsigned char* processed = in; 126 unsigned char* outend = out + *outlen; 127 const unsigned char* inend; 128 unsigned int c; 129 130 inend = in + (*inlen); 131 while ((in < inend) && (out - outstart + 5 < *outlen)) { 132 c= *in++; 133 134 if (out >= outend) 135 break; 136 if (c < 0x80) { 137 *out++ = c; 138 } else { 139 *outlen = out - outstart; 140 *inlen = processed - base; 141 return(-1); 142 } 143 144 processed = (const unsigned char*) in; 145 } 146 *outlen = out - outstart; 147 *inlen = processed - base; 148 return(*outlen); 149} 150 151#ifdef LIBXML_OUTPUT_ENABLED 152/** 153 * UTF8Toascii: 154 * @out: a pointer to an array of bytes to store the result 155 * @outlen: the length of @out 156 * @in: a pointer to an array of UTF-8 chars 157 * @inlen: the length of @in 158 * 159 * Take a block of UTF-8 chars in and try to convert it to an ASCII 160 * block of chars out. 161 * 162 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 163 * The value of @inlen after return is the number of octets consumed 164 * if the return value is positive, else unpredictable. 165 * The value of @outlen after return is the number of octets consumed. 
166 */ 167static int 168UTF8Toascii(unsigned char* out, int *outlen, 169 const unsigned char* in, int *inlen) { 170 const unsigned char* processed = in; 171 const unsigned char* outend; 172 const unsigned char* outstart = out; 173 const unsigned char* instart = in; 174 const unsigned char* inend; 175 unsigned int c, d; 176 int trailing; 177 178 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 179 if (in == NULL) { 180 /* 181 * initialization nothing to do 182 */ 183 *outlen = 0; 184 *inlen = 0; 185 return(0); 186 } 187 inend = in + (*inlen); 188 outend = out + (*outlen); 189 while (in < inend) { 190 d = *in++; 191 if (d < 0x80) { c= d; trailing= 0; } 192 else if (d < 0xC0) { 193 /* trailing byte in leading position */ 194 *outlen = out - outstart; 195 *inlen = processed - instart; 196 return(-2); 197 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 198 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 199 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 200 else { 201 /* no chance for this in Ascii */ 202 *outlen = out - outstart; 203 *inlen = processed - instart; 204 return(-2); 205 } 206 207 if (inend - in < trailing) { 208 break; 209 } 210 211 for ( ; trailing; trailing--) { 212 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 213 break; 214 c <<= 6; 215 c |= d & 0x3F; 216 } 217 218 /* assertion: c is a single UTF-4 value */ 219 if (c < 0x80) { 220 if (out >= outend) 221 break; 222 *out++ = c; 223 } else { 224 /* no chance for this in Ascii */ 225 *outlen = out - outstart; 226 *inlen = processed - instart; 227 return(-2); 228 } 229 processed = in; 230 } 231 *outlen = out - outstart; 232 *inlen = processed - instart; 233 return(*outlen); 234} 235#endif /* LIBXML_OUTPUT_ENABLED */ 236 237/** 238 * isolat1ToUTF8: 239 * @out: a pointer to an array of bytes to store the result 240 * @outlen: the length of @out 241 * @in: a pointer to an array of ISO Latin 1 chars 242 * @inlen: the length of @in 243 * 244 * Take a block of ISO Latin 1 chars in 
and try to convert it to an UTF-8 245 * block of chars out. 246 * Returns the number of bytes written if success, or -1 otherwise 247 * The value of @inlen after return is the number of octets consumed 248 * if the return value is positive, else unpredictable. 249 * The value of @outlen after return is the number of octets consumed. 250 */ 251int 252isolat1ToUTF8(unsigned char* out, int *outlen, 253 const unsigned char* in, int *inlen) { 254 unsigned char* outstart = out; 255 const unsigned char* base = in; 256 unsigned char* outend; 257 const unsigned char* inend; 258 const unsigned char* instop; 259 260 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL)) 261 return(-1); 262 263 outend = out + *outlen; 264 inend = in + (*inlen); 265 instop = inend; 266 267 while (in < inend && out < outend - 1) { 268 if (*in >= 0x80) { 269 *out++ = (((*in) >> 6) & 0x1F) | 0xC0; 270 *out++ = ((*in) & 0x3F) | 0x80; 271 ++in; 272 } 273 if (instop - in > outend - out) instop = in + (outend - out); 274 while (in < instop && *in < 0x80) { 275 *out++ = *in++; 276 } 277 } 278 if (in < inend && out < outend && *in < 0x80) { 279 *out++ = *in++; 280 } 281 *outlen = out - outstart; 282 *inlen = in - base; 283 return(*outlen); 284} 285 286/** 287 * UTF8ToUTF8: 288 * @out: a pointer to an array of bytes to store the result 289 * @outlen: the length of @out 290 * @inb: a pointer to an array of UTF-8 chars 291 * @inlenb: the length of @in in UTF-8 chars 292 * 293 * No op copy operation for UTF8 handling. 294 * 295 * Returns the number of bytes written, or -1 if lack of space. 296 * The value of *inlen after return is the number of octets consumed 297 * if the return value is positive, else unpredictable. 
298 */ 299static int 300UTF8ToUTF8(unsigned char* out, int *outlen, 301 const unsigned char* inb, int *inlenb) 302{ 303 int len; 304 305 if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL)) 306 return(-1); 307 if (*outlen > *inlenb) { 308 len = *inlenb; 309 } else { 310 len = *outlen; 311 } 312 if (len < 0) 313 return(-1); 314 315 memcpy(out, inb, len); 316 317 *outlen = len; 318 *inlenb = len; 319 return(*outlen); 320} 321 322 323#ifdef LIBXML_OUTPUT_ENABLED 324/** 325 * UTF8Toisolat1: 326 * @out: a pointer to an array of bytes to store the result 327 * @outlen: the length of @out 328 * @in: a pointer to an array of UTF-8 chars 329 * @inlen: the length of @in 330 * 331 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 332 * block of chars out. 333 * 334 * Returns the number of bytes written if success, -2 if the transcoding fails, 335 or -1 otherwise 336 * The value of @inlen after return is the number of octets consumed 337 * if the return value is positive, else unpredictable. 338 * The value of @outlen after return is the number of octets consumed. 
339 */ 340int 341UTF8Toisolat1(unsigned char* out, int *outlen, 342 const unsigned char* in, int *inlen) { 343 const unsigned char* processed = in; 344 const unsigned char* outend; 345 const unsigned char* outstart = out; 346 const unsigned char* instart = in; 347 const unsigned char* inend; 348 unsigned int c, d; 349 int trailing; 350 351 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 352 if (in == NULL) { 353 /* 354 * initialization nothing to do 355 */ 356 *outlen = 0; 357 *inlen = 0; 358 return(0); 359 } 360 inend = in + (*inlen); 361 outend = out + (*outlen); 362 while (in < inend) { 363 d = *in++; 364 if (d < 0x80) { c= d; trailing= 0; } 365 else if (d < 0xC0) { 366 /* trailing byte in leading position */ 367 *outlen = out - outstart; 368 *inlen = processed - instart; 369 return(-2); 370 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 371 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 372 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 373 else { 374 /* no chance for this in IsoLat1 */ 375 *outlen = out - outstart; 376 *inlen = processed - instart; 377 return(-2); 378 } 379 380 if (inend - in < trailing) { 381 break; 382 } 383 384 for ( ; trailing; trailing--) { 385 if (in >= inend) 386 break; 387 if (((d= *in++) & 0xC0) != 0x80) { 388 *outlen = out - outstart; 389 *inlen = processed - instart; 390 return(-2); 391 } 392 c <<= 6; 393 c |= d & 0x3F; 394 } 395 396 /* assertion: c is a single UTF-4 value */ 397 if (c <= 0xFF) { 398 if (out >= outend) 399 break; 400 *out++ = c; 401 } else { 402 /* no chance for this in IsoLat1 */ 403 *outlen = out - outstart; 404 *inlen = processed - instart; 405 return(-2); 406 } 407 processed = in; 408 } 409 *outlen = out - outstart; 410 *inlen = processed - instart; 411 return(*outlen); 412} 413#endif /* LIBXML_OUTPUT_ENABLED */ 414 415/** 416 * UTF16LEToUTF8: 417 * @out: a pointer to an array of bytes to store the result 418 * @outlen: the length of @out 419 * @inb: a pointer to an array of 
UTF-16LE passwd as a byte array 420 * @inlenb: the length of @in in UTF-16LE chars 421 * 422 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8 423 * block of chars out. This function assumes the endian property 424 * is the same between the native type of this machine and the 425 * inputed one. 426 * 427 * Returns the number of bytes written, or -1 if lack of space, or -2 428 * if the transcoding fails (if *in is not a valid utf16 string) 429 * The value of *inlen after return is the number of octets consumed 430 * if the return value is positive, else unpredictable. 431 */ 432static int 433UTF16LEToUTF8(unsigned char* out, int *outlen, 434 const unsigned char* inb, int *inlenb) 435{ 436 unsigned char* outstart = out; 437 const unsigned char* processed = inb; 438 unsigned char* outend = out + *outlen; 439 unsigned short* in = (unsigned short*) inb; 440 unsigned short* inend; 441 unsigned int c, d, inlen; 442 unsigned char *tmp; 443 int bits; 444 445 if ((*inlenb % 2) == 1) 446 (*inlenb)--; 447 inlen = *inlenb / 2; 448 inend = in + inlen; 449 while ((in < inend) && (out - outstart + 5 < *outlen)) { 450 if (xmlLittleEndian) { 451 c= *in++; 452 } else { 453 tmp = (unsigned char *) in; 454 c = *tmp++; 455 c = c | (((unsigned int)*tmp) << 8); 456 in++; 457 } 458 if ((c & 0xFC00) == 0xD800) { /* surrogates */ 459 if (in >= inend) { /* (in > inend) shouldn't happens */ 460 break; 461 } 462 if (xmlLittleEndian) { 463 d = *in++; 464 } else { 465 tmp = (unsigned char *) in; 466 d = *tmp++; 467 d = d | (((unsigned int)*tmp) << 8); 468 in++; 469 } 470 if ((d & 0xFC00) == 0xDC00) { 471 c &= 0x03FF; 472 c <<= 10; 473 c |= d & 0x03FF; 474 c += 0x10000; 475 } 476 else { 477 *outlen = out - outstart; 478 *inlenb = processed - inb; 479 return(-2); 480 } 481 } 482 483 /* assertion: c is a single UTF-4 value */ 484 if (out >= outend) 485 break; 486 if (c < 0x80) { *out++= c; bits= -6; } 487 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 488 
else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 489 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 490 491 for ( ; bits >= 0; bits-= 6) { 492 if (out >= outend) 493 break; 494 *out++= ((c >> bits) & 0x3F) | 0x80; 495 } 496 processed = (const unsigned char*) in; 497 } 498 *outlen = out - outstart; 499 *inlenb = processed - inb; 500 return(*outlen); 501} 502 503#ifdef LIBXML_OUTPUT_ENABLED 504/** 505 * UTF8ToUTF16LE: 506 * @outb: a pointer to an array of bytes to store the result 507 * @outlen: the length of @outb 508 * @in: a pointer to an array of UTF-8 chars 509 * @inlen: the length of @in 510 * 511 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE 512 * block of chars out. 513 * 514 * Returns the number of bytes written, or -1 if lack of space, or -2 515 * if the transcoding failed. 516 */ 517static int 518UTF8ToUTF16LE(unsigned char* outb, int *outlen, 519 const unsigned char* in, int *inlen) 520{ 521 unsigned short* out = (unsigned short*) outb; 522 const unsigned char* processed = in; 523 const unsigned char *const instart = in; 524 unsigned short* outstart= out; 525 unsigned short* outend; 526 const unsigned char* inend; 527 unsigned int c, d; 528 int trailing; 529 unsigned char *tmp; 530 unsigned short tmp1, tmp2; 531 532 /* UTF16LE encoding has no BOM */ 533 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 534 if (in == NULL) { 535 *outlen = 0; 536 *inlen = 0; 537 return(0); 538 } 539 inend= in + *inlen; 540 outend = out + (*outlen / 2); 541 while (in < inend) { 542 d= *in++; 543 if (d < 0x80) { c= d; trailing= 0; } 544 else if (d < 0xC0) { 545 /* trailing byte in leading position */ 546 *outlen = (out - outstart) * 2; 547 *inlen = processed - instart; 548 return(-2); 549 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 550 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 551 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 552 else { 553 /* no chance for this in UTF-16 */ 554 *outlen = 
(out - outstart) * 2; 555 *inlen = processed - instart; 556 return(-2); 557 } 558 559 if (inend - in < trailing) { 560 break; 561 } 562 563 for ( ; trailing; trailing--) { 564 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 565 break; 566 c <<= 6; 567 c |= d & 0x3F; 568 } 569 570 /* assertion: c is a single UTF-4 value */ 571 if (c < 0x10000) { 572 if (out >= outend) 573 break; 574 if (xmlLittleEndian) { 575 *out++ = c; 576 } else { 577 tmp = (unsigned char *) out; 578 *tmp = c ; 579 *(tmp + 1) = c >> 8 ; 580 out++; 581 } 582 } 583 else if (c < 0x110000) { 584 if (out+1 >= outend) 585 break; 586 c -= 0x10000; 587 if (xmlLittleEndian) { 588 *out++ = 0xD800 | (c >> 10); 589 *out++ = 0xDC00 | (c & 0x03FF); 590 } else { 591 tmp1 = 0xD800 | (c >> 10); 592 tmp = (unsigned char *) out; 593 *tmp = (unsigned char) tmp1; 594 *(tmp + 1) = tmp1 >> 8; 595 out++; 596 597 tmp2 = 0xDC00 | (c & 0x03FF); 598 tmp = (unsigned char *) out; 599 *tmp = (unsigned char) tmp2; 600 *(tmp + 1) = tmp2 >> 8; 601 out++; 602 } 603 } 604 else 605 break; 606 processed = in; 607 } 608 *outlen = (out - outstart) * 2; 609 *inlen = processed - instart; 610 return(*outlen); 611} 612 613/** 614 * UTF8ToUTF16: 615 * @outb: a pointer to an array of bytes to store the result 616 * @outlen: the length of @outb 617 * @in: a pointer to an array of UTF-8 chars 618 * @inlen: the length of @in 619 * 620 * Take a block of UTF-8 chars in and try to convert it to an UTF-16 621 * block of chars out. 622 * 623 * Returns the number of bytes written, or -1 if lack of space, or -2 624 * if the transcoding failed. 
625 */ 626static int 627UTF8ToUTF16(unsigned char* outb, int *outlen, 628 const unsigned char* in, int *inlen) 629{ 630 if (in == NULL) { 631 /* 632 * initialization, add the Byte Order Mark for UTF-16LE 633 */ 634 if (*outlen >= 2) { 635 outb[0] = 0xFF; 636 outb[1] = 0xFE; 637 *outlen = 2; 638 *inlen = 0; 639#ifdef DEBUG_ENCODING 640 xmlGenericError(xmlGenericErrorContext, 641 "Added FFFE Byte Order Mark\n"); 642#endif 643 return(2); 644 } 645 *outlen = 0; 646 *inlen = 0; 647 return(0); 648 } 649 return (UTF8ToUTF16LE(outb, outlen, in, inlen)); 650} 651#endif /* LIBXML_OUTPUT_ENABLED */ 652 653/** 654 * UTF16BEToUTF8: 655 * @out: a pointer to an array of bytes to store the result 656 * @outlen: the length of @out 657 * @inb: a pointer to an array of UTF-16 passed as a byte array 658 * @inlenb: the length of @in in UTF-16 chars 659 * 660 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 661 * block of chars out. This function assumes the endian property 662 * is the same between the native type of this machine and the 663 * inputed one. 664 * 665 * Returns the number of bytes written, or -1 if lack of space, or -2 666 * if the transcoding fails (if *in is not a valid utf16 string) 667 * The value of *inlen after return is the number of octets consumed 668 * if the return value is positive, else unpredictable. 
669 */ 670static int 671UTF16BEToUTF8(unsigned char* out, int *outlen, 672 const unsigned char* inb, int *inlenb) 673{ 674 unsigned char* outstart = out; 675 const unsigned char* processed = inb; 676 unsigned char* outend = out + *outlen; 677 unsigned short* in = (unsigned short*) inb; 678 unsigned short* inend; 679 unsigned int c, d, inlen; 680 unsigned char *tmp; 681 int bits; 682 683 if ((*inlenb % 2) == 1) 684 (*inlenb)--; 685 inlen = *inlenb / 2; 686 inend= in + inlen; 687 while (in < inend) { 688 if (xmlLittleEndian) { 689 tmp = (unsigned char *) in; 690 c = *tmp++; 691 c = c << 8; 692 c = c | (unsigned int) *tmp; 693 in++; 694 } else { 695 c= *in++; 696 } 697 if ((c & 0xFC00) == 0xD800) { /* surrogates */ 698 if (in >= inend) { /* (in > inend) shouldn't happens */ 699 *outlen = out - outstart; 700 *inlenb = processed - inb; 701 return(-2); 702 } 703 if (xmlLittleEndian) { 704 tmp = (unsigned char *) in; 705 d = *tmp++; 706 d = d << 8; 707 d = d | (unsigned int) *tmp; 708 in++; 709 } else { 710 d= *in++; 711 } 712 if ((d & 0xFC00) == 0xDC00) { 713 c &= 0x03FF; 714 c <<= 10; 715 c |= d & 0x03FF; 716 c += 0x10000; 717 } 718 else { 719 *outlen = out - outstart; 720 *inlenb = processed - inb; 721 return(-2); 722 } 723 } 724 725 /* assertion: c is a single UTF-4 value */ 726 if (out >= outend) 727 break; 728 if (c < 0x80) { *out++= c; bits= -6; } 729 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 730 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 731 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 732 733 for ( ; bits >= 0; bits-= 6) { 734 if (out >= outend) 735 break; 736 *out++= ((c >> bits) & 0x3F) | 0x80; 737 } 738 processed = (const unsigned char*) in; 739 } 740 *outlen = out - outstart; 741 *inlenb = processed - inb; 742 return(*outlen); 743} 744 745#ifdef LIBXML_OUTPUT_ENABLED 746/** 747 * UTF8ToUTF16BE: 748 * @outb: a pointer to an array of bytes to store the result 749 * @outlen: the length of @outb 750 * 
@in: a pointer to an array of UTF-8 chars 751 * @inlen: the length of @in 752 * 753 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE 754 * block of chars out. 755 * 756 * Returns the number of byte written, or -1 by lack of space, or -2 757 * if the transcoding failed. 758 */ 759static int 760UTF8ToUTF16BE(unsigned char* outb, int *outlen, 761 const unsigned char* in, int *inlen) 762{ 763 unsigned short* out = (unsigned short*) outb; 764 const unsigned char* processed = in; 765 const unsigned char *const instart = in; 766 unsigned short* outstart= out; 767 unsigned short* outend; 768 const unsigned char* inend; 769 unsigned int c, d; 770 int trailing; 771 unsigned char *tmp; 772 unsigned short tmp1, tmp2; 773 774 /* UTF-16BE has no BOM */ 775 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 776 if (in == NULL) { 777 *outlen = 0; 778 *inlen = 0; 779 return(0); 780 } 781 inend= in + *inlen; 782 outend = out + (*outlen / 2); 783 while (in < inend) { 784 d= *in++; 785 if (d < 0x80) { c= d; trailing= 0; } 786 else if (d < 0xC0) { 787 /* trailing byte in leading position */ 788 *outlen = out - outstart; 789 *inlen = processed - instart; 790 return(-2); 791 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 792 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 793 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 794 else { 795 /* no chance for this in UTF-16 */ 796 *outlen = out - outstart; 797 *inlen = processed - instart; 798 return(-2); 799 } 800 801 if (inend - in < trailing) { 802 break; 803 } 804 805 for ( ; trailing; trailing--) { 806 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; 807 c <<= 6; 808 c |= d & 0x3F; 809 } 810 811 /* assertion: c is a single UTF-4 value */ 812 if (c < 0x10000) { 813 if (out >= outend) break; 814 if (xmlLittleEndian) { 815 tmp = (unsigned char *) out; 816 *tmp = c >> 8; 817 *(tmp + 1) = c; 818 out++; 819 } else { 820 *out++ = c; 821 } 822 } 823 else if (c < 0x110000) { 824 if (out+1 
>= outend) break; 825 c -= 0x10000; 826 if (xmlLittleEndian) { 827 tmp1 = 0xD800 | (c >> 10); 828 tmp = (unsigned char *) out; 829 *tmp = tmp1 >> 8; 830 *(tmp + 1) = (unsigned char) tmp1; 831 out++; 832 833 tmp2 = 0xDC00 | (c & 0x03FF); 834 tmp = (unsigned char *) out; 835 *tmp = tmp2 >> 8; 836 *(tmp + 1) = (unsigned char) tmp2; 837 out++; 838 } else { 839 *out++ = 0xD800 | (c >> 10); 840 *out++ = 0xDC00 | (c & 0x03FF); 841 } 842 } 843 else 844 break; 845 processed = in; 846 } 847 *outlen = (out - outstart) * 2; 848 *inlen = processed - instart; 849 return(*outlen); 850} 851#endif /* LIBXML_OUTPUT_ENABLED */ 852 853/************************************************************************ 854 * * 855 * Generic encoding handling routines * 856 * * 857 ************************************************************************/ 858 859/** 860 * xmlDetectCharEncoding: 861 * @in: a pointer to the first bytes of the XML entity, must be at least 862 * 2 bytes long (at least 4 if encoding is UTF4 variant). 863 * @len: pointer to the length of the buffer 864 * 865 * Guess the encoding of the entity using the first bytes of the entity content 866 * according to the non-normative appendix F of the XML-1.0 recommendation. 867 * 868 * Returns one of the XML_CHAR_ENCODING_... values. 
869 */ 870xmlCharEncoding 871xmlDetectCharEncoding(const unsigned char* in, int len) 872{ 873 if (in == NULL) 874 return(XML_CHAR_ENCODING_NONE); 875 if (len >= 4) { 876 if ((in[0] == 0x00) && (in[1] == 0x00) && 877 (in[2] == 0x00) && (in[3] == 0x3C)) 878 return(XML_CHAR_ENCODING_UCS4BE); 879 if ((in[0] == 0x3C) && (in[1] == 0x00) && 880 (in[2] == 0x00) && (in[3] == 0x00)) 881 return(XML_CHAR_ENCODING_UCS4LE); 882 if ((in[0] == 0x00) && (in[1] == 0x00) && 883 (in[2] == 0x3C) && (in[3] == 0x00)) 884 return(XML_CHAR_ENCODING_UCS4_2143); 885 if ((in[0] == 0x00) && (in[1] == 0x3C) && 886 (in[2] == 0x00) && (in[3] == 0x00)) 887 return(XML_CHAR_ENCODING_UCS4_3412); 888 if ((in[0] == 0x4C) && (in[1] == 0x6F) && 889 (in[2] == 0xA7) && (in[3] == 0x94)) 890 return(XML_CHAR_ENCODING_EBCDIC); 891 if ((in[0] == 0x3C) && (in[1] == 0x3F) && 892 (in[2] == 0x78) && (in[3] == 0x6D)) 893 return(XML_CHAR_ENCODING_UTF8); 894 /* 895 * Although not part of the recommendation, we also 896 * attempt an "auto-recognition" of UTF-16LE and 897 * UTF-16BE encodings. 
898 */ 899 if ((in[0] == 0x3C) && (in[1] == 0x00) && 900 (in[2] == 0x3F) && (in[3] == 0x00)) 901 return(XML_CHAR_ENCODING_UTF16LE); 902 if ((in[0] == 0x00) && (in[1] == 0x3C) && 903 (in[2] == 0x00) && (in[3] == 0x3F)) 904 return(XML_CHAR_ENCODING_UTF16BE); 905 } 906 if (len >= 3) { 907 /* 908 * Errata on XML-1.0 June 20 2001 909 * We now allow an UTF8 encoded BOM 910 */ 911 if ((in[0] == 0xEF) && (in[1] == 0xBB) && 912 (in[2] == 0xBF)) 913 return(XML_CHAR_ENCODING_UTF8); 914 } 915 /* For UTF-16 we can recognize by the BOM */ 916 if (len >= 2) { 917 if ((in[0] == 0xFE) && (in[1] == 0xFF)) 918 return(XML_CHAR_ENCODING_UTF16BE); 919 if ((in[0] == 0xFF) && (in[1] == 0xFE)) 920 return(XML_CHAR_ENCODING_UTF16LE); 921 } 922 return(XML_CHAR_ENCODING_NONE); 923} 924 925/** 926 * xmlCleanupEncodingAliases: 927 * 928 * Unregisters all aliases 929 */ 930void 931xmlCleanupEncodingAliases(void) { 932 int i; 933 934 if (xmlCharEncodingAliases == NULL) 935 return; 936 937 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 938 if (xmlCharEncodingAliases[i].name != NULL) 939 xmlFree((char *) xmlCharEncodingAliases[i].name); 940 if (xmlCharEncodingAliases[i].alias != NULL) 941 xmlFree((char *) xmlCharEncodingAliases[i].alias); 942 } 943 xmlCharEncodingAliasesNb = 0; 944 xmlCharEncodingAliasesMax = 0; 945 xmlFree(xmlCharEncodingAliases); 946 xmlCharEncodingAliases = NULL; 947} 948 949/** 950 * xmlGetEncodingAlias: 951 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 952 * 953 * Lookup an encoding name for the given alias. 
954 * 955 * Returns NULL if not found, otherwise the original name 956 */ 957const char * 958xmlGetEncodingAlias(const char *alias) { 959 int i; 960 char upper[100]; 961 962 if (alias == NULL) 963 return(NULL); 964 965 if (xmlCharEncodingAliases == NULL) 966 return(NULL); 967 968 for (i = 0;i < 99;i++) { 969 upper[i] = toupper(alias[i]); 970 if (upper[i] == 0) break; 971 } 972 upper[i] = 0; 973 974 /* 975 * Walk down the list looking for a definition of the alias 976 */ 977 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 978 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 979 return(xmlCharEncodingAliases[i].name); 980 } 981 } 982 return(NULL); 983} 984 985/** 986 * xmlAddEncodingAlias: 987 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 988 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 989 * 990 * Registers an alias @alias for an encoding named @name. Existing alias 991 * will be overwritten. 992 * 993 * Returns 0 in case of success, -1 in case of error 994 */ 995int 996xmlAddEncodingAlias(const char *name, const char *alias) { 997 int i; 998 char upper[100]; 999 1000 if ((name == NULL) || (alias == NULL)) 1001 return(-1); 1002 1003 for (i = 0;i < 99;i++) { 1004 upper[i] = toupper(alias[i]); 1005 if (upper[i] == 0) break; 1006 } 1007 upper[i] = 0; 1008 1009 if (xmlCharEncodingAliases == NULL) { 1010 xmlCharEncodingAliasesNb = 0; 1011 xmlCharEncodingAliasesMax = 20; 1012 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1013 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1014 if (xmlCharEncodingAliases == NULL) 1015 return(-1); 1016 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1017 xmlCharEncodingAliasesMax *= 2; 1018 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1019 xmlRealloc(xmlCharEncodingAliases, 1020 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1021 } 1022 /* 1023 * Walk down the list looking for a definition of the alias 1024 */ 1025 for (i = 
0;i < xmlCharEncodingAliasesNb;i++) { 1026 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1027 /* 1028 * Replace the definition. 1029 */ 1030 xmlFree((char *) xmlCharEncodingAliases[i].name); 1031 xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1032 return(0); 1033 } 1034 } 1035 /* 1036 * Add the definition 1037 */ 1038 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1039 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1040 xmlCharEncodingAliasesNb++; 1041 return(0); 1042} 1043 1044/** 1045 * xmlDelEncodingAlias: 1046 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1047 * 1048 * Unregisters an encoding alias @alias 1049 * 1050 * Returns 0 in case of success, -1 in case of error 1051 */ 1052int 1053xmlDelEncodingAlias(const char *alias) { 1054 int i; 1055 1056 if (alias == NULL) 1057 return(-1); 1058 1059 if (xmlCharEncodingAliases == NULL) 1060 return(-1); 1061 /* 1062 * Walk down the list looking for a definition of the alias 1063 */ 1064 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1065 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1066 xmlFree((char *) xmlCharEncodingAliases[i].name); 1067 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1068 xmlCharEncodingAliasesNb--; 1069 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1070 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1071 return(0); 1072 } 1073 } 1074 return(-1); 1075} 1076 1077/** 1078 * xmlParseCharEncoding: 1079 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1080 * 1081 * Compare the string to the encoding schemes already known. Note 1082 * that the comparison is case insensitive accordingly to the section 1083 * [XML] 4.3.3 Character Encoding in Entities. 1084 * 1085 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1086 * if not recognized. 
1087 */ 1088xmlCharEncoding 1089xmlParseCharEncoding(const char* name) 1090{ 1091 const char *alias; 1092 char upper[500]; 1093 int i; 1094 1095 if (name == NULL) 1096 return(XML_CHAR_ENCODING_NONE); 1097 1098 /* 1099 * Do the alias resolution 1100 */ 1101 alias = xmlGetEncodingAlias(name); 1102 if (alias != NULL) 1103 name = alias; 1104 1105 for (i = 0;i < 499;i++) { 1106 upper[i] = toupper(name[i]); 1107 if (upper[i] == 0) break; 1108 } 1109 upper[i] = 0; 1110 1111 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1112 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1113 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1114 1115 /* 1116 * NOTE: if we were able to parse this, the endianness of UTF16 is 1117 * already found and in use 1118 */ 1119 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1120 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1121 1122 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1123 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1124 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1125 1126 /* 1127 * NOTE: if we were able to parse this, the endianness of UCS4 is 1128 * already found and in use 1129 */ 1130 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1131 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1132 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1133 1134 1135 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1136 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1137 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1138 1139 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1140 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1141 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1142 1143 if (!strcmp(upper, "ISO-8859-3")) 
return(XML_CHAR_ENCODING_8859_3); 1144 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1145 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1146 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1147 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1148 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1149 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1150 1151 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1152 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1153 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1154 1155#ifdef DEBUG_ENCODING 1156 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1157#endif 1158 return(XML_CHAR_ENCODING_ERROR); 1159} 1160 1161/** 1162 * xmlGetCharEncodingName: 1163 * @enc: the encoding 1164 * 1165 * The "canonical" name for XML encoding. 1166 * C.f. http://www.w3.org/TR/REC-xml#charencoding 1167 * Section 4.3.3 Character Encoding in Entities 1168 * 1169 * Returns the canonical name for the given encoding 1170 */ 1171 1172const char* 1173xmlGetCharEncodingName(xmlCharEncoding enc) { 1174 switch (enc) { 1175 case XML_CHAR_ENCODING_ERROR: 1176 return(NULL); 1177 case XML_CHAR_ENCODING_NONE: 1178 return(NULL); 1179 case XML_CHAR_ENCODING_UTF8: 1180 return("UTF-8"); 1181 case XML_CHAR_ENCODING_UTF16LE: 1182 return("UTF-16"); 1183 case XML_CHAR_ENCODING_UTF16BE: 1184 return("UTF-16"); 1185 case XML_CHAR_ENCODING_EBCDIC: 1186 return("EBCDIC"); 1187 case XML_CHAR_ENCODING_UCS4LE: 1188 return("ISO-10646-UCS-4"); 1189 case XML_CHAR_ENCODING_UCS4BE: 1190 return("ISO-10646-UCS-4"); 1191 case XML_CHAR_ENCODING_UCS4_2143: 1192 return("ISO-10646-UCS-4"); 1193 case XML_CHAR_ENCODING_UCS4_3412: 1194 return("ISO-10646-UCS-4"); 1195 case XML_CHAR_ENCODING_UCS2: 1196 return("ISO-10646-UCS-2"); 1197 case XML_CHAR_ENCODING_8859_1: 1198 return("ISO-8859-1"); 
1199 case XML_CHAR_ENCODING_8859_2: 1200 return("ISO-8859-2"); 1201 case XML_CHAR_ENCODING_8859_3: 1202 return("ISO-8859-3"); 1203 case XML_CHAR_ENCODING_8859_4: 1204 return("ISO-8859-4"); 1205 case XML_CHAR_ENCODING_8859_5: 1206 return("ISO-8859-5"); 1207 case XML_CHAR_ENCODING_8859_6: 1208 return("ISO-8859-6"); 1209 case XML_CHAR_ENCODING_8859_7: 1210 return("ISO-8859-7"); 1211 case XML_CHAR_ENCODING_8859_8: 1212 return("ISO-8859-8"); 1213 case XML_CHAR_ENCODING_8859_9: 1214 return("ISO-8859-9"); 1215 case XML_CHAR_ENCODING_2022_JP: 1216 return("ISO-2022-JP"); 1217 case XML_CHAR_ENCODING_SHIFT_JIS: 1218 return("Shift-JIS"); 1219 case XML_CHAR_ENCODING_EUC_JP: 1220 return("EUC-JP"); 1221 case XML_CHAR_ENCODING_ASCII: 1222 return(NULL); 1223 } 1224 return(NULL); 1225} 1226 1227/************************************************************************ 1228 * * 1229 * Char encoding handlers * 1230 * * 1231 ************************************************************************/ 1232 1233 1234/* the size should be growable, but it's not a big deal ... */ 1235#define MAX_ENCODING_HANDLERS 50 1236static xmlCharEncodingHandlerPtr *handlers = NULL; 1237static int nbCharEncodingHandler = 0; 1238 1239/* 1240 * The default is UTF-8 for XML, that's also the default used for the 1241 * parser internals, so the default encoding handler is NULL 1242 */ 1243 1244static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1245 1246/** 1247 * xmlNewCharEncodingHandler: 1248 * @name: the encoding name, in UTF-8 format (ASCII actually) 1249 * @input: the xmlCharEncodingInputFunc to read that encoding 1250 * @output: the xmlCharEncodingOutputFunc to write that encoding 1251 * 1252 * Create and registers an xmlCharEncodingHandler. 1253 * 1254 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 
1255 */ 1256xmlCharEncodingHandlerPtr 1257xmlNewCharEncodingHandler(const char *name, 1258 xmlCharEncodingInputFunc input, 1259 xmlCharEncodingOutputFunc output) { 1260 xmlCharEncodingHandlerPtr handler; 1261 const char *alias; 1262 char upper[500]; 1263 int i; 1264 char *up = NULL; 1265 1266 /* 1267 * Do the alias resolution 1268 */ 1269 alias = xmlGetEncodingAlias(name); 1270 if (alias != NULL) 1271 name = alias; 1272 1273 /* 1274 * Keep only the uppercase version of the encoding. 1275 */ 1276 if (name == NULL) { 1277 xmlEncodingErr(XML_I18N_NO_NAME, 1278 "xmlNewCharEncodingHandler : no name !\n", NULL); 1279 return(NULL); 1280 } 1281 for (i = 0;i < 499;i++) { 1282 upper[i] = toupper(name[i]); 1283 if (upper[i] == 0) break; 1284 } 1285 upper[i] = 0; 1286 up = xmlMemStrdup(upper); 1287 if (up == NULL) { 1288 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1289 return(NULL); 1290 } 1291 1292 /* 1293 * allocate and fill-up an handler block. 1294 */ 1295 handler = (xmlCharEncodingHandlerPtr) 1296 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1297 if (handler == NULL) { 1298 xmlFree(up); 1299 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1300 return(NULL); 1301 } 1302 handler->input = input; 1303 handler->output = output; 1304 handler->name = up; 1305 1306#ifdef LIBXML_ICONV_ENABLED 1307 handler->iconv_in = NULL; 1308 handler->iconv_out = NULL; 1309#endif /* LIBXML_ICONV_ENABLED */ 1310 1311 /* 1312 * registers and returns the handler. 1313 */ 1314 xmlRegisterCharEncodingHandler(handler); 1315#ifdef DEBUG_ENCODING 1316 xmlGenericError(xmlGenericErrorContext, 1317 "Registered encoding handler for %s\n", name); 1318#endif 1319 return(handler); 1320} 1321 1322/** 1323 * xmlInitCharEncodingHandlers: 1324 * 1325 * Initialize the char encoding support, it registers the default 1326 * encoding supported. 1327 * NOTE: while public, this function usually doesn't need to be called 1328 * in normal processing. 
1329 */ 1330void 1331xmlInitCharEncodingHandlers(void) { 1332 unsigned short int tst = 0x1234; 1333 unsigned char *ptr = (unsigned char *) &tst; 1334 1335 if (handlers != NULL) return; 1336 1337 handlers = (xmlCharEncodingHandlerPtr *) 1338 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1339 1340 if (*ptr == 0x12) xmlLittleEndian = 0; 1341 else if (*ptr == 0x34) xmlLittleEndian = 1; 1342 else { 1343 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1344 "Odd problem at endianness detection\n", NULL); 1345 } 1346 1347 if (handlers == NULL) { 1348 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1349 return; 1350 } 1351 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1352#ifdef LIBXML_OUTPUT_ENABLED 1353 xmlUTF16LEHandler = 1354 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1355 xmlUTF16BEHandler = 1356 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1357 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1358 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1359 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1360 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1361#ifdef LIBXML_HTML_ENABLED 1362 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1363#endif 1364#else 1365 xmlUTF16LEHandler = 1366 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1367 xmlUTF16BEHandler = 1368 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL); 1369 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1370 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1371 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1372 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1373#endif /* LIBXML_OUTPUT_ENABLED */ 1374#ifndef LIBXML_ICONV_ENABLED 1375#ifdef LIBXML_ISO8859X_ENABLED 1376 xmlRegisterCharEncodingHandlersISO8859x (); 1377#endif 1378#endif 1379 1380} 1381 1382/** 1383 * 
xmlCleanupCharEncodingHandlers: 1384 * 1385 * Cleanup the memory allocated for the char encoding support, it 1386 * unregisters all the encoding handlers and the aliases. 1387 */ 1388void 1389xmlCleanupCharEncodingHandlers(void) { 1390 xmlCleanupEncodingAliases(); 1391 1392 if (handlers == NULL) return; 1393 1394 for (;nbCharEncodingHandler > 0;) { 1395 nbCharEncodingHandler--; 1396 if (handlers[nbCharEncodingHandler] != NULL) { 1397 if (handlers[nbCharEncodingHandler]->name != NULL) 1398 xmlFree(handlers[nbCharEncodingHandler]->name); 1399 xmlFree(handlers[nbCharEncodingHandler]); 1400 } 1401 } 1402 xmlFree(handlers); 1403 handlers = NULL; 1404 nbCharEncodingHandler = 0; 1405 xmlDefaultCharEncodingHandler = NULL; 1406} 1407 1408/** 1409 * xmlRegisterCharEncodingHandler: 1410 * @handler: the xmlCharEncodingHandlerPtr handler block 1411 * 1412 * Register the char encoding handler, surprising, isn't it ? 1413 */ 1414void 1415xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1416 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1417 if ((handler == NULL) || (handlers == NULL)) { 1418 xmlEncodingErr(XML_I18N_NO_HANDLER, 1419 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1420 return; 1421 } 1422 1423 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1424 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1425 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1426 "MAX_ENCODING_HANDLERS"); 1427 return; 1428 } 1429 handlers[nbCharEncodingHandler++] = handler; 1430} 1431 1432/** 1433 * xmlGetCharEncodingHandler: 1434 * @enc: an xmlCharEncoding value. 1435 * 1436 * Search in the registered set the handler able to read/write that encoding. 
1437 * 1438 * Returns the handler or NULL if not found 1439 */ 1440xmlCharEncodingHandlerPtr 1441xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1442 xmlCharEncodingHandlerPtr handler; 1443 1444 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1445 switch (enc) { 1446 case XML_CHAR_ENCODING_ERROR: 1447 return(NULL); 1448 case XML_CHAR_ENCODING_NONE: 1449 return(NULL); 1450 case XML_CHAR_ENCODING_UTF8: 1451 return(NULL); 1452 case XML_CHAR_ENCODING_UTF16LE: 1453 return(xmlUTF16LEHandler); 1454 case XML_CHAR_ENCODING_UTF16BE: 1455 return(xmlUTF16BEHandler); 1456 case XML_CHAR_ENCODING_EBCDIC: 1457 handler = xmlFindCharEncodingHandler("EBCDIC"); 1458 if (handler != NULL) return(handler); 1459 handler = xmlFindCharEncodingHandler("ebcdic"); 1460 if (handler != NULL) return(handler); 1461 handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1462 if (handler != NULL) return(handler); 1463 break; 1464 case XML_CHAR_ENCODING_UCS4BE: 1465 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1466 if (handler != NULL) return(handler); 1467 handler = xmlFindCharEncodingHandler("UCS-4"); 1468 if (handler != NULL) return(handler); 1469 handler = xmlFindCharEncodingHandler("UCS4"); 1470 if (handler != NULL) return(handler); 1471 break; 1472 case XML_CHAR_ENCODING_UCS4LE: 1473 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1474 if (handler != NULL) return(handler); 1475 handler = xmlFindCharEncodingHandler("UCS-4"); 1476 if (handler != NULL) return(handler); 1477 handler = xmlFindCharEncodingHandler("UCS4"); 1478 if (handler != NULL) return(handler); 1479 break; 1480 case XML_CHAR_ENCODING_UCS4_2143: 1481 break; 1482 case XML_CHAR_ENCODING_UCS4_3412: 1483 break; 1484 case XML_CHAR_ENCODING_UCS2: 1485 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1486 if (handler != NULL) return(handler); 1487 handler = xmlFindCharEncodingHandler("UCS-2"); 1488 if (handler != NULL) return(handler); 1489 handler = xmlFindCharEncodingHandler("UCS2"); 1490 if (handler != 
NULL) return(handler); 1491 break; 1492 1493 /* 1494 * We used to keep ISO Latin encodings native in the 1495 * generated data. This led to so many problems that 1496 * this has been removed. One can still change this 1497 * back by registering no-ops encoders for those 1498 */ 1499 case XML_CHAR_ENCODING_8859_1: 1500 handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1501 if (handler != NULL) return(handler); 1502 break; 1503 case XML_CHAR_ENCODING_8859_2: 1504 handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1505 if (handler != NULL) return(handler); 1506 break; 1507 case XML_CHAR_ENCODING_8859_3: 1508 handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1509 if (handler != NULL) return(handler); 1510 break; 1511 case XML_CHAR_ENCODING_8859_4: 1512 handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1513 if (handler != NULL) return(handler); 1514 break; 1515 case XML_CHAR_ENCODING_8859_5: 1516 handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1517 if (handler != NULL) return(handler); 1518 break; 1519 case XML_CHAR_ENCODING_8859_6: 1520 handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1521 if (handler != NULL) return(handler); 1522 break; 1523 case XML_CHAR_ENCODING_8859_7: 1524 handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1525 if (handler != NULL) return(handler); 1526 break; 1527 case XML_CHAR_ENCODING_8859_8: 1528 handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1529 if (handler != NULL) return(handler); 1530 break; 1531 case XML_CHAR_ENCODING_8859_9: 1532 handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1533 if (handler != NULL) return(handler); 1534 break; 1535 1536 1537 case XML_CHAR_ENCODING_2022_JP: 1538 handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1539 if (handler != NULL) return(handler); 1540 break; 1541 case XML_CHAR_ENCODING_SHIFT_JIS: 1542 handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1543 if (handler != NULL) return(handler); 1544 handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1545 if (handler != NULL) 
return(handler); 1546 handler = xmlFindCharEncodingHandler("Shift_JIS"); 1547 if (handler != NULL) return(handler); 1548 break; 1549 case XML_CHAR_ENCODING_EUC_JP: 1550 handler = xmlFindCharEncodingHandler("EUC-JP"); 1551 if (handler != NULL) return(handler); 1552 break; 1553 default: 1554 break; 1555 } 1556 1557#ifdef DEBUG_ENCODING 1558 xmlGenericError(xmlGenericErrorContext, 1559 "No handler found for encoding %d\n", enc); 1560#endif 1561 return(NULL); 1562} 1563 1564/** 1565 * xmlFindCharEncodingHandler: 1566 * @name: a string describing the char encoding. 1567 * 1568 * Search in the registered set the handler able to read/write that encoding. 1569 * 1570 * Returns the handler or NULL if not found 1571 */ 1572xmlCharEncodingHandlerPtr 1573xmlFindCharEncodingHandler(const char *name) { 1574 const char *nalias; 1575 const char *norig; 1576 xmlCharEncoding alias; 1577#ifdef LIBXML_ICONV_ENABLED 1578 xmlCharEncodingHandlerPtr enc; 1579 iconv_t icv_in, icv_out; 1580#endif /* LIBXML_ICONV_ENABLED */ 1581 char upper[100]; 1582 int i; 1583 1584 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1585 if (name == NULL) return(xmlDefaultCharEncodingHandler); 1586 if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1587 1588 /* 1589 * Do the alias resolution 1590 */ 1591 norig = name; 1592 nalias = xmlGetEncodingAlias(name); 1593 if (nalias != NULL) 1594 name = nalias; 1595 1596 /* 1597 * Check first for directly registered encoding names 1598 */ 1599 for (i = 0;i < 99;i++) { 1600 upper[i] = toupper(name[i]); 1601 if (upper[i] == 0) break; 1602 } 1603 upper[i] = 0; 1604 1605 if (handlers != NULL) { 1606 for (i = 0;i < nbCharEncodingHandler; i++) { 1607 if (!strcmp(upper, handlers[i]->name)) { 1608#ifdef DEBUG_ENCODING 1609 xmlGenericError(xmlGenericErrorContext, 1610 "Found registered handler for encoding %s\n", name); 1611#endif 1612 return(handlers[i]); 1613 } 1614 } 1615 } 1616 1617#ifdef LIBXML_ICONV_ENABLED 1618 /* check whether iconv can handle this */ 1619 
icv_in = iconv_open("UTF-8", name); 1620 icv_out = iconv_open(name, "UTF-8"); 1621 if (icv_in == (iconv_t) -1) { 1622 icv_in = iconv_open("UTF-8", upper); 1623 } 1624 if (icv_out == (iconv_t) -1) { 1625 icv_out = iconv_open(upper, "UTF-8"); 1626 } 1627 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1628 enc = (xmlCharEncodingHandlerPtr) 1629 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1630 if (enc == NULL) { 1631 iconv_close(icv_in); 1632 iconv_close(icv_out); 1633 return(NULL); 1634 } 1635 enc->name = xmlMemStrdup(name); 1636 enc->input = NULL; 1637 enc->output = NULL; 1638 enc->iconv_in = icv_in; 1639 enc->iconv_out = icv_out; 1640#ifdef DEBUG_ENCODING 1641 xmlGenericError(xmlGenericErrorContext, 1642 "Found iconv handler for encoding %s\n", name); 1643#endif 1644 return enc; 1645 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1646 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1647 "iconv : problems with filters for '%s'\n", name); 1648 } 1649#endif /* LIBXML_ICONV_ENABLED */ 1650 1651#ifdef DEBUG_ENCODING 1652 xmlGenericError(xmlGenericErrorContext, 1653 "No handler found for encoding %s\n", name); 1654#endif 1655 1656 /* 1657 * Fallback using the canonical names 1658 */ 1659 alias = xmlParseCharEncoding(norig); 1660 if (alias != XML_CHAR_ENCODING_ERROR) { 1661 const char* canon; 1662 canon = xmlGetCharEncodingName(alias); 1663 if ((canon != NULL) && (strcmp(name, canon))) { 1664 return(xmlFindCharEncodingHandler(canon)); 1665 } 1666 } 1667 1668 /* If "none of the above", give up */ 1669 return(NULL); 1670} 1671 1672/************************************************************************ 1673 * * 1674 * ICONV based generic conversion functions * 1675 * * 1676 ************************************************************************/ 1677 1678#ifdef LIBXML_ICONV_ENABLED 1679/** 1680 * xmlIconvWrapper: 1681 * @cd: iconv converter data structure 1682 * @out: a pointer to an array of bytes to store the result 1683 * @outlen: the length of 
@out 1684 * @in: a pointer to an array of ISO Latin 1 chars 1685 * @inlen: the length of @in 1686 * 1687 * Returns 0 if success, or 1688 * -1 by lack of space, or 1689 * -2 if the transcoding fails (for *in is not valid utf8 string or 1690 * the result of transformation can't fit into the encoding we want), or 1691 * -3 if there the last byte can't form a single output char. 1692 * 1693 * The value of @inlen after return is the number of octets consumed 1694 * as the return value is positive, else unpredictable. 1695 * The value of @outlen after return is the number of ocetes consumed. 1696 */ 1697static int 1698xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1699 const unsigned char *in, int *inlen) { 1700 size_t icv_inlen, icv_outlen; 1701 const char *icv_in = (const char *) in; 1702 char *icv_out = (char *) out; 1703 int ret; 1704 1705 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1706 if (outlen != NULL) *outlen = 0; 1707 return(-1); 1708 } 1709 icv_inlen = *inlen; 1710 icv_outlen = *outlen; 1711 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1712 *inlen -= icv_inlen; 1713 *outlen -= icv_outlen; 1714 if ((icv_inlen != 0) || (ret == -1)) { 1715#ifdef EILSEQ 1716 if (errno == EILSEQ) { 1717 return -2; 1718 } else 1719#endif 1720#ifdef E2BIG 1721 if (errno == E2BIG) { 1722 return -1; 1723 } else 1724#endif 1725#ifdef EINVAL 1726 if (errno == EINVAL) { 1727 return -3; 1728 } else 1729#endif 1730 { 1731 return -3; 1732 } 1733 } 1734 return 0; 1735} 1736#endif /* LIBXML_ICONV_ENABLED */ 1737 1738/************************************************************************ 1739 * * 1740 * The real API used by libxml for on-the-fly conversion * 1741 * * 1742 ************************************************************************/ 1743int 1744xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1745 xmlBufferPtr in, int len); 1746 1747/** 1748 * xmlCharEncFirstLineInt: 1749 
* @handler: char enconding transformation data structure 1750 * @out: an xmlBuffer for the output. 1751 * @in: an xmlBuffer for the input 1752 * @len: number of bytes to convert for the first line, or -1 1753 * 1754 * Front-end for the encoding handler input function, but handle only 1755 * the very first line, i.e. limit itself to 45 chars. 1756 * 1757 * Returns the number of byte written if success, or 1758 * -1 general error 1759 * -2 if the transcoding fails (for *in is not valid utf8 string or 1760 * the result of transformation can't fit into the encoding we want), or 1761 */ 1762int 1763xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1764 xmlBufferPtr in, int len) { 1765 int ret = -2; 1766 int written; 1767 int toconv; 1768 1769 if (handler == NULL) return(-1); 1770 if (out == NULL) return(-1); 1771 if (in == NULL) return(-1); 1772 1773 /* calculate space available */ 1774 written = out->size - out->use; 1775 toconv = in->use; 1776 /* 1777 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1778 * 45 chars should be sufficient to reach the end of the encoding 1779 * declaration without going too far inside the document content. 
1780 * on UTF-16 this means 90bytes, on UCS4 this means 180 1781 * The actual value depending on guessed encoding is passed as @len 1782 * if provided 1783 */ 1784 if (len >= 0) { 1785 if (toconv > len) 1786 toconv = len; 1787 } else { 1788 if (toconv > 180) 1789 toconv = 180; 1790 } 1791 if (toconv * 2 >= written) { 1792 xmlBufferGrow(out, toconv); 1793 written = out->size - out->use - 1; 1794 } 1795 1796 if (handler->input != NULL) { 1797 ret = handler->input(&out->content[out->use], &written, 1798 in->content, &toconv); 1799 xmlBufferShrink(in, toconv); 1800 out->use += written; 1801 out->content[out->use] = 0; 1802 } 1803#ifdef LIBXML_ICONV_ENABLED 1804 else if (handler->iconv_in != NULL) { 1805 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1806 &written, in->content, &toconv); 1807 xmlBufferShrink(in, toconv); 1808 out->use += written; 1809 out->content[out->use] = 0; 1810 if (ret == -1) ret = -3; 1811 } 1812#endif /* LIBXML_ICONV_ENABLED */ 1813#ifdef DEBUG_ENCODING 1814 switch (ret) { 1815 case 0: 1816 xmlGenericError(xmlGenericErrorContext, 1817 "converted %d bytes to %d bytes of input\n", 1818 toconv, written); 1819 break; 1820 case -1: 1821 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1822 toconv, written, in->use); 1823 break; 1824 case -2: 1825 xmlGenericError(xmlGenericErrorContext, 1826 "input conversion failed due to input error\n"); 1827 break; 1828 case -3: 1829 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1830 toconv, written, in->use); 1831 break; 1832 default: 1833 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 1834 } 1835#endif /* DEBUG_ENCODING */ 1836 /* 1837 * Ignore when input buffer is not on a boundary 1838 */ 1839 if (ret == -3) ret = 0; 1840 if (ret == -1) ret = 0; 1841 return(ret); 1842} 1843 1844/** 1845 * xmlCharEncFirstLine: 1846 * @handler: char enconding transformation data 
structure 1847 * @out: an xmlBuffer for the output. 1848 * @in: an xmlBuffer for the input 1849 * 1850 * Front-end for the encoding handler input function, but handle only 1851 * the very first line, i.e. limit itself to 45 chars. 1852 * 1853 * Returns the number of byte written if success, or 1854 * -1 general error 1855 * -2 if the transcoding fails (for *in is not valid utf8 string or 1856 * the result of transformation can't fit into the encoding we want), or 1857 */ 1858int 1859xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1860 xmlBufferPtr in) { 1861 return(xmlCharEncFirstLineInt(handler, out, in, -1)); 1862} 1863 1864/** 1865 * xmlCharEncInFunc: 1866 * @handler: char encoding transformation data structure 1867 * @out: an xmlBuffer for the output. 1868 * @in: an xmlBuffer for the input 1869 * 1870 * Generic front-end for the encoding handler input function 1871 * 1872 * Returns the number of byte written if success, or 1873 * -1 general error 1874 * -2 if the transcoding fails (for *in is not valid utf8 string or 1875 * the result of transformation can't fit into the encoding we want), or 1876 */ 1877int 1878xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 1879 xmlBufferPtr in) 1880{ 1881 int ret = -2; 1882 int written; 1883 int toconv; 1884 1885 if (handler == NULL) 1886 return (-1); 1887 if (out == NULL) 1888 return (-1); 1889 if (in == NULL) 1890 return (-1); 1891 1892 toconv = in->use; 1893 if (toconv == 0) 1894 return (0); 1895 written = out->size - out->use; 1896 if (toconv * 2 >= written) { 1897 xmlBufferGrow(out, out->size + toconv * 2); 1898 written = out->size - out->use - 1; 1899 } 1900 if (handler->input != NULL) { 1901 ret = handler->input(&out->content[out->use], &written, 1902 in->content, &toconv); 1903 xmlBufferShrink(in, toconv); 1904 out->use += written; 1905 out->content[out->use] = 0; 1906 } 1907#ifdef LIBXML_ICONV_ENABLED 1908 else if (handler->iconv_in != NULL) { 1909 ret = 
xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1910 &written, in->content, &toconv); 1911 xmlBufferShrink(in, toconv); 1912 out->use += written; 1913 out->content[out->use] = 0; 1914 if (ret == -1) 1915 ret = -3; 1916 } 1917#endif /* LIBXML_ICONV_ENABLED */ 1918 switch (ret) { 1919 case 0: 1920#ifdef DEBUG_ENCODING 1921 xmlGenericError(xmlGenericErrorContext, 1922 "converted %d bytes to %d bytes of input\n", 1923 toconv, written); 1924#endif 1925 break; 1926 case -1: 1927#ifdef DEBUG_ENCODING 1928 xmlGenericError(xmlGenericErrorContext, 1929 "converted %d bytes to %d bytes of input, %d left\n", 1930 toconv, written, in->use); 1931#endif 1932 break; 1933 case -3: 1934#ifdef DEBUG_ENCODING 1935 xmlGenericError(xmlGenericErrorContext, 1936 "converted %d bytes to %d bytes of input, %d left\n", 1937 toconv, written, in->use); 1938#endif 1939 break; 1940 case -2: { 1941 char buf[50]; 1942 1943 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 1944 in->content[0], in->content[1], 1945 in->content[2], in->content[3]); 1946 buf[49] = 0; 1947 xmlEncodingErr(XML_I18N_CONV_FAILED, 1948 "input conversion failed due to input error, bytes %s\n", 1949 buf); 1950 } 1951 } 1952 /* 1953 * Ignore when input buffer is not on a boundary 1954 */ 1955 if (ret == -3) 1956 ret = 0; 1957 return (written? written : ret); 1958} 1959 1960/** 1961 * xmlCharEncOutFunc: 1962 * @handler: char enconding transformation data structure 1963 * @out: an xmlBuffer for the output. 1964 * @in: an xmlBuffer for the input 1965 * 1966 * Generic front-end for the encoding handler output function 1967 * a first call with @in == NULL has to be made firs to initiate the 1968 * output in case of non-stateless encoding needing to initiate their 1969 * state or the output (like the BOM in UTF16). 1970 * In case of UTF8 sequence conversion errors for the given encoder, 1971 * the content will be automatically remapped to a CharRef sequence. 
1972 * 1973 * Returns the number of byte written if success, or 1974 * -1 general error 1975 * -2 if the transcoding fails (for *in is not valid utf8 string or 1976 * the result of transformation can't fit into the encoding we want), or 1977 */ 1978int 1979xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1980 xmlBufferPtr in) { 1981 int ret = -2; 1982 int written; 1983 int writtentot = 0; 1984 int toconv; 1985 int output = 0; 1986 1987 if (handler == NULL) return(-1); 1988 if (out == NULL) return(-1); 1989 1990retry: 1991 1992 written = out->size - out->use; 1993 1994 if (written > 0) 1995 written--; /* Gennady: count '/0' */ 1996 1997 /* 1998 * First specific handling of in = NULL, i.e. the initialization call 1999 */ 2000 if (in == NULL) { 2001 toconv = 0; 2002 if (handler->output != NULL) { 2003 ret = handler->output(&out->content[out->use], &written, 2004 NULL, &toconv); 2005 if (ret >= 0) { /* Gennady: check return value */ 2006 out->use += written; 2007 out->content[out->use] = 0; 2008 } 2009 } 2010#ifdef LIBXML_ICONV_ENABLED 2011 else if (handler->iconv_out != NULL) { 2012 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2013 &written, NULL, &toconv); 2014 out->use += written; 2015 out->content[out->use] = 0; 2016 } 2017#endif /* LIBXML_ICONV_ENABLED */ 2018#ifdef DEBUG_ENCODING 2019 xmlGenericError(xmlGenericErrorContext, 2020 "initialized encoder\n"); 2021#endif 2022 return(0); 2023 } 2024 2025 /* 2026 * Conversion itself. 
2027 */ 2028 toconv = in->use; 2029 if (toconv == 0) 2030 return(0); 2031 if (toconv * 4 >= written) { 2032 xmlBufferGrow(out, toconv * 4); 2033 written = out->size - out->use - 1; 2034 } 2035 if (handler->output != NULL) { 2036 ret = handler->output(&out->content[out->use], &written, 2037 in->content, &toconv); 2038 if (written > 0) { 2039 xmlBufferShrink(in, toconv); 2040 out->use += written; 2041 writtentot += written; 2042 } 2043 out->content[out->use] = 0; 2044 } 2045#ifdef LIBXML_ICONV_ENABLED 2046 else if (handler->iconv_out != NULL) { 2047 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2048 &written, in->content, &toconv); 2049 xmlBufferShrink(in, toconv); 2050 out->use += written; 2051 writtentot += written; 2052 out->content[out->use] = 0; 2053 if (ret == -1) { 2054 if (written > 0) { 2055 /* 2056 * Can be a limitation of iconv 2057 */ 2058 goto retry; 2059 } 2060 ret = -3; 2061 } 2062 } 2063#endif /* LIBXML_ICONV_ENABLED */ 2064 else { 2065 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2066 "xmlCharEncOutFunc: no output function !\n", NULL); 2067 return(-1); 2068 } 2069 2070 if (ret >= 0) output += ret; 2071 2072 /* 2073 * Attempt to handle error cases 2074 */ 2075 switch (ret) { 2076 case 0: 2077#ifdef DEBUG_ENCODING 2078 xmlGenericError(xmlGenericErrorContext, 2079 "converted %d bytes to %d bytes of output\n", 2080 toconv, written); 2081#endif 2082 break; 2083 case -1: 2084#ifdef DEBUG_ENCODING 2085 xmlGenericError(xmlGenericErrorContext, 2086 "output conversion failed by lack of space\n"); 2087#endif 2088 break; 2089 case -3: 2090#ifdef DEBUG_ENCODING 2091 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2092 toconv, written, in->use); 2093#endif 2094 break; 2095 case -2: { 2096 int len = in->use; 2097 const xmlChar *utf = (const xmlChar *) in->content; 2098 int cur; 2099 2100 cur = xmlGetUTF8Char(utf, &len); 2101 if (cur > 0) { 2102 xmlChar charref[20]; 2103 2104#ifdef DEBUG_ENCODING 2105 
xmlGenericError(xmlGenericErrorContext, 2106 "handling output conversion error\n"); 2107 xmlGenericError(xmlGenericErrorContext, 2108 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2109 in->content[0], in->content[1], 2110 in->content[2], in->content[3]); 2111#endif 2112 /* 2113 * Removes the UTF8 sequence, and replace it by a charref 2114 * and continue the transcoding phase, hoping the error 2115 * did not mangle the encoder state. 2116 */ 2117 snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur); 2118 xmlBufferShrink(in, len); 2119 xmlBufferAddHead(in, charref, -1); 2120 2121 goto retry; 2122 } else { 2123 char buf[50]; 2124 2125 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2126 in->content[0], in->content[1], 2127 in->content[2], in->content[3]); 2128 buf[49] = 0; 2129 xmlEncodingErr(XML_I18N_CONV_FAILED, 2130 "output conversion failed due to conv error, bytes %s\n", 2131 buf); 2132 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2133 in->content[0] = ' '; 2134 } 2135 break; 2136 } 2137 } 2138 return(ret); 2139} 2140 2141/** 2142 * xmlCharEncCloseFunc: 2143 * @handler: char enconding transformation data structure 2144 * 2145 * Generic front-end for encoding handler close function 2146 * 2147 * Returns 0 if success, or -1 in case of error 2148 */ 2149int 2150xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2151 int ret = 0; 2152 if (handler == NULL) return(-1); 2153 if (handler->name == NULL) return(-1); 2154#ifdef LIBXML_ICONV_ENABLED 2155 /* 2156 * Iconv handlers can be used only once, free the whole block. 2157 * and the associated icon resources. 
2158 */ 2159 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { 2160 if (handler->name != NULL) 2161 xmlFree(handler->name); 2162 handler->name = NULL; 2163 if (handler->iconv_out != NULL) { 2164 if (iconv_close(handler->iconv_out)) 2165 ret = -1; 2166 handler->iconv_out = NULL; 2167 } 2168 if (handler->iconv_in != NULL) { 2169 if (iconv_close(handler->iconv_in)) 2170 ret = -1; 2171 handler->iconv_in = NULL; 2172 } 2173 xmlFree(handler); 2174 } 2175#endif /* LIBXML_ICONV_ENABLED */ 2176#ifdef DEBUG_ENCODING 2177 if (ret) 2178 xmlGenericError(xmlGenericErrorContext, 2179 "failed to close the encoding handler\n"); 2180 else 2181 xmlGenericError(xmlGenericErrorContext, 2182 "closed the encoding handler\n"); 2183#endif 2184 2185 return(ret); 2186} 2187 2188/** 2189 * xmlByteConsumed: 2190 * @ctxt: an XML parser context 2191 * 2192 * This function provides the current index of the parser relative 2193 * to the start of the current entity. This function is computed in 2194 * bytes from the beginning starting at zero and finishing at the 2195 * size in byte of the file if parsing a file. The function is 2196 * of constant cost if the input is UTF-8 but can be costly if run 2197 * on non-UTF-8 input. 2198 * 2199 * Returns the index in bytes from the beginning of the entity or -1 2200 * in case the index could not be computed. 
2201 */ 2202long 2203xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2204 xmlParserInputPtr in; 2205 2206 if (ctxt == NULL) return(-1); 2207 in = ctxt->input; 2208 if (in == NULL) return(-1); 2209 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2210 unsigned int unused = 0; 2211 xmlCharEncodingHandler * handler = in->buf->encoder; 2212 /* 2213 * Encoding conversion, compute the number of unused original 2214 * bytes from the input not consumed and substract that from 2215 * the raw consumed value, this is not a cheap operation 2216 */ 2217 if (in->end - in->cur > 0) { 2218 unsigned char convbuf[32000]; 2219 const unsigned char *cur = (const unsigned char *)in->cur; 2220 int toconv = in->end - in->cur, written = 32000; 2221 2222 int ret; 2223 2224 if (handler->output != NULL) { 2225 do { 2226 toconv = in->end - cur; 2227 written = 32000; 2228 ret = handler->output(&convbuf[0], &written, 2229 cur, &toconv); 2230 if (ret == -1) return(-1); 2231 unused += written; 2232 cur += toconv; 2233 } while (ret == -2); 2234#ifdef LIBXML_ICONV_ENABLED 2235 } else if (handler->iconv_out != NULL) { 2236 do { 2237 toconv = in->end - cur; 2238 written = 32000; 2239 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], 2240 &written, cur, &toconv); 2241 if (ret < 0) { 2242 if (written > 0) 2243 ret = -2; 2244 else 2245 return(-1); 2246 } 2247 unused += written; 2248 cur += toconv; 2249 } while (ret == -2); 2250#endif 2251 } else { 2252 /* could not find a converter */ 2253 return(-1); 2254 } 2255 } 2256 if (in->buf->rawconsumed < unused) 2257 return(-1); 2258 return(in->buf->rawconsumed - unused); 2259 } 2260 return(in->consumed + (in->cur - in->base)); 2261} 2262 2263#ifndef LIBXML_ICONV_ENABLED 2264#ifdef LIBXML_ISO8859X_ENABLED 2265 2266/** 2267 * UTF8ToISO8859x: 2268 * @out: a pointer to an array of bytes to store the result 2269 * @outlen: the length of @out 2270 * @in: a pointer to an array of UTF-8 chars 2271 * @inlen: the length of @in 2272 * @xlattable: the 2-level 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * Layout of @xlattable as used here: entries 0..31 are indexed by the
 * payload bits of a 2-byte lead, entries 32..47 by the payload bits of a
 * 3-byte lead, and 64-entry blocks start at offset 48.  A final value of
 * 0 means the character does not exist in the target character set.
 *
 * Conversion stops without error when @out is full; the character that
 * did not fit is left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *         fails (invalid or truncated UTF-8 sequence, or character not in
 *         the target set), or -1 if an argument is invalid.
 * The value of @inlen after return is the number of octets consumed;
 * on a -2 return it is the offset of the first byte of the offending
 * sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char *outstart = out;
    const unsigned char *outend;
    const unsigned char *instart = in;
    const unsigned char *inend;
    const unsigned char *seq;   /* first byte of the character being decoded */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    seq = in;
    while (in < inend) {
        unsigned char d;

        seq = in;
        d = *in++;
        if (d < 0x80) {
            /* ASCII maps onto itself */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            unsigned char c;

            if (in >= inend) {
                /* trailing byte not in input buffer */
                goto failed;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto failed;
            }
            d = xlattable [48 + (c & 0x3F) + xlattable [d & 0x1F] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                goto failed;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto failed;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto failed;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
        } else {
            /* cannot transcode >= U+010000 */
            goto failed;
        }

        if (out >= outend) {
            /* no room left: leave the whole character for the next call */
            in = seq;
            break;
        }
        *out++ = d;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);

failed:
    /* report progress up to, but not including, the offending sequence */
    *outlen = out - outstart;
    *inlen = seq - instart;
    return(-2);
}
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  128-entry table giving the Unicode code point of each
 *                 byte value 0x80..0xFF; 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied unchanged, the others
 * are looked up in @unicodetable and emitted as 2 or 3 byte sequences.
 * Conversion stops without error as soon as the next character does not
 * fit in the remaining room of @out.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *         is invalid or an undefined input byte is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *instart = in;
    const unsigned char *inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;

    /* the input is only ever dereferenced while in < inend */
    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                if (outend - out < 2)
                    break;
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            } else {
                /* a 3-byte sequence needs 3 free bytes, not 2 */
                if (outend - out < 3)
                    break;
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 2461 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 2462 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 2463 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 2464 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 2465 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 2466 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 2467 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 2468 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 2469 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 2470 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 2471 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 2472}; 2473 2474static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 2475 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 2476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2479 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2480 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2481 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2482 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2483 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2484 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 2485 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 2486 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 2487 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 2488 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 2490 
"\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2491 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 2492 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2494 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 2495 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 2496 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 2497 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 2498 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 2499 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 2500 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 2501 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 2502}; 2503 2504static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 2505 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2506 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2507 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2508 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2509 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 2510 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 2511 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 2512 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 2513 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, 2514 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 2515 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 2516 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 2517 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 2518 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 2519 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 2520 0x011d, 0x00f9, 
0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 2521}; 2522 2523static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 2524 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 2525 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2526 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2531 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2532 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2533 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 2534 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 2535 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 2536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 2537 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 2538 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2540 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 2541 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2542 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2543 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2544 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2545 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2546 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2547 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2548 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 2549 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 2550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 2551 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2552 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 2553 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2554 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 2555}; 2556 2557static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 2558 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2559 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2560 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2561 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2562 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 2563 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 2564 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 2565 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 2566 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 2567 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 2568 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 2569 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 2570 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 2571 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 2572 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 2573 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 2574}; 2575 2576static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 2577 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 2578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2579 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2580 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2581 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2582 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2584 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2585 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2586 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 2587 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 2588 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 2589 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 2590 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 2591 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 2592 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 2593 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 2594 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 2595 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 2596 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2597 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 2598 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2599 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2600 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 2601 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 2602 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 2603 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 2604}; 2605 2606static unsigned short const xmlunicodetable_ISO8859_5 [128] = { 2607 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2608 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2609 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2610 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 
0x009f, 2611 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 2612 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 2613 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 2614 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 2615 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 2616 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 2617 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 2618 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 2619 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 2620 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 2621 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 2622 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 2623}; 2624 2625static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 2626 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2627 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2628 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2633 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2634 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2635 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 2636 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2637 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 2638 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 2639 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2640 
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 2641 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2642 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 2643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2645 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2646 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2647 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2648 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2649 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2650 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2651 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2653}; 2654 2655static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 2656 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2657 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2658 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2659 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2660 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 2661 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, 2662 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2663 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 2664 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 2665 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 2666 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 2667 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2668 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 2669 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 2670 0x0650, 0x0651, 
0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2671 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2672}; 2673 2674static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 2675 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2676 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 2677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2682 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2683 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2684 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 2685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2686 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 2691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 2692 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2693 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 2694 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2695 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2698}; 2699 2700static unsigned short const xmlunicodetable_ISO8859_7 [128] = { 2701 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 
0x0086, 0x0087, 2702 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2703 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2704 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2705 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 2706 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 2707 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 2708 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 2709 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 2710 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 2711 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 2712 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 2713 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 2714 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 2715 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 2716 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 2717}; 2718 2719static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 2720 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 2721 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2722 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2723 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2724 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2727 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2728 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2729 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 2730 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 2731 
"\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2732 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2733 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2734 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2735 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2736 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 2737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2739 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2742 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2743 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 2744 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2745 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 2746 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2747 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 2748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2751}; 2752 2753static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 2754 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2755 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2756 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2757 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2758 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 2759 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 2760 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 
2761 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 2762 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2763 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2764 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2765 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 2766 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 2767 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 2768 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 2769 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 2770}; 2771 2772static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 2773 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2774 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 2775 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2776 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2777 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2780 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2781 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2782 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 2783 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 2784 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2785 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2786 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2788 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2789 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 2790 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2791 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 2792 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2793 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2794 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2796 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 2797 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 2798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2801 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2802 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2804}; 2805 2806static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 2807 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2808 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2809 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2810 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2811 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 2812 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 2813 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 2814 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 2815 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 2816 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 2817 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 2818 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 2819 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 2820 0x00e8, 
0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 2821 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 2822 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 2823}; 2824 2825static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 2826 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2827 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2828 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2829 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2830 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2832 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2833 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2834 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2835 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 2836 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 2837 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2838 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 2839 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2840 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 2841 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2842 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 2843 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2844 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2845 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2846 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 2847 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2848 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2849}; 2850 2851static unsigned short const 
xmlunicodetable_ISO8859_10 [128] = { 2852 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2853 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2854 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2855 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2856 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 2857 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 2858 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 2859 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 2860 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 2861 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 2862 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 2863 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 2864 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 2865 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 2866 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 2867 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, 2868}; 2869 2870static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 2871 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2872 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2873 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2874 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2875 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2876 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2877 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2878 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2879 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2880 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 2881 
"\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2882 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 2883 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 2884 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 2885 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 2886 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 2887 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2888 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 2889 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 2890 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2893 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2894 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2895 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2896 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2897 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2898 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 2899 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 2900 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 2901 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 2902}; 2903 2904static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 2905 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2906 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2907 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2908 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2909 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 2910 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 
0x0e0f, 2911 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 2912 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 2913 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 2914 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 2915 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 2916 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 2917 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 2918 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 2919 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 2920 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 2921}; 2922 2923static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 2924 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2925 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2926 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2927 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2928 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2929 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2930 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2931 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2932 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2933 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2934 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2935 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2936 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2937 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2938 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 2939 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 2940 
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 2941 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2942 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 2943 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2944 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2945 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2946 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2947 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2948 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 2949 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2950 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2951}; 2952 2953static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 2954 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2955 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2956 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2957 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2958 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 2959 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 2960 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 2961 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 2962 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 2963 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 2964 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 2965 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 2966 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 2967 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 2968 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 2969 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 2970}; 2971 2972static 
unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 2973 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2974 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2975 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2976 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2977 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2978 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2979 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2980 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2981 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2982 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 2983 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 2984 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2985 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2986 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2987 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2988 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2989 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 2990 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2991 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2992 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 2993 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 2994 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 2995 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 2996 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 2997 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 2998 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 2999 
"\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3000 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3001 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3002 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3003 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3004}; 3005 3006static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3007 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3008 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3009 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3010 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3011 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3012 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3013 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3014 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3015 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3016 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3017 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3018 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3019 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3020 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3021 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3022 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3023}; 3024 3025static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3026 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3027 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3028 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3029 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3030 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3031 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3032 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3033 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3034 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3035 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3036 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3037 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3038 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3039 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3040 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3041 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3042 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3043 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3044 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3045 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3046 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3047 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3048 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3049 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3050 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3051 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3052 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3053 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3054 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3055 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3056 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3057 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3058 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3060 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3061 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3062 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3063 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3065 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3066 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3067 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3068 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3069}; 3070 3071static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3072 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3073 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3074 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3075 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3076 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3077 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3078 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3079 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3080 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3081 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3082 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3083 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3084 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3085 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3086 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3087 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3088}; 3089 3090static 
unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3091 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3092 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3093 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3094 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3095 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3096 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3097 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3098 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3099 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3100 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3101 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 3102 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3103 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3104 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3105 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3106 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3107 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3108 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3111 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3112 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3113 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3114 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3115 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3116 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3117 
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3118}; 3119 3120static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3121 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3122 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3123 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3124 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3125 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3126 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3127 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3128 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3129 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3130 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3131 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, 3132 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3133 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3134 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3135 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3136 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3137}; 3138 3139static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3140 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3141 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3142 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3143 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3144 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3145 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3146 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3147 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3148 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3149 
"\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3150 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3151 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3152 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3153 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3154 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3155 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3156 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3157 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3158 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3159 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3160 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3161 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3164 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3169 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3170 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3171 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3172 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3173 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3174 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3175 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3176 
"\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3177 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3178 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3179}; 3180 3181 3182/* 3183 * auto-generated functions for ISO-8859-2 .. ISO-8859-16 3184 */ 3185 3186static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3187 const unsigned char* in, int *inlen) { 3188 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3189} 3190static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3191 const unsigned char* in, int *inlen) { 3192 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3193} 3194 3195static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3196 const unsigned char* in, int *inlen) { 3197 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3198} 3199static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3200 const unsigned char* in, int *inlen) { 3201 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3202} 3203 3204static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3205 const unsigned char* in, int *inlen) { 3206 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3207} 3208static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3209 const unsigned char* in, int *inlen) { 3210 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3211} 3212 3213static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3214 const unsigned char* in, int *inlen) { 3215 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3216} 3217static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3218 const unsigned char* in, int *inlen) { 3219 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3220} 3221 3222static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3223 const unsigned char* in, int 
*inlen) { 3224 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3225} 3226static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3227 const unsigned char* in, int *inlen) { 3228 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3229} 3230 3231static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3232 const unsigned char* in, int *inlen) { 3233 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3234} 3235static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3236 const unsigned char* in, int *inlen) { 3237 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3238} 3239 3240static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3241 const unsigned char* in, int *inlen) { 3242 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3243} 3244static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3245 const unsigned char* in, int *inlen) { 3246 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3247} 3248 3249static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3250 const unsigned char* in, int *inlen) { 3251 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3252} 3253static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3254 const unsigned char* in, int *inlen) { 3255 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3256} 3257 3258static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 3259 const unsigned char* in, int *inlen) { 3260 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 3261} 3262static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 3263 const unsigned char* in, int *inlen) { 3264 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 3265} 3266 3267static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 3268 const unsigned char* in, int *inlen) { 
3269 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 3270} 3271static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 3272 const unsigned char* in, int *inlen) { 3273 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 3274} 3275 3276static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 3277 const unsigned char* in, int *inlen) { 3278 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 3279} 3280static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 3281 const unsigned char* in, int *inlen) { 3282 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 3283} 3284 3285static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 3286 const unsigned char* in, int *inlen) { 3287 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 3288} 3289static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 3290 const unsigned char* in, int *inlen) { 3291 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 3292} 3293 3294static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 3295 const unsigned char* in, int *inlen) { 3296 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 3297} 3298static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 3299 const unsigned char* in, int *inlen) { 3300 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 3301} 3302 3303static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 3304 const unsigned char* in, int *inlen) { 3305 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 3306} 3307static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 3308 const unsigned char* in, int *inlen) { 3309 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16); 3310} 3311 3312static void 3313xmlRegisterCharEncodingHandlersISO8859x (void) { 3314 xmlNewCharEncodingHandler 
("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 3315 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 3316 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 3317 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 3318 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 3319 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 3320 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 3321 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 3322 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 3323 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 3324 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 3325 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 3326 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 3327 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 3328} 3329 3330#endif 3331#endif 3332 3333#define bottom_encoding 3334#include "elfgcchack.h" 3335 3336