1/* 2 * encoding.c : implements the encoding conversion functions needed for XML 3 * 4 * Related specs: 5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau 7 * [ISO-10646] UTF-8 and UTF-16 in Annexes 8 * [ISO-8859-1] ISO Latin-1 characters codes. 9 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10 * Worldwide Character Encoding -- Version 1.0", Addison- 11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12 * described in Unicode Technical Report #4. 13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14 * Information Interchange, ANSI X3.4-1986. 15 * 16 * See Copyright for the status of this software. 17 * 18 * daniel@veillard.com 19 * 20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org> 21 */ 22 23#define IN_LIBXML 24#include "libxml.h" 25 26#include <string.h> 27#include <limits.h> 28 29#ifdef HAVE_CTYPE_H 30#include <ctype.h> 31#endif 32#ifdef HAVE_STDLIB_H 33#include <stdlib.h> 34#endif 35#ifdef LIBXML_ICONV_ENABLED 36#ifdef HAVE_ERRNO_H 37#include <errno.h> 38#endif 39#endif 40#include <libxml/encoding.h> 41#include <libxml/xmlmemory.h> 42#ifdef LIBXML_HTML_ENABLED 43#include <libxml/HTMLparser.h> 44#endif 45#include <libxml/globals.h> 46#include <libxml/xmlerror.h> 47 48#ifdef LIBXML_ICU_ENABLED 49#include <unicode/ucnv.h> 50#endif 51 52#include "buf.h" 53#include "enc.h" 54 55static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; 56static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; 57 58typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; 59typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; 60struct _xmlCharEncodingAlias { 61 const char *name; 62 const char *alias; 63}; 64 65static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; 66static int xmlCharEncodingAliasesNb = 0; 67static int xmlCharEncodingAliasesMax = 0; 68 69#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) 70#if 0 
71#define DEBUG_ENCODING /* Define this to get encoding traces */ 72#endif 73#else 74#ifdef LIBXML_ISO8859X_ENABLED 75static void xmlRegisterCharEncodingHandlersISO8859x (void); 76#endif 77#endif 78 79static int xmlLittleEndian = 1; 80 81/** 82 * xmlEncodingErrMemory: 83 * @extra: extra informations 84 * 85 * Handle an out of memory condition 86 */ 87static void 88xmlEncodingErrMemory(const char *extra) 89{ 90 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra); 91} 92 93/** 94 * xmlErrEncoding: 95 * @error: the error number 96 * @msg: the error message 97 * 98 * n encoding error 99 */ 100static void 101xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) 102{ 103 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, 104 XML_FROM_I18N, error, XML_ERR_FATAL, 105 NULL, 0, val, NULL, NULL, 0, 0, msg, val); 106} 107 108#ifdef LIBXML_ICU_ENABLED 109static uconv_t* 110openIcuConverter(const char* name, int toUnicode) 111{ 112 UErrorCode status = U_ZERO_ERROR; 113 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); 114 if (conv == NULL) 115 return NULL; 116 117 conv->uconv = ucnv_open(name, &status); 118 if (U_FAILURE(status)) 119 goto error; 120 121 status = U_ZERO_ERROR; 122 if (toUnicode) { 123 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, 124 NULL, NULL, NULL, &status); 125 } 126 else { 127 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, 128 NULL, NULL, NULL, &status); 129 } 130 if (U_FAILURE(status)) 131 goto error; 132 133 status = U_ZERO_ERROR; 134 conv->utf8 = ucnv_open("UTF-8", &status); 135 if (U_SUCCESS(status)) 136 return conv; 137 138error: 139 if (conv->uconv) 140 ucnv_close(conv->uconv); 141 xmlFree(conv); 142 return NULL; 143} 144 145static void 146closeIcuConverter(uconv_t *conv) 147{ 148 if (conv != NULL) { 149 ucnv_close(conv->uconv); 150 ucnv_close(conv->utf8); 151 xmlFree(conv); 152 } 153} 154#endif /* LIBXML_ICU_ENABLED */ 155 
/************************************************************************
 *                                                                      *
 *              Conversions To/From UTF8 encoding                       *
 *                                                                      *
 ************************************************************************/

/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII bytes are valid UTF-8 as they are, so each
 * input byte below 0x80 is copied to exactly one output byte.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are also updated when a non-ASCII byte stops the conversion.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* instart = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
        return(-1);

    inend = in + (*inlen);
    outend = out + (*outlen);

    /* One byte in, one byte out: the only limit is the room left. */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = (int) (out - outstart);
            *inlen = (int) (in - instart);
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. A multi-byte sequence that is cut off by the end
 * of @in is left unconsumed so the caller can supply the rest later.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad lead byte, bad continuation byte, or a character that
 *     is not ASCII), or -1 if @out, @outlen or @inlen is NULL.
 * If @in is NULL, @outlen and @inlen are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid; /* no chance for this in Ascii */

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* every trailing byte must look like 10xxxxxx */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto invalid;                 /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(-2);
}
#endif /* LIBXML_OUTPUT_ENABLED */
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as they are, every
 * other byte becomes a two byte UTF-8 sequence. Conversion stops in
 * front of the first character that does not fit entirely in @out.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) c;
        } else {
            /*
             * Two bytes needed. Compare against the room left rather
             * than against "outend - 1", which would point in front of
             * the buffer when *outlen is 0.
             */
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            *out++ = (unsigned char) ((c & 0x3F) | 0x80);
        }
        in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - base);
    return(*outlen);
}

/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit in both
 * buffers are copied from @inb to @out.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the smaller of the two lengths is negative.
 * The values of @outlen and @inlenb after a successful return are both
 * the number of bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int ncopy;

    if (out == NULL)
        return(-1);
    if (inb == NULL)
        return(-1);
    if ((outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* copy whatever fits in the smaller of the two buffers */
    ncopy = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (ncopy < 0)
        return(-1);

    memcpy(out, inb, ncopy);

    *inlenb = ncopy;
    *outlen = ncopy;
    return(ncopy);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A sequence cut off by the end of @in, or a
 * character for which @out has no room left, ends the conversion
 * without being consumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (bad lead or continuation byte, or a character above 0xFF),
 *     or -1 if @out, @outlen or @inlen is NULL.
 * If @in is NULL, @outlen and @inlen are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *cur;
    const unsigned char *last;
    const unsigned char *committed;
    unsigned char *dst;
    unsigned char *dstend;
    int status = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization call, nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    cur = in;
    committed = in;
    last = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while (cur < last) {
        unsigned int lead = *cur++;
        unsigned int value;
        int extra;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* stray trailing byte, or a lead byte with no Latin 1 meaning */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else {
            value = lead & 0x07;
            extra = 3;
        }

        /* incomplete sequence at the end of the input */
        if (last - cur < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *cur++;

            if ((cont & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            value = (value << 6) | (cont & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        /* value is a single UCS-4 character at this point */
        if (value > 0xFF) {
            status = -2;
            break;
        }
        if (dst >= dstend)
            break;
        *dst++ = value;
        committed = cur;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    if (status != 0)
        return(status);
    return(*outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so @inb
 * needs no particular alignment. An odd trailing byte is ignored.
 * Conversion stops in front of the first character that does not fit
 * entirely in @out, and in front of a high surrogate whose second half
 * has not been supplied yet.
 *
 * Returns the number of bytes written, -1 if an argument is NULL or a
 *     length is negative, or -2 if the transcoding fails (a high
 *     surrogate that is not followed by a low surrogate).
 * The value of @inlenb after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int need;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    /* only whole 16-bit units are looked at */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    outend = out + *outlen;
    inend = in + *inlenb;

    while (in < inend) {
        c = in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend)             /* pair split: wait for more input */
                break;
            d = in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (processed - inb);
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) need = 1;
        else if (c <   0x800) need = 2;
        else if (c < 0x10000) need = 3;
        else                  need = 4;

        /* never emit a partial UTF-8 sequence */
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = (unsigned char) c;
        } else if (need == 2) {
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else if (need == 3) {
            *out++ = (unsigned char) (0xE0 | (c >> 12));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else {
            *out++ = (unsigned char) (0xF0 | (c >> 18));
            *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No Byte Order Mark is written. The output is
 * stored byte by byte, so @outb needs no particular alignment. Only
 * whole 16-bit units are written; characters above 0xFFFF become a
 * surrogate pair.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL or a length is negative, or -2 if the transcoding failed (bad
 *     lead byte, bad continuation byte, or a value above 0x10FFFF).
 * If @in is NULL, @outlen and @inlen are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlen < 0))
        return(-1);
    inend = in + *inlen;
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* UTF-16 cannot represent anything above 0x10FFFF */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}

/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. A call with @in set to NULL is the initialization
 * call: it writes the two bytes 0xFF 0xFE (the little-endian Byte Order
 * Mark) if @outb has room for them. Any other call is handed to
 * UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if (in == NULL) {
        if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
            return(-1);
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        *inlen = 0;
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so @inb
 * needs no particular alignment. An odd trailing byte is ignored.
 * Conversion stops in front of the first character that does not fit
 * entirely in @out, and in front of a high surrogate whose second half
 * has not been supplied yet.
 *
 * Returns the number of bytes written, -1 if an argument is NULL or a
 *     length is negative, or -2 if the transcoding fails (a high
 *     surrogate that is not followed by a low surrogate).
 * The value of @inlenb after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int need;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if ((*outlen < 0) || (*inlenb < 0))
        return(-1);

    /* only whole 16-bit units are looked at */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    outend = out + *outlen;
    inend = in + *inlenb;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend)             /* pair split: wait for more input */
                break;
            d = ((unsigned int) in[0] << 8) | in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (processed - inb);
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) need = 1;
        else if (c <   0x800) need = 2;
        else if (c < 0x10000) need = 3;
        else                  need = 4;

        /*
         * Check the room for the whole sequence before writing any of
         * it, so that the output never ends with a truncated character
         * whose input has already been counted as consumed.
         */
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = (unsigned char) c;
        } else if (need == 2) {
            *out++ = (unsigned char) (0xC0 | (c >> 6));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else if (need == 3) {
            *out++ = (unsigned char) (0xE0 | (c >> 12));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        } else {
            *out++ = (unsigned char) (0xF0 | (c >> 18));
            *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
            *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
            *out++ = (unsigned char) (0x80 | (c & 0x3F));
        }
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No Byte Order Mark is written. The output is
 * stored byte by byte, so @outb needs no particular alignment. Only
 * whole 16-bit units are written; characters above 0xFFFF become a
 * surrogate pair.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL or a length is negative, or -2 if the transcoding failed (bad
 *     lead byte, bad continuation byte, or a value above 0x10FFFF).
 * If @in is NULL, @outlen and @inlen are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced,
 * also when -2 is returned.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlen < 0))
        return(-1);
    inend = in + *inlen;
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid; /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid; /* no chance for this in UTF-16 */

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* UTF-16 cannot represent anything above 0x10FFFF */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    /* lengths are reported in bytes on the error path as well */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/************************************************************************
 *                                                                      *
 *              Generic encoding handling routines                      *
 *                                                                      *
 ************************************************************************/

925 */ 926xmlCharEncoding 927xmlDetectCharEncoding(const unsigned char* in, int len) 928{ 929 if (in == NULL) 930 return(XML_CHAR_ENCODING_NONE); 931 if (len >= 4) { 932 if ((in[0] == 0x00) && (in[1] == 0x00) && 933 (in[2] == 0x00) && (in[3] == 0x3C)) 934 return(XML_CHAR_ENCODING_UCS4BE); 935 if ((in[0] == 0x3C) && (in[1] == 0x00) && 936 (in[2] == 0x00) && (in[3] == 0x00)) 937 return(XML_CHAR_ENCODING_UCS4LE); 938 if ((in[0] == 0x00) && (in[1] == 0x00) && 939 (in[2] == 0x3C) && (in[3] == 0x00)) 940 return(XML_CHAR_ENCODING_UCS4_2143); 941 if ((in[0] == 0x00) && (in[1] == 0x3C) && 942 (in[2] == 0x00) && (in[3] == 0x00)) 943 return(XML_CHAR_ENCODING_UCS4_3412); 944 if ((in[0] == 0x4C) && (in[1] == 0x6F) && 945 (in[2] == 0xA7) && (in[3] == 0x94)) 946 return(XML_CHAR_ENCODING_EBCDIC); 947 if ((in[0] == 0x3C) && (in[1] == 0x3F) && 948 (in[2] == 0x78) && (in[3] == 0x6D)) 949 return(XML_CHAR_ENCODING_UTF8); 950 /* 951 * Although not part of the recommendation, we also 952 * attempt an "auto-recognition" of UTF-16LE and 953 * UTF-16BE encodings. 
954 */ 955 if ((in[0] == 0x3C) && (in[1] == 0x00) && 956 (in[2] == 0x3F) && (in[3] == 0x00)) 957 return(XML_CHAR_ENCODING_UTF16LE); 958 if ((in[0] == 0x00) && (in[1] == 0x3C) && 959 (in[2] == 0x00) && (in[3] == 0x3F)) 960 return(XML_CHAR_ENCODING_UTF16BE); 961 } 962 if (len >= 3) { 963 /* 964 * Errata on XML-1.0 June 20 2001 965 * We now allow an UTF8 encoded BOM 966 */ 967 if ((in[0] == 0xEF) && (in[1] == 0xBB) && 968 (in[2] == 0xBF)) 969 return(XML_CHAR_ENCODING_UTF8); 970 } 971 /* For UTF-16 we can recognize by the BOM */ 972 if (len >= 2) { 973 if ((in[0] == 0xFE) && (in[1] == 0xFF)) 974 return(XML_CHAR_ENCODING_UTF16BE); 975 if ((in[0] == 0xFF) && (in[1] == 0xFE)) 976 return(XML_CHAR_ENCODING_UTF16LE); 977 } 978 return(XML_CHAR_ENCODING_NONE); 979} 980 981/** 982 * xmlCleanupEncodingAliases: 983 * 984 * Unregisters all aliases 985 */ 986void 987xmlCleanupEncodingAliases(void) { 988 int i; 989 990 if (xmlCharEncodingAliases == NULL) 991 return; 992 993 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 994 if (xmlCharEncodingAliases[i].name != NULL) 995 xmlFree((char *) xmlCharEncodingAliases[i].name); 996 if (xmlCharEncodingAliases[i].alias != NULL) 997 xmlFree((char *) xmlCharEncodingAliases[i].alias); 998 } 999 xmlCharEncodingAliasesNb = 0; 1000 xmlCharEncodingAliasesMax = 0; 1001 xmlFree(xmlCharEncodingAliases); 1002 xmlCharEncodingAliases = NULL; 1003} 1004 1005/** 1006 * xmlGetEncodingAlias: 1007 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1008 * 1009 * Lookup an encoding name for the given alias. 
1010 * 1011 * Returns NULL if not found, otherwise the original name 1012 */ 1013const char * 1014xmlGetEncodingAlias(const char *alias) { 1015 int i; 1016 char upper[100]; 1017 1018 if (alias == NULL) 1019 return(NULL); 1020 1021 if (xmlCharEncodingAliases == NULL) 1022 return(NULL); 1023 1024 for (i = 0;i < 99;i++) { 1025 upper[i] = toupper(alias[i]); 1026 if (upper[i] == 0) break; 1027 } 1028 upper[i] = 0; 1029 1030 /* 1031 * Walk down the list looking for a definition of the alias 1032 */ 1033 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1034 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1035 return(xmlCharEncodingAliases[i].name); 1036 } 1037 } 1038 return(NULL); 1039} 1040 1041/** 1042 * xmlAddEncodingAlias: 1043 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1044 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1045 * 1046 * Registers an alias @alias for an encoding named @name. Existing alias 1047 * will be overwritten. 1048 * 1049 * Returns 0 in case of success, -1 in case of error 1050 */ 1051int 1052xmlAddEncodingAlias(const char *name, const char *alias) { 1053 int i; 1054 char upper[100]; 1055 1056 if ((name == NULL) || (alias == NULL)) 1057 return(-1); 1058 1059 for (i = 0;i < 99;i++) { 1060 upper[i] = toupper(alias[i]); 1061 if (upper[i] == 0) break; 1062 } 1063 upper[i] = 0; 1064 1065 if (xmlCharEncodingAliases == NULL) { 1066 xmlCharEncodingAliasesNb = 0; 1067 xmlCharEncodingAliasesMax = 20; 1068 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1069 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1070 if (xmlCharEncodingAliases == NULL) 1071 return(-1); 1072 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1073 xmlCharEncodingAliasesMax *= 2; 1074 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1075 xmlRealloc(xmlCharEncodingAliases, 1076 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1077 } 1078 /* 1079 * Walk down the list looking for a 
definition of the alias 1080 */ 1081 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1082 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1083 /* 1084 * Replace the definition. 1085 */ 1086 xmlFree((char *) xmlCharEncodingAliases[i].name); 1087 xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1088 return(0); 1089 } 1090 } 1091 /* 1092 * Add the definition 1093 */ 1094 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1095 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1096 xmlCharEncodingAliasesNb++; 1097 return(0); 1098} 1099 1100/** 1101 * xmlDelEncodingAlias: 1102 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1103 * 1104 * Unregisters an encoding alias @alias 1105 * 1106 * Returns 0 in case of success, -1 in case of error 1107 */ 1108int 1109xmlDelEncodingAlias(const char *alias) { 1110 int i; 1111 1112 if (alias == NULL) 1113 return(-1); 1114 1115 if (xmlCharEncodingAliases == NULL) 1116 return(-1); 1117 /* 1118 * Walk down the list looking for a definition of the alias 1119 */ 1120 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1121 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1122 xmlFree((char *) xmlCharEncodingAliases[i].name); 1123 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1124 xmlCharEncodingAliasesNb--; 1125 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1126 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1127 return(0); 1128 } 1129 } 1130 return(-1); 1131} 1132 1133/** 1134 * xmlParseCharEncoding: 1135 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1136 * 1137 * Compare the string to the encoding schemes already known. Note 1138 * that the comparison is case insensitive accordingly to the section 1139 * [XML] 4.3.3 Character Encoding in Entities. 1140 * 1141 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1142 * if not recognized. 
1143 */ 1144xmlCharEncoding 1145xmlParseCharEncoding(const char* name) 1146{ 1147 const char *alias; 1148 char upper[500]; 1149 int i; 1150 1151 if (name == NULL) 1152 return(XML_CHAR_ENCODING_NONE); 1153 1154 /* 1155 * Do the alias resolution 1156 */ 1157 alias = xmlGetEncodingAlias(name); 1158 if (alias != NULL) 1159 name = alias; 1160 1161 for (i = 0;i < 499;i++) { 1162 upper[i] = toupper(name[i]); 1163 if (upper[i] == 0) break; 1164 } 1165 upper[i] = 0; 1166 1167 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1168 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1169 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1170 1171 /* 1172 * NOTE: if we were able to parse this, the endianness of UTF16 is 1173 * already found and in use 1174 */ 1175 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1176 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1177 1178 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1179 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1180 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1181 1182 /* 1183 * NOTE: if we were able to parse this, the endianness of UCS4 is 1184 * already found and in use 1185 */ 1186 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1187 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1188 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1189 1190 1191 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1192 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1193 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1194 1195 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1196 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1197 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1198 1199 if (!strcmp(upper, "ISO-8859-3")) 
return(XML_CHAR_ENCODING_8859_3); 1200 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1201 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1202 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1203 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1204 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1205 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1206 1207 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1208 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1209 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1210 1211#ifdef DEBUG_ENCODING 1212 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1213#endif 1214 return(XML_CHAR_ENCODING_ERROR); 1215} 1216 1217/** 1218 * xmlGetCharEncodingName: 1219 * @enc: the encoding 1220 * 1221 * The "canonical" name for XML encoding. 1222 * C.f. http://www.w3.org/TR/REC-xml#charencoding 1223 * Section 4.3.3 Character Encoding in Entities 1224 * 1225 * Returns the canonical name for the given encoding 1226 */ 1227 1228const char* 1229xmlGetCharEncodingName(xmlCharEncoding enc) { 1230 switch (enc) { 1231 case XML_CHAR_ENCODING_ERROR: 1232 return(NULL); 1233 case XML_CHAR_ENCODING_NONE: 1234 return(NULL); 1235 case XML_CHAR_ENCODING_UTF8: 1236 return("UTF-8"); 1237 case XML_CHAR_ENCODING_UTF16LE: 1238 return("UTF-16"); 1239 case XML_CHAR_ENCODING_UTF16BE: 1240 return("UTF-16"); 1241 case XML_CHAR_ENCODING_EBCDIC: 1242 return("EBCDIC"); 1243 case XML_CHAR_ENCODING_UCS4LE: 1244 return("ISO-10646-UCS-4"); 1245 case XML_CHAR_ENCODING_UCS4BE: 1246 return("ISO-10646-UCS-4"); 1247 case XML_CHAR_ENCODING_UCS4_2143: 1248 return("ISO-10646-UCS-4"); 1249 case XML_CHAR_ENCODING_UCS4_3412: 1250 return("ISO-10646-UCS-4"); 1251 case XML_CHAR_ENCODING_UCS2: 1252 return("ISO-10646-UCS-2"); 1253 case XML_CHAR_ENCODING_8859_1: 1254 return("ISO-8859-1"); 
1255 case XML_CHAR_ENCODING_8859_2: 1256 return("ISO-8859-2"); 1257 case XML_CHAR_ENCODING_8859_3: 1258 return("ISO-8859-3"); 1259 case XML_CHAR_ENCODING_8859_4: 1260 return("ISO-8859-4"); 1261 case XML_CHAR_ENCODING_8859_5: 1262 return("ISO-8859-5"); 1263 case XML_CHAR_ENCODING_8859_6: 1264 return("ISO-8859-6"); 1265 case XML_CHAR_ENCODING_8859_7: 1266 return("ISO-8859-7"); 1267 case XML_CHAR_ENCODING_8859_8: 1268 return("ISO-8859-8"); 1269 case XML_CHAR_ENCODING_8859_9: 1270 return("ISO-8859-9"); 1271 case XML_CHAR_ENCODING_2022_JP: 1272 return("ISO-2022-JP"); 1273 case XML_CHAR_ENCODING_SHIFT_JIS: 1274 return("Shift-JIS"); 1275 case XML_CHAR_ENCODING_EUC_JP: 1276 return("EUC-JP"); 1277 case XML_CHAR_ENCODING_ASCII: 1278 return(NULL); 1279 } 1280 return(NULL); 1281} 1282 1283/************************************************************************ 1284 * * 1285 * Char encoding handlers * 1286 * * 1287 ************************************************************************/ 1288 1289 1290/* the size should be growable, but it's not a big deal ... */ 1291#define MAX_ENCODING_HANDLERS 50 1292static xmlCharEncodingHandlerPtr *handlers = NULL; 1293static int nbCharEncodingHandler = 0; 1294 1295/* 1296 * The default is UTF-8 for XML, that's also the default used for the 1297 * parser internals, so the default encoding handler is NULL 1298 */ 1299 1300static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1301 1302/** 1303 * xmlNewCharEncodingHandler: 1304 * @name: the encoding name, in UTF-8 format (ASCII actually) 1305 * @input: the xmlCharEncodingInputFunc to read that encoding 1306 * @output: the xmlCharEncodingOutputFunc to write that encoding 1307 * 1308 * Create and registers an xmlCharEncodingHandler. 1309 * 1310 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 
1311 */ 1312xmlCharEncodingHandlerPtr 1313xmlNewCharEncodingHandler(const char *name, 1314 xmlCharEncodingInputFunc input, 1315 xmlCharEncodingOutputFunc output) { 1316 xmlCharEncodingHandlerPtr handler; 1317 const char *alias; 1318 char upper[500]; 1319 int i; 1320 char *up = NULL; 1321 1322 /* 1323 * Do the alias resolution 1324 */ 1325 alias = xmlGetEncodingAlias(name); 1326 if (alias != NULL) 1327 name = alias; 1328 1329 /* 1330 * Keep only the uppercase version of the encoding. 1331 */ 1332 if (name == NULL) { 1333 xmlEncodingErr(XML_I18N_NO_NAME, 1334 "xmlNewCharEncodingHandler : no name !\n", NULL); 1335 return(NULL); 1336 } 1337 for (i = 0;i < 499;i++) { 1338 upper[i] = toupper(name[i]); 1339 if (upper[i] == 0) break; 1340 } 1341 upper[i] = 0; 1342 up = xmlMemStrdup(upper); 1343 if (up == NULL) { 1344 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1345 return(NULL); 1346 } 1347 1348 /* 1349 * allocate and fill-up an handler block. 1350 */ 1351 handler = (xmlCharEncodingHandlerPtr) 1352 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1353 if (handler == NULL) { 1354 xmlFree(up); 1355 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1356 return(NULL); 1357 } 1358 memset(handler, 0, sizeof(xmlCharEncodingHandler)); 1359 handler->input = input; 1360 handler->output = output; 1361 handler->name = up; 1362 1363#ifdef LIBXML_ICONV_ENABLED 1364 handler->iconv_in = NULL; 1365 handler->iconv_out = NULL; 1366#endif 1367#ifdef LIBXML_ICU_ENABLED 1368 handler->uconv_in = NULL; 1369 handler->uconv_out = NULL; 1370#endif 1371 1372 /* 1373 * registers and returns the handler. 
1374 */ 1375 xmlRegisterCharEncodingHandler(handler); 1376#ifdef DEBUG_ENCODING 1377 xmlGenericError(xmlGenericErrorContext, 1378 "Registered encoding handler for %s\n", name); 1379#endif 1380 return(handler); 1381} 1382 1383/** 1384 * xmlInitCharEncodingHandlers: 1385 * 1386 * Initialize the char encoding support, it registers the default 1387 * encoding supported. 1388 * NOTE: while public, this function usually doesn't need to be called 1389 * in normal processing. 1390 */ 1391void 1392xmlInitCharEncodingHandlers(void) { 1393 unsigned short int tst = 0x1234; 1394 unsigned char *ptr = (unsigned char *) &tst; 1395 1396 if (handlers != NULL) return; 1397 1398 handlers = (xmlCharEncodingHandlerPtr *) 1399 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1400 1401 if (*ptr == 0x12) xmlLittleEndian = 0; 1402 else if (*ptr == 0x34) xmlLittleEndian = 1; 1403 else { 1404 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1405 "Odd problem at endianness detection\n", NULL); 1406 } 1407 1408 if (handlers == NULL) { 1409 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1410 return; 1411 } 1412 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1413#ifdef LIBXML_OUTPUT_ENABLED 1414 xmlUTF16LEHandler = 1415 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1416 xmlUTF16BEHandler = 1417 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1418 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1419 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1420 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1421 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1422#ifdef LIBXML_HTML_ENABLED 1423 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1424#endif 1425#else 1426 xmlUTF16LEHandler = 1427 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1428 xmlUTF16BEHandler = 1429 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL); 
1430 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1431 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1432 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1433 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1434#endif /* LIBXML_OUTPUT_ENABLED */ 1435#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 1436#ifdef LIBXML_ISO8859X_ENABLED 1437 xmlRegisterCharEncodingHandlersISO8859x (); 1438#endif 1439#endif 1440 1441} 1442 1443/** 1444 * xmlCleanupCharEncodingHandlers: 1445 * 1446 * Cleanup the memory allocated for the char encoding support, it 1447 * unregisters all the encoding handlers and the aliases. 1448 */ 1449void 1450xmlCleanupCharEncodingHandlers(void) { 1451 xmlCleanupEncodingAliases(); 1452 1453 if (handlers == NULL) return; 1454 1455 for (;nbCharEncodingHandler > 0;) { 1456 nbCharEncodingHandler--; 1457 if (handlers[nbCharEncodingHandler] != NULL) { 1458 if (handlers[nbCharEncodingHandler]->name != NULL) 1459 xmlFree(handlers[nbCharEncodingHandler]->name); 1460 xmlFree(handlers[nbCharEncodingHandler]); 1461 } 1462 } 1463 xmlFree(handlers); 1464 handlers = NULL; 1465 nbCharEncodingHandler = 0; 1466 xmlDefaultCharEncodingHandler = NULL; 1467} 1468 1469/** 1470 * xmlRegisterCharEncodingHandler: 1471 * @handler: the xmlCharEncodingHandlerPtr handler block 1472 * 1473 * Register the char encoding handler, surprising, isn't it ? 
1474 */ 1475void 1476xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1477 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1478 if ((handler == NULL) || (handlers == NULL)) { 1479 xmlEncodingErr(XML_I18N_NO_HANDLER, 1480 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1481 return; 1482 } 1483 1484 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1485 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1486 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1487 "MAX_ENCODING_HANDLERS"); 1488 return; 1489 } 1490 handlers[nbCharEncodingHandler++] = handler; 1491} 1492 1493/** 1494 * xmlGetCharEncodingHandler: 1495 * @enc: an xmlCharEncoding value. 1496 * 1497 * Search in the registered set the handler able to read/write that encoding. 1498 * 1499 * Returns the handler or NULL if not found 1500 */ 1501xmlCharEncodingHandlerPtr 1502xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1503 xmlCharEncodingHandlerPtr handler; 1504 1505 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1506 switch (enc) { 1507 case XML_CHAR_ENCODING_ERROR: 1508 return(NULL); 1509 case XML_CHAR_ENCODING_NONE: 1510 return(NULL); 1511 case XML_CHAR_ENCODING_UTF8: 1512 return(NULL); 1513 case XML_CHAR_ENCODING_UTF16LE: 1514 return(xmlUTF16LEHandler); 1515 case XML_CHAR_ENCODING_UTF16BE: 1516 return(xmlUTF16BEHandler); 1517 case XML_CHAR_ENCODING_EBCDIC: 1518 handler = xmlFindCharEncodingHandler("EBCDIC"); 1519 if (handler != NULL) return(handler); 1520 handler = xmlFindCharEncodingHandler("ebcdic"); 1521 if (handler != NULL) return(handler); 1522 handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1523 if (handler != NULL) return(handler); 1524 handler = xmlFindCharEncodingHandler("IBM-037"); 1525 if (handler != NULL) return(handler); 1526 break; 1527 case XML_CHAR_ENCODING_UCS4BE: 1528 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1529 if (handler != NULL) return(handler); 1530 handler = xmlFindCharEncodingHandler("UCS-4"); 1531 if 
(handler != NULL) return(handler); 1532 handler = xmlFindCharEncodingHandler("UCS4"); 1533 if (handler != NULL) return(handler); 1534 break; 1535 case XML_CHAR_ENCODING_UCS4LE: 1536 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1537 if (handler != NULL) return(handler); 1538 handler = xmlFindCharEncodingHandler("UCS-4"); 1539 if (handler != NULL) return(handler); 1540 handler = xmlFindCharEncodingHandler("UCS4"); 1541 if (handler != NULL) return(handler); 1542 break; 1543 case XML_CHAR_ENCODING_UCS4_2143: 1544 break; 1545 case XML_CHAR_ENCODING_UCS4_3412: 1546 break; 1547 case XML_CHAR_ENCODING_UCS2: 1548 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1549 if (handler != NULL) return(handler); 1550 handler = xmlFindCharEncodingHandler("UCS-2"); 1551 if (handler != NULL) return(handler); 1552 handler = xmlFindCharEncodingHandler("UCS2"); 1553 if (handler != NULL) return(handler); 1554 break; 1555 1556 /* 1557 * We used to keep ISO Latin encodings native in the 1558 * generated data. This led to so many problems that 1559 * this has been removed. 
One can still change this 1560 * back by registering no-ops encoders for those 1561 */ 1562 case XML_CHAR_ENCODING_8859_1: 1563 handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1564 if (handler != NULL) return(handler); 1565 break; 1566 case XML_CHAR_ENCODING_8859_2: 1567 handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1568 if (handler != NULL) return(handler); 1569 break; 1570 case XML_CHAR_ENCODING_8859_3: 1571 handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1572 if (handler != NULL) return(handler); 1573 break; 1574 case XML_CHAR_ENCODING_8859_4: 1575 handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1576 if (handler != NULL) return(handler); 1577 break; 1578 case XML_CHAR_ENCODING_8859_5: 1579 handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1580 if (handler != NULL) return(handler); 1581 break; 1582 case XML_CHAR_ENCODING_8859_6: 1583 handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1584 if (handler != NULL) return(handler); 1585 break; 1586 case XML_CHAR_ENCODING_8859_7: 1587 handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1588 if (handler != NULL) return(handler); 1589 break; 1590 case XML_CHAR_ENCODING_8859_8: 1591 handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1592 if (handler != NULL) return(handler); 1593 break; 1594 case XML_CHAR_ENCODING_8859_9: 1595 handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1596 if (handler != NULL) return(handler); 1597 break; 1598 1599 1600 case XML_CHAR_ENCODING_2022_JP: 1601 handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1602 if (handler != NULL) return(handler); 1603 break; 1604 case XML_CHAR_ENCODING_SHIFT_JIS: 1605 handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1606 if (handler != NULL) return(handler); 1607 handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1608 if (handler != NULL) return(handler); 1609 handler = xmlFindCharEncodingHandler("Shift_JIS"); 1610 if (handler != NULL) return(handler); 1611 break; 1612 case XML_CHAR_ENCODING_EUC_JP: 1613 handler = 
xmlFindCharEncodingHandler("EUC-JP"); 1614 if (handler != NULL) return(handler); 1615 break; 1616 default: 1617 break; 1618 } 1619 1620#ifdef DEBUG_ENCODING 1621 xmlGenericError(xmlGenericErrorContext, 1622 "No handler found for encoding %d\n", enc); 1623#endif 1624 return(NULL); 1625} 1626 1627/** 1628 * xmlFindCharEncodingHandler: 1629 * @name: a string describing the char encoding. 1630 * 1631 * Search in the registered set the handler able to read/write that encoding. 1632 * 1633 * Returns the handler or NULL if not found 1634 */ 1635xmlCharEncodingHandlerPtr 1636xmlFindCharEncodingHandler(const char *name) { 1637 const char *nalias; 1638 const char *norig; 1639 xmlCharEncoding alias; 1640#ifdef LIBXML_ICONV_ENABLED 1641 xmlCharEncodingHandlerPtr enc; 1642 iconv_t icv_in, icv_out; 1643#endif /* LIBXML_ICONV_ENABLED */ 1644#ifdef LIBXML_ICU_ENABLED 1645 xmlCharEncodingHandlerPtr encu; 1646 uconv_t *ucv_in, *ucv_out; 1647#endif /* LIBXML_ICU_ENABLED */ 1648 char upper[100]; 1649 int i; 1650 1651 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1652 if (name == NULL) return(xmlDefaultCharEncodingHandler); 1653 if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1654 1655 /* 1656 * Do the alias resolution 1657 */ 1658 norig = name; 1659 nalias = xmlGetEncodingAlias(name); 1660 if (nalias != NULL) 1661 name = nalias; 1662 1663 /* 1664 * Check first for directly registered encoding names 1665 */ 1666 for (i = 0;i < 99;i++) { 1667 upper[i] = toupper(name[i]); 1668 if (upper[i] == 0) break; 1669 } 1670 upper[i] = 0; 1671 1672 if (handlers != NULL) { 1673 for (i = 0;i < nbCharEncodingHandler; i++) { 1674 if (!strcmp(upper, handlers[i]->name)) { 1675#ifdef DEBUG_ENCODING 1676 xmlGenericError(xmlGenericErrorContext, 1677 "Found registered handler for encoding %s\n", name); 1678#endif 1679 return(handlers[i]); 1680 } 1681 } 1682 } 1683 1684#ifdef LIBXML_ICONV_ENABLED 1685 /* check whether iconv can handle this */ 1686 icv_in = iconv_open("UTF-8", name); 1687 
icv_out = iconv_open(name, "UTF-8"); 1688 if (icv_in == (iconv_t) -1) { 1689 icv_in = iconv_open("UTF-8", upper); 1690 } 1691 if (icv_out == (iconv_t) -1) { 1692 icv_out = iconv_open(upper, "UTF-8"); 1693 } 1694 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1695 enc = (xmlCharEncodingHandlerPtr) 1696 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1697 if (enc == NULL) { 1698 iconv_close(icv_in); 1699 iconv_close(icv_out); 1700 return(NULL); 1701 } 1702 memset(enc, 0, sizeof(xmlCharEncodingHandler)); 1703 enc->name = xmlMemStrdup(name); 1704 enc->input = NULL; 1705 enc->output = NULL; 1706 enc->iconv_in = icv_in; 1707 enc->iconv_out = icv_out; 1708#ifdef DEBUG_ENCODING 1709 xmlGenericError(xmlGenericErrorContext, 1710 "Found iconv handler for encoding %s\n", name); 1711#endif 1712 return enc; 1713 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1714 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1715 "iconv : problems with filters for '%s'\n", name); 1716 } 1717#endif /* LIBXML_ICONV_ENABLED */ 1718#ifdef LIBXML_ICU_ENABLED 1719 /* check whether icu can handle this */ 1720 ucv_in = openIcuConverter(name, 1); 1721 ucv_out = openIcuConverter(name, 0); 1722 if (ucv_in != NULL && ucv_out != NULL) { 1723 encu = (xmlCharEncodingHandlerPtr) 1724 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1725 if (encu == NULL) { 1726 closeIcuConverter(ucv_in); 1727 closeIcuConverter(ucv_out); 1728 return(NULL); 1729 } 1730 memset(encu, 0, sizeof(xmlCharEncodingHandler)); 1731 encu->name = xmlMemStrdup(name); 1732 encu->input = NULL; 1733 encu->output = NULL; 1734 encu->uconv_in = ucv_in; 1735 encu->uconv_out = ucv_out; 1736#ifdef DEBUG_ENCODING 1737 xmlGenericError(xmlGenericErrorContext, 1738 "Found ICU converter handler for encoding %s\n", name); 1739#endif 1740 return encu; 1741 } else if (ucv_in != NULL || ucv_out != NULL) { 1742 closeIcuConverter(ucv_in); 1743 closeIcuConverter(ucv_out); 1744 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1745 "ICU converter : problems 
with filters for '%s'\n", name); 1746 } 1747#endif /* LIBXML_ICU_ENABLED */ 1748 1749#ifdef DEBUG_ENCODING 1750 xmlGenericError(xmlGenericErrorContext, 1751 "No handler found for encoding %s\n", name); 1752#endif 1753 1754 /* 1755 * Fallback using the canonical names 1756 */ 1757 alias = xmlParseCharEncoding(norig); 1758 if (alias != XML_CHAR_ENCODING_ERROR) { 1759 const char* canon; 1760 canon = xmlGetCharEncodingName(alias); 1761 if ((canon != NULL) && (strcmp(name, canon))) { 1762 return(xmlFindCharEncodingHandler(canon)); 1763 } 1764 } 1765 1766 /* If "none of the above", give up */ 1767 return(NULL); 1768} 1769 1770/************************************************************************ 1771 * * 1772 * ICONV based generic conversion functions * 1773 * * 1774 ************************************************************************/ 1775 1776#ifdef LIBXML_ICONV_ENABLED 1777/** 1778 * xmlIconvWrapper: 1779 * @cd: iconv converter data structure 1780 * @out: a pointer to an array of bytes to store the result 1781 * @outlen: the length of @out 1782 * @in: a pointer to an array of ISO Latin 1 chars 1783 * @inlen: the length of @in 1784 * 1785 * Returns 0 if success, or 1786 * -1 by lack of space, or 1787 * -2 if the transcoding fails (for *in is not valid utf8 string or 1788 * the result of transformation can't fit into the encoding we want), or 1789 * -3 if there the last byte can't form a single output char. 1790 * 1791 * The value of @inlen after return is the number of octets consumed 1792 * as the return value is positive, else unpredictable. 1793 * The value of @outlen after return is the number of ocetes consumed. 
1794 */ 1795static int 1796xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1797 const unsigned char *in, int *inlen) { 1798 size_t icv_inlen, icv_outlen; 1799 const char *icv_in = (const char *) in; 1800 char *icv_out = (char *) out; 1801 int ret; 1802 1803 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1804 if (outlen != NULL) *outlen = 0; 1805 return(-1); 1806 } 1807 icv_inlen = *inlen; 1808 icv_outlen = *outlen; 1809 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1810 *inlen -= icv_inlen; 1811 *outlen -= icv_outlen; 1812 if ((icv_inlen != 0) || (ret == -1)) { 1813#ifdef EILSEQ 1814 if (errno == EILSEQ) { 1815 return -2; 1816 } else 1817#endif 1818#ifdef E2BIG 1819 if (errno == E2BIG) { 1820 return -1; 1821 } else 1822#endif 1823#ifdef EINVAL 1824 if (errno == EINVAL) { 1825 return -3; 1826 } else 1827#endif 1828 { 1829 return -3; 1830 } 1831 } 1832 return 0; 1833} 1834#endif /* LIBXML_ICONV_ENABLED */ 1835 1836/************************************************************************ 1837 * * 1838 * ICU based generic conversion functions * 1839 * * 1840 ************************************************************************/ 1841 1842#ifdef LIBXML_ICU_ENABLED 1843/** 1844 * xmlUconvWrapper: 1845 * @cd: ICU uconverter data structure 1846 * @toUnicode : non-zero if toUnicode. 0 otherwise. 1847 * @out: a pointer to an array of bytes to store the result 1848 * @outlen: the length of @out 1849 * @in: a pointer to an array of ISO Latin 1 chars 1850 * @inlen: the length of @in 1851 * 1852 * Returns 0 if success, or 1853 * -1 by lack of space, or 1854 * -2 if the transcoding fails (for *in is not valid utf8 string or 1855 * the result of transformation can't fit into the encoding we want), or 1856 * -3 if there the last byte can't form a single output char. 
1857 * 1858 * The value of @inlen after return is the number of octets consumed 1859 * as the return value is positive, else unpredictable. 1860 * The value of @outlen after return is the number of ocetes consumed. 1861 */ 1862static int 1863xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, 1864 const unsigned char *in, int *inlen) { 1865 const char *ucv_in = (const char *) in; 1866 char *ucv_out = (char *) out; 1867 UErrorCode err = U_ZERO_ERROR; 1868 1869 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1870 if (outlen != NULL) *outlen = 0; 1871 return(-1); 1872 } 1873 1874 /* 1875 * TODO(jungshik) 1876 * 1. is ucnv_convert(To|From)Algorithmic better? 1877 * 2. had we better use an explicit pivot buffer? 1878 * 3. error returned comes from 'fromUnicode' only even 1879 * when toUnicode is true ! 1880 */ 1881 if (toUnicode) { 1882 /* encoding => UTF-16 => UTF-8 */ 1883 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, 1884 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1885 0, TRUE, &err); 1886 } else { 1887 /* UTF-8 => UTF-16 => encoding */ 1888 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, 1889 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1890 0, TRUE, &err); 1891 } 1892 *inlen = ucv_in - (const char*) in; 1893 *outlen = ucv_out - (char *) out; 1894 if (U_SUCCESS(err)) 1895 return 0; 1896 if (err == U_BUFFER_OVERFLOW_ERROR) 1897 return -1; 1898 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) 1899 return -2; 1900 /* if (err == U_TRUNCATED_CHAR_FOUND) */ 1901 return -3; 1902} 1903#endif /* LIBXML_ICU_ENABLED */ 1904 1905/************************************************************************ 1906 * * 1907 * The real API used by libxml for on-the-fly conversion * 1908 * * 1909 ************************************************************************/ 1910 1911/** 1912 * xmlCharEncFirstLineInt: 1913 * @handler: char enconding transformation data structure 
1914 * @out: an xmlBuffer for the output. 1915 * @in: an xmlBuffer for the input 1916 * @len: number of bytes to convert for the first line, or -1 1917 * 1918 * Front-end for the encoding handler input function, but handle only 1919 * the very first line, i.e. limit itself to 45 chars. 1920 * 1921 * Returns the number of byte written if success, or 1922 * -1 general error 1923 * -2 if the transcoding fails (for *in is not valid utf8 string or 1924 * the result of transformation can't fit into the encoding we want), or 1925 */ 1926int 1927xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1928 xmlBufferPtr in, int len) { 1929 int ret = -2; 1930 int written; 1931 int toconv; 1932 1933 if (handler == NULL) return(-1); 1934 if (out == NULL) return(-1); 1935 if (in == NULL) return(-1); 1936 1937 /* calculate space available */ 1938 written = out->size - out->use - 1; /* count '\0' */ 1939 toconv = in->use; 1940 /* 1941 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1942 * 45 chars should be sufficient to reach the end of the encoding 1943 * declaration without going too far inside the document content. 
1944 * on UTF-16 this means 90bytes, on UCS4 this means 180 1945 * The actual value depending on guessed encoding is passed as @len 1946 * if provided 1947 */ 1948 if (len >= 0) { 1949 if (toconv > len) 1950 toconv = len; 1951 } else { 1952 if (toconv > 180) 1953 toconv = 180; 1954 } 1955 if (toconv * 2 >= written) { 1956 xmlBufferGrow(out, toconv * 2); 1957 written = out->size - out->use - 1; 1958 } 1959 1960 if (handler->input != NULL) { 1961 ret = handler->input(&out->content[out->use], &written, 1962 in->content, &toconv); 1963 xmlBufferShrink(in, toconv); 1964 out->use += written; 1965 out->content[out->use] = 0; 1966 } 1967#ifdef LIBXML_ICONV_ENABLED 1968 else if (handler->iconv_in != NULL) { 1969 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1970 &written, in->content, &toconv); 1971 xmlBufferShrink(in, toconv); 1972 out->use += written; 1973 out->content[out->use] = 0; 1974 if (ret == -1) ret = -3; 1975 } 1976#endif /* LIBXML_ICONV_ENABLED */ 1977#ifdef LIBXML_ICU_ENABLED 1978 else if (handler->uconv_in != NULL) { 1979 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 1980 &written, in->content, &toconv); 1981 xmlBufferShrink(in, toconv); 1982 out->use += written; 1983 out->content[out->use] = 0; 1984 if (ret == -1) ret = -3; 1985 } 1986#endif /* LIBXML_ICU_ENABLED */ 1987#ifdef DEBUG_ENCODING 1988 switch (ret) { 1989 case 0: 1990 xmlGenericError(xmlGenericErrorContext, 1991 "converted %d bytes to %d bytes of input\n", 1992 toconv, written); 1993 break; 1994 case -1: 1995 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1996 toconv, written, in->use); 1997 break; 1998 case -2: 1999 xmlGenericError(xmlGenericErrorContext, 2000 "input conversion failed due to input error\n"); 2001 break; 2002 case -3: 2003 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2004 toconv, written, in->use); 2005 break; 2006 default: 2007 
xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 2008 } 2009#endif /* DEBUG_ENCODING */ 2010 /* 2011 * Ignore when input buffer is not on a boundary 2012 */ 2013 if (ret == -3) ret = 0; 2014 if (ret == -1) ret = 0; 2015 return(ret); 2016} 2017 2018/** 2019 * xmlCharEncFirstLine: 2020 * @handler: char enconding transformation data structure 2021 * @out: an xmlBuffer for the output. 2022 * @in: an xmlBuffer for the input 2023 * 2024 * Front-end for the encoding handler input function, but handle only 2025 * the very first line, i.e. limit itself to 45 chars. 2026 * 2027 * Returns the number of byte written if success, or 2028 * -1 general error 2029 * -2 if the transcoding fails (for *in is not valid utf8 string or 2030 * the result of transformation can't fit into the encoding we want), or 2031 */ 2032int 2033xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2034 xmlBufferPtr in) { 2035 return(xmlCharEncFirstLineInt(handler, out, in, -1)); 2036} 2037 2038/** 2039 * xmlCharEncFirstLineInput: 2040 * @input: a parser input buffer 2041 * @len: number of bytes to convert for the first line, or -1 2042 * 2043 * Front-end for the encoding handler input function, but handle only 2044 * the very first line. Point is that this is based on autodetection 2045 * of the encoding and once that first line is converted we may find 2046 * out that a different decoder is needed to process the input. 
2047 * 2048 * Returns the number of byte written if success, or 2049 * -1 general error 2050 * -2 if the transcoding fails (for *in is not valid utf8 string or 2051 * the result of transformation can't fit into the encoding we want), or 2052 */ 2053int 2054xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) 2055{ 2056 int ret = -2; 2057 size_t written; 2058 size_t toconv; 2059 int c_in; 2060 int c_out; 2061 xmlBufPtr in; 2062 xmlBufPtr out; 2063 2064 if ((input == NULL) || (input->encoder == NULL) || 2065 (input->buffer == NULL) || (input->raw == NULL)) 2066 return (-1); 2067 out = input->buffer; 2068 in = input->raw; 2069 2070 toconv = xmlBufUse(in); 2071 if (toconv == 0) 2072 return (0); 2073 written = xmlBufAvail(out) - 1; /* count '\0' */ 2074 /* 2075 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 2076 * 45 chars should be sufficient to reach the end of the encoding 2077 * declaration without going too far inside the document content. 2078 * on UTF-16 this means 90bytes, on UCS4 this means 180 2079 * The actual value depending on guessed encoding is passed as @len 2080 * if provided 2081 */ 2082 if (len >= 0) { 2083 if (toconv > (unsigned int) len) 2084 toconv = len; 2085 } else { 2086 if (toconv > 180) 2087 toconv = 180; 2088 } 2089 if (toconv * 2 >= written) { 2090 xmlBufGrow(out, toconv * 2); 2091 written = xmlBufAvail(out) - 1; 2092 } 2093 if (written > 360) 2094 written = 360; 2095 2096 c_in = toconv; 2097 c_out = written; 2098 if (input->encoder->input != NULL) { 2099 ret = input->encoder->input(xmlBufEnd(out), &c_out, 2100 xmlBufContent(in), &c_in); 2101 xmlBufShrink(in, c_in); 2102 xmlBufAddLen(out, c_out); 2103 } 2104#ifdef LIBXML_ICONV_ENABLED 2105 else if (input->encoder->iconv_in != NULL) { 2106 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), 2107 &c_out, xmlBufContent(in), &c_in); 2108 xmlBufShrink(in, c_in); 2109 xmlBufAddLen(out, c_out); 2110 if (ret == -1) 2111 ret = -3; 2112 } 2113#endif /* 
LIBXML_ICONV_ENABLED */ 2114#ifdef LIBXML_ICU_ENABLED 2115 else if (input->encoder->uconv_in != NULL) { 2116 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), 2117 &c_out, xmlBufContent(in), &c_in); 2118 xmlBufShrink(in, c_in); 2119 xmlBufAddLen(out, c_out); 2120 if (ret == -1) 2121 ret = -3; 2122 } 2123#endif /* LIBXML_ICU_ENABLED */ 2124 switch (ret) { 2125 case 0: 2126#ifdef DEBUG_ENCODING 2127 xmlGenericError(xmlGenericErrorContext, 2128 "converted %d bytes to %d bytes of input\n", 2129 c_in, c_out); 2130#endif 2131 break; 2132 case -1: 2133#ifdef DEBUG_ENCODING 2134 xmlGenericError(xmlGenericErrorContext, 2135 "converted %d bytes to %d bytes of input, %d left\n", 2136 c_in, c_out, (int)xmlBufUse(in)); 2137#endif 2138 break; 2139 case -3: 2140#ifdef DEBUG_ENCODING 2141 xmlGenericError(xmlGenericErrorContext, 2142 "converted %d bytes to %d bytes of input, %d left\n", 2143 c_in, c_out, (int)xmlBufUse(in)); 2144#endif 2145 break; 2146 case -2: { 2147 char buf[50]; 2148 const xmlChar *content = xmlBufContent(in); 2149 2150 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2151 content[0], content[1], 2152 content[2], content[3]); 2153 buf[49] = 0; 2154 xmlEncodingErr(XML_I18N_CONV_FAILED, 2155 "input conversion failed due to input error, bytes %s\n", 2156 buf); 2157 } 2158 } 2159 /* 2160 * Ignore when input buffer is not on a boundary 2161 */ 2162 if (ret == -3) ret = 0; 2163 if (ret == -1) ret = 0; 2164 return(ret); 2165} 2166 2167/** 2168 * xmlCharEncInput: 2169 * @input: a parser input buffer 2170 * @flush: try to flush all the raw buffer 2171 * 2172 * Generic front-end for the encoding handler on parser input 2173 * 2174 * Returns the number of byte written if success, or 2175 * -1 general error 2176 * -2 if the transcoding fails (for *in is not valid utf8 string or 2177 * the result of transformation can't fit into the encoding we want), or 2178 */ 2179int 2180xmlCharEncInput(xmlParserInputBufferPtr input, int flush) 2181{ 2182 int ret = 
-2; 2183 size_t written; 2184 size_t toconv; 2185 int c_in; 2186 int c_out; 2187 xmlBufPtr in; 2188 xmlBufPtr out; 2189 2190 if ((input == NULL) || (input->encoder == NULL) || 2191 (input->buffer == NULL) || (input->raw == NULL)) 2192 return (-1); 2193 out = input->buffer; 2194 in = input->raw; 2195 2196 toconv = xmlBufUse(in); 2197 if (toconv == 0) 2198 return (0); 2199 if ((toconv > 64 * 1024) && (flush == 0)) 2200 toconv = 64 * 1024; 2201 written = xmlBufAvail(out); 2202 if (written > 0) 2203 written--; /* count '\0' */ 2204 if (toconv * 2 >= written) { 2205 xmlBufGrow(out, toconv * 2); 2206 written = xmlBufAvail(out); 2207 if (written > 0) 2208 written--; /* count '\0' */ 2209 } 2210 if ((written > 128 * 1024) && (flush == 0)) 2211 written = 128 * 1024; 2212 2213 c_in = toconv; 2214 c_out = written; 2215 if (input->encoder->input != NULL) { 2216 ret = input->encoder->input(xmlBufEnd(out), &c_out, 2217 xmlBufContent(in), &c_in); 2218 xmlBufShrink(in, c_in); 2219 xmlBufAddLen(out, c_out); 2220 } 2221#ifdef LIBXML_ICONV_ENABLED 2222 else if (input->encoder->iconv_in != NULL) { 2223 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), 2224 &c_out, xmlBufContent(in), &c_in); 2225 xmlBufShrink(in, c_in); 2226 xmlBufAddLen(out, c_out); 2227 if (ret == -1) 2228 ret = -3; 2229 } 2230#endif /* LIBXML_ICONV_ENABLED */ 2231#ifdef LIBXML_ICU_ENABLED 2232 else if (input->encoder->uconv_in != NULL) { 2233 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), 2234 &c_out, xmlBufContent(in), &c_in); 2235 xmlBufShrink(in, c_in); 2236 xmlBufAddLen(out, c_out); 2237 if (ret == -1) 2238 ret = -3; 2239 } 2240#endif /* LIBXML_ICU_ENABLED */ 2241 switch (ret) { 2242 case 0: 2243#ifdef DEBUG_ENCODING 2244 xmlGenericError(xmlGenericErrorContext, 2245 "converted %d bytes to %d bytes of input\n", 2246 c_in, c_out); 2247#endif 2248 break; 2249 case -1: 2250#ifdef DEBUG_ENCODING 2251 xmlGenericError(xmlGenericErrorContext, 2252 "converted %d bytes to %d bytes of 
input, %d left\n", 2253 c_in, c_out, (int)xmlBufUse(in)); 2254#endif 2255 break; 2256 case -3: 2257#ifdef DEBUG_ENCODING 2258 xmlGenericError(xmlGenericErrorContext, 2259 "converted %d bytes to %d bytes of input, %d left\n", 2260 c_in, c_out, (int)xmlBufUse(in)); 2261#endif 2262 break; 2263 case -2: { 2264 char buf[50]; 2265 const xmlChar *content = xmlBufContent(in); 2266 2267 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2268 content[0], content[1], 2269 content[2], content[3]); 2270 buf[49] = 0; 2271 xmlEncodingErr(XML_I18N_CONV_FAILED, 2272 "input conversion failed due to input error, bytes %s\n", 2273 buf); 2274 } 2275 } 2276 /* 2277 * Ignore when input buffer is not on a boundary 2278 */ 2279 if (ret == -3) 2280 ret = 0; 2281 return (c_out? c_out : ret); 2282} 2283 2284/** 2285 * xmlCharEncInFunc: 2286 * @handler: char encoding transformation data structure 2287 * @out: an xmlBuffer for the output. 2288 * @in: an xmlBuffer for the input 2289 * 2290 * Generic front-end for the encoding handler input function 2291 * 2292 * Returns the number of byte written if success, or 2293 * -1 general error 2294 * -2 if the transcoding fails (for *in is not valid utf8 string or 2295 * the result of transformation can't fit into the encoding we want), or 2296 */ 2297int 2298xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2299 xmlBufferPtr in) 2300{ 2301 int ret = -2; 2302 int written; 2303 int toconv; 2304 2305 if (handler == NULL) 2306 return (-1); 2307 if (out == NULL) 2308 return (-1); 2309 if (in == NULL) 2310 return (-1); 2311 2312 toconv = in->use; 2313 if (toconv == 0) 2314 return (0); 2315 written = out->size - out->use -1; /* count '\0' */ 2316 if (toconv * 2 >= written) { 2317 xmlBufferGrow(out, out->size + toconv * 2); 2318 written = out->size - out->use - 1; 2319 } 2320 if (handler->input != NULL) { 2321 ret = handler->input(&out->content[out->use], &written, 2322 in->content, &toconv); 2323 xmlBufferShrink(in, toconv); 2324 out->use 
+= written; 2325 out->content[out->use] = 0; 2326 } 2327#ifdef LIBXML_ICONV_ENABLED 2328 else if (handler->iconv_in != NULL) { 2329 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 2330 &written, in->content, &toconv); 2331 xmlBufferShrink(in, toconv); 2332 out->use += written; 2333 out->content[out->use] = 0; 2334 if (ret == -1) 2335 ret = -3; 2336 } 2337#endif /* LIBXML_ICONV_ENABLED */ 2338#ifdef LIBXML_ICU_ENABLED 2339 else if (handler->uconv_in != NULL) { 2340 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 2341 &written, in->content, &toconv); 2342 xmlBufferShrink(in, toconv); 2343 out->use += written; 2344 out->content[out->use] = 0; 2345 if (ret == -1) 2346 ret = -3; 2347 } 2348#endif /* LIBXML_ICU_ENABLED */ 2349 switch (ret) { 2350 case 0: 2351#ifdef DEBUG_ENCODING 2352 xmlGenericError(xmlGenericErrorContext, 2353 "converted %d bytes to %d bytes of input\n", 2354 toconv, written); 2355#endif 2356 break; 2357 case -1: 2358#ifdef DEBUG_ENCODING 2359 xmlGenericError(xmlGenericErrorContext, 2360 "converted %d bytes to %d bytes of input, %d left\n", 2361 toconv, written, in->use); 2362#endif 2363 break; 2364 case -3: 2365#ifdef DEBUG_ENCODING 2366 xmlGenericError(xmlGenericErrorContext, 2367 "converted %d bytes to %d bytes of input, %d left\n", 2368 toconv, written, in->use); 2369#endif 2370 break; 2371 case -2: { 2372 char buf[50]; 2373 2374 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2375 in->content[0], in->content[1], 2376 in->content[2], in->content[3]); 2377 buf[49] = 0; 2378 xmlEncodingErr(XML_I18N_CONV_FAILED, 2379 "input conversion failed due to input error, bytes %s\n", 2380 buf); 2381 } 2382 } 2383 /* 2384 * Ignore when input buffer is not on a boundary 2385 */ 2386 if (ret == -3) 2387 ret = 0; 2388 return (written? 
written : ret); 2389} 2390 2391/** 2392 * xmlCharEncOutput: 2393 * @output: a parser output buffer 2394 * @init: is this an initialization call without data 2395 * 2396 * Generic front-end for the encoding handler on parser output 2397 * a first call with @init == 1 has to be made first to initiate the 2398 * output in case of non-stateless encoding needing to initiate their 2399 * state or the output (like the BOM in UTF16). 2400 * In case of UTF8 sequence conversion errors for the given encoder, 2401 * the content will be automatically remapped to a CharRef sequence. 2402 * 2403 * Returns the number of byte written if success, or 2404 * -1 general error 2405 * -2 if the transcoding fails (for *in is not valid utf8 string or 2406 * the result of transformation can't fit into the encoding we want), or 2407 */ 2408int 2409xmlCharEncOutput(xmlOutputBufferPtr output, int init) 2410{ 2411 int ret = -2; 2412 size_t written; 2413 size_t writtentot = 0; 2414 size_t toconv; 2415 int c_in; 2416 int c_out; 2417 xmlBufPtr in; 2418 xmlBufPtr out; 2419 int charref_len = 0; 2420 2421 if ((output == NULL) || (output->encoder == NULL) || 2422 (output->buffer == NULL) || (output->conv == NULL)) 2423 return (-1); 2424 out = output->conv; 2425 in = output->buffer; 2426 2427retry: 2428 2429 written = xmlBufAvail(out); 2430 if (written > 0) 2431 written--; /* count '\0' */ 2432 2433 /* 2434 * First specific handling of the initialization call 2435 */ 2436 if (init) { 2437 c_in = 0; 2438 c_out = written; 2439 if (output->encoder->output != NULL) { 2440 ret = output->encoder->output(xmlBufEnd(out), &c_out, 2441 NULL, &c_in); 2442 if (ret > 0) /* Gennady: check return value */ 2443 xmlBufAddLen(out, c_out); 2444 } 2445#ifdef LIBXML_ICONV_ENABLED 2446 else if (output->encoder->iconv_out != NULL) { 2447 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), 2448 &c_out, NULL, &c_in); 2449 xmlBufAddLen(out, c_out); 2450 } 2451#endif /* LIBXML_ICONV_ENABLED */ 2452#ifdef 
LIBXML_ICU_ENABLED 2453 else if (output->encoder->uconv_out != NULL) { 2454 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), 2455 &c_out, NULL, &c_in); 2456 xmlBufAddLen(out, c_out); 2457 } 2458#endif /* LIBXML_ICU_ENABLED */ 2459#ifdef DEBUG_ENCODING 2460 xmlGenericError(xmlGenericErrorContext, 2461 "initialized encoder\n"); 2462#endif 2463 return(0); 2464 } 2465 2466 /* 2467 * Conversion itself. 2468 */ 2469 toconv = xmlBufUse(in); 2470 if (toconv == 0) 2471 return (0); 2472 if (toconv > 64 * 1024) 2473 toconv = 64 * 1024; 2474 if (toconv * 4 >= written) { 2475 xmlBufGrow(out, toconv * 4); 2476 written = xmlBufAvail(out) - 1; 2477 } 2478 if (written > 256 * 1024) 2479 written = 256 * 1024; 2480 2481 c_in = toconv; 2482 c_out = written; 2483 if (output->encoder->output != NULL) { 2484 ret = output->encoder->output(xmlBufEnd(out), &c_out, 2485 xmlBufContent(in), &c_in); 2486 if (c_out > 0) { 2487 xmlBufShrink(in, c_in); 2488 xmlBufAddLen(out, c_out); 2489 writtentot += c_out; 2490 } 2491 } 2492#ifdef LIBXML_ICONV_ENABLED 2493 else if (output->encoder->iconv_out != NULL) { 2494 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), 2495 &c_out, xmlBufContent(in), &c_in); 2496 xmlBufShrink(in, c_in); 2497 xmlBufAddLen(out, c_out); 2498 writtentot += c_out; 2499 if (ret == -1) { 2500 if (c_out > 0) { 2501 /* 2502 * Can be a limitation of iconv 2503 */ 2504 charref_len = 0; 2505 goto retry; 2506 } 2507 ret = -3; 2508 } 2509 } 2510#endif /* LIBXML_ICONV_ENABLED */ 2511#ifdef LIBXML_ICU_ENABLED 2512 else if (output->encoder->uconv_out != NULL) { 2513 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), 2514 &c_out, xmlBufContent(in), &c_in); 2515 xmlBufShrink(in, c_in); 2516 xmlBufAddLen(out, c_out); 2517 writtentot += c_out; 2518 if (ret == -1) { 2519 if (c_out > 0) { 2520 /* 2521 * Can be a limitation of uconv 2522 */ 2523 charref_len = 0; 2524 goto retry; 2525 } 2526 ret = -3; 2527 } 2528 } 2529#endif /* 
LIBXML_ICU_ENABLED */ 2530 else { 2531 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2532 "xmlCharEncOutFunc: no output function !\n", NULL); 2533 return(-1); 2534 } 2535 2536 if (ret >= 0) output += ret; 2537 2538 /* 2539 * Attempt to handle error cases 2540 */ 2541 switch (ret) { 2542 case 0: 2543#ifdef DEBUG_ENCODING 2544 xmlGenericError(xmlGenericErrorContext, 2545 "converted %d bytes to %d bytes of output\n", 2546 c_in, c_out); 2547#endif 2548 break; 2549 case -1: 2550#ifdef DEBUG_ENCODING 2551 xmlGenericError(xmlGenericErrorContext, 2552 "output conversion failed by lack of space\n"); 2553#endif 2554 break; 2555 case -3: 2556#ifdef DEBUG_ENCODING 2557 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2558 c_in, c_out, (int) xmlBufUse(in)); 2559#endif 2560 break; 2561 case -2: { 2562 int len = (int) xmlBufUse(in); 2563 xmlChar *content = xmlBufContent(in); 2564 int cur; 2565 2566 cur = xmlGetUTF8Char(content, &len); 2567 if ((charref_len != 0) && (c_out < charref_len)) { 2568 /* 2569 * We attempted to insert a character reference and failed. 2570 * Undo what was written and skip the remaining charref. 2571 */ 2572 xmlBufErase(out, c_out); 2573 writtentot -= c_out; 2574 xmlBufShrink(in, charref_len - c_out); 2575 charref_len = 0; 2576 2577 ret = -1; 2578 break; 2579 } else if (cur > 0) { 2580 xmlChar charref[20]; 2581 2582#ifdef DEBUG_ENCODING 2583 xmlGenericError(xmlGenericErrorContext, 2584 "handling output conversion error\n"); 2585 xmlGenericError(xmlGenericErrorContext, 2586 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2587 content[0], content[1], 2588 content[2], content[3]); 2589#endif 2590 /* 2591 * Removes the UTF8 sequence, and replace it by a charref 2592 * and continue the transcoding phase, hoping the error 2593 * did not mangle the encoder state. 
2594 */ 2595 charref_len = snprintf((char *) &charref[0], sizeof(charref), 2596 "&#%d;", cur); 2597 xmlBufShrink(in, len); 2598 xmlBufAddHead(in, charref, -1); 2599 2600 goto retry; 2601 } else { 2602 char buf[50]; 2603 2604 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2605 content[0], content[1], 2606 content[2], content[3]); 2607 buf[49] = 0; 2608 xmlEncodingErr(XML_I18N_CONV_FAILED, 2609 "output conversion failed due to conv error, bytes %s\n", 2610 buf); 2611 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE) 2612 content[0] = ' '; 2613 } 2614 break; 2615 } 2616 } 2617 return(ret); 2618} 2619 2620/** 2621 * xmlCharEncOutFunc: 2622 * @handler: char enconding transformation data structure 2623 * @out: an xmlBuffer for the output. 2624 * @in: an xmlBuffer for the input 2625 * 2626 * Generic front-end for the encoding handler output function 2627 * a first call with @in == NULL has to be made firs to initiate the 2628 * output in case of non-stateless encoding needing to initiate their 2629 * state or the output (like the BOM in UTF16). 2630 * In case of UTF8 sequence conversion errors for the given encoder, 2631 * the content will be automatically remapped to a CharRef sequence. 2632 * 2633 * Returns the number of byte written if success, or 2634 * -1 general error 2635 * -2 if the transcoding fails (for *in is not valid utf8 string or 2636 * the result of transformation can't fit into the encoding we want), or 2637 */ 2638int 2639xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2640 xmlBufferPtr in) { 2641 int ret = -2; 2642 int written; 2643 int writtentot = 0; 2644 int toconv; 2645 int output = 0; 2646 int charref_len = 0; 2647 2648 if (handler == NULL) return(-1); 2649 if (out == NULL) return(-1); 2650 2651retry: 2652 2653 written = out->size - out->use; 2654 2655 if (written > 0) 2656 written--; /* Gennady: count '/0' */ 2657 2658 /* 2659 * First specific handling of in = NULL, i.e. 
the initialization call 2660 */ 2661 if (in == NULL) { 2662 toconv = 0; 2663 if (handler->output != NULL) { 2664 ret = handler->output(&out->content[out->use], &written, 2665 NULL, &toconv); 2666 if (ret >= 0) { /* Gennady: check return value */ 2667 out->use += written; 2668 out->content[out->use] = 0; 2669 } 2670 } 2671#ifdef LIBXML_ICONV_ENABLED 2672 else if (handler->iconv_out != NULL) { 2673 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2674 &written, NULL, &toconv); 2675 out->use += written; 2676 out->content[out->use] = 0; 2677 } 2678#endif /* LIBXML_ICONV_ENABLED */ 2679#ifdef LIBXML_ICU_ENABLED 2680 else if (handler->uconv_out != NULL) { 2681 ret = xmlUconvWrapper(handler->uconv_out, 0, 2682 &out->content[out->use], 2683 &written, NULL, &toconv); 2684 out->use += written; 2685 out->content[out->use] = 0; 2686 } 2687#endif /* LIBXML_ICU_ENABLED */ 2688#ifdef DEBUG_ENCODING 2689 xmlGenericError(xmlGenericErrorContext, 2690 "initialized encoder\n"); 2691#endif 2692 return(0); 2693 } 2694 2695 /* 2696 * Conversion itself. 
2697 */ 2698 toconv = in->use; 2699 if (toconv == 0) 2700 return(0); 2701 if (toconv * 4 >= written) { 2702 xmlBufferGrow(out, toconv * 4); 2703 written = out->size - out->use - 1; 2704 } 2705 if (handler->output != NULL) { 2706 ret = handler->output(&out->content[out->use], &written, 2707 in->content, &toconv); 2708 if (written > 0) { 2709 xmlBufferShrink(in, toconv); 2710 out->use += written; 2711 writtentot += written; 2712 } 2713 out->content[out->use] = 0; 2714 } 2715#ifdef LIBXML_ICONV_ENABLED 2716 else if (handler->iconv_out != NULL) { 2717 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2718 &written, in->content, &toconv); 2719 xmlBufferShrink(in, toconv); 2720 out->use += written; 2721 writtentot += written; 2722 out->content[out->use] = 0; 2723 if (ret == -1) { 2724 if (written > 0) { 2725 /* 2726 * Can be a limitation of iconv 2727 */ 2728 charref_len = 0; 2729 goto retry; 2730 } 2731 ret = -3; 2732 } 2733 } 2734#endif /* LIBXML_ICONV_ENABLED */ 2735#ifdef LIBXML_ICU_ENABLED 2736 else if (handler->uconv_out != NULL) { 2737 ret = xmlUconvWrapper(handler->uconv_out, 0, 2738 &out->content[out->use], 2739 &written, in->content, &toconv); 2740 xmlBufferShrink(in, toconv); 2741 out->use += written; 2742 writtentot += written; 2743 out->content[out->use] = 0; 2744 if (ret == -1) { 2745 if (written > 0) { 2746 /* 2747 * Can be a limitation of iconv 2748 */ 2749 charref_len = 0; 2750 goto retry; 2751 } 2752 ret = -3; 2753 } 2754 } 2755#endif /* LIBXML_ICU_ENABLED */ 2756 else { 2757 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2758 "xmlCharEncOutFunc: no output function !\n", NULL); 2759 return(-1); 2760 } 2761 2762 if (ret >= 0) output += ret; 2763 2764 /* 2765 * Attempt to handle error cases 2766 */ 2767 switch (ret) { 2768 case 0: 2769#ifdef DEBUG_ENCODING 2770 xmlGenericError(xmlGenericErrorContext, 2771 "converted %d bytes to %d bytes of output\n", 2772 toconv, written); 2773#endif 2774 break; 2775 case -1: 2776#ifdef DEBUG_ENCODING 2777 
xmlGenericError(xmlGenericErrorContext, 2778 "output conversion failed by lack of space\n"); 2779#endif 2780 break; 2781 case -3: 2782#ifdef DEBUG_ENCODING 2783 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2784 toconv, written, in->use); 2785#endif 2786 break; 2787 case -2: { 2788 int len = in->use; 2789 const xmlChar *utf = (const xmlChar *) in->content; 2790 int cur; 2791 2792 cur = xmlGetUTF8Char(utf, &len); 2793 if ((charref_len != 0) && (written < charref_len)) { 2794 /* 2795 * We attempted to insert a character reference and failed. 2796 * Undo what was written and skip the remaining charref. 2797 */ 2798 out->use -= written; 2799 writtentot -= written; 2800 xmlBufferShrink(in, charref_len - written); 2801 charref_len = 0; 2802 2803 ret = -1; 2804 break; 2805 } else if (cur > 0) { 2806 xmlChar charref[20]; 2807 2808#ifdef DEBUG_ENCODING 2809 xmlGenericError(xmlGenericErrorContext, 2810 "handling output conversion error\n"); 2811 xmlGenericError(xmlGenericErrorContext, 2812 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2813 in->content[0], in->content[1], 2814 in->content[2], in->content[3]); 2815#endif 2816 /* 2817 * Removes the UTF8 sequence, and replace it by a charref 2818 * and continue the transcoding phase, hoping the error 2819 * did not mangle the encoder state. 
2820 */ 2821 charref_len = snprintf((char *) &charref[0], sizeof(charref), 2822 "&#%d;", cur); 2823 xmlBufferShrink(in, len); 2824 xmlBufferAddHead(in, charref, -1); 2825 2826 goto retry; 2827 } else { 2828 char buf[50]; 2829 2830 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2831 in->content[0], in->content[1], 2832 in->content[2], in->content[3]); 2833 buf[49] = 0; 2834 xmlEncodingErr(XML_I18N_CONV_FAILED, 2835 "output conversion failed due to conv error, bytes %s\n", 2836 buf); 2837 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2838 in->content[0] = ' '; 2839 } 2840 break; 2841 } 2842 } 2843 return(ret); 2844} 2845 2846/** 2847 * xmlCharEncCloseFunc: 2848 * @handler: char enconding transformation data structure 2849 * 2850 * Generic front-end for encoding handler close function 2851 * 2852 * Returns 0 if success, or -1 in case of error 2853 */ 2854int 2855xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2856 int ret = 0; 2857 int tofree = 0; 2858 if (handler == NULL) return(-1); 2859 if (handler->name == NULL) return(-1); 2860#ifdef LIBXML_ICONV_ENABLED 2861 /* 2862 * Iconv handlers can be used only once, free the whole block. 2863 * and the associated icon resources. 
2864 */ 2865 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { 2866 tofree = 1; 2867 if (handler->iconv_out != NULL) { 2868 if (iconv_close(handler->iconv_out)) 2869 ret = -1; 2870 handler->iconv_out = NULL; 2871 } 2872 if (handler->iconv_in != NULL) { 2873 if (iconv_close(handler->iconv_in)) 2874 ret = -1; 2875 handler->iconv_in = NULL; 2876 } 2877 } 2878#endif /* LIBXML_ICONV_ENABLED */ 2879#ifdef LIBXML_ICU_ENABLED 2880 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { 2881 tofree = 1; 2882 if (handler->uconv_out != NULL) { 2883 closeIcuConverter(handler->uconv_out); 2884 handler->uconv_out = NULL; 2885 } 2886 if (handler->uconv_in != NULL) { 2887 closeIcuConverter(handler->uconv_in); 2888 handler->uconv_in = NULL; 2889 } 2890 } 2891#endif 2892 if (tofree) { 2893 /* free up only dynamic handlers iconv/uconv */ 2894 if (handler->name != NULL) 2895 xmlFree(handler->name); 2896 handler->name = NULL; 2897 xmlFree(handler); 2898 } 2899#ifdef DEBUG_ENCODING 2900 if (ret) 2901 xmlGenericError(xmlGenericErrorContext, 2902 "failed to close the encoding handler\n"); 2903 else 2904 xmlGenericError(xmlGenericErrorContext, 2905 "closed the encoding handler\n"); 2906#endif 2907 2908 return(ret); 2909} 2910 2911/** 2912 * xmlByteConsumed: 2913 * @ctxt: an XML parser context 2914 * 2915 * This function provides the current index of the parser relative 2916 * to the start of the current entity. This function is computed in 2917 * bytes from the beginning starting at zero and finishing at the 2918 * size in byte of the file if parsing a file. The function is 2919 * of constant cost if the input is UTF-8 but can be costly if run 2920 * on non-UTF-8 input. 2921 * 2922 * Returns the index in bytes from the beginning of the entity or -1 2923 * in case the index could not be computed. 
2924 */ 2925long 2926xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2927 xmlParserInputPtr in; 2928 2929 if (ctxt == NULL) return(-1); 2930 in = ctxt->input; 2931 if (in == NULL) return(-1); 2932 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2933 unsigned int unused = 0; 2934 xmlCharEncodingHandler * handler = in->buf->encoder; 2935 /* 2936 * Encoding conversion, compute the number of unused original 2937 * bytes from the input not consumed and substract that from 2938 * the raw consumed value, this is not a cheap operation 2939 */ 2940 if (in->end - in->cur > 0) { 2941 unsigned char convbuf[32000]; 2942 const unsigned char *cur = (const unsigned char *)in->cur; 2943 int toconv = in->end - in->cur, written = 32000; 2944 2945 int ret; 2946 2947 if (handler->output != NULL) { 2948 do { 2949 toconv = in->end - cur; 2950 written = 32000; 2951 ret = handler->output(&convbuf[0], &written, 2952 cur, &toconv); 2953 if (ret == -1) return(-1); 2954 unused += written; 2955 cur += toconv; 2956 } while (ret == -2); 2957#ifdef LIBXML_ICONV_ENABLED 2958 } else if (handler->iconv_out != NULL) { 2959 do { 2960 toconv = in->end - cur; 2961 written = 32000; 2962 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], 2963 &written, cur, &toconv); 2964 if (ret < 0) { 2965 if (written > 0) 2966 ret = -2; 2967 else 2968 return(-1); 2969 } 2970 unused += written; 2971 cur += toconv; 2972 } while (ret == -2); 2973#endif 2974#ifdef LIBXML_ICU_ENABLED 2975 } else if (handler->uconv_out != NULL) { 2976 do { 2977 toconv = in->end - cur; 2978 written = 32000; 2979 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], 2980 &written, cur, &toconv); 2981 if (ret < 0) { 2982 if (written > 0) 2983 ret = -2; 2984 else 2985 return(-1); 2986 } 2987 unused += written; 2988 cur += toconv; 2989 } while (ret == -2); 2990#endif 2991 } else { 2992 /* could not find a converter */ 2993 return(-1); 2994 } 2995 } 2996 if (in->buf->rawconsumed < unused) 2997 return(-1); 2998 return(in->buf->rawconsumed - 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. At most @outlen bytes are stored in @out; when the
 * output is full the conversion stops on a character boundary and the
 * remaining input is left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (invalid UTF-8 or character not in the target set), -3 if the
 *     input ends in the middle of a multi-byte sequence, or -1 if a
 *     required argument is NULL.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* a non-positive capacity means that nothing may be written */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* every character yields exactly one byte: check the room first */
        if (out >= outend)
            break;
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        processed = in;
    }

done:
    /* report only what was completely converted, whatever the outcome */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3134 * Returns 0 if success, or -1 otherwise 3135 * The value of @inlen after return is the number of octets consumed 3136 * The value of @outlen after return is the number of ocetes produced. 3137 */ 3138static int 3139ISO8859xToUTF8(unsigned char* out, int *outlen, 3140 const unsigned char* in, int *inlen, 3141 unsigned short const *unicodetable) { 3142 unsigned char* outstart = out; 3143 unsigned char* outend; 3144 const unsigned char* instart = in; 3145 const unsigned char* inend; 3146 const unsigned char* instop; 3147 unsigned int c; 3148 3149 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 3150 (in == NULL) || (unicodetable == NULL)) 3151 return(-1); 3152 outend = out + *outlen; 3153 inend = in + *inlen; 3154 instop = inend; 3155 3156 while ((in < inend) && (out < outend - 2)) { 3157 if (*in >= 0x80) { 3158 c = unicodetable [*in - 0x80]; 3159 if (c == 0) { 3160 /* undefined code point */ 3161 *outlen = out - outstart; 3162 *inlen = in - instart; 3163 return (-1); 3164 } 3165 if (c < 0x800) { 3166 *out++ = ((c >> 6) & 0x1F) | 0xC0; 3167 *out++ = (c & 0x3F) | 0x80; 3168 } else { 3169 *out++ = ((c >> 12) & 0x0F) | 0xE0; 3170 *out++ = ((c >> 6) & 0x3F) | 0x80; 3171 *out++ = (c & 0x3F) | 0x80; 3172 } 3173 ++in; 3174 } 3175 if (instop - in > outend - out) instop = in + (outend - out); 3176 while ((*in < 0x80) && (in < instop)) { 3177 *out++ = *in++; 3178 } 3179 } 3180 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3181 *out++ = *in++; 3182 } 3183 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3184 *out++ = *in++; 3185 } 3186 *outlen = out - outstart; 3187 *inlen = in - instart; 3188 return (*outlen); 3189} 3190 3191 3192/************************************************************************ 3193 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding * 3194 ************************************************************************/ 3195 3196static unsigned short const xmlunicodetable_ISO8859_2 [128] = { 3197 0x0080, 0x0081, 0x0082, 0x0083, 
0x0084, 0x0085, 0x0086, 0x0087, 3198 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3199 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3200 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3201 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 3202 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 3203 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 3204 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 3205 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 3206 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 3207 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 3208 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 3209 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 3210 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 3211 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 3212 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 3213}; 3214 3215static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 3216 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3217 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3218 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3219 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3221 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3222 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3223 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3224 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3225 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3226 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3227 
"\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 3228 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 3231 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3232 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3234 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3235 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 3236 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 3237 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 3238 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 3239 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3240 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 3241 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 3242 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 3243}; 3244 3245static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 3246 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3247 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3248 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3249 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3250 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 3251 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 3252 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 3253 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 3254 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, 3255 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3256 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 3257 0x011c, 
0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 3258 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 3259 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3260 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 3261 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 3262}; 3263 3264static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 3265 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3266 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3267 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3268 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3270 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3272 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3273 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3274 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3275 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 3276 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 3277 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 3278 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 3279 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3280 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3281 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 3282 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3283 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3284 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3285 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3286 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3287 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3288 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3289 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 3291 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 3292 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3293 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3294 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3295 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 3296}; 3297 3298static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 3299 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3300 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3301 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3302 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3303 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 3304 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 3305 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 3306 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 3307 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3308 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 3309 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3310 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 3311 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3312 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 3313 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3314 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 3315}; 3316 3317static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 3318 
"\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3320 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3324 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3325 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3326 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3327 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 3328 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3329 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3330 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3331 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 3332 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 3333 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 3334 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 3335 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 3336 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 3337 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3338 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 3339 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3340 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3341 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3342 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 3343 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 3344 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 3345}; 3346 3347static unsigned short const 
xmlunicodetable_ISO8859_5 [128] = { 3348 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3349 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3350 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3351 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3352 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 3353 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 3354 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 3355 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 3356 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 3357 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 3358 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 3359 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 3360 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 3361 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 3362 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 3363 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 3364}; 3365 3366static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 3367 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3368 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3369 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3370 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3371 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3372 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3373 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3374 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3375 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3376 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 3377 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3378 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 3379 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3380 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3381 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3382 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3383 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3385 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3386 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3387 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3388 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3389 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3390 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3391 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3392 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3393 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3394}; 3395 3396static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 3397 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3398 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3399 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3400 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3401 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 3402 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, 3403 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3404 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 3405 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 3406 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 3407 
0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 3408 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3409 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 3410 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 3411 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3412 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3413}; 3414 3415static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 3416 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3417 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 3418 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3419 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3420 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3421 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3423 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3424 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3425 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3427 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 3433 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3434 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 3435 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3436 
"\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3437 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3438 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3439}; 3440 3441static unsigned short const xmlunicodetable_ISO8859_7 [128] = { 3442 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3443 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3444 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3445 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3446 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 3447 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 3448 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 3449 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 3450 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 3451 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 3452 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 3453 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 3454 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 3455 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 3456 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 3457 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 3458}; 3459 3460static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 3461 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 3462 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3463 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3464 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3465 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3466 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3467 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3468 
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3469 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3470 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 3471 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 3472 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3477 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3479 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3480 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3481 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3482 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3483 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3484 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 3485 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3486 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3487 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3488 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 3489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3490 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3491 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3492}; 3493 3494static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 3495 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3496 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3497 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 
0x0096, 0x0097, 3498 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3499 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3500 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3501 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3502 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 3503 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3504 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3505 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3506 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 3507 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 3508 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 3509 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 3510 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 3511}; 3512 3513static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 3514 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3515 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 3516 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3517 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3521 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3522 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3523 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 3524 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 3525 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3526 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3527 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3530 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 3531 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3532 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 3533 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3534 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3535 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 3538 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3541 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3542 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3543 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 3544 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3545}; 3546 3547static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 3548 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3549 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3550 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3551 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3552 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3553 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3554 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3555 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 3556 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3557 
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3558 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3559 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 3560 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3561 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3562 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3563 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 3564}; 3565 3566static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 3567 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3568 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3574 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3575 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3576 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3577 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3578 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3579 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 3580 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3581 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 3582 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 3584 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3585 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3586 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3587 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 3588 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3589 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3590}; 3591 3592static unsigned short const xmlunicodetable_ISO8859_10 [128] = { 3593 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3594 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3595 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3596 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3597 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 3598 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 3599 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 3600 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 3601 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3602 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 3603 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 3604 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3605 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3606 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 3607 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 3608 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, 3609}; 3610 3611static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 3612 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3614 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3617 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3618 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3619 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3620 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3621 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 3622 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3623 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3624 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3625 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 3626 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 3627 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3629 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 3630 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 3631 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3633 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3635 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3636 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3639 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 3640 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 3641 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 3642 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 3643}; 3644 3645static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 3646 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3647 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 
0x008e, 0x008f, 3648 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3649 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3650 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 3651 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, 3652 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 3653 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 3654 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 3655 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 3656 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 3657 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 3658 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 3659 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 3660 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 3661 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 3662}; 3663 3664static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 3665 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3666 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3667 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3668 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3669 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3670 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3671 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3672 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3673 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3674 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3675 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3677 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 3680 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3681 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3682 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3683 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 3684 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3688 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3689 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3692}; 3693 3694static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 3695 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3696 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3697 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3698 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3699 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 3700 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 3701 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 3702 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 3703 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 3704 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 3705 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 3706 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 3707 0x0105, 
0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 3708 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 3709 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 3710 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 3711}; 3712 3713static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 3714 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3715 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3716 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3717 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3718 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3719 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3720 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3721 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3722 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3723 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 3724 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 3725 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 3731 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3732 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3733 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 3734 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 3735 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 3736 
"\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 3737 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 3738 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 3739 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 3740 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3741 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3742 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3743 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3744 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3745}; 3746 3747static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3748 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3749 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3750 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3751 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3752 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3753 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3754 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3755 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3756 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3757 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3758 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3759 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3760 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3761 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3762 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3763 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3764}; 3765 3766static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3767 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3768 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3769 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3770 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3771 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3772 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3773 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3774 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3775 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3776 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3777 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3780 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3781 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3782 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3784 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3785 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3786 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3788 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3789 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3790 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3791 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3792 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3793 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3794 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3795 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3796 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3797 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3801 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3802 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3803 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3804 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3806 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3807 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3808 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3809 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3810}; 3811 3812static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3813 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3814 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3815 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3816 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3817 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3818 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3819 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3820 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3821 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3822 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3823 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3824 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3825 0x00e0, 
0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3826 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3827 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3828 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3829}; 3830 3831static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3832 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3833 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3834 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3835 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3836 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3837 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3838 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3839 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3840 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3841 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3842 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 3843 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3844 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3845 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3846 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3847 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3848 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3849 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3850 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3851 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3852 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3853 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3854 
"\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3855 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3856 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3857 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3858 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3859}; 3860 3861static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3862 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3863 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3864 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3865 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3866 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3867 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3868 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3869 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3870 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3871 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3872 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, 3873 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3874 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3875 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3876 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3877 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3878}; 3879 3880static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3881 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3882 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3883 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3884 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3885 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3886 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3887 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3888 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3889 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3890 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3891 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3892 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3893 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3894 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3895 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3896 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3897 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3898 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3899 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3900 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3901 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3902 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3903 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3905 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3909 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3910 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3911 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3912 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3913 
"\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3914 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3916 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3917 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3918 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3919 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3920}; 3921 3922 3923/* 3924 * auto-generated functions for ISO-8859-2 .. ISO-8859-16 3925 */ 3926 3927static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3928 const unsigned char* in, int *inlen) { 3929 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3930} 3931static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3932 const unsigned char* in, int *inlen) { 3933 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3934} 3935 3936static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3937 const unsigned char* in, int *inlen) { 3938 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3939} 3940static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3941 const unsigned char* in, int *inlen) { 3942 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3943} 3944 3945static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3946 const unsigned char* in, int *inlen) { 3947 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3948} 3949static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3950 const unsigned char* in, int *inlen) { 3951 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3952} 3953 3954static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3955 const unsigned char* in, int *inlen) { 3956 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3957} 3958static int 
UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3959 const unsigned char* in, int *inlen) { 3960 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3961} 3962 3963static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3964 const unsigned char* in, int *inlen) { 3965 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3966} 3967static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3968 const unsigned char* in, int *inlen) { 3969 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3970} 3971 3972static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3973 const unsigned char* in, int *inlen) { 3974 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3975} 3976static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3977 const unsigned char* in, int *inlen) { 3978 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3979} 3980 3981static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3982 const unsigned char* in, int *inlen) { 3983 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3984} 3985static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3986 const unsigned char* in, int *inlen) { 3987 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3988} 3989 3990static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3991 const unsigned char* in, int *inlen) { 3992 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3993} 3994static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3995 const unsigned char* in, int *inlen) { 3996 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3997} 3998 3999static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 4000 const unsigned char* in, int *inlen) { 4001 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 4002} 4003static int 
UTF8ToISO8859_10 (unsigned char* out, int *outlen, 4004 const unsigned char* in, int *inlen) { 4005 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 4006} 4007 4008static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 4009 const unsigned char* in, int *inlen) { 4010 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 4011} 4012static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 4013 const unsigned char* in, int *inlen) { 4014 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 4015} 4016 4017static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 4018 const unsigned char* in, int *inlen) { 4019 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 4020} 4021static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 4022 const unsigned char* in, int *inlen) { 4023 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 4024} 4025 4026static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 4027 const unsigned char* in, int *inlen) { 4028 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 4029} 4030static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 4031 const unsigned char* in, int *inlen) { 4032 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 4033} 4034 4035static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 4036 const unsigned char* in, int *inlen) { 4037 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 4038} 4039static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 4040 const unsigned char* in, int *inlen) { 4041 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 4042} 4043 4044static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 4045 const unsigned char* in, int *inlen) { 4046 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 4047} 
4048static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 4049 const unsigned char* in, int *inlen) { 4050 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16); 4051} 4052 4053static void 4054xmlRegisterCharEncodingHandlersISO8859x (void) { 4055 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 4056 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 4057 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 4058 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 4059 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 4060 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 4061 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 4062 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 4063 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 4064 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 4065 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 4066 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 4067 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 4068 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 4069} 4070 4071#endif 4072#endif 4073 4074#define bottom_encoding 4075#include "elfgcchack.h" 4076