1/* 2 * parserInternals.c : Internal routines (and obsolete ones) needed for the 3 * XML and HTML parsers. 4 * 5 * See Copyright for the status of this software. 6 * 7 * daniel@veillard.com 8 */ 9 10#define IN_LIBXML 11#include "libxml.h" 12 13#if defined(WIN32) && !defined (__CYGWIN__) 14#define XML_DIR_SEP '\\' 15#else 16#define XML_DIR_SEP '/' 17#endif 18 19#include <string.h> 20#ifdef HAVE_CTYPE_H 21#include <ctype.h> 22#endif 23#ifdef HAVE_STDLIB_H 24#include <stdlib.h> 25#endif 26#ifdef HAVE_SYS_STAT_H 27#include <sys/stat.h> 28#endif 29#ifdef HAVE_FCNTL_H 30#include <fcntl.h> 31#endif 32#ifdef HAVE_UNISTD_H 33#include <unistd.h> 34#endif 35#ifdef HAVE_ZLIB_H 36#include <zlib.h> 37#endif 38 39#include <libxml/xmlmemory.h> 40#include <libxml/tree.h> 41#include <libxml/parser.h> 42#include <libxml/parserInternals.h> 43#include <libxml/valid.h> 44#include <libxml/entities.h> 45#include <libxml/xmlerror.h> 46#include <libxml/encoding.h> 47#include <libxml/valid.h> 48#include <libxml/xmlIO.h> 49#include <libxml/uri.h> 50#include <libxml/dict.h> 51#include <libxml/SAX.h> 52#ifdef LIBXML_CATALOG_ENABLED 53#include <libxml/catalog.h> 54#endif 55#include <libxml/globals.h> 56#include <libxml/chvalid.h> 57 58/* 59 * Various global defaults for parsing 60 */ 61 62/** 63 * xmlCheckVersion: 64 * @version: the include version number 65 * 66 * check the compiled lib version against the include one. 67 * This can warn or immediately kill the application 68 */ 69void 70xmlCheckVersion(int version) { 71 int myversion = (int) LIBXML_VERSION; 72 73 xmlInitParser(); 74 75 if ((myversion / 10000) != (version / 10000)) { 76 xmlGenericError(xmlGenericErrorContext, 77 "Fatal: program compiled against libxml %d using libxml %d\n", 78 (version / 10000), (myversion / 10000)); 79 fprintf(stderr, 80 "Fatal: program compiled against libxml %d using libxml %d\n", 81 (version / 10000), (myversion / 10000)); 82 } 83 if ((myversion / 100) < (version / 100)) { 84 xmlGenericError(xmlGenericErrorContext, 85 "Warning: program compiled against libxml %d using older %d\n", 86 (version / 100), (myversion / 100)); 87 } 88} 89 90 91/************************************************************************ 92 * * 93 * Some factorized error routines * 94 * * 95 ************************************************************************/ 96 97 98/** 99 * xmlErrMemory: 100 * @ctxt: an XML parser context 101 * @extra: extra informations 102 * 103 * Handle a redefinition of attribute error 104 */ 105void 106xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 107{ 108 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 109 (ctxt->instate == XML_PARSER_EOF)) 110 return; 111 if (ctxt != NULL) { 112 ctxt->errNo = XML_ERR_NO_MEMORY; 113 ctxt->instate = XML_PARSER_EOF; 114 ctxt->disableSAX = 1; 115 } 116 if (extra) 117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 118 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 119 NULL, NULL, 0, 0, 120 "Memory allocation failed : %s\n", extra); 121 else 122 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 123 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 124 NULL, NULL, 0, 0, "Memory allocation failed\n"); 125} 126 127/** 128 * __xmlErrEncoding: 129 * @ctxt: an XML parser context 130 * @xmlerr: the error number 131 * @msg: the error message 132 * @str1: an string info 133 * @str2: an string info 134 * 135 * Handle an encoding error 136 */ 137void 138__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 139 const char *msg, const xmlChar * str1, const xmlChar * str2) 140{ 141 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 142 (ctxt->instate == XML_PARSER_EOF)) 143 return; 144 if (ctxt != NULL) 145 ctxt->errNo = xmlerr; 146 __xmlRaiseError(NULL, NULL, NULL, 147 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 148 NULL, 0, (const char *) str1, (const char *) str2, 149 NULL, 0, 0, msg, str1, str2); 150 if (ctxt != NULL) { 151 ctxt->wellFormed = 0; 152 if (ctxt->recovery == 0) 153 ctxt->disableSAX = 1; 154 } 155} 156 157/** 158 * xmlErrInternal: 159 * @ctxt: an XML parser context 160 * @msg: the error message 161 * @str: error informations 162 * 163 * Handle an internal error 164 */ 165static void 166xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 167{ 168 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 169 (ctxt->instate == XML_PARSER_EOF)) 170 return; 171 if (ctxt != NULL) 172 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 173 __xmlRaiseError(NULL, NULL, NULL, 174 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 175 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 176 0, 0, msg, str); 177 if (ctxt != NULL) { 178 ctxt->wellFormed = 0; 179 if (ctxt->recovery == 0) 180 ctxt->disableSAX = 1; 181 } 182} 183 184/** 185 * xmlErrEncodingInt: 186 * @ctxt: an XML parser context 187 * @error: the error number 188 * @msg: the error message 189 * @val: an integer value 190 * 191 * n encoding error 192 */ 193static void 194xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 195 const char *msg, int val) 196{ 197 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 198 (ctxt->instate == XML_PARSER_EOF)) 199 return; 200 if (ctxt != NULL) 201 ctxt->errNo = error; 202 __xmlRaiseError(NULL, NULL, NULL, 203 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 204 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 205 if (ctxt != NULL) { 206 ctxt->wellFormed = 0; 207 if (ctxt->recovery == 0) 208 ctxt->disableSAX = 1; 209 } 210} 211 212/** 213 * xmlIsLetter: 214 * @c: an unicode character (int) 215 * 216 * Check whether the character is allowed by the production 217 * [84] Letter ::= BaseChar | Ideographic 218 * 219 * Returns 0 if not, non-zero otherwise 220 */ 221int 222xmlIsLetter(int c) { 223 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 224} 225 226/************************************************************************ 227 * * 228 * Input handling functions for progressive parsing * 229 * * 230 ************************************************************************/ 231 232/* #define DEBUG_INPUT */ 233/* #define DEBUG_STACK */ 234/* #define DEBUG_PUSH */ 235 236 237/* we need to keep enough input to show errors in context */ 238#define LINE_LEN 80 239 240#ifdef DEBUG_INPUT 241#define CHECK_BUFFER(in) check_buffer(in) 242 243static 244void check_buffer(xmlParserInputPtr in) { 245 if (in->base != in->buf->buffer->content) { 246 xmlGenericError(xmlGenericErrorContext, 247 "xmlParserInput: base mismatch problem\n"); 248 } 249 if (in->cur < in->base) { 250 xmlGenericError(xmlGenericErrorContext, 251 "xmlParserInput: cur < base problem\n"); 252 } 253 if (in->cur > in->base + in->buf->buffer->use) { 254 xmlGenericError(xmlGenericErrorContext, 255 "xmlParserInput: cur > base + use problem\n"); 256 } 257 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", 258 (int) in, (int) in->buf->buffer->content, in->cur - in->base, 259 in->buf->buffer->use, in->buf->buffer->size); 260} 261 262#else 263#define CHECK_BUFFER(in) 264#endif 265 266 267/** 268 * xmlParserInputRead: 269 * @in: an XML parser input 270 * @len: an indicative size for the lookahead 271 * 272 * This function refresh the input for the parser. It doesn't try to 273 * preserve pointers to the input buffer, and discard already read data 274 * 275 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 276 * end of this entity 277 */ 278int 279xmlParserInputRead(xmlParserInputPtr in, int len) { 280 int ret; 281 int used; 282 int indx; 283 284 if (in == NULL) return(-1); 285#ifdef DEBUG_INPUT 286 xmlGenericError(xmlGenericErrorContext, "Read\n"); 287#endif 288 if (in->buf == NULL) return(-1); 289 if (in->base == NULL) return(-1); 290 if (in->cur == NULL) return(-1); 291 if (in->buf->buffer == NULL) return(-1); 292 if (in->buf->readcallback == NULL) return(-1); 293 294 CHECK_BUFFER(in); 295 296 used = in->cur - in->buf->buffer->content; 297 ret = xmlBufferShrink(in->buf->buffer, used); 298 if (ret > 0) { 299 in->cur -= ret; 300 in->consumed += ret; 301 } 302 ret = xmlParserInputBufferRead(in->buf, len); 303 if (in->base != in->buf->buffer->content) { 304 /* 305 * the buffer has been reallocated 306 */ 307 indx = in->cur - in->base; 308 in->base = in->buf->buffer->content; 309 in->cur = &in->buf->buffer->content[indx]; 310 } 311 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 312 313 CHECK_BUFFER(in); 314 315 return(ret); 316} 317 318/** 319 * xmlParserInputGrow: 320 * @in: an XML parser input 321 * @len: an indicative size for the lookahead 322 * 323 * This function increase the input for the parser. It tries to 324 * preserve pointers to the input buffer, and keep already read data 325 * 326 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the 327 * end of this entity 328 */ 329int 330xmlParserInputGrow(xmlParserInputPtr in, int len) { 331 int ret; 332 int indx; 333 334 if (in == NULL) return(-1); 335#ifdef DEBUG_INPUT 336 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 337#endif 338 if (in->buf == NULL) return(-1); 339 if (in->base == NULL) return(-1); 340 if (in->cur == NULL) return(-1); 341 if (in->buf->buffer == NULL) return(-1); 342 343 CHECK_BUFFER(in); 344 345 indx = in->cur - in->base; 346 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { 347 348 CHECK_BUFFER(in); 349 350 return(0); 351 } 352 if (in->buf->readcallback != NULL) 353 ret = xmlParserInputBufferGrow(in->buf, len); 354 else 355 return(0); 356 357 /* 358 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 359 * block, but we use it really as an integer to do some 360 * pointer arithmetic. Insure will raise it as a bug but in 361 * that specific case, that's not ! 362 */ 363 if (in->base != in->buf->buffer->content) { 364 /* 365 * the buffer has been reallocated 366 */ 367 indx = in->cur - in->base; 368 in->base = in->buf->buffer->content; 369 in->cur = &in->buf->buffer->content[indx]; 370 } 371 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 372 373 CHECK_BUFFER(in); 374 375 return(ret); 376} 377 378/** 379 * xmlParserInputShrink: 380 * @in: an XML parser input 381 * 382 * This function removes used input for the parser. 383 */ 384void 385xmlParserInputShrink(xmlParserInputPtr in) { 386 int used; 387 int ret; 388 int indx; 389 390#ifdef DEBUG_INPUT 391 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 392#endif 393 if (in == NULL) return; 394 if (in->buf == NULL) return; 395 if (in->base == NULL) return; 396 if (in->cur == NULL) return; 397 if (in->buf->buffer == NULL) return; 398 399 CHECK_BUFFER(in); 400 401 used = in->cur - in->buf->buffer->content; 402 /* 403 * Do not shrink on large buffers whose only a tiny fraction 404 * was consumed 405 */ 406 if (used > INPUT_CHUNK) { 407 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); 408 if (ret > 0) { 409 in->cur -= ret; 410 in->consumed += ret; 411 } 412 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 413 } 414 415 CHECK_BUFFER(in); 416 417 if (in->buf->buffer->use > INPUT_CHUNK) { 418 return; 419 } 420 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 421 if (in->base != in->buf->buffer->content) { 422 /* 423 * the buffer has been reallocated 424 */ 425 indx = in->cur - in->base; 426 in->base = in->buf->buffer->content; 427 in->cur = &in->buf->buffer->content[indx]; 428 } 429 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 430 431 CHECK_BUFFER(in); 432} 433 434/************************************************************************ 435 * * 436 * UTF8 character input and related functions * 437 * * 438 ************************************************************************/ 439 440/** 441 * xmlNextChar: 442 * @ctxt: the XML parser context 443 * 444 * Skip to the next char input char. 445 */ 446 447void 448xmlNextChar(xmlParserCtxtPtr ctxt) 449{ 450 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 451 (ctxt->input == NULL)) 452 return; 453 454 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 455 if ((*ctxt->input->cur == 0) && 456 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && 457 (ctxt->instate != XML_PARSER_COMMENT)) { 458 /* 459 * If we are at the end of the current entity and 460 * the context allows it, we pop consumed entities 461 * automatically. 462 * the auto closing should be blocked in other cases 463 */ 464 xmlPopInput(ctxt); 465 } else { 466 const unsigned char *cur; 467 unsigned char c; 468 469 /* 470 * 2.11 End-of-Line Handling 471 * the literal two-character sequence "#xD#xA" or a standalone 472 * literal #xD, an XML processor must pass to the application 473 * the single character #xA. 474 */ 475 if (*(ctxt->input->cur) == '\n') { 476 ctxt->input->line++; ctxt->input->col = 1; 477 } else 478 ctxt->input->col++; 479 480 /* 481 * We are supposed to handle UTF8, check it's valid 482 * From rfc2044: encoding of the Unicode values on UTF-8: 483 * 484 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 485 * 0000 0000-0000 007F 0xxxxxxx 486 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 487 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 488 * 489 * Check for the 0x110000 limit too 490 */ 491 cur = ctxt->input->cur; 492 493 c = *cur; 494 if (c & 0x80) { 495 if (c == 0xC0) 496 goto encoding_error; 497 if (cur[1] == 0) 498 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 499 if ((cur[1] & 0xc0) != 0x80) 500 goto encoding_error; 501 if ((c & 0xe0) == 0xe0) { 502 unsigned int val; 503 504 if (cur[2] == 0) 505 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 506 if ((cur[2] & 0xc0) != 0x80) 507 goto encoding_error; 508 if ((c & 0xf0) == 0xf0) { 509 if (cur[3] == 0) 510 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 511 if (((c & 0xf8) != 0xf0) || 512 ((cur[3] & 0xc0) != 0x80)) 513 goto encoding_error; 514 /* 4-byte code */ 515 ctxt->input->cur += 4; 516 val = (cur[0] & 0x7) << 18; 517 val |= (cur[1] & 0x3f) << 12; 518 val |= (cur[2] & 0x3f) << 6; 519 val |= cur[3] & 0x3f; 520 } else { 521 /* 3-byte code */ 522 ctxt->input->cur += 3; 523 val = (cur[0] & 0xf) << 12; 524 val |= (cur[1] & 0x3f) << 6; 525 val |= cur[2] & 0x3f; 526 } 527 if (((val > 0xd7ff) && (val < 0xe000)) || 528 ((val > 0xfffd) && (val < 0x10000)) || 529 (val >= 0x110000)) { 530 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 531 "Char 0x%X out of allowed range\n", 532 val); 533 } 534 } else 535 /* 2-byte code */ 536 ctxt->input->cur += 2; 537 } else 538 /* 1-byte code */ 539 ctxt->input->cur++; 540 541 ctxt->nbChars++; 542 if (*ctxt->input->cur == 0) 543 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 544 } 545 } else { 546 /* 547 * Assume it's a fixed length encoding (1) with 548 * a compatible encoding for the ASCII set, since 549 * XML constructs only use < 128 chars 550 */ 551 552 if (*(ctxt->input->cur) == '\n') { 553 ctxt->input->line++; ctxt->input->col = 1; 554 } else 555 ctxt->input->col++; 556 ctxt->input->cur++; 557 ctxt->nbChars++; 558 if (*ctxt->input->cur == 0) 559 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 560 } 561 if ((*ctxt->input->cur == '%') && (!ctxt->html)) 562 xmlParserHandlePEReference(ctxt); 563 if ((*ctxt->input->cur == 0) && 564 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 565 xmlPopInput(ctxt); 566 return; 567encoding_error: 568 /* 569 * If we detect an UTF8 error that probably mean that the 570 * input encoding didn't get properly advertised in the 571 * declaration header. Report the error and switch the encoding 572 * to ISO-Latin-1 (if you don't like this policy, just declare the 573 * encoding !) 574 */ 575 if ((ctxt == NULL) || (ctxt->input == NULL) || 576 (ctxt->input->end - ctxt->input->cur < 4)) { 577 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 578 "Input is not proper UTF-8, indicate encoding !\n", 579 NULL, NULL); 580 } else { 581 char buffer[150]; 582 583 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 584 ctxt->input->cur[0], ctxt->input->cur[1], 585 ctxt->input->cur[2], ctxt->input->cur[3]); 586 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 587 "Input is not proper UTF-8, indicate encoding !\n%s", 588 BAD_CAST buffer, NULL); 589 } 590 ctxt->charset = XML_CHAR_ENCODING_8859_1; 591 ctxt->input->cur++; 592 return; 593} 594 595/** 596 * xmlCurrentChar: 597 * @ctxt: the XML parser context 598 * @len: pointer to the length of the char read 599 * 600 * The current char value, if using UTF-8 this may actually span multiple 601 * bytes in the input buffer. Implement the end of line normalization: 602 * 2.11 End-of-Line Handling 603 * Wherever an external parsed entity or the literal entity value 604 * of an internal parsed entity contains either the literal two-character 605 * sequence "#xD#xA" or a standalone literal #xD, an XML processor 606 * must pass to the application the single character #xA. 607 * This behavior can conveniently be produced by normalizing all 608 * line breaks to #xA on input, before parsing.) 609 * 610 * Returns the current char value and its length 611 */ 612 613int 614xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 615 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 616 if (ctxt->instate == XML_PARSER_EOF) 617 return(0); 618 619 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 620 *len = 1; 621 return((int) *ctxt->input->cur); 622 } 623 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 624 /* 625 * We are supposed to handle UTF8, check it's valid 626 * From rfc2044: encoding of the Unicode values on UTF-8: 627 * 628 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 629 * 0000 0000-0000 007F 0xxxxxxx 630 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 631 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 632 * 633 * Check for the 0x110000 limit too 634 */ 635 const unsigned char *cur = ctxt->input->cur; 636 unsigned char c; 637 unsigned int val; 638 639 c = *cur; 640 if (c & 0x80) { 641 if (c == 0xC0) 642 goto encoding_error; 643 if (cur[1] == 0) 644 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 645 if ((cur[1] & 0xc0) != 0x80) 646 goto encoding_error; 647 if ((c & 0xe0) == 0xe0) { 648 649 if (cur[2] == 0) 650 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 651 if ((cur[2] & 0xc0) != 0x80) 652 goto encoding_error; 653 if ((c & 0xf0) == 0xf0) { 654 if (cur[3] == 0) 655 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 656 if (((c & 0xf8) != 0xf0) || 657 ((cur[3] & 0xc0) != 0x80)) 658 goto encoding_error; 659 /* 4-byte code */ 660 *len = 4; 661 val = (cur[0] & 0x7) << 18; 662 val |= (cur[1] & 0x3f) << 12; 663 val |= (cur[2] & 0x3f) << 6; 664 val |= cur[3] & 0x3f; 665 } else { 666 /* 3-byte code */ 667 *len = 3; 668 val = (cur[0] & 0xf) << 12; 669 val |= (cur[1] & 0x3f) << 6; 670 val |= cur[2] & 0x3f; 671 } 672 } else { 673 /* 2-byte code */ 674 *len = 2; 675 val = (cur[0] & 0x1f) << 6; 676 val |= cur[1] & 0x3f; 677 } 678 if (!IS_CHAR(val)) { 679 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 680 "Char 0x%X out of allowed range\n", val); 681 } 682 return(val); 683 } else { 684 /* 1-byte code */ 685 *len = 1; 686 if (*ctxt->input->cur == 0xD) { 687 if (ctxt->input->cur[1] == 0xA) { 688 ctxt->nbChars++; 689 ctxt->input->cur++; 690 } 691 return(0xA); 692 } 693 return((int) *ctxt->input->cur); 694 } 695 } 696 /* 697 * Assume it's a fixed length encoding (1) with 698 * a compatible encoding for the ASCII set, since 699 * XML constructs only use < 128 chars 700 */ 701 *len = 1; 702 if (*ctxt->input->cur == 0xD) { 703 if (ctxt->input->cur[1] == 0xA) { 704 ctxt->nbChars++; 705 ctxt->input->cur++; 706 } 707 return(0xA); 708 } 709 return((int) *ctxt->input->cur); 710encoding_error: 711 /* 712 * An encoding problem may arise from a truncated input buffer 713 * splitting a character in the middle. In that case do not raise 714 * an error but return 0 to endicate an end of stream problem 715 */ 716 if (ctxt->input->end - ctxt->input->cur < 4) { 717 *len = 0; 718 return(0); 719 } 720 721 /* 722 * If we detect an UTF8 error that probably mean that the 723 * input encoding didn't get properly advertised in the 724 * declaration header. Report the error and switch the encoding 725 * to ISO-Latin-1 (if you don't like this policy, just declare the 726 * encoding !) 727 */ 728 { 729 char buffer[150]; 730 731 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 732 ctxt->input->cur[0], ctxt->input->cur[1], 733 ctxt->input->cur[2], ctxt->input->cur[3]); 734 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 735 "Input is not proper UTF-8, indicate encoding !\n%s", 736 BAD_CAST buffer, NULL); 737 } 738 ctxt->charset = XML_CHAR_ENCODING_8859_1; 739 *len = 1; 740 return((int) *ctxt->input->cur); 741} 742 743/** 744 * xmlStringCurrentChar: 745 * @ctxt: the XML parser context 746 * @cur: pointer to the beginning of the char 747 * @len: pointer to the length of the char read 748 * 749 * The current char value, if using UTF-8 this may actually span multiple 750 * bytes in the input buffer. 751 * 752 * Returns the current char value and its length 753 */ 754 755int 756xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 757{ 758 if ((len == NULL) || (cur == NULL)) return(0); 759 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 760 /* 761 * We are supposed to handle UTF8, check it's valid 762 * From rfc2044: encoding of the Unicode values on UTF-8: 763 * 764 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 765 * 0000 0000-0000 007F 0xxxxxxx 766 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 767 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 768 * 769 * Check for the 0x110000 limit too 770 */ 771 unsigned char c; 772 unsigned int val; 773 774 c = *cur; 775 if (c & 0x80) { 776 if ((cur[1] & 0xc0) != 0x80) 777 goto encoding_error; 778 if ((c & 0xe0) == 0xe0) { 779 780 if ((cur[2] & 0xc0) != 0x80) 781 goto encoding_error; 782 if ((c & 0xf0) == 0xf0) { 783 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 784 goto encoding_error; 785 /* 4-byte code */ 786 *len = 4; 787 val = (cur[0] & 0x7) << 18; 788 val |= (cur[1] & 0x3f) << 12; 789 val |= (cur[2] & 0x3f) << 6; 790 val |= cur[3] & 0x3f; 791 } else { 792 /* 3-byte code */ 793 *len = 3; 794 val = (cur[0] & 0xf) << 12; 795 val |= (cur[1] & 0x3f) << 6; 796 val |= cur[2] & 0x3f; 797 } 798 } else { 799 /* 2-byte code */ 800 *len = 2; 801 val = (cur[0] & 0x1f) << 6; 802 val |= cur[1] & 0x3f; 803 } 804 if (!IS_CHAR(val)) { 805 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 806 "Char 0x%X out of allowed range\n", val); 807 } 808 return (val); 809 } else { 810 /* 1-byte code */ 811 *len = 1; 812 return ((int) *cur); 813 } 814 } 815 /* 816 * Assume it's a fixed length encoding (1) with 817 * a compatible encoding for the ASCII set, since 818 * XML constructs only use < 128 chars 819 */ 820 *len = 1; 821 return ((int) *cur); 822encoding_error: 823 824 /* 825 * An encoding problem may arise from a truncated input buffer 826 * splitting a character in the middle. In that case do not raise 827 * an error but return 0 to endicate an end of stream problem 828 */ 829 if ((ctxt == NULL) || (ctxt->input == NULL) || 830 (ctxt->input->end - ctxt->input->cur < 4)) { 831 *len = 0; 832 return(0); 833 } 834 /* 835 * If we detect an UTF8 error that probably mean that the 836 * input encoding didn't get properly advertised in the 837 * declaration header. Report the error and switch the encoding 838 * to ISO-Latin-1 (if you don't like this policy, just declare the 839 * encoding !) 840 */ 841 { 842 char buffer[150]; 843 844 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 845 ctxt->input->cur[0], ctxt->input->cur[1], 846 ctxt->input->cur[2], ctxt->input->cur[3]); 847 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 848 "Input is not proper UTF-8, indicate encoding !\n%s", 849 BAD_CAST buffer, NULL); 850 } 851 *len = 1; 852 return ((int) *cur); 853} 854 855/** 856 * xmlCopyCharMultiByte: 857 * @out: pointer to an array of xmlChar 858 * @val: the char value 859 * 860 * append the char value in the array 861 * 862 * Returns the number of xmlChar written 863 */ 864int 865xmlCopyCharMultiByte(xmlChar *out, int val) { 866 if (out == NULL) return(0); 867 /* 868 * We are supposed to handle UTF8, check it's valid 869 * From rfc2044: encoding of the Unicode values on UTF-8: 870 * 871 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 872 * 0000 0000-0000 007F 0xxxxxxx 873 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 874 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 875 */ 876 if (val >= 0x80) { 877 xmlChar *savedout = out; 878 int bits; 879 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 880 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 881 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 882 else { 883 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 884 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 885 val); 886 return(0); 887 } 888 for ( ; bits >= 0; bits-= 6) 889 *out++= ((val >> bits) & 0x3F) | 0x80 ; 890 return (out - savedout); 891 } 892 *out = (xmlChar) val; 893 return 1; 894} 895 896/** 897 * xmlCopyChar: 898 * @len: Ignored, compatibility 899 * @out: pointer to an array of xmlChar 900 * @val: the char value 901 * 902 * append the char value in the array 903 * 904 * Returns the number of xmlChar written 905 */ 906 907int 908xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 909 if (out == NULL) return(0); 910 /* the len parameter is ignored */ 911 if (val >= 0x80) { 912 return(xmlCopyCharMultiByte (out, val)); 913 } 914 *out = (xmlChar) val; 915 return 1; 916} 917 918/************************************************************************ 919 * * 920 * Commodity functions to switch encodings * 921 * * 922 ************************************************************************/ 923 924/** 925 * xmlSwitchEncoding: 926 * @ctxt: the parser context 927 * @enc: the encoding value (number) 928 * 929 * change the input functions when discovering the character encoding 930 * of a given entity. 931 * 932 * Returns 0 in case of success, -1 otherwise 933 */ 934int 935xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 936{ 937 xmlCharEncodingHandlerPtr handler; 938 939 if (ctxt == NULL) return(-1); 940 switch (enc) { 941 case XML_CHAR_ENCODING_ERROR: 942 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 943 "encoding unknown\n", NULL, NULL); 944 return(-1); 945 case XML_CHAR_ENCODING_NONE: 946 /* let's assume it's UTF-8 without the XML decl */ 947 ctxt->charset = XML_CHAR_ENCODING_UTF8; 948 return(0); 949 case XML_CHAR_ENCODING_UTF8: 950 /* default encoding, no conversion should be needed */ 951 ctxt->charset = XML_CHAR_ENCODING_UTF8; 952 953 /* 954 * Errata on XML-1.0 June 20 2001 955 * Specific handling of the Byte Order Mark for 956 * UTF-8 957 */ 958 if ((ctxt->input != NULL) && 959 (ctxt->input->cur[0] == 0xEF) && 960 (ctxt->input->cur[1] == 0xBB) && 961 (ctxt->input->cur[2] == 0xBF)) { 962 ctxt->input->cur += 3; 963 } 964 return(0); 965 case XML_CHAR_ENCODING_UTF16LE: 966 case XML_CHAR_ENCODING_UTF16BE: 967 /*The raw input characters are encoded 968 *in UTF-16. As we expect this function 969 *to be called after xmlCharEncInFunc, we expect 970 *ctxt->input->cur to contain UTF-8 encoded characters. 971 *So the raw UTF16 Byte Order Mark 972 *has also been converted into 973 *an UTF-8 BOM. Let's skip that BOM. 974 */ 975 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 976 (ctxt->input->cur[0] == 0xEF) && 977 (ctxt->input->cur[1] == 0xBB) && 978 (ctxt->input->cur[2] == 0xBF)) { 979 ctxt->input->cur += 3; 980 } 981 break ; 982 default: 983 break; 984 } 985 handler = xmlGetCharEncodingHandler(enc); 986 if (handler == NULL) { 987 /* 988 * Default handlers. 989 */ 990 switch (enc) { 991 case XML_CHAR_ENCODING_ASCII: 992 /* default encoding, no conversion should be needed */ 993 ctxt->charset = XML_CHAR_ENCODING_UTF8; 994 return(0); 995 case XML_CHAR_ENCODING_UTF16LE: 996 break; 997 case XML_CHAR_ENCODING_UTF16BE: 998 break; 999 case XML_CHAR_ENCODING_UCS4LE: 1000 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1001 "encoding not supported %s\n", 1002 BAD_CAST "USC4 little endian", NULL); 1003 break; 1004 case XML_CHAR_ENCODING_UCS4BE: 1005 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1006 "encoding not supported %s\n", 1007 BAD_CAST "USC4 big endian", NULL); 1008 break; 1009 case XML_CHAR_ENCODING_EBCDIC: 1010 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1011 "encoding not supported %s\n", 1012 BAD_CAST "EBCDIC", NULL); 1013 break; 1014 case XML_CHAR_ENCODING_UCS4_2143: 1015 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1016 "encoding not supported %s\n", 1017 BAD_CAST "UCS4 2143", NULL); 1018 break; 1019 case XML_CHAR_ENCODING_UCS4_3412: 1020 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1021 "encoding not supported %s\n", 1022 BAD_CAST "UCS4 3412", NULL); 1023 break; 1024 case XML_CHAR_ENCODING_UCS2: 1025 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1026 "encoding not supported %s\n", 1027 BAD_CAST "UCS2", NULL); 1028 break; 1029 case XML_CHAR_ENCODING_8859_1: 1030 case XML_CHAR_ENCODING_8859_2: 1031 case XML_CHAR_ENCODING_8859_3: 1032 case XML_CHAR_ENCODING_8859_4: 1033 case XML_CHAR_ENCODING_8859_5: 1034 case XML_CHAR_ENCODING_8859_6: 1035 case XML_CHAR_ENCODING_8859_7: 1036 case XML_CHAR_ENCODING_8859_8: 1037 case XML_CHAR_ENCODING_8859_9: 1038 /* 1039 * We used to keep the internal content in the 1040 * document encoding however this turns being unmaintainable 1041 * So xmlGetCharEncodingHandler() will return non-null 1042 * values for this now. 1043 */ 1044 if ((ctxt->inputNr == 1) && 1045 (ctxt->encoding == NULL) && 1046 (ctxt->input != NULL) && 1047 (ctxt->input->encoding != NULL)) { 1048 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 1049 } 1050 ctxt->charset = enc; 1051 return(0); 1052 case XML_CHAR_ENCODING_2022_JP: 1053 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1054 "encoding not supported %s\n", 1055 BAD_CAST "ISO-2022-JP", NULL); 1056 break; 1057 case XML_CHAR_ENCODING_SHIFT_JIS: 1058 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1059 "encoding not supported %s\n", 1060 BAD_CAST "Shift_JIS", NULL); 1061 break; 1062 case XML_CHAR_ENCODING_EUC_JP: 1063 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 1064 "encoding not supported %s\n", 1065 BAD_CAST "EUC-JP", NULL); 1066 break; 1067 default: 1068 break; 1069 } 1070 } 1071 if (handler == NULL) 1072 return(-1); 1073 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1074 return(xmlSwitchToEncoding(ctxt, handler)); 1075} 1076 1077/** 1078 * xmlSwitchInputEncoding: 1079 * @ctxt: the parser context 1080 * @input: the input stream 1081 * @handler: the encoding handler 1082 * 1083 * change the input functions when discovering the character encoding 1084 * of a given entity. 1085 * 1086 * Returns 0 in case of success, -1 otherwise 1087 */ 1088int 1089xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 1090 xmlCharEncodingHandlerPtr handler) 1091{ 1092 int nbchars; 1093 1094 if (handler == NULL) 1095 return (-1); 1096 if (input == NULL) 1097 return (-1); 1098 if (input->buf != NULL) { 1099 if (input->buf->encoder != NULL) { 1100 /* 1101 * Check in case the auto encoding detetection triggered 1102 * in already. 1103 */ 1104 if (input->buf->encoder == handler) 1105 return (0); 1106 1107 /* 1108 * "UTF-16" can be used for both LE and BE 1109 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 1110 BAD_CAST "UTF-16", 6)) && 1111 (!xmlStrncmp(BAD_CAST handler->name, 1112 BAD_CAST "UTF-16", 6))) { 1113 return(0); 1114 } 1115 */ 1116 1117 /* 1118 * Note: this is a bit dangerous, but that's what it 1119 * takes to use nearly compatible signature for different 1120 * encodings. 1121 */ 1122 xmlCharEncCloseFunc(input->buf->encoder); 1123 input->buf->encoder = handler; 1124 return (0); 1125 } 1126 input->buf->encoder = handler; 1127 1128 /* 1129 * Is there already some content down the pipe to convert ? 1130 */ 1131 if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { 1132 int processed; 1133 unsigned int use; 1134 1135 /* 1136 * Specific handling of the Byte Order Mark for 1137 * UTF-16 1138 */ 1139 if ((handler->name != NULL) && 1140 (!strcmp(handler->name, "UTF-16LE") || 1141 !strcmp(handler->name, "UTF-16")) && 1142 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 1143 input->cur += 2; 1144 } 1145 if ((handler->name != NULL) && 1146 (!strcmp(handler->name, "UTF-16BE")) && 1147 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 1148 input->cur += 2; 1149 } 1150 /* 1151 * Errata on XML-1.0 June 20 2001 1152 * Specific handling of the Byte Order Mark for 1153 * UTF-8 1154 */ 1155 if ((handler->name != NULL) && 1156 (!strcmp(handler->name, "UTF-8")) && 1157 (input->cur[0] == 0xEF) && 1158 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 1159 input->cur += 3; 1160 } 1161 1162 /* 1163 * Shrink the current input buffer. 1164 * Move it as the raw buffer and create a new input buffer 1165 */ 1166 processed = input->cur - input->base; 1167 xmlBufferShrink(input->buf->buffer, processed); 1168 input->buf->raw = input->buf->buffer; 1169 input->buf->buffer = xmlBufferCreate(); 1170 input->buf->rawconsumed = processed; 1171 use = input->buf->raw->use; 1172 1173 if (ctxt->html) { 1174 /* 1175 * convert as much as possible of the buffer 1176 */ 1177 nbchars = xmlCharEncInFunc(input->buf->encoder, 1178 input->buf->buffer, 1179 input->buf->raw); 1180 } else { 1181 /* 1182 * convert just enough to get 1183 * '<?xml version="1.0" encoding="xxx"?>' 1184 * parsed with the autodetected encoding 1185 * into the parser reading buffer. 1186 */ 1187 nbchars = xmlCharEncFirstLine(input->buf->encoder, 1188 input->buf->buffer, 1189 input->buf->raw); 1190 } 1191 if (nbchars < 0) { 1192 xmlErrInternal(ctxt, 1193 "switching encoding: encoder error\n", 1194 NULL); 1195 return (-1); 1196 } 1197 input->buf->rawconsumed += use - input->buf->raw->use; 1198 input->base = input->cur = input->buf->buffer->content; 1199 input->end = &input->base[input->buf->buffer->use]; 1200 1201 } 1202 return (0); 1203 } else if (input->length == 0) { 1204 /* 1205 * When parsing a static memory array one must know the 1206 * size to be able to convert the buffer. 1207 */ 1208 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 1209 return (-1); 1210 } 1211 return (0); 1212} 1213 1214/** 1215 * xmlSwitchToEncoding: 1216 * @ctxt: the parser context 1217 * @handler: the encoding handler 1218 * 1219 * change the input functions when discovering the character encoding 1220 * of a given entity. 1221 * 1222 * Returns 0 in case of success, -1 otherwise 1223 */ 1224int 1225xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 1226{ 1227 int ret = 0; 1228 1229 if (handler != NULL) { 1230 if (ctxt->input != NULL) { 1231 ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler); 1232 } else { 1233 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 1234 NULL); 1235 return(-1); 1236 } 1237 /* 1238 * The parsing is now done in UTF8 natively 1239 */ 1240 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1241 } else 1242 return(-1); 1243 return(ret); 1244} 1245 1246/************************************************************************ 1247 * * 1248 * Commodity functions to handle entities processing * 1249 * * 1250 ************************************************************************/ 1251 1252/** 1253 * xmlFreeInputStream: 1254 * @input: an xmlParserInputPtr 1255 * 1256 * Free up an input stream. 1257 */ 1258void 1259xmlFreeInputStream(xmlParserInputPtr input) { 1260 if (input == NULL) return; 1261 1262 if (input->filename != NULL) xmlFree((char *) input->filename); 1263 if (input->directory != NULL) xmlFree((char *) input->directory); 1264 if (input->encoding != NULL) xmlFree((char *) input->encoding); 1265 if (input->version != NULL) xmlFree((char *) input->version); 1266 if ((input->free != NULL) && (input->base != NULL)) 1267 input->free((xmlChar *) input->base); 1268 if (input->buf != NULL) 1269 xmlFreeParserInputBuffer(input->buf); 1270 xmlFree(input); 1271} 1272 1273/** 1274 * xmlNewInputStream: 1275 * @ctxt: an XML parser context 1276 * 1277 * Create a new input stream structure 1278 * Returns the new input stream or NULL 1279 */ 1280xmlParserInputPtr 1281xmlNewInputStream(xmlParserCtxtPtr ctxt) { 1282 xmlParserInputPtr input; 1283 static int id = 0; 1284 1285 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 1286 if (input == NULL) { 1287 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1288 return(NULL); 1289 } 1290 memset(input, 0, sizeof(xmlParserInput)); 1291 input->line = 1; 1292 input->col = 1; 1293 input->standalone = -1; 1294 /* 1295 * we don't care about thread reentrancy unicity for a single 1296 * parser context (and hence thread) is sufficient. 1297 */ 1298 input->id = id++; 1299 return(input); 1300} 1301 1302/** 1303 * xmlNewIOInputStream: 1304 * @ctxt: an XML parser context 1305 * @input: an I/O Input 1306 * @enc: the charset encoding if known 1307 * 1308 * Create a new input stream structure encapsulating the @input into 1309 * a stream suitable for the parser. 1310 * 1311 * Returns the new input stream or NULL 1312 */ 1313xmlParserInputPtr 1314xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 1315 xmlCharEncoding enc) { 1316 xmlParserInputPtr inputStream; 1317 1318 if (input == NULL) return(NULL); 1319 if (xmlParserDebugEntities) 1320 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 1321 inputStream = xmlNewInputStream(ctxt); 1322 if (inputStream == NULL) { 1323 return(NULL); 1324 } 1325 inputStream->filename = NULL; 1326 inputStream->buf = input; 1327 inputStream->base = inputStream->buf->buffer->content; 1328 inputStream->cur = inputStream->buf->buffer->content; 1329 inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1330 if (enc != XML_CHAR_ENCODING_NONE) { 1331 xmlSwitchEncoding(ctxt, enc); 1332 } 1333 1334 return(inputStream); 1335} 1336 1337/** 1338 * xmlNewEntityInputStream: 1339 * @ctxt: an XML parser context 1340 * @entity: an Entity pointer 1341 * 1342 * Create a new input stream based on an xmlEntityPtr 1343 * 1344 * Returns the new input stream or NULL 1345 */ 1346xmlParserInputPtr 1347xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1348 xmlParserInputPtr input; 1349 1350 if (entity == NULL) { 1351 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 1352 NULL); 1353 return(NULL); 1354 } 1355 if (xmlParserDebugEntities) 1356 xmlGenericError(xmlGenericErrorContext, 1357 "new input from entity: %s\n", entity->name); 1358 if (entity->content == NULL) { 1359 switch (entity->etype) { 1360 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1361 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 1362 entity->name); 1363 break; 1364 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1365 case XML_EXTERNAL_PARAMETER_ENTITY: 1366 return(xmlLoadExternalEntity((char *) entity->URI, 1367 (char *) entity->ExternalID, ctxt)); 1368 case XML_INTERNAL_GENERAL_ENTITY: 1369 xmlErrInternal(ctxt, 1370 "Internal entity %s without content !\n", 1371 entity->name); 1372 break; 1373 case XML_INTERNAL_PARAMETER_ENTITY: 1374 xmlErrInternal(ctxt, 1375 "Internal parameter entity %s without content !\n", 1376 entity->name); 1377 break; 1378 case XML_INTERNAL_PREDEFINED_ENTITY: 1379 xmlErrInternal(ctxt, 1380 "Predefined entity %s without content !\n", 1381 entity->name); 1382 break; 1383 } 1384 return(NULL); 1385 } 1386 input = xmlNewInputStream(ctxt); 1387 if (input == NULL) { 1388 return(NULL); 1389 } 1390 input->filename = (char *) entity->URI; 1391 input->base = entity->content; 1392 input->cur = entity->content; 1393 input->length = entity->length; 1394 input->end = &entity->content[input->length]; 1395 return(input); 1396} 1397 1398/** 1399 * xmlNewStringInputStream: 1400 * @ctxt: an XML parser context 1401 * @buffer: an memory buffer 1402 * 1403 * Create a new input stream based on a memory buffer. 1404 * Returns the new input stream 1405 */ 1406xmlParserInputPtr 1407xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 1408 xmlParserInputPtr input; 1409 1410 if (buffer == NULL) { 1411 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 1412 NULL); 1413 return(NULL); 1414 } 1415 if (xmlParserDebugEntities) 1416 xmlGenericError(xmlGenericErrorContext, 1417 "new fixed input: %.30s\n", buffer); 1418 input = xmlNewInputStream(ctxt); 1419 if (input == NULL) { 1420 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 1421 return(NULL); 1422 } 1423 input->base = buffer; 1424 input->cur = buffer; 1425 input->length = xmlStrlen(buffer); 1426 input->end = &buffer[input->length]; 1427 return(input); 1428} 1429 1430/** 1431 * xmlNewInputFromFile: 1432 * @ctxt: an XML parser context 1433 * @filename: the filename to use as entity 1434 * 1435 * Create a new input stream based on a file or an URL. 1436 * 1437 * Returns the new input stream or NULL in case of error 1438 */ 1439xmlParserInputPtr 1440xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 1441 xmlParserInputBufferPtr buf; 1442 xmlParserInputPtr inputStream; 1443 char *directory = NULL; 1444 xmlChar *URI = NULL; 1445 1446 if (xmlParserDebugEntities) 1447 xmlGenericError(xmlGenericErrorContext, 1448 "new input from file: %s\n", filename); 1449 if (ctxt == NULL) return(NULL); 1450 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 1451 if (buf == NULL) { 1452 if (filename == NULL) 1453 __xmlLoaderErr(ctxt, 1454 "failed to load external entity: NULL filename \n", 1455 NULL); 1456 else 1457 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", 1458 (const char *) filename); 1459 return(NULL); 1460 } 1461 1462 inputStream = xmlNewInputStream(ctxt); 1463 if (inputStream == NULL) 1464 return(NULL); 1465 1466 inputStream->buf = buf; 1467 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 1468 if (inputStream == NULL) 1469 return(NULL); 1470 1471 if (inputStream->filename == NULL) 1472 URI = xmlStrdup((xmlChar *) filename); 1473 else 1474 URI = xmlStrdup((xmlChar *) inputStream->filename); 1475 directory = xmlParserGetDirectory((const char *) URI); 1476 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 1477 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 1478 if (URI != NULL) xmlFree((char *) URI); 1479 inputStream->directory = directory; 1480 1481 inputStream->base = inputStream->buf->buffer->content; 1482 inputStream->cur = inputStream->buf->buffer->content; 1483 inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 1484 if ((ctxt->directory == NULL) && (directory != NULL)) 1485 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 1486 return(inputStream); 1487} 1488 1489/************************************************************************ 1490 * * 1491 * Commodity functions to handle parser contexts * 1492 * * 1493 ************************************************************************/ 1494 1495/** 1496 * xmlInitParserCtxt: 1497 * @ctxt: an XML parser context 1498 * 1499 * Initialize a parser context 1500 * 1501 * Returns 0 in case of success and -1 in case of error 1502 */ 1503 1504int 1505xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 1506{ 1507 xmlParserInputPtr input; 1508 1509 if(ctxt==NULL) { 1510 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 1511 return(-1); 1512 } 1513 1514 xmlDefaultSAXHandlerInit(); 1515 1516 if (ctxt->dict == NULL) 1517 ctxt->dict = xmlDictCreate(); 1518 if (ctxt->dict == NULL) { 1519 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1520 return(-1); 1521 } 1522 if (ctxt->sax == NULL) 1523 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 1524 if (ctxt->sax == NULL) { 1525 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1526 return(-1); 1527 } 1528 else 1529 xmlSAXVersion(ctxt->sax, 2); 1530 1531 ctxt->maxatts = 0; 1532 ctxt->atts = NULL; 1533 /* Allocate the Input stack */ 1534 if (ctxt->inputTab == NULL) { 1535 ctxt->inputTab = (xmlParserInputPtr *) 1536 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 1537 ctxt->inputMax = 5; 1538 } 1539 if (ctxt->inputTab == NULL) { 1540 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1541 ctxt->inputNr = 0; 1542 ctxt->inputMax = 0; 1543 ctxt->input = NULL; 1544 return(-1); 1545 } 1546 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1547 xmlFreeInputStream(input); 1548 } 1549 ctxt->inputNr = 0; 1550 ctxt->input = NULL; 1551 1552 ctxt->version = NULL; 1553 ctxt->encoding = NULL; 1554 ctxt->standalone = -1; 1555 ctxt->hasExternalSubset = 0; 1556 ctxt->hasPErefs = 0; 1557 ctxt->html = 0; 1558 ctxt->external = 0; 1559 ctxt->instate = XML_PARSER_START; 1560 ctxt->token = 0; 1561 ctxt->directory = NULL; 1562 1563 /* Allocate the Node stack */ 1564 if (ctxt->nodeTab == NULL) { 1565 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 1566 ctxt->nodeMax = 10; 1567 } 1568 if (ctxt->nodeTab == NULL) { 1569 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1570 ctxt->nodeNr = 0; 1571 ctxt->nodeMax = 0; 1572 ctxt->node = NULL; 1573 ctxt->inputNr = 0; 1574 ctxt->inputMax = 0; 1575 ctxt->input = NULL; 1576 return(-1); 1577 } 1578 ctxt->nodeNr = 0; 1579 ctxt->node = NULL; 1580 1581 /* Allocate the Name stack */ 1582 if (ctxt->nameTab == NULL) { 1583 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 1584 ctxt->nameMax = 10; 1585 } 1586 if (ctxt->nameTab == NULL) { 1587 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1588 ctxt->nodeNr = 0; 1589 ctxt->nodeMax = 0; 1590 ctxt->node = NULL; 1591 ctxt->inputNr = 0; 1592 ctxt->inputMax = 0; 1593 ctxt->input = NULL; 1594 ctxt->nameNr = 0; 1595 ctxt->nameMax = 0; 1596 ctxt->name = NULL; 1597 return(-1); 1598 } 1599 ctxt->nameNr = 0; 1600 ctxt->name = NULL; 1601 1602 /* Allocate the space stack */ 1603 if (ctxt->spaceTab == NULL) { 1604 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 1605 ctxt->spaceMax = 10; 1606 } 1607 if (ctxt->spaceTab == NULL) { 1608 xmlErrMemory(NULL, "cannot initialize parser context\n"); 1609 ctxt->nodeNr = 0; 1610 ctxt->nodeMax = 0; 1611 ctxt->node = NULL; 1612 ctxt->inputNr = 0; 1613 ctxt->inputMax = 0; 1614 ctxt->input = NULL; 1615 ctxt->nameNr = 0; 1616 ctxt->nameMax = 0; 1617 ctxt->name = NULL; 1618 ctxt->spaceNr = 0; 1619 ctxt->spaceMax = 0; 1620 ctxt->space = NULL; 1621 return(-1); 1622 } 1623 ctxt->spaceNr = 1; 1624 ctxt->spaceMax = 10; 1625 ctxt->spaceTab[0] = -1; 1626 ctxt->space = &ctxt->spaceTab[0]; 1627 ctxt->userData = ctxt; 1628 ctxt->myDoc = NULL; 1629 ctxt->wellFormed = 1; 1630 ctxt->nsWellFormed = 1; 1631 ctxt->valid = 1; 1632 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 1633 ctxt->validate = xmlDoValidityCheckingDefaultValue; 1634 ctxt->pedantic = xmlPedanticParserDefaultValue; 1635 ctxt->linenumbers = xmlLineNumbersDefaultValue; 1636 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 1637 if (ctxt->keepBlanks == 0) 1638 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 1639 1640 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 1641 ctxt->vctxt.userData = ctxt; 1642 ctxt->vctxt.error = xmlParserValidityError; 1643 ctxt->vctxt.warning = xmlParserValidityWarning; 1644 if (ctxt->validate) { 1645 if (xmlGetWarningsDefaultValue == 0) 1646 ctxt->vctxt.warning = NULL; 1647 else 1648 ctxt->vctxt.warning = xmlParserValidityWarning; 1649 ctxt->vctxt.nodeMax = 0; 1650 } 1651 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 1652 ctxt->record_info = 0; 1653 ctxt->nbChars = 0; 1654 ctxt->checkIndex = 0; 1655 ctxt->inSubset = 0; 1656 ctxt->errNo = XML_ERR_OK; 1657 ctxt->depth = 0; 1658 ctxt->charset = XML_CHAR_ENCODING_UTF8; 1659 ctxt->catalogs = NULL; 1660 xmlInitNodeInfoSeq(&ctxt->node_seq); 1661 return(0); 1662} 1663 1664/** 1665 * xmlFreeParserCtxt: 1666 * @ctxt: an XML parser context 1667 * 1668 * Free all the memory used by a parser context. However the parsed 1669 * document in ctxt->myDoc is not freed. 1670 */ 1671 1672void 1673xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 1674{ 1675 xmlParserInputPtr input; 1676 1677 if (ctxt == NULL) return; 1678 1679 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 1680 xmlFreeInputStream(input); 1681 } 1682 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 1683 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 1684 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 1685 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 1686 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 1687 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 1688 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 1689 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 1690#ifdef LIBXML_SAX1_ENABLED 1691 if ((ctxt->sax != NULL) && 1692 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 1693#else 1694 if (ctxt->sax != NULL) 1695#endif /* LIBXML_SAX1_ENABLED */ 1696 xmlFree(ctxt->sax); 1697 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 1698 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 1699 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 1700 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 1701 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 1702 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 1703 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 1704 if (ctxt->attsDefault != NULL) 1705 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 1706 if (ctxt->attsSpecial != NULL) 1707 xmlHashFree(ctxt->attsSpecial, NULL); 1708 if (ctxt->freeElems != NULL) { 1709 xmlNodePtr cur, next; 1710 1711 cur = ctxt->freeElems; 1712 while (cur != NULL) { 1713 next = cur->next; 1714 xmlFree(cur); 1715 cur = next; 1716 } 1717 } 1718 if (ctxt->freeAttrs != NULL) { 1719 xmlAttrPtr cur, next; 1720 1721 cur = ctxt->freeAttrs; 1722 while (cur != NULL) { 1723 next = cur->next; 1724 xmlFree(cur); 1725 cur = next; 1726 } 1727 } 1728 /* 1729 * cleanup the error strings 1730 */ 1731 if (ctxt->lastError.message != NULL) 1732 xmlFree(ctxt->lastError.message); 1733 if (ctxt->lastError.file != NULL) 1734 xmlFree(ctxt->lastError.file); 1735 if (ctxt->lastError.str1 != NULL) 1736 xmlFree(ctxt->lastError.str1); 1737 if (ctxt->lastError.str2 != NULL) 1738 xmlFree(ctxt->lastError.str2); 1739 if (ctxt->lastError.str3 != NULL) 1740 xmlFree(ctxt->lastError.str3); 1741 1742#ifdef LIBXML_CATALOG_ENABLED 1743 if (ctxt->catalogs != NULL) 1744 xmlCatalogFreeLocal(ctxt->catalogs); 1745#endif 1746 xmlFree(ctxt); 1747} 1748 1749/** 1750 * xmlNewParserCtxt: 1751 * 1752 * Allocate and initialize a new parser context. 1753 * 1754 * Returns the xmlParserCtxtPtr or NULL 1755 */ 1756 1757xmlParserCtxtPtr 1758xmlNewParserCtxt(void) 1759{ 1760 xmlParserCtxtPtr ctxt; 1761 1762 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 1763 if (ctxt == NULL) { 1764 xmlErrMemory(NULL, "cannot allocate parser context\n"); 1765 return(NULL); 1766 } 1767 memset(ctxt, 0, sizeof(xmlParserCtxt)); 1768 if (xmlInitParserCtxt(ctxt) < 0) { 1769 xmlFreeParserCtxt(ctxt); 1770 return(NULL); 1771 } 1772 return(ctxt); 1773} 1774 1775/************************************************************************ 1776 * * 1777 * Handling of node informations * 1778 * * 1779 ************************************************************************/ 1780 1781/** 1782 * xmlClearParserCtxt: 1783 * @ctxt: an XML parser context 1784 * 1785 * Clear (release owned resources) and reinitialize a parser context 1786 */ 1787 1788void 1789xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 1790{ 1791 if (ctxt==NULL) 1792 return; 1793 xmlClearNodeInfoSeq(&ctxt->node_seq); 1794 xmlCtxtReset(ctxt); 1795} 1796 1797 1798/** 1799 * xmlParserFindNodeInfo: 1800 * @ctx: an XML parser context 1801 * @node: an XML node within the tree 1802 * 1803 * Find the parser node info struct for a given node 1804 * 1805 * Returns an xmlParserNodeInfo block pointer or NULL 1806 */ 1807const xmlParserNodeInfo * 1808xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 1809{ 1810 unsigned long pos; 1811 1812 if ((ctx == NULL) || (node == NULL)) 1813 return (NULL); 1814 /* Find position where node should be at */ 1815 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 1816 if (pos < ctx->node_seq.length 1817 && ctx->node_seq.buffer[pos].node == node) 1818 return &ctx->node_seq.buffer[pos]; 1819 else 1820 return NULL; 1821} 1822 1823 1824/** 1825 * xmlInitNodeInfoSeq: 1826 * @seq: a node info sequence pointer 1827 * 1828 * -- Initialize (set to initial state) node info sequence 1829 */ 1830void 1831xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1832{ 1833 if (seq == NULL) 1834 return; 1835 seq->length = 0; 1836 seq->maximum = 0; 1837 seq->buffer = NULL; 1838} 1839 1840/** 1841 * xmlClearNodeInfoSeq: 1842 * @seq: a node info sequence pointer 1843 * 1844 * -- Clear (release memory and reinitialize) node 1845 * info sequence 1846 */ 1847void 1848xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 1849{ 1850 if (seq == NULL) 1851 return; 1852 if (seq->buffer != NULL) 1853 xmlFree(seq->buffer); 1854 xmlInitNodeInfoSeq(seq); 1855} 1856 1857/** 1858 * xmlParserFindNodeInfoIndex: 1859 * @seq: a node info sequence pointer 1860 * @node: an XML node pointer 1861 * 1862 * 1863 * xmlParserFindNodeInfoIndex : Find the index that the info record for 1864 * the given node is or should be at in a sorted sequence 1865 * 1866 * Returns a long indicating the position of the record 1867 */ 1868unsigned long 1869xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1870 const xmlNodePtr node) 1871{ 1872 unsigned long upper, lower, middle; 1873 int found = 0; 1874 1875 if ((seq == NULL) || (node == NULL)) 1876 return ((unsigned long) -1); 1877 1878 /* Do a binary search for the key */ 1879 lower = 1; 1880 upper = seq->length; 1881 middle = 0; 1882 while (lower <= upper && !found) { 1883 middle = lower + (upper - lower) / 2; 1884 if (node == seq->buffer[middle - 1].node) 1885 found = 1; 1886 else if (node < seq->buffer[middle - 1].node) 1887 upper = middle - 1; 1888 else 1889 lower = middle + 1; 1890 } 1891 1892 /* Return position */ 1893 if (middle == 0 || seq->buffer[middle - 1].node < node) 1894 return middle; 1895 else 1896 return middle - 1; 1897} 1898 1899 1900/** 1901 * xmlParserAddNodeInfo: 1902 * @ctxt: an XML parser context 1903 * @info: a node info sequence pointer 1904 * 1905 * Insert node info record into the sorted sequence 1906 */ 1907void 1908xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 1909 const xmlParserNodeInfoPtr info) 1910{ 1911 unsigned long pos; 1912 1913 if ((ctxt == NULL) || (info == NULL)) return; 1914 1915 /* Find pos and check to see if node is already in the sequence */ 1916 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 1917 info->node); 1918 1919 if ((pos < ctxt->node_seq.length) && 1920 (ctxt->node_seq.buffer != NULL) && 1921 (ctxt->node_seq.buffer[pos].node == info->node)) { 1922 ctxt->node_seq.buffer[pos] = *info; 1923 } 1924 1925 /* Otherwise, we need to add new node to buffer */ 1926 else { 1927 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 1928 xmlParserNodeInfo *tmp_buffer; 1929 unsigned int byte_size; 1930 1931 if (ctxt->node_seq.maximum == 0) 1932 ctxt->node_seq.maximum = 2; 1933 byte_size = (sizeof(*ctxt->node_seq.buffer) * 1934 (2 * ctxt->node_seq.maximum)); 1935 1936 if (ctxt->node_seq.buffer == NULL) 1937 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 1938 else 1939 tmp_buffer = 1940 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 1941 byte_size); 1942 1943 if (tmp_buffer == NULL) { 1944 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 1945 return; 1946 } 1947 ctxt->node_seq.buffer = tmp_buffer; 1948 ctxt->node_seq.maximum *= 2; 1949 } 1950 1951 /* If position is not at end, move elements out of the way */ 1952 if (pos != ctxt->node_seq.length) { 1953 unsigned long i; 1954 1955 for (i = ctxt->node_seq.length; i > pos; i--) 1956 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 1957 } 1958 1959 /* Copy element and increase length */ 1960 ctxt->node_seq.buffer[pos] = *info; 1961 ctxt->node_seq.length++; 1962 } 1963} 1964 1965/************************************************************************ 1966 * * 1967 * Defaults settings * 1968 * * 1969 ************************************************************************/ 1970/** 1971 * xmlPedanticParserDefault: 1972 * @val: int 0 or 1 1973 * 1974 * Set and return the previous value for enabling pedantic warnings. 1975 * 1976 * Returns the last value for 0 for no substitution, 1 for substitution. 1977 */ 1978 1979int 1980xmlPedanticParserDefault(int val) { 1981 int old = xmlPedanticParserDefaultValue; 1982 1983 xmlPedanticParserDefaultValue = val; 1984 return(old); 1985} 1986 1987/** 1988 * xmlLineNumbersDefault: 1989 * @val: int 0 or 1 1990 * 1991 * Set and return the previous value for enabling line numbers in elements 1992 * contents. This may break on old application and is turned off by default. 1993 * 1994 * Returns the last value for 0 for no substitution, 1 for substitution. 1995 */ 1996 1997int 1998xmlLineNumbersDefault(int val) { 1999 int old = xmlLineNumbersDefaultValue; 2000 2001 xmlLineNumbersDefaultValue = val; 2002 return(old); 2003} 2004 2005/** 2006 * xmlSubstituteEntitiesDefault: 2007 * @val: int 0 or 1 2008 * 2009 * Set and return the previous value for default entity support. 2010 * Initially the parser always keep entity references instead of substituting 2011 * entity values in the output. This function has to be used to change the 2012 * default parser behavior 2013 * SAX::substituteEntities() has to be used for changing that on a file by 2014 * file basis. 2015 * 2016 * Returns the last value for 0 for no substitution, 1 for substitution. 2017 */ 2018 2019int 2020xmlSubstituteEntitiesDefault(int val) { 2021 int old = xmlSubstituteEntitiesDefaultValue; 2022 2023 xmlSubstituteEntitiesDefaultValue = val; 2024 return(old); 2025} 2026 2027/** 2028 * xmlKeepBlanksDefault: 2029 * @val: int 0 or 1 2030 * 2031 * Set and return the previous value for default blanks text nodes support. 2032 * The 1.x version of the parser used an heuristic to try to detect 2033 * ignorable white spaces. As a result the SAX callback was generating 2034 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when 2035 * using the DOM output text nodes containing those blanks were not generated. 2036 * The 2.x and later version will switch to the XML standard way and 2037 * ignorableWhitespace() are only generated when running the parser in 2038 * validating mode and when the current element doesn't allow CDATA or 2039 * mixed content. 2040 * This function is provided as a way to force the standard behavior 2041 * on 1.X libs and to switch back to the old mode for compatibility when 2042 * running 1.X client code on 2.X . Upgrade of 1.X code should be done 2043 * by using xmlIsBlankNode() commodity function to detect the "empty" 2044 * nodes generated. 2045 * This value also affect autogeneration of indentation when saving code 2046 * if blanks sections are kept, indentation is not generated. 2047 * 2048 * Returns the last value for 0 for no substitution, 1 for substitution. 2049 */ 2050 2051int 2052xmlKeepBlanksDefault(int val) { 2053 int old = xmlKeepBlanksDefaultValue; 2054 2055 xmlKeepBlanksDefaultValue = val; 2056 xmlIndentTreeOutput = !val; 2057 return(old); 2058} 2059 2060#define bottom_parserInternals 2061#include "elfgcchack.h" 2062