1/* 2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly 3 * implemented on top of the SAX interfaces 4 * 5 * References: 6 * The XML specification: 7 * http://www.w3.org/TR/REC-xml 8 * Original 1.0 version: 9 * http://www.w3.org/TR/1998/REC-xml-19980210 10 * XML second edition working draft 11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814 12 * 13 * Okay this is a big file, the parser core is around 7000 lines, then it 14 * is followed by the progressive parser top routines, then the various 15 * high level APIs to call the parser and a few miscellaneous functions. 16 * A number of helper functions and deprecated ones have been moved to 17 * parserInternals.c to reduce this file size. 18 * As much as possible the functions are associated with their relative 19 * production in the XML specification. A few productions defining the 20 * different ranges of character are actually implanted either in 21 * parserInternals.h or parserInternals.c 22 * The DOM tree build is realized from the default SAX callbacks in 23 * the module SAX.c. 24 * The routines doing the validation checks are in valid.c and called either 25 * from the SAX callbacks or as standalone functions using a preparsed 26 * document. 27 * 28 * See Copyright for the status of this software. 
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <stdarg.h> 45#include <libxml/xmlmemory.h> 46#include <libxml/threads.h> 47#include <libxml/globals.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57#ifdef LIBXML_CATALOG_ENABLED 58#include <libxml/catalog.h> 59#endif 60#ifdef LIBXML_SCHEMAS_ENABLED 61#include <libxml/xmlschemastypes.h> 62#include <libxml/relaxng.h> 63#endif 64#ifdef HAVE_CTYPE_H 65#include <ctype.h> 66#endif 67#ifdef HAVE_STDLIB_H 68#include <stdlib.h> 69#endif 70#ifdef HAVE_SYS_STAT_H 71#include <sys/stat.h> 72#endif 73#ifdef HAVE_FCNTL_H 74#include <fcntl.h> 75#endif 76#ifdef HAVE_UNISTD_H 77#include <unistd.h> 78#endif 79#ifdef HAVE_ZLIB_H 80#include <zlib.h> 81#endif 82 83static void 84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); 85 86/************************************************************************ 87 * * 88 * Arbitrary limits set in the parser. See XML_PARSE_HUGE * 89 * * 90 ************************************************************************/ 91 92#define XML_PARSER_BIG_ENTITY 1000 93#define XML_PARSER_LOT_ENTITY 5000 94 95/* 96 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity 97 * replacement over the size in byte of the input indicates that you have 98 * and eponential behaviour. A value of 10 correspond to at least 3 entity 99 * replacement per byte of input. 
100 */ 101#define XML_PARSER_NON_LINEAR 10 102 103/* 104 * xmlParserEntityCheck 105 * 106 * Function to check non-linear entity expansion behaviour 107 * This is here to detect and stop exponential linear entity expansion 108 * This is not a limitation of the parser but a safety 109 * boundary feature. It can be disabled with the XML_PARSE_HUGE 110 * parser option. 111 */ 112static int 113xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, 114 xmlEntityPtr ent) 115{ 116 unsigned long consumed = 0; 117 118 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 119 return (0); 120 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 121 return (1); 122 if (size != 0) { 123 /* 124 * Do the check based on the replacement size of the entity 125 */ 126 if (size < XML_PARSER_BIG_ENTITY) 127 return(0); 128 129 /* 130 * A limit on the amount of text data reasonably used 131 */ 132 if (ctxt->input != NULL) { 133 consumed = ctxt->input->consumed + 134 (ctxt->input->cur - ctxt->input->base); 135 } 136 consumed += ctxt->sizeentities; 137 138 if ((size < XML_PARSER_NON_LINEAR * consumed) && 139 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 140 return (0); 141 } else if (ent != NULL) { 142 /* 143 * use the number of parsed entities in the replacement 144 */ 145 size = ent->checked; 146 147 /* 148 * The amount of data parsed counting entities size only once 149 */ 150 if (ctxt->input != NULL) { 151 consumed = ctxt->input->consumed + 152 (ctxt->input->cur - ctxt->input->base); 153 } 154 consumed += ctxt->sizeentities; 155 156 /* 157 * Check the density of entities for the amount of data 158 * knowing an entity reference will take at least 3 bytes 159 */ 160 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 161 return (0); 162 } else { 163 /* 164 * strange we got no data for checking just return 165 */ 166 return (0); 167 } 168 169 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 170 return (1); 171} 172 173/** 174 * xmlParserMaxDepth: 175 * 176 * arbitrary depth 
limit for the XML documents that we allow to 177 * process. This is not a limitation of the parser but a safety 178 * boundary feature. It can be disabled with the XML_PARSE_HUGE 179 * parser option. 180 */ 181unsigned int xmlParserMaxDepth = 256; 182 183 184 185#define SAX2 1 186#define XML_PARSER_BIG_BUFFER_SIZE 300 187#define XML_PARSER_BUFFER_SIZE 100 188#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 189 190/* 191 * List of XML prefixed PI allowed by W3C specs 192 */ 193 194static const char *xmlW3CPIs[] = { 195 "xml-stylesheet", 196 NULL 197}; 198 199 200/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 201xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 202 const xmlChar **str); 203 204static xmlParserErrors 205xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 206 xmlSAXHandlerPtr sax, 207 void *user_data, int depth, const xmlChar *URL, 208 const xmlChar *ID, xmlNodePtr *list); 209 210static int 211xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 212 const char *encoding); 213#ifdef LIBXML_LEGACY_ENABLED 214static void 215xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 216 xmlNodePtr lastNode); 217#endif /* LIBXML_LEGACY_ENABLED */ 218 219static xmlParserErrors 220xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 221 const xmlChar *string, void *user_data, xmlNodePtr *lst); 222 223static int 224xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 225 226/************************************************************************ 227 * * 228 * Some factorized error routines * 229 * * 230 ************************************************************************/ 231 232/** 233 * xmlErrAttributeDup: 234 * @ctxt: an XML parser context 235 * @prefix: the attribute prefix 236 * @localname: the attribute localname 237 * 238 * Handle a redefinition of attribute error 239 */ 240static void 241xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * 
prefix, 242 const xmlChar * localname) 243{ 244 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 245 (ctxt->instate == XML_PARSER_EOF)) 246 return; 247 if (ctxt != NULL) 248 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 249 if (prefix == NULL) 250 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 251 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 252 (const char *) localname, NULL, NULL, 0, 0, 253 "Attribute %s redefined\n", localname); 254 else 255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 256 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 257 (const char *) prefix, (const char *) localname, 258 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 259 localname); 260 if (ctxt != NULL) { 261 ctxt->wellFormed = 0; 262 if (ctxt->recovery == 0) 263 ctxt->disableSAX = 1; 264 } 265} 266 267/** 268 * xmlFatalErr: 269 * @ctxt: an XML parser context 270 * @error: the error number 271 * @extra: extra information string 272 * 273 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 274 */ 275static void 276xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 277{ 278 const char *errmsg; 279 280 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 281 (ctxt->instate == XML_PARSER_EOF)) 282 return; 283 switch (error) { 284 case XML_ERR_INVALID_HEX_CHARREF: 285 errmsg = "CharRef: invalid hexadecimal value\n"; 286 break; 287 case XML_ERR_INVALID_DEC_CHARREF: 288 errmsg = "CharRef: invalid decimal value\n"; 289 break; 290 case XML_ERR_INVALID_CHARREF: 291 errmsg = "CharRef: invalid value\n"; 292 break; 293 case XML_ERR_INTERNAL_ERROR: 294 errmsg = "internal error"; 295 break; 296 case XML_ERR_PEREF_AT_EOF: 297 errmsg = "PEReference at end of document\n"; 298 break; 299 case XML_ERR_PEREF_IN_PROLOG: 300 errmsg = "PEReference in prolog\n"; 301 break; 302 case XML_ERR_PEREF_IN_EPILOG: 303 errmsg = "PEReference in epilog\n"; 304 break; 305 case XML_ERR_PEREF_NO_NAME: 306 errmsg = "PEReference: no name\n"; 307 break; 308 case 
XML_ERR_PEREF_SEMICOL_MISSING: 309 errmsg = "PEReference: expecting ';'\n"; 310 break; 311 case XML_ERR_ENTITY_LOOP: 312 errmsg = "Detected an entity reference loop\n"; 313 break; 314 case XML_ERR_ENTITY_NOT_STARTED: 315 errmsg = "EntityValue: \" or ' expected\n"; 316 break; 317 case XML_ERR_ENTITY_PE_INTERNAL: 318 errmsg = "PEReferences forbidden in internal subset\n"; 319 break; 320 case XML_ERR_ENTITY_NOT_FINISHED: 321 errmsg = "EntityValue: \" or ' expected\n"; 322 break; 323 case XML_ERR_ATTRIBUTE_NOT_STARTED: 324 errmsg = "AttValue: \" or ' expected\n"; 325 break; 326 case XML_ERR_LT_IN_ATTRIBUTE: 327 errmsg = "Unescaped '<' not allowed in attributes values\n"; 328 break; 329 case XML_ERR_LITERAL_NOT_STARTED: 330 errmsg = "SystemLiteral \" or ' expected\n"; 331 break; 332 case XML_ERR_LITERAL_NOT_FINISHED: 333 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 334 break; 335 case XML_ERR_MISPLACED_CDATA_END: 336 errmsg = "Sequence ']]>' not allowed in content\n"; 337 break; 338 case XML_ERR_URI_REQUIRED: 339 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 340 break; 341 case XML_ERR_PUBID_REQUIRED: 342 errmsg = "PUBLIC, the Public Identifier is missing\n"; 343 break; 344 case XML_ERR_HYPHEN_IN_COMMENT: 345 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 346 break; 347 case XML_ERR_PI_NOT_STARTED: 348 errmsg = "xmlParsePI : no target name\n"; 349 break; 350 case XML_ERR_RESERVED_XML_NAME: 351 errmsg = "Invalid PI name\n"; 352 break; 353 case XML_ERR_NOTATION_NOT_STARTED: 354 errmsg = "NOTATION: Name expected here\n"; 355 break; 356 case XML_ERR_NOTATION_NOT_FINISHED: 357 errmsg = "'>' required to close NOTATION declaration\n"; 358 break; 359 case XML_ERR_VALUE_REQUIRED: 360 errmsg = "Entity value required\n"; 361 break; 362 case XML_ERR_URI_FRAGMENT: 363 errmsg = "Fragment not allowed"; 364 break; 365 case XML_ERR_ATTLIST_NOT_STARTED: 366 errmsg = "'(' required to start ATTLIST enumeration\n"; 367 break; 368 case 
XML_ERR_NMTOKEN_REQUIRED: 369 errmsg = "NmToken expected in ATTLIST enumeration\n"; 370 break; 371 case XML_ERR_ATTLIST_NOT_FINISHED: 372 errmsg = "')' required to finish ATTLIST enumeration\n"; 373 break; 374 case XML_ERR_MIXED_NOT_STARTED: 375 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 376 break; 377 case XML_ERR_PCDATA_REQUIRED: 378 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 379 break; 380 case XML_ERR_ELEMCONTENT_NOT_STARTED: 381 errmsg = "ContentDecl : Name or '(' expected\n"; 382 break; 383 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 384 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 385 break; 386 case XML_ERR_PEREF_IN_INT_SUBSET: 387 errmsg = 388 "PEReference: forbidden within markup decl in internal subset\n"; 389 break; 390 case XML_ERR_GT_REQUIRED: 391 errmsg = "expected '>'\n"; 392 break; 393 case XML_ERR_CONDSEC_INVALID: 394 errmsg = "XML conditional section '[' expected\n"; 395 break; 396 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 397 errmsg = "Content error in the external subset\n"; 398 break; 399 case XML_ERR_CONDSEC_INVALID_KEYWORD: 400 errmsg = 401 "conditional section INCLUDE or IGNORE keyword expected\n"; 402 break; 403 case XML_ERR_CONDSEC_NOT_FINISHED: 404 errmsg = "XML conditional section not closed\n"; 405 break; 406 case XML_ERR_XMLDECL_NOT_STARTED: 407 errmsg = "Text declaration '<?xml' required\n"; 408 break; 409 case XML_ERR_XMLDECL_NOT_FINISHED: 410 errmsg = "parsing XML declaration: '?>' expected\n"; 411 break; 412 case XML_ERR_EXT_ENTITY_STANDALONE: 413 errmsg = "external parsed entities cannot be standalone\n"; 414 break; 415 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 416 errmsg = "EntityRef: expecting ';'\n"; 417 break; 418 case XML_ERR_DOCTYPE_NOT_FINISHED: 419 errmsg = "DOCTYPE improperly terminated\n"; 420 break; 421 case XML_ERR_LTSLASH_REQUIRED: 422 errmsg = "EndTag: '</' not found\n"; 423 break; 424 case XML_ERR_EQUAL_REQUIRED: 425 errmsg = "expected '='\n"; 426 break; 427 case XML_ERR_STRING_NOT_CLOSED: 428 
errmsg = "String not closed expecting \" or '\n"; 429 break; 430 case XML_ERR_STRING_NOT_STARTED: 431 errmsg = "String not started expecting ' or \"\n"; 432 break; 433 case XML_ERR_ENCODING_NAME: 434 errmsg = "Invalid XML encoding name\n"; 435 break; 436 case XML_ERR_STANDALONE_VALUE: 437 errmsg = "standalone accepts only 'yes' or 'no'\n"; 438 break; 439 case XML_ERR_DOCUMENT_EMPTY: 440 errmsg = "Document is empty\n"; 441 break; 442 case XML_ERR_DOCUMENT_END: 443 errmsg = "Extra content at the end of the document\n"; 444 break; 445 case XML_ERR_NOT_WELL_BALANCED: 446 errmsg = "chunk is not well balanced\n"; 447 break; 448 case XML_ERR_EXTRA_CONTENT: 449 errmsg = "extra content at the end of well balanced chunk\n"; 450 break; 451 case XML_ERR_VERSION_MISSING: 452 errmsg = "Malformed declaration expecting version\n"; 453 break; 454#if 0 455 case: 456 errmsg = "\n"; 457 break; 458#endif 459 default: 460 errmsg = "Unregistered error message\n"; 461 } 462 if (ctxt != NULL) 463 ctxt->errNo = error; 464 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 465 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 466 info); 467 if (ctxt != NULL) { 468 ctxt->wellFormed = 0; 469 if (ctxt->recovery == 0) 470 ctxt->disableSAX = 1; 471 } 472} 473 474/** 475 * xmlFatalErrMsg: 476 * @ctxt: an XML parser context 477 * @error: the error number 478 * @msg: the error message 479 * 480 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 481 */ 482static void 483xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 484 const char *msg) 485{ 486 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 487 (ctxt->instate == XML_PARSER_EOF)) 488 return; 489 if (ctxt != NULL) 490 ctxt->errNo = error; 491 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 492 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); 493 if (ctxt != NULL) { 494 ctxt->wellFormed = 0; 495 if (ctxt->recovery == 0) 496 ctxt->disableSAX = 1; 497 } 498} 499 500/** 501 * xmlWarningMsg: 502 * @ctxt: an XML parser context 503 * @error: the error number 504 * @msg: the error message 505 * @str1: extra data 506 * @str2: extra data 507 * 508 * Handle a warning. 509 */ 510static void 511xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 512 const char *msg, const xmlChar *str1, const xmlChar *str2) 513{ 514 xmlStructuredErrorFunc schannel = NULL; 515 516 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 517 (ctxt->instate == XML_PARSER_EOF)) 518 return; 519 if ((ctxt != NULL) && (ctxt->sax != NULL) && 520 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 521 schannel = ctxt->sax->serror; 522 __xmlRaiseError(schannel, 523 (ctxt->sax) ? ctxt->sax->warning : NULL, 524 ctxt->userData, 525 ctxt, NULL, XML_FROM_PARSER, error, 526 XML_ERR_WARNING, NULL, 0, 527 (const char *) str1, (const char *) str2, NULL, 0, 0, 528 msg, (const char *) str1, (const char *) str2); 529} 530 531/** 532 * xmlValidityError: 533 * @ctxt: an XML parser context 534 * @error: the error number 535 * @msg: the error message 536 * @str1: extra data 537 * 538 * Handle a validity error. 
539 */ 540static void 541xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 542 const char *msg, const xmlChar *str1, const xmlChar *str2) 543{ 544 xmlStructuredErrorFunc schannel = NULL; 545 546 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 547 (ctxt->instate == XML_PARSER_EOF)) 548 return; 549 if (ctxt != NULL) { 550 ctxt->errNo = error; 551 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 552 schannel = ctxt->sax->serror; 553 } 554 __xmlRaiseError(schannel, 555 ctxt->vctxt.error, ctxt->vctxt.userData, 556 ctxt, NULL, XML_FROM_DTD, error, 557 XML_ERR_ERROR, NULL, 0, (const char *) str1, 558 (const char *) str2, NULL, 0, 0, 559 msg, (const char *) str1, (const char *) str2); 560 if (ctxt != NULL) { 561 ctxt->valid = 0; 562 } 563} 564 565/** 566 * xmlFatalErrMsgInt: 567 * @ctxt: an XML parser context 568 * @error: the error number 569 * @msg: the error message 570 * @val: an integer value 571 * 572 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 573 */ 574static void 575xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 576 const char *msg, int val) 577{ 578 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 579 (ctxt->instate == XML_PARSER_EOF)) 580 return; 581 if (ctxt != NULL) 582 ctxt->errNo = error; 583 __xmlRaiseError(NULL, NULL, NULL, 584 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 585 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 586 if (ctxt != NULL) { 587 ctxt->wellFormed = 0; 588 if (ctxt->recovery == 0) 589 ctxt->disableSAX = 1; 590 } 591} 592 593/** 594 * xmlFatalErrMsgStrIntStr: 595 * @ctxt: an XML parser context 596 * @error: the error number 597 * @msg: the error message 598 * @str1: an string info 599 * @val: an integer value 600 * @str2: an string info 601 * 602 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 603 */ 604static void 605xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 606 const char *msg, const xmlChar *str1, int val, 607 const xmlChar *str2) 608{ 609 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 610 (ctxt->instate == XML_PARSER_EOF)) 611 return; 612 if (ctxt != NULL) 613 ctxt->errNo = error; 614 __xmlRaiseError(NULL, NULL, NULL, 615 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 616 NULL, 0, (const char *) str1, (const char *) str2, 617 NULL, val, 0, msg, str1, val, str2); 618 if (ctxt != NULL) { 619 ctxt->wellFormed = 0; 620 if (ctxt->recovery == 0) 621 ctxt->disableSAX = 1; 622 } 623} 624 625/** 626 * xmlFatalErrMsgStr: 627 * @ctxt: an XML parser context 628 * @error: the error number 629 * @msg: the error message 630 * @val: a string value 631 * 632 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 633 */ 634static void 635xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 636 const char *msg, const xmlChar * val) 637{ 638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 639 (ctxt->instate == XML_PARSER_EOF)) 640 return; 641 if (ctxt != NULL) 642 ctxt->errNo = error; 643 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 644 XML_FROM_PARSER, error, XML_ERR_FATAL, 645 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 646 val); 647 if (ctxt != NULL) { 648 ctxt->wellFormed = 0; 649 if (ctxt->recovery == 0) 650 ctxt->disableSAX = 1; 651 } 652} 653 654/** 655 * xmlErrMsgStr: 656 * @ctxt: an XML parser context 657 * @error: the error number 658 * @msg: the error message 659 * @val: a string value 660 * 661 * Handle a non fatal parser error 662 */ 663static void 664xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 665 const char *msg, const xmlChar * val) 666{ 667 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 668 (ctxt->instate == XML_PARSER_EOF)) 669 return; 670 if (ctxt != NULL) 671 ctxt->errNo = error; 672 __xmlRaiseError(NULL, NULL, NULL, ctxt, 
NULL, 673 XML_FROM_PARSER, error, XML_ERR_ERROR, 674 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 675 val); 676} 677 678/** 679 * xmlNsErr: 680 * @ctxt: an XML parser context 681 * @error: the error number 682 * @msg: the message 683 * @info1: extra information string 684 * @info2: extra information string 685 * 686 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 687 */ 688static void 689xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 690 const char *msg, 691 const xmlChar * info1, const xmlChar * info2, 692 const xmlChar * info3) 693{ 694 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 695 (ctxt->instate == XML_PARSER_EOF)) 696 return; 697 if (ctxt != NULL) 698 ctxt->errNo = error; 699 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 700 XML_ERR_ERROR, NULL, 0, (const char *) info1, 701 (const char *) info2, (const char *) info3, 0, 0, msg, 702 info1, info2, info3); 703 if (ctxt != NULL) 704 ctxt->nsWellFormed = 0; 705} 706 707/** 708 * xmlNsWarn 709 * @ctxt: an XML parser context 710 * @error: the error number 711 * @msg: the message 712 * @info1: extra information string 713 * @info2: extra information string 714 * 715 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 716 */ 717static void 718xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 719 const char *msg, 720 const xmlChar * info1, const xmlChar * info2, 721 const xmlChar * info3) 722{ 723 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 724 (ctxt->instate == XML_PARSER_EOF)) 725 return; 726 if (ctxt != NULL) 727 ctxt->errNo = error; 728 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 729 XML_ERR_WARNING, NULL, 0, (const char *) info1, 730 (const char *) info2, (const char *) info3, 0, 0, msg, 731 info1, info2, info3); 732} 733 734/************************************************************************ 735 * * 736 * Library wide options * 737 * * 738 ************************************************************************/ 739 740/** 741 * xmlHasFeature: 742 * @feature: the feature to be examined 743 * 744 * Examines if the library has been compiled with a given feature. 745 * 746 * Returns a non-zero value if the feature exist, otherwise zero. 747 * Returns zero (0) if the feature does not exist or an unknown 748 * unknown feature is requested, non-zero otherwise. 
749 */ 750int 751xmlHasFeature(xmlFeature feature) 752{ 753 switch (feature) { 754 case XML_WITH_THREAD: 755#ifdef LIBXML_THREAD_ENABLED 756 return(1); 757#else 758 return(0); 759#endif 760 case XML_WITH_TREE: 761#ifdef LIBXML_TREE_ENABLED 762 return(1); 763#else 764 return(0); 765#endif 766 case XML_WITH_OUTPUT: 767#ifdef LIBXML_OUTPUT_ENABLED 768 return(1); 769#else 770 return(0); 771#endif 772 case XML_WITH_PUSH: 773#ifdef LIBXML_PUSH_ENABLED 774 return(1); 775#else 776 return(0); 777#endif 778 case XML_WITH_READER: 779#ifdef LIBXML_READER_ENABLED 780 return(1); 781#else 782 return(0); 783#endif 784 case XML_WITH_PATTERN: 785#ifdef LIBXML_PATTERN_ENABLED 786 return(1); 787#else 788 return(0); 789#endif 790 case XML_WITH_WRITER: 791#ifdef LIBXML_WRITER_ENABLED 792 return(1); 793#else 794 return(0); 795#endif 796 case XML_WITH_SAX1: 797#ifdef LIBXML_SAX1_ENABLED 798 return(1); 799#else 800 return(0); 801#endif 802 case XML_WITH_FTP: 803#ifdef LIBXML_FTP_ENABLED 804 return(1); 805#else 806 return(0); 807#endif 808 case XML_WITH_HTTP: 809#ifdef LIBXML_HTTP_ENABLED 810 return(1); 811#else 812 return(0); 813#endif 814 case XML_WITH_VALID: 815#ifdef LIBXML_VALID_ENABLED 816 return(1); 817#else 818 return(0); 819#endif 820 case XML_WITH_HTML: 821#ifdef LIBXML_HTML_ENABLED 822 return(1); 823#else 824 return(0); 825#endif 826 case XML_WITH_LEGACY: 827#ifdef LIBXML_LEGACY_ENABLED 828 return(1); 829#else 830 return(0); 831#endif 832 case XML_WITH_C14N: 833#ifdef LIBXML_C14N_ENABLED 834 return(1); 835#else 836 return(0); 837#endif 838 case XML_WITH_CATALOG: 839#ifdef LIBXML_CATALOG_ENABLED 840 return(1); 841#else 842 return(0); 843#endif 844 case XML_WITH_XPATH: 845#ifdef LIBXML_XPATH_ENABLED 846 return(1); 847#else 848 return(0); 849#endif 850 case XML_WITH_XPTR: 851#ifdef LIBXML_XPTR_ENABLED 852 return(1); 853#else 854 return(0); 855#endif 856 case XML_WITH_XINCLUDE: 857#ifdef LIBXML_XINCLUDE_ENABLED 858 return(1); 859#else 860 return(0); 861#endif 862 case XML_WITH_ICONV: 
863#ifdef LIBXML_ICONV_ENABLED 864 return(1); 865#else 866 return(0); 867#endif 868 case XML_WITH_ISO8859X: 869#ifdef LIBXML_ISO8859X_ENABLED 870 return(1); 871#else 872 return(0); 873#endif 874 case XML_WITH_UNICODE: 875#ifdef LIBXML_UNICODE_ENABLED 876 return(1); 877#else 878 return(0); 879#endif 880 case XML_WITH_REGEXP: 881#ifdef LIBXML_REGEXP_ENABLED 882 return(1); 883#else 884 return(0); 885#endif 886 case XML_WITH_AUTOMATA: 887#ifdef LIBXML_AUTOMATA_ENABLED 888 return(1); 889#else 890 return(0); 891#endif 892 case XML_WITH_EXPR: 893#ifdef LIBXML_EXPR_ENABLED 894 return(1); 895#else 896 return(0); 897#endif 898 case XML_WITH_SCHEMAS: 899#ifdef LIBXML_SCHEMAS_ENABLED 900 return(1); 901#else 902 return(0); 903#endif 904 case XML_WITH_SCHEMATRON: 905#ifdef LIBXML_SCHEMATRON_ENABLED 906 return(1); 907#else 908 return(0); 909#endif 910 case XML_WITH_MODULES: 911#ifdef LIBXML_MODULES_ENABLED 912 return(1); 913#else 914 return(0); 915#endif 916 case XML_WITH_DEBUG: 917#ifdef LIBXML_DEBUG_ENABLED 918 return(1); 919#else 920 return(0); 921#endif 922 case XML_WITH_DEBUG_MEM: 923#ifdef DEBUG_MEMORY_LOCATION 924 return(1); 925#else 926 return(0); 927#endif 928 case XML_WITH_DEBUG_RUN: 929#ifdef LIBXML_DEBUG_RUNTIME 930 return(1); 931#else 932 return(0); 933#endif 934 case XML_WITH_ZLIB: 935#ifdef LIBXML_ZLIB_ENABLED 936 return(1); 937#else 938 return(0); 939#endif 940 default: 941 break; 942 } 943 return(0); 944} 945 946/************************************************************************ 947 * * 948 * SAX2 defaulted attributes handling * 949 * * 950 ************************************************************************/ 951 952/** 953 * xmlDetectSAX2: 954 * @ctxt: an XML parser context 955 * 956 * Do the SAX2 detection and specific intialization 957 */ 958static void 959xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 960 if (ctxt == NULL) return; 961#ifdef LIBXML_SAX1_ENABLED 962 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 963 
((ctxt->sax->startElementNs != NULL) || 964 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 965#else 966 ctxt->sax2 = 1; 967#endif /* LIBXML_SAX1_ENABLED */ 968 969 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 970 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 971 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 972 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 973 (ctxt->str_xml_ns == NULL)) { 974 xmlErrMemory(ctxt, NULL); 975 } 976} 977 978typedef struct _xmlDefAttrs xmlDefAttrs; 979typedef xmlDefAttrs *xmlDefAttrsPtr; 980struct _xmlDefAttrs { 981 int nbAttrs; /* number of defaulted attributes on that element */ 982 int maxAttrs; /* the size of the array */ 983 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 984}; 985 986/** 987 * xmlAttrNormalizeSpace: 988 * @src: the source string 989 * @dst: the target string 990 * 991 * Normalize the space in non CDATA attribute values: 992 * If the attribute type is not CDATA, then the XML processor MUST further 993 * process the normalized attribute value by discarding any leading and 994 * trailing space (#x20) characters, and by replacing sequences of space 995 * (#x20) characters by a single space (#x20) character. 996 * Note that the size of dst need to be at least src, and if one doesn't need 997 * to preserve dst (and it doesn't come from a dictionary or read-only) then 998 * passing src as dst is just fine. 999 * 1000 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1001 * is needed. 
1002 */ 1003static xmlChar * 1004xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1005{ 1006 if ((src == NULL) || (dst == NULL)) 1007 return(NULL); 1008 1009 while (*src == 0x20) src++; 1010 while (*src != 0) { 1011 if (*src == 0x20) { 1012 while (*src == 0x20) src++; 1013 if (*src != 0) 1014 *dst++ = 0x20; 1015 } else { 1016 *dst++ = *src++; 1017 } 1018 } 1019 *dst = 0; 1020 if (dst == src) 1021 return(NULL); 1022 return(dst); 1023} 1024 1025/** 1026 * xmlAttrNormalizeSpace2: 1027 * @src: the source string 1028 * 1029 * Normalize the space in non CDATA attribute values, a slightly more complex 1030 * front end to avoid allocation problems when running on attribute values 1031 * coming from the input. 1032 * 1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1034 * is needed. 1035 */ 1036static const xmlChar * 1037xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1038{ 1039 int i; 1040 int remove_head = 0; 1041 int need_realloc = 0; 1042 const xmlChar *cur; 1043 1044 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1045 return(NULL); 1046 i = *len; 1047 if (i <= 0) 1048 return(NULL); 1049 1050 cur = src; 1051 while (*cur == 0x20) { 1052 cur++; 1053 remove_head++; 1054 } 1055 while (*cur != 0) { 1056 if (*cur == 0x20) { 1057 cur++; 1058 if ((*cur == 0x20) || (*cur == 0)) { 1059 need_realloc = 1; 1060 break; 1061 } 1062 } else 1063 cur++; 1064 } 1065 if (need_realloc) { 1066 xmlChar *ret; 1067 1068 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1069 if (ret == NULL) { 1070 xmlErrMemory(ctxt, NULL); 1071 return(NULL); 1072 } 1073 xmlAttrNormalizeSpace(ret, ret); 1074 *len = (int) strlen((const char *)ret); 1075 return(ret); 1076 } else if (remove_head) { 1077 *len -= remove_head; 1078 memmove(src, src + remove_head, 1 + *len); 1079 return(src); 1080 } 1081 return(NULL); 1082} 1083 1084/** 1085 * xmlAddDefAttrs: 1086 * @ctxt: an XML parser context 1087 * @fullname: the element fullname 1088 * 
@fullattr: the attribute fullname 1089 * @value: the attribute value 1090 * 1091 * Add a defaulted attribute for an element 1092 */ 1093static void 1094xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1095 const xmlChar *fullname, 1096 const xmlChar *fullattr, 1097 const xmlChar *value) { 1098 xmlDefAttrsPtr defaults; 1099 int len; 1100 const xmlChar *name; 1101 const xmlChar *prefix; 1102 1103 /* 1104 * Allows to detect attribute redefinitions 1105 */ 1106 if (ctxt->attsSpecial != NULL) { 1107 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1108 return; 1109 } 1110 1111 if (ctxt->attsDefault == NULL) { 1112 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1113 if (ctxt->attsDefault == NULL) 1114 goto mem_error; 1115 } 1116 1117 /* 1118 * split the element name into prefix:localname , the string found 1119 * are within the DTD and then not associated to namespace names. 1120 */ 1121 name = xmlSplitQName3(fullname, &len); 1122 if (name == NULL) { 1123 name = xmlDictLookup(ctxt->dict, fullname, -1); 1124 prefix = NULL; 1125 } else { 1126 name = xmlDictLookup(ctxt->dict, name, -1); 1127 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1128 } 1129 1130 /* 1131 * make sure there is some storage 1132 */ 1133 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1134 if (defaults == NULL) { 1135 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1136 (4 * 5) * sizeof(const xmlChar *)); 1137 if (defaults == NULL) 1138 goto mem_error; 1139 defaults->nbAttrs = 0; 1140 defaults->maxAttrs = 4; 1141 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1142 defaults, NULL) < 0) { 1143 xmlFree(defaults); 1144 goto mem_error; 1145 } 1146 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1147 xmlDefAttrsPtr temp; 1148 1149 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1150 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1151 if (temp == NULL) 1152 goto mem_error; 1153 defaults = temp; 1154 defaults->maxAttrs *= 2; 
1155 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1156 defaults, NULL) < 0) { 1157 xmlFree(defaults); 1158 goto mem_error; 1159 } 1160 } 1161 1162 /* 1163 * Split the element name into prefix:localname , the string found 1164 * are within the DTD and hen not associated to namespace names. 1165 */ 1166 name = xmlSplitQName3(fullattr, &len); 1167 if (name == NULL) { 1168 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1169 prefix = NULL; 1170 } else { 1171 name = xmlDictLookup(ctxt->dict, name, -1); 1172 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1173 } 1174 1175 defaults->values[5 * defaults->nbAttrs] = name; 1176 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1177 /* intern the string and precompute the end */ 1178 len = xmlStrlen(value); 1179 value = xmlDictLookup(ctxt->dict, value, len); 1180 defaults->values[5 * defaults->nbAttrs + 2] = value; 1181 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1182 if (ctxt->external) 1183 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1184 else 1185 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1186 defaults->nbAttrs++; 1187 1188 return; 1189 1190mem_error: 1191 xmlErrMemory(ctxt, NULL); 1192 return; 1193} 1194 1195/** 1196 * xmlAddSpecialAttr: 1197 * @ctxt: an XML parser context 1198 * @fullname: the element fullname 1199 * @fullattr: the attribute fullname 1200 * @type: the attribute type 1201 * 1202 * Register this attribute type 1203 */ 1204static void 1205xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1206 const xmlChar *fullname, 1207 const xmlChar *fullattr, 1208 int type) 1209{ 1210 if (ctxt->attsSpecial == NULL) { 1211 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1212 if (ctxt->attsSpecial == NULL) 1213 goto mem_error; 1214 } 1215 1216 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1217 return; 1218 1219 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1220 (void *) (long) type); 1221 return; 1222 1223mem_error: 1224 
xmlErrMemory(ctxt, NULL); 1225 return; 1226} 1227 1228/** 1229 * xmlCleanSpecialAttrCallback: 1230 * 1231 * Removes CDATA attributes from the special attribute table 1232 */ 1233static void 1234xmlCleanSpecialAttrCallback(void *payload, void *data, 1235 const xmlChar *fullname, const xmlChar *fullattr, 1236 const xmlChar *unused ATTRIBUTE_UNUSED) { 1237 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1238 1239 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1240 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1241 } 1242} 1243 1244/** 1245 * xmlCleanSpecialAttr: 1246 * @ctxt: an XML parser context 1247 * 1248 * Trim the list of attributes defined to remove all those of type 1249 * CDATA as they are not special. This call should be done when finishing 1250 * to parse the DTD and before starting to parse the document root. 1251 */ 1252static void 1253xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1254{ 1255 if (ctxt->attsSpecial == NULL) 1256 return; 1257 1258 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1259 1260 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1261 xmlHashFree(ctxt->attsSpecial, NULL); 1262 ctxt->attsSpecial = NULL; 1263 } 1264 return; 1265} 1266 1267/** 1268 * xmlCheckLanguageID: 1269 * @lang: pointer to the string value 1270 * 1271 * Checks that the value conforms to the LanguageID production: 1272 * 1273 * NOTE: this is somewhat deprecated, those productions were removed from 1274 * the XML Second edition. 
1275 * 1276 * [33] LanguageID ::= Langcode ('-' Subcode)* 1277 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1278 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1279 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1280 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1281 * [38] Subcode ::= ([a-z] | [A-Z])+ 1282 * 1283 * Returns 1 if correct 0 otherwise 1284 **/ 1285int 1286xmlCheckLanguageID(const xmlChar * lang) 1287{ 1288 const xmlChar *cur = lang; 1289 1290 if (cur == NULL) 1291 return (0); 1292 if (((cur[0] == 'i') && (cur[1] == '-')) || 1293 ((cur[0] == 'I') && (cur[1] == '-'))) { 1294 /* 1295 * IANA code 1296 */ 1297 cur += 2; 1298 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1299 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1300 cur++; 1301 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 1302 ((cur[0] == 'X') && (cur[1] == '-'))) { 1303 /* 1304 * User code 1305 */ 1306 cur += 2; 1307 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1308 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1309 cur++; 1310 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1311 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 1312 /* 1313 * ISO639 1314 */ 1315 cur++; 1316 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1317 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1318 cur++; 1319 else 1320 return (0); 1321 } else 1322 return (0); 1323 while (cur[0] != 0) { /* non input consuming */ 1324 if (cur[0] != '-') 1325 return (0); 1326 cur++; 1327 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1328 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1329 cur++; 1330 else 1331 return (0); 1332 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1333 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1334 cur++; 1335 } 1336 return (1); 1337} 1338 1339/************************************************************************ 1340 * * 1341 * Parser stacks related functions and macros * 1342 * * 1343 
************************************************************************/ 1344 1345xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1346 const xmlChar ** str); 1347 1348#ifdef SAX2 1349/** 1350 * nsPush: 1351 * @ctxt: an XML parser context 1352 * @prefix: the namespace prefix or NULL 1353 * @URL: the namespace name 1354 * 1355 * Pushes a new parser namespace on top of the ns stack 1356 * 1357 * Returns -1 in case of error, -2 if the namespace should be discarded 1358 * and the index in the stack otherwise. 1359 */ 1360static int 1361nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1362{ 1363 if (ctxt->options & XML_PARSE_NSCLEAN) { 1364 int i; 1365 for (i = 0;i < ctxt->nsNr;i += 2) { 1366 if (ctxt->nsTab[i] == prefix) { 1367 /* in scope */ 1368 if (ctxt->nsTab[i + 1] == URL) 1369 return(-2); 1370 /* out of scope keep it */ 1371 break; 1372 } 1373 } 1374 } 1375 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1376 ctxt->nsMax = 10; 1377 ctxt->nsNr = 0; 1378 ctxt->nsTab = (const xmlChar **) 1379 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1380 if (ctxt->nsTab == NULL) { 1381 xmlErrMemory(ctxt, NULL); 1382 ctxt->nsMax = 0; 1383 return (-1); 1384 } 1385 } else if (ctxt->nsNr >= ctxt->nsMax) { 1386 const xmlChar ** tmp; 1387 ctxt->nsMax *= 2; 1388 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1389 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1390 if (tmp == NULL) { 1391 xmlErrMemory(ctxt, NULL); 1392 ctxt->nsMax /= 2; 1393 return (-1); 1394 } 1395 ctxt->nsTab = tmp; 1396 } 1397 ctxt->nsTab[ctxt->nsNr++] = prefix; 1398 ctxt->nsTab[ctxt->nsNr++] = URL; 1399 return (ctxt->nsNr); 1400} 1401/** 1402 * nsPop: 1403 * @ctxt: an XML parser context 1404 * @nr: the number to pop 1405 * 1406 * Pops the top @nr parser prefix/namespace from the ns stack 1407 * 1408 * Returns the number of namespaces removed 1409 */ 1410static int 1411nsPop(xmlParserCtxtPtr ctxt, int nr) 1412{ 1413 int i; 1414 1415 if (ctxt->nsTab == NULL) return(0); 1416 if 
(ctxt->nsNr < nr) { 1417 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1418 nr = ctxt->nsNr; 1419 } 1420 if (ctxt->nsNr <= 0) 1421 return (0); 1422 1423 for (i = 0;i < nr;i++) { 1424 ctxt->nsNr--; 1425 ctxt->nsTab[ctxt->nsNr] = NULL; 1426 } 1427 return(nr); 1428} 1429#endif 1430 1431static int 1432xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1433 const xmlChar **atts; 1434 int *attallocs; 1435 int maxatts; 1436 1437 if (ctxt->atts == NULL) { 1438 maxatts = 55; /* allow for 10 attrs by default */ 1439 atts = (const xmlChar **) 1440 xmlMalloc(maxatts * sizeof(xmlChar *)); 1441 if (atts == NULL) goto mem_error; 1442 ctxt->atts = atts; 1443 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1444 if (attallocs == NULL) goto mem_error; 1445 ctxt->attallocs = attallocs; 1446 ctxt->maxatts = maxatts; 1447 } else if (nr + 5 > ctxt->maxatts) { 1448 maxatts = (nr + 5) * 2; 1449 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1450 maxatts * sizeof(const xmlChar *)); 1451 if (atts == NULL) goto mem_error; 1452 ctxt->atts = atts; 1453 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1454 (maxatts / 5) * sizeof(int)); 1455 if (attallocs == NULL) goto mem_error; 1456 ctxt->attallocs = attallocs; 1457 ctxt->maxatts = maxatts; 1458 } 1459 return(ctxt->maxatts); 1460mem_error: 1461 xmlErrMemory(ctxt, NULL); 1462 return(-1); 1463} 1464 1465/** 1466 * inputPush: 1467 * @ctxt: an XML parser context 1468 * @value: the parser input 1469 * 1470 * Pushes a new parser input on top of the input stack 1471 * 1472 * Returns -1 in case of error, the index in the stack otherwise 1473 */ 1474int 1475inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1476{ 1477 if ((ctxt == NULL) || (value == NULL)) 1478 return(-1); 1479 if (ctxt->inputNr >= ctxt->inputMax) { 1480 ctxt->inputMax *= 2; 1481 ctxt->inputTab = 1482 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1483 ctxt->inputMax * 1484 sizeof(ctxt->inputTab[0])); 1485 if (ctxt->inputTab 
== NULL) { 1486 xmlErrMemory(ctxt, NULL); 1487 xmlFreeInputStream(value); 1488 ctxt->inputMax /= 2; 1489 value = NULL; 1490 return (-1); 1491 } 1492 } 1493 ctxt->inputTab[ctxt->inputNr] = value; 1494 ctxt->input = value; 1495 return (ctxt->inputNr++); 1496} 1497/** 1498 * inputPop: 1499 * @ctxt: an XML parser context 1500 * 1501 * Pops the top parser input from the input stack 1502 * 1503 * Returns the input just removed 1504 */ 1505xmlParserInputPtr 1506inputPop(xmlParserCtxtPtr ctxt) 1507{ 1508 xmlParserInputPtr ret; 1509 1510 if (ctxt == NULL) 1511 return(NULL); 1512 if (ctxt->inputNr <= 0) 1513 return (NULL); 1514 ctxt->inputNr--; 1515 if (ctxt->inputNr > 0) 1516 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1517 else 1518 ctxt->input = NULL; 1519 ret = ctxt->inputTab[ctxt->inputNr]; 1520 ctxt->inputTab[ctxt->inputNr] = NULL; 1521 return (ret); 1522} 1523/** 1524 * nodePush: 1525 * @ctxt: an XML parser context 1526 * @value: the element node 1527 * 1528 * Pushes a new element node on top of the node stack 1529 * 1530 * Returns -1 in case of error, the index in the stack otherwise 1531 */ 1532int 1533nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1534{ 1535 if (ctxt == NULL) return(0); 1536 if (ctxt->nodeNr >= ctxt->nodeMax) { 1537 xmlNodePtr *tmp; 1538 1539 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1540 ctxt->nodeMax * 2 * 1541 sizeof(ctxt->nodeTab[0])); 1542 if (tmp == NULL) { 1543 xmlErrMemory(ctxt, NULL); 1544 return (-1); 1545 } 1546 ctxt->nodeTab = tmp; 1547 ctxt->nodeMax *= 2; 1548 } 1549 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1550 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1551 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1552 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1553 xmlParserMaxDepth); 1554 ctxt->instate = XML_PARSER_EOF; 1555 return(-1); 1556 } 1557 ctxt->nodeTab[ctxt->nodeNr] = value; 1558 ctxt->node = value; 1559 return (ctxt->nodeNr++); 1560} 1561 1562/** 1563 * nodePop: 1564 * @ctxt: an 
XML parser context 1565 * 1566 * Pops the top element node from the node stack 1567 * 1568 * Returns the node just removed 1569 */ 1570xmlNodePtr 1571nodePop(xmlParserCtxtPtr ctxt) 1572{ 1573 xmlNodePtr ret; 1574 1575 if (ctxt == NULL) return(NULL); 1576 if (ctxt->nodeNr <= 0) 1577 return (NULL); 1578 ctxt->nodeNr--; 1579 if (ctxt->nodeNr > 0) 1580 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1581 else 1582 ctxt->node = NULL; 1583 ret = ctxt->nodeTab[ctxt->nodeNr]; 1584 ctxt->nodeTab[ctxt->nodeNr] = NULL; 1585 return (ret); 1586} 1587 1588#ifdef LIBXML_PUSH_ENABLED 1589/** 1590 * nameNsPush: 1591 * @ctxt: an XML parser context 1592 * @value: the element name 1593 * @prefix: the element prefix 1594 * @URI: the element namespace name 1595 * 1596 * Pushes a new element name/prefix/URL on top of the name stack 1597 * 1598 * Returns -1 in case of error, the index in the stack otherwise 1599 */ 1600static int 1601nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1602 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1603{ 1604 if (ctxt->nameNr >= ctxt->nameMax) { 1605 const xmlChar * *tmp; 1606 void **tmp2; 1607 ctxt->nameMax *= 2; 1608 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1609 ctxt->nameMax * 1610 sizeof(ctxt->nameTab[0])); 1611 if (tmp == NULL) { 1612 ctxt->nameMax /= 2; 1613 goto mem_error; 1614 } 1615 ctxt->nameTab = tmp; 1616 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1617 ctxt->nameMax * 3 * 1618 sizeof(ctxt->pushTab[0])); 1619 if (tmp2 == NULL) { 1620 ctxt->nameMax /= 2; 1621 goto mem_error; 1622 } 1623 ctxt->pushTab = tmp2; 1624 } 1625 ctxt->nameTab[ctxt->nameNr] = value; 1626 ctxt->name = value; 1627 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1628 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1629 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1630 return (ctxt->nameNr++); 1631mem_error: 1632 xmlErrMemory(ctxt, NULL); 1633 return (-1); 1634} 1635/** 1636 * nameNsPop: 1637 * @ctxt: an XML 
parser context 1638 * 1639 * Pops the top element/prefix/URI name from the name stack 1640 * 1641 * Returns the name just removed 1642 */ 1643static const xmlChar * 1644nameNsPop(xmlParserCtxtPtr ctxt) 1645{ 1646 const xmlChar *ret; 1647 1648 if (ctxt->nameNr <= 0) 1649 return (NULL); 1650 ctxt->nameNr--; 1651 if (ctxt->nameNr > 0) 1652 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1653 else 1654 ctxt->name = NULL; 1655 ret = ctxt->nameTab[ctxt->nameNr]; 1656 ctxt->nameTab[ctxt->nameNr] = NULL; 1657 return (ret); 1658} 1659#endif /* LIBXML_PUSH_ENABLED */ 1660 1661/** 1662 * namePush: 1663 * @ctxt: an XML parser context 1664 * @value: the element name 1665 * 1666 * Pushes a new element name on top of the name stack 1667 * 1668 * Returns -1 in case of error, the index in the stack otherwise 1669 */ 1670int 1671namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1672{ 1673 if (ctxt == NULL) return (-1); 1674 1675 if (ctxt->nameNr >= ctxt->nameMax) { 1676 const xmlChar * *tmp; 1677 ctxt->nameMax *= 2; 1678 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1679 ctxt->nameMax * 1680 sizeof(ctxt->nameTab[0])); 1681 if (tmp == NULL) { 1682 ctxt->nameMax /= 2; 1683 goto mem_error; 1684 } 1685 ctxt->nameTab = tmp; 1686 } 1687 ctxt->nameTab[ctxt->nameNr] = value; 1688 ctxt->name = value; 1689 return (ctxt->nameNr++); 1690mem_error: 1691 xmlErrMemory(ctxt, NULL); 1692 return (-1); 1693} 1694/** 1695 * namePop: 1696 * @ctxt: an XML parser context 1697 * 1698 * Pops the top element name from the name stack 1699 * 1700 * Returns the name just removed 1701 */ 1702const xmlChar * 1703namePop(xmlParserCtxtPtr ctxt) 1704{ 1705 const xmlChar *ret; 1706 1707 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1708 return (NULL); 1709 ctxt->nameNr--; 1710 if (ctxt->nameNr > 0) 1711 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1712 else 1713 ctxt->name = NULL; 1714 ret = ctxt->nameTab[ctxt->nameNr]; 1715 ctxt->nameTab[ctxt->nameNr] = NULL; 1716 return (ret); 1717} 1718 
1719static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1720 if (ctxt->spaceNr >= ctxt->spaceMax) { 1721 int *tmp; 1722 1723 ctxt->spaceMax *= 2; 1724 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1725 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1726 if (tmp == NULL) { 1727 xmlErrMemory(ctxt, NULL); 1728 ctxt->spaceMax /=2; 1729 return(-1); 1730 } 1731 ctxt->spaceTab = tmp; 1732 } 1733 ctxt->spaceTab[ctxt->spaceNr] = val; 1734 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1735 return(ctxt->spaceNr++); 1736} 1737 1738static int spacePop(xmlParserCtxtPtr ctxt) { 1739 int ret; 1740 if (ctxt->spaceNr <= 0) return(0); 1741 ctxt->spaceNr--; 1742 if (ctxt->spaceNr > 0) 1743 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1744 else 1745 ctxt->space = &ctxt->spaceTab[0]; 1746 ret = ctxt->spaceTab[ctxt->spaceNr]; 1747 ctxt->spaceTab[ctxt->spaceNr] = -1; 1748 return(ret); 1749} 1750 1751/* 1752 * Macros for accessing the content. Those should be used only by the parser, 1753 * and not exported. 1754 * 1755 * Dirty macros, i.e. one often need to make assumption on the context to 1756 * use them 1757 * 1758 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1759 * To be used with extreme caution since operations consuming 1760 * characters may move the input buffer to a different location ! 1761 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1762 * This should be used internally by the parser 1763 * only to compare to ASCII values otherwise it would break when 1764 * running with UTF-8 encoding. 1765 * RAW same as CUR but in the input buffer, bypass any token 1766 * extraction that may have been done 1767 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1768 * to compare on ASCII based substring. 1769 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1770 * strings without newlines within the parser. 
1771 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1772 * defined char within the parser. 1773 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1774 * 1775 * NEXT Skip to the next character, this does the proper decoding 1776 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1777 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1778 * CUR_CHAR(l) returns the current unicode character (int), set l 1779 * to the number of xmlChars used for the encoding [0-5]. 1780 * CUR_SCHAR same but operate on a string instead of the context 1781 * COPY_BUF copy the current unicode char to the target buffer, increment 1782 * the index 1783 * GROW, SHRINK handling of input buffers 1784 */ 1785 1786#define RAW (*ctxt->input->cur) 1787#define CUR (*ctxt->input->cur) 1788#define NXT(val) ctxt->input->cur[(val)] 1789#define CUR_PTR ctxt->input->cur 1790 1791#define CMP4( s, c1, c2, c3, c4 ) \ 1792 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1793 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1794#define CMP5( s, c1, c2, c3, c4, c5 ) \ 1795 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1796#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1797 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1798#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1799 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1800#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1801 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1802#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1803 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1804 ((unsigned char *) s)[ 8 ] == c9 ) 1805#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1806 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1807 ((unsigned char *) s)[ 9 ] == c10 ) 1808 1809#define SKIP(val) do { \ 1810 
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1811 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1812 if ((*ctxt->input->cur == 0) && \ 1813 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1814 xmlPopInput(ctxt); \ 1815 } while (0) 1816 1817#define SKIPL(val) do { \ 1818 int skipl; \ 1819 for(skipl=0; skipl<val; skipl++) { \ 1820 if (*(ctxt->input->cur) == '\n') { \ 1821 ctxt->input->line++; ctxt->input->col = 1; \ 1822 } else ctxt->input->col++; \ 1823 ctxt->nbChars++; \ 1824 ctxt->input->cur++; \ 1825 } \ 1826 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1827 if ((*ctxt->input->cur == 0) && \ 1828 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1829 xmlPopInput(ctxt); \ 1830 } while (0) 1831 1832#define SHRINK if ((ctxt->progressive == 0) && \ 1833 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1834 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1835 xmlSHRINK (ctxt); 1836 1837static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1838 xmlParserInputShrink(ctxt->input); 1839 if ((*ctxt->input->cur == 0) && 1840 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1841 xmlPopInput(ctxt); 1842 } 1843 1844#define GROW if ((ctxt->progressive == 0) && \ 1845 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 1846 xmlGROW (ctxt); 1847 1848static void xmlGROW (xmlParserCtxtPtr ctxt) { 1849 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1850 if ((*ctxt->input->cur == 0) && 1851 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1852 xmlPopInput(ctxt); 1853} 1854 1855#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 1856 1857#define NEXT xmlNextChar(ctxt) 1858 1859#define NEXT1 { \ 1860 ctxt->input->col++; \ 1861 ctxt->input->cur++; \ 1862 ctxt->nbChars++; \ 1863 if (*ctxt->input->cur == 0) \ 1864 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 1865 } 1866 1867#define NEXTL(l) do { \ 1868 if (*(ctxt->input->cur) == '\n') { \ 1869 ctxt->input->line++; ctxt->input->col = 
1; \ 1870 } else ctxt->input->col++; \ 1871 ctxt->input->cur += l; \ 1872 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1873 } while (0) 1874 1875#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 1876#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 1877 1878#define COPY_BUF(l,b,i,v) \ 1879 if (l == 1) b[i++] = (xmlChar) v; \ 1880 else i += xmlCopyCharMultiByte(&b[i],v) 1881 1882/** 1883 * xmlSkipBlankChars: 1884 * @ctxt: the XML parser context 1885 * 1886 * skip all blanks character found at that point in the input streams. 1887 * It pops up finished entities in the process if allowable at that point. 1888 * 1889 * Returns the number of space chars skipped 1890 */ 1891 1892int 1893xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 1894 int res = 0; 1895 1896 /* 1897 * It's Okay to use CUR/NEXT here since all the blanks are on 1898 * the ASCII range. 1899 */ 1900 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 1901 const xmlChar *cur; 1902 /* 1903 * if we are in the document content, go really fast 1904 */ 1905 cur = ctxt->input->cur; 1906 while (IS_BLANK_CH(*cur)) { 1907 if (*cur == '\n') { 1908 ctxt->input->line++; ctxt->input->col = 1; 1909 } 1910 cur++; 1911 res++; 1912 if (*cur == 0) { 1913 ctxt->input->cur = cur; 1914 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1915 cur = ctxt->input->cur; 1916 } 1917 } 1918 ctxt->input->cur = cur; 1919 } else { 1920 int cur; 1921 do { 1922 cur = CUR; 1923 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 1924 NEXT; 1925 cur = CUR; 1926 res++; 1927 } 1928 while ((cur == 0) && (ctxt->inputNr > 1) && 1929 (ctxt->instate != XML_PARSER_COMMENT)) { 1930 xmlPopInput(ctxt); 1931 cur = CUR; 1932 } 1933 /* 1934 * Need to handle support of entities branching here 1935 */ 1936 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 1937 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 1938 } 1939 return(res); 1940} 1941 
1942/************************************************************************ 1943 * * 1944 * Commodity functions to handle entities * 1945 * * 1946 ************************************************************************/ 1947 1948/** 1949 * xmlPopInput: 1950 * @ctxt: an XML parser context 1951 * 1952 * xmlPopInput: the current input pointed by ctxt->input came to an end 1953 * pop it and return the next char. 1954 * 1955 * Returns the current xmlChar in the parser context 1956 */ 1957xmlChar 1958xmlPopInput(xmlParserCtxtPtr ctxt) { 1959 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 1960 if (xmlParserDebugEntities) 1961 xmlGenericError(xmlGenericErrorContext, 1962 "Popping input %d\n", ctxt->inputNr); 1963 xmlFreeInputStream(inputPop(ctxt)); 1964 if ((*ctxt->input->cur == 0) && 1965 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1966 return(xmlPopInput(ctxt)); 1967 return(CUR); 1968} 1969 1970/** 1971 * xmlPushInput: 1972 * @ctxt: an XML parser context 1973 * @input: an XML parser input fragment (entity, XML fragment ...). 1974 * 1975 * xmlPushInput: switch to a new input stream which is stacked on top 1976 * of the previous one(s). 
1977 * Returns -1 in case of error or the index in the input stack 1978 */ 1979int 1980xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1981 int ret; 1982 if (input == NULL) return(-1); 1983 1984 if (xmlParserDebugEntities) { 1985 if ((ctxt->input != NULL) && (ctxt->input->filename)) 1986 xmlGenericError(xmlGenericErrorContext, 1987 "%s(%d): ", ctxt->input->filename, 1988 ctxt->input->line); 1989 xmlGenericError(xmlGenericErrorContext, 1990 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 1991 } 1992 ret = inputPush(ctxt, input); 1993 GROW; 1994 return(ret); 1995} 1996 1997/** 1998 * xmlParseCharRef: 1999 * @ctxt: an XML parser context 2000 * 2001 * parse Reference declarations 2002 * 2003 * [66] CharRef ::= '&#' [0-9]+ ';' | 2004 * '&#x' [0-9a-fA-F]+ ';' 2005 * 2006 * [ WFC: Legal Character ] 2007 * Characters referred to using character references must match the 2008 * production for Char. 2009 * 2010 * Returns the value parsed (as an int), 0 in case of error 2011 */ 2012int 2013xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2014 unsigned int val = 0; 2015 int count = 0; 2016 unsigned int outofrange = 0; 2017 2018 /* 2019 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2020 */ 2021 if ((RAW == '&') && (NXT(1) == '#') && 2022 (NXT(2) == 'x')) { 2023 SKIP(3); 2024 GROW; 2025 while (RAW != ';') { /* loop blocked by count */ 2026 if (count++ > 20) { 2027 count = 0; 2028 GROW; 2029 } 2030 if ((RAW >= '0') && (RAW <= '9')) 2031 val = val * 16 + (CUR - '0'); 2032 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2033 val = val * 16 + (CUR - 'a') + 10; 2034 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2035 val = val * 16 + (CUR - 'A') + 10; 2036 else { 2037 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2038 val = 0; 2039 break; 2040 } 2041 if (val > 0x10FFFF) 2042 outofrange = val; 2043 2044 NEXT; 2045 count++; 2046 } 2047 if (RAW == ';') { 2048 /* on purpose to avoid reentrancy problems with NEXT and SKIP 
*/ 2049 ctxt->input->col++; 2050 ctxt->nbChars ++; 2051 ctxt->input->cur++; 2052 } 2053 } else if ((RAW == '&') && (NXT(1) == '#')) { 2054 SKIP(2); 2055 GROW; 2056 while (RAW != ';') { /* loop blocked by count */ 2057 if (count++ > 20) { 2058 count = 0; 2059 GROW; 2060 } 2061 if ((RAW >= '0') && (RAW <= '9')) 2062 val = val * 10 + (CUR - '0'); 2063 else { 2064 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2065 val = 0; 2066 break; 2067 } 2068 if (val > 0x10FFFF) 2069 outofrange = val; 2070 2071 NEXT; 2072 count++; 2073 } 2074 if (RAW == ';') { 2075 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2076 ctxt->input->col++; 2077 ctxt->nbChars ++; 2078 ctxt->input->cur++; 2079 } 2080 } else { 2081 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2082 } 2083 2084 /* 2085 * [ WFC: Legal Character ] 2086 * Characters referred to using character references must match the 2087 * production for Char. 2088 */ 2089 if ((IS_CHAR(val) && (outofrange == 0))) { 2090 return(val); 2091 } else { 2092 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2093 "xmlParseCharRef: invalid xmlChar value %d\n", 2094 val); 2095 } 2096 return(0); 2097} 2098 2099/** 2100 * xmlParseStringCharRef: 2101 * @ctxt: an XML parser context 2102 * @str: a pointer to an index in the string 2103 * 2104 * parse Reference declarations, variant parsing from a string rather 2105 * than an an input flow. 2106 * 2107 * [66] CharRef ::= '&#' [0-9]+ ';' | 2108 * '&#x' [0-9a-fA-F]+ ';' 2109 * 2110 * [ WFC: Legal Character ] 2111 * Characters referred to using character references must match the 2112 * production for Char. 
2113 * 2114 * Returns the value parsed (as an int), 0 in case of error, str will be 2115 * updated to the current value of the index 2116 */ 2117static int 2118xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2119 const xmlChar *ptr; 2120 xmlChar cur; 2121 unsigned int val = 0; 2122 unsigned int outofrange = 0; 2123 2124 if ((str == NULL) || (*str == NULL)) return(0); 2125 ptr = *str; 2126 cur = *ptr; 2127 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2128 ptr += 3; 2129 cur = *ptr; 2130 while (cur != ';') { /* Non input consuming loop */ 2131 if ((cur >= '0') && (cur <= '9')) 2132 val = val * 16 + (cur - '0'); 2133 else if ((cur >= 'a') && (cur <= 'f')) 2134 val = val * 16 + (cur - 'a') + 10; 2135 else if ((cur >= 'A') && (cur <= 'F')) 2136 val = val * 16 + (cur - 'A') + 10; 2137 else { 2138 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2139 val = 0; 2140 break; 2141 } 2142 if (val > 0x10FFFF) 2143 outofrange = val; 2144 2145 ptr++; 2146 cur = *ptr; 2147 } 2148 if (cur == ';') 2149 ptr++; 2150 } else if ((cur == '&') && (ptr[1] == '#')){ 2151 ptr += 2; 2152 cur = *ptr; 2153 while (cur != ';') { /* Non input consuming loops */ 2154 if ((cur >= '0') && (cur <= '9')) 2155 val = val * 10 + (cur - '0'); 2156 else { 2157 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2158 val = 0; 2159 break; 2160 } 2161 if (val > 0x10FFFF) 2162 outofrange = val; 2163 2164 ptr++; 2165 cur = *ptr; 2166 } 2167 if (cur == ';') 2168 ptr++; 2169 } else { 2170 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2171 return(0); 2172 } 2173 *str = ptr; 2174 2175 /* 2176 * [ WFC: Legal Character ] 2177 * Characters referred to using character references must match the 2178 * production for Char. 
2179 */ 2180 if ((IS_CHAR(val) && (outofrange == 0))) { 2181 return(val); 2182 } else { 2183 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2184 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2185 val); 2186 } 2187 return(0); 2188} 2189 2190/** 2191 * xmlNewBlanksWrapperInputStream: 2192 * @ctxt: an XML parser context 2193 * @entity: an Entity pointer 2194 * 2195 * Create a new input stream for wrapping 2196 * blanks around a PEReference 2197 * 2198 * Returns the new input stream or NULL 2199 */ 2200 2201static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2202 2203static xmlParserInputPtr 2204xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2205 xmlParserInputPtr input; 2206 xmlChar *buffer; 2207 size_t length; 2208 if (entity == NULL) { 2209 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2210 "xmlNewBlanksWrapperInputStream entity\n"); 2211 return(NULL); 2212 } 2213 if (xmlParserDebugEntities) 2214 xmlGenericError(xmlGenericErrorContext, 2215 "new blanks wrapper for entity: %s\n", entity->name); 2216 input = xmlNewInputStream(ctxt); 2217 if (input == NULL) { 2218 return(NULL); 2219 } 2220 length = xmlStrlen(entity->name) + 5; 2221 buffer = xmlMallocAtomic(length); 2222 if (buffer == NULL) { 2223 xmlErrMemory(ctxt, NULL); 2224 xmlFree(input); 2225 return(NULL); 2226 } 2227 buffer [0] = ' '; 2228 buffer [1] = '%'; 2229 buffer [length-3] = ';'; 2230 buffer [length-2] = ' '; 2231 buffer [length-1] = 0; 2232 memcpy(buffer + 2, entity->name, length - 5); 2233 input->free = deallocblankswrapper; 2234 input->base = buffer; 2235 input->cur = buffer; 2236 input->length = length; 2237 input->end = &buffer[length]; 2238 return(input); 2239} 2240 2241/** 2242 * xmlParserHandlePEReference: 2243 * @ctxt: the parser context 2244 * 2245 * [69] PEReference ::= '%' Name ';' 2246 * 2247 * [ WFC: No Recursion ] 2248 * A parsed entity must not contain a recursive 2249 * reference to itself, either directly or indirectly. 
2250 * 2251 * [ WFC: Entity Declared ] 2252 * In a document without any DTD, a document with only an internal DTD 2253 * subset which contains no parameter entity references, or a document 2254 * with "standalone='yes'", ... ... The declaration of a parameter 2255 * entity must precede any reference to it... 2256 * 2257 * [ VC: Entity Declared ] 2258 * In a document with an external subset or external parameter entities 2259 * with "standalone='no'", ... ... The declaration of a parameter entity 2260 * must precede any reference to it... 2261 * 2262 * [ WFC: In DTD ] 2263 * Parameter-entity references may only appear in the DTD. 2264 * NOTE: misleading but this is handled. 2265 * 2266 * A PEReference may have been detected in the current input stream 2267 * the handling is done accordingly to 2268 * http://www.w3.org/TR/REC-xml#entproc 2269 * i.e. 2270 * - Included in literal in entity values 2271 * - Included as Parameter Entity reference within DTDs 2272 */ 2273void 2274xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2275 const xmlChar *name; 2276 xmlEntityPtr entity = NULL; 2277 xmlParserInputPtr input; 2278 2279 if (RAW != '%') return; 2280 switch(ctxt->instate) { 2281 case XML_PARSER_CDATA_SECTION: 2282 return; 2283 case XML_PARSER_COMMENT: 2284 return; 2285 case XML_PARSER_START_TAG: 2286 return; 2287 case XML_PARSER_END_TAG: 2288 return; 2289 case XML_PARSER_EOF: 2290 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2291 return; 2292 case XML_PARSER_PROLOG: 2293 case XML_PARSER_START: 2294 case XML_PARSER_MISC: 2295 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2296 return; 2297 case XML_PARSER_ENTITY_DECL: 2298 case XML_PARSER_CONTENT: 2299 case XML_PARSER_ATTRIBUTE_VALUE: 2300 case XML_PARSER_PI: 2301 case XML_PARSER_SYSTEM_LITERAL: 2302 case XML_PARSER_PUBLIC_LITERAL: 2303 /* we just ignore it there */ 2304 return; 2305 case XML_PARSER_EPILOG: 2306 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2307 return; 2308 case XML_PARSER_ENTITY_VALUE: 2309 
/* 2310 * NOTE: in the case of entity values, we don't do the 2311 * substitution here since we need the literal 2312 * entity value to be able to save the internal 2313 * subset of the document. 2314 * This will be handled by xmlStringDecodeEntities 2315 */ 2316 return; 2317 case XML_PARSER_DTD: 2318 /* 2319 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2320 * In the internal DTD subset, parameter-entity references 2321 * can occur only where markup declarations can occur, not 2322 * within markup declarations. 2323 * In that case this is handled in xmlParseMarkupDecl 2324 */ 2325 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2326 return; 2327 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2328 return; 2329 break; 2330 case XML_PARSER_IGNORE: 2331 return; 2332 } 2333 2334 NEXT; 2335 name = xmlParseName(ctxt); 2336 if (xmlParserDebugEntities) 2337 xmlGenericError(xmlGenericErrorContext, 2338 "PEReference: %s\n", name); 2339 if (name == NULL) { 2340 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2341 } else { 2342 if (RAW == ';') { 2343 NEXT; 2344 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2345 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2346 if (entity == NULL) { 2347 2348 /* 2349 * [ WFC: Entity Declared ] 2350 * In a document without any DTD, a document with only an 2351 * internal DTD subset which contains no parameter entity 2352 * references, or a document with "standalone='yes'", ... 2353 * ... The declaration of a parameter entity must precede 2354 * any reference to it... 2355 */ 2356 if ((ctxt->standalone == 1) || 2357 ((ctxt->hasExternalSubset == 0) && 2358 (ctxt->hasPErefs == 0))) { 2359 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2360 "PEReference: %%%s; not found\n", name); 2361 } else { 2362 /* 2363 * [ VC: Entity Declared ] 2364 * In a document with an external subset or external 2365 * parameter entities with "standalone='no'", ... 2366 * ... 
The declaration of a parameter entity must precede 2367 * any reference to it... 2368 */ 2369 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2370 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2371 "PEReference: %%%s; not found\n", 2372 name, NULL); 2373 } else 2374 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2375 "PEReference: %%%s; not found\n", 2376 name, NULL); 2377 ctxt->valid = 0; 2378 } 2379 } else if (ctxt->input->free != deallocblankswrapper) { 2380 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2381 if (xmlPushInput(ctxt, input) < 0) 2382 return; 2383 } else { 2384 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2385 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2386 xmlChar start[4]; 2387 xmlCharEncoding enc; 2388 2389 /* 2390 * handle the extra spaces added before and after 2391 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2392 * this is done independently. 2393 */ 2394 input = xmlNewEntityInputStream(ctxt, entity); 2395 if (xmlPushInput(ctxt, input) < 0) 2396 return; 2397 2398 /* 2399 * Get the 4 first bytes and decode the charset 2400 * if enc != XML_CHAR_ENCODING_NONE 2401 * plug some encoding conversion routines. 2402 * Note that, since we may have some non-UTF8 2403 * encoding (like UTF16, bug 135229), the 'length' 2404 * is not known, but we can calculate based upon 2405 * the amount of data in the buffer. 
2406 */ 2407 GROW 2408 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2409 start[0] = RAW; 2410 start[1] = NXT(1); 2411 start[2] = NXT(2); 2412 start[3] = NXT(3); 2413 enc = xmlDetectCharEncoding(start, 4); 2414 if (enc != XML_CHAR_ENCODING_NONE) { 2415 xmlSwitchEncoding(ctxt, enc); 2416 } 2417 } 2418 2419 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2420 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2421 (IS_BLANK_CH(NXT(5)))) { 2422 xmlParseTextDecl(ctxt); 2423 } 2424 } else { 2425 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2426 "PEReference: %s is not a parameter entity\n", 2427 name); 2428 } 2429 } 2430 } else { 2431 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2432 } 2433 } 2434} 2435 2436/* 2437 * Macro used to grow the current buffer. 2438 */ 2439#define growBuffer(buffer, n) { \ 2440 xmlChar *tmp; \ 2441 buffer##_size *= 2; \ 2442 buffer##_size += n; \ 2443 tmp = (xmlChar *) \ 2444 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2445 if (tmp == NULL) goto mem_error; \ 2446 buffer = tmp; \ 2447} 2448 2449/** 2450 * xmlStringLenDecodeEntities: 2451 * @ctxt: the parser context 2452 * @str: the input string 2453 * @len: the string length 2454 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2455 * @end: an end marker xmlChar, 0 if none 2456 * @end2: an end marker xmlChar, 0 if none 2457 * @end3: an end marker xmlChar, 0 if none 2458 * 2459 * Takes a entity string content and process to do the adequate substitutions. 2460 * 2461 * [67] Reference ::= EntityRef | CharRef 2462 * 2463 * [69] PEReference ::= '%' Name ';' 2464 * 2465 * Returns A newly allocated string with the substitution done. The caller 2466 * must deallocate it ! 
2467 */ 2468xmlChar * 2469xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2470 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2471 xmlChar *buffer = NULL; 2472 int buffer_size = 0; 2473 2474 xmlChar *current = NULL; 2475 xmlChar *rep = NULL; 2476 const xmlChar *last; 2477 xmlEntityPtr ent; 2478 int c,l; 2479 int nbchars = 0; 2480 2481 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2482 return(NULL); 2483 last = str + len; 2484 2485 if (((ctxt->depth > 40) && 2486 ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2487 (ctxt->depth > 1024)) { 2488 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2489 return(NULL); 2490 } 2491 2492 /* 2493 * allocate a translation buffer. 2494 */ 2495 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2496 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 2497 if (buffer == NULL) goto mem_error; 2498 2499 /* 2500 * OK loop until we reach one of the ending char or a size limit. 2501 * we are operating on already parsed values. 
2502 */ 2503 if (str < last) 2504 c = CUR_SCHAR(str, l); 2505 else 2506 c = 0; 2507 while ((c != 0) && (c != end) && /* non input consuming loop */ 2508 (c != end2) && (c != end3)) { 2509 2510 if (c == 0) break; 2511 if ((c == '&') && (str[1] == '#')) { 2512 int val = xmlParseStringCharRef(ctxt, &str); 2513 if (val != 0) { 2514 COPY_BUF(0,buffer,nbchars,val); 2515 } 2516 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2517 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2518 } 2519 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2520 if (xmlParserDebugEntities) 2521 xmlGenericError(xmlGenericErrorContext, 2522 "String decoding Entity Reference: %.30s\n", 2523 str); 2524 ent = xmlParseStringEntityRef(ctxt, &str); 2525 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2526 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2527 goto int_error; 2528 if (ent != NULL) 2529 ctxt->nbentities += ent->checked; 2530 if ((ent != NULL) && 2531 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2532 if (ent->content != NULL) { 2533 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2534 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2535 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2536 } 2537 } else { 2538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2539 "predefined entity has no content\n"); 2540 } 2541 } else if ((ent != NULL) && (ent->content != NULL)) { 2542 ctxt->depth++; 2543 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2544 0, 0, 0); 2545 ctxt->depth--; 2546 2547 if (rep != NULL) { 2548 current = rep; 2549 while (*current != 0) { /* non input consuming loop */ 2550 buffer[nbchars++] = *current++; 2551 if (nbchars > 2552 buffer_size - XML_PARSER_BUFFER_SIZE) { 2553 if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2554 goto int_error; 2555 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2556 } 2557 } 2558 xmlFree(rep); 2559 rep = NULL; 2560 } 2561 } else if (ent != NULL) { 2562 int i = xmlStrlen(ent->name); 2563 const xmlChar *cur = ent->name; 2564 2565 
buffer[nbchars++] = '&'; 2566 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2567 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2568 } 2569 for (;i > 0;i--) 2570 buffer[nbchars++] = *cur++; 2571 buffer[nbchars++] = ';'; 2572 } 2573 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2574 if (xmlParserDebugEntities) 2575 xmlGenericError(xmlGenericErrorContext, 2576 "String decoding PE Reference: %.30s\n", str); 2577 ent = xmlParseStringPEReference(ctxt, &str); 2578 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2579 goto int_error; 2580 if (ent != NULL) 2581 ctxt->nbentities += ent->checked; 2582 if (ent != NULL) { 2583 if (ent->content == NULL) { 2584 xmlLoadEntityContent(ctxt, ent); 2585 } 2586 ctxt->depth++; 2587 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2588 0, 0, 0); 2589 ctxt->depth--; 2590 if (rep != NULL) { 2591 current = rep; 2592 while (*current != 0) { /* non input consuming loop */ 2593 buffer[nbchars++] = *current++; 2594 if (nbchars > 2595 buffer_size - XML_PARSER_BUFFER_SIZE) { 2596 if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2597 goto int_error; 2598 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2599 } 2600 } 2601 xmlFree(rep); 2602 rep = NULL; 2603 } 2604 } 2605 } else { 2606 COPY_BUF(l,buffer,nbchars,c); 2607 str += l; 2608 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2609 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2610 } 2611 } 2612 if (str < last) 2613 c = CUR_SCHAR(str, l); 2614 else 2615 c = 0; 2616 } 2617 buffer[nbchars++] = 0; 2618 return(buffer); 2619 2620mem_error: 2621 xmlErrMemory(ctxt, NULL); 2622int_error: 2623 if (rep != NULL) 2624 xmlFree(rep); 2625 if (buffer != NULL) 2626 xmlFree(buffer); 2627 return(NULL); 2628} 2629 2630/** 2631 * xmlStringDecodeEntities: 2632 * @ctxt: the parser context 2633 * @str: the input string 2634 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2635 * @end: an end marker xmlChar, 0 if none 2636 * @end2: an end marker xmlChar, 0 if none 2637 * 
@end3: an end marker xmlChar, 0 if none 2638 * 2639 * Takes a entity string content and process to do the adequate substitutions. 2640 * 2641 * [67] Reference ::= EntityRef | CharRef 2642 * 2643 * [69] PEReference ::= '%' Name ';' 2644 * 2645 * Returns A newly allocated string with the substitution done. The caller 2646 * must deallocate it ! 2647 */ 2648xmlChar * 2649xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2650 xmlChar end, xmlChar end2, xmlChar end3) { 2651 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2652 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2653 end, end2, end3)); 2654} 2655 2656/************************************************************************ 2657 * * 2658 * Commodity functions, cleanup needed ? * 2659 * * 2660 ************************************************************************/ 2661 2662/** 2663 * areBlanks: 2664 * @ctxt: an XML parser context 2665 * @str: a xmlChar * 2666 * @len: the size of @str 2667 * @blank_chars: we know the chars are blanks 2668 * 2669 * Is this a sequence of blank chars that one can ignore ? 2670 * 2671 * Returns 1 if ignorable 0 otherwise. 2672 */ 2673 2674static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2675 int blank_chars) { 2676 int i, ret; 2677 xmlNodePtr lastChild; 2678 2679 /* 2680 * Don't spend time trying to differentiate them, the same callback is 2681 * used ! 2682 */ 2683 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2684 return(0); 2685 2686 /* 2687 * Check for xml:space value. 
2688 */ 2689 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2690 (*(ctxt->space) == -2)) 2691 return(0); 2692 2693 /* 2694 * Check that the string is made of blanks 2695 */ 2696 if (blank_chars == 0) { 2697 for (i = 0;i < len;i++) 2698 if (!(IS_BLANK_CH(str[i]))) return(0); 2699 } 2700 2701 /* 2702 * Look if the element is mixed content in the DTD if available 2703 */ 2704 if (ctxt->node == NULL) return(0); 2705 if (ctxt->myDoc != NULL) { 2706 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2707 if (ret == 0) return(1); 2708 if (ret == 1) return(0); 2709 } 2710 2711 /* 2712 * Otherwise, heuristic :-\ 2713 */ 2714 if ((RAW != '<') && (RAW != 0xD)) return(0); 2715 if ((ctxt->node->children == NULL) && 2716 (RAW == '<') && (NXT(1) == '/')) return(0); 2717 2718 lastChild = xmlGetLastChild(ctxt->node); 2719 if (lastChild == NULL) { 2720 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2721 (ctxt->node->content != NULL)) return(0); 2722 } else if (xmlNodeIsText(lastChild)) 2723 return(0); 2724 else if ((ctxt->node->children != NULL) && 2725 (xmlNodeIsText(ctxt->node->children))) 2726 return(0); 2727 return(1); 2728} 2729 2730/************************************************************************ 2731 * * 2732 * Extra stuff for namespace support * 2733 * Relates to http://www.w3.org/TR/WD-xml-names * 2734 * * 2735 ************************************************************************/ 2736 2737/** 2738 * xmlSplitQName: 2739 * @ctxt: an XML parser context 2740 * @name: an XML parser context 2741 * @prefix: a xmlChar ** 2742 * 2743 * parse an UTF8 encoded XML qualified name string 2744 * 2745 * [NS 5] QName ::= (Prefix ':')? LocalPart 2746 * 2747 * [NS 6] Prefix ::= NCName 2748 * 2749 * [NS 7] LocalPart ::= NCName 2750 * 2751 * Returns the local part, and prefix is updated 2752 * to get the Prefix if any. 
2753 */ 2754 2755xmlChar * 2756xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2757 xmlChar buf[XML_MAX_NAMELEN + 5]; 2758 xmlChar *buffer = NULL; 2759 int len = 0; 2760 int max = XML_MAX_NAMELEN; 2761 xmlChar *ret = NULL; 2762 const xmlChar *cur = name; 2763 int c; 2764 2765 if (prefix == NULL) return(NULL); 2766 *prefix = NULL; 2767 2768 if (cur == NULL) return(NULL); 2769 2770#ifndef XML_XML_NAMESPACE 2771 /* xml: prefix is not really a namespace */ 2772 if ((cur[0] == 'x') && (cur[1] == 'm') && 2773 (cur[2] == 'l') && (cur[3] == ':')) 2774 return(xmlStrdup(name)); 2775#endif 2776 2777 /* nasty but well=formed */ 2778 if (cur[0] == ':') 2779 return(xmlStrdup(name)); 2780 2781 c = *cur++; 2782 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2783 buf[len++] = c; 2784 c = *cur++; 2785 } 2786 if (len >= max) { 2787 /* 2788 * Okay someone managed to make a huge name, so he's ready to pay 2789 * for the processing speed. 2790 */ 2791 max = len * 2; 2792 2793 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2794 if (buffer == NULL) { 2795 xmlErrMemory(ctxt, NULL); 2796 return(NULL); 2797 } 2798 memcpy(buffer, buf, len); 2799 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2800 if (len + 10 > max) { 2801 xmlChar *tmp; 2802 2803 max *= 2; 2804 tmp = (xmlChar *) xmlRealloc(buffer, 2805 max * sizeof(xmlChar)); 2806 if (tmp == NULL) { 2807 xmlFree(buffer); 2808 xmlErrMemory(ctxt, NULL); 2809 return(NULL); 2810 } 2811 buffer = tmp; 2812 } 2813 buffer[len++] = c; 2814 c = *cur++; 2815 } 2816 buffer[len] = 0; 2817 } 2818 2819 if ((c == ':') && (*cur == 0)) { 2820 if (buffer != NULL) 2821 xmlFree(buffer); 2822 *prefix = NULL; 2823 return(xmlStrdup(name)); 2824 } 2825 2826 if (buffer == NULL) 2827 ret = xmlStrndup(buf, len); 2828 else { 2829 ret = buffer; 2830 buffer = NULL; 2831 max = XML_MAX_NAMELEN; 2832 } 2833 2834 2835 if (c == ':') { 2836 c = *cur; 2837 *prefix = ret; 2838 if (c == 0) { 2839 
return(xmlStrndup(BAD_CAST "", 0)); 2840 } 2841 len = 0; 2842 2843 /* 2844 * Check that the first character is proper to start 2845 * a new name 2846 */ 2847 if (!(((c >= 0x61) && (c <= 0x7A)) || 2848 ((c >= 0x41) && (c <= 0x5A)) || 2849 (c == '_') || (c == ':'))) { 2850 int l; 2851 int first = CUR_SCHAR(cur, l); 2852 2853 if (!IS_LETTER(first) && (first != '_')) { 2854 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2855 "Name %s is not XML Namespace compliant\n", 2856 name); 2857 } 2858 } 2859 cur++; 2860 2861 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2862 buf[len++] = c; 2863 c = *cur++; 2864 } 2865 if (len >= max) { 2866 /* 2867 * Okay someone managed to make a huge name, so he's ready to pay 2868 * for the processing speed. 2869 */ 2870 max = len * 2; 2871 2872 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2873 if (buffer == NULL) { 2874 xmlErrMemory(ctxt, NULL); 2875 return(NULL); 2876 } 2877 memcpy(buffer, buf, len); 2878 while (c != 0) { /* tested bigname2.xml */ 2879 if (len + 10 > max) { 2880 xmlChar *tmp; 2881 2882 max *= 2; 2883 tmp = (xmlChar *) xmlRealloc(buffer, 2884 max * sizeof(xmlChar)); 2885 if (tmp == NULL) { 2886 xmlErrMemory(ctxt, NULL); 2887 xmlFree(buffer); 2888 return(NULL); 2889 } 2890 buffer = tmp; 2891 } 2892 buffer[len++] = c; 2893 c = *cur++; 2894 } 2895 buffer[len] = 0; 2896 } 2897 2898 if (buffer == NULL) 2899 ret = xmlStrndup(buf, len); 2900 else { 2901 ret = buffer; 2902 } 2903 } 2904 2905 return(ret); 2906} 2907 2908/************************************************************************ 2909 * * 2910 * The parser itself * 2911 * Relates to http://www.w3.org/TR/REC-xml * 2912 * * 2913 ************************************************************************/ 2914 2915/************************************************************************ 2916 * * 2917 * Routines to parse Name, NCName and NmToken * 2918 * * 2919 ************************************************************************/ 2920unsigned long 
nbParseName = 0; 2921unsigned long nbParseNmToken = 0; 2922unsigned long nbParseNCName = 0; 2923unsigned long nbParseNCNameComplex = 0; 2924unsigned long nbParseNameComplex = 0; 2925unsigned long nbParseStringName = 0; 2926/* 2927 * The two following functions are related to the change of accepted 2928 * characters for Name and NmToken in the Revision 5 of XML-1.0 2929 * They correspond to the modified production [4] and the new production [4a] 2930 * changes in that revision. Also note that the macros used for the 2931 * productions Letter, Digit, CombiningChar and Extender are not needed 2932 * anymore. 2933 * We still keep compatibility to pre-revision5 parsing semantic if the 2934 * new XML_PARSE_OLD10 option is given to the parser. 2935 */ 2936static int 2937xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 2938 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 2939 /* 2940 * Use the new checks of production [4] [4a] amd [5] of the 2941 * Update 5 of XML-1.0 2942 */ 2943 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 2944 (((c >= 'a') && (c <= 'z')) || 2945 ((c >= 'A') && (c <= 'Z')) || 2946 (c == '_') || (c == ':') || 2947 ((c >= 0xC0) && (c <= 0xD6)) || 2948 ((c >= 0xD8) && (c <= 0xF6)) || 2949 ((c >= 0xF8) && (c <= 0x2FF)) || 2950 ((c >= 0x370) && (c <= 0x37D)) || 2951 ((c >= 0x37F) && (c <= 0x1FFF)) || 2952 ((c >= 0x200C) && (c <= 0x200D)) || 2953 ((c >= 0x2070) && (c <= 0x218F)) || 2954 ((c >= 0x2C00) && (c <= 0x2FEF)) || 2955 ((c >= 0x3001) && (c <= 0xD7FF)) || 2956 ((c >= 0xF900) && (c <= 0xFDCF)) || 2957 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 2958 ((c >= 0x10000) && (c <= 0xEFFFF)))) 2959 return(1); 2960 } else { 2961 if (IS_LETTER(c) || (c == '_') || (c == ':')) 2962 return(1); 2963 } 2964 return(0); 2965} 2966 2967static int 2968xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 2969 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 2970 /* 2971 * Use the new checks of production [4] [4a] amd [5] of the 2972 * Update 5 of XML-1.0 2973 */ 2974 if 
((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 2975 (((c >= 'a') && (c <= 'z')) || 2976 ((c >= 'A') && (c <= 'Z')) || 2977 ((c >= '0') && (c <= '9')) || /* !start */ 2978 (c == '_') || (c == ':') || 2979 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 2980 ((c >= 0xC0) && (c <= 0xD6)) || 2981 ((c >= 0xD8) && (c <= 0xF6)) || 2982 ((c >= 0xF8) && (c <= 0x2FF)) || 2983 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 2984 ((c >= 0x370) && (c <= 0x37D)) || 2985 ((c >= 0x37F) && (c <= 0x1FFF)) || 2986 ((c >= 0x200C) && (c <= 0x200D)) || 2987 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 2988 ((c >= 0x2070) && (c <= 0x218F)) || 2989 ((c >= 0x2C00) && (c <= 0x2FEF)) || 2990 ((c >= 0x3001) && (c <= 0xD7FF)) || 2991 ((c >= 0xF900) && (c <= 0xFDCF)) || 2992 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 2993 ((c >= 0x10000) && (c <= 0xEFFFF)))) 2994 return(1); 2995 } else { 2996 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2997 (c == '.') || (c == '-') || 2998 (c == '_') || (c == ':') || 2999 (IS_COMBINING(c)) || 3000 (IS_EXTENDER(c))) 3001 return(1); 3002 } 3003 return(0); 3004} 3005 3006static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3007 int *len, int *alloc, int normalize); 3008 3009static const xmlChar * 3010xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3011 int len = 0, l; 3012 int c; 3013 int count = 0; 3014 3015 nbParseNameComplex++; 3016 3017 /* 3018 * Handler for more complex cases 3019 */ 3020 GROW; 3021 c = CUR_CHAR(l); 3022 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3023 /* 3024 * Use the new checks of production [4] [4a] amd [5] of the 3025 * Update 5 of XML-1.0 3026 */ 3027 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3028 (!(((c >= 'a') && (c <= 'z')) || 3029 ((c >= 'A') && (c <= 'Z')) || 3030 (c == '_') || (c == ':') || 3031 ((c >= 0xC0) && (c <= 0xD6)) || 3032 ((c >= 0xD8) && (c <= 0xF6)) || 3033 ((c >= 0xF8) && (c <= 0x2FF)) || 3034 ((c >= 0x370) && (c <= 0x37D)) || 3035 ((c >= 0x37F) && (c <= 0x1FFF)) || 3036 
((c >= 0x200C) && (c <= 0x200D)) || 3037 ((c >= 0x2070) && (c <= 0x218F)) || 3038 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3039 ((c >= 0x3001) && (c <= 0xD7FF)) || 3040 ((c >= 0xF900) && (c <= 0xFDCF)) || 3041 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3042 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3043 return(NULL); 3044 } 3045 len += l; 3046 NEXTL(l); 3047 c = CUR_CHAR(l); 3048 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3049 (((c >= 'a') && (c <= 'z')) || 3050 ((c >= 'A') && (c <= 'Z')) || 3051 ((c >= '0') && (c <= '9')) || /* !start */ 3052 (c == '_') || (c == ':') || 3053 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3054 ((c >= 0xC0) && (c <= 0xD6)) || 3055 ((c >= 0xD8) && (c <= 0xF6)) || 3056 ((c >= 0xF8) && (c <= 0x2FF)) || 3057 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3058 ((c >= 0x370) && (c <= 0x37D)) || 3059 ((c >= 0x37F) && (c <= 0x1FFF)) || 3060 ((c >= 0x200C) && (c <= 0x200D)) || 3061 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3062 ((c >= 0x2070) && (c <= 0x218F)) || 3063 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3064 ((c >= 0x3001) && (c <= 0xD7FF)) || 3065 ((c >= 0xF900) && (c <= 0xFDCF)) || 3066 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3067 ((c >= 0x10000) && (c <= 0xEFFFF)) 3068 )) { 3069 if (count++ > 100) { 3070 count = 0; 3071 GROW; 3072 } 3073 len += l; 3074 NEXTL(l); 3075 c = CUR_CHAR(l); 3076 } 3077 } else { 3078 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3079 (!IS_LETTER(c) && (c != '_') && 3080 (c != ':'))) { 3081 return(NULL); 3082 } 3083 len += l; 3084 NEXTL(l); 3085 c = CUR_CHAR(l); 3086 3087 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3088 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3089 (c == '.') || (c == '-') || 3090 (c == '_') || (c == ':') || 3091 (IS_COMBINING(c)) || 3092 (IS_EXTENDER(c)))) { 3093 if (count++ > 100) { 3094 count = 0; 3095 GROW; 3096 } 3097 len += l; 3098 NEXTL(l); 3099 c = CUR_CHAR(l); 3100 } 3101 } 3102 if ((*ctxt->input->cur == '\n') && 
(ctxt->input->cur[-1] == '\r')) 3103 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3104 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3105} 3106 3107/** 3108 * xmlParseName: 3109 * @ctxt: an XML parser context 3110 * 3111 * parse an XML name. 3112 * 3113 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3114 * CombiningChar | Extender 3115 * 3116 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3117 * 3118 * [6] Names ::= Name (#x20 Name)* 3119 * 3120 * Returns the Name parsed or NULL 3121 */ 3122 3123const xmlChar * 3124xmlParseName(xmlParserCtxtPtr ctxt) { 3125 const xmlChar *in; 3126 const xmlChar *ret; 3127 int count = 0; 3128 3129 GROW; 3130 3131 nbParseName++; 3132 3133 /* 3134 * Accelerator for simple ASCII names 3135 */ 3136 in = ctxt->input->cur; 3137 if (((*in >= 0x61) && (*in <= 0x7A)) || 3138 ((*in >= 0x41) && (*in <= 0x5A)) || 3139 (*in == '_') || (*in == ':')) { 3140 in++; 3141 while (((*in >= 0x61) && (*in <= 0x7A)) || 3142 ((*in >= 0x41) && (*in <= 0x5A)) || 3143 ((*in >= 0x30) && (*in <= 0x39)) || 3144 (*in == '_') || (*in == '-') || 3145 (*in == ':') || (*in == '.')) 3146 in++; 3147 if ((*in > 0) && (*in < 0x80)) { 3148 count = in - ctxt->input->cur; 3149 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3150 ctxt->input->cur = in; 3151 ctxt->nbChars += count; 3152 ctxt->input->col += count; 3153 if (ret == NULL) 3154 xmlErrMemory(ctxt, NULL); 3155 return(ret); 3156 } 3157 } 3158 /* accelerator for special cases */ 3159 return(xmlParseNameComplex(ctxt)); 3160} 3161 3162static const xmlChar * 3163xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3164 int len = 0, l; 3165 int c; 3166 int count = 0; 3167 3168 nbParseNCNameComplex++; 3169 3170 /* 3171 * Handler for more complex cases 3172 */ 3173 GROW; 3174 c = CUR_CHAR(l); 3175 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3176 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3177 return(NULL); 3178 } 3179 3180 while ((c != ' 
') && (c != '>') && (c != '/') && /* test bigname.xml */ 3181 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3182 if (count++ > 100) { 3183 count = 0; 3184 GROW; 3185 } 3186 len += l; 3187 NEXTL(l); 3188 c = CUR_CHAR(l); 3189 } 3190 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3191} 3192 3193/** 3194 * xmlParseNCName: 3195 * @ctxt: an XML parser context 3196 * @len: lenght of the string parsed 3197 * 3198 * parse an XML name. 3199 * 3200 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3201 * CombiningChar | Extender 3202 * 3203 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3204 * 3205 * Returns the Name parsed or NULL 3206 */ 3207 3208static const xmlChar * 3209xmlParseNCName(xmlParserCtxtPtr ctxt) { 3210 const xmlChar *in; 3211 const xmlChar *ret; 3212 int count = 0; 3213 3214 nbParseNCName++; 3215 3216 /* 3217 * Accelerator for simple ASCII names 3218 */ 3219 in = ctxt->input->cur; 3220 if (((*in >= 0x61) && (*in <= 0x7A)) || 3221 ((*in >= 0x41) && (*in <= 0x5A)) || 3222 (*in == '_')) { 3223 in++; 3224 while (((*in >= 0x61) && (*in <= 0x7A)) || 3225 ((*in >= 0x41) && (*in <= 0x5A)) || 3226 ((*in >= 0x30) && (*in <= 0x39)) || 3227 (*in == '_') || (*in == '-') || 3228 (*in == '.')) 3229 in++; 3230 if ((*in > 0) && (*in < 0x80)) { 3231 count = in - ctxt->input->cur; 3232 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3233 ctxt->input->cur = in; 3234 ctxt->nbChars += count; 3235 ctxt->input->col += count; 3236 if (ret == NULL) { 3237 xmlErrMemory(ctxt, NULL); 3238 } 3239 return(ret); 3240 } 3241 } 3242 return(xmlParseNCNameComplex(ctxt)); 3243} 3244 3245/** 3246 * xmlParseNameAndCompare: 3247 * @ctxt: an XML parser context 3248 * 3249 * parse an XML name and compares for match 3250 * (specialized for endtag parsing) 3251 * 3252 * Returns NULL for an illegal name, (xmlChar*) 1 for success 3253 * and the name for mismatch 3254 */ 3255 3256static const xmlChar * 3257xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) 
{ 3258 register const xmlChar *cmp = other; 3259 register const xmlChar *in; 3260 const xmlChar *ret; 3261 3262 GROW; 3263 3264 in = ctxt->input->cur; 3265 while (*in != 0 && *in == *cmp) { 3266 ++in; 3267 ++cmp; 3268 ctxt->input->col++; 3269 } 3270 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3271 /* success */ 3272 ctxt->input->cur = in; 3273 return (const xmlChar*) 1; 3274 } 3275 /* failure (or end of input buffer), check with full function */ 3276 ret = xmlParseName (ctxt); 3277 /* strings coming from the dictionnary direct compare possible */ 3278 if (ret == other) { 3279 return (const xmlChar*) 1; 3280 } 3281 return ret; 3282} 3283 3284/** 3285 * xmlParseStringName: 3286 * @ctxt: an XML parser context 3287 * @str: a pointer to the string pointer (IN/OUT) 3288 * 3289 * parse an XML name. 3290 * 3291 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3292 * CombiningChar | Extender 3293 * 3294 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3295 * 3296 * [6] Names ::= Name (#x20 Name)* 3297 * 3298 * Returns the Name parsed or NULL. The @str pointer 3299 * is updated to the current location in the string. 3300 */ 3301 3302static xmlChar * 3303xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3304 xmlChar buf[XML_MAX_NAMELEN + 5]; 3305 const xmlChar *cur = *str; 3306 int len = 0, l; 3307 int c; 3308 3309 nbParseStringName++; 3310 3311 c = CUR_SCHAR(cur, l); 3312 if (!xmlIsNameStartChar(ctxt, c)) { 3313 return(NULL); 3314 } 3315 3316 COPY_BUF(l,buf,len,c); 3317 cur += l; 3318 c = CUR_SCHAR(cur, l); 3319 while (xmlIsNameChar(ctxt, c)) { 3320 COPY_BUF(l,buf,len,c); 3321 cur += l; 3322 c = CUR_SCHAR(cur, l); 3323 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3324 /* 3325 * Okay someone managed to make a huge name, so he's ready to pay 3326 * for the processing speed. 
3327 */ 3328 xmlChar *buffer; 3329 int max = len * 2; 3330 3331 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3332 if (buffer == NULL) { 3333 xmlErrMemory(ctxt, NULL); 3334 return(NULL); 3335 } 3336 memcpy(buffer, buf, len); 3337 while (xmlIsNameChar(ctxt, c)) { 3338 if (len + 10 > max) { 3339 xmlChar *tmp; 3340 max *= 2; 3341 tmp = (xmlChar *) xmlRealloc(buffer, 3342 max * sizeof(xmlChar)); 3343 if (tmp == NULL) { 3344 xmlErrMemory(ctxt, NULL); 3345 xmlFree(buffer); 3346 return(NULL); 3347 } 3348 buffer = tmp; 3349 } 3350 COPY_BUF(l,buffer,len,c); 3351 cur += l; 3352 c = CUR_SCHAR(cur, l); 3353 } 3354 buffer[len] = 0; 3355 *str = cur; 3356 return(buffer); 3357 } 3358 } 3359 *str = cur; 3360 return(xmlStrndup(buf, len)); 3361} 3362 3363/** 3364 * xmlParseNmtoken: 3365 * @ctxt: an XML parser context 3366 * 3367 * parse an XML Nmtoken. 3368 * 3369 * [7] Nmtoken ::= (NameChar)+ 3370 * 3371 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3372 * 3373 * Returns the Nmtoken parsed or NULL 3374 */ 3375 3376xmlChar * 3377xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3378 xmlChar buf[XML_MAX_NAMELEN + 5]; 3379 int len = 0, l; 3380 int c; 3381 int count = 0; 3382 3383 nbParseNmToken++; 3384 3385 GROW; 3386 c = CUR_CHAR(l); 3387 3388 while (xmlIsNameChar(ctxt, c)) { 3389 if (count++ > 100) { 3390 count = 0; 3391 GROW; 3392 } 3393 COPY_BUF(l,buf,len,c); 3394 NEXTL(l); 3395 c = CUR_CHAR(l); 3396 if (len >= XML_MAX_NAMELEN) { 3397 /* 3398 * Okay someone managed to make a huge token, so he's ready to pay 3399 * for the processing speed. 
3400 */ 3401 xmlChar *buffer; 3402 int max = len * 2; 3403 3404 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3405 if (buffer == NULL) { 3406 xmlErrMemory(ctxt, NULL); 3407 return(NULL); 3408 } 3409 memcpy(buffer, buf, len); 3410 while (xmlIsNameChar(ctxt, c)) { 3411 if (count++ > 100) { 3412 count = 0; 3413 GROW; 3414 } 3415 if (len + 10 > max) { 3416 xmlChar *tmp; 3417 3418 max *= 2; 3419 tmp = (xmlChar *) xmlRealloc(buffer, 3420 max * sizeof(xmlChar)); 3421 if (tmp == NULL) { 3422 xmlErrMemory(ctxt, NULL); 3423 xmlFree(buffer); 3424 return(NULL); 3425 } 3426 buffer = tmp; 3427 } 3428 COPY_BUF(l,buffer,len,c); 3429 NEXTL(l); 3430 c = CUR_CHAR(l); 3431 } 3432 buffer[len] = 0; 3433 return(buffer); 3434 } 3435 } 3436 if (len == 0) 3437 return(NULL); 3438 return(xmlStrndup(buf, len)); 3439} 3440 3441/** 3442 * xmlParseEntityValue: 3443 * @ctxt: an XML parser context 3444 * @orig: if non-NULL store a copy of the original entity value 3445 * 3446 * parse a value for ENTITY declarations 3447 * 3448 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3449 * "'" ([^%&'] | PEReference | Reference)* "'" 3450 * 3451 * Returns the EntityValue parsed with reference substituted or NULL 3452 */ 3453 3454xmlChar * 3455xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3456 xmlChar *buf = NULL; 3457 int len = 0; 3458 int size = XML_PARSER_BUFFER_SIZE; 3459 int c, l; 3460 xmlChar stop; 3461 xmlChar *ret = NULL; 3462 const xmlChar *cur = NULL; 3463 xmlParserInputPtr input; 3464 3465 if (RAW == '"') stop = '"'; 3466 else if (RAW == '\'') stop = '\''; 3467 else { 3468 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3469 return(NULL); 3470 } 3471 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3472 if (buf == NULL) { 3473 xmlErrMemory(ctxt, NULL); 3474 return(NULL); 3475 } 3476 3477 /* 3478 * The content of the entity definition is copied in a buffer. 
3479 */ 3480 3481 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3482 input = ctxt->input; 3483 GROW; 3484 NEXT; 3485 c = CUR_CHAR(l); 3486 /* 3487 * NOTE: 4.4.5 Included in Literal 3488 * When a parameter entity reference appears in a literal entity 3489 * value, ... a single or double quote character in the replacement 3490 * text is always treated as a normal data character and will not 3491 * terminate the literal. 3492 * In practice it means we stop the loop only when back at parsing 3493 * the initial entity and the quote is found 3494 */ 3495 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 3496 (ctxt->input != input))) { 3497 if (len + 5 >= size) { 3498 xmlChar *tmp; 3499 3500 size *= 2; 3501 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3502 if (tmp == NULL) { 3503 xmlErrMemory(ctxt, NULL); 3504 xmlFree(buf); 3505 return(NULL); 3506 } 3507 buf = tmp; 3508 } 3509 COPY_BUF(l,buf,len,c); 3510 NEXTL(l); 3511 /* 3512 * Pop-up of finished entities. 3513 */ 3514 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3515 xmlPopInput(ctxt); 3516 3517 GROW; 3518 c = CUR_CHAR(l); 3519 if (c == 0) { 3520 GROW; 3521 c = CUR_CHAR(l); 3522 } 3523 } 3524 buf[len] = 0; 3525 3526 /* 3527 * Raise problem w.r.t. '&' and '%' being used in non-entities 3528 * reference constructs. 
Note Charref will be handled in 3529 * xmlStringDecodeEntities() 3530 */ 3531 cur = buf; 3532 while (*cur != 0) { /* non input consuming */ 3533 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3534 xmlChar *name; 3535 xmlChar tmp = *cur; 3536 3537 cur++; 3538 name = xmlParseStringName(ctxt, &cur); 3539 if ((name == NULL) || (*cur != ';')) { 3540 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3541 "EntityValue: '%c' forbidden except for entities references\n", 3542 tmp); 3543 } 3544 if ((tmp == '%') && (ctxt->inSubset == 1) && 3545 (ctxt->inputNr == 1)) { 3546 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3547 } 3548 if (name != NULL) 3549 xmlFree(name); 3550 if (*cur == 0) 3551 break; 3552 } 3553 cur++; 3554 } 3555 3556 /* 3557 * Then PEReference entities are substituted. 3558 */ 3559 if (c != stop) { 3560 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3561 xmlFree(buf); 3562 } else { 3563 NEXT; 3564 /* 3565 * NOTE: 4.4.7 Bypassed 3566 * When a general entity reference appears in the EntityValue in 3567 * an entity declaration, it is bypassed and left as is. 3568 * so XML_SUBSTITUTE_REF is not set here. 3569 */ 3570 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3571 0, 0, 0); 3572 if (orig != NULL) 3573 *orig = buf; 3574 else 3575 xmlFree(buf); 3576 } 3577 3578 return(ret); 3579} 3580 3581/** 3582 * xmlParseAttValueComplex: 3583 * @ctxt: an XML parser context 3584 * @len: the resulting attribute len 3585 * @normalize: wether to apply the inner normalization 3586 * 3587 * parse a value for an attribute, this is the fallback function 3588 * of xmlParseAttValue() when the attribute parsing requires handling 3589 * of non-ASCII characters, or normalization compaction. 3590 * 3591 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3592 */ 3593static xmlChar * 3594xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3595 xmlChar limit = 0; 3596 xmlChar *buf = NULL; 3597 xmlChar *rep = NULL; 3598 int len = 0; 3599 int buf_size = 0; 3600 int c, l, in_space = 0; 3601 xmlChar *current = NULL; 3602 xmlEntityPtr ent; 3603 3604 if (NXT(0) == '"') { 3605 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3606 limit = '"'; 3607 NEXT; 3608 } else if (NXT(0) == '\'') { 3609 limit = '\''; 3610 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3611 NEXT; 3612 } else { 3613 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3614 return(NULL); 3615 } 3616 3617 /* 3618 * allocate a translation buffer. 3619 */ 3620 buf_size = XML_PARSER_BUFFER_SIZE; 3621 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3622 if (buf == NULL) goto mem_error; 3623 3624 /* 3625 * OK loop until we reach one of the ending char or a size limit. 3626 */ 3627 c = CUR_CHAR(l); 3628 while ((NXT(0) != limit) && /* checked */ 3629 (IS_CHAR(c)) && (c != '<')) { 3630 if (c == 0) break; 3631 if (c == '&') { 3632 in_space = 0; 3633 if (NXT(1) == '#') { 3634 int val = xmlParseCharRef(ctxt); 3635 3636 if (val == '&') { 3637 if (ctxt->replaceEntities) { 3638 if (len > buf_size - 10) { 3639 growBuffer(buf, 10); 3640 } 3641 buf[len++] = '&'; 3642 } else { 3643 /* 3644 * The reparsing will be done in xmlStringGetNodeList() 3645 * called by the attribute() function in SAX.c 3646 */ 3647 if (len > buf_size - 10) { 3648 growBuffer(buf, 10); 3649 } 3650 buf[len++] = '&'; 3651 buf[len++] = '#'; 3652 buf[len++] = '3'; 3653 buf[len++] = '8'; 3654 buf[len++] = ';'; 3655 } 3656 } else if (val != 0) { 3657 if (len > buf_size - 10) { 3658 growBuffer(buf, 10); 3659 } 3660 len += xmlCopyChar(0, &buf[len], val); 3661 } 3662 } else { 3663 ent = xmlParseEntityRef(ctxt); 3664 ctxt->nbentities++; 3665 if (ent != NULL) 3666 ctxt->nbentities += ent->owner; 3667 if ((ent != NULL) && 3668 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 
3669 if (len > buf_size - 10) { 3670 growBuffer(buf, 10); 3671 } 3672 if ((ctxt->replaceEntities == 0) && 3673 (ent->content[0] == '&')) { 3674 buf[len++] = '&'; 3675 buf[len++] = '#'; 3676 buf[len++] = '3'; 3677 buf[len++] = '8'; 3678 buf[len++] = ';'; 3679 } else { 3680 buf[len++] = ent->content[0]; 3681 } 3682 } else if ((ent != NULL) && 3683 (ctxt->replaceEntities != 0)) { 3684 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3685 rep = xmlStringDecodeEntities(ctxt, ent->content, 3686 XML_SUBSTITUTE_REF, 3687 0, 0, 0); 3688 if (rep != NULL) { 3689 current = rep; 3690 while (*current != 0) { /* non input consuming */ 3691 buf[len++] = *current++; 3692 if (len > buf_size - 10) { 3693 growBuffer(buf, 10); 3694 } 3695 } 3696 xmlFree(rep); 3697 rep = NULL; 3698 } 3699 } else { 3700 if (len > buf_size - 10) { 3701 growBuffer(buf, 10); 3702 } 3703 if (ent->content != NULL) 3704 buf[len++] = ent->content[0]; 3705 } 3706 } else if (ent != NULL) { 3707 int i = xmlStrlen(ent->name); 3708 const xmlChar *cur = ent->name; 3709 3710 /* 3711 * This may look absurd but is needed to detect 3712 * entities problems 3713 */ 3714 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3715 (ent->content != NULL)) { 3716 rep = xmlStringDecodeEntities(ctxt, ent->content, 3717 XML_SUBSTITUTE_REF, 0, 0, 0); 3718 if (rep != NULL) { 3719 xmlFree(rep); 3720 rep = NULL; 3721 } 3722 } 3723 3724 /* 3725 * Just output the reference 3726 */ 3727 buf[len++] = '&'; 3728 while (len > buf_size - i - 10) { 3729 growBuffer(buf, i + 10); 3730 } 3731 for (;i > 0;i--) 3732 buf[len++] = *cur++; 3733 buf[len++] = ';'; 3734 } 3735 } 3736 } else { 3737 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3738 if ((len != 0) || (!normalize)) { 3739 if ((!normalize) || (!in_space)) { 3740 COPY_BUF(l,buf,len,0x20); 3741 while (len > buf_size - 10) { 3742 growBuffer(buf, 10); 3743 } 3744 } 3745 in_space = 1; 3746 } 3747 } else { 3748 in_space = 0; 3749 COPY_BUF(l,buf,len,c); 3750 if (len > buf_size - 
10) { 3751 growBuffer(buf, 10); 3752 } 3753 } 3754 NEXTL(l); 3755 } 3756 GROW; 3757 c = CUR_CHAR(l); 3758 } 3759 if ((in_space) && (normalize)) { 3760 while (buf[len - 1] == 0x20) len--; 3761 } 3762 buf[len] = 0; 3763 if (RAW == '<') { 3764 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3765 } else if (RAW != limit) { 3766 if ((c != 0) && (!IS_CHAR(c))) { 3767 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 3768 "invalid character in attribute value\n"); 3769 } else { 3770 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3771 "AttValue: ' expected\n"); 3772 } 3773 } else 3774 NEXT; 3775 if (attlen != NULL) *attlen = len; 3776 return(buf); 3777 3778mem_error: 3779 xmlErrMemory(ctxt, NULL); 3780 if (buf != NULL) 3781 xmlFree(buf); 3782 if (rep != NULL) 3783 xmlFree(rep); 3784 return(NULL); 3785} 3786 3787/** 3788 * xmlParseAttValue: 3789 * @ctxt: an XML parser context 3790 * 3791 * parse a value for an attribute 3792 * Note: the parser won't do substitution of entities here, this 3793 * will be handled later in xmlStringGetNodeList 3794 * 3795 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3796 * "'" ([^<&'] | Reference)* "'" 3797 * 3798 * 3.3.3 Attribute-Value Normalization: 3799 * Before the value of an attribute is passed to the application or 3800 * checked for validity, the XML processor must normalize it as follows: 3801 * - a character reference is processed by appending the referenced 3802 * character to the attribute value 3803 * - an entity reference is processed by recursively processing the 3804 * replacement text of the entity 3805 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3806 * appending #x20 to the normalized value, except that only a single 3807 * #x20 is appended for a "#xD#xA" sequence that is part of an external 3808 * parsed entity or the literal entity value of an internal parsed entity 3809 * - other characters are processed by appending them to the normalized value 3810 * If the declared value is not CDATA, then 
the XML processor must further 3811 * process the normalized attribute value by discarding any leading and 3812 * trailing space (#x20) characters, and by replacing sequences of space 3813 * (#x20) characters by a single space (#x20) character. 3814 * All attributes for which no declaration has been read should be treated 3815 * by a non-validating parser as if declared CDATA. 3816 * 3817 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3818 */ 3819 3820 3821xmlChar * 3822xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3823 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 3824 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3825} 3826 3827/** 3828 * xmlParseSystemLiteral: 3829 * @ctxt: an XML parser context 3830 * 3831 * parse an XML Literal 3832 * 3833 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 3834 * 3835 * Returns the SystemLiteral parsed or NULL 3836 */ 3837 3838xmlChar * 3839xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 3840 xmlChar *buf = NULL; 3841 int len = 0; 3842 int size = XML_PARSER_BUFFER_SIZE; 3843 int cur, l; 3844 xmlChar stop; 3845 int state = ctxt->instate; 3846 int count = 0; 3847 3848 SHRINK; 3849 if (RAW == '"') { 3850 NEXT; 3851 stop = '"'; 3852 } else if (RAW == '\'') { 3853 NEXT; 3854 stop = '\''; 3855 } else { 3856 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3857 return(NULL); 3858 } 3859 3860 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3861 if (buf == NULL) { 3862 xmlErrMemory(ctxt, NULL); 3863 return(NULL); 3864 } 3865 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 3866 cur = CUR_CHAR(l); 3867 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 3868 if (len + 5 >= size) { 3869 xmlChar *tmp; 3870 3871 size *= 2; 3872 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3873 if (tmp == NULL) { 3874 xmlFree(buf); 3875 xmlErrMemory(ctxt, NULL); 3876 ctxt->instate = (xmlParserInputState) state; 3877 return(NULL); 3878 } 3879 buf = tmp; 3880 } 3881 count++; 
3882 if (count > 50) { 3883 GROW; 3884 count = 0; 3885 } 3886 COPY_BUF(l,buf,len,cur); 3887 NEXTL(l); 3888 cur = CUR_CHAR(l); 3889 if (cur == 0) { 3890 GROW; 3891 SHRINK; 3892 cur = CUR_CHAR(l); 3893 } 3894 } 3895 buf[len] = 0; 3896 ctxt->instate = (xmlParserInputState) state; 3897 if (!IS_CHAR(cur)) { 3898 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3899 } else { 3900 NEXT; 3901 } 3902 return(buf); 3903} 3904 3905/** 3906 * xmlParsePubidLiteral: 3907 * @ctxt: an XML parser context 3908 * 3909 * parse an XML public literal 3910 * 3911 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 3912 * 3913 * Returns the PubidLiteral parsed or NULL. 3914 */ 3915 3916xmlChar * 3917xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3918 xmlChar *buf = NULL; 3919 int len = 0; 3920 int size = XML_PARSER_BUFFER_SIZE; 3921 xmlChar cur; 3922 xmlChar stop; 3923 int count = 0; 3924 xmlParserInputState oldstate = ctxt->instate; 3925 3926 SHRINK; 3927 if (RAW == '"') { 3928 NEXT; 3929 stop = '"'; 3930 } else if (RAW == '\'') { 3931 NEXT; 3932 stop = '\''; 3933 } else { 3934 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3935 return(NULL); 3936 } 3937 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3938 if (buf == NULL) { 3939 xmlErrMemory(ctxt, NULL); 3940 return(NULL); 3941 } 3942 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3943 cur = CUR; 3944 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 3945 if (len + 1 >= size) { 3946 xmlChar *tmp; 3947 3948 size *= 2; 3949 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3950 if (tmp == NULL) { 3951 xmlErrMemory(ctxt, NULL); 3952 xmlFree(buf); 3953 return(NULL); 3954 } 3955 buf = tmp; 3956 } 3957 buf[len++] = cur; 3958 count++; 3959 if (count > 50) { 3960 GROW; 3961 count = 0; 3962 } 3963 NEXT; 3964 cur = CUR; 3965 if (cur == 0) { 3966 GROW; 3967 SHRINK; 3968 cur = CUR; 3969 } 3970 } 3971 buf[len] = 0; 3972 if (cur != stop) { 3973 xmlFatalErr(ctxt, 
XML_ERR_LITERAL_NOT_FINISHED, NULL); 3974 } else { 3975 NEXT; 3976 } 3977 ctxt->instate = oldstate; 3978 return(buf); 3979} 3980 3981void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 3982 3983/* 3984 * used for the test in the inner loop of the char data testing 3985 */ 3986static const unsigned char test_char_data[256] = { 3987 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3988 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 3989 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3990 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3991 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 3992 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 3993 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 3994 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 3995 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 3996 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 3997 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 3998 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 3999 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4000 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4001 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4002 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4003 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4004 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4005 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4006 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4007 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4008 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4009 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4010 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4011 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4012 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4013 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4014 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4015 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4016 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
4017 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4018 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4019}; 4020 4021/** 4022 * xmlParseCharData: 4023 * @ctxt: an XML parser context 4024 * @cdata: int indicating whether we are within a CDATA section 4025 * 4026 * parse a CharData section. 4027 * if we are within a CDATA section ']]>' marks an end of section. 4028 * 4029 * The right angle bracket (>) may be represented using the string ">", 4030 * and must, for compatibility, be escaped using ">" or a character 4031 * reference when it appears in the string "]]>" in content, when that 4032 * string is not marking the end of a CDATA section. 4033 * 4034 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4035 */ 4036 4037void 4038xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4039 const xmlChar *in; 4040 int nbchar = 0; 4041 int line = ctxt->input->line; 4042 int col = ctxt->input->col; 4043 int ccol; 4044 4045 SHRINK; 4046 GROW; 4047 /* 4048 * Accelerated common case where input don't need to be 4049 * modified before passing it to the handler. 
4050 */ 4051 if (!cdata) { 4052 in = ctxt->input->cur; 4053 do { 4054get_more_space: 4055 while (*in == 0x20) { in++; ctxt->input->col++; } 4056 if (*in == 0xA) { 4057 do { 4058 ctxt->input->line++; ctxt->input->col = 1; 4059 in++; 4060 } while (*in == 0xA); 4061 goto get_more_space; 4062 } 4063 if (*in == '<') { 4064 nbchar = in - ctxt->input->cur; 4065 if (nbchar > 0) { 4066 const xmlChar *tmp = ctxt->input->cur; 4067 ctxt->input->cur = in; 4068 4069 if ((ctxt->sax != NULL) && 4070 (ctxt->sax->ignorableWhitespace != 4071 ctxt->sax->characters)) { 4072 if (areBlanks(ctxt, tmp, nbchar, 1)) { 4073 if (ctxt->sax->ignorableWhitespace != NULL) 4074 ctxt->sax->ignorableWhitespace(ctxt->userData, 4075 tmp, nbchar); 4076 } else { 4077 if (ctxt->sax->characters != NULL) 4078 ctxt->sax->characters(ctxt->userData, 4079 tmp, nbchar); 4080 if (*ctxt->space == -1) 4081 *ctxt->space = -2; 4082 } 4083 } else if ((ctxt->sax != NULL) && 4084 (ctxt->sax->characters != NULL)) { 4085 ctxt->sax->characters(ctxt->userData, 4086 tmp, nbchar); 4087 } 4088 } 4089 return; 4090 } 4091 4092get_more: 4093 ccol = ctxt->input->col; 4094 while (test_char_data[*in]) { 4095 in++; 4096 ccol++; 4097 } 4098 ctxt->input->col = ccol; 4099 if (*in == 0xA) { 4100 do { 4101 ctxt->input->line++; ctxt->input->col = 1; 4102 in++; 4103 } while (*in == 0xA); 4104 goto get_more; 4105 } 4106 if (*in == ']') { 4107 if ((in[1] == ']') && (in[2] == '>')) { 4108 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4109 ctxt->input->cur = in; 4110 return; 4111 } 4112 in++; 4113 ctxt->input->col++; 4114 goto get_more; 4115 } 4116 nbchar = in - ctxt->input->cur; 4117 if (nbchar > 0) { 4118 if ((ctxt->sax != NULL) && 4119 (ctxt->sax->ignorableWhitespace != 4120 ctxt->sax->characters) && 4121 (IS_BLANK_CH(*ctxt->input->cur))) { 4122 const xmlChar *tmp = ctxt->input->cur; 4123 ctxt->input->cur = in; 4124 4125 if (areBlanks(ctxt, tmp, nbchar, 0)) { 4126 if (ctxt->sax->ignorableWhitespace != NULL) 4127 
ctxt->sax->ignorableWhitespace(ctxt->userData, 4128 tmp, nbchar); 4129 } else { 4130 if (ctxt->sax->characters != NULL) 4131 ctxt->sax->characters(ctxt->userData, 4132 tmp, nbchar); 4133 if (*ctxt->space == -1) 4134 *ctxt->space = -2; 4135 } 4136 line = ctxt->input->line; 4137 col = ctxt->input->col; 4138 } else if (ctxt->sax != NULL) { 4139 if (ctxt->sax->characters != NULL) 4140 ctxt->sax->characters(ctxt->userData, 4141 ctxt->input->cur, nbchar); 4142 line = ctxt->input->line; 4143 col = ctxt->input->col; 4144 } 4145 } 4146 ctxt->input->cur = in; 4147 if (*in == 0xD) { 4148 in++; 4149 if (*in == 0xA) { 4150 ctxt->input->cur = in; 4151 in++; 4152 ctxt->input->line++; ctxt->input->col = 1; 4153 continue; /* while */ 4154 } 4155 in--; 4156 } 4157 if (*in == '<') { 4158 return; 4159 } 4160 if (*in == '&') { 4161 return; 4162 } 4163 SHRINK; 4164 GROW; 4165 in = ctxt->input->cur; 4166 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4167 nbchar = 0; 4168 } 4169 ctxt->input->line = line; 4170 ctxt->input->col = col; 4171 xmlParseCharDataComplex(ctxt, cdata); 4172} 4173 4174/** 4175 * xmlParseCharDataComplex: 4176 * @ctxt: an XML parser context 4177 * @cdata: int indicating whether we are within a CDATA section 4178 * 4179 * parse a CharData section.this is the fallback function 4180 * of xmlParseCharData() when the parsing requires handling 4181 * of non-ASCII characters. 
4182 */ 4183void 4184xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4185 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4186 int nbchar = 0; 4187 int cur, l; 4188 int count = 0; 4189 4190 SHRINK; 4191 GROW; 4192 cur = CUR_CHAR(l); 4193 while ((cur != '<') && /* checked */ 4194 (cur != '&') && 4195 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4196 if ((cur == ']') && (NXT(1) == ']') && 4197 (NXT(2) == '>')) { 4198 if (cdata) break; 4199 else { 4200 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4201 } 4202 } 4203 COPY_BUF(l,buf,nbchar,cur); 4204 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4205 buf[nbchar] = 0; 4206 4207 /* 4208 * OK the segment is to be consumed as chars. 4209 */ 4210 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4211 if (areBlanks(ctxt, buf, nbchar, 0)) { 4212 if (ctxt->sax->ignorableWhitespace != NULL) 4213 ctxt->sax->ignorableWhitespace(ctxt->userData, 4214 buf, nbchar); 4215 } else { 4216 if (ctxt->sax->characters != NULL) 4217 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4218 if ((ctxt->sax->characters != 4219 ctxt->sax->ignorableWhitespace) && 4220 (*ctxt->space == -1)) 4221 *ctxt->space = -2; 4222 } 4223 } 4224 nbchar = 0; 4225 } 4226 count++; 4227 if (count > 50) { 4228 GROW; 4229 count = 0; 4230 } 4231 NEXTL(l); 4232 cur = CUR_CHAR(l); 4233 } 4234 if (nbchar != 0) { 4235 buf[nbchar] = 0; 4236 /* 4237 * OK the segment is to be consumed as chars. 
4238 */ 4239 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4240 if (areBlanks(ctxt, buf, nbchar, 0)) { 4241 if (ctxt->sax->ignorableWhitespace != NULL) 4242 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4243 } else { 4244 if (ctxt->sax->characters != NULL) 4245 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4246 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4247 (*ctxt->space == -1)) 4248 *ctxt->space = -2; 4249 } 4250 } 4251 } 4252 if ((cur != 0) && (!IS_CHAR(cur))) { 4253 /* Generate the error and skip the offending character */ 4254 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4255 "PCDATA invalid Char value %d\n", 4256 cur); 4257 NEXTL(l); 4258 } 4259} 4260 4261/** 4262 * xmlParseExternalID: 4263 * @ctxt: an XML parser context 4264 * @publicID: a xmlChar** receiving PubidLiteral 4265 * @strict: indicate whether we should restrict parsing to only 4266 * production [75], see NOTE below 4267 * 4268 * Parse an External ID or a Public ID 4269 * 4270 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4271 * 'PUBLIC' S PubidLiteral S SystemLiteral 4272 * 4273 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4274 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4275 * 4276 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4277 * 4278 * Returns the function returns SystemLiteral and in the second 4279 * case publicID receives PubidLiteral, is strict is off 4280 * it is possible to return NULL and have publicID set. 
4281 */ 4282 4283xmlChar * 4284xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4285 xmlChar *URI = NULL; 4286 4287 SHRINK; 4288 4289 *publicID = NULL; 4290 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4291 SKIP(6); 4292 if (!IS_BLANK_CH(CUR)) { 4293 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4294 "Space required after 'SYSTEM'\n"); 4295 } 4296 SKIP_BLANKS; 4297 URI = xmlParseSystemLiteral(ctxt); 4298 if (URI == NULL) { 4299 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4300 } 4301 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4302 SKIP(6); 4303 if (!IS_BLANK_CH(CUR)) { 4304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4305 "Space required after 'PUBLIC'\n"); 4306 } 4307 SKIP_BLANKS; 4308 *publicID = xmlParsePubidLiteral(ctxt); 4309 if (*publicID == NULL) { 4310 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4311 } 4312 if (strict) { 4313 /* 4314 * We don't handle [83] so "S SystemLiteral" is required. 4315 */ 4316 if (!IS_BLANK_CH(CUR)) { 4317 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4318 "Space required after the Public Identifier\n"); 4319 } 4320 } else { 4321 /* 4322 * We handle [83] so we return immediately, if 4323 * "S SystemLiteral" is not detected. From a purely parsing 4324 * point of view that's a nice mess. 4325 */ 4326 const xmlChar *ptr; 4327 GROW; 4328 4329 ptr = CUR_PTR; 4330 if (!IS_BLANK_CH(*ptr)) return(NULL); 4331 4332 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4333 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4334 } 4335 SKIP_BLANKS; 4336 URI = xmlParseSystemLiteral(ctxt); 4337 if (URI == NULL) { 4338 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4339 } 4340 } 4341 return(URI); 4342} 4343 4344/** 4345 * xmlParseCommentComplex: 4346 * @ctxt: an XML parser context 4347 * @buf: the already parsed part of the buffer 4348 * @len: number of bytes filles in the buffer 4349 * @size: allocated size of the buffer 4350 * 4351 * Skip an XML (SGML) comment <!-- .... 
--> 4352 * The spec says that "For compatibility, the string "--" (double-hyphen) 4353 * must not occur within comments. " 4354 * This is the slow routine in case the accelerator for ascii didn't work 4355 * 4356 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4357 */ 4358static void 4359xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 4360 int q, ql; 4361 int r, rl; 4362 int cur, l; 4363 int count = 0; 4364 int inputid; 4365 4366 inputid = ctxt->input->id; 4367 4368 if (buf == NULL) { 4369 len = 0; 4370 size = XML_PARSER_BUFFER_SIZE; 4371 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4372 if (buf == NULL) { 4373 xmlErrMemory(ctxt, NULL); 4374 return; 4375 } 4376 } 4377 GROW; /* Assure there's enough input data */ 4378 q = CUR_CHAR(ql); 4379 if (q == 0) 4380 goto not_terminated; 4381 if (!IS_CHAR(q)) { 4382 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4383 "xmlParseComment: invalid xmlChar value %d\n", 4384 q); 4385 xmlFree (buf); 4386 return; 4387 } 4388 NEXTL(ql); 4389 r = CUR_CHAR(rl); 4390 if (r == 0) 4391 goto not_terminated; 4392 if (!IS_CHAR(r)) { 4393 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4394 "xmlParseComment: invalid xmlChar value %d\n", 4395 q); 4396 xmlFree (buf); 4397 return; 4398 } 4399 NEXTL(rl); 4400 cur = CUR_CHAR(l); 4401 if (cur == 0) 4402 goto not_terminated; 4403 while (IS_CHAR(cur) && /* checked */ 4404 ((cur != '>') || 4405 (r != '-') || (q != '-'))) { 4406 if ((r == '-') && (q == '-')) { 4407 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4408 } 4409 if (len + 5 >= size) { 4410 xmlChar *new_buf; 4411 size *= 2; 4412 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4413 if (new_buf == NULL) { 4414 xmlFree (buf); 4415 xmlErrMemory(ctxt, NULL); 4416 return; 4417 } 4418 buf = new_buf; 4419 } 4420 COPY_BUF(ql,buf,len,q); 4421 q = r; 4422 ql = rl; 4423 r = cur; 4424 rl = l; 4425 4426 count++; 4427 if (count > 50) { 4428 GROW; 4429 count = 0; 4430 } 4431 
NEXTL(l);
        cur = CUR_CHAR(l);
        if (cur == 0) {
            SHRINK;
            GROW;
            cur = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    if (cur == 0) {
        xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                             "Comment not terminated \n<!--%.50s\n", buf);
    } else if (!IS_CHAR(cur)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseComment: invalid xmlChar value %d\n",
                          cur);
    } else {
        if (inputid != ctxt->input->id) {
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                "Comment doesn't start and stop in the same entity\n");
        }
        NEXT;
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->comment(ctxt->userData, buf);
    }
    xmlFree(buf);
    return;
not_terminated:
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                         "Comment not terminated\n", NULL);
    xmlFree(buf);
    return;
}

/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Parse an XML (SGML) comment <!--  ....  --> and report its content
 * through the SAX comment() callback when one is installed and SAX is
 * not disabled.
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 * The function first runs a byte-oriented fast path that only accepts
 * tab, line feed and the bytes 0x20..0x7F. As soon as any other byte is
 * met, what has been accumulated so far is handed over to
 * xmlParseCommentComplex() which finishes the job.
 * The parser state (ctxt->instate) is saved on entry and restored on
 * every exit path.
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;                /* accumulated comment text */
    int size = XML_PARSER_BUFFER_SIZE;  /* allocated size of buf */
    int len = 0;                        /* bytes currently stored in buf */
    xmlParserInputState state;          /* parser state to restore on exit */
    const xmlChar *in;                  /* scanning cursor in the input */
    int nbchar = 0, ccol;
    int inputid;                        /* id of the input the comment started in */

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        /* leading line feeds only update the line/column accounting */
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        ccol = ctxt->input->col;
        /* consume tab and every byte in 0x20..0x7F except '-' */
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
                    in++;
                    ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            /* nothing is buffered when no comment handler is installed */
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /*
                     * If the run is immediately followed by "--" allocate
                     * just enough for it, otherwise leave room to grow.
                     */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    xmlChar *new_buf;
                    size += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        /* the old block is still ours to release */
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /* commit what has been scanned so far */
        ctxt->input->cur = in;
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF pair: the input cursor is left on the LF so that
                 * the next copied run starts there, i.e. the CR is dropped.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            /* lone CR: step back, the loop condition will reject it */
            in--;
        }
        SHRINK;
        GROW;
        /* SHRINK/GROW are followed by a reload of the cursor */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->" found: the comment is complete */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                        "comment doesn't start and stop in the same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    ctxt->instate = state;
                    return;
                }
                /*
                 * "--" not followed by '>': report the error but keep
                 * scanning after the two hyphens.
                 */
                if (buf != NULL)
                    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                      "Comment not terminated \n<!--%.50s\n",
                                      buf);
                else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                      "Comment not terminated \n", NULL);
                in++;
                ctxt->input->col++;
            }
            /* step over the hyphen; it becomes part of the next run */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /* a byte the fast path does not handle: let the generic parser finish */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}


/**
 * xmlParsePITarget:
 * @ctxt:  an XML parser context
 *
 * parse the name of a PI
 *
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
 *
 * A target that is exactly "xml" or a case variant of it is reported as
 * a fatal error; other names starting with "xml" (any case) produce a
 * warning unless they are listed in xmlW3CPIs. A colon in the name is
 * reported as a namespace error. In all those cases the name is still
 * returned.
 *
 * Returns the PITarget name or NULL
 */

const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;

    name = xmlParseName(ctxt);
    if ((name != NULL) &&
        ((name[0] == 'x') || (name[0] == 'X')) &&
        ((name[1] == 'm') || (name[1] == 'M')) &&
        ((name[2] == 'l') || (name[2] == 'L'))) {
        int i;
        if ((name[0] == 'x') && (name[1] == 'm') &&
            (name[2] == 'l') && (name[3] == 0)) {
            /* a lowercase "xml" PI is an XML declaration in the wrong place */
            xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
                 "XML declaration allowed only at the start of the document\n");
            return(name);
        } else if (name[3] == 0) {
            /* any other case variant of the three letter name */
            xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
            return(name);
        }
        /* xmlW3CPIs is a NULL terminated list of accepted "xml..." targets */
        for (i = 0;;i++) {
            if (xmlW3CPIs[i] == NULL) break;
            if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
                return(name);
        }
        xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
                      "xmlParsePITarget: invalid name prefix 'xml'\n",
                      NULL, NULL);
    }
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
        xmlNsErr(ctxt, XML_NS_ERR_COLON,
                 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
    }
    return(name);
}

#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected syntax is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL (single or double quotes),
 * optional blanks, end of string.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;     /* the quote character opening the URL */

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /*
     * NOTE(review): unlike every other syntax failure below, a missing
     * '=' returns without emitting the XML_WAR_CATALOG_PI warning --
     * confirm this is intended.
     */
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    /* look for the matching closing quote */
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    /* only blanks may follow the quoted value */
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif

4730/** 4731 * xmlParsePI: 4732 * @ctxt: an XML parser context 4733 * 4734 * parse an XML Processing Instruction. 4735 * 4736 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4737 * 4738 * The processing is transfered to SAX once parsed. 4739 */ 4740 4741void 4742xmlParsePI(xmlParserCtxtPtr ctxt) { 4743 xmlChar *buf = NULL; 4744 int len = 0; 4745 int size = XML_PARSER_BUFFER_SIZE; 4746 int cur, l; 4747 const xmlChar *target; 4748 xmlParserInputState state; 4749 int count = 0; 4750 4751 if ((RAW == '<') && (NXT(1) == '?')) { 4752 xmlParserInputPtr input = ctxt->input; 4753 state = ctxt->instate; 4754 ctxt->instate = XML_PARSER_PI; 4755 /* 4756 * this is a Processing Instruction. 4757 */ 4758 SKIP(2); 4759 SHRINK; 4760 4761 /* 4762 * Parse the target name and check for special support like 4763 * namespace. 4764 */ 4765 target = xmlParsePITarget(ctxt); 4766 if (target != NULL) { 4767 if ((RAW == '?') && (NXT(1) == '>')) { 4768 if (input != ctxt->input) { 4769 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4770 "PI declaration doesn't start and stop in the same entity\n"); 4771 } 4772 SKIP(2); 4773 4774 /* 4775 * SAX: PI detected. 
4776 */ 4777 if ((ctxt->sax) && (!ctxt->disableSAX) && 4778 (ctxt->sax->processingInstruction != NULL)) 4779 ctxt->sax->processingInstruction(ctxt->userData, 4780 target, NULL); 4781 ctxt->instate = state; 4782 return; 4783 } 4784 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4785 if (buf == NULL) { 4786 xmlErrMemory(ctxt, NULL); 4787 ctxt->instate = state; 4788 return; 4789 } 4790 cur = CUR; 4791 if (!IS_BLANK(cur)) { 4792 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4793 "ParsePI: PI %s space expected\n", target); 4794 } 4795 SKIP_BLANKS; 4796 cur = CUR_CHAR(l); 4797 while (IS_CHAR(cur) && /* checked */ 4798 ((cur != '?') || (NXT(1) != '>'))) { 4799 if (len + 5 >= size) { 4800 xmlChar *tmp; 4801 4802 size *= 2; 4803 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4804 if (tmp == NULL) { 4805 xmlErrMemory(ctxt, NULL); 4806 xmlFree(buf); 4807 ctxt->instate = state; 4808 return; 4809 } 4810 buf = tmp; 4811 } 4812 count++; 4813 if (count > 50) { 4814 GROW; 4815 count = 0; 4816 } 4817 COPY_BUF(l,buf,len,cur); 4818 NEXTL(l); 4819 cur = CUR_CHAR(l); 4820 if (cur == 0) { 4821 SHRINK; 4822 GROW; 4823 cur = CUR_CHAR(l); 4824 } 4825 } 4826 buf[len] = 0; 4827 if (cur != '?') { 4828 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 4829 "ParsePI: PI %s never end ...\n", target); 4830 } else { 4831 if (input != ctxt->input) { 4832 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4833 "PI declaration doesn't start and stop in the same entity\n"); 4834 } 4835 SKIP(2); 4836 4837#ifdef LIBXML_CATALOG_ENABLED 4838 if (((state == XML_PARSER_MISC) || 4839 (state == XML_PARSER_START)) && 4840 (xmlStrEqual(target, XML_CATALOG_PI))) { 4841 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 4842 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 4843 (allow == XML_CATA_ALLOW_ALL)) 4844 xmlParseCatalogPI(ctxt, buf); 4845 } 4846#endif 4847 4848 4849 /* 4850 * SAX: PI detected. 
4851 */ 4852 if ((ctxt->sax) && (!ctxt->disableSAX) && 4853 (ctxt->sax->processingInstruction != NULL)) 4854 ctxt->sax->processingInstruction(ctxt->userData, 4855 target, buf); 4856 } 4857 xmlFree(buf); 4858 } else { 4859 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 4860 } 4861 ctxt->instate = state; 4862 } 4863} 4864 4865/** 4866 * xmlParseNotationDecl: 4867 * @ctxt: an XML parser context 4868 * 4869 * parse a notation declaration 4870 * 4871 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 4872 * 4873 * Hence there is actually 3 choices: 4874 * 'PUBLIC' S PubidLiteral 4875 * 'PUBLIC' S PubidLiteral S SystemLiteral 4876 * and 'SYSTEM' S SystemLiteral 4877 * 4878 * See the NOTE on xmlParseExternalID(). 4879 */ 4880 4881void 4882xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 4883 const xmlChar *name; 4884 xmlChar *Pubid; 4885 xmlChar *Systemid; 4886 4887 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4888 xmlParserInputPtr input = ctxt->input; 4889 SHRINK; 4890 SKIP(10); 4891 if (!IS_BLANK_CH(CUR)) { 4892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4893 "Space required after '<!NOTATION'\n"); 4894 return; 4895 } 4896 SKIP_BLANKS; 4897 4898 name = xmlParseName(ctxt); 4899 if (name == NULL) { 4900 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4901 return; 4902 } 4903 if (!IS_BLANK_CH(CUR)) { 4904 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4905 "Space required after the NOTATION name'\n"); 4906 return; 4907 } 4908 if (xmlStrchr(name, ':') != NULL) { 4909 xmlNsErr(ctxt, XML_NS_ERR_COLON, 4910 "colon are forbidden from notation names '%s'\n", 4911 name, NULL, NULL); 4912 } 4913 SKIP_BLANKS; 4914 4915 /* 4916 * Parse the IDs. 
4917 */ 4918 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 4919 SKIP_BLANKS; 4920 4921 if (RAW == '>') { 4922 if (input != ctxt->input) { 4923 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4924 "Notation declaration doesn't start and stop in the same entity\n"); 4925 } 4926 NEXT; 4927 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4928 (ctxt->sax->notationDecl != NULL)) 4929 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 4930 } else { 4931 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4932 } 4933 if (Systemid != NULL) xmlFree(Systemid); 4934 if (Pubid != NULL) xmlFree(Pubid); 4935 } 4936} 4937 4938/** 4939 * xmlParseEntityDecl: 4940 * @ctxt: an XML parser context 4941 * 4942 * parse <!ENTITY declarations 4943 * 4944 * [70] EntityDecl ::= GEDecl | PEDecl 4945 * 4946 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 4947 * 4948 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 4949 * 4950 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 4951 * 4952 * [74] PEDef ::= EntityValue | ExternalID 4953 * 4954 * [76] NDataDecl ::= S 'NDATA' S Name 4955 * 4956 * [ VC: Notation Declared ] 4957 * The Name must match the declared name of a notation. 
4958 */ 4959 4960void 4961xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 4962 const xmlChar *name = NULL; 4963 xmlChar *value = NULL; 4964 xmlChar *URI = NULL, *literal = NULL; 4965 const xmlChar *ndata = NULL; 4966 int isParameter = 0; 4967 xmlChar *orig = NULL; 4968 int skipped; 4969 4970 /* GROW; done in the caller */ 4971 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 4972 xmlParserInputPtr input = ctxt->input; 4973 SHRINK; 4974 SKIP(8); 4975 skipped = SKIP_BLANKS; 4976 if (skipped == 0) { 4977 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4978 "Space required after '<!ENTITY'\n"); 4979 } 4980 4981 if (RAW == '%') { 4982 NEXT; 4983 skipped = SKIP_BLANKS; 4984 if (skipped == 0) { 4985 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4986 "Space required after '%'\n"); 4987 } 4988 isParameter = 1; 4989 } 4990 4991 name = xmlParseName(ctxt); 4992 if (name == NULL) { 4993 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4994 "xmlParseEntityDecl: no name\n"); 4995 return; 4996 } 4997 if (xmlStrchr(name, ':') != NULL) { 4998 xmlNsErr(ctxt, XML_NS_ERR_COLON, 4999 "colon are forbidden from entities names '%s'\n", 5000 name, NULL, NULL); 5001 } 5002 skipped = SKIP_BLANKS; 5003 if (skipped == 0) { 5004 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5005 "Space required after the entity name\n"); 5006 } 5007 5008 ctxt->instate = XML_PARSER_ENTITY_DECL; 5009 /* 5010 * handle the various case of definitions... 
5011 */ 5012 if (isParameter) { 5013 if ((RAW == '"') || (RAW == '\'')) { 5014 value = xmlParseEntityValue(ctxt, &orig); 5015 if (value) { 5016 if ((ctxt->sax != NULL) && 5017 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5018 ctxt->sax->entityDecl(ctxt->userData, name, 5019 XML_INTERNAL_PARAMETER_ENTITY, 5020 NULL, NULL, value); 5021 } 5022 } else { 5023 URI = xmlParseExternalID(ctxt, &literal, 1); 5024 if ((URI == NULL) && (literal == NULL)) { 5025 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5026 } 5027 if (URI) { 5028 xmlURIPtr uri; 5029 5030 uri = xmlParseURI((const char *) URI); 5031 if (uri == NULL) { 5032 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5033 "Invalid URI: %s\n", URI); 5034 /* 5035 * This really ought to be a well formedness error 5036 * but the XML Core WG decided otherwise c.f. issue 5037 * E26 of the XML erratas. 5038 */ 5039 } else { 5040 if (uri->fragment != NULL) { 5041 /* 5042 * Okay this is foolish to block those but not 5043 * invalid URIs. 5044 */ 5045 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5046 } else { 5047 if ((ctxt->sax != NULL) && 5048 (!ctxt->disableSAX) && 5049 (ctxt->sax->entityDecl != NULL)) 5050 ctxt->sax->entityDecl(ctxt->userData, name, 5051 XML_EXTERNAL_PARAMETER_ENTITY, 5052 literal, URI, NULL); 5053 } 5054 xmlFreeURI(uri); 5055 } 5056 } 5057 } 5058 } else { 5059 if ((RAW == '"') || (RAW == '\'')) { 5060 value = xmlParseEntityValue(ctxt, &orig); 5061 if ((ctxt->sax != NULL) && 5062 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5063 ctxt->sax->entityDecl(ctxt->userData, name, 5064 XML_INTERNAL_GENERAL_ENTITY, 5065 NULL, NULL, value); 5066 /* 5067 * For expat compatibility in SAX mode. 
5068 */ 5069 if ((ctxt->myDoc == NULL) || 5070 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5071 if (ctxt->myDoc == NULL) { 5072 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5073 if (ctxt->myDoc == NULL) { 5074 xmlErrMemory(ctxt, "New Doc failed"); 5075 return; 5076 } 5077 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5078 } 5079 if (ctxt->myDoc->intSubset == NULL) 5080 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5081 BAD_CAST "fake", NULL, NULL); 5082 5083 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5084 NULL, NULL, value); 5085 } 5086 } else { 5087 URI = xmlParseExternalID(ctxt, &literal, 1); 5088 if ((URI == NULL) && (literal == NULL)) { 5089 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5090 } 5091 if (URI) { 5092 xmlURIPtr uri; 5093 5094 uri = xmlParseURI((const char *)URI); 5095 if (uri == NULL) { 5096 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5097 "Invalid URI: %s\n", URI); 5098 /* 5099 * This really ought to be a well formedness error 5100 * but the XML Core WG decided otherwise c.f. issue 5101 * E26 of the XML erratas. 5102 */ 5103 } else { 5104 if (uri->fragment != NULL) { 5105 /* 5106 * Okay this is foolish to block those but not 5107 * invalid URIs. 
5108 */ 5109 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5110 } 5111 xmlFreeURI(uri); 5112 } 5113 } 5114 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5115 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5116 "Space required before 'NDATA'\n"); 5117 } 5118 SKIP_BLANKS; 5119 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5120 SKIP(5); 5121 if (!IS_BLANK_CH(CUR)) { 5122 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5123 "Space required after 'NDATA'\n"); 5124 } 5125 SKIP_BLANKS; 5126 ndata = xmlParseName(ctxt); 5127 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5128 (ctxt->sax->unparsedEntityDecl != NULL)) 5129 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5130 literal, URI, ndata); 5131 } else { 5132 if ((ctxt->sax != NULL) && 5133 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5134 ctxt->sax->entityDecl(ctxt->userData, name, 5135 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5136 literal, URI, NULL); 5137 /* 5138 * For expat compatibility in SAX mode. 5139 * assuming the entity repalcement was asked for 5140 */ 5141 if ((ctxt->replaceEntities != 0) && 5142 ((ctxt->myDoc == NULL) || 5143 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5144 if (ctxt->myDoc == NULL) { 5145 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5146 if (ctxt->myDoc == NULL) { 5147 xmlErrMemory(ctxt, "New Doc failed"); 5148 return; 5149 } 5150 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5151 } 5152 5153 if (ctxt->myDoc->intSubset == NULL) 5154 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5155 BAD_CAST "fake", NULL, NULL); 5156 xmlSAX2EntityDecl(ctxt, name, 5157 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5158 literal, URI, NULL); 5159 } 5160 } 5161 } 5162 } 5163 SKIP_BLANKS; 5164 if (RAW != '>') { 5165 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5166 "xmlParseEntityDecl: entity %s not terminated\n", name); 5167 } else { 5168 if (input != ctxt->input) { 5169 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5170 "Entity declaration doesn't start and stop in the same entity\n"); 5171 } 
5172 NEXT; 5173 } 5174 if (orig != NULL) { 5175 /* 5176 * Ugly mechanism to save the raw entity value. 5177 */ 5178 xmlEntityPtr cur = NULL; 5179 5180 if (isParameter) { 5181 if ((ctxt->sax != NULL) && 5182 (ctxt->sax->getParameterEntity != NULL)) 5183 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5184 } else { 5185 if ((ctxt->sax != NULL) && 5186 (ctxt->sax->getEntity != NULL)) 5187 cur = ctxt->sax->getEntity(ctxt->userData, name); 5188 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5189 cur = xmlSAX2GetEntity(ctxt, name); 5190 } 5191 } 5192 if (cur != NULL) { 5193 if (cur->orig != NULL) 5194 xmlFree(orig); 5195 else 5196 cur->orig = orig; 5197 } else 5198 xmlFree(orig); 5199 } 5200 if (value != NULL) xmlFree(value); 5201 if (URI != NULL) xmlFree(URI); 5202 if (literal != NULL) xmlFree(literal); 5203 } 5204} 5205 5206/** 5207 * xmlParseDefaultDecl: 5208 * @ctxt: an XML parser context 5209 * @value: Receive a possible fixed default value for the attribute 5210 * 5211 * Parse an attribute default declaration 5212 * 5213 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5214 * 5215 * [ VC: Required Attribute ] 5216 * if the default declaration is the keyword #REQUIRED, then the 5217 * attribute must be specified for all elements of the type in the 5218 * attribute-list declaration. 5219 * 5220 * [ VC: Attribute Default Legal ] 5221 * The declared default value must meet the lexical constraints of 5222 * the declared attribute type c.f. xmlValidateAttributeDecl() 5223 * 5224 * [ VC: Fixed Attribute Default ] 5225 * if an attribute has a default value declared with the #FIXED 5226 * keyword, instances of that attribute must match the default value. 5227 * 5228 * [ WFC: No < in Attribute Values ] 5229 * handled in xmlParseAttValue() 5230 * 5231 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5232 * or XML_ATTRIBUTE_FIXED. 
5233 */ 5234 5235int 5236xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5237 int val; 5238 xmlChar *ret; 5239 5240 *value = NULL; 5241 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5242 SKIP(9); 5243 return(XML_ATTRIBUTE_REQUIRED); 5244 } 5245 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5246 SKIP(8); 5247 return(XML_ATTRIBUTE_IMPLIED); 5248 } 5249 val = XML_ATTRIBUTE_NONE; 5250 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5251 SKIP(6); 5252 val = XML_ATTRIBUTE_FIXED; 5253 if (!IS_BLANK_CH(CUR)) { 5254 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5255 "Space required after '#FIXED'\n"); 5256 } 5257 SKIP_BLANKS; 5258 } 5259 ret = xmlParseAttValue(ctxt); 5260 ctxt->instate = XML_PARSER_DTD; 5261 if (ret == NULL) { 5262 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5263 "Attribute default value declaration error\n"); 5264 } else 5265 *value = ret; 5266 return(val); 5267} 5268 5269/** 5270 * xmlParseNotationType: 5271 * @ctxt: an XML parser context 5272 * 5273 * parse an Notation attribute type. 5274 * 5275 * Note: the leading 'NOTATION' S part has already being parsed... 5276 * 5277 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5278 * 5279 * [ VC: Notation Attributes ] 5280 * Values of this type must match one of the notation names included 5281 * in the declaration; all notation names in the declaration must be declared. 
5282 * 5283 * Returns: the notation attribute tree built while parsing 5284 */ 5285 5286xmlEnumerationPtr 5287xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5288 const xmlChar *name; 5289 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5290 5291 if (RAW != '(') { 5292 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5293 return(NULL); 5294 } 5295 SHRINK; 5296 do { 5297 NEXT; 5298 SKIP_BLANKS; 5299 name = xmlParseName(ctxt); 5300 if (name == NULL) { 5301 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5302 "Name expected in NOTATION declaration\n"); 5303 return(ret); 5304 } 5305 tmp = ret; 5306 while (tmp != NULL) { 5307 if (xmlStrEqual(name, tmp->name)) { 5308 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5309 "standalone: attribute notation value token %s duplicated\n", 5310 name, NULL); 5311 if (!xmlDictOwns(ctxt->dict, name)) 5312 xmlFree((xmlChar *) name); 5313 break; 5314 } 5315 tmp = tmp->next; 5316 } 5317 if (tmp == NULL) { 5318 cur = xmlCreateEnumeration(name); 5319 if (cur == NULL) return(ret); 5320 if (last == NULL) ret = last = cur; 5321 else { 5322 last->next = cur; 5323 last = cur; 5324 } 5325 } 5326 SKIP_BLANKS; 5327 } while (RAW == '|'); 5328 if (RAW != ')') { 5329 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5330 if ((last != NULL) && (last != ret)) 5331 xmlFreeEnumeration(last); 5332 return(ret); 5333 } 5334 NEXT; 5335 return(ret); 5336} 5337 5338/** 5339 * xmlParseEnumerationType: 5340 * @ctxt: an XML parser context 5341 * 5342 * parse an Enumeration attribute type. 5343 * 5344 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5345 * 5346 * [ VC: Enumeration ] 5347 * Values of this type must match one of the Nmtoken tokens in 5348 * the declaration 5349 * 5350 * Returns: the enumeration attribute tree built while parsing 5351 */ 5352 5353xmlEnumerationPtr 5354xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5355 xmlChar *name; 5356 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5357 5358 if (RAW != '(') { 5359 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5360 return(NULL); 5361 } 5362 SHRINK; 5363 do { 5364 NEXT; 5365 SKIP_BLANKS; 5366 name = xmlParseNmtoken(ctxt); 5367 if (name == NULL) { 5368 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5369 return(ret); 5370 } 5371 tmp = ret; 5372 while (tmp != NULL) { 5373 if (xmlStrEqual(name, tmp->name)) { 5374 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5375 "standalone: attribute enumeration value token %s duplicated\n", 5376 name, NULL); 5377 if (!xmlDictOwns(ctxt->dict, name)) 5378 xmlFree(name); 5379 break; 5380 } 5381 tmp = tmp->next; 5382 } 5383 if (tmp == NULL) { 5384 cur = xmlCreateEnumeration(name); 5385 if (!xmlDictOwns(ctxt->dict, name)) 5386 xmlFree(name); 5387 if (cur == NULL) return(ret); 5388 if (last == NULL) ret = last = cur; 5389 else { 5390 last->next = cur; 5391 last = cur; 5392 } 5393 } 5394 SKIP_BLANKS; 5395 } while (RAW == '|'); 5396 if (RAW != ')') { 5397 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5398 return(ret); 5399 } 5400 NEXT; 5401 return(ret); 5402} 5403 5404/** 5405 * xmlParseEnumeratedType: 5406 * @ctxt: an XML parser context 5407 * @tree: the enumeration tree built while parsing 5408 * 5409 * parse an Enumerated attribute type. 5410 * 5411 * [57] EnumeratedType ::= NotationType | Enumeration 5412 * 5413 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5414 * 5415 * 5416 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5417 */ 5418 5419int 5420xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5421 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5422 SKIP(8); 5423 if (!IS_BLANK_CH(CUR)) { 5424 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5425 "Space required after 'NOTATION'\n"); 5426 return(0); 5427 } 5428 SKIP_BLANKS; 5429 *tree = xmlParseNotationType(ctxt); 5430 if (*tree == NULL) return(0); 5431 return(XML_ATTRIBUTE_NOTATION); 5432 } 5433 *tree = xmlParseEnumerationType(ctxt); 5434 if (*tree == NULL) return(0); 5435 return(XML_ATTRIBUTE_ENUMERATION); 5436} 5437 5438/** 5439 * xmlParseAttributeType: 5440 * @ctxt: an XML parser context 5441 * @tree: the enumeration tree built while parsing 5442 * 5443 * parse the Attribute list def for an element 5444 * 5445 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5446 * 5447 * [55] StringType ::= 'CDATA' 5448 * 5449 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5450 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5451 * 5452 * Validity constraints for attribute values syntax are checked in 5453 * xmlValidateAttributeValue() 5454 * 5455 * [ VC: ID ] 5456 * Values of type ID must match the Name production. A name must not 5457 * appear more than once in an XML document as a value of this type; 5458 * i.e., ID values must uniquely identify the elements which bear them. 5459 * 5460 * [ VC: One ID per Element Type ] 5461 * No element type may have more than one ID attribute specified. 5462 * 5463 * [ VC: ID Attribute Default ] 5464 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5465 * 5466 * [ VC: IDREF ] 5467 * Values of type IDREF must match the Name production, and values 5468 * of type IDREFS must match Names; each IDREF Name must match the value 5469 * of an ID attribute on some element in the XML document; i.e. 
IDREF 5470 * values must match the value of some ID attribute. 5471 * 5472 * [ VC: Entity Name ] 5473 * Values of type ENTITY must match the Name production, values 5474 * of type ENTITIES must match Names; each Entity Name must match the 5475 * name of an unparsed entity declared in the DTD. 5476 * 5477 * [ VC: Name Token ] 5478 * Values of type NMTOKEN must match the Nmtoken production; values 5479 * of type NMTOKENS must match Nmtokens. 5480 * 5481 * Returns the attribute type 5482 */ 5483int 5484xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5485 SHRINK; 5486 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5487 SKIP(5); 5488 return(XML_ATTRIBUTE_CDATA); 5489 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5490 SKIP(6); 5491 return(XML_ATTRIBUTE_IDREFS); 5492 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5493 SKIP(5); 5494 return(XML_ATTRIBUTE_IDREF); 5495 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5496 SKIP(2); 5497 return(XML_ATTRIBUTE_ID); 5498 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5499 SKIP(6); 5500 return(XML_ATTRIBUTE_ENTITY); 5501 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5502 SKIP(8); 5503 return(XML_ATTRIBUTE_ENTITIES); 5504 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5505 SKIP(8); 5506 return(XML_ATTRIBUTE_NMTOKENS); 5507 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5508 SKIP(7); 5509 return(XML_ATTRIBUTE_NMTOKEN); 5510 } 5511 return(xmlParseEnumeratedType(ctxt, tree)); 5512} 5513 5514/** 5515 * xmlParseAttributeListDecl: 5516 * @ctxt: an XML parser context 5517 * 5518 * : parse the Attribute list def for an element 5519 * 5520 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>'
 *
 * [53] AttDef ::= S Name S AttType S DefaultDecl
 *
 * Each attribute definition is reported through the SAX attributeDecl()
 * callback. When ctxt->sax2 is set, defaulted and special attributes are
 * additionally recorded with xmlAddDefAttrs() and xmlAddSpecialAttr().
 * Parsing of the list stops at the first malformed definition.
 */
void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *elemName;    /* element the attributes belong to */
    const xmlChar *attrName;    /* attribute currently being declared */
    xmlEnumerationPtr tree;     /* enumeration values of that attribute */

    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
        /* remembered for the entity boundary check at the closing '>' */
        xmlParserInputPtr input = ctxt->input;

        SKIP(9);
        if (!IS_BLANK_CH(CUR)) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                                 "Space required after '<!ATTLIST'\n");
        }
        SKIP_BLANKS;
        elemName = xmlParseName(ctxt);
        if (elemName == NULL) {
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "ATTLIST: no name for Element\n");
            return;
        }
        SKIP_BLANKS;
        GROW;
        /* one AttDef per iteration */
        while (RAW != '>') {
            /* input position on entry, used to detect a lack of progress */
            const xmlChar *check = CUR_PTR;
            int type;
            int def;
            xmlChar *defaultValue = NULL;

            GROW;
            tree = NULL;
            attrName = xmlParseName(ctxt);
            if (attrName == NULL) {
                xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                               "ATTLIST: no name for Attribute\n");
                break;
            }
            GROW;
            if (!IS_BLANK_CH(CUR)) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                        "Space required after the attribute name\n");
                break;
            }
            SKIP_BLANKS;

            type = xmlParseAttributeType(ctxt, &tree);
            if (type <= 0) {
                break;
            }

            GROW;
            if (!IS_BLANK_CH(CUR)) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                               "Space required after the attribute type\n");
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            SKIP_BLANKS;

            def = xmlParseDefaultDecl(ctxt, &defaultValue);
            if (def <= 0) {
                if (defaultValue != NULL)
                    xmlFree(defaultValue);
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            /* every type but CDATA gets its default value space-normalized */
            if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
                xmlAttrNormalizeSpace(defaultValue, defaultValue);

            GROW;
            if (RAW != '>') {
                if (!IS_BLANK_CH(CUR)) {
                    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                        "Space required after the attribute default value\n");
                    if (defaultValue != NULL)
                        xmlFree(defaultValue);
                    if (tree != NULL)
                        xmlFreeEnumeration(tree);
                    break;
                }
                SKIP_BLANKS;
            }
            /* guard against looping forever without consuming any input */
            if (check == CUR_PTR) {
                xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                            "in xmlParseAttributeListDecl\n");
                if (defaultValue != NULL)
                    xmlFree(defaultValue);
                if (tree != NULL)
                    xmlFreeEnumeration(tree);
                break;
            }
            /*
             * The enumeration tree is passed to the handler and not freed
             * here afterwards (presumably the handler takes ownership --
             * confirm against the SAX implementation); it is freed here
             * only when no handler is called.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                (ctxt->sax->attributeDecl != NULL))
                ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
                                type, def, defaultValue, tree);
            else if (tree != NULL)
                xmlFreeEnumeration(tree);

            if ((ctxt->sax2) && (defaultValue != NULL) &&
                (def != XML_ATTRIBUTE_IMPLIED) &&
                (def != XML_ATTRIBUTE_REQUIRED)) {
                xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
            }
            if (ctxt->sax2) {
                xmlAddSpecialAttr(ctxt, elemName, attrName, type);
            }
            /* the default value string is always released by this function */
            if (defaultValue != NULL)
                xmlFree(defaultValue);
            GROW;
        }
        if (RAW == '>') {
            if (input != ctxt->input) {
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
    "Attribute list declaration doesn't start and stop in the same entity\n",
                                 NULL, NULL);
            }
            NEXT;
        }
    }
}

/**
 * xmlParseElementMixedContentDecl:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 *
 * parse the declaration for a Mixed Element content
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
 *                '(' S? '#PCDATA' S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
 *
 * [ VC: No Duplicate Types ]
 * The same name must not appear more than once in a single
 * mixed-content declaration.
5664 * 5665 * returns: the list of the xmlElementContentPtr describing the element choices 5666 */ 5667xmlElementContentPtr 5668xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 5669 xmlElementContentPtr ret = NULL, cur = NULL, n; 5670 const xmlChar *elem = NULL; 5671 5672 GROW; 5673 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5674 SKIP(7); 5675 SKIP_BLANKS; 5676 SHRINK; 5677 if (RAW == ')') { 5678 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5679 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5680"Element content declaration doesn't start and stop in the same entity\n", 5681 NULL, NULL); 5682 } 5683 NEXT; 5684 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5685 if (ret == NULL) 5686 return(NULL); 5687 if (RAW == '*') { 5688 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5689 NEXT; 5690 } 5691 return(ret); 5692 } 5693 if ((RAW == '(') || (RAW == '|')) { 5694 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5695 if (ret == NULL) return(NULL); 5696 } 5697 while (RAW == '|') { 5698 NEXT; 5699 if (elem == NULL) { 5700 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5701 if (ret == NULL) return(NULL); 5702 ret->c1 = cur; 5703 if (cur != NULL) 5704 cur->parent = ret; 5705 cur = ret; 5706 } else { 5707 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5708 if (n == NULL) return(NULL); 5709 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5710 if (n->c1 != NULL) 5711 n->c1->parent = n; 5712 cur->c2 = n; 5713 if (n != NULL) 5714 n->parent = cur; 5715 cur = n; 5716 } 5717 SKIP_BLANKS; 5718 elem = xmlParseName(ctxt); 5719 if (elem == NULL) { 5720 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5721 "xmlParseElementMixedContentDecl : Name expected\n"); 5722 xmlFreeDocElementContent(ctxt->myDoc, cur); 5723 return(NULL); 5724 } 5725 SKIP_BLANKS; 5726 GROW; 5727 } 5728 if ((RAW == ')') && (NXT(1) == 
'*')) { 5729 if (elem != NULL) { 5730 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 5731 XML_ELEMENT_CONTENT_ELEMENT); 5732 if (cur->c2 != NULL) 5733 cur->c2->parent = cur; 5734 } 5735 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5736 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5737 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5738"Element content declaration doesn't start and stop in the same entity\n", 5739 NULL, NULL); 5740 } 5741 SKIP(2); 5742 } else { 5743 xmlFreeDocElementContent(ctxt->myDoc, ret); 5744 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 5745 return(NULL); 5746 } 5747 5748 } else { 5749 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 5750 } 5751 return(ret); 5752} 5753 5754/** 5755 * xmlParseElementChildrenContentDecl: 5756 * @ctxt: an XML parser context 5757 * @inputchk: the input used for the current entity, needed for boundary checks 5758 * 5759 * parse the declaration for a Mixed Element content 5760 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5761 * 5762 * 5763 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 5764 * 5765 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 5766 * 5767 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 5768 * 5769 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 5770 * 5771 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 5772 * TODO Parameter-entity replacement text must be properly nested 5773 * with parenthesized groups. That is to say, if either of the 5774 * opening or closing parentheses in a choice, seq, or Mixed 5775 * construct is contained in the replacement text for a parameter 5776 * entity, both must be contained in the same replacement text. For 5777 * interoperability, if a parameter-entity reference appears in a 5778 * choice, seq, or Mixed construct, its replacement text should not 5779 * be empty, and neither the first nor last non-blank character of 5780 * the replacement text should be a connector (| or ,). 
5781 * 5782 * Returns the tree of xmlElementContentPtr describing the element 5783 * hierarchy. 5784 */ 5785xmlElementContentPtr 5786xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { 5787 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5788 const xmlChar *elem; 5789 xmlChar type = 0; 5790 5791 SKIP_BLANKS; 5792 GROW; 5793 if (RAW == '(') { 5794 int inputid = ctxt->input->id; 5795 5796 /* Recurse on first child */ 5797 NEXT; 5798 SKIP_BLANKS; 5799 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); 5800 SKIP_BLANKS; 5801 GROW; 5802 } else { 5803 elem = xmlParseName(ctxt); 5804 if (elem == NULL) { 5805 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5806 return(NULL); 5807 } 5808 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5809 if (cur == NULL) { 5810 xmlErrMemory(ctxt, NULL); 5811 return(NULL); 5812 } 5813 GROW; 5814 if (RAW == '?') { 5815 cur->ocur = XML_ELEMENT_CONTENT_OPT; 5816 NEXT; 5817 } else if (RAW == '*') { 5818 cur->ocur = XML_ELEMENT_CONTENT_MULT; 5819 NEXT; 5820 } else if (RAW == '+') { 5821 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 5822 NEXT; 5823 } else { 5824 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 5825 } 5826 GROW; 5827 } 5828 SKIP_BLANKS; 5829 SHRINK; 5830 while (RAW != ')') { 5831 /* 5832 * Each loop we parse one separator and one element. 
5833 */ 5834 if (RAW == ',') { 5835 if (type == 0) type = CUR; 5836 5837 /* 5838 * Detect "Name | Name , Name" error 5839 */ 5840 else if (type != CUR) { 5841 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5842 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5843 type); 5844 if ((last != NULL) && (last != ret)) 5845 xmlFreeDocElementContent(ctxt->myDoc, last); 5846 if (ret != NULL) 5847 xmlFreeDocElementContent(ctxt->myDoc, ret); 5848 return(NULL); 5849 } 5850 NEXT; 5851 5852 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 5853 if (op == NULL) { 5854 if ((last != NULL) && (last != ret)) 5855 xmlFreeDocElementContent(ctxt->myDoc, last); 5856 xmlFreeDocElementContent(ctxt->myDoc, ret); 5857 return(NULL); 5858 } 5859 if (last == NULL) { 5860 op->c1 = ret; 5861 if (ret != NULL) 5862 ret->parent = op; 5863 ret = cur = op; 5864 } else { 5865 cur->c2 = op; 5866 if (op != NULL) 5867 op->parent = cur; 5868 op->c1 = last; 5869 if (last != NULL) 5870 last->parent = op; 5871 cur =op; 5872 last = NULL; 5873 } 5874 } else if (RAW == '|') { 5875 if (type == 0) type = CUR; 5876 5877 /* 5878 * Detect "Name , Name | Name" error 5879 */ 5880 else if (type != CUR) { 5881 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5882 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5883 type); 5884 if ((last != NULL) && (last != ret)) 5885 xmlFreeDocElementContent(ctxt->myDoc, last); 5886 if (ret != NULL) 5887 xmlFreeDocElementContent(ctxt->myDoc, ret); 5888 return(NULL); 5889 } 5890 NEXT; 5891 5892 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5893 if (op == NULL) { 5894 if ((last != NULL) && (last != ret)) 5895 xmlFreeDocElementContent(ctxt->myDoc, last); 5896 if (ret != NULL) 5897 xmlFreeDocElementContent(ctxt->myDoc, ret); 5898 return(NULL); 5899 } 5900 if (last == NULL) { 5901 op->c1 = ret; 5902 if (ret != NULL) 5903 ret->parent = op; 5904 ret = cur = op; 5905 } else { 5906 cur->c2 = op; 5907 if (op != NULL) 5908 
op->parent = cur; 5909 op->c1 = last; 5910 if (last != NULL) 5911 last->parent = op; 5912 cur =op; 5913 last = NULL; 5914 } 5915 } else { 5916 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 5917 if ((last != NULL) && (last != ret)) 5918 xmlFreeDocElementContent(ctxt->myDoc, last); 5919 if (ret != NULL) 5920 xmlFreeDocElementContent(ctxt->myDoc, ret); 5921 return(NULL); 5922 } 5923 GROW; 5924 SKIP_BLANKS; 5925 GROW; 5926 if (RAW == '(') { 5927 int inputid = ctxt->input->id; 5928 /* Recurse on second child */ 5929 NEXT; 5930 SKIP_BLANKS; 5931 last = xmlParseElementChildrenContentDecl(ctxt, inputid); 5932 SKIP_BLANKS; 5933 } else { 5934 elem = xmlParseName(ctxt); 5935 if (elem == NULL) { 5936 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5937 if (ret != NULL) 5938 xmlFreeDocElementContent(ctxt->myDoc, ret); 5939 return(NULL); 5940 } 5941 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5942 if (last == NULL) { 5943 if (ret != NULL) 5944 xmlFreeDocElementContent(ctxt->myDoc, ret); 5945 return(NULL); 5946 } 5947 if (RAW == '?') { 5948 last->ocur = XML_ELEMENT_CONTENT_OPT; 5949 NEXT; 5950 } else if (RAW == '*') { 5951 last->ocur = XML_ELEMENT_CONTENT_MULT; 5952 NEXT; 5953 } else if (RAW == '+') { 5954 last->ocur = XML_ELEMENT_CONTENT_PLUS; 5955 NEXT; 5956 } else { 5957 last->ocur = XML_ELEMENT_CONTENT_ONCE; 5958 } 5959 } 5960 SKIP_BLANKS; 5961 GROW; 5962 } 5963 if ((cur != NULL) && (last != NULL)) { 5964 cur->c2 = last; 5965 if (last != NULL) 5966 last->parent = cur; 5967 } 5968 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5969 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5970"Element content declaration doesn't start and stop in the same entity\n", 5971 NULL, NULL); 5972 } 5973 NEXT; 5974 if (RAW == '?') { 5975 if (ret != NULL) { 5976 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 5977 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 5978 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5979 else 5980 ret->ocur = 
XML_ELEMENT_CONTENT_OPT; 5981 } 5982 NEXT; 5983 } else if (RAW == '*') { 5984 if (ret != NULL) { 5985 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5986 cur = ret; 5987 /* 5988 * Some normalization: 5989 * (a | b* | c?)* == (a | b | c)* 5990 */ 5991 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 5992 if ((cur->c1 != NULL) && 5993 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5994 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 5995 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5996 if ((cur->c2 != NULL) && 5997 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5998 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 5999 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6000 cur = cur->c2; 6001 } 6002 } 6003 NEXT; 6004 } else if (RAW == '+') { 6005 if (ret != NULL) { 6006 int found = 0; 6007 6008 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6009 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6010 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6011 else 6012 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6013 /* 6014 * Some normalization: 6015 * (a | b*)+ == (a | b)* 6016 * (a | b?)+ == (a | b)* 6017 */ 6018 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6019 if ((cur->c1 != NULL) && 6020 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6021 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6022 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6023 found = 1; 6024 } 6025 if ((cur->c2 != NULL) && 6026 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6027 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6028 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6029 found = 1; 6030 } 6031 cur = cur->c2; 6032 } 6033 if (found) 6034 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6035 } 6036 NEXT; 6037 } 6038 return(ret); 6039} 6040 6041/** 6042 * xmlParseElementContentDecl: 6043 * @ctxt: an XML parser context 6044 * @name: the name of the element being defined. 
6045 * @result: the Element Content pointer will be stored here if any 6046 * 6047 * parse the declaration for an Element content either Mixed or Children, 6048 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6049 * 6050 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6051 * 6052 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6053 */ 6054 6055int 6056xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6057 xmlElementContentPtr *result) { 6058 6059 xmlElementContentPtr tree = NULL; 6060 int inputid = ctxt->input->id; 6061 int res; 6062 6063 *result = NULL; 6064 6065 if (RAW != '(') { 6066 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6067 "xmlParseElementContentDecl : %s '(' expected\n", name); 6068 return(-1); 6069 } 6070 NEXT; 6071 GROW; 6072 SKIP_BLANKS; 6073 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6074 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6075 res = XML_ELEMENT_TYPE_MIXED; 6076 } else { 6077 tree = xmlParseElementChildrenContentDecl(ctxt, inputid); 6078 res = XML_ELEMENT_TYPE_ELEMENT; 6079 } 6080 SKIP_BLANKS; 6081 *result = tree; 6082 return(res); 6083} 6084 6085/** 6086 * xmlParseElementDecl: 6087 * @ctxt: an XML parser context 6088 * 6089 * parse an Element declaration. 6090 * 6091 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6092 * 6093 * [ VC: Unique Element Type Declaration ] 6094 * No element type may be declared more than once 6095 * 6096 * Returns the type of the element, or -1 in case of error 6097 */ 6098int 6099xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6100 const xmlChar *name; 6101 int ret = -1; 6102 xmlElementContentPtr content = NULL; 6103 6104 /* GROW; done in the caller */ 6105 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6106 xmlParserInputPtr input = ctxt->input; 6107 6108 SKIP(9); 6109 if (!IS_BLANK_CH(CUR)) { 6110 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6111 "Space required after 'ELEMENT'\n"); 6112 } 6113 SKIP_BLANKS; 6114 name = xmlParseName(ctxt); 6115 if (name == NULL) { 6116 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6117 "xmlParseElementDecl: no name for Element\n"); 6118 return(-1); 6119 } 6120 while ((RAW == 0) && (ctxt->inputNr > 1)) 6121 xmlPopInput(ctxt); 6122 if (!IS_BLANK_CH(CUR)) { 6123 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6124 "Space required after the element name\n"); 6125 } 6126 SKIP_BLANKS; 6127 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6128 SKIP(5); 6129 /* 6130 * Element must always be empty. 6131 */ 6132 ret = XML_ELEMENT_TYPE_EMPTY; 6133 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6134 (NXT(2) == 'Y')) { 6135 SKIP(3); 6136 /* 6137 * Element is a generic container. 6138 */ 6139 ret = XML_ELEMENT_TYPE_ANY; 6140 } else if (RAW == '(') { 6141 ret = xmlParseElementContentDecl(ctxt, name, &content); 6142 } else { 6143 /* 6144 * [ WFC: PEs in Internal Subset ] error handling. 
6145 */ 6146 if ((RAW == '%') && (ctxt->external == 0) && 6147 (ctxt->inputNr == 1)) { 6148 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6149 "PEReference: forbidden within markup decl in internal subset\n"); 6150 } else { 6151 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6152 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6153 } 6154 return(-1); 6155 } 6156 6157 SKIP_BLANKS; 6158 /* 6159 * Pop-up of finished entities. 6160 */ 6161 while ((RAW == 0) && (ctxt->inputNr > 1)) 6162 xmlPopInput(ctxt); 6163 SKIP_BLANKS; 6164 6165 if (RAW != '>') { 6166 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6167 if (content != NULL) { 6168 xmlFreeDocElementContent(ctxt->myDoc, content); 6169 } 6170 } else { 6171 if (input != ctxt->input) { 6172 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6173 "Element declaration doesn't start and stop in the same entity\n"); 6174 } 6175 6176 NEXT; 6177 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6178 (ctxt->sax->elementDecl != NULL)) { 6179 if (content != NULL) 6180 content->parent = NULL; 6181 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6182 content); 6183 if ((content != NULL) && (content->parent == NULL)) { 6184 /* 6185 * this is a trick: if xmlAddElementDecl is called, 6186 * instead of copying the full tree it is plugged directly 6187 * if called from the parser. Avoid duplicating the 6188 * interfaces or change the API/ABI 6189 */ 6190 xmlFreeDocElementContent(ctxt->myDoc, content); 6191 } 6192 } else if (content != NULL) { 6193 xmlFreeDocElementContent(ctxt->myDoc, content); 6194 } 6195 } 6196 } 6197 return(ret); 6198} 6199 6200/** 6201 * xmlParseConditionalSections 6202 * @ctxt: an XML parser context 6203 * 6204 * [61] conditionalSect ::= includeSect | ignoreSect 6205 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6206 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6207 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6208 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6209 */ 6210 6211static void 6212xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6213 int id = ctxt->input->id; 6214 6215 SKIP(3); 6216 SKIP_BLANKS; 6217 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6218 SKIP(7); 6219 SKIP_BLANKS; 6220 if (RAW != '[') { 6221 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6222 } else { 6223 if (ctxt->input->id != id) { 6224 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6225 "All markup of the conditional section is not in the same entity\n", 6226 NULL, NULL); 6227 } 6228 NEXT; 6229 } 6230 if (xmlParserDebugEntities) { 6231 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6232 xmlGenericError(xmlGenericErrorContext, 6233 "%s(%d): ", ctxt->input->filename, 6234 ctxt->input->line); 6235 xmlGenericError(xmlGenericErrorContext, 6236 "Entering INCLUDE Conditional Section\n"); 6237 } 6238 6239 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6240 (NXT(2) != '>'))) { 6241 const xmlChar *check = CUR_PTR; 6242 unsigned int cons = ctxt->input->consumed; 6243 6244 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6245 xmlParseConditionalSections(ctxt); 6246 } else if (IS_BLANK_CH(CUR)) { 6247 NEXT; 6248 } else if (RAW == '%') { 6249 xmlParsePEReference(ctxt); 6250 } else 6251 xmlParseMarkupDecl(ctxt); 6252 6253 /* 6254 * Pop-up of finished entities. 
6255 */ 6256 while ((RAW == 0) && (ctxt->inputNr > 1)) 6257 xmlPopInput(ctxt); 6258 6259 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6260 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6261 break; 6262 } 6263 } 6264 if (xmlParserDebugEntities) { 6265 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6266 xmlGenericError(xmlGenericErrorContext, 6267 "%s(%d): ", ctxt->input->filename, 6268 ctxt->input->line); 6269 xmlGenericError(xmlGenericErrorContext, 6270 "Leaving INCLUDE Conditional Section\n"); 6271 } 6272 6273 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6274 int state; 6275 xmlParserInputState instate; 6276 int depth = 0; 6277 6278 SKIP(6); 6279 SKIP_BLANKS; 6280 if (RAW != '[') { 6281 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6282 } else { 6283 if (ctxt->input->id != id) { 6284 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6285 "All markup of the conditional section is not in the same entity\n", 6286 NULL, NULL); 6287 } 6288 NEXT; 6289 } 6290 if (xmlParserDebugEntities) { 6291 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6292 xmlGenericError(xmlGenericErrorContext, 6293 "%s(%d): ", ctxt->input->filename, 6294 ctxt->input->line); 6295 xmlGenericError(xmlGenericErrorContext, 6296 "Entering IGNORE Conditional Section\n"); 6297 } 6298 6299 /* 6300 * Parse up to the end of the conditional section 6301 * But disable SAX event generating DTD building in the meantime 6302 */ 6303 state = ctxt->disableSAX; 6304 instate = ctxt->instate; 6305 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6306 ctxt->instate = XML_PARSER_IGNORE; 6307 6308 while ((depth >= 0) && (RAW != 0)) { 6309 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6310 depth++; 6311 SKIP(3); 6312 continue; 6313 } 6314 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6315 if (--depth >= 0) SKIP(3); 6316 continue; 6317 } 6318 NEXT; 6319 continue; 6320 } 6321 6322 ctxt->disableSAX = state; 6323 ctxt->instate = instate; 6324 6325 
if (xmlParserDebugEntities) { 6326 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6327 xmlGenericError(xmlGenericErrorContext, 6328 "%s(%d): ", ctxt->input->filename, 6329 ctxt->input->line); 6330 xmlGenericError(xmlGenericErrorContext, 6331 "Leaving IGNORE Conditional Section\n"); 6332 } 6333 6334 } else { 6335 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6336 } 6337 6338 if (RAW == 0) 6339 SHRINK; 6340 6341 if (RAW == 0) { 6342 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6343 } else { 6344 if (ctxt->input->id != id) { 6345 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6346 "All markup of the conditional section is not in the same entity\n", 6347 NULL, NULL); 6348 } 6349 SKIP(3); 6350 } 6351} 6352 6353/** 6354 * xmlParseMarkupDecl: 6355 * @ctxt: an XML parser context 6356 * 6357 * parse Markup declarations 6358 * 6359 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6360 * NotationDecl | PI | Comment 6361 * 6362 * [ VC: Proper Declaration/PE Nesting ] 6363 * Parameter-entity replacement text must be properly nested with 6364 * markup declarations. That is to say, if either the first character 6365 * or the last character of a markup declaration (markupdecl above) is 6366 * contained in the replacement text for a parameter-entity reference, 6367 * both must be contained in the same replacement text. 6368 * 6369 * [ WFC: PEs in Internal Subset ] 6370 * In the internal DTD subset, parameter-entity references can occur 6371 * only where markup declarations can occur, not within markup declarations. 6372 * (This does not apply to references that occur in external parameter 6373 * entities or to the external subset.) 
6374 */ 6375void 6376xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6377 GROW; 6378 if (CUR == '<') { 6379 if (NXT(1) == '!') { 6380 switch (NXT(2)) { 6381 case 'E': 6382 if (NXT(3) == 'L') 6383 xmlParseElementDecl(ctxt); 6384 else if (NXT(3) == 'N') 6385 xmlParseEntityDecl(ctxt); 6386 break; 6387 case 'A': 6388 xmlParseAttributeListDecl(ctxt); 6389 break; 6390 case 'N': 6391 xmlParseNotationDecl(ctxt); 6392 break; 6393 case '-': 6394 xmlParseComment(ctxt); 6395 break; 6396 default: 6397 /* there is an error but it will be detected later */ 6398 break; 6399 } 6400 } else if (NXT(1) == '?') { 6401 xmlParsePI(ctxt); 6402 } 6403 } 6404 /* 6405 * This is only for internal subset. On external entities, 6406 * the replacement is done before parsing stage 6407 */ 6408 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6409 xmlParsePEReference(ctxt); 6410 6411 /* 6412 * Conditional sections are allowed from entities included 6413 * by PE References in the internal subset. 6414 */ 6415 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6416 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6417 xmlParseConditionalSections(ctxt); 6418 } 6419 } 6420 6421 ctxt->instate = XML_PARSER_DTD; 6422} 6423 6424/** 6425 * xmlParseTextDecl: 6426 * @ctxt: an XML parser context 6427 * 6428 * parse an XML declaration header for external entities 6429 * 6430 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6431 */ 6432 6433void 6434xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6435 xmlChar *version; 6436 const xmlChar *encoding; 6437 6438 /* 6439 * We know that '<?xml' is here. 6440 */ 6441 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6442 SKIP(5); 6443 } else { 6444 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6445 return; 6446 } 6447 6448 if (!IS_BLANK_CH(CUR)) { 6449 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6450 "Space needed after '<?xml'\n"); 6451 } 6452 SKIP_BLANKS; 6453 6454 /* 6455 * We may have the VersionInfo here. 
6456 */ 6457 version = xmlParseVersionInfo(ctxt); 6458 if (version == NULL) 6459 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6460 else { 6461 if (!IS_BLANK_CH(CUR)) { 6462 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6463 "Space needed here\n"); 6464 } 6465 } 6466 ctxt->input->version = version; 6467 6468 /* 6469 * We must have the encoding declaration 6470 */ 6471 encoding = xmlParseEncodingDecl(ctxt); 6472 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6473 /* 6474 * The XML REC instructs us to stop parsing right here 6475 */ 6476 return; 6477 } 6478 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6479 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6480 "Missing encoding in text declaration\n"); 6481 } 6482 6483 SKIP_BLANKS; 6484 if ((RAW == '?') && (NXT(1) == '>')) { 6485 SKIP(2); 6486 } else if (RAW == '>') { 6487 /* Deprecated old WD ... */ 6488 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6489 NEXT; 6490 } else { 6491 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6492 MOVETO_ENDTAG(CUR_PTR); 6493 NEXT; 6494 } 6495} 6496 6497/** 6498 * xmlParseExternalSubset: 6499 * @ctxt: an XML parser context 6500 * @ExternalID: the external identifier 6501 * @SystemID: the system identifier (or URL) 6502 * 6503 * parse Markup declarations from an external subset 6504 * 6505 * [30] extSubset ::= textDecl? 
extSubsetDecl 6506 * 6507 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6508 */ 6509void 6510xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6511 const xmlChar *SystemID) { 6512 xmlDetectSAX2(ctxt); 6513 GROW; 6514 6515 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 6516 (ctxt->input->end - ctxt->input->cur >= 4)) { 6517 xmlChar start[4]; 6518 xmlCharEncoding enc; 6519 6520 start[0] = RAW; 6521 start[1] = NXT(1); 6522 start[2] = NXT(2); 6523 start[3] = NXT(3); 6524 enc = xmlDetectCharEncoding(start, 4); 6525 if (enc != XML_CHAR_ENCODING_NONE) 6526 xmlSwitchEncoding(ctxt, enc); 6527 } 6528 6529 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6530 xmlParseTextDecl(ctxt); 6531 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6532 /* 6533 * The XML REC instructs us to stop parsing right here 6534 */ 6535 ctxt->instate = XML_PARSER_EOF; 6536 return; 6537 } 6538 } 6539 if (ctxt->myDoc == NULL) { 6540 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6541 if (ctxt->myDoc == NULL) { 6542 xmlErrMemory(ctxt, "New Doc failed"); 6543 return; 6544 } 6545 ctxt->myDoc->properties = XML_DOC_INTERNAL; 6546 } 6547 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6548 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6549 6550 ctxt->instate = XML_PARSER_DTD; 6551 ctxt->external = 1; 6552 while (((RAW == '<') && (NXT(1) == '?')) || 6553 ((RAW == '<') && (NXT(1) == '!')) || 6554 (RAW == '%') || IS_BLANK_CH(CUR)) { 6555 const xmlChar *check = CUR_PTR; 6556 unsigned int cons = ctxt->input->consumed; 6557 6558 GROW; 6559 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6560 xmlParseConditionalSections(ctxt); 6561 } else if (IS_BLANK_CH(CUR)) { 6562 NEXT; 6563 } else if (RAW == '%') { 6564 xmlParsePEReference(ctxt); 6565 } else 6566 xmlParseMarkupDecl(ctxt); 6567 6568 /* 6569 * Pop-up of finished entities. 
6570 */ 6571 while ((RAW == 0) && (ctxt->inputNr > 1)) 6572 xmlPopInput(ctxt); 6573 6574 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6575 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6576 break; 6577 } 6578 } 6579 6580 if (RAW != 0) { 6581 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6582 } 6583 6584} 6585 6586/** 6587 * xmlParseReference: 6588 * @ctxt: an XML parser context 6589 * 6590 * parse and handle entity references in content, depending on the SAX 6591 * interface, this may end-up in a call to character() if this is a 6592 * CharRef, a predefined entity, if there is no reference() callback. 6593 * or if the parser was asked to switch to that mode. 6594 * 6595 * [67] Reference ::= EntityRef | CharRef 6596 */ 6597void 6598xmlParseReference(xmlParserCtxtPtr ctxt) { 6599 xmlEntityPtr ent; 6600 xmlChar *val; 6601 int was_checked; 6602 xmlNodePtr list = NULL; 6603 xmlParserErrors ret = XML_ERR_OK; 6604 6605 6606 if (RAW != '&') 6607 return; 6608 6609 /* 6610 * Simple case of a CharRef 6611 */ 6612 if (NXT(1) == '#') { 6613 int i = 0; 6614 xmlChar out[10]; 6615 int hex = NXT(2); 6616 int value = xmlParseCharRef(ctxt); 6617 6618 if (value == 0) 6619 return; 6620 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6621 /* 6622 * So we are using non-UTF-8 buffers 6623 * Check that the char fit on 8bits, if not 6624 * generate a CharRef. 
6625 */ 6626 if (value <= 0xFF) { 6627 out[0] = value; 6628 out[1] = 0; 6629 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6630 (!ctxt->disableSAX)) 6631 ctxt->sax->characters(ctxt->userData, out, 1); 6632 } else { 6633 if ((hex == 'x') || (hex == 'X')) 6634 snprintf((char *)out, sizeof(out), "#x%X", value); 6635 else 6636 snprintf((char *)out, sizeof(out), "#%d", value); 6637 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6638 (!ctxt->disableSAX)) 6639 ctxt->sax->reference(ctxt->userData, out); 6640 } 6641 } else { 6642 /* 6643 * Just encode the value in UTF-8 6644 */ 6645 COPY_BUF(0 ,out, i, value); 6646 out[i] = 0; 6647 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6648 (!ctxt->disableSAX)) 6649 ctxt->sax->characters(ctxt->userData, out, i); 6650 } 6651 return; 6652 } 6653 6654 /* 6655 * We are seeing an entity reference 6656 */ 6657 ent = xmlParseEntityRef(ctxt); 6658 if (ent == NULL) return; 6659 if (!ctxt->wellFormed) 6660 return; 6661 was_checked = ent->checked; 6662 6663 /* special case of predefined entities */ 6664 if ((ent->name == NULL) || 6665 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 6666 val = ent->content; 6667 if (val == NULL) return; 6668 /* 6669 * inline the entity. 6670 */ 6671 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6672 (!ctxt->disableSAX)) 6673 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6674 return; 6675 } 6676 6677 /* 6678 * The first reference to the entity trigger a parsing phase 6679 * where the ent->children is filled with the result from 6680 * the parsing. 6681 */ 6682 if (ent->checked == 0) { 6683 unsigned long oldnbent = ctxt->nbentities; 6684 6685 /* 6686 * This is a bit hackish but this seems the best 6687 * way to make sure both SAX and DOM entity support 6688 * behaves okay. 
6689 */ 6690 void *user_data; 6691 if (ctxt->userData == ctxt) 6692 user_data = NULL; 6693 else 6694 user_data = ctxt->userData; 6695 6696 /* 6697 * Check that this entity is well formed 6698 * 4.3.2: An internal general parsed entity is well-formed 6699 * if its replacement text matches the production labeled 6700 * content. 6701 */ 6702 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6703 ctxt->depth++; 6704 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 6705 user_data, &list); 6706 ctxt->depth--; 6707 6708 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6709 ctxt->depth++; 6710 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 6711 user_data, ctxt->depth, ent->URI, 6712 ent->ExternalID, &list); 6713 ctxt->depth--; 6714 } else { 6715 ret = XML_ERR_ENTITY_PE_INTERNAL; 6716 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6717 "invalid entity type found\n", NULL); 6718 } 6719 6720 /* 6721 * Store the number of entities needing parsing for this entity 6722 * content and do checkings 6723 */ 6724 ent->checked = ctxt->nbentities - oldnbent; 6725 if (ret == XML_ERR_ENTITY_LOOP) { 6726 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6727 xmlFreeNodeList(list); 6728 return; 6729 } 6730 if (xmlParserEntityCheck(ctxt, 0, ent)) { 6731 xmlFreeNodeList(list); 6732 return; 6733 } 6734 6735 if ((ret == XML_ERR_OK) && (list != NULL)) { 6736 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6737 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6738 (ent->children == NULL)) { 6739 ent->children = list; 6740 if (ctxt->replaceEntities) { 6741 /* 6742 * Prune it directly in the generated document 6743 * except for single text nodes. 
6744 */ 6745 if (((list->type == XML_TEXT_NODE) && 6746 (list->next == NULL)) || 6747 (ctxt->parseMode == XML_PARSE_READER)) { 6748 list->parent = (xmlNodePtr) ent; 6749 list = NULL; 6750 ent->owner = 1; 6751 } else { 6752 ent->owner = 0; 6753 while (list != NULL) { 6754 list->parent = (xmlNodePtr) ctxt->node; 6755 list->doc = ctxt->myDoc; 6756 if (list->next == NULL) 6757 ent->last = list; 6758 list = list->next; 6759 } 6760 list = ent->children; 6761#ifdef LIBXML_LEGACY_ENABLED 6762 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6763 xmlAddEntityReference(ent, list, NULL); 6764#endif /* LIBXML_LEGACY_ENABLED */ 6765 } 6766 } else { 6767 ent->owner = 1; 6768 while (list != NULL) { 6769 list->parent = (xmlNodePtr) ent; 6770 if (list->next == NULL) 6771 ent->last = list; 6772 list = list->next; 6773 } 6774 } 6775 } else { 6776 xmlFreeNodeList(list); 6777 list = NULL; 6778 } 6779 } else if ((ret != XML_ERR_OK) && 6780 (ret != XML_WAR_UNDECLARED_ENTITY)) { 6781 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6782 "Entity '%s' failed to parse\n", ent->name); 6783 } else if (list != NULL) { 6784 xmlFreeNodeList(list); 6785 list = NULL; 6786 } 6787 if (ent->checked == 0) 6788 ent->checked = 1; 6789 } else if (ent->checked != 1) { 6790 ctxt->nbentities += ent->checked; 6791 } 6792 6793 /* 6794 * Now that the entity content has been gathered 6795 * provide it to the application, this can take different forms based 6796 * on the parsing modes. 6797 */ 6798 if (ent->children == NULL) { 6799 /* 6800 * Probably running in SAX mode and the callbacks don't 6801 * build the entity content. So unless we already went 6802 * though parsing for first checking go though the entity 6803 * content to generate callbacks associated to the entity 6804 */ 6805 if (was_checked != 0) { 6806 void *user_data; 6807 /* 6808 * This is a bit hackish but this seems the best 6809 * way to make sure both SAX and DOM entity support 6810 * behaves okay. 
6811 */ 6812 if (ctxt->userData == ctxt) 6813 user_data = NULL; 6814 else 6815 user_data = ctxt->userData; 6816 6817 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6818 ctxt->depth++; 6819 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6820 ent->content, user_data, NULL); 6821 ctxt->depth--; 6822 } else if (ent->etype == 6823 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6824 ctxt->depth++; 6825 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 6826 ctxt->sax, user_data, ctxt->depth, 6827 ent->URI, ent->ExternalID, NULL); 6828 ctxt->depth--; 6829 } else { 6830 ret = XML_ERR_ENTITY_PE_INTERNAL; 6831 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6832 "invalid entity type found\n", NULL); 6833 } 6834 if (ret == XML_ERR_ENTITY_LOOP) { 6835 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6836 return; 6837 } 6838 } 6839 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6840 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6841 /* 6842 * Entity reference callback comes second, it's somewhat 6843 * superfluous but a compatibility to historical behaviour 6844 */ 6845 ctxt->sax->reference(ctxt->userData, ent->name); 6846 } 6847 return; 6848 } 6849 6850 /* 6851 * If we didn't get any children for the entity being built 6852 */ 6853 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6854 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6855 /* 6856 * Create a node. 6857 */ 6858 ctxt->sax->reference(ctxt->userData, ent->name); 6859 return; 6860 } 6861 6862 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 6863 /* 6864 * There is a problem on the handling of _private for entities 6865 * (bug 155816): Should we copy the content of the field from 6866 * the entity (possibly overwriting some value set by the user 6867 * when a copy is created), should we leave it alone, or should 6868 * we try to take care of different situations? The problem 6869 * is exacerbated by the usage of this field by the xmlReader. 
6870 * To fix this bug, we look at _private on the created node 6871 * and, if it's NULL, we copy in whatever was in the entity. 6872 * If it's not NULL we leave it alone. This is somewhat of a 6873 * hack - maybe we should have further tests to determine 6874 * what to do. 6875 */ 6876 if ((ctxt->node != NULL) && (ent->children != NULL)) { 6877 /* 6878 * Seems we are generating the DOM content, do 6879 * a simple tree copy for all references except the first 6880 * In the first occurrence list contains the replacement. 6881 * progressive == 2 means we are operating on the Reader 6882 * and since nodes are discarded we must copy all the time. 6883 */ 6884 if (((list == NULL) && (ent->owner == 0)) || 6885 (ctxt->parseMode == XML_PARSE_READER)) { 6886 xmlNodePtr nw = NULL, cur, firstChild = NULL; 6887 6888 /* 6889 * when operating on a reader, the entities definitions 6890 * are always owning the entities subtree. 6891 if (ctxt->parseMode == XML_PARSE_READER) 6892 ent->owner = 1; 6893 */ 6894 6895 cur = ent->children; 6896 while (cur != NULL) { 6897 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6898 if (nw != NULL) { 6899 if (nw->_private == NULL) 6900 nw->_private = cur->_private; 6901 if (firstChild == NULL){ 6902 firstChild = nw; 6903 } 6904 nw = xmlAddChild(ctxt->node, nw); 6905 } 6906 if (cur == ent->last) { 6907 /* 6908 * needed to detect some strange empty 6909 * node cases in the reader tests 6910 */ 6911 if ((ctxt->parseMode == XML_PARSE_READER) && 6912 (nw != NULL) && 6913 (nw->type == XML_ELEMENT_NODE) && 6914 (nw->children == NULL)) 6915 nw->extra = 1; 6916 6917 break; 6918 } 6919 cur = cur->next; 6920 } 6921#ifdef LIBXML_LEGACY_ENABLED 6922 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6923 xmlAddEntityReference(ent, firstChild, nw); 6924#endif /* LIBXML_LEGACY_ENABLED */ 6925 } else if (list == NULL) { 6926 xmlNodePtr nw = NULL, cur, next, last, 6927 firstChild = NULL; 6928 /* 6929 * Copy the entity child list and make it the new 6930 * entity child 
list. The goal is to make sure any 6931 * ID or REF referenced will be the one from the 6932 * document content and not the entity copy. 6933 */ 6934 cur = ent->children; 6935 ent->children = NULL; 6936 last = ent->last; 6937 ent->last = NULL; 6938 while (cur != NULL) { 6939 next = cur->next; 6940 cur->next = NULL; 6941 cur->parent = NULL; 6942 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6943 if (nw != NULL) { 6944 if (nw->_private == NULL) 6945 nw->_private = cur->_private; 6946 if (firstChild == NULL){ 6947 firstChild = cur; 6948 } 6949 xmlAddChild((xmlNodePtr) ent, nw); 6950 xmlAddChild(ctxt->node, cur); 6951 } 6952 if (cur == last) 6953 break; 6954 cur = next; 6955 } 6956 if (ent->owner == 0) 6957 ent->owner = 1; 6958#ifdef LIBXML_LEGACY_ENABLED 6959 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6960 xmlAddEntityReference(ent, firstChild, nw); 6961#endif /* LIBXML_LEGACY_ENABLED */ 6962 } else { 6963 const xmlChar *nbktext; 6964 6965 /* 6966 * the name change is to avoid coalescing of the 6967 * node with a possible previous text one which 6968 * would make ent->children a dangling pointer 6969 */ 6970 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 6971 -1); 6972 if (ent->children->type == XML_TEXT_NODE) 6973 ent->children->name = nbktext; 6974 if ((ent->last != ent->children) && 6975 (ent->last->type == XML_TEXT_NODE)) 6976 ent->last->name = nbktext; 6977 xmlAddChildList(ctxt->node, ent->children); 6978 } 6979 6980 /* 6981 * This is to avoid a nasty side effect, see 6982 * characters() in SAX.c 6983 */ 6984 ctxt->nodemem = 0; 6985 ctxt->nodelen = 0; 6986 return; 6987 } 6988 } 6989} 6990 6991/** 6992 * xmlParseEntityRef: 6993 * @ctxt: an XML parser context 6994 * 6995 * parse ENTITY references declarations 6996 * 6997 * [68] EntityRef ::= '&' Name ';' 6998 * 6999 * [ WFC: Entity Declared ] 7000 * In a document without any DTD, a document with only an internal DTD 7001 * subset which contains no parameter entity references, or a document 7002 * 
with "standalone='yes'", the Name given in the entity reference 7003 * must match that in an entity declaration, except that well-formed 7004 * documents need not declare any of the following entities: amp, lt, 7005 * gt, apos, quot. The declaration of a parameter entity must precede 7006 * any reference to it. Similarly, the declaration of a general entity 7007 * must precede any reference to it which appears in a default value in an 7008 * attribute-list declaration. Note that if entities are declared in the 7009 * external subset or in external parameter entities, a non-validating 7010 * processor is not obligated to read and process their declarations; 7011 * for such documents, the rule that an entity must be declared is a 7012 * well-formedness constraint only if standalone='yes'. 7013 * 7014 * [ WFC: Parsed Entity ] 7015 * An entity reference must not contain the name of an unparsed entity 7016 * 7017 * Returns the xmlEntityPtr if found, or NULL otherwise. 7018 */ 7019xmlEntityPtr 7020xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7021 const xmlChar *name; 7022 xmlEntityPtr ent = NULL; 7023 7024 GROW; 7025 7026 if (RAW != '&') 7027 return(NULL); 7028 NEXT; 7029 name = xmlParseName(ctxt); 7030 if (name == NULL) { 7031 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7032 "xmlParseEntityRef: no name\n"); 7033 return(NULL); 7034 } 7035 if (RAW != ';') { 7036 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7037 return(NULL); 7038 } 7039 NEXT; 7040 7041 /* 7042 * Predefined entites override any extra definition 7043 */ 7044 ent = xmlGetPredefinedEntity(name); 7045 if (ent != NULL) 7046 return(ent); 7047 7048 /* 7049 * Increate the number of entity references parsed 7050 */ 7051 ctxt->nbentities++; 7052 7053 /* 7054 * Ask first SAX for entity resolution, otherwise try the 7055 * entities which may have stored in the parser context. 
7056 */ 7057 if (ctxt->sax != NULL) { 7058 if (ctxt->sax->getEntity != NULL) 7059 ent = ctxt->sax->getEntity(ctxt->userData, name); 7060 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7061 (ctxt->userData==ctxt)) { 7062 ent = xmlSAX2GetEntity(ctxt, name); 7063 } 7064 } 7065 /* 7066 * [ WFC: Entity Declared ] 7067 * In a document without any DTD, a document with only an 7068 * internal DTD subset which contains no parameter entity 7069 * references, or a document with "standalone='yes'", the 7070 * Name given in the entity reference must match that in an 7071 * entity declaration, except that well-formed documents 7072 * need not declare any of the following entities: amp, lt, 7073 * gt, apos, quot. 7074 * The declaration of a parameter entity must precede any 7075 * reference to it. 7076 * Similarly, the declaration of a general entity must 7077 * precede any reference to it which appears in a default 7078 * value in an attribute-list declaration. Note that if 7079 * entities are declared in the external subset or in 7080 * external parameter entities, a non-validating processor 7081 * is not obligated to read and process their declarations; 7082 * for such documents, the rule that an entity must be 7083 * declared is a well-formedness constraint only if 7084 * standalone='yes'. 
7085 */ 7086 if (ent == NULL) { 7087 if ((ctxt->standalone == 1) || 7088 ((ctxt->hasExternalSubset == 0) && 7089 (ctxt->hasPErefs == 0))) { 7090 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7091 "Entity '%s' not defined\n", name); 7092 } else { 7093 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7094 "Entity '%s' not defined\n", name); 7095 if ((ctxt->inSubset == 0) && 7096 (ctxt->sax != NULL) && 7097 (ctxt->sax->reference != NULL)) { 7098 ctxt->sax->reference(ctxt->userData, name); 7099 } 7100 } 7101 ctxt->valid = 0; 7102 } 7103 7104 /* 7105 * [ WFC: Parsed Entity ] 7106 * An entity reference must not contain the name of an 7107 * unparsed entity 7108 */ 7109 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7110 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7111 "Entity reference to unparsed entity %s\n", name); 7112 } 7113 7114 /* 7115 * [ WFC: No External Entity References ] 7116 * Attribute values cannot contain direct or indirect 7117 * entity references to external entities. 7118 */ 7119 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7120 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7121 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7122 "Attribute references external entity '%s'\n", name); 7123 } 7124 /* 7125 * [ WFC: No < in Attribute Values ] 7126 * The replacement text of any entity referred to directly or 7127 * indirectly in an attribute value (other than "<") must 7128 * not contain a <. 7129 */ 7130 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7131 (ent != NULL) && (ent->content != NULL) && 7132 (xmlStrchr(ent->content, '<'))) { 7133 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7134 "'<' in entity '%s' is not allowed in attributes values\n", name); 7135 } 7136 7137 /* 7138 * Internal check, no parameter entities here ... 
7139 */ 7140 else { 7141 switch (ent->etype) { 7142 case XML_INTERNAL_PARAMETER_ENTITY: 7143 case XML_EXTERNAL_PARAMETER_ENTITY: 7144 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7145 "Attempt to reference the parameter entity '%s'\n", 7146 name); 7147 break; 7148 default: 7149 break; 7150 } 7151 } 7152 7153 /* 7154 * [ WFC: No Recursion ] 7155 * A parsed entity must not contain a recursive reference 7156 * to itself, either directly or indirectly. 7157 * Done somewhere else 7158 */ 7159 return(ent); 7160} 7161 7162/** 7163 * xmlParseStringEntityRef: 7164 * @ctxt: an XML parser context 7165 * @str: a pointer to an index in the string 7166 * 7167 * parse ENTITY references declarations, but this version parses it from 7168 * a string value. 7169 * 7170 * [68] EntityRef ::= '&' Name ';' 7171 * 7172 * [ WFC: Entity Declared ] 7173 * In a document without any DTD, a document with only an internal DTD 7174 * subset which contains no parameter entity references, or a document 7175 * with "standalone='yes'", the Name given in the entity reference 7176 * must match that in an entity declaration, except that well-formed 7177 * documents need not declare any of the following entities: amp, lt, 7178 * gt, apos, quot. The declaration of a parameter entity must precede 7179 * any reference to it. Similarly, the declaration of a general entity 7180 * must precede any reference to it which appears in a default value in an 7181 * attribute-list declaration. Note that if entities are declared in the 7182 * external subset or in external parameter entities, a non-validating 7183 * processor is not obligated to read and process their declarations; 7184 * for such documents, the rule that an entity must be declared is a 7185 * well-formedness constraint only if standalone='yes'. 7186 * 7187 * [ WFC: Parsed Entity ] 7188 * An entity reference must not contain the name of an unparsed entity 7189 * 7190 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7191 * is updated to the current location in the string. 7192 */ 7193xmlEntityPtr 7194xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7195 xmlChar *name; 7196 const xmlChar *ptr; 7197 xmlChar cur; 7198 xmlEntityPtr ent = NULL; 7199 7200 if ((str == NULL) || (*str == NULL)) 7201 return(NULL); 7202 ptr = *str; 7203 cur = *ptr; 7204 if (cur != '&') 7205 return(NULL); 7206 7207 ptr++; 7208 cur = *ptr; 7209 name = xmlParseStringName(ctxt, &ptr); 7210 if (name == NULL) { 7211 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7212 "xmlParseStringEntityRef: no name\n"); 7213 *str = ptr; 7214 return(NULL); 7215 } 7216 if (*ptr != ';') { 7217 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7218 xmlFree(name); 7219 *str = ptr; 7220 return(NULL); 7221 } 7222 ptr++; 7223 7224 7225 /* 7226 * Predefined entites override any extra definition 7227 */ 7228 ent = xmlGetPredefinedEntity(name); 7229 if (ent != NULL) { 7230 xmlFree(name); 7231 *str = ptr; 7232 return(ent); 7233 } 7234 7235 /* 7236 * Increate the number of entity references parsed 7237 */ 7238 ctxt->nbentities++; 7239 7240 /* 7241 * Ask first SAX for entity resolution, otherwise try the 7242 * entities which may have stored in the parser context. 7243 */ 7244 if (ctxt->sax != NULL) { 7245 if (ctxt->sax->getEntity != NULL) 7246 ent = ctxt->sax->getEntity(ctxt->userData, name); 7247 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7248 ent = xmlSAX2GetEntity(ctxt, name); 7249 } 7250 } 7251 7252 /* 7253 * [ WFC: Entity Declared ] 7254 * In a document without any DTD, a document with only an 7255 * internal DTD subset which contains no parameter entity 7256 * references, or a document with "standalone='yes'", the 7257 * Name given in the entity reference must match that in an 7258 * entity declaration, except that well-formed documents 7259 * need not declare any of the following entities: amp, lt, 7260 * gt, apos, quot. 
7261 * The declaration of a parameter entity must precede any 7262 * reference to it. 7263 * Similarly, the declaration of a general entity must 7264 * precede any reference to it which appears in a default 7265 * value in an attribute-list declaration. Note that if 7266 * entities are declared in the external subset or in 7267 * external parameter entities, a non-validating processor 7268 * is not obligated to read and process their declarations; 7269 * for such documents, the rule that an entity must be 7270 * declared is a well-formedness constraint only if 7271 * standalone='yes'. 7272 */ 7273 if (ent == NULL) { 7274 if ((ctxt->standalone == 1) || 7275 ((ctxt->hasExternalSubset == 0) && 7276 (ctxt->hasPErefs == 0))) { 7277 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7278 "Entity '%s' not defined\n", name); 7279 } else { 7280 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7281 "Entity '%s' not defined\n", 7282 name); 7283 } 7284 /* TODO ? check regressions ctxt->valid = 0; */ 7285 } 7286 7287 /* 7288 * [ WFC: Parsed Entity ] 7289 * An entity reference must not contain the name of an 7290 * unparsed entity 7291 */ 7292 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7293 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7294 "Entity reference to unparsed entity %s\n", name); 7295 } 7296 7297 /* 7298 * [ WFC: No External Entity References ] 7299 * Attribute values cannot contain direct or indirect 7300 * entity references to external entities. 7301 */ 7302 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7303 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7304 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7305 "Attribute references external entity '%s'\n", name); 7306 } 7307 /* 7308 * [ WFC: No < in Attribute Values ] 7309 * The replacement text of any entity referred to directly or 7310 * indirectly in an attribute value (other than "<") must 7311 * not contain a <. 
7312 */ 7313 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7314 (ent != NULL) && (ent->content != NULL) && 7315 (xmlStrchr(ent->content, '<'))) { 7316 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7317 "'<' in entity '%s' is not allowed in attributes values\n", 7318 name); 7319 } 7320 7321 /* 7322 * Internal check, no parameter entities here ... 7323 */ 7324 else { 7325 switch (ent->etype) { 7326 case XML_INTERNAL_PARAMETER_ENTITY: 7327 case XML_EXTERNAL_PARAMETER_ENTITY: 7328 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7329 "Attempt to reference the parameter entity '%s'\n", 7330 name); 7331 break; 7332 default: 7333 break; 7334 } 7335 } 7336 7337 /* 7338 * [ WFC: No Recursion ] 7339 * A parsed entity must not contain a recursive reference 7340 * to itself, either directly or indirectly. 7341 * Done somewhere else 7342 */ 7343 7344 xmlFree(name); 7345 *str = ptr; 7346 return(ent); 7347} 7348 7349/** 7350 * xmlParsePEReference: 7351 * @ctxt: an XML parser context 7352 * 7353 * parse PEReference declarations 7354 * The entity content is handled directly by pushing it's content as 7355 * a new input stream. 7356 * 7357 * [69] PEReference ::= '%' Name ';' 7358 * 7359 * [ WFC: No Recursion ] 7360 * A parsed entity must not contain a recursive 7361 * reference to itself, either directly or indirectly. 7362 * 7363 * [ WFC: Entity Declared ] 7364 * In a document without any DTD, a document with only an internal DTD 7365 * subset which contains no parameter entity references, or a document 7366 * with "standalone='yes'", ... ... The declaration of a parameter 7367 * entity must precede any reference to it... 7368 * 7369 * [ VC: Entity Declared ] 7370 * In a document with an external subset or external parameter entities 7371 * with "standalone='no'", ... ... The declaration of a parameter entity 7372 * must precede any reference to it... 7373 * 7374 * [ WFC: In DTD ] 7375 * Parameter-entity references may only appear in the DTD. 
7376 * NOTE: misleading but this is handled. 7377 */ 7378void 7379xmlParsePEReference(xmlParserCtxtPtr ctxt) 7380{ 7381 const xmlChar *name; 7382 xmlEntityPtr entity = NULL; 7383 xmlParserInputPtr input; 7384 7385 if (RAW != '%') 7386 return; 7387 NEXT; 7388 name = xmlParseName(ctxt); 7389 if (name == NULL) { 7390 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7391 "xmlParsePEReference: no name\n"); 7392 return; 7393 } 7394 if (RAW != ';') { 7395 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7396 return; 7397 } 7398 7399 NEXT; 7400 7401 /* 7402 * Increate the number of entity references parsed 7403 */ 7404 ctxt->nbentities++; 7405 7406 /* 7407 * Request the entity from SAX 7408 */ 7409 if ((ctxt->sax != NULL) && 7410 (ctxt->sax->getParameterEntity != NULL)) 7411 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7412 name); 7413 if (entity == NULL) { 7414 /* 7415 * [ WFC: Entity Declared ] 7416 * In a document without any DTD, a document with only an 7417 * internal DTD subset which contains no parameter entity 7418 * references, or a document with "standalone='yes'", ... 7419 * ... The declaration of a parameter entity must precede 7420 * any reference to it... 7421 */ 7422 if ((ctxt->standalone == 1) || 7423 ((ctxt->hasExternalSubset == 0) && 7424 (ctxt->hasPErefs == 0))) { 7425 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7426 "PEReference: %%%s; not found\n", 7427 name); 7428 } else { 7429 /* 7430 * [ VC: Entity Declared ] 7431 * In a document with an external subset or external 7432 * parameter entities with "standalone='no'", ... 7433 * ... The declaration of a parameter entity must 7434 * precede any reference to it... 
7435 */ 7436 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7437 "PEReference: %%%s; not found\n", 7438 name, NULL); 7439 ctxt->valid = 0; 7440 } 7441 } else { 7442 /* 7443 * Internal checking in case the entity quest barfed 7444 */ 7445 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7446 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7447 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7448 "Internal: %%%s; is not a parameter entity\n", 7449 name, NULL); 7450 } else if (ctxt->input->free != deallocblankswrapper) { 7451 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7452 if (xmlPushInput(ctxt, input) < 0) 7453 return; 7454 } else { 7455 /* 7456 * TODO !!! 7457 * handle the extra spaces added before and after 7458 * c.f. http://www.w3.org/TR/REC-xml#as-PE 7459 */ 7460 input = xmlNewEntityInputStream(ctxt, entity); 7461 if (xmlPushInput(ctxt, input) < 0) 7462 return; 7463 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7464 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7465 (IS_BLANK_CH(NXT(5)))) { 7466 xmlParseTextDecl(ctxt); 7467 if (ctxt->errNo == 7468 XML_ERR_UNSUPPORTED_ENCODING) { 7469 /* 7470 * The XML REC instructs us to stop parsing 7471 * right here 7472 */ 7473 ctxt->instate = XML_PARSER_EOF; 7474 return; 7475 } 7476 } 7477 } 7478 } 7479 ctxt->hasPErefs = 1; 7480} 7481 7482/** 7483 * xmlLoadEntityContent: 7484 * @ctxt: an XML parser context 7485 * @entity: an unloaded system entity 7486 * 7487 * Load the original content of the given system entity from the 7488 * ExternalID/SystemID given. 
This is to be used for Included in Literal 7489 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7490 * 7491 * Returns 0 in case of success and -1 in case of failure 7492 */ 7493static int 7494xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7495 xmlParserInputPtr input; 7496 xmlBufferPtr buf; 7497 int l, c; 7498 int count = 0; 7499 7500 if ((ctxt == NULL) || (entity == NULL) || 7501 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7502 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7503 (entity->content != NULL)) { 7504 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7505 "xmlLoadEntityContent parameter error"); 7506 return(-1); 7507 } 7508 7509 if (xmlParserDebugEntities) 7510 xmlGenericError(xmlGenericErrorContext, 7511 "Reading %s entity content input\n", entity->name); 7512 7513 buf = xmlBufferCreate(); 7514 if (buf == NULL) { 7515 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7516 "xmlLoadEntityContent parameter error"); 7517 return(-1); 7518 } 7519 7520 input = xmlNewEntityInputStream(ctxt, entity); 7521 if (input == NULL) { 7522 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7523 "xmlLoadEntityContent input error"); 7524 xmlBufferFree(buf); 7525 return(-1); 7526 } 7527 7528 /* 7529 * Push the entity as the current input, read char by char 7530 * saving to the buffer until the end of the entity or an error 7531 */ 7532 if (xmlPushInput(ctxt, input) < 0) { 7533 xmlBufferFree(buf); 7534 return(-1); 7535 } 7536 7537 GROW; 7538 c = CUR_CHAR(l); 7539 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7540 (IS_CHAR(c))) { 7541 xmlBufferAdd(buf, ctxt->input->cur, l); 7542 if (count++ > 100) { 7543 count = 0; 7544 GROW; 7545 } 7546 NEXTL(l); 7547 c = CUR_CHAR(l); 7548 } 7549 7550 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7551 xmlPopInput(ctxt); 7552 } else if (!IS_CHAR(c)) { 7553 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 7554 "xmlLoadEntityContent: invalid char 
value %d\n", 7555 c); 7556 xmlBufferFree(buf); 7557 return(-1); 7558 } 7559 entity->content = buf->content; 7560 buf->content = NULL; 7561 xmlBufferFree(buf); 7562 7563 return(0); 7564} 7565 7566/** 7567 * xmlParseStringPEReference: 7568 * @ctxt: an XML parser context 7569 * @str: a pointer to an index in the string 7570 * 7571 * parse PEReference declarations 7572 * 7573 * [69] PEReference ::= '%' Name ';' 7574 * 7575 * [ WFC: No Recursion ] 7576 * A parsed entity must not contain a recursive 7577 * reference to itself, either directly or indirectly. 7578 * 7579 * [ WFC: Entity Declared ] 7580 * In a document without any DTD, a document with only an internal DTD 7581 * subset which contains no parameter entity references, or a document 7582 * with "standalone='yes'", ... ... The declaration of a parameter 7583 * entity must precede any reference to it... 7584 * 7585 * [ VC: Entity Declared ] 7586 * In a document with an external subset or external parameter entities 7587 * with "standalone='no'", ... ... The declaration of a parameter entity 7588 * must precede any reference to it... 7589 * 7590 * [ WFC: In DTD ] 7591 * Parameter-entity references may only appear in the DTD. 7592 * NOTE: misleading but this is handled. 7593 * 7594 * Returns the string of the entity content. 
7595 * str is updated to the current value of the index 7596 */ 7597xmlEntityPtr 7598xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7599 const xmlChar *ptr; 7600 xmlChar cur; 7601 xmlChar *name; 7602 xmlEntityPtr entity = NULL; 7603 7604 if ((str == NULL) || (*str == NULL)) return(NULL); 7605 ptr = *str; 7606 cur = *ptr; 7607 if (cur != '%') 7608 return(NULL); 7609 ptr++; 7610 cur = *ptr; 7611 name = xmlParseStringName(ctxt, &ptr); 7612 if (name == NULL) { 7613 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7614 "xmlParseStringPEReference: no name\n"); 7615 *str = ptr; 7616 return(NULL); 7617 } 7618 cur = *ptr; 7619 if (cur != ';') { 7620 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7621 xmlFree(name); 7622 *str = ptr; 7623 return(NULL); 7624 } 7625 ptr++; 7626 7627 /* 7628 * Increate the number of entity references parsed 7629 */ 7630 ctxt->nbentities++; 7631 7632 /* 7633 * Request the entity from SAX 7634 */ 7635 if ((ctxt->sax != NULL) && 7636 (ctxt->sax->getParameterEntity != NULL)) 7637 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7638 name); 7639 if (entity == NULL) { 7640 /* 7641 * [ WFC: Entity Declared ] 7642 * In a document without any DTD, a document with only an 7643 * internal DTD subset which contains no parameter entity 7644 * references, or a document with "standalone='yes'", ... 7645 * ... The declaration of a parameter entity must precede 7646 * any reference to it... 7647 */ 7648 if ((ctxt->standalone == 1) || 7649 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 7650 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7651 "PEReference: %%%s; not found\n", name); 7652 } else { 7653 /* 7654 * [ VC: Entity Declared ] 7655 * In a document with an external subset or external 7656 * parameter entities with "standalone='no'", ... 7657 * ... The declaration of a parameter entity must 7658 * precede any reference to it... 
7659 */ 7660 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7661 "PEReference: %%%s; not found\n", 7662 name, NULL); 7663 ctxt->valid = 0; 7664 } 7665 } else { 7666 /* 7667 * Internal checking in case the entity quest barfed 7668 */ 7669 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7670 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7671 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7672 "%%%s; is not a parameter entity\n", 7673 name, NULL); 7674 } 7675 } 7676 ctxt->hasPErefs = 1; 7677 xmlFree(name); 7678 *str = ptr; 7679 return(entity); 7680} 7681 7682/** 7683 * xmlParseDocTypeDecl: 7684 * @ctxt: an XML parser context 7685 * 7686 * parse a DOCTYPE declaration 7687 * 7688 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7689 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7690 * 7691 * [ VC: Root Element Type ] 7692 * The Name in the document type declaration must match the element 7693 * type of the root element. 7694 */ 7695 7696void 7697xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7698 const xmlChar *name = NULL; 7699 xmlChar *ExternalID = NULL; 7700 xmlChar *URI = NULL; 7701 7702 /* 7703 * We know that '<!DOCTYPE' has been detected. 7704 */ 7705 SKIP(9); 7706 7707 SKIP_BLANKS; 7708 7709 /* 7710 * Parse the DOCTYPE name. 7711 */ 7712 name = xmlParseName(ctxt); 7713 if (name == NULL) { 7714 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7715 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 7716 } 7717 ctxt->intSubName = name; 7718 7719 SKIP_BLANKS; 7720 7721 /* 7722 * Check for SystemID and ExternalID 7723 */ 7724 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 7725 7726 if ((URI != NULL) || (ExternalID != NULL)) { 7727 ctxt->hasExternalSubset = 1; 7728 } 7729 ctxt->extSubURI = URI; 7730 ctxt->extSubSystem = ExternalID; 7731 7732 SKIP_BLANKS; 7733 7734 /* 7735 * Create and update the internal subset. 
7736 */ 7737 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 7738 (!ctxt->disableSAX)) 7739 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 7740 7741 /* 7742 * Is there any internal subset declarations ? 7743 * they are handled separately in xmlParseInternalSubset() 7744 */ 7745 if (RAW == '[') 7746 return; 7747 7748 /* 7749 * We should be at the end of the DOCTYPE declaration. 7750 */ 7751 if (RAW != '>') { 7752 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7753 } 7754 NEXT; 7755} 7756 7757/** 7758 * xmlParseInternalSubset: 7759 * @ctxt: an XML parser context 7760 * 7761 * parse the internal subset declaration 7762 * 7763 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7764 */ 7765 7766static void 7767xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 7768 /* 7769 * Is there any DTD definition ? 7770 */ 7771 if (RAW == '[') { 7772 ctxt->instate = XML_PARSER_DTD; 7773 NEXT; 7774 /* 7775 * Parse the succession of Markup declarations and 7776 * PEReferences. 7777 * Subsequence (markupdecl | PEReference | S)* 7778 */ 7779 while (RAW != ']') { 7780 const xmlChar *check = CUR_PTR; 7781 unsigned int cons = ctxt->input->consumed; 7782 7783 SKIP_BLANKS; 7784 xmlParseMarkupDecl(ctxt); 7785 xmlParsePEReference(ctxt); 7786 7787 /* 7788 * Pop-up of finished entities. 7789 */ 7790 while ((RAW == 0) && (ctxt->inputNr > 1)) 7791 xmlPopInput(ctxt); 7792 7793 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7794 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7795 "xmlParseInternalSubset: error detected in Markup declaration\n"); 7796 break; 7797 } 7798 } 7799 if (RAW == ']') { 7800 NEXT; 7801 SKIP_BLANKS; 7802 } 7803 } 7804 7805 /* 7806 * We should be at the end of the DOCTYPE declaration. 
7807 */ 7808 if (RAW != '>') { 7809 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7810 } 7811 NEXT; 7812} 7813 7814#ifdef LIBXML_SAX1_ENABLED 7815/** 7816 * xmlParseAttribute: 7817 * @ctxt: an XML parser context 7818 * @value: a xmlChar ** used to store the value of the attribute 7819 * 7820 * parse an attribute 7821 * 7822 * [41] Attribute ::= Name Eq AttValue 7823 * 7824 * [ WFC: No External Entity References ] 7825 * Attribute values cannot contain direct or indirect entity references 7826 * to external entities. 7827 * 7828 * [ WFC: No < in Attribute Values ] 7829 * The replacement text of any entity referred to directly or indirectly in 7830 * an attribute value (other than "<") must not contain a <. 7831 * 7832 * [ VC: Attribute Value Type ] 7833 * The attribute must have been declared; the value must be of the type 7834 * declared for it. 7835 * 7836 * [25] Eq ::= S? '=' S? 7837 * 7838 * With namespace: 7839 * 7840 * [NS 11] Attribute ::= QName Eq AttValue 7841 * 7842 * Also the case QName == xmlns:??? is handled independently as a namespace 7843 * definition. 7844 * 7845 * Returns the attribute name, and the value in *value. 
7846 */ 7847 7848const xmlChar * 7849xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 7850 const xmlChar *name; 7851 xmlChar *val; 7852 7853 *value = NULL; 7854 GROW; 7855 name = xmlParseName(ctxt); 7856 if (name == NULL) { 7857 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7858 "error parsing attribute name\n"); 7859 return(NULL); 7860 } 7861 7862 /* 7863 * read the value 7864 */ 7865 SKIP_BLANKS; 7866 if (RAW == '=') { 7867 NEXT; 7868 SKIP_BLANKS; 7869 val = xmlParseAttValue(ctxt); 7870 ctxt->instate = XML_PARSER_CONTENT; 7871 } else { 7872 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7873 "Specification mandate value for attribute %s\n", name); 7874 return(NULL); 7875 } 7876 7877 /* 7878 * Check that xml:lang conforms to the specification 7879 * No more registered as an error, just generate a warning now 7880 * since this was deprecated in XML second edition 7881 */ 7882 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7883 if (!xmlCheckLanguageID(val)) { 7884 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7885 "Malformed value for xml:lang : %s\n", 7886 val, NULL); 7887 } 7888 } 7889 7890 /* 7891 * Check that xml:space conforms to the specification 7892 */ 7893 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 7894 if (xmlStrEqual(val, BAD_CAST "default")) 7895 *(ctxt->space) = 0; 7896 else if (xmlStrEqual(val, BAD_CAST "preserve")) 7897 *(ctxt->space) = 1; 7898 else { 7899 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 7900"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 7901 val, NULL); 7902 } 7903 } 7904 7905 *value = val; 7906 return(name); 7907} 7908 7909/** 7910 * xmlParseStartTag: 7911 * @ctxt: an XML parser context 7912 * 7913 * parse a start of tag either for rule element or 7914 * EmptyElement. In both case we don't parse the tag closing chars. 7915 * 7916 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 7917 * 7918 * [ WFC: Unique Att Spec ] 7919 * No attribute name may appear more than once in the same start-tag or 7920 * empty-element tag. 7921 * 7922 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7923 * 7924 * [ WFC: Unique Att Spec ] 7925 * No attribute name may appear more than once in the same start-tag or 7926 * empty-element tag. 7927 * 7928 * With namespace: 7929 * 7930 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7931 * 7932 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 7933 * 7934 * Returns the element name parsed 7935 */ 7936 7937const xmlChar * 7938xmlParseStartTag(xmlParserCtxtPtr ctxt) { 7939 const xmlChar *name; 7940 const xmlChar *attname; 7941 xmlChar *attvalue; 7942 const xmlChar **atts = ctxt->atts; 7943 int nbatts = 0; 7944 int maxatts = ctxt->maxatts; 7945 int i; 7946 7947 if (RAW != '<') return(NULL); 7948 NEXT1; 7949 7950 name = xmlParseName(ctxt); 7951 if (name == NULL) { 7952 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7953 "xmlParseStartTag: invalid element name\n"); 7954 return(NULL); 7955 } 7956 7957 /* 7958 * Now parse the attributes, it ends up with the ending 7959 * 7960 * (S Attribute)* S? 7961 */ 7962 SKIP_BLANKS; 7963 GROW; 7964 7965 while ((RAW != '>') && 7966 ((RAW != '/') || (NXT(1) != '>')) && 7967 (IS_BYTE_CHAR(RAW))) { 7968 const xmlChar *q = CUR_PTR; 7969 unsigned int cons = ctxt->input->consumed; 7970 7971 attname = xmlParseAttribute(ctxt, &attvalue); 7972 if ((attname != NULL) && (attvalue != NULL)) { 7973 /* 7974 * [ WFC: Unique Att Spec ] 7975 * No attribute name may appear more than once in the same 7976 * start-tag or empty-element tag. 
7977 */ 7978 for (i = 0; i < nbatts;i += 2) { 7979 if (xmlStrEqual(atts[i], attname)) { 7980 xmlErrAttributeDup(ctxt, NULL, attname); 7981 xmlFree(attvalue); 7982 goto failed; 7983 } 7984 } 7985 /* 7986 * Add the pair to atts 7987 */ 7988 if (atts == NULL) { 7989 maxatts = 22; /* allow for 10 attrs by default */ 7990 atts = (const xmlChar **) 7991 xmlMalloc(maxatts * sizeof(xmlChar *)); 7992 if (atts == NULL) { 7993 xmlErrMemory(ctxt, NULL); 7994 if (attvalue != NULL) 7995 xmlFree(attvalue); 7996 goto failed; 7997 } 7998 ctxt->atts = atts; 7999 ctxt->maxatts = maxatts; 8000 } else if (nbatts + 4 > maxatts) { 8001 const xmlChar **n; 8002 8003 maxatts *= 2; 8004 n = (const xmlChar **) xmlRealloc((void *) atts, 8005 maxatts * sizeof(const xmlChar *)); 8006 if (n == NULL) { 8007 xmlErrMemory(ctxt, NULL); 8008 if (attvalue != NULL) 8009 xmlFree(attvalue); 8010 goto failed; 8011 } 8012 atts = n; 8013 ctxt->atts = atts; 8014 ctxt->maxatts = maxatts; 8015 } 8016 atts[nbatts++] = attname; 8017 atts[nbatts++] = attvalue; 8018 atts[nbatts] = NULL; 8019 atts[nbatts + 1] = NULL; 8020 } else { 8021 if (attvalue != NULL) 8022 xmlFree(attvalue); 8023 } 8024 8025failed: 8026 8027 GROW 8028 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8029 break; 8030 if (!IS_BLANK_CH(RAW)) { 8031 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8032 "attributes construct error\n"); 8033 } 8034 SKIP_BLANKS; 8035 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8036 (attname == NULL) && (attvalue == NULL)) { 8037 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8038 "xmlParseStartTag: problem parsing attributes\n"); 8039 break; 8040 } 8041 SHRINK; 8042 GROW; 8043 } 8044 8045 /* 8046 * SAX: Start of Element ! 
8047 */ 8048 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8049 (!ctxt->disableSAX)) { 8050 if (nbatts > 0) 8051 ctxt->sax->startElement(ctxt->userData, name, atts); 8052 else 8053 ctxt->sax->startElement(ctxt->userData, name, NULL); 8054 } 8055 8056 if (atts != NULL) { 8057 /* Free only the content strings */ 8058 for (i = 1;i < nbatts;i+=2) 8059 if (atts[i] != NULL) 8060 xmlFree((xmlChar *) atts[i]); 8061 } 8062 return(name); 8063} 8064 8065/** 8066 * xmlParseEndTag1: 8067 * @ctxt: an XML parser context 8068 * @line: line of the start tag 8069 * @nsNr: number of namespaces on the start tag 8070 * 8071 * parse an end of tag 8072 * 8073 * [42] ETag ::= '</' Name S? '>' 8074 * 8075 * With namespace 8076 * 8077 * [NS 9] ETag ::= '</' QName S? '>' 8078 */ 8079 8080static void 8081xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8082 const xmlChar *name; 8083 8084 GROW; 8085 if ((RAW != '<') || (NXT(1) != '/')) { 8086 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8087 "xmlParseEndTag: '</' not found\n"); 8088 return; 8089 } 8090 SKIP(2); 8091 8092 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8093 8094 /* 8095 * We should definitely be at the ending "S? '>'" part 8096 */ 8097 GROW; 8098 SKIP_BLANKS; 8099 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8100 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8101 } else 8102 NEXT1; 8103 8104 /* 8105 * [ WFC: Element Type Match ] 8106 * The Name in an element's end-tag must match the element type in the 8107 * start-tag. 
8108 * 8109 */ 8110 if (name != (xmlChar*)1) { 8111 if (name == NULL) name = BAD_CAST "unparseable"; 8112 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8113 "Opening and ending tag mismatch: %s line %d and %s\n", 8114 ctxt->name, line, name); 8115 } 8116 8117 /* 8118 * SAX: End of Tag 8119 */ 8120 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8121 (!ctxt->disableSAX)) 8122 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8123 8124 namePop(ctxt); 8125 spacePop(ctxt); 8126 return; 8127} 8128 8129/** 8130 * xmlParseEndTag: 8131 * @ctxt: an XML parser context 8132 * 8133 * parse an end of tag 8134 * 8135 * [42] ETag ::= '</' Name S? '>' 8136 * 8137 * With namespace 8138 * 8139 * [NS 9] ETag ::= '</' QName S? '>' 8140 */ 8141 8142void 8143xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8144 xmlParseEndTag1(ctxt, 0); 8145} 8146#endif /* LIBXML_SAX1_ENABLED */ 8147 8148/************************************************************************ 8149 * * 8150 * SAX 2 specific operations * 8151 * * 8152 ************************************************************************/ 8153 8154/* 8155 * xmlGetNamespace: 8156 * @ctxt: an XML parser context 8157 * @prefix: the prefix to lookup 8158 * 8159 * Lookup the namespace name for the @prefix (which ca be NULL) 8160 * The prefix must come from the @ctxt->dict dictionnary 8161 * 8162 * Returns the namespace name or NULL if not bound 8163 */ 8164static const xmlChar * 8165xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8166 int i; 8167 8168 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8169 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8170 if (ctxt->nsTab[i] == prefix) { 8171 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8172 return(NULL); 8173 return(ctxt->nsTab[i + 1]); 8174 } 8175 return(NULL); 8176} 8177 8178/** 8179 * xmlParseQName: 8180 * @ctxt: an XML parser context 8181 * @prefix: pointer to store the prefix part 8182 * 8183 * parse an XML Namespace QName 8184 * 8185 * [6] QName ::= 
(Prefix ':')? LocalPart 8186 * [7] Prefix ::= NCName 8187 * [8] LocalPart ::= NCName 8188 * 8189 * Returns the Name parsed or NULL 8190 */ 8191 8192static const xmlChar * 8193xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8194 const xmlChar *l, *p; 8195 8196 GROW; 8197 8198 l = xmlParseNCName(ctxt); 8199 if (l == NULL) { 8200 if (CUR == ':') { 8201 l = xmlParseName(ctxt); 8202 if (l != NULL) { 8203 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8204 "Failed to parse QName '%s'\n", l, NULL, NULL); 8205 *prefix = NULL; 8206 return(l); 8207 } 8208 } 8209 return(NULL); 8210 } 8211 if (CUR == ':') { 8212 NEXT; 8213 p = l; 8214 l = xmlParseNCName(ctxt); 8215 if (l == NULL) { 8216 xmlChar *tmp; 8217 8218 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8219 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8220 l = xmlParseNmtoken(ctxt); 8221 if (l == NULL) 8222 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8223 else { 8224 tmp = xmlBuildQName(l, p, NULL, 0); 8225 xmlFree((char *)l); 8226 } 8227 p = xmlDictLookup(ctxt->dict, tmp, -1); 8228 if (tmp != NULL) xmlFree(tmp); 8229 *prefix = NULL; 8230 return(p); 8231 } 8232 if (CUR == ':') { 8233 xmlChar *tmp; 8234 8235 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8236 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8237 NEXT; 8238 tmp = (xmlChar *) xmlParseName(ctxt); 8239 if (tmp != NULL) { 8240 tmp = xmlBuildQName(tmp, l, NULL, 0); 8241 l = xmlDictLookup(ctxt->dict, tmp, -1); 8242 if (tmp != NULL) xmlFree(tmp); 8243 *prefix = p; 8244 return(l); 8245 } 8246 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8247 l = xmlDictLookup(ctxt->dict, tmp, -1); 8248 if (tmp != NULL) xmlFree(tmp); 8249 *prefix = p; 8250 return(l); 8251 } 8252 *prefix = p; 8253 } else 8254 *prefix = NULL; 8255 return(l); 8256} 8257 8258/** 8259 * xmlParseQNameAndCompare: 8260 * @ctxt: an XML parser context 8261 * @name: the localname 8262 * @prefix: the prefix, if any. 
8263 * 8264 * parse an XML name and compares for match 8265 * (specialized for endtag parsing) 8266 * 8267 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8268 * and the name for mismatch 8269 */ 8270 8271static const xmlChar * 8272xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8273 xmlChar const *prefix) { 8274 const xmlChar *cmp = name; 8275 const xmlChar *in; 8276 const xmlChar *ret; 8277 const xmlChar *prefix2; 8278 8279 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8280 8281 GROW; 8282 in = ctxt->input->cur; 8283 8284 cmp = prefix; 8285 while (*in != 0 && *in == *cmp) { 8286 ++in; 8287 ++cmp; 8288 } 8289 if ((*cmp == 0) && (*in == ':')) { 8290 in++; 8291 cmp = name; 8292 while (*in != 0 && *in == *cmp) { 8293 ++in; 8294 ++cmp; 8295 } 8296 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8297 /* success */ 8298 ctxt->input->cur = in; 8299 return((const xmlChar*) 1); 8300 } 8301 } 8302 /* 8303 * all strings coms from the dictionary, equality can be done directly 8304 */ 8305 ret = xmlParseQName (ctxt, &prefix2); 8306 if ((ret == name) && (prefix == prefix2)) 8307 return((const xmlChar*) 1); 8308 return ret; 8309} 8310 8311/** 8312 * xmlParseAttValueInternal: 8313 * @ctxt: an XML parser context 8314 * @len: attribute len result 8315 * @alloc: whether the attribute was reallocated as a new string 8316 * @normalize: if 1 then further non-CDATA normalization must be done 8317 * 8318 * parse a value for an attribute. 8319 * NOTE: if no normalization is needed, the routine will return pointers 8320 * directly from the data buffer. 
 *
 * 3.3.3 Attribute-Value Normalization:
 * Before the value of an attribute is passed to the application or
 * checked for validity, the XML processor must normalize it as follows:
 * - a character reference is processed by appending the referenced
 *   character to the attribute value
 * - an entity reference is processed by recursively processing the
 *   replacement text of the entity
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
 *   appending #x20 to the normalized value, except that only a single
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
 *   parsed entity or the literal entity value of an internal parsed entity
 * - other characters are processed by appending them to the normalized value
 * If the declared value is not CDATA, then the XML processor must further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * All attributes for which no declaration has been read should be treated
 * by a non-validating parser as if declared CDATA.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the
 *     caller if it was copied, this can be detected by val[*len] == 0.
8343 */ 8344 8345static xmlChar * 8346xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8347 int normalize) 8348{ 8349 xmlChar limit = 0; 8350 const xmlChar *in = NULL, *start, *end, *last; 8351 xmlChar *ret = NULL; 8352 8353 GROW; 8354 in = (xmlChar *) CUR_PTR; 8355 if (*in != '"' && *in != '\'') { 8356 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8357 return (NULL); 8358 } 8359 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8360 8361 /* 8362 * try to handle in this routine the most common case where no 8363 * allocation of a new string is required and where content is 8364 * pure ASCII. 8365 */ 8366 limit = *in++; 8367 end = ctxt->input->end; 8368 start = in; 8369 if (in >= end) { 8370 const xmlChar *oldbase = ctxt->input->base; 8371 GROW; 8372 if (oldbase != ctxt->input->base) { 8373 long delta = ctxt->input->base - oldbase; 8374 start = start + delta; 8375 in = in + delta; 8376 } 8377 end = ctxt->input->end; 8378 } 8379 if (normalize) { 8380 /* 8381 * Skip any leading spaces 8382 */ 8383 while ((in < end) && (*in != limit) && 8384 ((*in == 0x20) || (*in == 0x9) || 8385 (*in == 0xA) || (*in == 0xD))) { 8386 in++; 8387 start = in; 8388 if (in >= end) { 8389 const xmlChar *oldbase = ctxt->input->base; 8390 GROW; 8391 if (oldbase != ctxt->input->base) { 8392 long delta = ctxt->input->base - oldbase; 8393 start = start + delta; 8394 in = in + delta; 8395 } 8396 end = ctxt->input->end; 8397 } 8398 } 8399 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8400 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8401 if ((*in++ == 0x20) && (*in == 0x20)) break; 8402 if (in >= end) { 8403 const xmlChar *oldbase = ctxt->input->base; 8404 GROW; 8405 if (oldbase != ctxt->input->base) { 8406 long delta = ctxt->input->base - oldbase; 8407 start = start + delta; 8408 in = in + delta; 8409 } 8410 end = ctxt->input->end; 8411 } 8412 } 8413 last = in; 8414 /* 8415 * skip the trailing blanks 8416 */ 8417 while ((last[-1] == 0x20) && (last > start)) 
last--; 8418 while ((in < end) && (*in != limit) && 8419 ((*in == 0x20) || (*in == 0x9) || 8420 (*in == 0xA) || (*in == 0xD))) { 8421 in++; 8422 if (in >= end) { 8423 const xmlChar *oldbase = ctxt->input->base; 8424 GROW; 8425 if (oldbase != ctxt->input->base) { 8426 long delta = ctxt->input->base - oldbase; 8427 start = start + delta; 8428 in = in + delta; 8429 last = last + delta; 8430 } 8431 end = ctxt->input->end; 8432 } 8433 } 8434 if (*in != limit) goto need_complex; 8435 } else { 8436 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8437 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8438 in++; 8439 if (in >= end) { 8440 const xmlChar *oldbase = ctxt->input->base; 8441 GROW; 8442 if (oldbase != ctxt->input->base) { 8443 long delta = ctxt->input->base - oldbase; 8444 start = start + delta; 8445 in = in + delta; 8446 } 8447 end = ctxt->input->end; 8448 } 8449 } 8450 last = in; 8451 if (*in != limit) goto need_complex; 8452 } 8453 in++; 8454 if (len != NULL) { 8455 *len = last - start; 8456 ret = (xmlChar *) start; 8457 } else { 8458 if (alloc) *alloc = 1; 8459 ret = xmlStrndup(start, last - start); 8460 } 8461 CUR_PTR = in; 8462 if (alloc) *alloc = 0; 8463 return ret; 8464need_complex: 8465 if (alloc) *alloc = 1; 8466 return xmlParseAttValueComplex(ctxt, len, normalize); 8467} 8468 8469/** 8470 * xmlParseAttribute2: 8471 * @ctxt: an XML parser context 8472 * @pref: the element prefix 8473 * @elem: the element name 8474 * @prefix: a xmlChar ** used to store the value of the attribute prefix 8475 * @value: a xmlChar ** used to store the value of the attribute 8476 * @len: an int * to save the length of the attribute 8477 * @alloc: an int * to indicate if the attribute was allocated 8478 * 8479 * parse an attribute in the new SAX2 framework. 8480 * 8481 * Returns the attribute name, and the value in *value, . 
8482 */ 8483 8484static const xmlChar * 8485xmlParseAttribute2(xmlParserCtxtPtr ctxt, 8486 const xmlChar * pref, const xmlChar * elem, 8487 const xmlChar ** prefix, xmlChar ** value, 8488 int *len, int *alloc) 8489{ 8490 const xmlChar *name; 8491 xmlChar *val, *internal_val = NULL; 8492 int normalize = 0; 8493 8494 *value = NULL; 8495 GROW; 8496 name = xmlParseQName(ctxt, prefix); 8497 if (name == NULL) { 8498 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8499 "error parsing attribute name\n"); 8500 return (NULL); 8501 } 8502 8503 /* 8504 * get the type if needed 8505 */ 8506 if (ctxt->attsSpecial != NULL) { 8507 int type; 8508 8509 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 8510 pref, elem, *prefix, name); 8511 if (type != 0) 8512 normalize = 1; 8513 } 8514 8515 /* 8516 * read the value 8517 */ 8518 SKIP_BLANKS; 8519 if (RAW == '=') { 8520 NEXT; 8521 SKIP_BLANKS; 8522 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 8523 if (normalize) { 8524 /* 8525 * Sometimes a second normalisation pass for spaces is needed 8526 * but that only happens if charrefs or entities refernces 8527 * have been used in the attribute value, i.e. the attribute 8528 * value have been extracted in an allocated string already. 
8529 */ 8530 if (*alloc) { 8531 const xmlChar *val2; 8532 8533 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 8534 if ((val2 != NULL) && (val2 != val)) { 8535 xmlFree(val); 8536 val = (xmlChar *) val2; 8537 } 8538 } 8539 } 8540 ctxt->instate = XML_PARSER_CONTENT; 8541 } else { 8542 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8543 "Specification mandate value for attribute %s\n", 8544 name); 8545 return (NULL); 8546 } 8547 8548 if (*prefix == ctxt->str_xml) { 8549 /* 8550 * Check that xml:lang conforms to the specification 8551 * No more registered as an error, just generate a warning now 8552 * since this was deprecated in XML second edition 8553 */ 8554 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 8555 internal_val = xmlStrndup(val, *len); 8556 if (!xmlCheckLanguageID(internal_val)) { 8557 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8558 "Malformed value for xml:lang : %s\n", 8559 internal_val, NULL); 8560 } 8561 } 8562 8563 /* 8564 * Check that xml:space conforms to the specification 8565 */ 8566 if (xmlStrEqual(name, BAD_CAST "space")) { 8567 internal_val = xmlStrndup(val, *len); 8568 if (xmlStrEqual(internal_val, BAD_CAST "default")) 8569 *(ctxt->space) = 0; 8570 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 8571 *(ctxt->space) = 1; 8572 else { 8573 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8574 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8575 internal_val, NULL); 8576 } 8577 } 8578 if (internal_val) { 8579 xmlFree(internal_val); 8580 } 8581 } 8582 8583 *value = val; 8584 return (name); 8585} 8586/** 8587 * xmlParseStartTag2: 8588 * @ctxt: an XML parser context 8589 * 8590 * parse a start of tag either for rule element or 8591 * EmptyElement. In both case we don't parse the tag closing chars. 8592 * This routine is called when running SAX2 parsing 8593 * 8594 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8595 * 8596 * [ WFC: Unique Att Spec ] 8597 * No attribute name may appear more than once in the same start-tag or 8598 * empty-element tag. 8599 * 8600 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8601 * 8602 * [ WFC: Unique Att Spec ] 8603 * No attribute name may appear more than once in the same start-tag or 8604 * empty-element tag. 8605 * 8606 * With namespace: 8607 * 8608 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8609 * 8610 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8611 * 8612 * Returns the element name parsed 8613 */ 8614 8615static const xmlChar * 8616xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8617 const xmlChar **URI, int *tlen) { 8618 const xmlChar *localname; 8619 const xmlChar *prefix; 8620 const xmlChar *attname; 8621 const xmlChar *aprefix; 8622 const xmlChar *nsname; 8623 xmlChar *attvalue; 8624 const xmlChar **atts = ctxt->atts; 8625 int maxatts = ctxt->maxatts; 8626 int nratts, nbatts, nbdef; 8627 int i, j, nbNs, attval, oldline, oldcol; 8628 const xmlChar *base; 8629 unsigned long cur; 8630 int nsNr = ctxt->nsNr; 8631 8632 if (RAW != '<') return(NULL); 8633 NEXT1; 8634 8635 /* 8636 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8637 * point since the attribute values may be stored as pointers to 8638 * the buffer and calling SHRINK would destroy them ! 8639 * The Shrinking is only possible once the full set of attribute 8640 * callbacks have been done. 8641 */ 8642reparse: 8643 SHRINK; 8644 base = ctxt->input->base; 8645 cur = ctxt->input->cur - ctxt->input->base; 8646 oldline = ctxt->input->line; 8647 oldcol = ctxt->input->col; 8648 nbatts = 0; 8649 nratts = 0; 8650 nbdef = 0; 8651 nbNs = 0; 8652 attval = 0; 8653 /* Forget any namespaces added during an earlier parse of this element. 
*/ 8654 ctxt->nsNr = nsNr; 8655 8656 localname = xmlParseQName(ctxt, &prefix); 8657 if (localname == NULL) { 8658 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8659 "StartTag: invalid element name\n"); 8660 return(NULL); 8661 } 8662 *tlen = ctxt->input->cur - ctxt->input->base - cur; 8663 8664 /* 8665 * Now parse the attributes, it ends up with the ending 8666 * 8667 * (S Attribute)* S? 8668 */ 8669 SKIP_BLANKS; 8670 GROW; 8671 if (ctxt->input->base != base) goto base_changed; 8672 8673 while ((RAW != '>') && 8674 ((RAW != '/') || (NXT(1) != '>')) && 8675 (IS_BYTE_CHAR(RAW))) { 8676 const xmlChar *q = CUR_PTR; 8677 unsigned int cons = ctxt->input->consumed; 8678 int len = -1, alloc = 0; 8679 8680 attname = xmlParseAttribute2(ctxt, prefix, localname, 8681 &aprefix, &attvalue, &len, &alloc); 8682 if (ctxt->input->base != base) { 8683 if ((attvalue != NULL) && (alloc != 0)) 8684 xmlFree(attvalue); 8685 attvalue = NULL; 8686 goto base_changed; 8687 } 8688 if ((attname != NULL) && (attvalue != NULL)) { 8689 if (len < 0) len = xmlStrlen(attvalue); 8690 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8691 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8692 xmlURIPtr uri; 8693 8694 if (*URL != 0) { 8695 uri = xmlParseURI((const char *) URL); 8696 if (uri == NULL) { 8697 xmlNsErr(ctxt, XML_WAR_NS_URI, 8698 "xmlns: '%s' is not a valid URI\n", 8699 URL, NULL, NULL); 8700 } else { 8701 if (uri->scheme == NULL) { 8702 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8703 "xmlns: URI %s is not absolute\n", 8704 URL, NULL, NULL); 8705 } 8706 xmlFreeURI(uri); 8707 } 8708 if (URL == ctxt->str_xml_ns) { 8709 if (attname != ctxt->str_xml) { 8710 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8711 "xml namespace URI cannot be the default namespace\n", 8712 NULL, NULL, NULL); 8713 } 8714 goto skip_default_ns; 8715 } 8716 if ((len == 29) && 8717 (xmlStrEqual(URL, 8718 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 8719 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8720 "reuse of the 
xmlns namespace name is forbidden\n", 8721 NULL, NULL, NULL); 8722 goto skip_default_ns; 8723 } 8724 } 8725 /* 8726 * check that it's not a defined namespace 8727 */ 8728 for (j = 1;j <= nbNs;j++) 8729 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8730 break; 8731 if (j <= nbNs) 8732 xmlErrAttributeDup(ctxt, NULL, attname); 8733 else 8734 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 8735skip_default_ns: 8736 if (alloc != 0) xmlFree(attvalue); 8737 SKIP_BLANKS; 8738 continue; 8739 } 8740 if (aprefix == ctxt->str_xmlns) { 8741 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8742 xmlURIPtr uri; 8743 8744 if (attname == ctxt->str_xml) { 8745 if (URL != ctxt->str_xml_ns) { 8746 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8747 "xml namespace prefix mapped to wrong URI\n", 8748 NULL, NULL, NULL); 8749 } 8750 /* 8751 * Do not keep a namespace definition node 8752 */ 8753 goto skip_ns; 8754 } 8755 if (URL == ctxt->str_xml_ns) { 8756 if (attname != ctxt->str_xml) { 8757 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8758 "xml namespace URI mapped to wrong prefix\n", 8759 NULL, NULL, NULL); 8760 } 8761 goto skip_ns; 8762 } 8763 if (attname == ctxt->str_xmlns) { 8764 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8765 "redefinition of the xmlns prefix is forbidden\n", 8766 NULL, NULL, NULL); 8767 goto skip_ns; 8768 } 8769 if ((len == 29) && 8770 (xmlStrEqual(URL, 8771 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 8772 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8773 "reuse of the xmlns namespace name is forbidden\n", 8774 NULL, NULL, NULL); 8775 goto skip_ns; 8776 } 8777 if ((URL == NULL) || (URL[0] == 0)) { 8778 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8779 "xmlns:%s: Empty XML namespace is not allowed\n", 8780 attname, NULL, NULL); 8781 goto skip_ns; 8782 } else { 8783 uri = xmlParseURI((const char *) URL); 8784 if (uri == NULL) { 8785 xmlNsErr(ctxt, XML_WAR_NS_URI, 8786 "xmlns:%s: '%s' is not a valid URI\n", 8787 attname, URL, NULL); 8788 } else { 8789 if ((ctxt->pedantic) && 
(uri->scheme == NULL)) { 8790 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8791 "xmlns:%s: URI %s is not absolute\n", 8792 attname, URL, NULL); 8793 } 8794 xmlFreeURI(uri); 8795 } 8796 } 8797 8798 /* 8799 * check that it's not a defined namespace 8800 */ 8801 for (j = 1;j <= nbNs;j++) 8802 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8803 break; 8804 if (j <= nbNs) 8805 xmlErrAttributeDup(ctxt, aprefix, attname); 8806 else 8807 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 8808skip_ns: 8809 if (alloc != 0) xmlFree(attvalue); 8810 SKIP_BLANKS; 8811 if (ctxt->input->base != base) goto base_changed; 8812 continue; 8813 } 8814 8815 /* 8816 * Add the pair to atts 8817 */ 8818 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8819 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8820 if (attvalue[len] == 0) 8821 xmlFree(attvalue); 8822 goto failed; 8823 } 8824 maxatts = ctxt->maxatts; 8825 atts = ctxt->atts; 8826 } 8827 ctxt->attallocs[nratts++] = alloc; 8828 atts[nbatts++] = attname; 8829 atts[nbatts++] = aprefix; 8830 atts[nbatts++] = NULL; /* the URI will be fetched later */ 8831 atts[nbatts++] = attvalue; 8832 attvalue += len; 8833 atts[nbatts++] = attvalue; 8834 /* 8835 * tag if some deallocation is needed 8836 */ 8837 if (alloc != 0) attval = 1; 8838 } else { 8839 if ((attvalue != NULL) && (attvalue[len] == 0)) 8840 xmlFree(attvalue); 8841 } 8842 8843failed: 8844 8845 GROW 8846 if (ctxt->input->base != base) goto base_changed; 8847 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8848 break; 8849 if (!IS_BLANK_CH(RAW)) { 8850 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8851 "attributes construct error\n"); 8852 break; 8853 } 8854 SKIP_BLANKS; 8855 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8856 (attname == NULL) && (attvalue == NULL)) { 8857 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8858 "xmlParseStartTag: problem parsing attributes\n"); 8859 break; 8860 } 8861 GROW; 8862 if (ctxt->input->base != base) goto base_changed; 8863 } 8864 8865 /* 8866 * 
The attributes defaulting 8867 */ 8868 if (ctxt->attsDefault != NULL) { 8869 xmlDefAttrsPtr defaults; 8870 8871 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 8872 if (defaults != NULL) { 8873 for (i = 0;i < defaults->nbAttrs;i++) { 8874 attname = defaults->values[5 * i]; 8875 aprefix = defaults->values[5 * i + 1]; 8876 8877 /* 8878 * special work for namespaces defaulted defs 8879 */ 8880 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8881 /* 8882 * check that it's not a defined namespace 8883 */ 8884 for (j = 1;j <= nbNs;j++) 8885 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8886 break; 8887 if (j <= nbNs) continue; 8888 8889 nsname = xmlGetNamespace(ctxt, NULL); 8890 if (nsname != defaults->values[5 * i + 2]) { 8891 if (nsPush(ctxt, NULL, 8892 defaults->values[5 * i + 2]) > 0) 8893 nbNs++; 8894 } 8895 } else if (aprefix == ctxt->str_xmlns) { 8896 /* 8897 * check that it's not a defined namespace 8898 */ 8899 for (j = 1;j <= nbNs;j++) 8900 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8901 break; 8902 if (j <= nbNs) continue; 8903 8904 nsname = xmlGetNamespace(ctxt, attname); 8905 if (nsname != defaults->values[2]) { 8906 if (nsPush(ctxt, attname, 8907 defaults->values[5 * i + 2]) > 0) 8908 nbNs++; 8909 } 8910 } else { 8911 /* 8912 * check that it's not a defined attribute 8913 */ 8914 for (j = 0;j < nbatts;j+=5) { 8915 if ((attname == atts[j]) && (aprefix == atts[j+1])) 8916 break; 8917 } 8918 if (j < nbatts) continue; 8919 8920 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8921 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8922 return(NULL); 8923 } 8924 maxatts = ctxt->maxatts; 8925 atts = ctxt->atts; 8926 } 8927 atts[nbatts++] = attname; 8928 atts[nbatts++] = aprefix; 8929 if (aprefix == NULL) 8930 atts[nbatts++] = NULL; 8931 else 8932 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 8933 atts[nbatts++] = defaults->values[5 * i + 2]; 8934 atts[nbatts++] = defaults->values[5 * i + 3]; 8935 if ((ctxt->standalone == 1) && 8936 
(defaults->values[5 * i + 4] != NULL)) { 8937 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 8938 "standalone: attribute %s on %s defaulted from external subset\n", 8939 attname, localname); 8940 } 8941 nbdef++; 8942 } 8943 } 8944 } 8945 } 8946 8947 /* 8948 * The attributes checkings 8949 */ 8950 for (i = 0; i < nbatts;i += 5) { 8951 /* 8952 * The default namespace does not apply to attribute names. 8953 */ 8954 if (atts[i + 1] != NULL) { 8955 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 8956 if (nsname == NULL) { 8957 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 8958 "Namespace prefix %s for %s on %s is not defined\n", 8959 atts[i + 1], atts[i], localname); 8960 } 8961 atts[i + 2] = nsname; 8962 } else 8963 nsname = NULL; 8964 /* 8965 * [ WFC: Unique Att Spec ] 8966 * No attribute name may appear more than once in the same 8967 * start-tag or empty-element tag. 8968 * As extended by the Namespace in XML REC. 8969 */ 8970 for (j = 0; j < i;j += 5) { 8971 if (atts[i] == atts[j]) { 8972 if (atts[i+1] == atts[j+1]) { 8973 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 8974 break; 8975 } 8976 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 8977 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 8978 "Namespaced Attribute %s in '%s' redefined\n", 8979 atts[i], nsname, NULL); 8980 break; 8981 } 8982 } 8983 } 8984 } 8985 8986 nsname = xmlGetNamespace(ctxt, prefix); 8987 if ((prefix != NULL) && (nsname == NULL)) { 8988 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 8989 "Namespace prefix %s on %s is not defined\n", 8990 prefix, localname, NULL); 8991 } 8992 *pref = prefix; 8993 *URI = nsname; 8994 8995 /* 8996 * SAX: Start of Element ! 
8997 */ 8998 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 8999 (!ctxt->disableSAX)) { 9000 if (nbNs > 0) 9001 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9002 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9003 nbatts / 5, nbdef, atts); 9004 else 9005 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9006 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9007 } 9008 9009 /* 9010 * Free up attribute allocated strings if needed 9011 */ 9012 if (attval != 0) { 9013 for (i = 3,j = 0; j < nratts;i += 5,j++) 9014 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9015 xmlFree((xmlChar *) atts[i]); 9016 } 9017 9018 return(localname); 9019 9020base_changed: 9021 /* 9022 * the attribute strings are valid iif the base didn't changed 9023 */ 9024 if (attval != 0) { 9025 for (i = 3,j = 0; j < nratts;i += 5,j++) 9026 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9027 xmlFree((xmlChar *) atts[i]); 9028 } 9029 ctxt->input->cur = ctxt->input->base + cur; 9030 ctxt->input->line = oldline; 9031 ctxt->input->col = oldcol; 9032 if (ctxt->wellFormed == 1) { 9033 goto reparse; 9034 } 9035 return(NULL); 9036} 9037 9038/** 9039 * xmlParseEndTag2: 9040 * @ctxt: an XML parser context 9041 * @line: line of the start tag 9042 * @nsNr: number of namespaces on the start tag 9043 * 9044 * parse an end of tag 9045 * 9046 * [42] ETag ::= '</' Name S? '>' 9047 * 9048 * With namespace 9049 * 9050 * [NS 9] ETag ::= '</' QName S? 
'>' 9051 */ 9052 9053static void 9054xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9055 const xmlChar *URI, int line, int nsNr, int tlen) { 9056 const xmlChar *name; 9057 9058 GROW; 9059 if ((RAW != '<') || (NXT(1) != '/')) { 9060 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9061 return; 9062 } 9063 SKIP(2); 9064 9065 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9066 if (ctxt->input->cur[tlen] == '>') { 9067 ctxt->input->cur += tlen + 1; 9068 goto done; 9069 } 9070 ctxt->input->cur += tlen; 9071 name = (xmlChar*)1; 9072 } else { 9073 if (prefix == NULL) 9074 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9075 else 9076 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9077 } 9078 9079 /* 9080 * We should definitely be at the ending "S? '>'" part 9081 */ 9082 GROW; 9083 SKIP_BLANKS; 9084 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9085 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9086 } else 9087 NEXT1; 9088 9089 /* 9090 * [ WFC: Element Type Match ] 9091 * The Name in an element's end-tag must match the element type in the 9092 * start-tag. 9093 * 9094 */ 9095 if (name != (xmlChar*)1) { 9096 if (name == NULL) name = BAD_CAST "unparseable"; 9097 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9098 "Opening and ending tag mismatch: %s line %d and %s\n", 9099 ctxt->name, line, name); 9100 } 9101 9102 /* 9103 * SAX: End of Tag 9104 */ 9105done: 9106 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9107 (!ctxt->disableSAX)) 9108 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9109 9110 spacePop(ctxt); 9111 if (nsNr != 0) 9112 nsPop(ctxt, nsNr); 9113 return; 9114} 9115 9116/** 9117 * xmlParseCDSect: 9118 * @ctxt: an XML parser context 9119 * 9120 * Parse escaped pure raw content. 
9121 * 9122 * [18] CDSect ::= CDStart CData CDEnd 9123 * 9124 * [19] CDStart ::= '<![CDATA[' 9125 * 9126 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 9127 * 9128 * [21] CDEnd ::= ']]>' 9129 */ 9130void 9131xmlParseCDSect(xmlParserCtxtPtr ctxt) { 9132 xmlChar *buf = NULL; 9133 int len = 0; 9134 int size = XML_PARSER_BUFFER_SIZE; 9135 int r, rl; 9136 int s, sl; 9137 int cur, l; 9138 int count = 0; 9139 9140 /* Check 2.6.0 was NXT(0) not RAW */ 9141 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9142 SKIP(9); 9143 } else 9144 return; 9145 9146 ctxt->instate = XML_PARSER_CDATA_SECTION; 9147 r = CUR_CHAR(rl); 9148 if (!IS_CHAR(r)) { 9149 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9150 ctxt->instate = XML_PARSER_CONTENT; 9151 return; 9152 } 9153 NEXTL(rl); 9154 s = CUR_CHAR(sl); 9155 if (!IS_CHAR(s)) { 9156 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 9157 ctxt->instate = XML_PARSER_CONTENT; 9158 return; 9159 } 9160 NEXTL(sl); 9161 cur = CUR_CHAR(l); 9162 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9163 if (buf == NULL) { 9164 xmlErrMemory(ctxt, NULL); 9165 return; 9166 } 9167 while (IS_CHAR(cur) && 9168 ((r != ']') || (s != ']') || (cur != '>'))) { 9169 if (len + 5 >= size) { 9170 xmlChar *tmp; 9171 9172 size *= 2; 9173 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9174 if (tmp == NULL) { 9175 xmlFree(buf); 9176 xmlErrMemory(ctxt, NULL); 9177 return; 9178 } 9179 buf = tmp; 9180 } 9181 COPY_BUF(rl,buf,len,r); 9182 r = s; 9183 rl = sl; 9184 s = cur; 9185 sl = l; 9186 count++; 9187 if (count > 50) { 9188 GROW; 9189 count = 0; 9190 } 9191 NEXTL(l); 9192 cur = CUR_CHAR(l); 9193 } 9194 buf[len] = 0; 9195 ctxt->instate = XML_PARSER_CONTENT; 9196 if (cur != '>') { 9197 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 9198 "CData section not finished\n%.50s\n", buf); 9199 xmlFree(buf); 9200 return; 9201 } 9202 NEXTL(l); 9203 9204 /* 9205 * OK the buffer is to be consumed as cdata. 
9206 */ 9207 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 9208 if (ctxt->sax->cdataBlock != NULL) 9209 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 9210 else if (ctxt->sax->characters != NULL) 9211 ctxt->sax->characters(ctxt->userData, buf, len); 9212 } 9213 xmlFree(buf); 9214} 9215 9216/** 9217 * xmlParseContent: 9218 * @ctxt: an XML parser context 9219 * 9220 * Parse a content: 9221 * 9222 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 9223 */ 9224 9225void 9226xmlParseContent(xmlParserCtxtPtr ctxt) { 9227 GROW; 9228 while ((RAW != 0) && 9229 ((RAW != '<') || (NXT(1) != '/')) && 9230 (ctxt->instate != XML_PARSER_EOF)) { 9231 const xmlChar *test = CUR_PTR; 9232 unsigned int cons = ctxt->input->consumed; 9233 const xmlChar *cur = ctxt->input->cur; 9234 9235 /* 9236 * First case : a Processing Instruction. 9237 */ 9238 if ((*cur == '<') && (cur[1] == '?')) { 9239 xmlParsePI(ctxt); 9240 } 9241 9242 /* 9243 * Second case : a CDSection 9244 */ 9245 /* 2.6.0 test was *cur not RAW */ 9246 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 9247 xmlParseCDSect(ctxt); 9248 } 9249 9250 /* 9251 * Third case : a comment 9252 */ 9253 else if ((*cur == '<') && (NXT(1) == '!') && 9254 (NXT(2) == '-') && (NXT(3) == '-')) { 9255 xmlParseComment(ctxt); 9256 ctxt->instate = XML_PARSER_CONTENT; 9257 } 9258 9259 /* 9260 * Fourth case : a sub-element. 9261 */ 9262 else if (*cur == '<') { 9263 xmlParseElement(ctxt); 9264 } 9265 9266 /* 9267 * Fifth case : a reference. If if has not been resolved, 9268 * parsing returns it's Name, create the node 9269 */ 9270 9271 else if (*cur == '&') { 9272 xmlParseReference(ctxt); 9273 } 9274 9275 /* 9276 * Last case, text. Note that References are handled directly. 9277 */ 9278 else { 9279 xmlParseCharData(ctxt, 0); 9280 } 9281 9282 GROW; 9283 /* 9284 * Pop-up of finished entities. 
9285 */ 9286 while ((RAW == 0) && (ctxt->inputNr > 1)) 9287 xmlPopInput(ctxt); 9288 SHRINK; 9289 9290 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 9291 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 9292 "detected an error in element content\n"); 9293 ctxt->instate = XML_PARSER_EOF; 9294 break; 9295 } 9296 } 9297} 9298 9299/** 9300 * xmlParseElement: 9301 * @ctxt: an XML parser context 9302 * 9303 * parse an XML element, this is highly recursive 9304 * 9305 * [39] element ::= EmptyElemTag | STag content ETag 9306 * 9307 * [ WFC: Element Type Match ] 9308 * The Name in an element's end-tag must match the element type in the 9309 * start-tag. 9310 * 9311 */ 9312 9313void 9314xmlParseElement(xmlParserCtxtPtr ctxt) { 9315 const xmlChar *name; 9316 const xmlChar *prefix; 9317 const xmlChar *URI; 9318 xmlParserNodeInfo node_info; 9319 int line, tlen; 9320 xmlNodePtr ret; 9321 int nsNr = ctxt->nsNr; 9322 9323 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) && 9324 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 9325 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 9326 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 9327 xmlParserMaxDepth); 9328 ctxt->instate = XML_PARSER_EOF; 9329 return; 9330 } 9331 9332 /* Capture start position */ 9333 if (ctxt->record_info) { 9334 node_info.begin_pos = ctxt->input->consumed + 9335 (CUR_PTR - ctxt->input->base); 9336 node_info.begin_line = ctxt->input->line; 9337 } 9338 9339 if (ctxt->spaceNr == 0) 9340 spacePush(ctxt, -1); 9341 else if (*ctxt->space == -2) 9342 spacePush(ctxt, -1); 9343 else 9344 spacePush(ctxt, *ctxt->space); 9345 9346 line = ctxt->input->line; 9347#ifdef LIBXML_SAX1_ENABLED 9348 if (ctxt->sax2) 9349#endif /* LIBXML_SAX1_ENABLED */ 9350 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9351#ifdef LIBXML_SAX1_ENABLED 9352 else 9353 name = xmlParseStartTag(ctxt); 9354#endif /* LIBXML_SAX1_ENABLED */ 9355 if (name == NULL) { 9356 spacePop(ctxt); 9357 return; 9358 } 9359 namePush(ctxt, 
name); 9360 ret = ctxt->node; 9361 9362#ifdef LIBXML_VALID_ENABLED 9363 /* 9364 * [ VC: Root Element Type ] 9365 * The Name in the document type declaration must match the element 9366 * type of the root element. 9367 */ 9368 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9369 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9370 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9371#endif /* LIBXML_VALID_ENABLED */ 9372 9373 /* 9374 * Check for an Empty Element. 9375 */ 9376 if ((RAW == '/') && (NXT(1) == '>')) { 9377 SKIP(2); 9378 if (ctxt->sax2) { 9379 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9380 (!ctxt->disableSAX)) 9381 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 9382#ifdef LIBXML_SAX1_ENABLED 9383 } else { 9384 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 9385 (!ctxt->disableSAX)) 9386 ctxt->sax->endElement(ctxt->userData, name); 9387#endif /* LIBXML_SAX1_ENABLED */ 9388 } 9389 namePop(ctxt); 9390 spacePop(ctxt); 9391 if (nsNr != ctxt->nsNr) 9392 nsPop(ctxt, ctxt->nsNr - nsNr); 9393 if ( ret != NULL && ctxt->record_info ) { 9394 node_info.end_pos = ctxt->input->consumed + 9395 (CUR_PTR - ctxt->input->base); 9396 node_info.end_line = ctxt->input->line; 9397 node_info.node = ret; 9398 xmlParserAddNodeInfo(ctxt, &node_info); 9399 } 9400 return; 9401 } 9402 if (RAW == '>') { 9403 NEXT1; 9404 } else { 9405 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 9406 "Couldn't find end of Start Tag %s line %d\n", 9407 name, line, NULL); 9408 9409 /* 9410 * end of parsing of this node. 
9411 */ 9412 nodePop(ctxt); 9413 namePop(ctxt); 9414 spacePop(ctxt); 9415 if (nsNr != ctxt->nsNr) 9416 nsPop(ctxt, ctxt->nsNr - nsNr); 9417 9418 /* 9419 * Capture end position and add node 9420 */ 9421 if ( ret != NULL && ctxt->record_info ) { 9422 node_info.end_pos = ctxt->input->consumed + 9423 (CUR_PTR - ctxt->input->base); 9424 node_info.end_line = ctxt->input->line; 9425 node_info.node = ret; 9426 xmlParserAddNodeInfo(ctxt, &node_info); 9427 } 9428 return; 9429 } 9430 9431 /* 9432 * Parse the content of the element: 9433 */ 9434 xmlParseContent(ctxt); 9435 if (!IS_BYTE_CHAR(RAW)) { 9436 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 9437 "Premature end of data in tag %s line %d\n", 9438 name, line, NULL); 9439 9440 /* 9441 * end of parsing of this node. 9442 */ 9443 nodePop(ctxt); 9444 namePop(ctxt); 9445 spacePop(ctxt); 9446 if (nsNr != ctxt->nsNr) 9447 nsPop(ctxt, ctxt->nsNr - nsNr); 9448 return; 9449 } 9450 9451 /* 9452 * parse the end of tag: '</' should be here. 9453 */ 9454 if (ctxt->sax2) { 9455 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 9456 namePop(ctxt); 9457 } 9458#ifdef LIBXML_SAX1_ENABLED 9459 else 9460 xmlParseEndTag1(ctxt, line); 9461#endif /* LIBXML_SAX1_ENABLED */ 9462 9463 /* 9464 * Capture end position and add node 9465 */ 9466 if ( ret != NULL && ctxt->record_info ) { 9467 node_info.end_pos = ctxt->input->consumed + 9468 (CUR_PTR - ctxt->input->base); 9469 node_info.end_line = ctxt->input->line; 9470 node_info.node = ret; 9471 xmlParserAddNodeInfo(ctxt, &node_info); 9472 } 9473} 9474 9475/** 9476 * xmlParseVersionNum: 9477 * @ctxt: an XML parser context 9478 * 9479 * parse the XML version value. 9480 * 9481 * [26] VersionNum ::= '1.' 
[0-9]+ 9482 * 9483 * In practice allow [0-9].[0-9]+ at that level 9484 * 9485 * Returns the string giving the XML version number, or NULL 9486 */ 9487xmlChar * 9488xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 9489 xmlChar *buf = NULL; 9490 int len = 0; 9491 int size = 10; 9492 xmlChar cur; 9493 9494 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9495 if (buf == NULL) { 9496 xmlErrMemory(ctxt, NULL); 9497 return(NULL); 9498 } 9499 cur = CUR; 9500 if (!((cur >= '0') && (cur <= '9'))) { 9501 xmlFree(buf); 9502 return(NULL); 9503 } 9504 buf[len++] = cur; 9505 NEXT; 9506 cur=CUR; 9507 if (cur != '.') { 9508 xmlFree(buf); 9509 return(NULL); 9510 } 9511 buf[len++] = cur; 9512 NEXT; 9513 cur=CUR; 9514 while ((cur >= '0') && (cur <= '9')) { 9515 if (len + 1 >= size) { 9516 xmlChar *tmp; 9517 9518 size *= 2; 9519 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9520 if (tmp == NULL) { 9521 xmlFree(buf); 9522 xmlErrMemory(ctxt, NULL); 9523 return(NULL); 9524 } 9525 buf = tmp; 9526 } 9527 buf[len++] = cur; 9528 NEXT; 9529 cur=CUR; 9530 } 9531 buf[len] = 0; 9532 return(buf); 9533} 9534 9535/** 9536 * xmlParseVersionInfo: 9537 * @ctxt: an XML parser context 9538 * 9539 * parse the XML version. 9540 * 9541 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 9542 * 9543 * [25] Eq ::= S? '=' S? 9544 * 9545 * Returns the version string, e.g. 
"1.0" 9546 */ 9547 9548xmlChar * 9549xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9550 xmlChar *version = NULL; 9551 9552 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9553 SKIP(7); 9554 SKIP_BLANKS; 9555 if (RAW != '=') { 9556 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9557 return(NULL); 9558 } 9559 NEXT; 9560 SKIP_BLANKS; 9561 if (RAW == '"') { 9562 NEXT; 9563 version = xmlParseVersionNum(ctxt); 9564 if (RAW != '"') { 9565 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9566 } else 9567 NEXT; 9568 } else if (RAW == '\''){ 9569 NEXT; 9570 version = xmlParseVersionNum(ctxt); 9571 if (RAW != '\'') { 9572 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9573 } else 9574 NEXT; 9575 } else { 9576 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9577 } 9578 } 9579 return(version); 9580} 9581 9582/** 9583 * xmlParseEncName: 9584 * @ctxt: an XML parser context 9585 * 9586 * parse the XML encoding name 9587 * 9588 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9589 * 9590 * Returns the encoding name value or NULL 9591 */ 9592xmlChar * 9593xmlParseEncName(xmlParserCtxtPtr ctxt) { 9594 xmlChar *buf = NULL; 9595 int len = 0; 9596 int size = 10; 9597 xmlChar cur; 9598 9599 cur = CUR; 9600 if (((cur >= 'a') && (cur <= 'z')) || 9601 ((cur >= 'A') && (cur <= 'Z'))) { 9602 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9603 if (buf == NULL) { 9604 xmlErrMemory(ctxt, NULL); 9605 return(NULL); 9606 } 9607 9608 buf[len++] = cur; 9609 NEXT; 9610 cur = CUR; 9611 while (((cur >= 'a') && (cur <= 'z')) || 9612 ((cur >= 'A') && (cur <= 'Z')) || 9613 ((cur >= '0') && (cur <= '9')) || 9614 (cur == '.') || (cur == '_') || 9615 (cur == '-')) { 9616 if (len + 1 >= size) { 9617 xmlChar *tmp; 9618 9619 size *= 2; 9620 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9621 if (tmp == NULL) { 9622 xmlErrMemory(ctxt, NULL); 9623 xmlFree(buf); 9624 return(NULL); 9625 } 9626 buf = tmp; 9627 } 9628 buf[len++] = cur; 9629 NEXT; 9630 cur = CUR; 9631 if 
(cur == 0) { 9632 SHRINK; 9633 GROW; 9634 cur = CUR; 9635 } 9636 } 9637 buf[len] = 0; 9638 } else { 9639 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9640 } 9641 return(buf); 9642} 9643 9644/** 9645 * xmlParseEncodingDecl: 9646 * @ctxt: an XML parser context 9647 * 9648 * parse the XML encoding declaration 9649 * 9650 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9651 * 9652 * this setups the conversion filters. 9653 * 9654 * Returns the encoding value or NULL 9655 */ 9656 9657const xmlChar * 9658xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9659 xmlChar *encoding = NULL; 9660 9661 SKIP_BLANKS; 9662 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9663 SKIP(8); 9664 SKIP_BLANKS; 9665 if (RAW != '=') { 9666 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9667 return(NULL); 9668 } 9669 NEXT; 9670 SKIP_BLANKS; 9671 if (RAW == '"') { 9672 NEXT; 9673 encoding = xmlParseEncName(ctxt); 9674 if (RAW != '"') { 9675 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9676 } else 9677 NEXT; 9678 } else if (RAW == '\''){ 9679 NEXT; 9680 encoding = xmlParseEncName(ctxt); 9681 if (RAW != '\'') { 9682 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9683 } else 9684 NEXT; 9685 } else { 9686 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9687 } 9688 /* 9689 * UTF-16 encoding stwich has already taken place at this stage, 9690 * more over the little-endian/big-endian selection is already done 9691 */ 9692 if ((encoding != NULL) && 9693 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9694 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9695 /* 9696 * If no encoding was passed to the parser, that we are 9697 * using UTF-16 and no decoder is present i.e. 
the 9698 * document is apparently UTF-8 compatible, then raise an 9699 * encoding mismatch fatal error 9700 */ 9701 if ((ctxt->encoding == NULL) && 9702 (ctxt->input->buf != NULL) && 9703 (ctxt->input->buf->encoder == NULL)) { 9704 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 9705 "Document labelled UTF-16 but has UTF-8 content\n"); 9706 } 9707 if (ctxt->encoding != NULL) 9708 xmlFree((xmlChar *) ctxt->encoding); 9709 ctxt->encoding = encoding; 9710 } 9711 /* 9712 * UTF-8 encoding is handled natively 9713 */ 9714 else if ((encoding != NULL) && 9715 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 9716 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 9717 if (ctxt->encoding != NULL) 9718 xmlFree((xmlChar *) ctxt->encoding); 9719 ctxt->encoding = encoding; 9720 } 9721 else if (encoding != NULL) { 9722 xmlCharEncodingHandlerPtr handler; 9723 9724 if (ctxt->input->encoding != NULL) 9725 xmlFree((xmlChar *) ctxt->input->encoding); 9726 ctxt->input->encoding = encoding; 9727 9728 handler = xmlFindCharEncodingHandler((const char *) encoding); 9729 if (handler != NULL) { 9730 xmlSwitchToEncoding(ctxt, handler); 9731 } else { 9732 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 9733 "Unsupported encoding %s\n", encoding); 9734 return(NULL); 9735 } 9736 } 9737 } 9738 return(encoding); 9739} 9740 9741/** 9742 * xmlParseSDDecl: 9743 * @ctxt: an XML parser context 9744 * 9745 * parse the XML standalone declaration 9746 * 9747 * [32] SDDecl ::= S 'standalone' Eq 9748 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 9749 * 9750 * [ VC: Standalone Document Declaration ] 9751 * TODO The standalone document declaration must have the value "no" 9752 * if any external markup declarations contain declarations of: 9753 * - attributes with default values, if elements to which these 9754 * attributes apply appear in the document without specifications 9755 * of values for these attributes, or 9756 * - entities (other than amp, lt, gt, apos, quot), if references 9757 * to 
those entities appear in the document, or 9758 * - attributes with values subject to normalization, where the 9759 * attribute appears in the document with a value which will change 9760 * as a result of normalization, or 9761 * - element types with element content, if white space occurs directly 9762 * within any instance of those types. 9763 * 9764 * Returns: 9765 * 1 if standalone="yes" 9766 * 0 if standalone="no" 9767 * -2 if standalone attribute is missing or invalid 9768 * (A standalone value of -2 means that the XML declaration was found, 9769 * but no value was specified for the standalone attribute). 9770 */ 9771 9772int 9773xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 9774 int standalone = -2; 9775 9776 SKIP_BLANKS; 9777 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 9778 SKIP(10); 9779 SKIP_BLANKS; 9780 if (RAW != '=') { 9781 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9782 return(standalone); 9783 } 9784 NEXT; 9785 SKIP_BLANKS; 9786 if (RAW == '\''){ 9787 NEXT; 9788 if ((RAW == 'n') && (NXT(1) == 'o')) { 9789 standalone = 0; 9790 SKIP(2); 9791 } else if ((RAW == 'y') && (NXT(1) == 'e') && 9792 (NXT(2) == 's')) { 9793 standalone = 1; 9794 SKIP(3); 9795 } else { 9796 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 9797 } 9798 if (RAW != '\'') { 9799 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9800 } else 9801 NEXT; 9802 } else if (RAW == '"'){ 9803 NEXT; 9804 if ((RAW == 'n') && (NXT(1) == 'o')) { 9805 standalone = 0; 9806 SKIP(2); 9807 } else if ((RAW == 'y') && (NXT(1) == 'e') && 9808 (NXT(2) == 's')) { 9809 standalone = 1; 9810 SKIP(3); 9811 } else { 9812 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 9813 } 9814 if (RAW != '"') { 9815 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9816 } else 9817 NEXT; 9818 } else { 9819 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9820 } 9821 } 9822 return(standalone); 9823} 9824 9825/** 9826 * xmlParseXMLDecl: 9827 * @ctxt: an XML parser context 9828 * 9829 * 
 * parse an XML declaration header
 *
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 *
 * Stores the parsed version in ctxt->version and the standalone status in
 * ctxt->input->standalone. Returns early, without consuming the rest of
 * the declaration, when the encoding is reported as unsupported.
 */

void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
    xmlChar *version;

    /*
     * This value for standalone indicates that the document has an
     * XML declaration but it does not have a standalone attribute.
     * It will be overwritten later if a standalone attribute is found.
     */
    ctxt->input->standalone = -2;

    /*
     * We know that '<?xml' is here.
     */
    SKIP(5);

    if (!IS_BLANK_CH(RAW)) {
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                       "Blank needed after '<?xml'\n");
    }
    SKIP_BLANKS;

    /*
     * We must have the VersionInfo here.
     */
    version = xmlParseVersionInfo(ctxt);
    if (version == NULL) {
        xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
    } else {
        if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
            /*
             * Changed here for XML-1.0 5th edition
             */
            if (ctxt->options & XML_PARSE_OLD10) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
                                  "Unsupported version '%s'\n",
                                  version);
            } else {
                /* Any other "1.x" only warns; everything else is fatal. */
                if ((version[0] == '1') && ((version[1] == '.'))) {
                    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
                                  "Unsupported version '%s'\n",
                                  version, NULL);
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
                                      "Unsupported version '%s'\n",
                                      version);
                }
            }
        }
        /* The context takes ownership of the version string. */
        if (ctxt->version != NULL)
            xmlFree((void *) ctxt->version);
        ctxt->version = version;
    }

    /*
     * We may have the encoding declaration
     */
    if (!IS_BLANK_CH(RAW)) {
        if ((RAW == '?') && (NXT(1) == '>')) {
            SKIP(2);
            return;
        }
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
    }
    xmlParseEncodingDecl(ctxt);
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
        /*
         * The XML REC instructs us to stop parsing right here
         */
        return;
    }

    /*
     * We may have the standalone status.
     */
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
        if ((RAW == '?') && (NXT(1) == '>')) {
            SKIP(2);
            return;
        }
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
    }
    SKIP_BLANKS;
    ctxt->input->standalone = xmlParseSDDecl(ctxt);

    SKIP_BLANKS;
    if ((RAW == '?') && (NXT(1) == '>')) {
        SKIP(2);
    } else if (RAW == '>') {
        /* Deprecated old WD ... */
        xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
        NEXT;
    } else {
        /* Report the error, then move to the closing '>' and step past it. */
        xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
        MOVETO_ENDTAG(CUR_PTR);
        NEXT;
    }
}

/**
 * xmlParseMisc:
 * @ctxt:  an XML parser context
 *
 * parse an XML Misc* optional field.
 *
 * [27] Misc ::= Comment | PI | S
 *
 * Consumes any run of processing instructions, comments and blank
 * characters found at the current position.
 */

void
xmlParseMisc(xmlParserCtxtPtr ctxt) {
    while (((RAW == '<') && (NXT(1) == '?')) ||
           (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
           IS_BLANK_CH(CUR)) {
        if ((RAW == '<') && (NXT(1) == '?')) {
            xmlParsePI(ctxt);
        } else if (IS_BLANK_CH(CUR)) {
            NEXT;
        } else
            xmlParseComment(ctxt);
    }
}

/**
 * xmlParseDocument:
 * @ctxt:  an XML parser context
 *
 * parse an XML document (and build a tree if using the standard SAX
 * interface).
 *
 * [1] document ::= prolog element Misc*
 *
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *
 * Returns 0, -1 in case of error. the parser context is augmented
 *                as a result of the parsing.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    GROW;

    /*
     * SAX: detecting the level.
     */
    xmlDetectSAX2(ctxt);

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * NOTE(review): ctxt->encoding is a string pointer compared against an
     * enum constant cast to a pointer; presumably XML_CHAR_ENCODING_NONE
     * is 0 and this is effectively a NULL test - confirm.
     */
    if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
        /*
         * Get the 4 first bytes and decode the charset
         * if enc != XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines.
         */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(&start[0], 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* An empty document is reported here, but parsing still goes on. */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /* No XML declaration: assume the default version. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * The Misc part of the Prolog
     */
    GROW;
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {

        /* inSubset: 1 while in the internal subset, 2 for the external one. */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
        }

        /*
         * Create and update the external subset.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        ctxt->inSubset = 0;

        xmlCleanSpecialAttr(ctxt);

        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                       "Start tag expected, '<' not found\n");
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* Anything left after the epilog is extra content. */
        if (RAW != 0) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    /*
     * Remove locally kept entity definitions if the tree was not built
     */
    if ((ctxt->myDoc != NULL) &&
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }

    /* Record the outcome of the parse as property flags on the document. */
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
        if (ctxt->valid)
            ctxt->myDoc->properties |= XML_DOC_DTDVALID;
        if (ctxt->nsWellFormed)
            ctxt->myDoc->properties |= XML_DOC_NSVALID;
        if (ctxt->options & XML_PARSE_OLD10)
            ctxt->myDoc->properties |= XML_DOC_OLD10;
    }
    if (! ctxt->wellFormed) {
        ctxt->valid = 0;
        return(-1);
    }
    return(0);
}

/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * parse a general parsed entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * Validation and external subset loading are switched off on the context
 * before the content is parsed.
 *
 * Returns 0, -1 in case of error. the parser context is augmented
 *                as a result of the parsing.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    xmlDefaultSAXHandlerInit();

    xmlDetectSAX2(ctxt);

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the 4 first bytes and decode the charset
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* An empty entity is reported here, but parsing still goes on. */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);

    /* Content must end at end of input: "</" or any leftover is an error. */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}

#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
 *									*
 *		Progressive parsing interfaces				*
 *									*
 ************************************************************************/

/**
 * xmlParseLookupSequence:
 * @ctxt:  an XML parser context
 * @first:  the first char to lookup
 * @next:  the next char to lookup or zero
 * @third:  the next char to lookup or zero
 *
 * Try to find if a sequence (first, next, third) or  just (first next) or
 * (first) is available in the input stream.
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
 * to avoid rescanning sequences of bytes, it DOES change the state of the
 * parser, do not use liberally.
 *
 * On success ctxt->checkIndex is reset to 0; on failure it is set to the
 * position where the scan stopped.
 *
 * Returns the index to the current parsing point if the full sequence
 *      is available, -1 otherwise.
 */
static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
                       xmlChar next, xmlChar third) {
    int base, len;
    xmlParserInputPtr in;
    const xmlChar *buf;

    in = ctxt->input;
    if (in == NULL) return(-1);
    base = in->cur - in->base;
    if (base < 0) return(-1);
    /* Resume where a previous failed lookup stopped, if that is further. */
    if (ctxt->checkIndex > base)
        base = ctxt->checkIndex;
    if (in->buf == NULL) {
        buf = in->base;
        len = in->length;
    } else {
        buf = in->buf->buffer->content;
        len = in->buf->buffer->use;
    }
    /* take into account the sequence length */
    if (third) len -= 2;
    else if (next) len --;
    for (;base < len;base++) {
        if (buf[base] == first) {
            if (third != 0) {
                if ((buf[base + 1] != next) ||
                    (buf[base + 2] != third)) continue;
            } else if (next != 0) {
                if (buf[base + 1] != next) continue;
            }
            ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
            if (next == 0)
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c' found at %d\n",
                        first, base);
            else if (third == 0)
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c%c' found at %d\n",
                        first, next, base);
            else
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c%c%c' found at %d\n",
                        first, next, third, base);
#endif
            /* Convert the buffer index into an offset from in->cur. */
            return(base - (in->cur - in->base));
        }
    }
    ctxt->checkIndex = base;
#ifdef DEBUG_PUSH
    if (next == 0)
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c' failed\n", first);
    else if (third == 0)
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c%c' failed\n", first, next);
    else
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c%c%c' failed\n", first, next, third);
#endif
    return(-1);
}

/**
 * xmlParseGetLasts:
 * @ctxt:  an XML parser context
 * @lastlt:  pointer to store the last '<' from the input
 * @lastgt:  pointer to store the last '>' from the input
 *
 * Lookup the last < and > in the current chunk
 *
 * Both outputs are set to NULL when the context is not progressive, when
 * more than one input is stacked, or when the chunk holds no '<'.
 */
static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
                 const xmlChar **lastgt) {
    const xmlChar *tmp;

    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
        xmlGenericError(xmlGenericErrorContext,
                    "Internal error: xmlParseGetLasts\n");
        return;
    }
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
        /* Scan backwards from the end of the input for the last '<'. */
        tmp = ctxt->input->end;
        tmp--;
        while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
        if (tmp < ctxt->input->base) {
            *lastlt = NULL;
            *lastgt = NULL;
        } else {
            *lastlt = tmp;
            tmp++;
            /*
             * Look forward for a '>' after that '<', stepping over quoted
             * strings so a '>' inside quotes is not taken.
             */
            while ((tmp < ctxt->input->end) && (*tmp != '>')) {
                if (*tmp == '\'') {
                    tmp++;
                    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
                    if (tmp < ctxt->input->end) tmp++;
                } else if (*tmp == '"') {
                    tmp++;
                    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
                    if (tmp < ctxt->input->end) tmp++;
                } else
                    tmp++;
            }
            if (tmp < ctxt->input->end)
                *lastgt = tmp;
            else {
                /* No '>' after the last '<': take the last one before it. */
                tmp = *lastlt;
                tmp--;
                while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
                if (tmp >= ctxt->input->base)
                    *lastgt = tmp;
                else
                    *lastgt = NULL;
            }
        }
    } else {
        *lastlt = NULL;
        *lastgt = NULL;
    }
}
/**
 * xmlCheckCdataPush:
 * @utf: pointer to the block of characters
 * @len: length of the block in bytes
 *
 * Check that the block of characters is okay as CData content [20]
 *
 * A multi-byte sequence cut short by the end of the block is not an
 * error: the count of bytes before it is returned.
 * NOTE(review): an error found at index 0 yields -0, i.e. 0, which cannot
 * be told apart from "zero bytes are okay".
 *
 * Returns the number of bytes to pass if okay, a negative index where an
 *         UTF-8 error occurred otherwise
 */
static int
xmlCheckCdataPush(const xmlChar *utf, int len) {
    int ix;
    unsigned char c;
    int codepoint;

    if ((utf == NULL) || (len <= 0))
        return(0);

    for (ix = 0; ix < len;) {      /* bounded by len, not by a terminator */
        c = utf[ix];
        if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 0 */
            /* Of the control characters only tab, LF and CR are accepted. */
            if (c >= 0x20)
                ix++;
            else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
                ix++;
            else
                return(-ix);
        } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
            if (ix + 2 > len) return(ix);
            if ((utf[ix+1] & 0xc0 ) != 0x80)
                return(-ix);
            codepoint = (utf[ix] & 0x1f) << 6;
            codepoint |= utf[ix+1] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 2;
        } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
            if (ix + 3 > len) return(ix);
            if (((utf[ix+1] & 0xc0) != 0x80) ||
                ((utf[ix+2] & 0xc0) != 0x80))
                    return(-ix);
            codepoint = (utf[ix] & 0xf) << 12;
            codepoint |= (utf[ix+1] & 0x3f) << 6;
            codepoint |= utf[ix+2] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 3;
        } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
            if (ix + 4 > len) return(ix);
            if (((utf[ix+1] & 0xc0) != 0x80) ||
                ((utf[ix+2] & 0xc0) != 0x80) ||
                ((utf[ix+3] & 0xc0) != 0x80))
                    return(-ix);
            codepoint = (utf[ix] & 0x7) << 18;
            codepoint |= (utf[ix+1] & 0x3f) << 12;
            codepoint |= (utf[ix+2] & 0x3f) << 6;
            codepoint |= utf[ix+3] & 0x3f;
            if (!xmlIsCharQ(codepoint))
                return(-ix);
            ix += 4;
        } else				/* unknown encoding */
            return(-ix);
      }
      return(ix);
}

/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing
 *
 * Returns zero if no parsing was possible
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    int avail, tlen;
    xmlChar cur, next;
    const xmlChar *lastlt, *lastgt;

    if (ctxt->input == NULL)
        return(0);

#ifdef DEBUG_PUSH
    switch (ctxt->instate) {
        case XML_PARSER_EOF:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try EOF\n"); break;
        case XML_PARSER_START:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START\n"); break;
        case XML_PARSER_MISC:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try MISC\n");break;
        case XML_PARSER_COMMENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try COMMENT\n");break;
        case XML_PARSER_PROLOG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try PROLOG\n");break;
        case XML_PARSER_START_TAG:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try START_TAG\n");break;
        case XML_PARSER_CONTENT:
            xmlGenericError(xmlGenericErrorContext,
                    "PP: try CONTENT\n");break;
        case XML_PARSER_CDATA_SECTION:
xmlGenericError(xmlGenericErrorContext, 10490 "PP: try CDATA_SECTION\n");break; 10491 case XML_PARSER_END_TAG: 10492 xmlGenericError(xmlGenericErrorContext, 10493 "PP: try END_TAG\n");break; 10494 case XML_PARSER_ENTITY_DECL: 10495 xmlGenericError(xmlGenericErrorContext, 10496 "PP: try ENTITY_DECL\n");break; 10497 case XML_PARSER_ENTITY_VALUE: 10498 xmlGenericError(xmlGenericErrorContext, 10499 "PP: try ENTITY_VALUE\n");break; 10500 case XML_PARSER_ATTRIBUTE_VALUE: 10501 xmlGenericError(xmlGenericErrorContext, 10502 "PP: try ATTRIBUTE_VALUE\n");break; 10503 case XML_PARSER_DTD: 10504 xmlGenericError(xmlGenericErrorContext, 10505 "PP: try DTD\n");break; 10506 case XML_PARSER_EPILOG: 10507 xmlGenericError(xmlGenericErrorContext, 10508 "PP: try EPILOG\n");break; 10509 case XML_PARSER_PI: 10510 xmlGenericError(xmlGenericErrorContext, 10511 "PP: try PI\n");break; 10512 case XML_PARSER_IGNORE: 10513 xmlGenericError(xmlGenericErrorContext, 10514 "PP: try IGNORE\n");break; 10515 } 10516#endif 10517 10518 if ((ctxt->input != NULL) && 10519 (ctxt->input->cur - ctxt->input->base > 4096)) { 10520 xmlSHRINK(ctxt); 10521 ctxt->checkIndex = 0; 10522 } 10523 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10524 10525 while (1) { 10526 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10527 return(0); 10528 10529 10530 /* 10531 * Pop-up of finished entities. 10532 */ 10533 while ((RAW == 0) && (ctxt->inputNr > 1)) 10534 xmlPopInput(ctxt); 10535 10536 if (ctxt->input == NULL) break; 10537 if (ctxt->input->buf == NULL) 10538 avail = ctxt->input->length - 10539 (ctxt->input->cur - ctxt->input->base); 10540 else { 10541 /* 10542 * If we are operating on converted input, try to flush 10543 * remainng chars to avoid them stalling in the non-converted 10544 * buffer. 
10545 */ 10546 if ((ctxt->input->buf->raw != NULL) && 10547 (ctxt->input->buf->raw->use > 0)) { 10548 int base = ctxt->input->base - 10549 ctxt->input->buf->buffer->content; 10550 int current = ctxt->input->cur - ctxt->input->base; 10551 10552 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 10553 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10554 ctxt->input->cur = ctxt->input->base + current; 10555 ctxt->input->end = 10556 &ctxt->input->buf->buffer->content[ 10557 ctxt->input->buf->buffer->use]; 10558 } 10559 avail = ctxt->input->buf->buffer->use - 10560 (ctxt->input->cur - ctxt->input->base); 10561 } 10562 if (avail < 1) 10563 goto done; 10564 switch (ctxt->instate) { 10565 case XML_PARSER_EOF: 10566 /* 10567 * Document parsing is done ! 10568 */ 10569 goto done; 10570 case XML_PARSER_START: 10571 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 10572 xmlChar start[4]; 10573 xmlCharEncoding enc; 10574 10575 /* 10576 * Very first chars read from the document flow. 10577 */ 10578 if (avail < 4) 10579 goto done; 10580 10581 /* 10582 * Get the 4 first bytes and decode the charset 10583 * if enc != XML_CHAR_ENCODING_NONE 10584 * plug some encoding conversion routines, 10585 * else xmlSwitchEncoding will set to (default) 10586 * UTF8. 
10587 */ 10588 start[0] = RAW; 10589 start[1] = NXT(1); 10590 start[2] = NXT(2); 10591 start[3] = NXT(3); 10592 enc = xmlDetectCharEncoding(start, 4); 10593 xmlSwitchEncoding(ctxt, enc); 10594 break; 10595 } 10596 10597 if (avail < 2) 10598 goto done; 10599 cur = ctxt->input->cur[0]; 10600 next = ctxt->input->cur[1]; 10601 if (cur == 0) { 10602 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10603 ctxt->sax->setDocumentLocator(ctxt->userData, 10604 &xmlDefaultSAXLocator); 10605 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10606 ctxt->instate = XML_PARSER_EOF; 10607#ifdef DEBUG_PUSH 10608 xmlGenericError(xmlGenericErrorContext, 10609 "PP: entering EOF\n"); 10610#endif 10611 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10612 ctxt->sax->endDocument(ctxt->userData); 10613 goto done; 10614 } 10615 if ((cur == '<') && (next == '?')) { 10616 /* PI or XML decl */ 10617 if (avail < 5) return(ret); 10618 if ((!terminate) && 10619 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10620 return(ret); 10621 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10622 ctxt->sax->setDocumentLocator(ctxt->userData, 10623 &xmlDefaultSAXLocator); 10624 if ((ctxt->input->cur[2] == 'x') && 10625 (ctxt->input->cur[3] == 'm') && 10626 (ctxt->input->cur[4] == 'l') && 10627 (IS_BLANK_CH(ctxt->input->cur[5]))) { 10628 ret += 5; 10629#ifdef DEBUG_PUSH 10630 xmlGenericError(xmlGenericErrorContext, 10631 "PP: Parsing XML Decl\n"); 10632#endif 10633 xmlParseXMLDecl(ctxt); 10634 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10635 /* 10636 * The XML REC instructs us to stop parsing right 10637 * here 10638 */ 10639 ctxt->instate = XML_PARSER_EOF; 10640 return(0); 10641 } 10642 ctxt->standalone = ctxt->input->standalone; 10643 if ((ctxt->encoding == NULL) && 10644 (ctxt->input->encoding != NULL)) 10645 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 10646 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10647 (!ctxt->disableSAX)) 10648 ctxt->sax->startDocument(ctxt->userData); 
10649 ctxt->instate = XML_PARSER_MISC; 10650#ifdef DEBUG_PUSH 10651 xmlGenericError(xmlGenericErrorContext, 10652 "PP: entering MISC\n"); 10653#endif 10654 } else { 10655 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10656 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10657 (!ctxt->disableSAX)) 10658 ctxt->sax->startDocument(ctxt->userData); 10659 ctxt->instate = XML_PARSER_MISC; 10660#ifdef DEBUG_PUSH 10661 xmlGenericError(xmlGenericErrorContext, 10662 "PP: entering MISC\n"); 10663#endif 10664 } 10665 } else { 10666 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10667 ctxt->sax->setDocumentLocator(ctxt->userData, 10668 &xmlDefaultSAXLocator); 10669 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10670 if (ctxt->version == NULL) { 10671 xmlErrMemory(ctxt, NULL); 10672 break; 10673 } 10674 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10675 (!ctxt->disableSAX)) 10676 ctxt->sax->startDocument(ctxt->userData); 10677 ctxt->instate = XML_PARSER_MISC; 10678#ifdef DEBUG_PUSH 10679 xmlGenericError(xmlGenericErrorContext, 10680 "PP: entering MISC\n"); 10681#endif 10682 } 10683 break; 10684 case XML_PARSER_START_TAG: { 10685 const xmlChar *name; 10686 const xmlChar *prefix; 10687 const xmlChar *URI; 10688 int nsNr = ctxt->nsNr; 10689 10690 if ((avail < 2) && (ctxt->inputNr == 1)) 10691 goto done; 10692 cur = ctxt->input->cur[0]; 10693 if (cur != '<') { 10694 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10695 ctxt->instate = XML_PARSER_EOF; 10696 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10697 ctxt->sax->endDocument(ctxt->userData); 10698 goto done; 10699 } 10700 if (!terminate) { 10701 if (ctxt->progressive) { 10702 /* > can be found unescaped in attribute values */ 10703 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10704 goto done; 10705 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10706 goto done; 10707 } 10708 } 10709 if (ctxt->spaceNr == 0) 10710 spacePush(ctxt, -1); 10711 else if (*ctxt->space == -2) 10712 
spacePush(ctxt, -1); 10713 else 10714 spacePush(ctxt, *ctxt->space); 10715#ifdef LIBXML_SAX1_ENABLED 10716 if (ctxt->sax2) 10717#endif /* LIBXML_SAX1_ENABLED */ 10718 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10719#ifdef LIBXML_SAX1_ENABLED 10720 else 10721 name = xmlParseStartTag(ctxt); 10722#endif /* LIBXML_SAX1_ENABLED */ 10723 if (name == NULL) { 10724 spacePop(ctxt); 10725 ctxt->instate = XML_PARSER_EOF; 10726 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10727 ctxt->sax->endDocument(ctxt->userData); 10728 goto done; 10729 } 10730#ifdef LIBXML_VALID_ENABLED 10731 /* 10732 * [ VC: Root Element Type ] 10733 * The Name in the document type declaration must match 10734 * the element type of the root element. 10735 */ 10736 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10737 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10738 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10739#endif /* LIBXML_VALID_ENABLED */ 10740 10741 /* 10742 * Check for an Empty Element. 
10743 */ 10744 if ((RAW == '/') && (NXT(1) == '>')) { 10745 SKIP(2); 10746 10747 if (ctxt->sax2) { 10748 if ((ctxt->sax != NULL) && 10749 (ctxt->sax->endElementNs != NULL) && 10750 (!ctxt->disableSAX)) 10751 ctxt->sax->endElementNs(ctxt->userData, name, 10752 prefix, URI); 10753 if (ctxt->nsNr - nsNr > 0) 10754 nsPop(ctxt, ctxt->nsNr - nsNr); 10755#ifdef LIBXML_SAX1_ENABLED 10756 } else { 10757 if ((ctxt->sax != NULL) && 10758 (ctxt->sax->endElement != NULL) && 10759 (!ctxt->disableSAX)) 10760 ctxt->sax->endElement(ctxt->userData, name); 10761#endif /* LIBXML_SAX1_ENABLED */ 10762 } 10763 spacePop(ctxt); 10764 if (ctxt->nameNr == 0) { 10765 ctxt->instate = XML_PARSER_EPILOG; 10766 } else { 10767 ctxt->instate = XML_PARSER_CONTENT; 10768 } 10769 break; 10770 } 10771 if (RAW == '>') { 10772 NEXT; 10773 } else { 10774 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 10775 "Couldn't find end of Start Tag %s\n", 10776 name); 10777 nodePop(ctxt); 10778 spacePop(ctxt); 10779 } 10780 if (ctxt->sax2) 10781 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 10782#ifdef LIBXML_SAX1_ENABLED 10783 else 10784 namePush(ctxt, name); 10785#endif /* LIBXML_SAX1_ENABLED */ 10786 10787 ctxt->instate = XML_PARSER_CONTENT; 10788 break; 10789 } 10790 case XML_PARSER_CONTENT: { 10791 const xmlChar *test; 10792 unsigned int cons; 10793 if ((avail < 2) && (ctxt->inputNr == 1)) 10794 goto done; 10795 cur = ctxt->input->cur[0]; 10796 next = ctxt->input->cur[1]; 10797 10798 test = CUR_PTR; 10799 cons = ctxt->input->consumed; 10800 if ((cur == '<') && (next == '/')) { 10801 ctxt->instate = XML_PARSER_END_TAG; 10802 break; 10803 } else if ((cur == '<') && (next == '?')) { 10804 if ((!terminate) && 10805 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10806 goto done; 10807 xmlParsePI(ctxt); 10808 } else if ((cur == '<') && (next != '!')) { 10809 ctxt->instate = XML_PARSER_START_TAG; 10810 break; 10811 } else if ((cur == '<') && (next == '!') && 10812 (ctxt->input->cur[2] == '-') && 10813 
(ctxt->input->cur[3] == '-')) { 10814 int term; 10815 10816 if (avail < 4) 10817 goto done; 10818 ctxt->input->cur += 4; 10819 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 10820 ctxt->input->cur -= 4; 10821 if ((!terminate) && (term < 0)) 10822 goto done; 10823 xmlParseComment(ctxt); 10824 ctxt->instate = XML_PARSER_CONTENT; 10825 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 10826 (ctxt->input->cur[2] == '[') && 10827 (ctxt->input->cur[3] == 'C') && 10828 (ctxt->input->cur[4] == 'D') && 10829 (ctxt->input->cur[5] == 'A') && 10830 (ctxt->input->cur[6] == 'T') && 10831 (ctxt->input->cur[7] == 'A') && 10832 (ctxt->input->cur[8] == '[')) { 10833 SKIP(9); 10834 ctxt->instate = XML_PARSER_CDATA_SECTION; 10835 break; 10836 } else if ((cur == '<') && (next == '!') && 10837 (avail < 9)) { 10838 goto done; 10839 } else if (cur == '&') { 10840 if ((!terminate) && 10841 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 10842 goto done; 10843 xmlParseReference(ctxt); 10844 } else { 10845 /* TODO Avoid the extra copy, handle directly !!! */ 10846 /* 10847 * Goal of the following test is: 10848 * - minimize calls to the SAX 'character' callback 10849 * when they are mergeable 10850 * - handle an problem for isBlank when we only parse 10851 * a sequence of blank chars and the next one is 10852 * not available to check against '<' presence. 10853 * - tries to homogenize the differences in SAX 10854 * callbacks between the push and pull versions 10855 * of the parser. 10856 */ 10857 if ((ctxt->inputNr == 1) && 10858 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 10859 if (!terminate) { 10860 if (ctxt->progressive) { 10861 if ((lastlt == NULL) || 10862 (ctxt->input->cur > lastlt)) 10863 goto done; 10864 } else if (xmlParseLookupSequence(ctxt, 10865 '<', 0, 0) < 0) { 10866 goto done; 10867 } 10868 } 10869 } 10870 ctxt->checkIndex = 0; 10871 xmlParseCharData(ctxt, 0); 10872 } 10873 /* 10874 * Pop-up of finished entities. 
10875 */ 10876 while ((RAW == 0) && (ctxt->inputNr > 1)) 10877 xmlPopInput(ctxt); 10878 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10879 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10880 "detected an error in element content\n"); 10881 ctxt->instate = XML_PARSER_EOF; 10882 break; 10883 } 10884 break; 10885 } 10886 case XML_PARSER_END_TAG: 10887 if (avail < 2) 10888 goto done; 10889 if (!terminate) { 10890 if (ctxt->progressive) { 10891 /* > can be found unescaped in attribute values */ 10892 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10893 goto done; 10894 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10895 goto done; 10896 } 10897 } 10898 if (ctxt->sax2) { 10899 xmlParseEndTag2(ctxt, 10900 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 10901 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 10902 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 10903 nameNsPop(ctxt); 10904 } 10905#ifdef LIBXML_SAX1_ENABLED 10906 else 10907 xmlParseEndTag1(ctxt, 0); 10908#endif /* LIBXML_SAX1_ENABLED */ 10909 if (ctxt->nameNr == 0) { 10910 ctxt->instate = XML_PARSER_EPILOG; 10911 } else { 10912 ctxt->instate = XML_PARSER_CONTENT; 10913 } 10914 break; 10915 case XML_PARSER_CDATA_SECTION: { 10916 /* 10917 * The Push mode need to have the SAX callback for 10918 * cdataBlock merge back contiguous callbacks. 
10919 */ 10920 int base; 10921 10922 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 10923 if (base < 0) { 10924 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 10925 int tmp; 10926 10927 tmp = xmlCheckCdataPush(ctxt->input->cur, 10928 XML_PARSER_BIG_BUFFER_SIZE); 10929 if (tmp < 0) { 10930 tmp = -tmp; 10931 ctxt->input->cur += tmp; 10932 goto encoding_error; 10933 } 10934 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10935 if (ctxt->sax->cdataBlock != NULL) 10936 ctxt->sax->cdataBlock(ctxt->userData, 10937 ctxt->input->cur, tmp); 10938 else if (ctxt->sax->characters != NULL) 10939 ctxt->sax->characters(ctxt->userData, 10940 ctxt->input->cur, tmp); 10941 } 10942 SKIPL(tmp); 10943 ctxt->checkIndex = 0; 10944 } 10945 goto done; 10946 } else { 10947 int tmp; 10948 10949 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 10950 if ((tmp < 0) || (tmp != base)) { 10951 tmp = -tmp; 10952 ctxt->input->cur += tmp; 10953 goto encoding_error; 10954 } 10955 if ((ctxt->sax != NULL) && (base == 0) && 10956 (ctxt->sax->cdataBlock != NULL) && 10957 (!ctxt->disableSAX)) { 10958 /* 10959 * Special case to provide identical behaviour 10960 * between pull and push parsers on enpty CDATA 10961 * sections 10962 */ 10963 if ((ctxt->input->cur - ctxt->input->base >= 9) && 10964 (!strncmp((const char *)&ctxt->input->cur[-9], 10965 "<![CDATA[", 9))) 10966 ctxt->sax->cdataBlock(ctxt->userData, 10967 BAD_CAST "", 0); 10968 } else if ((ctxt->sax != NULL) && (base > 0) && 10969 (!ctxt->disableSAX)) { 10970 if (ctxt->sax->cdataBlock != NULL) 10971 ctxt->sax->cdataBlock(ctxt->userData, 10972 ctxt->input->cur, base); 10973 else if (ctxt->sax->characters != NULL) 10974 ctxt->sax->characters(ctxt->userData, 10975 ctxt->input->cur, base); 10976 } 10977 SKIPL(base + 3); 10978 ctxt->checkIndex = 0; 10979 ctxt->instate = XML_PARSER_CONTENT; 10980#ifdef DEBUG_PUSH 10981 xmlGenericError(xmlGenericErrorContext, 10982 "PP: entering CONTENT\n"); 10983#endif 10984 } 10985 break; 10986 } 10987 case 
XML_PARSER_MISC: 10988 SKIP_BLANKS; 10989 if (ctxt->input->buf == NULL) 10990 avail = ctxt->input->length - 10991 (ctxt->input->cur - ctxt->input->base); 10992 else 10993 avail = ctxt->input->buf->buffer->use - 10994 (ctxt->input->cur - ctxt->input->base); 10995 if (avail < 2) 10996 goto done; 10997 cur = ctxt->input->cur[0]; 10998 next = ctxt->input->cur[1]; 10999 if ((cur == '<') && (next == '?')) { 11000 if ((!terminate) && 11001 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11002 goto done; 11003#ifdef DEBUG_PUSH 11004 xmlGenericError(xmlGenericErrorContext, 11005 "PP: Parsing PI\n"); 11006#endif 11007 xmlParsePI(ctxt); 11008 ctxt->checkIndex = 0; 11009 } else if ((cur == '<') && (next == '!') && 11010 (ctxt->input->cur[2] == '-') && 11011 (ctxt->input->cur[3] == '-')) { 11012 if ((!terminate) && 11013 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11014 goto done; 11015#ifdef DEBUG_PUSH 11016 xmlGenericError(xmlGenericErrorContext, 11017 "PP: Parsing Comment\n"); 11018#endif 11019 xmlParseComment(ctxt); 11020 ctxt->instate = XML_PARSER_MISC; 11021 ctxt->checkIndex = 0; 11022 } else if ((cur == '<') && (next == '!') && 11023 (ctxt->input->cur[2] == 'D') && 11024 (ctxt->input->cur[3] == 'O') && 11025 (ctxt->input->cur[4] == 'C') && 11026 (ctxt->input->cur[5] == 'T') && 11027 (ctxt->input->cur[6] == 'Y') && 11028 (ctxt->input->cur[7] == 'P') && 11029 (ctxt->input->cur[8] == 'E')) { 11030 if ((!terminate) && 11031 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 11032 goto done; 11033#ifdef DEBUG_PUSH 11034 xmlGenericError(xmlGenericErrorContext, 11035 "PP: Parsing internal subset\n"); 11036#endif 11037 ctxt->inSubset = 1; 11038 xmlParseDocTypeDecl(ctxt); 11039 if (RAW == '[') { 11040 ctxt->instate = XML_PARSER_DTD; 11041#ifdef DEBUG_PUSH 11042 xmlGenericError(xmlGenericErrorContext, 11043 "PP: entering DTD\n"); 11044#endif 11045 } else { 11046 /* 11047 * Create and update the external subset. 
11048 */ 11049 ctxt->inSubset = 2; 11050 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11051 (ctxt->sax->externalSubset != NULL)) 11052 ctxt->sax->externalSubset(ctxt->userData, 11053 ctxt->intSubName, ctxt->extSubSystem, 11054 ctxt->extSubURI); 11055 ctxt->inSubset = 0; 11056 xmlCleanSpecialAttr(ctxt); 11057 ctxt->instate = XML_PARSER_PROLOG; 11058#ifdef DEBUG_PUSH 11059 xmlGenericError(xmlGenericErrorContext, 11060 "PP: entering PROLOG\n"); 11061#endif 11062 } 11063 } else if ((cur == '<') && (next == '!') && 11064 (avail < 9)) { 11065 goto done; 11066 } else { 11067 ctxt->instate = XML_PARSER_START_TAG; 11068 ctxt->progressive = 1; 11069 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11070#ifdef DEBUG_PUSH 11071 xmlGenericError(xmlGenericErrorContext, 11072 "PP: entering START_TAG\n"); 11073#endif 11074 } 11075 break; 11076 case XML_PARSER_PROLOG: 11077 SKIP_BLANKS; 11078 if (ctxt->input->buf == NULL) 11079 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11080 else 11081 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11082 if (avail < 2) 11083 goto done; 11084 cur = ctxt->input->cur[0]; 11085 next = ctxt->input->cur[1]; 11086 if ((cur == '<') && (next == '?')) { 11087 if ((!terminate) && 11088 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11089 goto done; 11090#ifdef DEBUG_PUSH 11091 xmlGenericError(xmlGenericErrorContext, 11092 "PP: Parsing PI\n"); 11093#endif 11094 xmlParsePI(ctxt); 11095 } else if ((cur == '<') && (next == '!') && 11096 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11097 if ((!terminate) && 11098 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11099 goto done; 11100#ifdef DEBUG_PUSH 11101 xmlGenericError(xmlGenericErrorContext, 11102 "PP: Parsing Comment\n"); 11103#endif 11104 xmlParseComment(ctxt); 11105 ctxt->instate = XML_PARSER_PROLOG; 11106 } else if ((cur == '<') && (next == '!') && 11107 (avail < 4)) { 11108 goto done; 11109 } else { 11110 ctxt->instate = 
XML_PARSER_START_TAG; 11111 if (ctxt->progressive == 0) 11112 ctxt->progressive = 1; 11113 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11114#ifdef DEBUG_PUSH 11115 xmlGenericError(xmlGenericErrorContext, 11116 "PP: entering START_TAG\n"); 11117#endif 11118 } 11119 break; 11120 case XML_PARSER_EPILOG: 11121 SKIP_BLANKS; 11122 if (ctxt->input->buf == NULL) 11123 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11124 else 11125 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11126 if (avail < 2) 11127 goto done; 11128 cur = ctxt->input->cur[0]; 11129 next = ctxt->input->cur[1]; 11130 if ((cur == '<') && (next == '?')) { 11131 if ((!terminate) && 11132 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11133 goto done; 11134#ifdef DEBUG_PUSH 11135 xmlGenericError(xmlGenericErrorContext, 11136 "PP: Parsing PI\n"); 11137#endif 11138 xmlParsePI(ctxt); 11139 ctxt->instate = XML_PARSER_EPILOG; 11140 } else if ((cur == '<') && (next == '!') && 11141 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11142 if ((!terminate) && 11143 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11144 goto done; 11145#ifdef DEBUG_PUSH 11146 xmlGenericError(xmlGenericErrorContext, 11147 "PP: Parsing Comment\n"); 11148#endif 11149 xmlParseComment(ctxt); 11150 ctxt->instate = XML_PARSER_EPILOG; 11151 } else if ((cur == '<') && (next == '!') && 11152 (avail < 4)) { 11153 goto done; 11154 } else { 11155 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11156 ctxt->instate = XML_PARSER_EOF; 11157#ifdef DEBUG_PUSH 11158 xmlGenericError(xmlGenericErrorContext, 11159 "PP: entering EOF\n"); 11160#endif 11161 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11162 ctxt->sax->endDocument(ctxt->userData); 11163 goto done; 11164 } 11165 break; 11166 case XML_PARSER_DTD: { 11167 /* 11168 * Sorry but progressive parsing of the internal subset 11169 * is not expected to be supported. 
We first check that 11170 * the full content of the internal subset is available and 11171 * the parsing is launched only at that point. 11172 * Internal subset ends up with "']' S? '>'" in an unescaped 11173 * section and not in a ']]>' sequence which are conditional 11174 * sections (whoever argued to keep that crap in XML deserve 11175 * a place in hell !). 11176 */ 11177 int base, i; 11178 xmlChar *buf; 11179 xmlChar quote = 0; 11180 11181 base = ctxt->input->cur - ctxt->input->base; 11182 if (base < 0) return(0); 11183 if (ctxt->checkIndex > base) 11184 base = ctxt->checkIndex; 11185 buf = ctxt->input->buf->buffer->content; 11186 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 11187 base++) { 11188 if (quote != 0) { 11189 if (buf[base] == quote) 11190 quote = 0; 11191 continue; 11192 } 11193 if ((quote == 0) && (buf[base] == '<')) { 11194 int found = 0; 11195 /* special handling of comments */ 11196 if (((unsigned int) base + 4 < 11197 ctxt->input->buf->buffer->use) && 11198 (buf[base + 1] == '!') && 11199 (buf[base + 2] == '-') && 11200 (buf[base + 3] == '-')) { 11201 for (;(unsigned int) base + 3 < 11202 ctxt->input->buf->buffer->use; base++) { 11203 if ((buf[base] == '-') && 11204 (buf[base + 1] == '-') && 11205 (buf[base + 2] == '>')) { 11206 found = 1; 11207 base += 2; 11208 break; 11209 } 11210 } 11211 if (!found) { 11212#if 0 11213 fprintf(stderr, "unfinished comment\n"); 11214#endif 11215 break; /* for */ 11216 } 11217 continue; 11218 } 11219 } 11220 if (buf[base] == '"') { 11221 quote = '"'; 11222 continue; 11223 } 11224 if (buf[base] == '\'') { 11225 quote = '\''; 11226 continue; 11227 } 11228 if (buf[base] == ']') { 11229#if 0 11230 fprintf(stderr, "%c%c%c%c: ", buf[base], 11231 buf[base + 1], buf[base + 2], buf[base + 3]); 11232#endif 11233 if ((unsigned int) base +1 >= 11234 ctxt->input->buf->buffer->use) 11235 break; 11236 if (buf[base + 1] == ']') { 11237 /* conditional crap, skip both ']' ! 
*/ 11238 base++; 11239 continue; 11240 } 11241 for (i = 1; 11242 (unsigned int) base + i < ctxt->input->buf->buffer->use; 11243 i++) { 11244 if (buf[base + i] == '>') { 11245#if 0 11246 fprintf(stderr, "found\n"); 11247#endif 11248 goto found_end_int_subset; 11249 } 11250 if (!IS_BLANK_CH(buf[base + i])) { 11251#if 0 11252 fprintf(stderr, "not found\n"); 11253#endif 11254 goto not_end_of_int_subset; 11255 } 11256 } 11257#if 0 11258 fprintf(stderr, "end of stream\n"); 11259#endif 11260 break; 11261 11262 } 11263not_end_of_int_subset: 11264 continue; /* for */ 11265 } 11266 /* 11267 * We didn't found the end of the Internal subset 11268 */ 11269#ifdef DEBUG_PUSH 11270 if (next == 0) 11271 xmlGenericError(xmlGenericErrorContext, 11272 "PP: lookup of int subset end filed\n"); 11273#endif 11274 goto done; 11275 11276found_end_int_subset: 11277 xmlParseInternalSubset(ctxt); 11278 ctxt->inSubset = 2; 11279 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11280 (ctxt->sax->externalSubset != NULL)) 11281 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11282 ctxt->extSubSystem, ctxt->extSubURI); 11283 ctxt->inSubset = 0; 11284 xmlCleanSpecialAttr(ctxt); 11285 ctxt->instate = XML_PARSER_PROLOG; 11286 ctxt->checkIndex = 0; 11287#ifdef DEBUG_PUSH 11288 xmlGenericError(xmlGenericErrorContext, 11289 "PP: entering PROLOG\n"); 11290#endif 11291 break; 11292 } 11293 case XML_PARSER_COMMENT: 11294 xmlGenericError(xmlGenericErrorContext, 11295 "PP: internal error, state == COMMENT\n"); 11296 ctxt->instate = XML_PARSER_CONTENT; 11297#ifdef DEBUG_PUSH 11298 xmlGenericError(xmlGenericErrorContext, 11299 "PP: entering CONTENT\n"); 11300#endif 11301 break; 11302 case XML_PARSER_IGNORE: 11303 xmlGenericError(xmlGenericErrorContext, 11304 "PP: internal error, state == IGNORE"); 11305 ctxt->instate = XML_PARSER_DTD; 11306#ifdef DEBUG_PUSH 11307 xmlGenericError(xmlGenericErrorContext, 11308 "PP: entering DTD\n"); 11309#endif 11310 break; 11311 case XML_PARSER_PI: 11312 
xmlGenericError(xmlGenericErrorContext, 11313 "PP: internal error, state == PI\n"); 11314 ctxt->instate = XML_PARSER_CONTENT; 11315#ifdef DEBUG_PUSH 11316 xmlGenericError(xmlGenericErrorContext, 11317 "PP: entering CONTENT\n"); 11318#endif 11319 break; 11320 case XML_PARSER_ENTITY_DECL: 11321 xmlGenericError(xmlGenericErrorContext, 11322 "PP: internal error, state == ENTITY_DECL\n"); 11323 ctxt->instate = XML_PARSER_DTD; 11324#ifdef DEBUG_PUSH 11325 xmlGenericError(xmlGenericErrorContext, 11326 "PP: entering DTD\n"); 11327#endif 11328 break; 11329 case XML_PARSER_ENTITY_VALUE: 11330 xmlGenericError(xmlGenericErrorContext, 11331 "PP: internal error, state == ENTITY_VALUE\n"); 11332 ctxt->instate = XML_PARSER_CONTENT; 11333#ifdef DEBUG_PUSH 11334 xmlGenericError(xmlGenericErrorContext, 11335 "PP: entering DTD\n"); 11336#endif 11337 break; 11338 case XML_PARSER_ATTRIBUTE_VALUE: 11339 xmlGenericError(xmlGenericErrorContext, 11340 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 11341 ctxt->instate = XML_PARSER_START_TAG; 11342#ifdef DEBUG_PUSH 11343 xmlGenericError(xmlGenericErrorContext, 11344 "PP: entering START_TAG\n"); 11345#endif 11346 break; 11347 case XML_PARSER_SYSTEM_LITERAL: 11348 xmlGenericError(xmlGenericErrorContext, 11349 "PP: internal error, state == SYSTEM_LITERAL\n"); 11350 ctxt->instate = XML_PARSER_START_TAG; 11351#ifdef DEBUG_PUSH 11352 xmlGenericError(xmlGenericErrorContext, 11353 "PP: entering START_TAG\n"); 11354#endif 11355 break; 11356 case XML_PARSER_PUBLIC_LITERAL: 11357 xmlGenericError(xmlGenericErrorContext, 11358 "PP: internal error, state == PUBLIC_LITERAL\n"); 11359 ctxt->instate = XML_PARSER_START_TAG; 11360#ifdef DEBUG_PUSH 11361 xmlGenericError(xmlGenericErrorContext, 11362 "PP: entering START_TAG\n"); 11363#endif 11364 break; 11365 } 11366 } 11367done: 11368#ifdef DEBUG_PUSH 11369 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 11370#endif 11371 return(ret); 11372encoding_error: 11373 { 11374 char buffer[150]; 11375 
11376 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 11377 ctxt->input->cur[0], ctxt->input->cur[1], 11378 ctxt->input->cur[2], ctxt->input->cur[3]); 11379 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 11380 "Input is not proper UTF-8, indicate encoding !\n%s", 11381 BAD_CAST buffer, NULL); 11382 } 11383 return(0); 11384} 11385 11386/** 11387 * xmlParseChunk: 11388 * @ctxt: an XML parser context 11389 * @chunk: an char array 11390 * @size: the size in byte of the chunk 11391 * @terminate: last chunk indicator 11392 * 11393 * Parse a Chunk of memory 11394 * 11395 * Returns zero if no error, the xmlParserErrors otherwise. 11396 */ 11397int 11398xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 11399 int terminate) { 11400 int end_in_lf = 0; 11401 11402 if (ctxt == NULL) 11403 return(XML_ERR_INTERNAL_ERROR); 11404 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11405 return(ctxt->errNo); 11406 if (ctxt->instate == XML_PARSER_START) 11407 xmlDetectSAX2(ctxt); 11408 if ((size > 0) && (chunk != NULL) && (!terminate) && 11409 (chunk[size - 1] == '\r')) { 11410 end_in_lf = 1; 11411 size--; 11412 } 11413 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 11414 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 11415 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11416 int cur = ctxt->input->cur - ctxt->input->base; 11417 int res; 11418 11419 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11420 if (res < 0) { 11421 ctxt->errNo = XML_PARSER_EOF; 11422 ctxt->disableSAX = 1; 11423 return (XML_PARSER_EOF); 11424 } 11425 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11426 ctxt->input->cur = ctxt->input->base + cur; 11427 ctxt->input->end = 11428 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11429#ifdef DEBUG_PUSH 11430 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11431#endif 11432 11433 } else if (ctxt->instate != 
XML_PARSER_EOF) { 11434 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 11435 xmlParserInputBufferPtr in = ctxt->input->buf; 11436 if ((in->encoder != NULL) && (in->buffer != NULL) && 11437 (in->raw != NULL)) { 11438 int nbchars; 11439 11440 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 11441 if (nbchars < 0) { 11442 /* TODO 2.6.0 */ 11443 xmlGenericError(xmlGenericErrorContext, 11444 "xmlParseChunk: encoder error\n"); 11445 return(XML_ERR_INVALID_ENCODING); 11446 } 11447 } 11448 } 11449 } 11450 xmlParseTryOrFinish(ctxt, terminate); 11451 if ((end_in_lf == 1) && (ctxt->input != NULL) && 11452 (ctxt->input->buf != NULL)) { 11453 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 11454 } 11455 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11456 return(ctxt->errNo); 11457 if (terminate) { 11458 /* 11459 * Check for termination 11460 */ 11461 int avail = 0; 11462 11463 if (ctxt->input != NULL) { 11464 if (ctxt->input->buf == NULL) 11465 avail = ctxt->input->length - 11466 (ctxt->input->cur - ctxt->input->base); 11467 else 11468 avail = ctxt->input->buf->buffer->use - 11469 (ctxt->input->cur - ctxt->input->base); 11470 } 11471 11472 if ((ctxt->instate != XML_PARSER_EOF) && 11473 (ctxt->instate != XML_PARSER_EPILOG)) { 11474 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11475 } 11476 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 11477 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11478 } 11479 if (ctxt->instate != XML_PARSER_EOF) { 11480 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11481 ctxt->sax->endDocument(ctxt->userData); 11482 } 11483 ctxt->instate = XML_PARSER_EOF; 11484 } 11485 return((xmlParserErrors) ctxt->errNo); 11486} 11487 11488/************************************************************************ 11489 * * 11490 * I/O front end functions to the parser * 11491 * * 11492 ************************************************************************/ 11493 11494/** 11495 * xmlCreatePushParserCtxt: 
11496 * @sax: a SAX handler 11497 * @user_data: The user data returned on SAX callbacks 11498 * @chunk: a pointer to an array of chars 11499 * @size: number of chars in the array 11500 * @filename: an optional file name or URI 11501 * 11502 * Create a parser context for using the XML parser in push mode. 11503 * If @buffer and @size are non-NULL, the data is used to detect 11504 * the encoding. The remaining characters will be parsed so they 11505 * don't need to be fed in again through xmlParseChunk. 11506 * To allow content encoding detection, @size should be >= 4 11507 * The value of @filename is used for fetching external entities 11508 * and error/warning reports. 11509 * 11510 * Returns the new parser context or NULL 11511 */ 11512 11513xmlParserCtxtPtr 11514xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11515 const char *chunk, int size, const char *filename) { 11516 xmlParserCtxtPtr ctxt; 11517 xmlParserInputPtr inputStream; 11518 xmlParserInputBufferPtr buf; 11519 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 11520 11521 /* 11522 * plug some encoding conversion routines 11523 */ 11524 if ((chunk != NULL) && (size >= 4)) 11525 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 11526 11527 buf = xmlAllocParserInputBuffer(enc); 11528 if (buf == NULL) return(NULL); 11529 11530 ctxt = xmlNewParserCtxt(); 11531 if (ctxt == NULL) { 11532 xmlErrMemory(NULL, "creating parser: out of memory\n"); 11533 xmlFreeParserInputBuffer(buf); 11534 return(NULL); 11535 } 11536 ctxt->dictNames = 1; 11537 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 11538 if (ctxt->pushTab == NULL) { 11539 xmlErrMemory(ctxt, NULL); 11540 xmlFreeParserInputBuffer(buf); 11541 xmlFreeParserCtxt(ctxt); 11542 return(NULL); 11543 } 11544 if (sax != NULL) { 11545#ifdef LIBXML_SAX1_ENABLED 11546 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11547#endif /* LIBXML_SAX1_ENABLED */ 11548 xmlFree(ctxt->sax); 11549 ctxt->sax = 
(xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11550 if (ctxt->sax == NULL) { 11551 xmlErrMemory(ctxt, NULL); 11552 xmlFreeParserInputBuffer(buf); 11553 xmlFreeParserCtxt(ctxt); 11554 return(NULL); 11555 } 11556 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11557 if (sax->initialized == XML_SAX2_MAGIC) 11558 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11559 else 11560 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11561 if (user_data != NULL) 11562 ctxt->userData = user_data; 11563 } 11564 if (filename == NULL) { 11565 ctxt->directory = NULL; 11566 } else { 11567 ctxt->directory = xmlParserGetDirectory(filename); 11568 } 11569 11570 inputStream = xmlNewInputStream(ctxt); 11571 if (inputStream == NULL) { 11572 xmlFreeParserCtxt(ctxt); 11573 xmlFreeParserInputBuffer(buf); 11574 return(NULL); 11575 } 11576 11577 if (filename == NULL) 11578 inputStream->filename = NULL; 11579 else { 11580 inputStream->filename = (char *) 11581 xmlCanonicPath((const xmlChar *) filename); 11582 if (inputStream->filename == NULL) { 11583 xmlFreeParserCtxt(ctxt); 11584 xmlFreeParserInputBuffer(buf); 11585 return(NULL); 11586 } 11587 } 11588 inputStream->buf = buf; 11589 inputStream->base = inputStream->buf->buffer->content; 11590 inputStream->cur = inputStream->buf->buffer->content; 11591 inputStream->end = 11592 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11593 11594 inputPush(ctxt, inputStream); 11595 11596 /* 11597 * If the caller didn't provide an initial 'chunk' for determining 11598 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 11599 * that it can be automatically determined later 11600 */ 11601 if ((size == 0) || (chunk == NULL)) { 11602 ctxt->charset = XML_CHAR_ENCODING_NONE; 11603 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 11604 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11605 int cur = ctxt->input->cur - ctxt->input->base; 11606 11607 xmlParserInputBufferPush(ctxt->input->buf, size, 
chunk); 11608 11609 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11610 ctxt->input->cur = ctxt->input->base + cur; 11611 ctxt->input->end = 11612 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11613#ifdef DEBUG_PUSH 11614 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11615#endif 11616 } 11617 11618 if (enc != XML_CHAR_ENCODING_NONE) { 11619 xmlSwitchEncoding(ctxt, enc); 11620 } 11621 11622 return(ctxt); 11623} 11624#endif /* LIBXML_PUSH_ENABLED */ 11625 11626/** 11627 * xmlStopParser: 11628 * @ctxt: an XML parser context 11629 * 11630 * Blocks further parser processing 11631 */ 11632void 11633xmlStopParser(xmlParserCtxtPtr ctxt) { 11634 if (ctxt == NULL) 11635 return; 11636 ctxt->instate = XML_PARSER_EOF; 11637 ctxt->disableSAX = 1; 11638 if (ctxt->input != NULL) { 11639 ctxt->input->cur = BAD_CAST""; 11640 ctxt->input->base = ctxt->input->cur; 11641 } 11642} 11643 11644/** 11645 * xmlCreateIOParserCtxt: 11646 * @sax: a SAX handler 11647 * @user_data: The user data returned on SAX callbacks 11648 * @ioread: an I/O read function 11649 * @ioclose: an I/O close function 11650 * @ioctx: an I/O handler 11651 * @enc: the charset encoding if known 11652 * 11653 * Create a parser context for using the XML parser with an existing 11654 * I/O stream 11655 * 11656 * Returns the new parser context or NULL 11657 */ 11658xmlParserCtxtPtr 11659xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11660 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 11661 void *ioctx, xmlCharEncoding enc) { 11662 xmlParserCtxtPtr ctxt; 11663 xmlParserInputPtr inputStream; 11664 xmlParserInputBufferPtr buf; 11665 11666 if (ioread == NULL) return(NULL); 11667 11668 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 11669 if (buf == NULL) return(NULL); 11670 11671 ctxt = xmlNewParserCtxt(); 11672 if (ctxt == NULL) { 11673 xmlFreeParserInputBuffer(buf); 11674 return(NULL); 11675 } 11676 if (sax != NULL) { 
11677#ifdef LIBXML_SAX1_ENABLED 11678 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11679#endif /* LIBXML_SAX1_ENABLED */ 11680 xmlFree(ctxt->sax); 11681 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11682 if (ctxt->sax == NULL) { 11683 xmlErrMemory(ctxt, NULL); 11684 xmlFreeParserCtxt(ctxt); 11685 return(NULL); 11686 } 11687 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11688 if (sax->initialized == XML_SAX2_MAGIC) 11689 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11690 else 11691 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11692 if (user_data != NULL) 11693 ctxt->userData = user_data; 11694 } 11695 11696 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 11697 if (inputStream == NULL) { 11698 xmlFreeParserCtxt(ctxt); 11699 return(NULL); 11700 } 11701 inputPush(ctxt, inputStream); 11702 11703 return(ctxt); 11704} 11705 11706#ifdef LIBXML_VALID_ENABLED 11707/************************************************************************ 11708 * * 11709 * Front ends when parsing a DTD * 11710 * * 11711 ************************************************************************/ 11712 11713/** 11714 * xmlIOParseDTD: 11715 * @sax: the SAX handler block or NULL 11716 * @input: an Input Buffer 11717 * @enc: the charset encoding if known 11718 * 11719 * Load and parse a DTD 11720 * 11721 * Returns the resulting xmlDtdPtr or NULL in case of error. 11722 * @input will be freed by the function in any case. 
11723 */ 11724 11725xmlDtdPtr 11726xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 11727 xmlCharEncoding enc) { 11728 xmlDtdPtr ret = NULL; 11729 xmlParserCtxtPtr ctxt; 11730 xmlParserInputPtr pinput = NULL; 11731 xmlChar start[4]; 11732 11733 if (input == NULL) 11734 return(NULL); 11735 11736 ctxt = xmlNewParserCtxt(); 11737 if (ctxt == NULL) { 11738 xmlFreeParserInputBuffer(input); 11739 return(NULL); 11740 } 11741 11742 /* 11743 * Set-up the SAX context 11744 */ 11745 if (sax != NULL) { 11746 if (ctxt->sax != NULL) 11747 xmlFree(ctxt->sax); 11748 ctxt->sax = sax; 11749 ctxt->userData = ctxt; 11750 } 11751 xmlDetectSAX2(ctxt); 11752 11753 /* 11754 * generate a parser input from the I/O handler 11755 */ 11756 11757 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 11758 if (pinput == NULL) { 11759 if (sax != NULL) ctxt->sax = NULL; 11760 xmlFreeParserInputBuffer(input); 11761 xmlFreeParserCtxt(ctxt); 11762 return(NULL); 11763 } 11764 11765 /* 11766 * plug some encoding conversion routines here. 11767 */ 11768 if (xmlPushInput(ctxt, pinput) < 0) { 11769 if (sax != NULL) ctxt->sax = NULL; 11770 xmlFreeParserCtxt(ctxt); 11771 return(NULL); 11772 } 11773 if (enc != XML_CHAR_ENCODING_NONE) { 11774 xmlSwitchEncoding(ctxt, enc); 11775 } 11776 11777 pinput->filename = NULL; 11778 pinput->line = 1; 11779 pinput->col = 1; 11780 pinput->base = ctxt->input->cur; 11781 pinput->cur = ctxt->input->cur; 11782 pinput->free = NULL; 11783 11784 /* 11785 * let's parse that entity knowing it's an external subset. 
11786 */ 11787 ctxt->inSubset = 2; 11788 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 11789 if (ctxt->myDoc == NULL) { 11790 xmlErrMemory(ctxt, "New Doc failed"); 11791 return(NULL); 11792 } 11793 ctxt->myDoc->properties = XML_DOC_INTERNAL; 11794 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 11795 BAD_CAST "none", BAD_CAST "none"); 11796 11797 if ((enc == XML_CHAR_ENCODING_NONE) && 11798 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 11799 /* 11800 * Get the 4 first bytes and decode the charset 11801 * if enc != XML_CHAR_ENCODING_NONE 11802 * plug some encoding conversion routines. 11803 */ 11804 start[0] = RAW; 11805 start[1] = NXT(1); 11806 start[2] = NXT(2); 11807 start[3] = NXT(3); 11808 enc = xmlDetectCharEncoding(start, 4); 11809 if (enc != XML_CHAR_ENCODING_NONE) { 11810 xmlSwitchEncoding(ctxt, enc); 11811 } 11812 } 11813 11814 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 11815 11816 if (ctxt->myDoc != NULL) { 11817 if (ctxt->wellFormed) { 11818 ret = ctxt->myDoc->extSubset; 11819 ctxt->myDoc->extSubset = NULL; 11820 if (ret != NULL) { 11821 xmlNodePtr tmp; 11822 11823 ret->doc = NULL; 11824 tmp = ret->children; 11825 while (tmp != NULL) { 11826 tmp->doc = NULL; 11827 tmp = tmp->next; 11828 } 11829 } 11830 } else { 11831 ret = NULL; 11832 } 11833 xmlFreeDoc(ctxt->myDoc); 11834 ctxt->myDoc = NULL; 11835 } 11836 if (sax != NULL) ctxt->sax = NULL; 11837 xmlFreeParserCtxt(ctxt); 11838 11839 return(ret); 11840} 11841 11842/** 11843 * xmlSAXParseDTD: 11844 * @sax: the SAX handler block 11845 * @ExternalID: a NAME* containing the External ID of the DTD 11846 * @SystemID: a NAME* containing the URL to the DTD 11847 * 11848 * Load and parse an external subset. 11849 * 11850 * Returns the resulting xmlDtdPtr or NULL in case of error. 
11851 */ 11852 11853xmlDtdPtr 11854xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 11855 const xmlChar *SystemID) { 11856 xmlDtdPtr ret = NULL; 11857 xmlParserCtxtPtr ctxt; 11858 xmlParserInputPtr input = NULL; 11859 xmlCharEncoding enc; 11860 xmlChar* systemIdCanonic; 11861 11862 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 11863 11864 ctxt = xmlNewParserCtxt(); 11865 if (ctxt == NULL) { 11866 return(NULL); 11867 } 11868 11869 /* 11870 * Set-up the SAX context 11871 */ 11872 if (sax != NULL) { 11873 if (ctxt->sax != NULL) 11874 xmlFree(ctxt->sax); 11875 ctxt->sax = sax; 11876 ctxt->userData = ctxt; 11877 } 11878 11879 /* 11880 * Canonicalise the system ID 11881 */ 11882 systemIdCanonic = xmlCanonicPath(SystemID); 11883 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 11884 xmlFreeParserCtxt(ctxt); 11885 return(NULL); 11886 } 11887 11888 /* 11889 * Ask the Entity resolver to load the damn thing 11890 */ 11891 11892 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 11893 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 11894 systemIdCanonic); 11895 if (input == NULL) { 11896 if (sax != NULL) ctxt->sax = NULL; 11897 xmlFreeParserCtxt(ctxt); 11898 if (systemIdCanonic != NULL) 11899 xmlFree(systemIdCanonic); 11900 return(NULL); 11901 } 11902 11903 /* 11904 * plug some encoding conversion routines here. 
11905 */ 11906 if (xmlPushInput(ctxt, input) < 0) { 11907 if (sax != NULL) ctxt->sax = NULL; 11908 xmlFreeParserCtxt(ctxt); 11909 if (systemIdCanonic != NULL) 11910 xmlFree(systemIdCanonic); 11911 return(NULL); 11912 } 11913 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11914 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 11915 xmlSwitchEncoding(ctxt, enc); 11916 } 11917 11918 if (input->filename == NULL) 11919 input->filename = (char *) systemIdCanonic; 11920 else 11921 xmlFree(systemIdCanonic); 11922 input->line = 1; 11923 input->col = 1; 11924 input->base = ctxt->input->cur; 11925 input->cur = ctxt->input->cur; 11926 input->free = NULL; 11927 11928 /* 11929 * let's parse that entity knowing it's an external subset. 11930 */ 11931 ctxt->inSubset = 2; 11932 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 11933 if (ctxt->myDoc == NULL) { 11934 xmlErrMemory(ctxt, "New Doc failed"); 11935 if (sax != NULL) ctxt->sax = NULL; 11936 xmlFreeParserCtxt(ctxt); 11937 return(NULL); 11938 } 11939 ctxt->myDoc->properties = XML_DOC_INTERNAL; 11940 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 11941 ExternalID, SystemID); 11942 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 11943 11944 if (ctxt->myDoc != NULL) { 11945 if (ctxt->wellFormed) { 11946 ret = ctxt->myDoc->extSubset; 11947 ctxt->myDoc->extSubset = NULL; 11948 if (ret != NULL) { 11949 xmlNodePtr tmp; 11950 11951 ret->doc = NULL; 11952 tmp = ret->children; 11953 while (tmp != NULL) { 11954 tmp->doc = NULL; 11955 tmp = tmp->next; 11956 } 11957 } 11958 } else { 11959 ret = NULL; 11960 } 11961 xmlFreeDoc(ctxt->myDoc); 11962 ctxt->myDoc = NULL; 11963 } 11964 if (sax != NULL) ctxt->sax = NULL; 11965 xmlFreeParserCtxt(ctxt); 11966 11967 return(ret); 11968} 11969 11970 11971/** 11972 * xmlParseDTD: 11973 * @ExternalID: a NAME* containing the External ID of the DTD 11974 * @SystemID: a NAME* containing the URL to the DTD 11975 * 11976 * Load and parse an external subset. 
11977 * 11978 * Returns the resulting xmlDtdPtr or NULL in case of error. 11979 */ 11980 11981xmlDtdPtr 11982xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 11983 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 11984} 11985#endif /* LIBXML_VALID_ENABLED */ 11986 11987/************************************************************************ 11988 * * 11989 * Front ends when parsing an Entity * 11990 * * 11991 ************************************************************************/ 11992 11993/** 11994 * xmlParseCtxtExternalEntity: 11995 * @ctx: the existing parsing context 11996 * @URL: the URL for the entity to load 11997 * @ID: the System ID for the entity to load 11998 * @lst: the return value for the set of parsed nodes 11999 * 12000 * Parse an external general entity within an existing parsing context 12001 * An external general parsed entity is well-formed if it matches the 12002 * production labeled extParsedEnt. 12003 * 12004 * [78] extParsedEnt ::= TextDecl? 
content 12005 * 12006 * Returns 0 if the entity is well formed, -1 in case of args problem and 12007 * the parser error code otherwise 12008 */ 12009 12010int 12011xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12012 const xmlChar *ID, xmlNodePtr *lst) { 12013 xmlParserCtxtPtr ctxt; 12014 xmlDocPtr newDoc; 12015 xmlNodePtr newRoot; 12016 xmlSAXHandlerPtr oldsax = NULL; 12017 int ret = 0; 12018 xmlChar start[4]; 12019 xmlCharEncoding enc; 12020 xmlParserInputPtr inputStream; 12021 char *directory = NULL; 12022 12023 if (ctx == NULL) return(-1); 12024 12025 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12026 (ctx->depth > 1024)) { 12027 return(XML_ERR_ENTITY_LOOP); 12028 } 12029 12030 if (lst != NULL) 12031 *lst = NULL; 12032 if ((URL == NULL) && (ID == NULL)) 12033 return(-1); 12034 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12035 return(-1); 12036 12037 ctxt = xmlNewParserCtxt(); 12038 if (ctxt == NULL) { 12039 return(-1); 12040 } 12041 12042 ctxt->userData = ctxt; 12043 ctxt->_private = ctx->_private; 12044 12045 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 12046 if (inputStream == NULL) { 12047 xmlFreeParserCtxt(ctxt); 12048 return(-1); 12049 } 12050 12051 inputPush(ctxt, inputStream); 12052 12053 if ((ctxt->directory == NULL) && (directory == NULL)) 12054 directory = xmlParserGetDirectory((char *)URL); 12055 if ((ctxt->directory == NULL) && (directory != NULL)) 12056 ctxt->directory = directory; 12057 12058 oldsax = ctxt->sax; 12059 ctxt->sax = ctx->sax; 12060 xmlDetectSAX2(ctxt); 12061 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12062 if (newDoc == NULL) { 12063 xmlFreeParserCtxt(ctxt); 12064 return(-1); 12065 } 12066 newDoc->properties = XML_DOC_INTERNAL; 12067 if (ctx->myDoc->dict) { 12068 newDoc->dict = ctx->myDoc->dict; 12069 xmlDictReference(newDoc->dict); 12070 } 12071 if (ctx->myDoc != NULL) { 12072 newDoc->intSubset = ctx->myDoc->intSubset; 12073 newDoc->extSubset = 
ctx->myDoc->extSubset; 12074 } 12075 if (ctx->myDoc->URL != NULL) { 12076 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12077 } 12078 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12079 if (newRoot == NULL) { 12080 ctxt->sax = oldsax; 12081 xmlFreeParserCtxt(ctxt); 12082 newDoc->intSubset = NULL; 12083 newDoc->extSubset = NULL; 12084 xmlFreeDoc(newDoc); 12085 return(-1); 12086 } 12087 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12088 nodePush(ctxt, newDoc->children); 12089 if (ctx->myDoc == NULL) { 12090 ctxt->myDoc = newDoc; 12091 } else { 12092 ctxt->myDoc = ctx->myDoc; 12093 newDoc->children->doc = ctx->myDoc; 12094 } 12095 12096 /* 12097 * Get the 4 first bytes and decode the charset 12098 * if enc != XML_CHAR_ENCODING_NONE 12099 * plug some encoding conversion routines. 12100 */ 12101 GROW 12102 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12103 start[0] = RAW; 12104 start[1] = NXT(1); 12105 start[2] = NXT(2); 12106 start[3] = NXT(3); 12107 enc = xmlDetectCharEncoding(start, 4); 12108 if (enc != XML_CHAR_ENCODING_NONE) { 12109 xmlSwitchEncoding(ctxt, enc); 12110 } 12111 } 12112 12113 /* 12114 * Parse a possible text declaration first 12115 */ 12116 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12117 xmlParseTextDecl(ctxt); 12118 /* 12119 * An XML-1.0 document can't reference an entity not XML-1.0 12120 */ 12121 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12122 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12123 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12124 "Version mismatch between document and entity\n"); 12125 } 12126 } 12127 12128 /* 12129 * Doing validity checking on chunk doesn't make sense 12130 */ 12131 ctxt->instate = XML_PARSER_CONTENT; 12132 ctxt->validate = ctx->validate; 12133 ctxt->valid = ctx->valid; 12134 ctxt->loadsubset = ctx->loadsubset; 12135 ctxt->depth = ctx->depth + 1; 12136 ctxt->replaceEntities = ctx->replaceEntities; 12137 if (ctxt->validate) { 12138 
ctxt->vctxt.error = ctx->vctxt.error; 12139 ctxt->vctxt.warning = ctx->vctxt.warning; 12140 } else { 12141 ctxt->vctxt.error = NULL; 12142 ctxt->vctxt.warning = NULL; 12143 } 12144 ctxt->vctxt.nodeTab = NULL; 12145 ctxt->vctxt.nodeNr = 0; 12146 ctxt->vctxt.nodeMax = 0; 12147 ctxt->vctxt.node = NULL; 12148 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12149 ctxt->dict = ctx->dict; 12150 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12151 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12152 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12153 ctxt->dictNames = ctx->dictNames; 12154 ctxt->attsDefault = ctx->attsDefault; 12155 ctxt->attsSpecial = ctx->attsSpecial; 12156 ctxt->linenumbers = ctx->linenumbers; 12157 12158 xmlParseContent(ctxt); 12159 12160 ctx->validate = ctxt->validate; 12161 ctx->valid = ctxt->valid; 12162 if ((RAW == '<') && (NXT(1) == '/')) { 12163 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12164 } else if (RAW != 0) { 12165 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12166 } 12167 if (ctxt->node != newDoc->children) { 12168 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12169 } 12170 12171 if (!ctxt->wellFormed) { 12172 if (ctxt->errNo == 0) 12173 ret = 1; 12174 else 12175 ret = ctxt->errNo; 12176 } else { 12177 if (lst != NULL) { 12178 xmlNodePtr cur; 12179 12180 /* 12181 * Return the newly created nodeset after unlinking it from 12182 * they pseudo parent. 
12183 */ 12184 cur = newDoc->children->children; 12185 *lst = cur; 12186 while (cur != NULL) { 12187 cur->parent = NULL; 12188 cur = cur->next; 12189 } 12190 newDoc->children->children = NULL; 12191 } 12192 ret = 0; 12193 } 12194 ctxt->sax = oldsax; 12195 ctxt->dict = NULL; 12196 ctxt->attsDefault = NULL; 12197 ctxt->attsSpecial = NULL; 12198 xmlFreeParserCtxt(ctxt); 12199 newDoc->intSubset = NULL; 12200 newDoc->extSubset = NULL; 12201 xmlFreeDoc(newDoc); 12202 12203 return(ret); 12204} 12205 12206/** 12207 * xmlParseExternalEntityPrivate: 12208 * @doc: the document the chunk pertains to 12209 * @oldctxt: the previous parser context if available 12210 * @sax: the SAX handler bloc (possibly NULL) 12211 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12212 * @depth: Used for loop detection, use 0 12213 * @URL: the URL for the entity to load 12214 * @ID: the System ID for the entity to load 12215 * @list: the return value for the set of parsed nodes 12216 * 12217 * Private version of xmlParseExternalEntity() 12218 * 12219 * Returns 0 if the entity is well formed, -1 in case of args problem and 12220 * the parser error code otherwise 12221 */ 12222 12223static xmlParserErrors 12224xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12225 xmlSAXHandlerPtr sax, 12226 void *user_data, int depth, const xmlChar *URL, 12227 const xmlChar *ID, xmlNodePtr *list) { 12228 xmlParserCtxtPtr ctxt; 12229 xmlDocPtr newDoc; 12230 xmlNodePtr newRoot; 12231 xmlSAXHandlerPtr oldsax = NULL; 12232 xmlParserErrors ret = XML_ERR_OK; 12233 xmlChar start[4]; 12234 xmlCharEncoding enc; 12235 12236 if (((depth > 40) && 12237 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 12238 (depth > 1024)) { 12239 return(XML_ERR_ENTITY_LOOP); 12240 } 12241 12242 if (list != NULL) 12243 *list = NULL; 12244 if ((URL == NULL) && (ID == NULL)) 12245 return(XML_ERR_INTERNAL_ERROR); 12246 if (doc == NULL) 12247 return(XML_ERR_INTERNAL_ERROR); 12248 12249 
12250 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 12251 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12252 ctxt->userData = ctxt; 12253 if (oldctxt != NULL) { 12254 ctxt->_private = oldctxt->_private; 12255 ctxt->loadsubset = oldctxt->loadsubset; 12256 ctxt->validate = oldctxt->validate; 12257 ctxt->external = oldctxt->external; 12258 ctxt->record_info = oldctxt->record_info; 12259 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 12260 ctxt->node_seq.length = oldctxt->node_seq.length; 12261 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 12262 } else { 12263 /* 12264 * Doing validity checking on chunk without context 12265 * doesn't make sense 12266 */ 12267 ctxt->_private = NULL; 12268 ctxt->validate = 0; 12269 ctxt->external = 2; 12270 ctxt->loadsubset = 0; 12271 } 12272 if (sax != NULL) { 12273 oldsax = ctxt->sax; 12274 ctxt->sax = sax; 12275 if (user_data != NULL) 12276 ctxt->userData = user_data; 12277 } 12278 xmlDetectSAX2(ctxt); 12279 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12280 if (newDoc == NULL) { 12281 ctxt->node_seq.maximum = 0; 12282 ctxt->node_seq.length = 0; 12283 ctxt->node_seq.buffer = NULL; 12284 xmlFreeParserCtxt(ctxt); 12285 return(XML_ERR_INTERNAL_ERROR); 12286 } 12287 newDoc->properties = XML_DOC_INTERNAL; 12288 newDoc->intSubset = doc->intSubset; 12289 newDoc->extSubset = doc->extSubset; 12290 newDoc->dict = doc->dict; 12291 xmlDictReference(newDoc->dict); 12292 12293 if (doc->URL != NULL) { 12294 newDoc->URL = xmlStrdup(doc->URL); 12295 } 12296 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12297 if (newRoot == NULL) { 12298 if (sax != NULL) 12299 ctxt->sax = oldsax; 12300 ctxt->node_seq.maximum = 0; 12301 ctxt->node_seq.length = 0; 12302 ctxt->node_seq.buffer = NULL; 12303 xmlFreeParserCtxt(ctxt); 12304 newDoc->intSubset = NULL; 12305 newDoc->extSubset = NULL; 12306 xmlFreeDoc(newDoc); 12307 return(XML_ERR_INTERNAL_ERROR); 12308 } 12309 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12310 nodePush(ctxt, 
newDoc->children); 12311 ctxt->myDoc = doc; 12312 newRoot->doc = doc; 12313 12314 /* 12315 * Get the 4 first bytes and decode the charset 12316 * if enc != XML_CHAR_ENCODING_NONE 12317 * plug some encoding conversion routines. 12318 */ 12319 GROW; 12320 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12321 start[0] = RAW; 12322 start[1] = NXT(1); 12323 start[2] = NXT(2); 12324 start[3] = NXT(3); 12325 enc = xmlDetectCharEncoding(start, 4); 12326 if (enc != XML_CHAR_ENCODING_NONE) { 12327 xmlSwitchEncoding(ctxt, enc); 12328 } 12329 } 12330 12331 /* 12332 * Parse a possible text declaration first 12333 */ 12334 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12335 xmlParseTextDecl(ctxt); 12336 } 12337 12338 ctxt->instate = XML_PARSER_CONTENT; 12339 ctxt->depth = depth; 12340 12341 xmlParseContent(ctxt); 12342 12343 if ((RAW == '<') && (NXT(1) == '/')) { 12344 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12345 } else if (RAW != 0) { 12346 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12347 } 12348 if (ctxt->node != newDoc->children) { 12349 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12350 } 12351 12352 if (!ctxt->wellFormed) { 12353 if (ctxt->errNo == 0) 12354 ret = XML_ERR_INTERNAL_ERROR; 12355 else 12356 ret = (xmlParserErrors)ctxt->errNo; 12357 } else { 12358 if (list != NULL) { 12359 xmlNodePtr cur; 12360 12361 /* 12362 * Return the newly created nodeset after unlinking it from 12363 * they pseudo parent. 12364 */ 12365 cur = newDoc->children->children; 12366 *list = cur; 12367 while (cur != NULL) { 12368 cur->parent = NULL; 12369 cur = cur->next; 12370 } 12371 newDoc->children->children = NULL; 12372 } 12373 ret = XML_ERR_OK; 12374 } 12375 12376 /* 12377 * Record in the parent context the number of entities replacement 12378 * done when parsing that reference. 
12379 */ 12380 oldctxt->nbentities += ctxt->nbentities; 12381 /* 12382 * Also record the size of the entity parsed 12383 */ 12384 if (ctxt->input != NULL) { 12385 oldctxt->sizeentities += ctxt->input->consumed; 12386 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 12387 } 12388 /* 12389 * And record the last error if any 12390 */ 12391 if (ctxt->lastError.code != XML_ERR_OK) 12392 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12393 12394 if (sax != NULL) 12395 ctxt->sax = oldsax; 12396 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 12397 oldctxt->node_seq.length = ctxt->node_seq.length; 12398 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 12399 ctxt->node_seq.maximum = 0; 12400 ctxt->node_seq.length = 0; 12401 ctxt->node_seq.buffer = NULL; 12402 xmlFreeParserCtxt(ctxt); 12403 newDoc->intSubset = NULL; 12404 newDoc->extSubset = NULL; 12405 xmlFreeDoc(newDoc); 12406 12407 return(ret); 12408} 12409 12410#ifdef LIBXML_SAX1_ENABLED 12411/** 12412 * xmlParseExternalEntity: 12413 * @doc: the document the chunk pertains to 12414 * @sax: the SAX handler bloc (possibly NULL) 12415 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12416 * @depth: Used for loop detection, use 0 12417 * @URL: the URL for the entity to load 12418 * @ID: the System ID for the entity to load 12419 * @lst: the return value for the set of parsed nodes 12420 * 12421 * Parse an external general entity 12422 * An external general parsed entity is well-formed if it matches the 12423 * production labeled extParsedEnt. 12424 * 12425 * [78] extParsedEnt ::= TextDecl? 
content 12426 * 12427 * Returns 0 if the entity is well formed, -1 in case of args problem and 12428 * the parser error code otherwise 12429 */ 12430 12431int 12432xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 12433 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 12434 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 12435 ID, lst)); 12436} 12437 12438/** 12439 * xmlParseBalancedChunkMemory: 12440 * @doc: the document the chunk pertains to 12441 * @sax: the SAX handler bloc (possibly NULL) 12442 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12443 * @depth: Used for loop detection, use 0 12444 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12445 * @lst: the return value for the set of parsed nodes 12446 * 12447 * Parse a well-balanced chunk of an XML document 12448 * called by the parser 12449 * The allowed sequence for the Well Balanced Chunk is the one defined by 12450 * the content production in the XML grammar: 12451 * 12452 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12453 * 12454 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12455 * the parser error code otherwise 12456 */ 12457 12458int 12459xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12460 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 12461 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 12462 depth, string, lst, 0 ); 12463} 12464#endif /* LIBXML_SAX1_ENABLED */ 12465 12466/** 12467 * xmlParseBalancedChunkMemoryInternal: 12468 * @oldctxt: the existing parsing context 12469 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12470 * @user_data: the user data field for the parser context 12471 * @lst: the return value for the set of parsed nodes 12472 * 12473 * 12474 * Parse a well-balanced chunk of an XML document 12475 * called by the parser 12476 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 12477 * the content production in the XML grammar: 12478 * 12479 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12480 * 12481 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12482 * error code otherwise 12483 * 12484 * In case recover is set to 1, the nodelist will not be empty even if 12485 * the parsed chunk is not well balanced. 12486 */ 12487static xmlParserErrors 12488xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 12489 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 12490 xmlParserCtxtPtr ctxt; 12491 xmlDocPtr newDoc = NULL; 12492 xmlNodePtr newRoot; 12493 xmlSAXHandlerPtr oldsax = NULL; 12494 xmlNodePtr content = NULL; 12495 xmlNodePtr last = NULL; 12496 int size; 12497 xmlParserErrors ret = XML_ERR_OK; 12498 12499 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 12500 (oldctxt->depth > 1024)) { 12501 return(XML_ERR_ENTITY_LOOP); 12502 } 12503 12504 12505 if (lst != NULL) 12506 *lst = NULL; 12507 if (string == NULL) 12508 return(XML_ERR_INTERNAL_ERROR); 12509 12510 size = xmlStrlen(string); 12511 12512 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12513 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12514 if (user_data != NULL) 12515 ctxt->userData = user_data; 12516 else 12517 ctxt->userData = ctxt; 12518 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12519 ctxt->dict = oldctxt->dict; 12520 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12521 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12522 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12523 12524 oldsax = ctxt->sax; 12525 ctxt->sax = oldctxt->sax; 12526 xmlDetectSAX2(ctxt); 12527 ctxt->replaceEntities = oldctxt->replaceEntities; 12528 ctxt->options = oldctxt->options; 12529 12530 ctxt->_private = oldctxt->_private; 12531 if (oldctxt->myDoc == NULL) { 12532 
newDoc = xmlNewDoc(BAD_CAST "1.0"); 12533 if (newDoc == NULL) { 12534 ctxt->sax = oldsax; 12535 ctxt->dict = NULL; 12536 xmlFreeParserCtxt(ctxt); 12537 return(XML_ERR_INTERNAL_ERROR); 12538 } 12539 newDoc->properties = XML_DOC_INTERNAL; 12540 newDoc->dict = ctxt->dict; 12541 xmlDictReference(newDoc->dict); 12542 ctxt->myDoc = newDoc; 12543 } else { 12544 ctxt->myDoc = oldctxt->myDoc; 12545 content = ctxt->myDoc->children; 12546 last = ctxt->myDoc->last; 12547 } 12548 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 12549 if (newRoot == NULL) { 12550 ctxt->sax = oldsax; 12551 ctxt->dict = NULL; 12552 xmlFreeParserCtxt(ctxt); 12553 if (newDoc != NULL) { 12554 xmlFreeDoc(newDoc); 12555 } 12556 return(XML_ERR_INTERNAL_ERROR); 12557 } 12558 ctxt->myDoc->children = NULL; 12559 ctxt->myDoc->last = NULL; 12560 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 12561 nodePush(ctxt, ctxt->myDoc->children); 12562 ctxt->instate = XML_PARSER_CONTENT; 12563 ctxt->depth = oldctxt->depth + 1; 12564 12565 ctxt->validate = 0; 12566 ctxt->loadsubset = oldctxt->loadsubset; 12567 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 12568 /* 12569 * ID/IDREF registration will be done in xmlValidateElement below 12570 */ 12571 ctxt->loadsubset |= XML_SKIP_IDS; 12572 } 12573 ctxt->dictNames = oldctxt->dictNames; 12574 ctxt->attsDefault = oldctxt->attsDefault; 12575 ctxt->attsSpecial = oldctxt->attsSpecial; 12576 12577 xmlParseContent(ctxt); 12578 if ((RAW == '<') && (NXT(1) == '/')) { 12579 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12580 } else if (RAW != 0) { 12581 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12582 } 12583 if (ctxt->node != ctxt->myDoc->children) { 12584 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12585 } 12586 12587 if (!ctxt->wellFormed) { 12588 if (ctxt->errNo == 0) 12589 ret = XML_ERR_INTERNAL_ERROR; 12590 else 12591 ret = (xmlParserErrors)ctxt->errNo; 12592 } else { 12593 ret = XML_ERR_OK; 12594 } 12595 12596 if 
((lst != NULL) && (ret == XML_ERR_OK)) { 12597 xmlNodePtr cur; 12598 12599 /* 12600 * Return the newly created nodeset after unlinking it from 12601 * they pseudo parent. 12602 */ 12603 cur = ctxt->myDoc->children->children; 12604 *lst = cur; 12605 while (cur != NULL) { 12606#ifdef LIBXML_VALID_ENABLED 12607 if ((oldctxt->validate) && (oldctxt->wellFormed) && 12608 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 12609 (cur->type == XML_ELEMENT_NODE)) { 12610 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 12611 oldctxt->myDoc, cur); 12612 } 12613#endif /* LIBXML_VALID_ENABLED */ 12614 cur->parent = NULL; 12615 cur = cur->next; 12616 } 12617 ctxt->myDoc->children->children = NULL; 12618 } 12619 if (ctxt->myDoc != NULL) { 12620 xmlFreeNode(ctxt->myDoc->children); 12621 ctxt->myDoc->children = content; 12622 ctxt->myDoc->last = last; 12623 } 12624 12625 /* 12626 * Record in the parent context the number of entities replacement 12627 * done when parsing that reference. 12628 */ 12629 oldctxt->nbentities += ctxt->nbentities; 12630 /* 12631 * Also record the last error if any 12632 */ 12633 if (ctxt->lastError.code != XML_ERR_OK) 12634 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12635 12636 ctxt->sax = oldsax; 12637 ctxt->dict = NULL; 12638 ctxt->attsDefault = NULL; 12639 ctxt->attsSpecial = NULL; 12640 xmlFreeParserCtxt(ctxt); 12641 if (newDoc != NULL) { 12642 xmlFreeDoc(newDoc); 12643 } 12644 12645 return(ret); 12646} 12647 12648/** 12649 * xmlParseInNodeContext: 12650 * @node: the context node 12651 * @data: the input string 12652 * @datalen: the input string length in bytes 12653 * @options: a combination of xmlParserOption 12654 * @lst: the return value for the set of parsed nodes 12655 * 12656 * Parse a well-balanced chunk of an XML document 12657 * within the context (DTD, namespaces, etc ...) of the given node. 
12658 * 12659 * The allowed sequence for the data is a Well Balanced Chunk defined by 12660 * the content production in the XML grammar: 12661 * 12662 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12663 * 12664 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12665 * error code otherwise 12666 */ 12667xmlParserErrors 12668xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 12669 int options, xmlNodePtr *lst) { 12670#ifdef SAX2 12671 xmlParserCtxtPtr ctxt; 12672 xmlDocPtr doc = NULL; 12673 xmlNodePtr fake, cur; 12674 int nsnr = 0; 12675 12676 xmlParserErrors ret = XML_ERR_OK; 12677 12678 /* 12679 * check all input parameters, grab the document 12680 */ 12681 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 12682 return(XML_ERR_INTERNAL_ERROR); 12683 switch (node->type) { 12684 case XML_ELEMENT_NODE: 12685 case XML_ATTRIBUTE_NODE: 12686 case XML_TEXT_NODE: 12687 case XML_CDATA_SECTION_NODE: 12688 case XML_ENTITY_REF_NODE: 12689 case XML_PI_NODE: 12690 case XML_COMMENT_NODE: 12691 case XML_DOCUMENT_NODE: 12692 case XML_HTML_DOCUMENT_NODE: 12693 break; 12694 default: 12695 return(XML_ERR_INTERNAL_ERROR); 12696 12697 } 12698 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 12699 (node->type != XML_DOCUMENT_NODE) && 12700 (node->type != XML_HTML_DOCUMENT_NODE)) 12701 node = node->parent; 12702 if (node == NULL) 12703 return(XML_ERR_INTERNAL_ERROR); 12704 if (node->type == XML_ELEMENT_NODE) 12705 doc = node->doc; 12706 else 12707 doc = (xmlDocPtr) node; 12708 if (doc == NULL) 12709 return(XML_ERR_INTERNAL_ERROR); 12710 12711 /* 12712 * allocate a context and set-up everything not related to the 12713 * node position in the tree 12714 */ 12715 if (doc->type == XML_DOCUMENT_NODE) 12716 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 12717#ifdef LIBXML_HTML_ENABLED 12718 else if (doc->type == XML_HTML_DOCUMENT_NODE) 12719 ctxt = htmlCreateMemoryParserCtxt((char *) 
data, datalen); 12720#endif 12721 else 12722 return(XML_ERR_INTERNAL_ERROR); 12723 12724 if (ctxt == NULL) 12725 return(XML_ERR_NO_MEMORY); 12726 fake = xmlNewComment(NULL); 12727 if (fake == NULL) { 12728 xmlFreeParserCtxt(ctxt); 12729 return(XML_ERR_NO_MEMORY); 12730 } 12731 xmlAddChild(node, fake); 12732 12733 /* 12734 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 12735 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 12736 * we must wait until the last moment to free the original one. 12737 */ 12738 if (doc->dict != NULL) { 12739 if (ctxt->dict != NULL) 12740 xmlDictFree(ctxt->dict); 12741 ctxt->dict = doc->dict; 12742 } else 12743 options |= XML_PARSE_NODICT; 12744 12745 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 12746 xmlDetectSAX2(ctxt); 12747 ctxt->myDoc = doc; 12748 12749 if (node->type == XML_ELEMENT_NODE) { 12750 nodePush(ctxt, node); 12751 /* 12752 * initialize the SAX2 namespaces stack 12753 */ 12754 cur = node; 12755 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 12756 xmlNsPtr ns = cur->nsDef; 12757 const xmlChar *iprefix, *ihref; 12758 12759 while (ns != NULL) { 12760 if (ctxt->dict) { 12761 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 12762 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 12763 } else { 12764 iprefix = ns->prefix; 12765 ihref = ns->href; 12766 } 12767 12768 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 12769 nsPush(ctxt, iprefix, ihref); 12770 nsnr++; 12771 } 12772 ns = ns->next; 12773 } 12774 cur = cur->parent; 12775 } 12776 ctxt->instate = XML_PARSER_CONTENT; 12777 } 12778 12779 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 12780 /* 12781 * ID/IDREF registration will be done in xmlValidateElement below 12782 */ 12783 ctxt->loadsubset |= XML_SKIP_IDS; 12784 } 12785 12786#ifdef LIBXML_HTML_ENABLED 12787 if (doc->type == XML_HTML_DOCUMENT_NODE) 12788 __htmlParseContent(ctxt); 12789 else 12790#endif 12791 xmlParseContent(ctxt); 12792 12793 nsPop(ctxt, 
nsnr); 12794 if ((RAW == '<') && (NXT(1) == '/')) { 12795 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12796 } else if (RAW != 0) { 12797 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12798 } 12799 if ((ctxt->node != NULL) && (ctxt->node != node)) { 12800 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12801 ctxt->wellFormed = 0; 12802 } 12803 12804 if (!ctxt->wellFormed) { 12805 if (ctxt->errNo == 0) 12806 ret = XML_ERR_INTERNAL_ERROR; 12807 else 12808 ret = (xmlParserErrors)ctxt->errNo; 12809 } else { 12810 ret = XML_ERR_OK; 12811 } 12812 12813 /* 12814 * Return the newly created nodeset after unlinking it from 12815 * the pseudo sibling. 12816 */ 12817 12818 cur = fake->next; 12819 fake->next = NULL; 12820 node->last = fake; 12821 12822 if (cur != NULL) { 12823 cur->prev = NULL; 12824 } 12825 12826 *lst = cur; 12827 12828 while (cur != NULL) { 12829 cur->parent = NULL; 12830 cur = cur->next; 12831 } 12832 12833 xmlUnlinkNode(fake); 12834 xmlFreeNode(fake); 12835 12836 12837 if (ret != XML_ERR_OK) { 12838 xmlFreeNodeList(*lst); 12839 *lst = NULL; 12840 } 12841 12842 if (doc->dict != NULL) 12843 ctxt->dict = NULL; 12844 xmlFreeParserCtxt(ctxt); 12845 12846 return(ret); 12847#else /* !SAX2 */ 12848 return(XML_ERR_INTERNAL_ERROR); 12849#endif 12850} 12851 12852#ifdef LIBXML_SAX1_ENABLED 12853/** 12854 * xmlParseBalancedChunkMemoryRecover: 12855 * @doc: the document the chunk pertains to 12856 * @sax: the SAX handler bloc (possibly NULL) 12857 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12858 * @depth: Used for loop detection, use 0 12859 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12860 * @lst: the return value for the set of parsed nodes 12861 * @recover: return nodes even if the data is broken (use 0) 12862 * 12863 * 12864 * Parse a well-balanced chunk of an XML document 12865 * called by the parser 12866 * The allowed sequence for the Well Balanced Chunk is the one defined by 12867 * the content 
production in the XML grammar: 12868 * 12869 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12870 * 12871 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12872 * the parser error code otherwise 12873 * 12874 * In case recover is set to 1, the nodelist will not be empty even if 12875 * the parsed chunk is not well balanced, assuming the parsing succeeded to 12876 * some extent. 12877 */ 12878int 12879xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12880 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 12881 int recover) { 12882 xmlParserCtxtPtr ctxt; 12883 xmlDocPtr newDoc; 12884 xmlSAXHandlerPtr oldsax = NULL; 12885 xmlNodePtr content, newRoot; 12886 int size; 12887 int ret = 0; 12888 12889 if (depth > 40) { 12890 return(XML_ERR_ENTITY_LOOP); 12891 } 12892 12893 12894 if (lst != NULL) 12895 *lst = NULL; 12896 if (string == NULL) 12897 return(-1); 12898 12899 size = xmlStrlen(string); 12900 12901 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12902 if (ctxt == NULL) return(-1); 12903 ctxt->userData = ctxt; 12904 if (sax != NULL) { 12905 oldsax = ctxt->sax; 12906 ctxt->sax = sax; 12907 if (user_data != NULL) 12908 ctxt->userData = user_data; 12909 } 12910 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12911 if (newDoc == NULL) { 12912 xmlFreeParserCtxt(ctxt); 12913 return(-1); 12914 } 12915 newDoc->properties = XML_DOC_INTERNAL; 12916 if ((doc != NULL) && (doc->dict != NULL)) { 12917 xmlDictFree(ctxt->dict); 12918 ctxt->dict = doc->dict; 12919 xmlDictReference(ctxt->dict); 12920 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12921 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12922 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12923 ctxt->dictNames = 1; 12924 } else { 12925 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 12926 } 12927 if (doc != NULL) { 12928 newDoc->intSubset = doc->intSubset; 12929 
newDoc->extSubset = doc->extSubset; 12930 } 12931 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12932 if (newRoot == NULL) { 12933 if (sax != NULL) 12934 ctxt->sax = oldsax; 12935 xmlFreeParserCtxt(ctxt); 12936 newDoc->intSubset = NULL; 12937 newDoc->extSubset = NULL; 12938 xmlFreeDoc(newDoc); 12939 return(-1); 12940 } 12941 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12942 nodePush(ctxt, newRoot); 12943 if (doc == NULL) { 12944 ctxt->myDoc = newDoc; 12945 } else { 12946 ctxt->myDoc = newDoc; 12947 newDoc->children->doc = doc; 12948 /* Ensure that doc has XML spec namespace */ 12949 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 12950 newDoc->oldNs = doc->oldNs; 12951 } 12952 ctxt->instate = XML_PARSER_CONTENT; 12953 ctxt->depth = depth; 12954 12955 /* 12956 * Doing validity checking on chunk doesn't make sense 12957 */ 12958 ctxt->validate = 0; 12959 ctxt->loadsubset = 0; 12960 xmlDetectSAX2(ctxt); 12961 12962 if ( doc != NULL ){ 12963 content = doc->children; 12964 doc->children = NULL; 12965 xmlParseContent(ctxt); 12966 doc->children = content; 12967 } 12968 else { 12969 xmlParseContent(ctxt); 12970 } 12971 if ((RAW == '<') && (NXT(1) == '/')) { 12972 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12973 } else if (RAW != 0) { 12974 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12975 } 12976 if (ctxt->node != newDoc->children) { 12977 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12978 } 12979 12980 if (!ctxt->wellFormed) { 12981 if (ctxt->errNo == 0) 12982 ret = 1; 12983 else 12984 ret = ctxt->errNo; 12985 } else { 12986 ret = 0; 12987 } 12988 12989 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 12990 xmlNodePtr cur; 12991 12992 /* 12993 * Return the newly created nodeset after unlinking it from 12994 * they pseudo parent. 
12995 */ 12996 cur = newDoc->children->children; 12997 *lst = cur; 12998 while (cur != NULL) { 12999 xmlSetTreeDoc(cur, doc); 13000 cur->parent = NULL; 13001 cur = cur->next; 13002 } 13003 newDoc->children->children = NULL; 13004 } 13005 13006 if (sax != NULL) 13007 ctxt->sax = oldsax; 13008 xmlFreeParserCtxt(ctxt); 13009 newDoc->intSubset = NULL; 13010 newDoc->extSubset = NULL; 13011 newDoc->oldNs = NULL; 13012 xmlFreeDoc(newDoc); 13013 13014 return(ret); 13015} 13016 13017/** 13018 * xmlSAXParseEntity: 13019 * @sax: the SAX handler block 13020 * @filename: the filename 13021 * 13022 * parse an XML external entity out of context and build a tree. 13023 * It use the given SAX function block to handle the parsing callback. 13024 * If sax is NULL, fallback to the default DOM tree building routines. 13025 * 13026 * [78] extParsedEnt ::= TextDecl? content 13027 * 13028 * This correspond to a "Well Balanced" chunk 13029 * 13030 * Returns the resulting document tree 13031 */ 13032 13033xmlDocPtr 13034xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13035 xmlDocPtr ret; 13036 xmlParserCtxtPtr ctxt; 13037 13038 ctxt = xmlCreateFileParserCtxt(filename); 13039 if (ctxt == NULL) { 13040 return(NULL); 13041 } 13042 if (sax != NULL) { 13043 if (ctxt->sax != NULL) 13044 xmlFree(ctxt->sax); 13045 ctxt->sax = sax; 13046 ctxt->userData = NULL; 13047 } 13048 13049 xmlParseExtParsedEnt(ctxt); 13050 13051 if (ctxt->wellFormed) 13052 ret = ctxt->myDoc; 13053 else { 13054 ret = NULL; 13055 xmlFreeDoc(ctxt->myDoc); 13056 ctxt->myDoc = NULL; 13057 } 13058 if (sax != NULL) 13059 ctxt->sax = NULL; 13060 xmlFreeParserCtxt(ctxt); 13061 13062 return(ret); 13063} 13064 13065/** 13066 * xmlParseEntity: 13067 * @filename: the filename 13068 * 13069 * parse an XML external entity out of context and build a tree. 13070 * 13071 * [78] extParsedEnt ::= TextDecl? 
content 13072 * 13073 * This correspond to a "Well Balanced" chunk 13074 * 13075 * Returns the resulting document tree 13076 */ 13077 13078xmlDocPtr 13079xmlParseEntity(const char *filename) { 13080 return(xmlSAXParseEntity(NULL, filename)); 13081} 13082#endif /* LIBXML_SAX1_ENABLED */ 13083 13084/** 13085 * xmlCreateEntityParserCtxt: 13086 * @URL: the entity URL 13087 * @ID: the entity PUBLIC ID 13088 * @base: a possible base for the target URI 13089 * 13090 * Create a parser context for an external entity 13091 * Automatic support for ZLIB/Compress compressed document is provided 13092 * by default if found at compile-time. 13093 * 13094 * Returns the new parser context or NULL 13095 */ 13096xmlParserCtxtPtr 13097xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 13098 const xmlChar *base) { 13099 xmlParserCtxtPtr ctxt; 13100 xmlParserInputPtr inputStream; 13101 char *directory = NULL; 13102 xmlChar *uri; 13103 13104 ctxt = xmlNewParserCtxt(); 13105 if (ctxt == NULL) { 13106 return(NULL); 13107 } 13108 13109 uri = xmlBuildURI(URL, base); 13110 13111 if (uri == NULL) { 13112 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13113 if (inputStream == NULL) { 13114 xmlFreeParserCtxt(ctxt); 13115 return(NULL); 13116 } 13117 13118 inputPush(ctxt, inputStream); 13119 13120 if ((ctxt->directory == NULL) && (directory == NULL)) 13121 directory = xmlParserGetDirectory((char *)URL); 13122 if ((ctxt->directory == NULL) && (directory != NULL)) 13123 ctxt->directory = directory; 13124 } else { 13125 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13126 if (inputStream == NULL) { 13127 xmlFree(uri); 13128 xmlFreeParserCtxt(ctxt); 13129 return(NULL); 13130 } 13131 13132 inputPush(ctxt, inputStream); 13133 13134 if ((ctxt->directory == NULL) && (directory == NULL)) 13135 directory = xmlParserGetDirectory((char *)uri); 13136 if ((ctxt->directory == NULL) && (directory != NULL)) 13137 ctxt->directory = directory; 13138 
xmlFree(uri); 13139 } 13140 return(ctxt); 13141} 13142 13143/************************************************************************ 13144 * * 13145 * Front ends when parsing from a file * 13146 * * 13147 ************************************************************************/ 13148 13149/** 13150 * xmlCreateURLParserCtxt: 13151 * @filename: the filename or URL 13152 * @options: a combination of xmlParserOption 13153 * 13154 * Create a parser context for a file or URL content. 13155 * Automatic support for ZLIB/Compress compressed document is provided 13156 * by default if found at compile-time and for file accesses 13157 * 13158 * Returns the new parser context or NULL 13159 */ 13160xmlParserCtxtPtr 13161xmlCreateURLParserCtxt(const char *filename, int options) 13162{ 13163 xmlParserCtxtPtr ctxt; 13164 xmlParserInputPtr inputStream; 13165 char *directory = NULL; 13166 13167 ctxt = xmlNewParserCtxt(); 13168 if (ctxt == NULL) { 13169 xmlErrMemory(NULL, "cannot allocate parser context"); 13170 return(NULL); 13171 } 13172 13173 if (options) 13174 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13175 ctxt->linenumbers = 1; 13176 13177 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 13178 if (inputStream == NULL) { 13179 xmlFreeParserCtxt(ctxt); 13180 return(NULL); 13181 } 13182 13183 inputPush(ctxt, inputStream); 13184 if ((ctxt->directory == NULL) && (directory == NULL)) 13185 directory = xmlParserGetDirectory(filename); 13186 if ((ctxt->directory == NULL) && (directory != NULL)) 13187 ctxt->directory = directory; 13188 13189 return(ctxt); 13190} 13191 13192/** 13193 * xmlCreateFileParserCtxt: 13194 * @filename: the filename 13195 * 13196 * Create a parser context for a file content. 13197 * Automatic support for ZLIB/Compress compressed document is provided 13198 * by default if found at compile-time. 
13199 * 13200 * Returns the new parser context or NULL 13201 */ 13202xmlParserCtxtPtr 13203xmlCreateFileParserCtxt(const char *filename) 13204{ 13205 return(xmlCreateURLParserCtxt(filename, 0)); 13206} 13207 13208#ifdef LIBXML_SAX1_ENABLED 13209/** 13210 * xmlSAXParseFileWithData: 13211 * @sax: the SAX handler block 13212 * @filename: the filename 13213 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13214 * documents 13215 * @data: the userdata 13216 * 13217 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13218 * compressed document is provided by default if found at compile-time. 13219 * It use the given SAX function block to handle the parsing callback. 13220 * If sax is NULL, fallback to the default DOM tree building routines. 13221 * 13222 * User data (void *) is stored within the parser context in the 13223 * context's _private member, so it is available nearly everywhere in libxml 13224 * 13225 * Returns the resulting document tree 13226 */ 13227 13228xmlDocPtr 13229xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 13230 int recovery, void *data) { 13231 xmlDocPtr ret; 13232 xmlParserCtxtPtr ctxt; 13233 13234 xmlInitParser(); 13235 13236 ctxt = xmlCreateFileParserCtxt(filename); 13237 if (ctxt == NULL) { 13238 return(NULL); 13239 } 13240 if (sax != NULL) { 13241 if (ctxt->sax != NULL) 13242 xmlFree(ctxt->sax); 13243 ctxt->sax = sax; 13244 } 13245 xmlDetectSAX2(ctxt); 13246 if (data!=NULL) { 13247 ctxt->_private = data; 13248 } 13249 13250 if (ctxt->directory == NULL) 13251 ctxt->directory = xmlParserGetDirectory(filename); 13252 13253 ctxt->recovery = recovery; 13254 13255 xmlParseDocument(ctxt); 13256 13257 if ((ctxt->wellFormed) || recovery) { 13258 ret = ctxt->myDoc; 13259 if (ret != NULL) { 13260 if (ctxt->input->buf->compressed > 0) 13261 ret->compression = 9; 13262 else 13263 ret->compression = ctxt->input->buf->compressed; 13264 } 13265 } 13266 else { 13267 ret = NULL; 13268 
xmlFreeDoc(ctxt->myDoc); 13269 ctxt->myDoc = NULL; 13270 } 13271 if (sax != NULL) 13272 ctxt->sax = NULL; 13273 xmlFreeParserCtxt(ctxt); 13274 13275 return(ret); 13276} 13277 13278/** 13279 * xmlSAXParseFile: 13280 * @sax: the SAX handler block 13281 * @filename: the filename 13282 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13283 * documents 13284 * 13285 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13286 * compressed document is provided by default if found at compile-time. 13287 * It use the given SAX function block to handle the parsing callback. 13288 * If sax is NULL, fallback to the default DOM tree building routines. 13289 * 13290 * Returns the resulting document tree 13291 */ 13292 13293xmlDocPtr 13294xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 13295 int recovery) { 13296 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 13297} 13298 13299/** 13300 * xmlRecoverDoc: 13301 * @cur: a pointer to an array of xmlChar 13302 * 13303 * parse an XML in-memory document and build a tree. 13304 * In the case the document is not Well Formed, a attempt to build a 13305 * tree is tried anyway 13306 * 13307 * Returns the resulting document tree or NULL in case of failure 13308 */ 13309 13310xmlDocPtr 13311xmlRecoverDoc(xmlChar *cur) { 13312 return(xmlSAXParseDoc(NULL, cur, 1)); 13313} 13314 13315/** 13316 * xmlParseFile: 13317 * @filename: the filename 13318 * 13319 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13320 * compressed document is provided by default if found at compile-time. 13321 * 13322 * Returns the resulting document tree if the file was wellformed, 13323 * NULL otherwise. 13324 */ 13325 13326xmlDocPtr 13327xmlParseFile(const char *filename) { 13328 return(xmlSAXParseFile(NULL, filename, 0)); 13329} 13330 13331/** 13332 * xmlRecoverFile: 13333 * @filename: the filename 13334 * 13335 * parse an XML file and build a tree. 
Automatic support for ZLIB/Compress 13336 * compressed document is provided by default if found at compile-time. 13337 * In the case the document is not Well Formed, it attempts to build 13338 * a tree anyway 13339 * 13340 * Returns the resulting document tree or NULL in case of failure 13341 */ 13342 13343xmlDocPtr 13344xmlRecoverFile(const char *filename) { 13345 return(xmlSAXParseFile(NULL, filename, 1)); 13346} 13347 13348 13349/** 13350 * xmlSetupParserForBuffer: 13351 * @ctxt: an XML parser context 13352 * @buffer: a xmlChar * buffer 13353 * @filename: a file name 13354 * 13355 * Setup the parser context to parse a new buffer; Clears any prior 13356 * contents from the parser context. The buffer parameter must not be 13357 * NULL, but the filename parameter can be 13358 */ 13359void 13360xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 13361 const char* filename) 13362{ 13363 xmlParserInputPtr input; 13364 13365 if ((ctxt == NULL) || (buffer == NULL)) 13366 return; 13367 13368 input = xmlNewInputStream(ctxt); 13369 if (input == NULL) { 13370 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 13371 xmlClearParserCtxt(ctxt); 13372 return; 13373 } 13374 13375 xmlClearParserCtxt(ctxt); 13376 if (filename != NULL) 13377 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 13378 input->base = buffer; 13379 input->cur = buffer; 13380 input->end = &buffer[xmlStrlen(buffer)]; 13381 inputPush(ctxt, input); 13382} 13383 13384/** 13385 * xmlSAXUserParseFile: 13386 * @sax: a SAX handler 13387 * @user_data: The user data returned on SAX callbacks 13388 * @filename: a file name 13389 * 13390 * parse an XML file and call the given SAX handler routines. 
13391 * Automatic support for ZLIB/Compress compressed document is provided 13392 * 13393 * Returns 0 in case of success or a error number otherwise 13394 */ 13395int 13396xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 13397 const char *filename) { 13398 int ret = 0; 13399 xmlParserCtxtPtr ctxt; 13400 13401 ctxt = xmlCreateFileParserCtxt(filename); 13402 if (ctxt == NULL) return -1; 13403 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13404 xmlFree(ctxt->sax); 13405 ctxt->sax = sax; 13406 xmlDetectSAX2(ctxt); 13407 13408 if (user_data != NULL) 13409 ctxt->userData = user_data; 13410 13411 xmlParseDocument(ctxt); 13412 13413 if (ctxt->wellFormed) 13414 ret = 0; 13415 else { 13416 if (ctxt->errNo != 0) 13417 ret = ctxt->errNo; 13418 else 13419 ret = -1; 13420 } 13421 if (sax != NULL) 13422 ctxt->sax = NULL; 13423 if (ctxt->myDoc != NULL) { 13424 xmlFreeDoc(ctxt->myDoc); 13425 ctxt->myDoc = NULL; 13426 } 13427 xmlFreeParserCtxt(ctxt); 13428 13429 return ret; 13430} 13431#endif /* LIBXML_SAX1_ENABLED */ 13432 13433/************************************************************************ 13434 * * 13435 * Front ends when parsing from memory * 13436 * * 13437 ************************************************************************/ 13438 13439/** 13440 * xmlCreateMemoryParserCtxt: 13441 * @buffer: a pointer to a char array 13442 * @size: the size of the array 13443 * 13444 * Create a parser context for an XML in-memory document. 
13445 * 13446 * Returns the new parser context or NULL 13447 */ 13448xmlParserCtxtPtr 13449xmlCreateMemoryParserCtxt(const char *buffer, int size) { 13450 xmlParserCtxtPtr ctxt; 13451 xmlParserInputPtr input; 13452 xmlParserInputBufferPtr buf; 13453 13454 if (buffer == NULL) 13455 return(NULL); 13456 if (size <= 0) 13457 return(NULL); 13458 13459 ctxt = xmlNewParserCtxt(); 13460 if (ctxt == NULL) 13461 return(NULL); 13462 13463 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 13464 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13465 if (buf == NULL) { 13466 xmlFreeParserCtxt(ctxt); 13467 return(NULL); 13468 } 13469 13470 input = xmlNewInputStream(ctxt); 13471 if (input == NULL) { 13472 xmlFreeParserInputBuffer(buf); 13473 xmlFreeParserCtxt(ctxt); 13474 return(NULL); 13475 } 13476 13477 input->filename = NULL; 13478 input->buf = buf; 13479 input->base = input->buf->buffer->content; 13480 input->cur = input->buf->buffer->content; 13481 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 13482 13483 inputPush(ctxt, input); 13484 return(ctxt); 13485} 13486 13487#ifdef LIBXML_SAX1_ENABLED 13488/** 13489 * xmlSAXParseMemoryWithData: 13490 * @sax: the SAX handler block 13491 * @buffer: an pointer to a char array 13492 * @size: the size of the array 13493 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13494 * documents 13495 * @data: the userdata 13496 * 13497 * parse an XML in-memory block and use the given SAX function block 13498 * to handle the parsing callback. If sax is NULL, fallback to the default 13499 * DOM tree building routines. 
13500 * 13501 * User data (void *) is stored within the parser context in the 13502 * context's _private member, so it is available nearly everywhere in libxml 13503 * 13504 * Returns the resulting document tree 13505 */ 13506 13507xmlDocPtr 13508xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 13509 int size, int recovery, void *data) { 13510 xmlDocPtr ret; 13511 xmlParserCtxtPtr ctxt; 13512 13513 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13514 if (ctxt == NULL) return(NULL); 13515 if (sax != NULL) { 13516 if (ctxt->sax != NULL) 13517 xmlFree(ctxt->sax); 13518 ctxt->sax = sax; 13519 } 13520 xmlDetectSAX2(ctxt); 13521 if (data!=NULL) { 13522 ctxt->_private=data; 13523 } 13524 13525 ctxt->recovery = recovery; 13526 13527 xmlParseDocument(ctxt); 13528 13529 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13530 else { 13531 ret = NULL; 13532 xmlFreeDoc(ctxt->myDoc); 13533 ctxt->myDoc = NULL; 13534 } 13535 if (sax != NULL) 13536 ctxt->sax = NULL; 13537 xmlFreeParserCtxt(ctxt); 13538 13539 return(ret); 13540} 13541 13542/** 13543 * xmlSAXParseMemory: 13544 * @sax: the SAX handler block 13545 * @buffer: an pointer to a char array 13546 * @size: the size of the array 13547 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 13548 * documents 13549 * 13550 * parse an XML in-memory block and use the given SAX function block 13551 * to handle the parsing callback. If sax is NULL, fallback to the default 13552 * DOM tree building routines. 13553 * 13554 * Returns the resulting document tree 13555 */ 13556xmlDocPtr 13557xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 13558 int size, int recovery) { 13559 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 13560} 13561 13562/** 13563 * xmlParseMemory: 13564 * @buffer: an pointer to a char array 13565 * @size: the size of the array 13566 * 13567 * parse an XML in-memory block and build a tree. 
13568 * 13569 * Returns the resulting document tree 13570 */ 13571 13572xmlDocPtr xmlParseMemory(const char *buffer, int size) { 13573 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 13574} 13575 13576/** 13577 * xmlRecoverMemory: 13578 * @buffer: an pointer to a char array 13579 * @size: the size of the array 13580 * 13581 * parse an XML in-memory block and build a tree. 13582 * In the case the document is not Well Formed, an attempt to 13583 * build a tree is tried anyway 13584 * 13585 * Returns the resulting document tree or NULL in case of error 13586 */ 13587 13588xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 13589 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 13590} 13591 13592/** 13593 * xmlSAXUserParseMemory: 13594 * @sax: a SAX handler 13595 * @user_data: The user data returned on SAX callbacks 13596 * @buffer: an in-memory XML document input 13597 * @size: the length of the XML document in bytes 13598 * 13599 * A better SAX parsing routine. 13600 * parse an XML in-memory buffer and call the given SAX handler routines. 
13601 * 13602 * Returns 0 in case of success or a error number otherwise 13603 */ 13604int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 13605 const char *buffer, int size) { 13606 int ret = 0; 13607 xmlParserCtxtPtr ctxt; 13608 13609 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13610 if (ctxt == NULL) return -1; 13611 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13612 xmlFree(ctxt->sax); 13613 ctxt->sax = sax; 13614 xmlDetectSAX2(ctxt); 13615 13616 if (user_data != NULL) 13617 ctxt->userData = user_data; 13618 13619 xmlParseDocument(ctxt); 13620 13621 if (ctxt->wellFormed) 13622 ret = 0; 13623 else { 13624 if (ctxt->errNo != 0) 13625 ret = ctxt->errNo; 13626 else 13627 ret = -1; 13628 } 13629 if (sax != NULL) 13630 ctxt->sax = NULL; 13631 if (ctxt->myDoc != NULL) { 13632 xmlFreeDoc(ctxt->myDoc); 13633 ctxt->myDoc = NULL; 13634 } 13635 xmlFreeParserCtxt(ctxt); 13636 13637 return ret; 13638} 13639#endif /* LIBXML_SAX1_ENABLED */ 13640 13641/** 13642 * xmlCreateDocParserCtxt: 13643 * @cur: a pointer to an array of xmlChar 13644 * 13645 * Creates a parser context for an XML in-memory document. 13646 * 13647 * Returns the new parser context or NULL 13648 */ 13649xmlParserCtxtPtr 13650xmlCreateDocParserCtxt(const xmlChar *cur) { 13651 int len; 13652 13653 if (cur == NULL) 13654 return(NULL); 13655 len = xmlStrlen(cur); 13656 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 13657} 13658 13659#ifdef LIBXML_SAX1_ENABLED 13660/** 13661 * xmlSAXParseDoc: 13662 * @sax: the SAX handler block 13663 * @cur: a pointer to an array of xmlChar 13664 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13665 * documents 13666 * 13667 * parse an XML in-memory document and build a tree. 13668 * It use the given SAX function block to handle the parsing callback. 13669 * If sax is NULL, fallback to the default DOM tree building routines. 
13670 * 13671 * Returns the resulting document tree 13672 */ 13673 13674xmlDocPtr 13675xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 13676 xmlDocPtr ret; 13677 xmlParserCtxtPtr ctxt; 13678 xmlSAXHandlerPtr oldsax = NULL; 13679 13680 if (cur == NULL) return(NULL); 13681 13682 13683 ctxt = xmlCreateDocParserCtxt(cur); 13684 if (ctxt == NULL) return(NULL); 13685 if (sax != NULL) { 13686 oldsax = ctxt->sax; 13687 ctxt->sax = sax; 13688 ctxt->userData = NULL; 13689 } 13690 xmlDetectSAX2(ctxt); 13691 13692 xmlParseDocument(ctxt); 13693 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13694 else { 13695 ret = NULL; 13696 xmlFreeDoc(ctxt->myDoc); 13697 ctxt->myDoc = NULL; 13698 } 13699 if (sax != NULL) 13700 ctxt->sax = oldsax; 13701 xmlFreeParserCtxt(ctxt); 13702 13703 return(ret); 13704} 13705 13706/** 13707 * xmlParseDoc: 13708 * @cur: a pointer to an array of xmlChar 13709 * 13710 * parse an XML in-memory document and build a tree. 13711 * 13712 * Returns the resulting document tree 13713 */ 13714 13715xmlDocPtr 13716xmlParseDoc(const xmlChar *cur) { 13717 return(xmlSAXParseDoc(NULL, cur, 0)); 13718} 13719#endif /* LIBXML_SAX1_ENABLED */ 13720 13721#ifdef LIBXML_LEGACY_ENABLED 13722/************************************************************************ 13723 * * 13724 * Specific function to keep track of entities references * 13725 * and used by the XSLT debugger * 13726 * * 13727 ************************************************************************/ 13728 13729static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 13730 13731/** 13732 * xmlAddEntityReference: 13733 * @ent : A valid entity 13734 * @firstNode : A valid first node for children of entity 13735 * @lastNode : A valid last node of children entity 13736 * 13737 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 13738 */ 13739static void 13740xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 13741 xmlNodePtr lastNode) 13742{ 13743 
if (xmlEntityRefFunc != NULL) { 13744 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 13745 } 13746} 13747 13748 13749/** 13750 * xmlSetEntityReferenceFunc: 13751 * @func: A valid function 13752 * 13753 * Set the function to call call back when a xml reference has been made 13754 */ 13755void 13756xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 13757{ 13758 xmlEntityRefFunc = func; 13759} 13760#endif /* LIBXML_LEGACY_ENABLED */ 13761 13762/************************************************************************ 13763 * * 13764 * Miscellaneous * 13765 * * 13766 ************************************************************************/ 13767 13768#ifdef LIBXML_XPATH_ENABLED 13769#include <libxml/xpath.h> 13770#endif 13771 13772extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 13773static int xmlParserInitialized = 0; 13774 13775/** 13776 * xmlInitParser: 13777 * 13778 * Initialization function for the XML parser. 13779 * This is not reentrant. Call once before processing in case of 13780 * use in multithreaded programs. 
13781 */ 13782 13783void 13784xmlInitParser(void) { 13785 if (xmlParserInitialized != 0) 13786 return; 13787 13788#ifdef LIBXML_THREAD_ENABLED 13789 __xmlGlobalInitMutexLock(); 13790 if (xmlParserInitialized == 0) { 13791#endif 13792 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 13793 (xmlGenericError == NULL)) 13794 initGenericErrorDefaultFunc(NULL); 13795 xmlInitGlobals(); 13796 xmlInitThreads(); 13797 xmlInitMemory(); 13798 xmlInitCharEncodingHandlers(); 13799 xmlDefaultSAXHandlerInit(); 13800 xmlRegisterDefaultInputCallbacks(); 13801#ifdef LIBXML_OUTPUT_ENABLED 13802 xmlRegisterDefaultOutputCallbacks(); 13803#endif /* LIBXML_OUTPUT_ENABLED */ 13804#ifdef LIBXML_HTML_ENABLED 13805 htmlInitAutoClose(); 13806 htmlDefaultSAXHandlerInit(); 13807#endif 13808#ifdef LIBXML_XPATH_ENABLED 13809 xmlXPathInit(); 13810#endif 13811 xmlParserInitialized = 1; 13812#ifdef LIBXML_THREAD_ENABLED 13813 } 13814 __xmlGlobalInitMutexUnlock(); 13815#endif 13816} 13817 13818/** 13819 * xmlCleanupParser: 13820 * 13821 * This function name is somewhat misleading. It does not clean up 13822 * parser state, it cleans up memory allocated by the library itself. 13823 * It is a cleanup function for the XML library. It tries to reclaim all 13824 * related global memory allocated for the library processing. 13825 * It doesn't deallocate any document related memory. One should 13826 * call xmlCleanupParser() only when the process has finished using 13827 * the library and all XML/HTML documents built with it. 13828 * See also xmlInitParser() which has the opposite function of preparing 13829 * the library for operations. 
13830 */ 13831 13832void 13833xmlCleanupParser(void) { 13834 if (!xmlParserInitialized) 13835 return; 13836 13837 xmlCleanupCharEncodingHandlers(); 13838#ifdef LIBXML_CATALOG_ENABLED 13839 xmlCatalogCleanup(); 13840#endif 13841 xmlDictCleanup(); 13842 xmlCleanupInputCallbacks(); 13843#ifdef LIBXML_OUTPUT_ENABLED 13844 xmlCleanupOutputCallbacks(); 13845#endif 13846#ifdef LIBXML_SCHEMAS_ENABLED 13847 xmlSchemaCleanupTypes(); 13848 xmlRelaxNGCleanupTypes(); 13849#endif 13850 xmlCleanupGlobals(); 13851 xmlResetLastError(); 13852 xmlCleanupThreads(); /* must be last if called not from the main thread */ 13853 xmlCleanupMemory(); 13854 xmlParserInitialized = 0; 13855} 13856 13857/************************************************************************ 13858 * * 13859 * New set (2.6.0) of simpler and more flexible APIs * 13860 * * 13861 ************************************************************************/ 13862 13863/** 13864 * DICT_FREE: 13865 * @str: a string 13866 * 13867 * Free a string if it is not owned by the "dict" dictionnary in the 13868 * current scope 13869 */ 13870#define DICT_FREE(str) \ 13871 if ((str) && ((!dict) || \ 13872 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 13873 xmlFree((char *)(str)); 13874 13875/** 13876 * xmlCtxtReset: 13877 * @ctxt: an XML parser context 13878 * 13879 * Reset a parser context 13880 */ 13881void 13882xmlCtxtReset(xmlParserCtxtPtr ctxt) 13883{ 13884 xmlParserInputPtr input; 13885 xmlDictPtr dict; 13886 13887 if (ctxt == NULL) 13888 return; 13889 13890 dict = ctxt->dict; 13891 13892 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 13893 xmlFreeInputStream(input); 13894 } 13895 ctxt->inputNr = 0; 13896 ctxt->input = NULL; 13897 13898 ctxt->spaceNr = 0; 13899 if (ctxt->spaceTab != NULL) { 13900 ctxt->spaceTab[0] = -1; 13901 ctxt->space = &ctxt->spaceTab[0]; 13902 } else { 13903 ctxt->space = NULL; 13904 } 13905 13906 13907 ctxt->nodeNr = 0; 13908 ctxt->node = NULL; 13909 13910 ctxt->nameNr = 0; 13911 
ctxt->name = NULL; 13912 13913 DICT_FREE(ctxt->version); 13914 ctxt->version = NULL; 13915 DICT_FREE(ctxt->encoding); 13916 ctxt->encoding = NULL; 13917 DICT_FREE(ctxt->directory); 13918 ctxt->directory = NULL; 13919 DICT_FREE(ctxt->extSubURI); 13920 ctxt->extSubURI = NULL; 13921 DICT_FREE(ctxt->extSubSystem); 13922 ctxt->extSubSystem = NULL; 13923 if (ctxt->myDoc != NULL) 13924 xmlFreeDoc(ctxt->myDoc); 13925 ctxt->myDoc = NULL; 13926 13927 ctxt->standalone = -1; 13928 ctxt->hasExternalSubset = 0; 13929 ctxt->hasPErefs = 0; 13930 ctxt->html = 0; 13931 ctxt->external = 0; 13932 ctxt->instate = XML_PARSER_START; 13933 ctxt->token = 0; 13934 13935 ctxt->wellFormed = 1; 13936 ctxt->nsWellFormed = 1; 13937 ctxt->disableSAX = 0; 13938 ctxt->valid = 1; 13939#if 0 13940 ctxt->vctxt.userData = ctxt; 13941 ctxt->vctxt.error = xmlParserValidityError; 13942 ctxt->vctxt.warning = xmlParserValidityWarning; 13943#endif 13944 ctxt->record_info = 0; 13945 ctxt->nbChars = 0; 13946 ctxt->checkIndex = 0; 13947 ctxt->inSubset = 0; 13948 ctxt->errNo = XML_ERR_OK; 13949 ctxt->depth = 0; 13950 ctxt->charset = XML_CHAR_ENCODING_UTF8; 13951 ctxt->catalogs = NULL; 13952 ctxt->nbentities = 0; 13953 ctxt->sizeentities = 0; 13954 xmlInitNodeInfoSeq(&ctxt->node_seq); 13955 13956 if (ctxt->attsDefault != NULL) { 13957 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 13958 ctxt->attsDefault = NULL; 13959 } 13960 if (ctxt->attsSpecial != NULL) { 13961 xmlHashFree(ctxt->attsSpecial, NULL); 13962 ctxt->attsSpecial = NULL; 13963 } 13964 13965#ifdef LIBXML_CATALOG_ENABLED 13966 if (ctxt->catalogs != NULL) 13967 xmlCatalogFreeLocal(ctxt->catalogs); 13968#endif 13969 if (ctxt->lastError.code != XML_ERR_OK) 13970 xmlResetError(&ctxt->lastError); 13971} 13972 13973/** 13974 * xmlCtxtResetPush: 13975 * @ctxt: an XML parser context 13976 * @chunk: a pointer to an array of chars 13977 * @size: number of chars in the array 13978 * @filename: an optional file name or URI 13979 * @encoding: the 
document encoding, or NULL 13980 * 13981 * Reset a push parser context 13982 * 13983 * Returns 0 in case of success and 1 in case of error 13984 */ 13985int 13986xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 13987 int size, const char *filename, const char *encoding) 13988{ 13989 xmlParserInputPtr inputStream; 13990 xmlParserInputBufferPtr buf; 13991 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 13992 13993 if (ctxt == NULL) 13994 return(1); 13995 13996 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 13997 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 13998 13999 buf = xmlAllocParserInputBuffer(enc); 14000 if (buf == NULL) 14001 return(1); 14002 14003 if (ctxt == NULL) { 14004 xmlFreeParserInputBuffer(buf); 14005 return(1); 14006 } 14007 14008 xmlCtxtReset(ctxt); 14009 14010 if (ctxt->pushTab == NULL) { 14011 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14012 sizeof(xmlChar *)); 14013 if (ctxt->pushTab == NULL) { 14014 xmlErrMemory(ctxt, NULL); 14015 xmlFreeParserInputBuffer(buf); 14016 return(1); 14017 } 14018 } 14019 14020 if (filename == NULL) { 14021 ctxt->directory = NULL; 14022 } else { 14023 ctxt->directory = xmlParserGetDirectory(filename); 14024 } 14025 14026 inputStream = xmlNewInputStream(ctxt); 14027 if (inputStream == NULL) { 14028 xmlFreeParserInputBuffer(buf); 14029 return(1); 14030 } 14031 14032 if (filename == NULL) 14033 inputStream->filename = NULL; 14034 else 14035 inputStream->filename = (char *) 14036 xmlCanonicPath((const xmlChar *) filename); 14037 inputStream->buf = buf; 14038 inputStream->base = inputStream->buf->buffer->content; 14039 inputStream->cur = inputStream->buf->buffer->content; 14040 inputStream->end = 14041 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 14042 14043 inputPush(ctxt, inputStream); 14044 14045 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 14046 (ctxt->input->buf != NULL)) { 14047 int base = ctxt->input->base - 
ctxt->input->buf->buffer->content; 14048 int cur = ctxt->input->cur - ctxt->input->base; 14049 14050 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14051 14052 ctxt->input->base = ctxt->input->buf->buffer->content + base; 14053 ctxt->input->cur = ctxt->input->base + cur; 14054 ctxt->input->end = 14055 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 14056 use]; 14057#ifdef DEBUG_PUSH 14058 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14059#endif 14060 } 14061 14062 if (encoding != NULL) { 14063 xmlCharEncodingHandlerPtr hdlr; 14064 14065 if (ctxt->encoding != NULL) 14066 xmlFree((xmlChar *) ctxt->encoding); 14067 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14068 14069 hdlr = xmlFindCharEncodingHandler(encoding); 14070 if (hdlr != NULL) { 14071 xmlSwitchToEncoding(ctxt, hdlr); 14072 } else { 14073 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14074 "Unsupported encoding %s\n", BAD_CAST encoding); 14075 } 14076 } else if (enc != XML_CHAR_ENCODING_NONE) { 14077 xmlSwitchEncoding(ctxt, enc); 14078 } 14079 14080 return(0); 14081} 14082 14083 14084/** 14085 * xmlCtxtUseOptionsInternal: 14086 * @ctxt: an XML parser context 14087 * @options: a combination of xmlParserOption 14088 * @encoding: the user provided encoding to use 14089 * 14090 * Applies the options to the parser context 14091 * 14092 * Returns 0 in case of success, the set of unknown or unimplemented options 14093 * in case of error. 
14094 */ 14095static int 14096xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) 14097{ 14098 if (ctxt == NULL) 14099 return(-1); 14100 if (encoding != NULL) { 14101 if (ctxt->encoding != NULL) 14102 xmlFree((xmlChar *) ctxt->encoding); 14103 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14104 } 14105 if (options & XML_PARSE_RECOVER) { 14106 ctxt->recovery = 1; 14107 options -= XML_PARSE_RECOVER; 14108 ctxt->options |= XML_PARSE_RECOVER; 14109 } else 14110 ctxt->recovery = 0; 14111 if (options & XML_PARSE_DTDLOAD) { 14112 ctxt->loadsubset = XML_DETECT_IDS; 14113 options -= XML_PARSE_DTDLOAD; 14114 ctxt->options |= XML_PARSE_DTDLOAD; 14115 } else 14116 ctxt->loadsubset = 0; 14117 if (options & XML_PARSE_DTDATTR) { 14118 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 14119 options -= XML_PARSE_DTDATTR; 14120 ctxt->options |= XML_PARSE_DTDATTR; 14121 } 14122 if (options & XML_PARSE_NOENT) { 14123 ctxt->replaceEntities = 1; 14124 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 14125 options -= XML_PARSE_NOENT; 14126 ctxt->options |= XML_PARSE_NOENT; 14127 } else 14128 ctxt->replaceEntities = 0; 14129 if (options & XML_PARSE_PEDANTIC) { 14130 ctxt->pedantic = 1; 14131 options -= XML_PARSE_PEDANTIC; 14132 ctxt->options |= XML_PARSE_PEDANTIC; 14133 } else 14134 ctxt->pedantic = 0; 14135 if (options & XML_PARSE_NOBLANKS) { 14136 ctxt->keepBlanks = 0; 14137 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 14138 options -= XML_PARSE_NOBLANKS; 14139 ctxt->options |= XML_PARSE_NOBLANKS; 14140 } else 14141 ctxt->keepBlanks = 1; 14142 if (options & XML_PARSE_DTDVALID) { 14143 ctxt->validate = 1; 14144 if (options & XML_PARSE_NOWARNING) 14145 ctxt->vctxt.warning = NULL; 14146 if (options & XML_PARSE_NOERROR) 14147 ctxt->vctxt.error = NULL; 14148 options -= XML_PARSE_DTDVALID; 14149 ctxt->options |= XML_PARSE_DTDVALID; 14150 } else 14151 ctxt->validate = 0; 14152 if (options & XML_PARSE_NOWARNING) { 14153 ctxt->sax->warning = NULL; 14154 
options -= XML_PARSE_NOWARNING; 14155 } 14156 if (options & XML_PARSE_NOERROR) { 14157 ctxt->sax->error = NULL; 14158 ctxt->sax->fatalError = NULL; 14159 options -= XML_PARSE_NOERROR; 14160 } 14161#ifdef LIBXML_SAX1_ENABLED 14162 if (options & XML_PARSE_SAX1) { 14163 ctxt->sax->startElement = xmlSAX2StartElement; 14164 ctxt->sax->endElement = xmlSAX2EndElement; 14165 ctxt->sax->startElementNs = NULL; 14166 ctxt->sax->endElementNs = NULL; 14167 ctxt->sax->initialized = 1; 14168 options -= XML_PARSE_SAX1; 14169 ctxt->options |= XML_PARSE_SAX1; 14170 } 14171#endif /* LIBXML_SAX1_ENABLED */ 14172 if (options & XML_PARSE_NODICT) { 14173 ctxt->dictNames = 0; 14174 options -= XML_PARSE_NODICT; 14175 ctxt->options |= XML_PARSE_NODICT; 14176 } else { 14177 ctxt->dictNames = 1; 14178 } 14179 if (options & XML_PARSE_NOCDATA) { 14180 ctxt->sax->cdataBlock = NULL; 14181 options -= XML_PARSE_NOCDATA; 14182 ctxt->options |= XML_PARSE_NOCDATA; 14183 } 14184 if (options & XML_PARSE_NSCLEAN) { 14185 ctxt->options |= XML_PARSE_NSCLEAN; 14186 options -= XML_PARSE_NSCLEAN; 14187 } 14188 if (options & XML_PARSE_NONET) { 14189 ctxt->options |= XML_PARSE_NONET; 14190 options -= XML_PARSE_NONET; 14191 } 14192 if (options & XML_PARSE_COMPACT) { 14193 ctxt->options |= XML_PARSE_COMPACT; 14194 options -= XML_PARSE_COMPACT; 14195 } 14196 if (options & XML_PARSE_OLD10) { 14197 ctxt->options |= XML_PARSE_OLD10; 14198 options -= XML_PARSE_OLD10; 14199 } 14200 if (options & XML_PARSE_NOBASEFIX) { 14201 ctxt->options |= XML_PARSE_NOBASEFIX; 14202 options -= XML_PARSE_NOBASEFIX; 14203 } 14204 if (options & XML_PARSE_HUGE) { 14205 ctxt->options |= XML_PARSE_HUGE; 14206 options -= XML_PARSE_HUGE; 14207 } 14208 ctxt->linenumbers = 1; 14209 return (options); 14210} 14211 14212/** 14213 * xmlCtxtUseOptions: 14214 * @ctxt: an XML parser context 14215 * @options: a combination of xmlParserOption 14216 * 14217 * Applies the options to the parser context 14218 * 14219 * Returns 0 in case of success, the set 
of unknown or unimplemented options 14220 * in case of error. 14221 */ 14222int 14223xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 14224{ 14225 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 14226} 14227 14228/** 14229 * xmlDoRead: 14230 * @ctxt: an XML parser context 14231 * @URL: the base URL to use for the document 14232 * @encoding: the document encoding, or NULL 14233 * @options: a combination of xmlParserOption 14234 * @reuse: keep the context for reuse 14235 * 14236 * Common front-end for the xmlRead functions 14237 * 14238 * Returns the resulting document tree or NULL 14239 */ 14240static xmlDocPtr 14241xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 14242 int options, int reuse) 14243{ 14244 xmlDocPtr ret; 14245 14246 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 14247 if (encoding != NULL) { 14248 xmlCharEncodingHandlerPtr hdlr; 14249 14250 hdlr = xmlFindCharEncodingHandler(encoding); 14251 if (hdlr != NULL) 14252 xmlSwitchToEncoding(ctxt, hdlr); 14253 } 14254 if ((URL != NULL) && (ctxt->input != NULL) && 14255 (ctxt->input->filename == NULL)) 14256 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 14257 xmlParseDocument(ctxt); 14258 if ((ctxt->wellFormed) || ctxt->recovery) 14259 ret = ctxt->myDoc; 14260 else { 14261 ret = NULL; 14262 if (ctxt->myDoc != NULL) { 14263 xmlFreeDoc(ctxt->myDoc); 14264 } 14265 } 14266 ctxt->myDoc = NULL; 14267 if (!reuse) { 14268 xmlFreeParserCtxt(ctxt); 14269 } 14270 14271 return (ret); 14272} 14273 14274/** 14275 * xmlReadDoc: 14276 * @cur: a pointer to a zero terminated string 14277 * @URL: the base URL to use for the document 14278 * @encoding: the document encoding, or NULL 14279 * @options: a combination of xmlParserOption 14280 * 14281 * parse an XML in-memory document and build a tree. 
14282 * 14283 * Returns the resulting document tree 14284 */ 14285xmlDocPtr 14286xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 14287{ 14288 xmlParserCtxtPtr ctxt; 14289 14290 if (cur == NULL) 14291 return (NULL); 14292 14293 ctxt = xmlCreateDocParserCtxt(cur); 14294 if (ctxt == NULL) 14295 return (NULL); 14296 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14297} 14298 14299/** 14300 * xmlReadFile: 14301 * @filename: a file or URL 14302 * @encoding: the document encoding, or NULL 14303 * @options: a combination of xmlParserOption 14304 * 14305 * parse an XML file from the filesystem or the network. 14306 * 14307 * Returns the resulting document tree 14308 */ 14309xmlDocPtr 14310xmlReadFile(const char *filename, const char *encoding, int options) 14311{ 14312 xmlParserCtxtPtr ctxt; 14313 14314 ctxt = xmlCreateURLParserCtxt(filename, options); 14315 if (ctxt == NULL) 14316 return (NULL); 14317 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 14318} 14319 14320/** 14321 * xmlReadMemory: 14322 * @buffer: a pointer to a char array 14323 * @size: the size of the array 14324 * @URL: the base URL to use for the document 14325 * @encoding: the document encoding, or NULL 14326 * @options: a combination of xmlParserOption 14327 * 14328 * parse an XML in-memory document and build a tree. 
14329 * 14330 * Returns the resulting document tree 14331 */ 14332xmlDocPtr 14333xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 14334{ 14335 xmlParserCtxtPtr ctxt; 14336 14337 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14338 if (ctxt == NULL) 14339 return (NULL); 14340 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14341} 14342 14343/** 14344 * xmlReadFd: 14345 * @fd: an open file descriptor 14346 * @URL: the base URL to use for the document 14347 * @encoding: the document encoding, or NULL 14348 * @options: a combination of xmlParserOption 14349 * 14350 * parse an XML from a file descriptor and build a tree. 14351 * NOTE that the file descriptor will not be closed when the 14352 * reader is closed or reset. 14353 * 14354 * Returns the resulting document tree 14355 */ 14356xmlDocPtr 14357xmlReadFd(int fd, const char *URL, const char *encoding, int options) 14358{ 14359 xmlParserCtxtPtr ctxt; 14360 xmlParserInputBufferPtr input; 14361 xmlParserInputPtr stream; 14362 14363 if (fd < 0) 14364 return (NULL); 14365 14366 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14367 if (input == NULL) 14368 return (NULL); 14369 input->closecallback = NULL; 14370 ctxt = xmlNewParserCtxt(); 14371 if (ctxt == NULL) { 14372 xmlFreeParserInputBuffer(input); 14373 return (NULL); 14374 } 14375 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14376 if (stream == NULL) { 14377 xmlFreeParserInputBuffer(input); 14378 xmlFreeParserCtxt(ctxt); 14379 return (NULL); 14380 } 14381 inputPush(ctxt, stream); 14382 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14383} 14384 14385/** 14386 * xmlReadIO: 14387 * @ioread: an I/O read function 14388 * @ioclose: an I/O close function 14389 * @ioctx: an I/O handler 14390 * @URL: the base URL to use for the document 14391 * @encoding: the document encoding, or NULL 14392 * @options: a combination of xmlParserOption 14393 * 14394 * parse an XML document from 
I/O functions and source and build a tree. 14395 * 14396 * Returns the resulting document tree 14397 */ 14398xmlDocPtr 14399xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 14400 void *ioctx, const char *URL, const char *encoding, int options) 14401{ 14402 xmlParserCtxtPtr ctxt; 14403 xmlParserInputBufferPtr input; 14404 xmlParserInputPtr stream; 14405 14406 if (ioread == NULL) 14407 return (NULL); 14408 14409 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14410 XML_CHAR_ENCODING_NONE); 14411 if (input == NULL) 14412 return (NULL); 14413 ctxt = xmlNewParserCtxt(); 14414 if (ctxt == NULL) { 14415 xmlFreeParserInputBuffer(input); 14416 return (NULL); 14417 } 14418 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14419 if (stream == NULL) { 14420 xmlFreeParserInputBuffer(input); 14421 xmlFreeParserCtxt(ctxt); 14422 return (NULL); 14423 } 14424 inputPush(ctxt, stream); 14425 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14426} 14427 14428/** 14429 * xmlCtxtReadDoc: 14430 * @ctxt: an XML parser context 14431 * @cur: a pointer to a zero terminated string 14432 * @URL: the base URL to use for the document 14433 * @encoding: the document encoding, or NULL 14434 * @options: a combination of xmlParserOption 14435 * 14436 * parse an XML in-memory document and build a tree. 
14437 * This reuses the existing @ctxt parser context 14438 * 14439 * Returns the resulting document tree 14440 */ 14441xmlDocPtr 14442xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 14443 const char *URL, const char *encoding, int options) 14444{ 14445 xmlParserInputPtr stream; 14446 14447 if (cur == NULL) 14448 return (NULL); 14449 if (ctxt == NULL) 14450 return (NULL); 14451 14452 xmlCtxtReset(ctxt); 14453 14454 stream = xmlNewStringInputStream(ctxt, cur); 14455 if (stream == NULL) { 14456 return (NULL); 14457 } 14458 inputPush(ctxt, stream); 14459 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14460} 14461 14462/** 14463 * xmlCtxtReadFile: 14464 * @ctxt: an XML parser context 14465 * @filename: a file or URL 14466 * @encoding: the document encoding, or NULL 14467 * @options: a combination of xmlParserOption 14468 * 14469 * parse an XML file from the filesystem or the network. 14470 * This reuses the existing @ctxt parser context 14471 * 14472 * Returns the resulting document tree 14473 */ 14474xmlDocPtr 14475xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 14476 const char *encoding, int options) 14477{ 14478 xmlParserInputPtr stream; 14479 14480 if (filename == NULL) 14481 return (NULL); 14482 if (ctxt == NULL) 14483 return (NULL); 14484 14485 xmlCtxtReset(ctxt); 14486 14487 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 14488 if (stream == NULL) { 14489 return (NULL); 14490 } 14491 inputPush(ctxt, stream); 14492 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 14493} 14494 14495/** 14496 * xmlCtxtReadMemory: 14497 * @ctxt: an XML parser context 14498 * @buffer: a pointer to a char array 14499 * @size: the size of the array 14500 * @URL: the base URL to use for the document 14501 * @encoding: the document encoding, or NULL 14502 * @options: a combination of xmlParserOption 14503 * 14504 * parse an XML in-memory document and build a tree. 
14505 * This reuses the existing @ctxt parser context 14506 * 14507 * Returns the resulting document tree 14508 */ 14509xmlDocPtr 14510xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 14511 const char *URL, const char *encoding, int options) 14512{ 14513 xmlParserInputBufferPtr input; 14514 xmlParserInputPtr stream; 14515 14516 if (ctxt == NULL) 14517 return (NULL); 14518 if (buffer == NULL) 14519 return (NULL); 14520 14521 xmlCtxtReset(ctxt); 14522 14523 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14524 if (input == NULL) { 14525 return(NULL); 14526 } 14527 14528 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14529 if (stream == NULL) { 14530 xmlFreeParserInputBuffer(input); 14531 return(NULL); 14532 } 14533 14534 inputPush(ctxt, stream); 14535 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14536} 14537 14538/** 14539 * xmlCtxtReadFd: 14540 * @ctxt: an XML parser context 14541 * @fd: an open file descriptor 14542 * @URL: the base URL to use for the document 14543 * @encoding: the document encoding, or NULL 14544 * @options: a combination of xmlParserOption 14545 * 14546 * parse an XML from a file descriptor and build a tree. 14547 * This reuses the existing @ctxt parser context 14548 * NOTE that the file descriptor will not be closed when the 14549 * reader is closed or reset. 
14550 * 14551 * Returns the resulting document tree 14552 */ 14553xmlDocPtr 14554xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 14555 const char *URL, const char *encoding, int options) 14556{ 14557 xmlParserInputBufferPtr input; 14558 xmlParserInputPtr stream; 14559 14560 if (fd < 0) 14561 return (NULL); 14562 if (ctxt == NULL) 14563 return (NULL); 14564 14565 xmlCtxtReset(ctxt); 14566 14567 14568 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14569 if (input == NULL) 14570 return (NULL); 14571 input->closecallback = NULL; 14572 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14573 if (stream == NULL) { 14574 xmlFreeParserInputBuffer(input); 14575 return (NULL); 14576 } 14577 inputPush(ctxt, stream); 14578 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14579} 14580 14581/** 14582 * xmlCtxtReadIO: 14583 * @ctxt: an XML parser context 14584 * @ioread: an I/O read function 14585 * @ioclose: an I/O close function 14586 * @ioctx: an I/O handler 14587 * @URL: the base URL to use for the document 14588 * @encoding: the document encoding, or NULL 14589 * @options: a combination of xmlParserOption 14590 * 14591 * parse an XML document from I/O functions and source and build a tree. 
14592 * This reuses the existing @ctxt parser context 14593 * 14594 * Returns the resulting document tree 14595 */ 14596xmlDocPtr 14597xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 14598 xmlInputCloseCallback ioclose, void *ioctx, 14599 const char *URL, 14600 const char *encoding, int options) 14601{ 14602 xmlParserInputBufferPtr input; 14603 xmlParserInputPtr stream; 14604 14605 if (ioread == NULL) 14606 return (NULL); 14607 if (ctxt == NULL) 14608 return (NULL); 14609 14610 xmlCtxtReset(ctxt); 14611 14612 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14613 XML_CHAR_ENCODING_NONE); 14614 if (input == NULL) 14615 return (NULL); 14616 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14617 if (stream == NULL) { 14618 xmlFreeParserInputBuffer(input); 14619 return (NULL); 14620 } 14621 inputPush(ctxt, stream); 14622 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14623} 14624 14625#define bottom_parser 14626#include "elfgcchack.h" 14627