/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

#define IN_LIBXML
#include "libxml.h"

/* Path separator character: backslash on native WIN32 builds, slash elsewhere. */
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif

#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <libxml/xmlmemory.h>
#include <libxml/threads.h>
#include <libxml/globals.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
#include <libxml/xmlschemastypes.h>
#include <libxml/relaxng.h>
#endif
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
/* NOTE(review): <stdlib.h> is already included unconditionally above. */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif

/*
 * Forward declarations of static routines defined further down in this
 * file; xmlFatalErr is needed by xmlParserEntityCheck just below.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
	                          const xmlChar *base, xmlParserCtxtPtr pctx);

/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

/*
 * Entity replacement sizes below XML_PARSER_BIG_ENTITY are never flagged
 * by xmlParserEntityCheck.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in byte of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacement per byte of input.
104 */ 105#define XML_PARSER_NON_LINEAR 10 106 107/* 108 * xmlParserEntityCheck 109 * 110 * Function to check non-linear entity expansion behaviour 111 * This is here to detect and stop exponential linear entity expansion 112 * This is not a limitation of the parser but a safety 113 * boundary feature. It can be disabled with the XML_PARSE_HUGE 114 * parser option. 115 */ 116static int 117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size, 118 xmlEntityPtr ent) 119{ 120 unsigned long consumed = 0; 121 122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) 123 return (0); 124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 125 return (1); 126 if (size != 0) { 127 /* 128 * Do the check based on the replacement size of the entity 129 */ 130 if (size < XML_PARSER_BIG_ENTITY) 131 return(0); 132 133 /* 134 * A limit on the amount of text data reasonably used 135 */ 136 if (ctxt->input != NULL) { 137 consumed = ctxt->input->consumed + 138 (ctxt->input->cur - ctxt->input->base); 139 } 140 consumed += ctxt->sizeentities; 141 142 if ((size < XML_PARSER_NON_LINEAR * consumed) && 143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) 144 return (0); 145 } else if (ent != NULL) { 146 /* 147 * use the number of parsed entities in the replacement 148 */ 149 size = ent->checked; 150 151 /* 152 * The amount of data parsed counting entities size only once 153 */ 154 if (ctxt->input != NULL) { 155 consumed = ctxt->input->consumed + 156 (ctxt->input->cur - ctxt->input->base); 157 } 158 consumed += ctxt->sizeentities; 159 160 /* 161 * Check the density of entities for the amount of data 162 * knowing an entity reference will take at least 3 bytes 163 */ 164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR) 165 return (0); 166 } else { 167 /* 168 * strange we got no data for checking just return 169 */ 170 return (0); 171 } 172 173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 174 return (1); 175} 176 177/** 178 * xmlParserMaxDepth: 179 * 180 * arbitrary depth 
limit for the XML documents that we allow to 181 * process. This is not a limitation of the parser but a safety 182 * boundary feature. It can be disabled with the XML_PARSE_HUGE 183 * parser option. 184 */ 185unsigned int xmlParserMaxDepth = 256; 186 187 188 189#define SAX2 1 190#define XML_PARSER_BIG_BUFFER_SIZE 300 191#define XML_PARSER_BUFFER_SIZE 100 192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 193 194/* 195 * List of XML prefixed PI allowed by W3C specs 196 */ 197 198static const char *xmlW3CPIs[] = { 199 "xml-stylesheet", 200 NULL 201}; 202 203 204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 206 const xmlChar **str); 207 208static xmlParserErrors 209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 210 xmlSAXHandlerPtr sax, 211 void *user_data, int depth, const xmlChar *URL, 212 const xmlChar *ID, xmlNodePtr *list); 213 214static int 215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, 216 const char *encoding); 217#ifdef LIBXML_LEGACY_ENABLED 218static void 219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 220 xmlNodePtr lastNode); 221#endif /* LIBXML_LEGACY_ENABLED */ 222 223static xmlParserErrors 224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 225 const xmlChar *string, void *user_data, xmlNodePtr *lst); 226 227static int 228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); 229 230/************************************************************************ 231 * * 232 * Some factorized error routines * 233 * * 234 ************************************************************************/ 235 236/** 237 * xmlErrAttributeDup: 238 * @ctxt: an XML parser context 239 * @prefix: the attribute prefix 240 * @localname: the attribute localname 241 * 242 * Handle a redefinition of attribute error 243 */ 244static void 245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const 
xmlChar * prefix, 246 const xmlChar * localname) 247{ 248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 249 (ctxt->instate == XML_PARSER_EOF)) 250 return; 251 if (ctxt != NULL) 252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 253 254 if (prefix == NULL) 255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 257 (const char *) localname, NULL, NULL, 0, 0, 258 "Attribute %s redefined\n", localname); 259 else 260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, 262 (const char *) prefix, (const char *) localname, 263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 264 localname); 265 if (ctxt != NULL) { 266 ctxt->wellFormed = 0; 267 if (ctxt->recovery == 0) 268 ctxt->disableSAX = 1; 269 } 270} 271 272/** 273 * xmlFatalErr: 274 * @ctxt: an XML parser context 275 * @error: the error number 276 * @extra: extra information string 277 * 278 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 279 */ 280static void 281xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 282{ 283 const char *errmsg; 284 285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 286 (ctxt->instate == XML_PARSER_EOF)) 287 return; 288 switch (error) { 289 case XML_ERR_INVALID_HEX_CHARREF: 290 errmsg = "CharRef: invalid hexadecimal value\n"; 291 break; 292 case XML_ERR_INVALID_DEC_CHARREF: 293 errmsg = "CharRef: invalid decimal value\n"; 294 break; 295 case XML_ERR_INVALID_CHARREF: 296 errmsg = "CharRef: invalid value\n"; 297 break; 298 case XML_ERR_INTERNAL_ERROR: 299 errmsg = "internal error"; 300 break; 301 case XML_ERR_PEREF_AT_EOF: 302 errmsg = "PEReference at end of document\n"; 303 break; 304 case XML_ERR_PEREF_IN_PROLOG: 305 errmsg = "PEReference in prolog\n"; 306 break; 307 case XML_ERR_PEREF_IN_EPILOG: 308 errmsg = "PEReference in epilog\n"; 309 break; 310 case XML_ERR_PEREF_NO_NAME: 311 errmsg = "PEReference: no name\n"; 312 break; 313 case XML_ERR_PEREF_SEMICOL_MISSING: 314 errmsg = "PEReference: expecting ';'\n"; 315 break; 316 case XML_ERR_ENTITY_LOOP: 317 errmsg = "Detected an entity reference loop\n"; 318 break; 319 case XML_ERR_ENTITY_NOT_STARTED: 320 errmsg = "EntityValue: \" or ' expected\n"; 321 break; 322 case XML_ERR_ENTITY_PE_INTERNAL: 323 errmsg = "PEReferences forbidden in internal subset\n"; 324 break; 325 case XML_ERR_ENTITY_NOT_FINISHED: 326 errmsg = "EntityValue: \" or ' expected\n"; 327 break; 328 case XML_ERR_ATTRIBUTE_NOT_STARTED: 329 errmsg = "AttValue: \" or ' expected\n"; 330 break; 331 case XML_ERR_LT_IN_ATTRIBUTE: 332 errmsg = "Unescaped '<' not allowed in attributes values\n"; 333 break; 334 case XML_ERR_LITERAL_NOT_STARTED: 335 errmsg = "SystemLiteral \" or ' expected\n"; 336 break; 337 case XML_ERR_LITERAL_NOT_FINISHED: 338 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 339 break; 340 case XML_ERR_MISPLACED_CDATA_END: 341 errmsg = "Sequence ']]>' not allowed in 
content\n"; 342 break; 343 case XML_ERR_URI_REQUIRED: 344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 345 break; 346 case XML_ERR_PUBID_REQUIRED: 347 errmsg = "PUBLIC, the Public Identifier is missing\n"; 348 break; 349 case XML_ERR_HYPHEN_IN_COMMENT: 350 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 351 break; 352 case XML_ERR_PI_NOT_STARTED: 353 errmsg = "xmlParsePI : no target name\n"; 354 break; 355 case XML_ERR_RESERVED_XML_NAME: 356 errmsg = "Invalid PI name\n"; 357 break; 358 case XML_ERR_NOTATION_NOT_STARTED: 359 errmsg = "NOTATION: Name expected here\n"; 360 break; 361 case XML_ERR_NOTATION_NOT_FINISHED: 362 errmsg = "'>' required to close NOTATION declaration\n"; 363 break; 364 case XML_ERR_VALUE_REQUIRED: 365 errmsg = "Entity value required\n"; 366 break; 367 case XML_ERR_URI_FRAGMENT: 368 errmsg = "Fragment not allowed"; 369 break; 370 case XML_ERR_ATTLIST_NOT_STARTED: 371 errmsg = "'(' required to start ATTLIST enumeration\n"; 372 break; 373 case XML_ERR_NMTOKEN_REQUIRED: 374 errmsg = "NmToken expected in ATTLIST enumeration\n"; 375 break; 376 case XML_ERR_ATTLIST_NOT_FINISHED: 377 errmsg = "')' required to finish ATTLIST enumeration\n"; 378 break; 379 case XML_ERR_MIXED_NOT_STARTED: 380 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 381 break; 382 case XML_ERR_PCDATA_REQUIRED: 383 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 384 break; 385 case XML_ERR_ELEMCONTENT_NOT_STARTED: 386 errmsg = "ContentDecl : Name or '(' expected\n"; 387 break; 388 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 389 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 390 break; 391 case XML_ERR_PEREF_IN_INT_SUBSET: 392 errmsg = 393 "PEReference: forbidden within markup decl in internal subset\n"; 394 break; 395 case XML_ERR_GT_REQUIRED: 396 errmsg = "expected '>'\n"; 397 break; 398 case XML_ERR_CONDSEC_INVALID: 399 errmsg = "XML conditional section '[' expected\n"; 400 break; 401 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 402 errmsg = "Content error in 
the external subset\n"; 403 break; 404 case XML_ERR_CONDSEC_INVALID_KEYWORD: 405 errmsg = 406 "conditional section INCLUDE or IGNORE keyword expected\n"; 407 break; 408 case XML_ERR_CONDSEC_NOT_FINISHED: 409 errmsg = "XML conditional section not closed\n"; 410 break; 411 case XML_ERR_XMLDECL_NOT_STARTED: 412 errmsg = "Text declaration '<?xml' required\n"; 413 break; 414 case XML_ERR_XMLDECL_NOT_FINISHED: 415 errmsg = "parsing XML declaration: '?>' expected\n"; 416 break; 417 case XML_ERR_EXT_ENTITY_STANDALONE: 418 errmsg = "external parsed entities cannot be standalone\n"; 419 break; 420 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 421 errmsg = "EntityRef: expecting ';'\n"; 422 break; 423 case XML_ERR_DOCTYPE_NOT_FINISHED: 424 errmsg = "DOCTYPE improperly terminated\n"; 425 break; 426 case XML_ERR_LTSLASH_REQUIRED: 427 errmsg = "EndTag: '</' not found\n"; 428 break; 429 case XML_ERR_EQUAL_REQUIRED: 430 errmsg = "expected '='\n"; 431 break; 432 case XML_ERR_STRING_NOT_CLOSED: 433 errmsg = "String not closed expecting \" or '\n"; 434 break; 435 case XML_ERR_STRING_NOT_STARTED: 436 errmsg = "String not started expecting ' or \"\n"; 437 break; 438 case XML_ERR_ENCODING_NAME: 439 errmsg = "Invalid XML encoding name\n"; 440 break; 441 case XML_ERR_STANDALONE_VALUE: 442 errmsg = "standalone accepts only 'yes' or 'no'\n"; 443 break; 444 case XML_ERR_DOCUMENT_EMPTY: 445 errmsg = "Document is empty\n"; 446 break; 447 case XML_ERR_DOCUMENT_END: 448 errmsg = "Extra content at the end of the document\n"; 449 break; 450 case XML_ERR_NOT_WELL_BALANCED: 451 errmsg = "chunk is not well balanced\n"; 452 break; 453 case XML_ERR_EXTRA_CONTENT: 454 errmsg = "extra content at the end of well balanced chunk\n"; 455 break; 456 case XML_ERR_VERSION_MISSING: 457 errmsg = "Malformed declaration expecting version\n"; 458 break; 459#if 0 460 case: 461 errmsg = "\n"; 462 break; 463#endif 464 default: 465 errmsg = "Unregistered error message\n"; 466 } 467 if (ctxt != NULL) 468 ctxt->errNo = error; 
469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 471 info); 472 if (ctxt != NULL) { 473 ctxt->wellFormed = 0; 474 if (ctxt->recovery == 0) 475 ctxt->disableSAX = 1; 476 } 477} 478 479/** 480 * xmlFatalErrMsg: 481 * @ctxt: an XML parser context 482 * @error: the error number 483 * @msg: the error message 484 * 485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 486 */ 487static void 488xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 489 const char *msg) 490{ 491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 492 (ctxt->instate == XML_PARSER_EOF)) 493 return; 494 if (ctxt != NULL) 495 ctxt->errNo = error; 496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); 498 if (ctxt != NULL) { 499 ctxt->wellFormed = 0; 500 if (ctxt->recovery == 0) 501 ctxt->disableSAX = 1; 502 } 503} 504 505/** 506 * xmlWarningMsg: 507 * @ctxt: an XML parser context 508 * @error: the error number 509 * @msg: the error message 510 * @str1: extra data 511 * @str2: extra data 512 * 513 * Handle a warning. 514 */ 515static void 516xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 517 const char *msg, const xmlChar *str1, const xmlChar *str2) 518{ 519 xmlStructuredErrorFunc schannel = NULL; 520 521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 522 (ctxt->instate == XML_PARSER_EOF)) 523 return; 524 if ((ctxt != NULL) && (ctxt->sax != NULL) && 525 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 526 schannel = ctxt->sax->serror; 527 if (ctxt != NULL) { 528 __xmlRaiseError(schannel, 529 (ctxt->sax) ? 
ctxt->sax->warning : NULL, 530 ctxt->userData, 531 ctxt, NULL, XML_FROM_PARSER, error, 532 XML_ERR_WARNING, NULL, 0, 533 (const char *) str1, (const char *) str2, NULL, 0, 0, 534 msg, (const char *) str1, (const char *) str2); 535 } else { 536 __xmlRaiseError(schannel, NULL, NULL, 537 ctxt, NULL, XML_FROM_PARSER, error, 538 XML_ERR_WARNING, NULL, 0, 539 (const char *) str1, (const char *) str2, NULL, 0, 0, 540 msg, (const char *) str1, (const char *) str2); 541 } 542} 543 544/** 545 * xmlValidityError: 546 * @ctxt: an XML parser context 547 * @error: the error number 548 * @msg: the error message 549 * @str1: extra data 550 * 551 * Handle a validity error. 552 */ 553static void 554xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 555 const char *msg, const xmlChar *str1, const xmlChar *str2) 556{ 557 xmlStructuredErrorFunc schannel = NULL; 558 559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 560 (ctxt->instate == XML_PARSER_EOF)) 561 return; 562 if (ctxt != NULL) { 563 ctxt->errNo = error; 564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 565 schannel = ctxt->sax->serror; 566 } 567 if (ctxt != NULL) { 568 __xmlRaiseError(schannel, 569 ctxt->vctxt.error, ctxt->vctxt.userData, 570 ctxt, NULL, XML_FROM_DTD, error, 571 XML_ERR_ERROR, NULL, 0, (const char *) str1, 572 (const char *) str2, NULL, 0, 0, 573 msg, (const char *) str1, (const char *) str2); 574 ctxt->valid = 0; 575 } else { 576 __xmlRaiseError(schannel, NULL, NULL, 577 ctxt, NULL, XML_FROM_DTD, error, 578 XML_ERR_ERROR, NULL, 0, (const char *) str1, 579 (const char *) str2, NULL, 0, 0, 580 msg, (const char *) str1, (const char *) str2); 581 } 582} 583 584/** 585 * xmlFatalErrMsgInt: 586 * @ctxt: an XML parser context 587 * @error: the error number 588 * @msg: the error message 589 * @val: an integer value 590 * 591 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 592 */ 593static void 594xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 595 const char *msg, int val) 596{ 597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 598 (ctxt->instate == XML_PARSER_EOF)) 599 return; 600 if (ctxt != NULL) 601 ctxt->errNo = error; 602 __xmlRaiseError(NULL, NULL, NULL, 603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 605 if (ctxt != NULL) { 606 ctxt->wellFormed = 0; 607 if (ctxt->recovery == 0) 608 ctxt->disableSAX = 1; 609 } 610} 611 612/** 613 * xmlFatalErrMsgStrIntStr: 614 * @ctxt: an XML parser context 615 * @error: the error number 616 * @msg: the error message 617 * @str1: an string info 618 * @val: an integer value 619 * @str2: an string info 620 * 621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 622 */ 623static void 624xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 625 const char *msg, const xmlChar *str1, int val, 626 const xmlChar *str2) 627{ 628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 629 (ctxt->instate == XML_PARSER_EOF)) 630 return; 631 if (ctxt != NULL) 632 ctxt->errNo = error; 633 __xmlRaiseError(NULL, NULL, NULL, 634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 635 NULL, 0, (const char *) str1, (const char *) str2, 636 NULL, val, 0, msg, str1, val, str2); 637 if (ctxt != NULL) { 638 ctxt->wellFormed = 0; 639 if (ctxt->recovery == 0) 640 ctxt->disableSAX = 1; 641 } 642} 643 644/** 645 * xmlFatalErrMsgStr: 646 * @ctxt: an XML parser context 647 * @error: the error number 648 * @msg: the error message 649 * @val: a string value 650 * 651 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 652 */ 653static void 654xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 655 const char *msg, const xmlChar * val) 656{ 657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 658 (ctxt->instate == XML_PARSER_EOF)) 659 return; 660 if (ctxt != NULL) 661 ctxt->errNo = error; 662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 663 XML_FROM_PARSER, error, XML_ERR_FATAL, 664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 665 val); 666 if (ctxt != NULL) { 667 ctxt->wellFormed = 0; 668 if (ctxt->recovery == 0) 669 ctxt->disableSAX = 1; 670 } 671} 672 673/** 674 * xmlErrMsgStr: 675 * @ctxt: an XML parser context 676 * @error: the error number 677 * @msg: the error message 678 * @val: a string value 679 * 680 * Handle a non fatal parser error 681 */ 682static void 683xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 684 const char *msg, const xmlChar * val) 685{ 686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 687 (ctxt->instate == XML_PARSER_EOF)) 688 return; 689 if (ctxt != NULL) 690 ctxt->errNo = error; 691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 692 XML_FROM_PARSER, error, XML_ERR_ERROR, 693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 694 val); 695} 696 697/** 698 * xmlNsErr: 699 * @ctxt: an XML parser context 700 * @error: the error number 701 * @msg: the message 702 * @info1: extra information string 703 * @info2: extra information string 704 * 705 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 706 */ 707static void 708xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 709 const char *msg, 710 const xmlChar * info1, const xmlChar * info2, 711 const xmlChar * info3) 712{ 713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 714 (ctxt->instate == XML_PARSER_EOF)) 715 return; 716 if (ctxt != NULL) 717 ctxt->errNo = error; 718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 719 XML_ERR_ERROR, NULL, 0, (const char *) info1, 720 (const char *) info2, (const char *) info3, 0, 0, msg, 721 info1, info2, info3); 722 if (ctxt != NULL) 723 ctxt->nsWellFormed = 0; 724} 725 726/** 727 * xmlNsWarn 728 * @ctxt: an XML parser context 729 * @error: the error number 730 * @msg: the message 731 * @info1: extra information string 732 * @info2: extra information string 733 * 734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 735 */ 736static void 737xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, 738 const char *msg, 739 const xmlChar * info1, const xmlChar * info2, 740 const xmlChar * info3) 741{ 742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 743 (ctxt->instate == XML_PARSER_EOF)) 744 return; 745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, 746 XML_ERR_WARNING, NULL, 0, (const char *) info1, 747 (const char *) info2, (const char *) info3, 0, 0, msg, 748 info1, info2, info3); 749} 750 751/************************************************************************ 752 * * 753 * Library wide options * 754 * * 755 ************************************************************************/ 756 757/** 758 * xmlHasFeature: 759 * @feature: the feature to be examined 760 * 761 * Examines if the library has been compiled with a given feature. 762 * 763 * Returns a non-zero value if the feature exist, otherwise zero. 764 * Returns zero (0) if the feature does not exist or an unknown 765 * unknown feature is requested, non-zero otherwise. 
766 */ 767int 768xmlHasFeature(xmlFeature feature) 769{ 770 switch (feature) { 771 case XML_WITH_THREAD: 772#ifdef LIBXML_THREAD_ENABLED 773 return(1); 774#else 775 return(0); 776#endif 777 case XML_WITH_TREE: 778#ifdef LIBXML_TREE_ENABLED 779 return(1); 780#else 781 return(0); 782#endif 783 case XML_WITH_OUTPUT: 784#ifdef LIBXML_OUTPUT_ENABLED 785 return(1); 786#else 787 return(0); 788#endif 789 case XML_WITH_PUSH: 790#ifdef LIBXML_PUSH_ENABLED 791 return(1); 792#else 793 return(0); 794#endif 795 case XML_WITH_READER: 796#ifdef LIBXML_READER_ENABLED 797 return(1); 798#else 799 return(0); 800#endif 801 case XML_WITH_PATTERN: 802#ifdef LIBXML_PATTERN_ENABLED 803 return(1); 804#else 805 return(0); 806#endif 807 case XML_WITH_WRITER: 808#ifdef LIBXML_WRITER_ENABLED 809 return(1); 810#else 811 return(0); 812#endif 813 case XML_WITH_SAX1: 814#ifdef LIBXML_SAX1_ENABLED 815 return(1); 816#else 817 return(0); 818#endif 819 case XML_WITH_FTP: 820#ifdef LIBXML_FTP_ENABLED 821 return(1); 822#else 823 return(0); 824#endif 825 case XML_WITH_HTTP: 826#ifdef LIBXML_HTTP_ENABLED 827 return(1); 828#else 829 return(0); 830#endif 831 case XML_WITH_VALID: 832#ifdef LIBXML_VALID_ENABLED 833 return(1); 834#else 835 return(0); 836#endif 837 case XML_WITH_HTML: 838#ifdef LIBXML_HTML_ENABLED 839 return(1); 840#else 841 return(0); 842#endif 843 case XML_WITH_LEGACY: 844#ifdef LIBXML_LEGACY_ENABLED 845 return(1); 846#else 847 return(0); 848#endif 849 case XML_WITH_C14N: 850#ifdef LIBXML_C14N_ENABLED 851 return(1); 852#else 853 return(0); 854#endif 855 case XML_WITH_CATALOG: 856#ifdef LIBXML_CATALOG_ENABLED 857 return(1); 858#else 859 return(0); 860#endif 861 case XML_WITH_XPATH: 862#ifdef LIBXML_XPATH_ENABLED 863 return(1); 864#else 865 return(0); 866#endif 867 case XML_WITH_XPTR: 868#ifdef LIBXML_XPTR_ENABLED 869 return(1); 870#else 871 return(0); 872#endif 873 case XML_WITH_XINCLUDE: 874#ifdef LIBXML_XINCLUDE_ENABLED 875 return(1); 876#else 877 return(0); 878#endif 879 case XML_WITH_ICONV: 
880#ifdef LIBXML_ICONV_ENABLED 881 return(1); 882#else 883 return(0); 884#endif 885 case XML_WITH_ISO8859X: 886#ifdef LIBXML_ISO8859X_ENABLED 887 return(1); 888#else 889 return(0); 890#endif 891 case XML_WITH_UNICODE: 892#ifdef LIBXML_UNICODE_ENABLED 893 return(1); 894#else 895 return(0); 896#endif 897 case XML_WITH_REGEXP: 898#ifdef LIBXML_REGEXP_ENABLED 899 return(1); 900#else 901 return(0); 902#endif 903 case XML_WITH_AUTOMATA: 904#ifdef LIBXML_AUTOMATA_ENABLED 905 return(1); 906#else 907 return(0); 908#endif 909 case XML_WITH_EXPR: 910#ifdef LIBXML_EXPR_ENABLED 911 return(1); 912#else 913 return(0); 914#endif 915 case XML_WITH_SCHEMAS: 916#ifdef LIBXML_SCHEMAS_ENABLED 917 return(1); 918#else 919 return(0); 920#endif 921 case XML_WITH_SCHEMATRON: 922#ifdef LIBXML_SCHEMATRON_ENABLED 923 return(1); 924#else 925 return(0); 926#endif 927 case XML_WITH_MODULES: 928#ifdef LIBXML_MODULES_ENABLED 929 return(1); 930#else 931 return(0); 932#endif 933 case XML_WITH_DEBUG: 934#ifdef LIBXML_DEBUG_ENABLED 935 return(1); 936#else 937 return(0); 938#endif 939 case XML_WITH_DEBUG_MEM: 940#ifdef DEBUG_MEMORY_LOCATION 941 return(1); 942#else 943 return(0); 944#endif 945 case XML_WITH_DEBUG_RUN: 946#ifdef LIBXML_DEBUG_RUNTIME 947 return(1); 948#else 949 return(0); 950#endif 951 case XML_WITH_ZLIB: 952#ifdef LIBXML_ZLIB_ENABLED 953 return(1); 954#else 955 return(0); 956#endif 957 default: 958 break; 959 } 960 return(0); 961} 962 963/************************************************************************ 964 * * 965 * SAX2 defaulted attributes handling * 966 * * 967 ************************************************************************/ 968 969/** 970 * xmlDetectSAX2: 971 * @ctxt: an XML parser context 972 * 973 * Do the SAX2 detection and specific intialization 974 */ 975static void 976xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 977 if (ctxt == NULL) return; 978#ifdef LIBXML_SAX1_ENABLED 979 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 980 
((ctxt->sax->startElementNs != NULL) || 981 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 982#else 983 ctxt->sax2 = 1; 984#endif /* LIBXML_SAX1_ENABLED */ 985 986 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 987 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 988 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 989 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 990 (ctxt->str_xml_ns == NULL)) { 991 xmlErrMemory(ctxt, NULL); 992 } 993} 994 995typedef struct _xmlDefAttrs xmlDefAttrs; 996typedef xmlDefAttrs *xmlDefAttrsPtr; 997struct _xmlDefAttrs { 998 int nbAttrs; /* number of defaulted attributes on that element */ 999 int maxAttrs; /* the size of the array */ 1000 const xmlChar *values[5]; /* array of localname/prefix/values/external */ 1001}; 1002 1003/** 1004 * xmlAttrNormalizeSpace: 1005 * @src: the source string 1006 * @dst: the target string 1007 * 1008 * Normalize the space in non CDATA attribute values: 1009 * If the attribute type is not CDATA, then the XML processor MUST further 1010 * process the normalized attribute value by discarding any leading and 1011 * trailing space (#x20) characters, and by replacing sequences of space 1012 * (#x20) characters by a single space (#x20) character. 1013 * Note that the size of dst need to be at least src, and if one doesn't need 1014 * to preserve dst (and it doesn't come from a dictionary or read-only) then 1015 * passing src as dst is just fine. 1016 * 1017 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1018 * is needed. 
1019 */ 1020static xmlChar * 1021xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) 1022{ 1023 if ((src == NULL) || (dst == NULL)) 1024 return(NULL); 1025 1026 while (*src == 0x20) src++; 1027 while (*src != 0) { 1028 if (*src == 0x20) { 1029 while (*src == 0x20) src++; 1030 if (*src != 0) 1031 *dst++ = 0x20; 1032 } else { 1033 *dst++ = *src++; 1034 } 1035 } 1036 *dst = 0; 1037 if (dst == src) 1038 return(NULL); 1039 return(dst); 1040} 1041 1042/** 1043 * xmlAttrNormalizeSpace2: 1044 * @src: the source string 1045 * 1046 * Normalize the space in non CDATA attribute values, a slightly more complex 1047 * front end to avoid allocation problems when running on attribute values 1048 * coming from the input. 1049 * 1050 * Returns a pointer to the normalized value (dst) or NULL if no conversion 1051 * is needed. 1052 */ 1053static const xmlChar * 1054xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) 1055{ 1056 int i; 1057 int remove_head = 0; 1058 int need_realloc = 0; 1059 const xmlChar *cur; 1060 1061 if ((ctxt == NULL) || (src == NULL) || (len == NULL)) 1062 return(NULL); 1063 i = *len; 1064 if (i <= 0) 1065 return(NULL); 1066 1067 cur = src; 1068 while (*cur == 0x20) { 1069 cur++; 1070 remove_head++; 1071 } 1072 while (*cur != 0) { 1073 if (*cur == 0x20) { 1074 cur++; 1075 if ((*cur == 0x20) || (*cur == 0)) { 1076 need_realloc = 1; 1077 break; 1078 } 1079 } else 1080 cur++; 1081 } 1082 if (need_realloc) { 1083 xmlChar *ret; 1084 1085 ret = xmlStrndup(src + remove_head, i - remove_head + 1); 1086 if (ret == NULL) { 1087 xmlErrMemory(ctxt, NULL); 1088 return(NULL); 1089 } 1090 xmlAttrNormalizeSpace(ret, ret); 1091 *len = (int) strlen((const char *)ret); 1092 return(ret); 1093 } else if (remove_head) { 1094 *len -= remove_head; 1095 memmove(src, src + remove_head, 1 + *len); 1096 return(src); 1097 } 1098 return(NULL); 1099} 1100 1101/** 1102 * xmlAddDefAttrs: 1103 * @ctxt: an XML parser context 1104 * @fullname: the element fullname 1105 * 
@fullattr: the attribute fullname 1106 * @value: the attribute value 1107 * 1108 * Add a defaulted attribute for an element 1109 */ 1110static void 1111xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 1112 const xmlChar *fullname, 1113 const xmlChar *fullattr, 1114 const xmlChar *value) { 1115 xmlDefAttrsPtr defaults; 1116 int len; 1117 const xmlChar *name; 1118 const xmlChar *prefix; 1119 1120 /* 1121 * Allows to detect attribute redefinitions 1122 */ 1123 if (ctxt->attsSpecial != NULL) { 1124 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1125 return; 1126 } 1127 1128 if (ctxt->attsDefault == NULL) { 1129 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 1130 if (ctxt->attsDefault == NULL) 1131 goto mem_error; 1132 } 1133 1134 /* 1135 * split the element name into prefix:localname , the string found 1136 * are within the DTD and then not associated to namespace names. 1137 */ 1138 name = xmlSplitQName3(fullname, &len); 1139 if (name == NULL) { 1140 name = xmlDictLookup(ctxt->dict, fullname, -1); 1141 prefix = NULL; 1142 } else { 1143 name = xmlDictLookup(ctxt->dict, name, -1); 1144 prefix = xmlDictLookup(ctxt->dict, fullname, len); 1145 } 1146 1147 /* 1148 * make sure there is some storage 1149 */ 1150 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 1151 if (defaults == NULL) { 1152 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 1153 (4 * 5) * sizeof(const xmlChar *)); 1154 if (defaults == NULL) 1155 goto mem_error; 1156 defaults->nbAttrs = 0; 1157 defaults->maxAttrs = 4; 1158 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1159 defaults, NULL) < 0) { 1160 xmlFree(defaults); 1161 goto mem_error; 1162 } 1163 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 1164 xmlDefAttrsPtr temp; 1165 1166 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 1167 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); 1168 if (temp == NULL) 1169 goto mem_error; 1170 defaults = temp; 1171 defaults->maxAttrs *= 2; 
1172 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, 1173 defaults, NULL) < 0) { 1174 xmlFree(defaults); 1175 goto mem_error; 1176 } 1177 } 1178 1179 /* 1180 * Split the element name into prefix:localname , the string found 1181 * are within the DTD and hen not associated to namespace names. 1182 */ 1183 name = xmlSplitQName3(fullattr, &len); 1184 if (name == NULL) { 1185 name = xmlDictLookup(ctxt->dict, fullattr, -1); 1186 prefix = NULL; 1187 } else { 1188 name = xmlDictLookup(ctxt->dict, name, -1); 1189 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 1190 } 1191 1192 defaults->values[5 * defaults->nbAttrs] = name; 1193 defaults->values[5 * defaults->nbAttrs + 1] = prefix; 1194 /* intern the string and precompute the end */ 1195 len = xmlStrlen(value); 1196 value = xmlDictLookup(ctxt->dict, value, len); 1197 defaults->values[5 * defaults->nbAttrs + 2] = value; 1198 defaults->values[5 * defaults->nbAttrs + 3] = value + len; 1199 if (ctxt->external) 1200 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; 1201 else 1202 defaults->values[5 * defaults->nbAttrs + 4] = NULL; 1203 defaults->nbAttrs++; 1204 1205 return; 1206 1207mem_error: 1208 xmlErrMemory(ctxt, NULL); 1209 return; 1210} 1211 1212/** 1213 * xmlAddSpecialAttr: 1214 * @ctxt: an XML parser context 1215 * @fullname: the element fullname 1216 * @fullattr: the attribute fullname 1217 * @type: the attribute type 1218 * 1219 * Register this attribute type 1220 */ 1221static void 1222xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 1223 const xmlChar *fullname, 1224 const xmlChar *fullattr, 1225 int type) 1226{ 1227 if (ctxt->attsSpecial == NULL) { 1228 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 1229 if (ctxt->attsSpecial == NULL) 1230 goto mem_error; 1231 } 1232 1233 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) 1234 return; 1235 1236 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 1237 (void *) (long) type); 1238 return; 1239 1240mem_error: 1241 
xmlErrMemory(ctxt, NULL); 1242 return; 1243} 1244 1245/** 1246 * xmlCleanSpecialAttrCallback: 1247 * 1248 * Removes CDATA attributes from the special attribute table 1249 */ 1250static void 1251xmlCleanSpecialAttrCallback(void *payload, void *data, 1252 const xmlChar *fullname, const xmlChar *fullattr, 1253 const xmlChar *unused ATTRIBUTE_UNUSED) { 1254 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; 1255 1256 if (((long) payload) == XML_ATTRIBUTE_CDATA) { 1257 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); 1258 } 1259} 1260 1261/** 1262 * xmlCleanSpecialAttr: 1263 * @ctxt: an XML parser context 1264 * 1265 * Trim the list of attributes defined to remove all those of type 1266 * CDATA as they are not special. This call should be done when finishing 1267 * to parse the DTD and before starting to parse the document root. 1268 */ 1269static void 1270xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) 1271{ 1272 if (ctxt->attsSpecial == NULL) 1273 return; 1274 1275 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); 1276 1277 if (xmlHashSize(ctxt->attsSpecial) == 0) { 1278 xmlHashFree(ctxt->attsSpecial, NULL); 1279 ctxt->attsSpecial = NULL; 1280 } 1281 return; 1282} 1283 1284/** 1285 * xmlCheckLanguageID: 1286 * @lang: pointer to the string value 1287 * 1288 * Checks that the value conforms to the LanguageID production: 1289 * 1290 * NOTE: this is somewhat deprecated, those productions were removed from 1291 * the XML Second edition. 
1292 * 1293 * [33] LanguageID ::= Langcode ('-' Subcode)* 1294 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 1295 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 1296 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 1297 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 1298 * [38] Subcode ::= ([a-z] | [A-Z])+ 1299 * 1300 * Returns 1 if correct 0 otherwise 1301 **/ 1302int 1303xmlCheckLanguageID(const xmlChar * lang) 1304{ 1305 const xmlChar *cur = lang; 1306 1307 if (cur == NULL) 1308 return (0); 1309 if (((cur[0] == 'i') && (cur[1] == '-')) || 1310 ((cur[0] == 'I') && (cur[1] == '-'))) { 1311 /* 1312 * IANA code 1313 */ 1314 cur += 2; 1315 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1316 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1317 cur++; 1318 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 1319 ((cur[0] == 'X') && (cur[1] == '-'))) { 1320 /* 1321 * User code 1322 */ 1323 cur += 2; 1324 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1325 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1326 cur++; 1327 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1328 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 1329 /* 1330 * ISO639 1331 */ 1332 cur++; 1333 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1334 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1335 cur++; 1336 else 1337 return (0); 1338 } else 1339 return (0); 1340 while (cur[0] != 0) { /* non input consuming */ 1341 if (cur[0] != '-') 1342 return (0); 1343 cur++; 1344 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1345 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1346 cur++; 1347 else 1348 return (0); 1349 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1350 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1351 cur++; 1352 } 1353 return (1); 1354} 1355 1356/************************************************************************ 1357 * * 1358 * Parser stacks related functions and macros * 1359 * * 1360 
************************************************************************/ 1361 1362static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1363 const xmlChar ** str); 1364 1365#ifdef SAX2 1366/** 1367 * nsPush: 1368 * @ctxt: an XML parser context 1369 * @prefix: the namespace prefix or NULL 1370 * @URL: the namespace name 1371 * 1372 * Pushes a new parser namespace on top of the ns stack 1373 * 1374 * Returns -1 in case of error, -2 if the namespace should be discarded 1375 * and the index in the stack otherwise. 1376 */ 1377static int 1378nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1379{ 1380 if (ctxt->options & XML_PARSE_NSCLEAN) { 1381 int i; 1382 for (i = 0;i < ctxt->nsNr;i += 2) { 1383 if (ctxt->nsTab[i] == prefix) { 1384 /* in scope */ 1385 if (ctxt->nsTab[i + 1] == URL) 1386 return(-2); 1387 /* out of scope keep it */ 1388 break; 1389 } 1390 } 1391 } 1392 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1393 ctxt->nsMax = 10; 1394 ctxt->nsNr = 0; 1395 ctxt->nsTab = (const xmlChar **) 1396 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1397 if (ctxt->nsTab == NULL) { 1398 xmlErrMemory(ctxt, NULL); 1399 ctxt->nsMax = 0; 1400 return (-1); 1401 } 1402 } else if (ctxt->nsNr >= ctxt->nsMax) { 1403 const xmlChar ** tmp; 1404 ctxt->nsMax *= 2; 1405 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, 1406 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1407 if (tmp == NULL) { 1408 xmlErrMemory(ctxt, NULL); 1409 ctxt->nsMax /= 2; 1410 return (-1); 1411 } 1412 ctxt->nsTab = tmp; 1413 } 1414 ctxt->nsTab[ctxt->nsNr++] = prefix; 1415 ctxt->nsTab[ctxt->nsNr++] = URL; 1416 return (ctxt->nsNr); 1417} 1418/** 1419 * nsPop: 1420 * @ctxt: an XML parser context 1421 * @nr: the number to pop 1422 * 1423 * Pops the top @nr parser prefix/namespace from the ns stack 1424 * 1425 * Returns the number of namespaces removed 1426 */ 1427static int 1428nsPop(xmlParserCtxtPtr ctxt, int nr) 1429{ 1430 int i; 1431 1432 if (ctxt->nsTab == NULL) return(0); 
1433 if (ctxt->nsNr < nr) { 1434 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1435 nr = ctxt->nsNr; 1436 } 1437 if (ctxt->nsNr <= 0) 1438 return (0); 1439 1440 for (i = 0;i < nr;i++) { 1441 ctxt->nsNr--; 1442 ctxt->nsTab[ctxt->nsNr] = NULL; 1443 } 1444 return(nr); 1445} 1446#endif 1447 1448static int 1449xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1450 const xmlChar **atts; 1451 int *attallocs; 1452 int maxatts; 1453 1454 if (ctxt->atts == NULL) { 1455 maxatts = 55; /* allow for 10 attrs by default */ 1456 atts = (const xmlChar **) 1457 xmlMalloc(maxatts * sizeof(xmlChar *)); 1458 if (atts == NULL) goto mem_error; 1459 ctxt->atts = atts; 1460 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1461 if (attallocs == NULL) goto mem_error; 1462 ctxt->attallocs = attallocs; 1463 ctxt->maxatts = maxatts; 1464 } else if (nr + 5 > ctxt->maxatts) { 1465 maxatts = (nr + 5) * 2; 1466 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1467 maxatts * sizeof(const xmlChar *)); 1468 if (atts == NULL) goto mem_error; 1469 ctxt->atts = atts; 1470 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1471 (maxatts / 5) * sizeof(int)); 1472 if (attallocs == NULL) goto mem_error; 1473 ctxt->attallocs = attallocs; 1474 ctxt->maxatts = maxatts; 1475 } 1476 return(ctxt->maxatts); 1477mem_error: 1478 xmlErrMemory(ctxt, NULL); 1479 return(-1); 1480} 1481 1482/** 1483 * inputPush: 1484 * @ctxt: an XML parser context 1485 * @value: the parser input 1486 * 1487 * Pushes a new parser input on top of the input stack 1488 * 1489 * Returns -1 in case of error, the index in the stack otherwise 1490 */ 1491int 1492inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1493{ 1494 if ((ctxt == NULL) || (value == NULL)) 1495 return(-1); 1496 if (ctxt->inputNr >= ctxt->inputMax) { 1497 ctxt->inputMax *= 2; 1498 ctxt->inputTab = 1499 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1500 ctxt->inputMax * 1501 sizeof(ctxt->inputTab[0])); 1502 if 
(ctxt->inputTab == NULL) { 1503 xmlErrMemory(ctxt, NULL); 1504 xmlFreeInputStream(value); 1505 ctxt->inputMax /= 2; 1506 value = NULL; 1507 return (-1); 1508 } 1509 } 1510 ctxt->inputTab[ctxt->inputNr] = value; 1511 ctxt->input = value; 1512 return (ctxt->inputNr++); 1513} 1514/** 1515 * inputPop: 1516 * @ctxt: an XML parser context 1517 * 1518 * Pops the top parser input from the input stack 1519 * 1520 * Returns the input just removed 1521 */ 1522xmlParserInputPtr 1523inputPop(xmlParserCtxtPtr ctxt) 1524{ 1525 xmlParserInputPtr ret; 1526 1527 if (ctxt == NULL) 1528 return(NULL); 1529 if (ctxt->inputNr <= 0) 1530 return (NULL); 1531 ctxt->inputNr--; 1532 if (ctxt->inputNr > 0) 1533 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1534 else 1535 ctxt->input = NULL; 1536 ret = ctxt->inputTab[ctxt->inputNr]; 1537 ctxt->inputTab[ctxt->inputNr] = NULL; 1538 return (ret); 1539} 1540/** 1541 * nodePush: 1542 * @ctxt: an XML parser context 1543 * @value: the element node 1544 * 1545 * Pushes a new element node on top of the node stack 1546 * 1547 * Returns -1 in case of error, the index in the stack otherwise 1548 */ 1549int 1550nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1551{ 1552 if (ctxt == NULL) return(0); 1553 if (ctxt->nodeNr >= ctxt->nodeMax) { 1554 xmlNodePtr *tmp; 1555 1556 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1557 ctxt->nodeMax * 2 * 1558 sizeof(ctxt->nodeTab[0])); 1559 if (tmp == NULL) { 1560 xmlErrMemory(ctxt, NULL); 1561 return (-1); 1562 } 1563 ctxt->nodeTab = tmp; 1564 ctxt->nodeMax *= 2; 1565 } 1566 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && 1567 ((ctxt->options & XML_PARSE_HUGE) == 0)) { 1568 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1569 "Excessive depth in document: %d use XML_PARSE_HUGE option\n", 1570 xmlParserMaxDepth); 1571 ctxt->instate = XML_PARSER_EOF; 1572 return(-1); 1573 } 1574 ctxt->nodeTab[ctxt->nodeNr] = value; 1575 ctxt->node = value; 1576 return (ctxt->nodeNr++); 1577} 1578 1579/** 1580 * nodePop: 
1581 * @ctxt: an XML parser context 1582 * 1583 * Pops the top element node from the node stack 1584 * 1585 * Returns the node just removed 1586 */ 1587xmlNodePtr 1588nodePop(xmlParserCtxtPtr ctxt) 1589{ 1590 xmlNodePtr ret; 1591 1592 if (ctxt == NULL) return(NULL); 1593 if (ctxt->nodeNr <= 0) 1594 return (NULL); 1595 ctxt->nodeNr--; 1596 if (ctxt->nodeNr > 0) 1597 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1598 else 1599 ctxt->node = NULL; 1600 ret = ctxt->nodeTab[ctxt->nodeNr]; 1601 ctxt->nodeTab[ctxt->nodeNr] = NULL; 1602 return (ret); 1603} 1604 1605#ifdef LIBXML_PUSH_ENABLED 1606/** 1607 * nameNsPush: 1608 * @ctxt: an XML parser context 1609 * @value: the element name 1610 * @prefix: the element prefix 1611 * @URI: the element namespace name 1612 * 1613 * Pushes a new element name/prefix/URL on top of the name stack 1614 * 1615 * Returns -1 in case of error, the index in the stack otherwise 1616 */ 1617static int 1618nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1619 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1620{ 1621 if (ctxt->nameNr >= ctxt->nameMax) { 1622 const xmlChar * *tmp; 1623 void **tmp2; 1624 ctxt->nameMax *= 2; 1625 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1626 ctxt->nameMax * 1627 sizeof(ctxt->nameTab[0])); 1628 if (tmp == NULL) { 1629 ctxt->nameMax /= 2; 1630 goto mem_error; 1631 } 1632 ctxt->nameTab = tmp; 1633 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1634 ctxt->nameMax * 3 * 1635 sizeof(ctxt->pushTab[0])); 1636 if (tmp2 == NULL) { 1637 ctxt->nameMax /= 2; 1638 goto mem_error; 1639 } 1640 ctxt->pushTab = tmp2; 1641 } 1642 ctxt->nameTab[ctxt->nameNr] = value; 1643 ctxt->name = value; 1644 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1645 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1646 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1647 return (ctxt->nameNr++); 1648mem_error: 1649 xmlErrMemory(ctxt, NULL); 1650 return (-1); 1651} 1652/** 1653 * nameNsPop: 
1654 * @ctxt: an XML parser context 1655 * 1656 * Pops the top element/prefix/URI name from the name stack 1657 * 1658 * Returns the name just removed 1659 */ 1660static const xmlChar * 1661nameNsPop(xmlParserCtxtPtr ctxt) 1662{ 1663 const xmlChar *ret; 1664 1665 if (ctxt->nameNr <= 0) 1666 return (NULL); 1667 ctxt->nameNr--; 1668 if (ctxt->nameNr > 0) 1669 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1670 else 1671 ctxt->name = NULL; 1672 ret = ctxt->nameTab[ctxt->nameNr]; 1673 ctxt->nameTab[ctxt->nameNr] = NULL; 1674 return (ret); 1675} 1676#endif /* LIBXML_PUSH_ENABLED */ 1677 1678/** 1679 * namePush: 1680 * @ctxt: an XML parser context 1681 * @value: the element name 1682 * 1683 * Pushes a new element name on top of the name stack 1684 * 1685 * Returns -1 in case of error, the index in the stack otherwise 1686 */ 1687int 1688namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1689{ 1690 if (ctxt == NULL) return (-1); 1691 1692 if (ctxt->nameNr >= ctxt->nameMax) { 1693 const xmlChar * *tmp; 1694 ctxt->nameMax *= 2; 1695 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1696 ctxt->nameMax * 1697 sizeof(ctxt->nameTab[0])); 1698 if (tmp == NULL) { 1699 ctxt->nameMax /= 2; 1700 goto mem_error; 1701 } 1702 ctxt->nameTab = tmp; 1703 } 1704 ctxt->nameTab[ctxt->nameNr] = value; 1705 ctxt->name = value; 1706 return (ctxt->nameNr++); 1707mem_error: 1708 xmlErrMemory(ctxt, NULL); 1709 return (-1); 1710} 1711/** 1712 * namePop: 1713 * @ctxt: an XML parser context 1714 * 1715 * Pops the top element name from the name stack 1716 * 1717 * Returns the name just removed 1718 */ 1719const xmlChar * 1720namePop(xmlParserCtxtPtr ctxt) 1721{ 1722 const xmlChar *ret; 1723 1724 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1725 return (NULL); 1726 ctxt->nameNr--; 1727 if (ctxt->nameNr > 0) 1728 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1729 else 1730 ctxt->name = NULL; 1731 ret = ctxt->nameTab[ctxt->nameNr]; 1732 ctxt->nameTab[ctxt->nameNr] = NULL; 1733 return 
(ret); 1734} 1735 1736static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1737 if (ctxt->spaceNr >= ctxt->spaceMax) { 1738 int *tmp; 1739 1740 ctxt->spaceMax *= 2; 1741 tmp = (int *) xmlRealloc(ctxt->spaceTab, 1742 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1743 if (tmp == NULL) { 1744 xmlErrMemory(ctxt, NULL); 1745 ctxt->spaceMax /=2; 1746 return(-1); 1747 } 1748 ctxt->spaceTab = tmp; 1749 } 1750 ctxt->spaceTab[ctxt->spaceNr] = val; 1751 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1752 return(ctxt->spaceNr++); 1753} 1754 1755static int spacePop(xmlParserCtxtPtr ctxt) { 1756 int ret; 1757 if (ctxt->spaceNr <= 0) return(0); 1758 ctxt->spaceNr--; 1759 if (ctxt->spaceNr > 0) 1760 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1761 else 1762 ctxt->space = &ctxt->spaceTab[0]; 1763 ret = ctxt->spaceTab[ctxt->spaceNr]; 1764 ctxt->spaceTab[ctxt->spaceNr] = -1; 1765 return(ret); 1766} 1767 1768/* 1769 * Macros for accessing the content. Those should be used only by the parser, 1770 * and not exported. 1771 * 1772 * Dirty macros, i.e. one often need to make assumption on the context to 1773 * use them 1774 * 1775 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1776 * To be used with extreme caution since operations consuming 1777 * characters may move the input buffer to a different location ! 1778 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1779 * This should be used internally by the parser 1780 * only to compare to ASCII values otherwise it would break when 1781 * running with UTF-8 encoding. 1782 * RAW same as CUR but in the input buffer, bypass any token 1783 * extraction that may have been done 1784 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1785 * to compare on ASCII based substring. 1786 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1787 * strings without newlines within the parser. 
1788 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1789 * defined char within the parser. 1790 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1791 * 1792 * NEXT Skip to the next character, this does the proper decoding 1793 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1794 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1795 * CUR_CHAR(l) returns the current unicode character (int), set l 1796 * to the number of xmlChars used for the encoding [0-5]. 1797 * CUR_SCHAR same but operate on a string instead of the context 1798 * COPY_BUF copy the current unicode char to the target buffer, increment 1799 * the index 1800 * GROW, SHRINK handling of input buffers 1801 */ 1802 1803#define RAW (*ctxt->input->cur) 1804#define CUR (*ctxt->input->cur) 1805#define NXT(val) ctxt->input->cur[(val)] 1806#define CUR_PTR ctxt->input->cur 1807 1808#define CMP4( s, c1, c2, c3, c4 ) \ 1809 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1810 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1811#define CMP5( s, c1, c2, c3, c4, c5 ) \ 1812 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1813#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1814 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1815#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1816 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1817#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1818 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1819#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1820 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1821 ((unsigned char *) s)[ 8 ] == c9 ) 1822#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1823 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1824 ((unsigned char *) s)[ 9 ] == c10 ) 1825 1826#define SKIP(val) do { \ 1827 
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1828 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1829 if ((*ctxt->input->cur == 0) && \ 1830 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1831 xmlPopInput(ctxt); \ 1832 } while (0) 1833 1834#define SKIPL(val) do { \ 1835 int skipl; \ 1836 for(skipl=0; skipl<val; skipl++) { \ 1837 if (*(ctxt->input->cur) == '\n') { \ 1838 ctxt->input->line++; ctxt->input->col = 1; \ 1839 } else ctxt->input->col++; \ 1840 ctxt->nbChars++; \ 1841 ctxt->input->cur++; \ 1842 } \ 1843 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1844 if ((*ctxt->input->cur == 0) && \ 1845 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1846 xmlPopInput(ctxt); \ 1847 } while (0) 1848 1849#define SHRINK if ((ctxt->progressive == 0) && \ 1850 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1851 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1852 xmlSHRINK (ctxt); 1853 1854static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1855 xmlParserInputShrink(ctxt->input); 1856 if ((*ctxt->input->cur == 0) && 1857 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1858 xmlPopInput(ctxt); 1859 } 1860 1861#define GROW if ((ctxt->progressive == 0) && \ 1862 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 1863 xmlGROW (ctxt); 1864 1865static void xmlGROW (xmlParserCtxtPtr ctxt) { 1866 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1867 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && 1868 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1869 xmlPopInput(ctxt); 1870} 1871 1872#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 1873 1874#define NEXT xmlNextChar(ctxt) 1875 1876#define NEXT1 { \ 1877 ctxt->input->col++; \ 1878 ctxt->input->cur++; \ 1879 ctxt->nbChars++; \ 1880 if (*ctxt->input->cur == 0) \ 1881 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 1882 } 1883 1884#define NEXTL(l) do { \ 1885 if (*(ctxt->input->cur) == '\n') { \ 1886 
ctxt->input->line++; ctxt->input->col = 1; \ 1887 } else ctxt->input->col++; \ 1888 ctxt->input->cur += l; \ 1889 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1890 } while (0) 1891 1892#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 1893#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 1894 1895#define COPY_BUF(l,b,i,v) \ 1896 if (l == 1) b[i++] = (xmlChar) v; \ 1897 else i += xmlCopyCharMultiByte(&b[i],v) 1898 1899/** 1900 * xmlSkipBlankChars: 1901 * @ctxt: the XML parser context 1902 * 1903 * skip all blanks character found at that point in the input streams. 1904 * It pops up finished entities in the process if allowable at that point. 1905 * 1906 * Returns the number of space chars skipped 1907 */ 1908 1909int 1910xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 1911 int res = 0; 1912 1913 /* 1914 * It's Okay to use CUR/NEXT here since all the blanks are on 1915 * the ASCII range. 1916 */ 1917 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 1918 const xmlChar *cur; 1919 /* 1920 * if we are in the document content, go really fast 1921 */ 1922 cur = ctxt->input->cur; 1923 while (IS_BLANK_CH(*cur)) { 1924 if (*cur == '\n') { 1925 ctxt->input->line++; ctxt->input->col = 1; 1926 } 1927 cur++; 1928 res++; 1929 if (*cur == 0) { 1930 ctxt->input->cur = cur; 1931 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1932 cur = ctxt->input->cur; 1933 } 1934 } 1935 ctxt->input->cur = cur; 1936 } else { 1937 int cur; 1938 do { 1939 cur = CUR; 1940 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 1941 NEXT; 1942 cur = CUR; 1943 res++; 1944 } 1945 while ((cur == 0) && (ctxt->inputNr > 1) && 1946 (ctxt->instate != XML_PARSER_COMMENT)) { 1947 xmlPopInput(ctxt); 1948 cur = CUR; 1949 } 1950 /* 1951 * Need to handle support of entities branching here 1952 */ 1953 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 1954 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 1955 } 1956 return(res); 1957} 1958 
1959/************************************************************************ 1960 * * 1961 * Commodity functions to handle entities * 1962 * * 1963 ************************************************************************/ 1964 1965/** 1966 * xmlPopInput: 1967 * @ctxt: an XML parser context 1968 * 1969 * xmlPopInput: the current input pointed by ctxt->input came to an end 1970 * pop it and return the next char. 1971 * 1972 * Returns the current xmlChar in the parser context 1973 */ 1974xmlChar 1975xmlPopInput(xmlParserCtxtPtr ctxt) { 1976 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 1977 if (xmlParserDebugEntities) 1978 xmlGenericError(xmlGenericErrorContext, 1979 "Popping input %d\n", ctxt->inputNr); 1980 xmlFreeInputStream(inputPop(ctxt)); 1981 if ((*ctxt->input->cur == 0) && 1982 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1983 return(xmlPopInput(ctxt)); 1984 return(CUR); 1985} 1986 1987/** 1988 * xmlPushInput: 1989 * @ctxt: an XML parser context 1990 * @input: an XML parser input fragment (entity, XML fragment ...). 1991 * 1992 * xmlPushInput: switch to a new input stream which is stacked on top 1993 * of the previous one(s). 
1994 * Returns -1 in case of error or the index in the input stack 1995 */ 1996int 1997xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1998 int ret; 1999 if (input == NULL) return(-1); 2000 2001 if (xmlParserDebugEntities) { 2002 if ((ctxt->input != NULL) && (ctxt->input->filename)) 2003 xmlGenericError(xmlGenericErrorContext, 2004 "%s(%d): ", ctxt->input->filename, 2005 ctxt->input->line); 2006 xmlGenericError(xmlGenericErrorContext, 2007 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 2008 } 2009 ret = inputPush(ctxt, input); 2010 GROW; 2011 return(ret); 2012} 2013 2014/** 2015 * xmlParseCharRef: 2016 * @ctxt: an XML parser context 2017 * 2018 * parse Reference declarations 2019 * 2020 * [66] CharRef ::= '&#' [0-9]+ ';' | 2021 * '&#x' [0-9a-fA-F]+ ';' 2022 * 2023 * [ WFC: Legal Character ] 2024 * Characters referred to using character references must match the 2025 * production for Char. 2026 * 2027 * Returns the value parsed (as an int), 0 in case of error 2028 */ 2029int 2030xmlParseCharRef(xmlParserCtxtPtr ctxt) { 2031 unsigned int val = 0; 2032 int count = 0; 2033 unsigned int outofrange = 0; 2034 2035 /* 2036 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 2037 */ 2038 if ((RAW == '&') && (NXT(1) == '#') && 2039 (NXT(2) == 'x')) { 2040 SKIP(3); 2041 GROW; 2042 while (RAW != ';') { /* loop blocked by count */ 2043 if (count++ > 20) { 2044 count = 0; 2045 GROW; 2046 } 2047 if ((RAW >= '0') && (RAW <= '9')) 2048 val = val * 16 + (CUR - '0'); 2049 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 2050 val = val * 16 + (CUR - 'a') + 10; 2051 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 2052 val = val * 16 + (CUR - 'A') + 10; 2053 else { 2054 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2055 val = 0; 2056 break; 2057 } 2058 if (val > 0x10FFFF) 2059 outofrange = val; 2060 2061 NEXT; 2062 count++; 2063 } 2064 if (RAW == ';') { 2065 /* on purpose to avoid reentrancy problems with NEXT and SKIP 
*/ 2066 ctxt->input->col++; 2067 ctxt->nbChars ++; 2068 ctxt->input->cur++; 2069 } 2070 } else if ((RAW == '&') && (NXT(1) == '#')) { 2071 SKIP(2); 2072 GROW; 2073 while (RAW != ';') { /* loop blocked by count */ 2074 if (count++ > 20) { 2075 count = 0; 2076 GROW; 2077 } 2078 if ((RAW >= '0') && (RAW <= '9')) 2079 val = val * 10 + (CUR - '0'); 2080 else { 2081 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2082 val = 0; 2083 break; 2084 } 2085 if (val > 0x10FFFF) 2086 outofrange = val; 2087 2088 NEXT; 2089 count++; 2090 } 2091 if (RAW == ';') { 2092 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 2093 ctxt->input->col++; 2094 ctxt->nbChars ++; 2095 ctxt->input->cur++; 2096 } 2097 } else { 2098 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2099 } 2100 2101 /* 2102 * [ WFC: Legal Character ] 2103 * Characters referred to using character references must match the 2104 * production for Char. 2105 */ 2106 if ((IS_CHAR(val) && (outofrange == 0))) { 2107 return(val); 2108 } else { 2109 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2110 "xmlParseCharRef: invalid xmlChar value %d\n", 2111 val); 2112 } 2113 return(0); 2114} 2115 2116/** 2117 * xmlParseStringCharRef: 2118 * @ctxt: an XML parser context 2119 * @str: a pointer to an index in the string 2120 * 2121 * parse Reference declarations, variant parsing from a string rather 2122 * than an an input flow. 2123 * 2124 * [66] CharRef ::= '&#' [0-9]+ ';' | 2125 * '&#x' [0-9a-fA-F]+ ';' 2126 * 2127 * [ WFC: Legal Character ] 2128 * Characters referred to using character references must match the 2129 * production for Char. 
2130 * 2131 * Returns the value parsed (as an int), 0 in case of error, str will be 2132 * updated to the current value of the index 2133 */ 2134static int 2135xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 2136 const xmlChar *ptr; 2137 xmlChar cur; 2138 unsigned int val = 0; 2139 unsigned int outofrange = 0; 2140 2141 if ((str == NULL) || (*str == NULL)) return(0); 2142 ptr = *str; 2143 cur = *ptr; 2144 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 2145 ptr += 3; 2146 cur = *ptr; 2147 while (cur != ';') { /* Non input consuming loop */ 2148 if ((cur >= '0') && (cur <= '9')) 2149 val = val * 16 + (cur - '0'); 2150 else if ((cur >= 'a') && (cur <= 'f')) 2151 val = val * 16 + (cur - 'a') + 10; 2152 else if ((cur >= 'A') && (cur <= 'F')) 2153 val = val * 16 + (cur - 'A') + 10; 2154 else { 2155 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 2156 val = 0; 2157 break; 2158 } 2159 if (val > 0x10FFFF) 2160 outofrange = val; 2161 2162 ptr++; 2163 cur = *ptr; 2164 } 2165 if (cur == ';') 2166 ptr++; 2167 } else if ((cur == '&') && (ptr[1] == '#')){ 2168 ptr += 2; 2169 cur = *ptr; 2170 while (cur != ';') { /* Non input consuming loops */ 2171 if ((cur >= '0') && (cur <= '9')) 2172 val = val * 10 + (cur - '0'); 2173 else { 2174 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 2175 val = 0; 2176 break; 2177 } 2178 if (val > 0x10FFFF) 2179 outofrange = val; 2180 2181 ptr++; 2182 cur = *ptr; 2183 } 2184 if (cur == ';') 2185 ptr++; 2186 } else { 2187 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 2188 return(0); 2189 } 2190 *str = ptr; 2191 2192 /* 2193 * [ WFC: Legal Character ] 2194 * Characters referred to using character references must match the 2195 * production for Char. 
2196 */ 2197 if ((IS_CHAR(val) && (outofrange == 0))) { 2198 return(val); 2199 } else { 2200 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 2201 "xmlParseStringCharRef: invalid xmlChar value %d\n", 2202 val); 2203 } 2204 return(0); 2205} 2206 2207/** 2208 * xmlNewBlanksWrapperInputStream: 2209 * @ctxt: an XML parser context 2210 * @entity: an Entity pointer 2211 * 2212 * Create a new input stream for wrapping 2213 * blanks around a PEReference 2214 * 2215 * Returns the new input stream or NULL 2216 */ 2217 2218static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 2219 2220static xmlParserInputPtr 2221xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 2222 xmlParserInputPtr input; 2223 xmlChar *buffer; 2224 size_t length; 2225 if (entity == NULL) { 2226 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 2227 "xmlNewBlanksWrapperInputStream entity\n"); 2228 return(NULL); 2229 } 2230 if (xmlParserDebugEntities) 2231 xmlGenericError(xmlGenericErrorContext, 2232 "new blanks wrapper for entity: %s\n", entity->name); 2233 input = xmlNewInputStream(ctxt); 2234 if (input == NULL) { 2235 return(NULL); 2236 } 2237 length = xmlStrlen(entity->name) + 5; 2238 buffer = xmlMallocAtomic(length); 2239 if (buffer == NULL) { 2240 xmlErrMemory(ctxt, NULL); 2241 xmlFree(input); 2242 return(NULL); 2243 } 2244 buffer [0] = ' '; 2245 buffer [1] = '%'; 2246 buffer [length-3] = ';'; 2247 buffer [length-2] = ' '; 2248 buffer [length-1] = 0; 2249 memcpy(buffer + 2, entity->name, length - 5); 2250 input->free = deallocblankswrapper; 2251 input->base = buffer; 2252 input->cur = buffer; 2253 input->length = length; 2254 input->end = &buffer[length]; 2255 return(input); 2256} 2257 2258/** 2259 * xmlParserHandlePEReference: 2260 * @ctxt: the parser context 2261 * 2262 * [69] PEReference ::= '%' Name ';' 2263 * 2264 * [ WFC: No Recursion ] 2265 * A parsed entity must not contain a recursive 2266 * reference to itself, either directly or indirectly. 
2267 * 2268 * [ WFC: Entity Declared ] 2269 * In a document without any DTD, a document with only an internal DTD 2270 * subset which contains no parameter entity references, or a document 2271 * with "standalone='yes'", ... ... The declaration of a parameter 2272 * entity must precede any reference to it... 2273 * 2274 * [ VC: Entity Declared ] 2275 * In a document with an external subset or external parameter entities 2276 * with "standalone='no'", ... ... The declaration of a parameter entity 2277 * must precede any reference to it... 2278 * 2279 * [ WFC: In DTD ] 2280 * Parameter-entity references may only appear in the DTD. 2281 * NOTE: misleading but this is handled. 2282 * 2283 * A PEReference may have been detected in the current input stream 2284 * the handling is done accordingly to 2285 * http://www.w3.org/TR/REC-xml#entproc 2286 * i.e. 2287 * - Included in literal in entity values 2288 * - Included as Parameter Entity reference within DTDs 2289 */ 2290void 2291xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 2292 const xmlChar *name; 2293 xmlEntityPtr entity = NULL; 2294 xmlParserInputPtr input; 2295 2296 if (RAW != '%') return; 2297 switch(ctxt->instate) { 2298 case XML_PARSER_CDATA_SECTION: 2299 return; 2300 case XML_PARSER_COMMENT: 2301 return; 2302 case XML_PARSER_START_TAG: 2303 return; 2304 case XML_PARSER_END_TAG: 2305 return; 2306 case XML_PARSER_EOF: 2307 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 2308 return; 2309 case XML_PARSER_PROLOG: 2310 case XML_PARSER_START: 2311 case XML_PARSER_MISC: 2312 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 2313 return; 2314 case XML_PARSER_ENTITY_DECL: 2315 case XML_PARSER_CONTENT: 2316 case XML_PARSER_ATTRIBUTE_VALUE: 2317 case XML_PARSER_PI: 2318 case XML_PARSER_SYSTEM_LITERAL: 2319 case XML_PARSER_PUBLIC_LITERAL: 2320 /* we just ignore it there */ 2321 return; 2322 case XML_PARSER_EPILOG: 2323 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2324 return; 2325 case XML_PARSER_ENTITY_VALUE: 2326 
/* 2327 * NOTE: in the case of entity values, we don't do the 2328 * substitution here since we need the literal 2329 * entity value to be able to save the internal 2330 * subset of the document. 2331 * This will be handled by xmlStringDecodeEntities 2332 */ 2333 return; 2334 case XML_PARSER_DTD: 2335 /* 2336 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2337 * In the internal DTD subset, parameter-entity references 2338 * can occur only where markup declarations can occur, not 2339 * within markup declarations. 2340 * In that case this is handled in xmlParseMarkupDecl 2341 */ 2342 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2343 return; 2344 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2345 return; 2346 break; 2347 case XML_PARSER_IGNORE: 2348 return; 2349 } 2350 2351 NEXT; 2352 name = xmlParseName(ctxt); 2353 if (xmlParserDebugEntities) 2354 xmlGenericError(xmlGenericErrorContext, 2355 "PEReference: %s\n", name); 2356 if (name == NULL) { 2357 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2358 } else { 2359 if (RAW == ';') { 2360 NEXT; 2361 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2362 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2363 if (entity == NULL) { 2364 2365 /* 2366 * [ WFC: Entity Declared ] 2367 * In a document without any DTD, a document with only an 2368 * internal DTD subset which contains no parameter entity 2369 * references, or a document with "standalone='yes'", ... 2370 * ... The declaration of a parameter entity must precede 2371 * any reference to it... 2372 */ 2373 if ((ctxt->standalone == 1) || 2374 ((ctxt->hasExternalSubset == 0) && 2375 (ctxt->hasPErefs == 0))) { 2376 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2377 "PEReference: %%%s; not found\n", name); 2378 } else { 2379 /* 2380 * [ VC: Entity Declared ] 2381 * In a document with an external subset or external 2382 * parameter entities with "standalone='no'", ... 2383 * ... 
The declaration of a parameter entity must precede 2384 * any reference to it... 2385 */ 2386 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2387 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2388 "PEReference: %%%s; not found\n", 2389 name, NULL); 2390 } else 2391 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2392 "PEReference: %%%s; not found\n", 2393 name, NULL); 2394 ctxt->valid = 0; 2395 } 2396 } else if (ctxt->input->free != deallocblankswrapper) { 2397 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2398 if (xmlPushInput(ctxt, input) < 0) 2399 return; 2400 } else { 2401 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2402 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2403 xmlChar start[4]; 2404 xmlCharEncoding enc; 2405 2406 /* 2407 * handle the extra spaces added before and after 2408 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2409 * this is done independently. 2410 */ 2411 input = xmlNewEntityInputStream(ctxt, entity); 2412 if (xmlPushInput(ctxt, input) < 0) 2413 return; 2414 2415 /* 2416 * Get the 4 first bytes and decode the charset 2417 * if enc != XML_CHAR_ENCODING_NONE 2418 * plug some encoding conversion routines. 2419 * Note that, since we may have some non-UTF8 2420 * encoding (like UTF16, bug 135229), the 'length' 2421 * is not known, but we can calculate based upon 2422 * the amount of data in the buffer. 
2423 */ 2424 GROW 2425 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2426 start[0] = RAW; 2427 start[1] = NXT(1); 2428 start[2] = NXT(2); 2429 start[3] = NXT(3); 2430 enc = xmlDetectCharEncoding(start, 4); 2431 if (enc != XML_CHAR_ENCODING_NONE) { 2432 xmlSwitchEncoding(ctxt, enc); 2433 } 2434 } 2435 2436 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2437 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2438 (IS_BLANK_CH(NXT(5)))) { 2439 xmlParseTextDecl(ctxt); 2440 } 2441 } else { 2442 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2443 "PEReference: %s is not a parameter entity\n", 2444 name); 2445 } 2446 } 2447 } else { 2448 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2449 } 2450 } 2451} 2452 2453/* 2454 * Macro used to grow the current buffer. 2455 */ 2456#define growBuffer(buffer, n) { \ 2457 xmlChar *tmp; \ 2458 buffer##_size *= 2; \ 2459 buffer##_size += n; \ 2460 tmp = (xmlChar *) \ 2461 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2462 if (tmp == NULL) goto mem_error; \ 2463 buffer = tmp; \ 2464} 2465 2466/** 2467 * xmlStringLenDecodeEntities: 2468 * @ctxt: the parser context 2469 * @str: the input string 2470 * @len: the string length 2471 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2472 * @end: an end marker xmlChar, 0 if none 2473 * @end2: an end marker xmlChar, 0 if none 2474 * @end3: an end marker xmlChar, 0 if none 2475 * 2476 * Takes a entity string content and process to do the adequate substitutions. 2477 * 2478 * [67] Reference ::= EntityRef | CharRef 2479 * 2480 * [69] PEReference ::= '%' Name ';' 2481 * 2482 * Returns A newly allocated string with the substitution done. The caller 2483 * must deallocate it ! 
2484 */ 2485xmlChar * 2486xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2487 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2488 xmlChar *buffer = NULL; 2489 int buffer_size = 0; 2490 2491 xmlChar *current = NULL; 2492 xmlChar *rep = NULL; 2493 const xmlChar *last; 2494 xmlEntityPtr ent; 2495 int c,l; 2496 int nbchars = 0; 2497 2498 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2499 return(NULL); 2500 last = str + len; 2501 2502 if (((ctxt->depth > 40) && 2503 ((ctxt->options & XML_PARSE_HUGE) == 0)) || 2504 (ctxt->depth > 1024)) { 2505 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2506 return(NULL); 2507 } 2508 2509 /* 2510 * allocate a translation buffer. 2511 */ 2512 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2513 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 2514 if (buffer == NULL) goto mem_error; 2515 2516 /* 2517 * OK loop until we reach one of the ending char or a size limit. 2518 * we are operating on already parsed values. 
2519 */ 2520 if (str < last) 2521 c = CUR_SCHAR(str, l); 2522 else 2523 c = 0; 2524 while ((c != 0) && (c != end) && /* non input consuming loop */ 2525 (c != end2) && (c != end3)) { 2526 2527 if (c == 0) break; 2528 if ((c == '&') && (str[1] == '#')) { 2529 int val = xmlParseStringCharRef(ctxt, &str); 2530 if (val != 0) { 2531 COPY_BUF(0,buffer,nbchars,val); 2532 } 2533 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2534 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2535 } 2536 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2537 if (xmlParserDebugEntities) 2538 xmlGenericError(xmlGenericErrorContext, 2539 "String decoding Entity Reference: %.30s\n", 2540 str); 2541 ent = xmlParseStringEntityRef(ctxt, &str); 2542 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || 2543 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) 2544 goto int_error; 2545 if (ent != NULL) 2546 ctxt->nbentities += ent->checked; 2547 if ((ent != NULL) && 2548 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2549 if (ent->content != NULL) { 2550 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2551 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2552 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2553 } 2554 } else { 2555 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2556 "predefined entity has no content\n"); 2557 } 2558 } else if ((ent != NULL) && (ent->content != NULL)) { 2559 ctxt->depth++; 2560 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2561 0, 0, 0); 2562 ctxt->depth--; 2563 2564 if (rep != NULL) { 2565 current = rep; 2566 while (*current != 0) { /* non input consuming loop */ 2567 buffer[nbchars++] = *current++; 2568 if (nbchars > 2569 buffer_size - XML_PARSER_BUFFER_SIZE) { 2570 if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2571 goto int_error; 2572 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2573 } 2574 } 2575 xmlFree(rep); 2576 rep = NULL; 2577 } 2578 } else if (ent != NULL) { 2579 int i = xmlStrlen(ent->name); 2580 const xmlChar *cur = ent->name; 2581 2582 
buffer[nbchars++] = '&'; 2583 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2584 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2585 } 2586 for (;i > 0;i--) 2587 buffer[nbchars++] = *cur++; 2588 buffer[nbchars++] = ';'; 2589 } 2590 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2591 if (xmlParserDebugEntities) 2592 xmlGenericError(xmlGenericErrorContext, 2593 "String decoding PE Reference: %.30s\n", str); 2594 ent = xmlParseStringPEReference(ctxt, &str); 2595 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) 2596 goto int_error; 2597 if (ent != NULL) 2598 ctxt->nbentities += ent->checked; 2599 if (ent != NULL) { 2600 if (ent->content == NULL) { 2601 xmlLoadEntityContent(ctxt, ent); 2602 } 2603 ctxt->depth++; 2604 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2605 0, 0, 0); 2606 ctxt->depth--; 2607 if (rep != NULL) { 2608 current = rep; 2609 while (*current != 0) { /* non input consuming loop */ 2610 buffer[nbchars++] = *current++; 2611 if (nbchars > 2612 buffer_size - XML_PARSER_BUFFER_SIZE) { 2613 if (xmlParserEntityCheck(ctxt, nbchars, ent)) 2614 goto int_error; 2615 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2616 } 2617 } 2618 xmlFree(rep); 2619 rep = NULL; 2620 } 2621 } 2622 } else { 2623 COPY_BUF(l,buffer,nbchars,c); 2624 str += l; 2625 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2626 growBuffer(buffer, XML_PARSER_BUFFER_SIZE); 2627 } 2628 } 2629 if (str < last) 2630 c = CUR_SCHAR(str, l); 2631 else 2632 c = 0; 2633 } 2634 buffer[nbchars] = 0; 2635 return(buffer); 2636 2637mem_error: 2638 xmlErrMemory(ctxt, NULL); 2639int_error: 2640 if (rep != NULL) 2641 xmlFree(rep); 2642 if (buffer != NULL) 2643 xmlFree(buffer); 2644 return(NULL); 2645} 2646 2647/** 2648 * xmlStringDecodeEntities: 2649 * @ctxt: the parser context 2650 * @str: the input string 2651 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2652 * @end: an end marker xmlChar, 0 if none 2653 * @end2: an end marker xmlChar, 0 if none 2654 * @end3: 
an end marker xmlChar, 0 if none 2655 * 2656 * Takes a entity string content and process to do the adequate substitutions. 2657 * 2658 * [67] Reference ::= EntityRef | CharRef 2659 * 2660 * [69] PEReference ::= '%' Name ';' 2661 * 2662 * Returns A newly allocated string with the substitution done. The caller 2663 * must deallocate it ! 2664 */ 2665xmlChar * 2666xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2667 xmlChar end, xmlChar end2, xmlChar end3) { 2668 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2669 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2670 end, end2, end3)); 2671} 2672 2673/************************************************************************ 2674 * * 2675 * Commodity functions, cleanup needed ? * 2676 * * 2677 ************************************************************************/ 2678 2679/** 2680 * areBlanks: 2681 * @ctxt: an XML parser context 2682 * @str: a xmlChar * 2683 * @len: the size of @str 2684 * @blank_chars: we know the chars are blanks 2685 * 2686 * Is this a sequence of blank chars that one can ignore ? 2687 * 2688 * Returns 1 if ignorable 0 otherwise. 2689 */ 2690 2691static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2692 int blank_chars) { 2693 int i, ret; 2694 xmlNodePtr lastChild; 2695 2696 /* 2697 * Don't spend time trying to differentiate them, the same callback is 2698 * used ! 2699 */ 2700 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2701 return(0); 2702 2703 /* 2704 * Check for xml:space value. 
2705 */ 2706 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2707 (*(ctxt->space) == -2)) 2708 return(0); 2709 2710 /* 2711 * Check that the string is made of blanks 2712 */ 2713 if (blank_chars == 0) { 2714 for (i = 0;i < len;i++) 2715 if (!(IS_BLANK_CH(str[i]))) return(0); 2716 } 2717 2718 /* 2719 * Look if the element is mixed content in the DTD if available 2720 */ 2721 if (ctxt->node == NULL) return(0); 2722 if (ctxt->myDoc != NULL) { 2723 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2724 if (ret == 0) return(1); 2725 if (ret == 1) return(0); 2726 } 2727 2728 /* 2729 * Otherwise, heuristic :-\ 2730 */ 2731 if ((RAW != '<') && (RAW != 0xD)) return(0); 2732 if ((ctxt->node->children == NULL) && 2733 (RAW == '<') && (NXT(1) == '/')) return(0); 2734 2735 lastChild = xmlGetLastChild(ctxt->node); 2736 if (lastChild == NULL) { 2737 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2738 (ctxt->node->content != NULL)) return(0); 2739 } else if (xmlNodeIsText(lastChild)) 2740 return(0); 2741 else if ((ctxt->node->children != NULL) && 2742 (xmlNodeIsText(ctxt->node->children))) 2743 return(0); 2744 return(1); 2745} 2746 2747/************************************************************************ 2748 * * 2749 * Extra stuff for namespace support * 2750 * Relates to http://www.w3.org/TR/WD-xml-names * 2751 * * 2752 ************************************************************************/ 2753 2754/** 2755 * xmlSplitQName: 2756 * @ctxt: an XML parser context 2757 * @name: an XML parser context 2758 * @prefix: a xmlChar ** 2759 * 2760 * parse an UTF8 encoded XML qualified name string 2761 * 2762 * [NS 5] QName ::= (Prefix ':')? LocalPart 2763 * 2764 * [NS 6] Prefix ::= NCName 2765 * 2766 * [NS 7] LocalPart ::= NCName 2767 * 2768 * Returns the local part, and prefix is updated 2769 * to get the Prefix if any. 
2770 */ 2771 2772xmlChar * 2773xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2774 xmlChar buf[XML_MAX_NAMELEN + 5]; 2775 xmlChar *buffer = NULL; 2776 int len = 0; 2777 int max = XML_MAX_NAMELEN; 2778 xmlChar *ret = NULL; 2779 const xmlChar *cur = name; 2780 int c; 2781 2782 if (prefix == NULL) return(NULL); 2783 *prefix = NULL; 2784 2785 if (cur == NULL) return(NULL); 2786 2787#ifndef XML_XML_NAMESPACE 2788 /* xml: prefix is not really a namespace */ 2789 if ((cur[0] == 'x') && (cur[1] == 'm') && 2790 (cur[2] == 'l') && (cur[3] == ':')) 2791 return(xmlStrdup(name)); 2792#endif 2793 2794 /* nasty but well=formed */ 2795 if (cur[0] == ':') 2796 return(xmlStrdup(name)); 2797 2798 c = *cur++; 2799 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2800 buf[len++] = c; 2801 c = *cur++; 2802 } 2803 if (len >= max) { 2804 /* 2805 * Okay someone managed to make a huge name, so he's ready to pay 2806 * for the processing speed. 2807 */ 2808 max = len * 2; 2809 2810 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2811 if (buffer == NULL) { 2812 xmlErrMemory(ctxt, NULL); 2813 return(NULL); 2814 } 2815 memcpy(buffer, buf, len); 2816 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2817 if (len + 10 > max) { 2818 xmlChar *tmp; 2819 2820 max *= 2; 2821 tmp = (xmlChar *) xmlRealloc(buffer, 2822 max * sizeof(xmlChar)); 2823 if (tmp == NULL) { 2824 xmlFree(buffer); 2825 xmlErrMemory(ctxt, NULL); 2826 return(NULL); 2827 } 2828 buffer = tmp; 2829 } 2830 buffer[len++] = c; 2831 c = *cur++; 2832 } 2833 buffer[len] = 0; 2834 } 2835 2836 if ((c == ':') && (*cur == 0)) { 2837 if (buffer != NULL) 2838 xmlFree(buffer); 2839 *prefix = NULL; 2840 return(xmlStrdup(name)); 2841 } 2842 2843 if (buffer == NULL) 2844 ret = xmlStrndup(buf, len); 2845 else { 2846 ret = buffer; 2847 buffer = NULL; 2848 max = XML_MAX_NAMELEN; 2849 } 2850 2851 2852 if (c == ':') { 2853 c = *cur; 2854 *prefix = ret; 2855 if (c == 0) { 2856 
return(xmlStrndup(BAD_CAST "", 0)); 2857 } 2858 len = 0; 2859 2860 /* 2861 * Check that the first character is proper to start 2862 * a new name 2863 */ 2864 if (!(((c >= 0x61) && (c <= 0x7A)) || 2865 ((c >= 0x41) && (c <= 0x5A)) || 2866 (c == '_') || (c == ':'))) { 2867 int l; 2868 int first = CUR_SCHAR(cur, l); 2869 2870 if (!IS_LETTER(first) && (first != '_')) { 2871 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2872 "Name %s is not XML Namespace compliant\n", 2873 name); 2874 } 2875 } 2876 cur++; 2877 2878 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2879 buf[len++] = c; 2880 c = *cur++; 2881 } 2882 if (len >= max) { 2883 /* 2884 * Okay someone managed to make a huge name, so he's ready to pay 2885 * for the processing speed. 2886 */ 2887 max = len * 2; 2888 2889 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2890 if (buffer == NULL) { 2891 xmlErrMemory(ctxt, NULL); 2892 return(NULL); 2893 } 2894 memcpy(buffer, buf, len); 2895 while (c != 0) { /* tested bigname2.xml */ 2896 if (len + 10 > max) { 2897 xmlChar *tmp; 2898 2899 max *= 2; 2900 tmp = (xmlChar *) xmlRealloc(buffer, 2901 max * sizeof(xmlChar)); 2902 if (tmp == NULL) { 2903 xmlErrMemory(ctxt, NULL); 2904 xmlFree(buffer); 2905 return(NULL); 2906 } 2907 buffer = tmp; 2908 } 2909 buffer[len++] = c; 2910 c = *cur++; 2911 } 2912 buffer[len] = 0; 2913 } 2914 2915 if (buffer == NULL) 2916 ret = xmlStrndup(buf, len); 2917 else { 2918 ret = buffer; 2919 } 2920 } 2921 2922 return(ret); 2923} 2924 2925/************************************************************************ 2926 * * 2927 * The parser itself * 2928 * Relates to http://www.w3.org/TR/REC-xml * 2929 * * 2930 ************************************************************************/ 2931 2932/************************************************************************ 2933 * * 2934 * Routines to parse Name, NCName and NmToken * 2935 * * 2936 ************************************************************************/ 2937#ifdef DEBUG 
2938static unsigned long nbParseName = 0; 2939static unsigned long nbParseNmToken = 0; 2940static unsigned long nbParseNCName = 0; 2941static unsigned long nbParseNCNameComplex = 0; 2942static unsigned long nbParseNameComplex = 0; 2943static unsigned long nbParseStringName = 0; 2944#endif 2945 2946/* 2947 * The two following functions are related to the change of accepted 2948 * characters for Name and NmToken in the Revision 5 of XML-1.0 2949 * They correspond to the modified production [4] and the new production [4a] 2950 * changes in that revision. Also note that the macros used for the 2951 * productions Letter, Digit, CombiningChar and Extender are not needed 2952 * anymore. 2953 * We still keep compatibility to pre-revision5 parsing semantic if the 2954 * new XML_PARSE_OLD10 option is given to the parser. 2955 */ 2956static int 2957xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { 2958 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 2959 /* 2960 * Use the new checks of production [4] [4a] amd [5] of the 2961 * Update 5 of XML-1.0 2962 */ 2963 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 2964 (((c >= 'a') && (c <= 'z')) || 2965 ((c >= 'A') && (c <= 'Z')) || 2966 (c == '_') || (c == ':') || 2967 ((c >= 0xC0) && (c <= 0xD6)) || 2968 ((c >= 0xD8) && (c <= 0xF6)) || 2969 ((c >= 0xF8) && (c <= 0x2FF)) || 2970 ((c >= 0x370) && (c <= 0x37D)) || 2971 ((c >= 0x37F) && (c <= 0x1FFF)) || 2972 ((c >= 0x200C) && (c <= 0x200D)) || 2973 ((c >= 0x2070) && (c <= 0x218F)) || 2974 ((c >= 0x2C00) && (c <= 0x2FEF)) || 2975 ((c >= 0x3001) && (c <= 0xD7FF)) || 2976 ((c >= 0xF900) && (c <= 0xFDCF)) || 2977 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 2978 ((c >= 0x10000) && (c <= 0xEFFFF)))) 2979 return(1); 2980 } else { 2981 if (IS_LETTER(c) || (c == '_') || (c == ':')) 2982 return(1); 2983 } 2984 return(0); 2985} 2986 2987static int 2988xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { 2989 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 2990 /* 2991 * Use the new checks of 
production [4] [4a] amd [5] of the 2992 * Update 5 of XML-1.0 2993 */ 2994 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 2995 (((c >= 'a') && (c <= 'z')) || 2996 ((c >= 'A') && (c <= 'Z')) || 2997 ((c >= '0') && (c <= '9')) || /* !start */ 2998 (c == '_') || (c == ':') || 2999 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3000 ((c >= 0xC0) && (c <= 0xD6)) || 3001 ((c >= 0xD8) && (c <= 0xF6)) || 3002 ((c >= 0xF8) && (c <= 0x2FF)) || 3003 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3004 ((c >= 0x370) && (c <= 0x37D)) || 3005 ((c >= 0x37F) && (c <= 0x1FFF)) || 3006 ((c >= 0x200C) && (c <= 0x200D)) || 3007 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3008 ((c >= 0x2070) && (c <= 0x218F)) || 3009 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3010 ((c >= 0x3001) && (c <= 0xD7FF)) || 3011 ((c >= 0xF900) && (c <= 0xFDCF)) || 3012 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3013 ((c >= 0x10000) && (c <= 0xEFFFF)))) 3014 return(1); 3015 } else { 3016 if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3017 (c == '.') || (c == '-') || 3018 (c == '_') || (c == ':') || 3019 (IS_COMBINING(c)) || 3020 (IS_EXTENDER(c))) 3021 return(1); 3022 } 3023 return(0); 3024} 3025 3026static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 3027 int *len, int *alloc, int normalize); 3028 3029static const xmlChar * 3030xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 3031 int len = 0, l; 3032 int c; 3033 int count = 0; 3034 3035#ifdef DEBUG 3036 nbParseNameComplex++; 3037#endif 3038 3039 /* 3040 * Handler for more complex cases 3041 */ 3042 GROW; 3043 c = CUR_CHAR(l); 3044 if ((ctxt->options & XML_PARSE_OLD10) == 0) { 3045 /* 3046 * Use the new checks of production [4] [4a] amd [5] of the 3047 * Update 5 of XML-1.0 3048 */ 3049 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3050 (!(((c >= 'a') && (c <= 'z')) || 3051 ((c >= 'A') && (c <= 'Z')) || 3052 (c == '_') || (c == ':') || 3053 ((c >= 0xC0) && (c <= 0xD6)) || 3054 ((c >= 0xD8) && (c <= 0xF6)) || 3055 ((c >= 
0xF8) && (c <= 0x2FF)) || 3056 ((c >= 0x370) && (c <= 0x37D)) || 3057 ((c >= 0x37F) && (c <= 0x1FFF)) || 3058 ((c >= 0x200C) && (c <= 0x200D)) || 3059 ((c >= 0x2070) && (c <= 0x218F)) || 3060 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3061 ((c >= 0x3001) && (c <= 0xD7FF)) || 3062 ((c >= 0xF900) && (c <= 0xFDCF)) || 3063 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3064 ((c >= 0x10000) && (c <= 0xEFFFF))))) { 3065 return(NULL); 3066 } 3067 len += l; 3068 NEXTL(l); 3069 c = CUR_CHAR(l); 3070 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ 3071 (((c >= 'a') && (c <= 'z')) || 3072 ((c >= 'A') && (c <= 'Z')) || 3073 ((c >= '0') && (c <= '9')) || /* !start */ 3074 (c == '_') || (c == ':') || 3075 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ 3076 ((c >= 0xC0) && (c <= 0xD6)) || 3077 ((c >= 0xD8) && (c <= 0xF6)) || 3078 ((c >= 0xF8) && (c <= 0x2FF)) || 3079 ((c >= 0x300) && (c <= 0x36F)) || /* !start */ 3080 ((c >= 0x370) && (c <= 0x37D)) || 3081 ((c >= 0x37F) && (c <= 0x1FFF)) || 3082 ((c >= 0x200C) && (c <= 0x200D)) || 3083 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ 3084 ((c >= 0x2070) && (c <= 0x218F)) || 3085 ((c >= 0x2C00) && (c <= 0x2FEF)) || 3086 ((c >= 0x3001) && (c <= 0xD7FF)) || 3087 ((c >= 0xF900) && (c <= 0xFDCF)) || 3088 ((c >= 0xFDF0) && (c <= 0xFFFD)) || 3089 ((c >= 0x10000) && (c <= 0xEFFFF)) 3090 )) { 3091 if (count++ > 100) { 3092 count = 0; 3093 GROW; 3094 } 3095 len += l; 3096 NEXTL(l); 3097 c = CUR_CHAR(l); 3098 } 3099 } else { 3100 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 3101 (!IS_LETTER(c) && (c != '_') && 3102 (c != ':'))) { 3103 return(NULL); 3104 } 3105 len += l; 3106 NEXTL(l); 3107 c = CUR_CHAR(l); 3108 3109 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3110 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 3111 (c == '.') || (c == '-') || 3112 (c == '_') || (c == ':') || 3113 (IS_COMBINING(c)) || 3114 (IS_EXTENDER(c)))) { 3115 if (count++ > 100) { 3116 count = 0; 3117 GROW; 3118 } 3119 
len += l; 3120 NEXTL(l); 3121 c = CUR_CHAR(l); 3122 } 3123 } 3124 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 3125 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 3126 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3127} 3128 3129/** 3130 * xmlParseName: 3131 * @ctxt: an XML parser context 3132 * 3133 * parse an XML name. 3134 * 3135 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3136 * CombiningChar | Extender 3137 * 3138 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3139 * 3140 * [6] Names ::= Name (#x20 Name)* 3141 * 3142 * Returns the Name parsed or NULL 3143 */ 3144 3145const xmlChar * 3146xmlParseName(xmlParserCtxtPtr ctxt) { 3147 const xmlChar *in; 3148 const xmlChar *ret; 3149 int count = 0; 3150 3151 GROW; 3152 3153#ifdef DEBUG 3154 nbParseName++; 3155#endif 3156 3157 /* 3158 * Accelerator for simple ASCII names 3159 */ 3160 in = ctxt->input->cur; 3161 if (((*in >= 0x61) && (*in <= 0x7A)) || 3162 ((*in >= 0x41) && (*in <= 0x5A)) || 3163 (*in == '_') || (*in == ':')) { 3164 in++; 3165 while (((*in >= 0x61) && (*in <= 0x7A)) || 3166 ((*in >= 0x41) && (*in <= 0x5A)) || 3167 ((*in >= 0x30) && (*in <= 0x39)) || 3168 (*in == '_') || (*in == '-') || 3169 (*in == ':') || (*in == '.')) 3170 in++; 3171 if ((*in > 0) && (*in < 0x80)) { 3172 count = in - ctxt->input->cur; 3173 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3174 ctxt->input->cur = in; 3175 ctxt->nbChars += count; 3176 ctxt->input->col += count; 3177 if (ret == NULL) 3178 xmlErrMemory(ctxt, NULL); 3179 return(ret); 3180 } 3181 } 3182 /* accelerator for special cases */ 3183 return(xmlParseNameComplex(ctxt)); 3184} 3185 3186static const xmlChar * 3187xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 3188 int len = 0, l; 3189 int c; 3190 int count = 0; 3191 3192#ifdef DEBUG 3193 nbParseNCNameComplex++; 3194#endif 3195 3196 /* 3197 * Handler for more complex cases 3198 */ 3199 GROW; 3200 c = CUR_CHAR(l); 3201 if ((c == ' 
') || (c == '>') || (c == '/') || /* accelerators */ 3202 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { 3203 return(NULL); 3204 } 3205 3206 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 3207 (xmlIsNameChar(ctxt, c) && (c != ':'))) { 3208 if (count++ > 100) { 3209 count = 0; 3210 GROW; 3211 } 3212 len += l; 3213 NEXTL(l); 3214 c = CUR_CHAR(l); 3215 } 3216 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 3217} 3218 3219/** 3220 * xmlParseNCName: 3221 * @ctxt: an XML parser context 3222 * @len: lenght of the string parsed 3223 * 3224 * parse an XML name. 3225 * 3226 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 3227 * CombiningChar | Extender 3228 * 3229 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 3230 * 3231 * Returns the Name parsed or NULL 3232 */ 3233 3234static const xmlChar * 3235xmlParseNCName(xmlParserCtxtPtr ctxt) { 3236 const xmlChar *in; 3237 const xmlChar *ret; 3238 int count = 0; 3239 3240#ifdef DEBUG 3241 nbParseNCName++; 3242#endif 3243 3244 /* 3245 * Accelerator for simple ASCII names 3246 */ 3247 in = ctxt->input->cur; 3248 if (((*in >= 0x61) && (*in <= 0x7A)) || 3249 ((*in >= 0x41) && (*in <= 0x5A)) || 3250 (*in == '_')) { 3251 in++; 3252 while (((*in >= 0x61) && (*in <= 0x7A)) || 3253 ((*in >= 0x41) && (*in <= 0x5A)) || 3254 ((*in >= 0x30) && (*in <= 0x39)) || 3255 (*in == '_') || (*in == '-') || 3256 (*in == '.')) 3257 in++; 3258 if ((*in > 0) && (*in < 0x80)) { 3259 count = in - ctxt->input->cur; 3260 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 3261 ctxt->input->cur = in; 3262 ctxt->nbChars += count; 3263 ctxt->input->col += count; 3264 if (ret == NULL) { 3265 xmlErrMemory(ctxt, NULL); 3266 } 3267 return(ret); 3268 } 3269 } 3270 return(xmlParseNCNameComplex(ctxt)); 3271} 3272 3273/** 3274 * xmlParseNameAndCompare: 3275 * @ctxt: an XML parser context 3276 * 3277 * parse an XML name and compares for match 3278 * (specialized for endtag parsing) 3279 * 3280 * Returns NULL for an 
illegal name, (xmlChar*) 1 for success 3281 * and the name for mismatch 3282 */ 3283 3284static const xmlChar * 3285xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 3286 register const xmlChar *cmp = other; 3287 register const xmlChar *in; 3288 const xmlChar *ret; 3289 3290 GROW; 3291 3292 in = ctxt->input->cur; 3293 while (*in != 0 && *in == *cmp) { 3294 ++in; 3295 ++cmp; 3296 ctxt->input->col++; 3297 } 3298 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 3299 /* success */ 3300 ctxt->input->cur = in; 3301 return (const xmlChar*) 1; 3302 } 3303 /* failure (or end of input buffer), check with full function */ 3304 ret = xmlParseName (ctxt); 3305 /* strings coming from the dictionnary direct compare possible */ 3306 if (ret == other) { 3307 return (const xmlChar*) 1; 3308 } 3309 return ret; 3310} 3311 3312/** 3313 * xmlParseStringName: 3314 * @ctxt: an XML parser context 3315 * @str: a pointer to the string pointer (IN/OUT) 3316 * 3317 * parse an XML name. 3318 * 3319 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 3320 * CombiningChar | Extender 3321 * 3322 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 3323 * 3324 * [6] Names ::= Name (#x20 Name)* 3325 * 3326 * Returns the Name parsed or NULL. The @str pointer 3327 * is updated to the current location in the string. 
3328 */ 3329 3330static xmlChar * 3331xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 3332 xmlChar buf[XML_MAX_NAMELEN + 5]; 3333 const xmlChar *cur = *str; 3334 int len = 0, l; 3335 int c; 3336 3337#ifdef DEBUG 3338 nbParseStringName++; 3339#endif 3340 3341 c = CUR_SCHAR(cur, l); 3342 if (!xmlIsNameStartChar(ctxt, c)) { 3343 return(NULL); 3344 } 3345 3346 COPY_BUF(l,buf,len,c); 3347 cur += l; 3348 c = CUR_SCHAR(cur, l); 3349 while (xmlIsNameChar(ctxt, c)) { 3350 COPY_BUF(l,buf,len,c); 3351 cur += l; 3352 c = CUR_SCHAR(cur, l); 3353 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 3354 /* 3355 * Okay someone managed to make a huge name, so he's ready to pay 3356 * for the processing speed. 3357 */ 3358 xmlChar *buffer; 3359 int max = len * 2; 3360 3361 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3362 if (buffer == NULL) { 3363 xmlErrMemory(ctxt, NULL); 3364 return(NULL); 3365 } 3366 memcpy(buffer, buf, len); 3367 while (xmlIsNameChar(ctxt, c)) { 3368 if (len + 10 > max) { 3369 xmlChar *tmp; 3370 max *= 2; 3371 tmp = (xmlChar *) xmlRealloc(buffer, 3372 max * sizeof(xmlChar)); 3373 if (tmp == NULL) { 3374 xmlErrMemory(ctxt, NULL); 3375 xmlFree(buffer); 3376 return(NULL); 3377 } 3378 buffer = tmp; 3379 } 3380 COPY_BUF(l,buffer,len,c); 3381 cur += l; 3382 c = CUR_SCHAR(cur, l); 3383 } 3384 buffer[len] = 0; 3385 *str = cur; 3386 return(buffer); 3387 } 3388 } 3389 *str = cur; 3390 return(xmlStrndup(buf, len)); 3391} 3392 3393/** 3394 * xmlParseNmtoken: 3395 * @ctxt: an XML parser context 3396 * 3397 * parse an XML Nmtoken. 
3398 * 3399 * [7] Nmtoken ::= (NameChar)+ 3400 * 3401 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 3402 * 3403 * Returns the Nmtoken parsed or NULL 3404 */ 3405 3406xmlChar * 3407xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 3408 xmlChar buf[XML_MAX_NAMELEN + 5]; 3409 int len = 0, l; 3410 int c; 3411 int count = 0; 3412 3413#ifdef DEBUG 3414 nbParseNmToken++; 3415#endif 3416 3417 GROW; 3418 c = CUR_CHAR(l); 3419 3420 while (xmlIsNameChar(ctxt, c)) { 3421 if (count++ > 100) { 3422 count = 0; 3423 GROW; 3424 } 3425 COPY_BUF(l,buf,len,c); 3426 NEXTL(l); 3427 c = CUR_CHAR(l); 3428 if (len >= XML_MAX_NAMELEN) { 3429 /* 3430 * Okay someone managed to make a huge token, so he's ready to pay 3431 * for the processing speed. 3432 */ 3433 xmlChar *buffer; 3434 int max = len * 2; 3435 3436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 3437 if (buffer == NULL) { 3438 xmlErrMemory(ctxt, NULL); 3439 return(NULL); 3440 } 3441 memcpy(buffer, buf, len); 3442 while (xmlIsNameChar(ctxt, c)) { 3443 if (count++ > 100) { 3444 count = 0; 3445 GROW; 3446 } 3447 if (len + 10 > max) { 3448 xmlChar *tmp; 3449 3450 max *= 2; 3451 tmp = (xmlChar *) xmlRealloc(buffer, 3452 max * sizeof(xmlChar)); 3453 if (tmp == NULL) { 3454 xmlErrMemory(ctxt, NULL); 3455 xmlFree(buffer); 3456 return(NULL); 3457 } 3458 buffer = tmp; 3459 } 3460 COPY_BUF(l,buffer,len,c); 3461 NEXTL(l); 3462 c = CUR_CHAR(l); 3463 } 3464 buffer[len] = 0; 3465 return(buffer); 3466 } 3467 } 3468 if (len == 0) 3469 return(NULL); 3470 return(xmlStrndup(buf, len)); 3471} 3472 3473/** 3474 * xmlParseEntityValue: 3475 * @ctxt: an XML parser context 3476 * @orig: if non-NULL store a copy of the original entity value 3477 * 3478 * parse a value for ENTITY declarations 3479 * 3480 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 3481 * "'" ([^%&'] | PEReference | Reference)* "'" 3482 * 3483 * Returns the EntityValue parsed with reference substituted or NULL 3484 */ 3485 3486xmlChar * 
3487xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 3488 xmlChar *buf = NULL; 3489 int len = 0; 3490 int size = XML_PARSER_BUFFER_SIZE; 3491 int c, l; 3492 xmlChar stop; 3493 xmlChar *ret = NULL; 3494 const xmlChar *cur = NULL; 3495 xmlParserInputPtr input; 3496 3497 if (RAW == '"') stop = '"'; 3498 else if (RAW == '\'') stop = '\''; 3499 else { 3500 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 3501 return(NULL); 3502 } 3503 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3504 if (buf == NULL) { 3505 xmlErrMemory(ctxt, NULL); 3506 return(NULL); 3507 } 3508 3509 /* 3510 * The content of the entity definition is copied in a buffer. 3511 */ 3512 3513 ctxt->instate = XML_PARSER_ENTITY_VALUE; 3514 input = ctxt->input; 3515 GROW; 3516 NEXT; 3517 c = CUR_CHAR(l); 3518 /* 3519 * NOTE: 4.4.5 Included in Literal 3520 * When a parameter entity reference appears in a literal entity 3521 * value, ... a single or double quote character in the replacement 3522 * text is always treated as a normal data character and will not 3523 * terminate the literal. 3524 * In practice it means we stop the loop only when back at parsing 3525 * the initial entity and the quote is found 3526 */ 3527 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 3528 (ctxt->input != input))) { 3529 if (len + 5 >= size) { 3530 xmlChar *tmp; 3531 3532 size *= 2; 3533 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3534 if (tmp == NULL) { 3535 xmlErrMemory(ctxt, NULL); 3536 xmlFree(buf); 3537 return(NULL); 3538 } 3539 buf = tmp; 3540 } 3541 COPY_BUF(l,buf,len,c); 3542 NEXTL(l); 3543 /* 3544 * Pop-up of finished entities. 3545 */ 3546 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 3547 xmlPopInput(ctxt); 3548 3549 GROW; 3550 c = CUR_CHAR(l); 3551 if (c == 0) { 3552 GROW; 3553 c = CUR_CHAR(l); 3554 } 3555 } 3556 buf[len] = 0; 3557 3558 /* 3559 * Raise problem w.r.t. '&' and '%' being used in non-entities 3560 * reference constructs. 
Note Charref will be handled in 3561 * xmlStringDecodeEntities() 3562 */ 3563 cur = buf; 3564 while (*cur != 0) { /* non input consuming */ 3565 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 3566 xmlChar *name; 3567 xmlChar tmp = *cur; 3568 3569 cur++; 3570 name = xmlParseStringName(ctxt, &cur); 3571 if ((name == NULL) || (*cur != ';')) { 3572 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 3573 "EntityValue: '%c' forbidden except for entities references\n", 3574 tmp); 3575 } 3576 if ((tmp == '%') && (ctxt->inSubset == 1) && 3577 (ctxt->inputNr == 1)) { 3578 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 3579 } 3580 if (name != NULL) 3581 xmlFree(name); 3582 if (*cur == 0) 3583 break; 3584 } 3585 cur++; 3586 } 3587 3588 /* 3589 * Then PEReference entities are substituted. 3590 */ 3591 if (c != stop) { 3592 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3593 xmlFree(buf); 3594 } else { 3595 NEXT; 3596 /* 3597 * NOTE: 4.4.7 Bypassed 3598 * When a general entity reference appears in the EntityValue in 3599 * an entity declaration, it is bypassed and left as is. 3600 * so XML_SUBSTITUTE_REF is not set here. 3601 */ 3602 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3603 0, 0, 0); 3604 if (orig != NULL) 3605 *orig = buf; 3606 else 3607 xmlFree(buf); 3608 } 3609 3610 return(ret); 3611} 3612 3613/** 3614 * xmlParseAttValueComplex: 3615 * @ctxt: an XML parser context 3616 * @len: the resulting attribute len 3617 * @normalize: wether to apply the inner normalization 3618 * 3619 * parse a value for an attribute, this is the fallback function 3620 * of xmlParseAttValue() when the attribute parsing requires handling 3621 * of non-ASCII characters, or normalization compaction. 3622 * 3623 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3624 */ 3625static xmlChar * 3626xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3627 xmlChar limit = 0; 3628 xmlChar *buf = NULL; 3629 xmlChar *rep = NULL; 3630 int len = 0; 3631 int buf_size = 0; 3632 int c, l, in_space = 0; 3633 xmlChar *current = NULL; 3634 xmlEntityPtr ent; 3635 3636 if (NXT(0) == '"') { 3637 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3638 limit = '"'; 3639 NEXT; 3640 } else if (NXT(0) == '\'') { 3641 limit = '\''; 3642 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3643 NEXT; 3644 } else { 3645 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3646 return(NULL); 3647 } 3648 3649 /* 3650 * allocate a translation buffer. 3651 */ 3652 buf_size = XML_PARSER_BUFFER_SIZE; 3653 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3654 if (buf == NULL) goto mem_error; 3655 3656 /* 3657 * OK loop until we reach one of the ending char or a size limit. 3658 */ 3659 c = CUR_CHAR(l); 3660 while ((NXT(0) != limit) && /* checked */ 3661 (IS_CHAR(c)) && (c != '<')) { 3662 if (c == 0) break; 3663 if (c == '&') { 3664 in_space = 0; 3665 if (NXT(1) == '#') { 3666 int val = xmlParseCharRef(ctxt); 3667 3668 if (val == '&') { 3669 if (ctxt->replaceEntities) { 3670 if (len > buf_size - 10) { 3671 growBuffer(buf, 10); 3672 } 3673 buf[len++] = '&'; 3674 } else { 3675 /* 3676 * The reparsing will be done in xmlStringGetNodeList() 3677 * called by the attribute() function in SAX.c 3678 */ 3679 if (len > buf_size - 10) { 3680 growBuffer(buf, 10); 3681 } 3682 buf[len++] = '&'; 3683 buf[len++] = '#'; 3684 buf[len++] = '3'; 3685 buf[len++] = '8'; 3686 buf[len++] = ';'; 3687 } 3688 } else if (val != 0) { 3689 if (len > buf_size - 10) { 3690 growBuffer(buf, 10); 3691 } 3692 len += xmlCopyChar(0, &buf[len], val); 3693 } 3694 } else { 3695 ent = xmlParseEntityRef(ctxt); 3696 ctxt->nbentities++; 3697 if (ent != NULL) 3698 ctxt->nbentities += ent->owner; 3699 if ((ent != NULL) && 3700 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 
3701 if (len > buf_size - 10) { 3702 growBuffer(buf, 10); 3703 } 3704 if ((ctxt->replaceEntities == 0) && 3705 (ent->content[0] == '&')) { 3706 buf[len++] = '&'; 3707 buf[len++] = '#'; 3708 buf[len++] = '3'; 3709 buf[len++] = '8'; 3710 buf[len++] = ';'; 3711 } else { 3712 buf[len++] = ent->content[0]; 3713 } 3714 } else if ((ent != NULL) && 3715 (ctxt->replaceEntities != 0)) { 3716 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3717 rep = xmlStringDecodeEntities(ctxt, ent->content, 3718 XML_SUBSTITUTE_REF, 3719 0, 0, 0); 3720 if (rep != NULL) { 3721 current = rep; 3722 while (*current != 0) { /* non input consuming */ 3723 if ((*current == 0xD) || (*current == 0xA) || 3724 (*current == 0x9)) { 3725 buf[len++] = 0x20; 3726 current++; 3727 } else 3728 buf[len++] = *current++; 3729 if (len > buf_size - 10) { 3730 growBuffer(buf, 10); 3731 } 3732 } 3733 xmlFree(rep); 3734 rep = NULL; 3735 } 3736 } else { 3737 if (len > buf_size - 10) { 3738 growBuffer(buf, 10); 3739 } 3740 if (ent->content != NULL) 3741 buf[len++] = ent->content[0]; 3742 } 3743 } else if (ent != NULL) { 3744 int i = xmlStrlen(ent->name); 3745 const xmlChar *cur = ent->name; 3746 3747 /* 3748 * This may look absurd but is needed to detect 3749 * entities problems 3750 */ 3751 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3752 (ent->content != NULL)) { 3753 rep = xmlStringDecodeEntities(ctxt, ent->content, 3754 XML_SUBSTITUTE_REF, 0, 0, 0); 3755 if (rep != NULL) { 3756 xmlFree(rep); 3757 rep = NULL; 3758 } 3759 } 3760 3761 /* 3762 * Just output the reference 3763 */ 3764 buf[len++] = '&'; 3765 while (len > buf_size - i - 10) { 3766 growBuffer(buf, i + 10); 3767 } 3768 for (;i > 0;i--) 3769 buf[len++] = *cur++; 3770 buf[len++] = ';'; 3771 } 3772 } 3773 } else { 3774 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3775 if ((len != 0) || (!normalize)) { 3776 if ((!normalize) || (!in_space)) { 3777 COPY_BUF(l,buf,len,0x20); 3778 while (len > buf_size - 10) { 3779 growBuffer(buf, 
10); 3780 } 3781 } 3782 in_space = 1; 3783 } 3784 } else { 3785 in_space = 0; 3786 COPY_BUF(l,buf,len,c); 3787 if (len > buf_size - 10) { 3788 growBuffer(buf, 10); 3789 } 3790 } 3791 NEXTL(l); 3792 } 3793 GROW; 3794 c = CUR_CHAR(l); 3795 } 3796 if ((in_space) && (normalize)) { 3797 while (buf[len - 1] == 0x20) len--; 3798 } 3799 buf[len] = 0; 3800 if (RAW == '<') { 3801 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3802 } else if (RAW != limit) { 3803 if ((c != 0) && (!IS_CHAR(c))) { 3804 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, 3805 "invalid character in attribute value\n"); 3806 } else { 3807 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3808 "AttValue: ' expected\n"); 3809 } 3810 } else 3811 NEXT; 3812 if (attlen != NULL) *attlen = len; 3813 return(buf); 3814 3815mem_error: 3816 xmlErrMemory(ctxt, NULL); 3817 if (buf != NULL) 3818 xmlFree(buf); 3819 if (rep != NULL) 3820 xmlFree(rep); 3821 return(NULL); 3822} 3823 3824/** 3825 * xmlParseAttValue: 3826 * @ctxt: an XML parser context 3827 * 3828 * parse a value for an attribute 3829 * Note: the parser won't do substitution of entities here, this 3830 * will be handled later in xmlStringGetNodeList 3831 * 3832 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3833 * "'" ([^<&'] | Reference)* "'" 3834 * 3835 * 3.3.3 Attribute-Value Normalization: 3836 * Before the value of an attribute is passed to the application or 3837 * checked for validity, the XML processor must normalize it as follows: 3838 * - a character reference is processed by appending the referenced 3839 * character to the attribute value 3840 * - an entity reference is processed by recursively processing the 3841 * replacement text of the entity 3842 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3843 * appending #x20 to the normalized value, except that only a single 3844 * #x20 is appended for a "#xD#xA" sequence that is part of an external 3845 * parsed entity or the literal entity value of an internal parsed entity 
3846 * - other characters are processed by appending them to the normalized value 3847 * If the declared value is not CDATA, then the XML processor must further 3848 * process the normalized attribute value by discarding any leading and 3849 * trailing space (#x20) characters, and by replacing sequences of space 3850 * (#x20) characters by a single space (#x20) character. 3851 * All attributes for which no declaration has been read should be treated 3852 * by a non-validating parser as if declared CDATA. 3853 * 3854 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3855 */ 3856 3857 3858xmlChar * 3859xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3860 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 3861 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3862} 3863 3864/** 3865 * xmlParseSystemLiteral: 3866 * @ctxt: an XML parser context 3867 * 3868 * parse an XML Literal 3869 * 3870 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 3871 * 3872 * Returns the SystemLiteral parsed or NULL 3873 */ 3874 3875xmlChar * 3876xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 3877 xmlChar *buf = NULL; 3878 int len = 0; 3879 int size = XML_PARSER_BUFFER_SIZE; 3880 int cur, l; 3881 xmlChar stop; 3882 int state = ctxt->instate; 3883 int count = 0; 3884 3885 SHRINK; 3886 if (RAW == '"') { 3887 NEXT; 3888 stop = '"'; 3889 } else if (RAW == '\'') { 3890 NEXT; 3891 stop = '\''; 3892 } else { 3893 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3894 return(NULL); 3895 } 3896 3897 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3898 if (buf == NULL) { 3899 xmlErrMemory(ctxt, NULL); 3900 return(NULL); 3901 } 3902 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 3903 cur = CUR_CHAR(l); 3904 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 3905 if (len + 5 >= size) { 3906 xmlChar *tmp; 3907 3908 size *= 2; 3909 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3910 if (tmp == NULL) { 3911 xmlFree(buf); 3912 
xmlErrMemory(ctxt, NULL); 3913 ctxt->instate = (xmlParserInputState) state; 3914 return(NULL); 3915 } 3916 buf = tmp; 3917 } 3918 count++; 3919 if (count > 50) { 3920 GROW; 3921 count = 0; 3922 } 3923 COPY_BUF(l,buf,len,cur); 3924 NEXTL(l); 3925 cur = CUR_CHAR(l); 3926 if (cur == 0) { 3927 GROW; 3928 SHRINK; 3929 cur = CUR_CHAR(l); 3930 } 3931 } 3932 buf[len] = 0; 3933 ctxt->instate = (xmlParserInputState) state; 3934 if (!IS_CHAR(cur)) { 3935 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3936 } else { 3937 NEXT; 3938 } 3939 return(buf); 3940} 3941 3942/** 3943 * xmlParsePubidLiteral: 3944 * @ctxt: an XML parser context 3945 * 3946 * parse an XML public literal 3947 * 3948 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 3949 * 3950 * Returns the PubidLiteral parsed or NULL. 3951 */ 3952 3953xmlChar * 3954xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3955 xmlChar *buf = NULL; 3956 int len = 0; 3957 int size = XML_PARSER_BUFFER_SIZE; 3958 xmlChar cur; 3959 xmlChar stop; 3960 int count = 0; 3961 xmlParserInputState oldstate = ctxt->instate; 3962 3963 SHRINK; 3964 if (RAW == '"') { 3965 NEXT; 3966 stop = '"'; 3967 } else if (RAW == '\'') { 3968 NEXT; 3969 stop = '\''; 3970 } else { 3971 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3972 return(NULL); 3973 } 3974 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3975 if (buf == NULL) { 3976 xmlErrMemory(ctxt, NULL); 3977 return(NULL); 3978 } 3979 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3980 cur = CUR; 3981 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 3982 if (len + 1 >= size) { 3983 xmlChar *tmp; 3984 3985 size *= 2; 3986 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3987 if (tmp == NULL) { 3988 xmlErrMemory(ctxt, NULL); 3989 xmlFree(buf); 3990 return(NULL); 3991 } 3992 buf = tmp; 3993 } 3994 buf[len++] = cur; 3995 count++; 3996 if (count > 50) { 3997 GROW; 3998 count = 0; 3999 } 4000 NEXT; 4001 cur = CUR; 4002 if (cur == 0) { 4003 
GROW; 4004 SHRINK; 4005 cur = CUR; 4006 } 4007 } 4008 buf[len] = 0; 4009 if (cur != stop) { 4010 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 4011 } else { 4012 NEXT; 4013 } 4014 ctxt->instate = oldstate; 4015 return(buf); 4016} 4017 4018static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 4019 4020/* 4021 * used for the test in the inner loop of the char data testing 4022 */ 4023static const unsigned char test_char_data[256] = { 4024 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4025 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 4026 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4027 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4028 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 4029 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 4030 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 4031 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 4032 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 4033 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 4034 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 4035 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 4036 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 4037 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 4038 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 4039 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 4040 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 4041 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4042 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4043 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4044 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4045 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4046 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4047 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4048 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4049 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4050 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4051 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 4052 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4053 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4054 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 4055 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 4056}; 4057 4058/** 4059 * xmlParseCharData: 4060 * @ctxt: an XML parser context 4061 * @cdata: int indicating whether we are within a CDATA section 4062 * 4063 * parse a CharData section. 4064 * if we are within a CDATA section ']]>' marks an end of section. 4065 * 4066 * The right angle bracket (>) may be represented using the string ">", 4067 * and must, for compatibility, be escaped using ">" or a character 4068 * reference when it appears in the string "]]>" in content, when that 4069 * string is not marking the end of a CDATA section. 4070 * 4071 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 4072 */ 4073 4074void 4075xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { 4076 const xmlChar *in; 4077 int nbchar = 0; 4078 int line = ctxt->input->line; 4079 int col = ctxt->input->col; 4080 int ccol; 4081 4082 SHRINK; 4083 GROW; 4084 /* 4085 * Accelerated common case where input don't need to be 4086 * modified before passing it to the handler. 
4087 */ 4088 if (!cdata) { 4089 in = ctxt->input->cur; 4090 do { 4091get_more_space: 4092 while (*in == 0x20) { in++; ctxt->input->col++; } 4093 if (*in == 0xA) { 4094 do { 4095 ctxt->input->line++; ctxt->input->col = 1; 4096 in++; 4097 } while (*in == 0xA); 4098 goto get_more_space; 4099 } 4100 if (*in == '<') { 4101 nbchar = in - ctxt->input->cur; 4102 if (nbchar > 0) { 4103 const xmlChar *tmp = ctxt->input->cur; 4104 ctxt->input->cur = in; 4105 4106 if ((ctxt->sax != NULL) && 4107 (ctxt->sax->ignorableWhitespace != 4108 ctxt->sax->characters)) { 4109 if (areBlanks(ctxt, tmp, nbchar, 1)) { 4110 if (ctxt->sax->ignorableWhitespace != NULL) 4111 ctxt->sax->ignorableWhitespace(ctxt->userData, 4112 tmp, nbchar); 4113 } else { 4114 if (ctxt->sax->characters != NULL) 4115 ctxt->sax->characters(ctxt->userData, 4116 tmp, nbchar); 4117 if (*ctxt->space == -1) 4118 *ctxt->space = -2; 4119 } 4120 } else if ((ctxt->sax != NULL) && 4121 (ctxt->sax->characters != NULL)) { 4122 ctxt->sax->characters(ctxt->userData, 4123 tmp, nbchar); 4124 } 4125 } 4126 return; 4127 } 4128 4129get_more: 4130 ccol = ctxt->input->col; 4131 while (test_char_data[*in]) { 4132 in++; 4133 ccol++; 4134 } 4135 ctxt->input->col = ccol; 4136 if (*in == 0xA) { 4137 do { 4138 ctxt->input->line++; ctxt->input->col = 1; 4139 in++; 4140 } while (*in == 0xA); 4141 goto get_more; 4142 } 4143 if (*in == ']') { 4144 if ((in[1] == ']') && (in[2] == '>')) { 4145 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4146 ctxt->input->cur = in; 4147 return; 4148 } 4149 in++; 4150 ctxt->input->col++; 4151 goto get_more; 4152 } 4153 nbchar = in - ctxt->input->cur; 4154 if (nbchar > 0) { 4155 if ((ctxt->sax != NULL) && 4156 (ctxt->sax->ignorableWhitespace != 4157 ctxt->sax->characters) && 4158 (IS_BLANK_CH(*ctxt->input->cur))) { 4159 const xmlChar *tmp = ctxt->input->cur; 4160 ctxt->input->cur = in; 4161 4162 if (areBlanks(ctxt, tmp, nbchar, 0)) { 4163 if (ctxt->sax->ignorableWhitespace != NULL) 4164 
ctxt->sax->ignorableWhitespace(ctxt->userData, 4165 tmp, nbchar); 4166 } else { 4167 if (ctxt->sax->characters != NULL) 4168 ctxt->sax->characters(ctxt->userData, 4169 tmp, nbchar); 4170 if (*ctxt->space == -1) 4171 *ctxt->space = -2; 4172 } 4173 line = ctxt->input->line; 4174 col = ctxt->input->col; 4175 } else if (ctxt->sax != NULL) { 4176 if (ctxt->sax->characters != NULL) 4177 ctxt->sax->characters(ctxt->userData, 4178 ctxt->input->cur, nbchar); 4179 line = ctxt->input->line; 4180 col = ctxt->input->col; 4181 } 4182 /* something really bad happened in the SAX callback */ 4183 if (ctxt->instate != XML_PARSER_CONTENT) 4184 return; 4185 } 4186 ctxt->input->cur = in; 4187 if (*in == 0xD) { 4188 in++; 4189 if (*in == 0xA) { 4190 ctxt->input->cur = in; 4191 in++; 4192 ctxt->input->line++; ctxt->input->col = 1; 4193 continue; /* while */ 4194 } 4195 in--; 4196 } 4197 if (*in == '<') { 4198 return; 4199 } 4200 if (*in == '&') { 4201 return; 4202 } 4203 SHRINK; 4204 GROW; 4205 in = ctxt->input->cur; 4206 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4207 nbchar = 0; 4208 } 4209 ctxt->input->line = line; 4210 ctxt->input->col = col; 4211 xmlParseCharDataComplex(ctxt, cdata); 4212} 4213 4214/** 4215 * xmlParseCharDataComplex: 4216 * @ctxt: an XML parser context 4217 * @cdata: int indicating whether we are within a CDATA section 4218 * 4219 * parse a CharData section.this is the fallback function 4220 * of xmlParseCharData() when the parsing requires handling 4221 * of non-ASCII characters. 
4222 */ 4223static void 4224xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 4225 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 4226 int nbchar = 0; 4227 int cur, l; 4228 int count = 0; 4229 4230 SHRINK; 4231 GROW; 4232 cur = CUR_CHAR(l); 4233 while ((cur != '<') && /* checked */ 4234 (cur != '&') && 4235 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 4236 if ((cur == ']') && (NXT(1) == ']') && 4237 (NXT(2) == '>')) { 4238 if (cdata) break; 4239 else { 4240 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 4241 } 4242 } 4243 COPY_BUF(l,buf,nbchar,cur); 4244 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 4245 buf[nbchar] = 0; 4246 4247 /* 4248 * OK the segment is to be consumed as chars. 4249 */ 4250 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4251 if (areBlanks(ctxt, buf, nbchar, 0)) { 4252 if (ctxt->sax->ignorableWhitespace != NULL) 4253 ctxt->sax->ignorableWhitespace(ctxt->userData, 4254 buf, nbchar); 4255 } else { 4256 if (ctxt->sax->characters != NULL) 4257 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4258 if ((ctxt->sax->characters != 4259 ctxt->sax->ignorableWhitespace) && 4260 (*ctxt->space == -1)) 4261 *ctxt->space = -2; 4262 } 4263 } 4264 nbchar = 0; 4265 /* something really bad happened in the SAX callback */ 4266 if (ctxt->instate != XML_PARSER_CONTENT) 4267 return; 4268 } 4269 count++; 4270 if (count > 50) { 4271 GROW; 4272 count = 0; 4273 } 4274 NEXTL(l); 4275 cur = CUR_CHAR(l); 4276 } 4277 if (nbchar != 0) { 4278 buf[nbchar] = 0; 4279 /* 4280 * OK the segment is to be consumed as chars. 
4281 */ 4282 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 4283 if (areBlanks(ctxt, buf, nbchar, 0)) { 4284 if (ctxt->sax->ignorableWhitespace != NULL) 4285 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 4286 } else { 4287 if (ctxt->sax->characters != NULL) 4288 ctxt->sax->characters(ctxt->userData, buf, nbchar); 4289 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 4290 (*ctxt->space == -1)) 4291 *ctxt->space = -2; 4292 } 4293 } 4294 } 4295 if ((cur != 0) && (!IS_CHAR(cur))) { 4296 /* Generate the error and skip the offending character */ 4297 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4298 "PCDATA invalid Char value %d\n", 4299 cur); 4300 NEXTL(l); 4301 } 4302} 4303 4304/** 4305 * xmlParseExternalID: 4306 * @ctxt: an XML parser context 4307 * @publicID: a xmlChar** receiving PubidLiteral 4308 * @strict: indicate whether we should restrict parsing to only 4309 * production [75], see NOTE below 4310 * 4311 * Parse an External ID or a Public ID 4312 * 4313 * NOTE: Productions [75] and [83] interact badly since [75] can generate 4314 * 'PUBLIC' S PubidLiteral S SystemLiteral 4315 * 4316 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 4317 * | 'PUBLIC' S PubidLiteral S SystemLiteral 4318 * 4319 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 4320 * 4321 * Returns the function returns SystemLiteral and in the second 4322 * case publicID receives PubidLiteral, is strict is off 4323 * it is possible to return NULL and have publicID set. 
4324 */ 4325 4326xmlChar * 4327xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 4328 xmlChar *URI = NULL; 4329 4330 SHRINK; 4331 4332 *publicID = NULL; 4333 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 4334 SKIP(6); 4335 if (!IS_BLANK_CH(CUR)) { 4336 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4337 "Space required after 'SYSTEM'\n"); 4338 } 4339 SKIP_BLANKS; 4340 URI = xmlParseSystemLiteral(ctxt); 4341 if (URI == NULL) { 4342 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4343 } 4344 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 4345 SKIP(6); 4346 if (!IS_BLANK_CH(CUR)) { 4347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4348 "Space required after 'PUBLIC'\n"); 4349 } 4350 SKIP_BLANKS; 4351 *publicID = xmlParsePubidLiteral(ctxt); 4352 if (*publicID == NULL) { 4353 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 4354 } 4355 if (strict) { 4356 /* 4357 * We don't handle [83] so "S SystemLiteral" is required. 4358 */ 4359 if (!IS_BLANK_CH(CUR)) { 4360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4361 "Space required after the Public Identifier\n"); 4362 } 4363 } else { 4364 /* 4365 * We handle [83] so we return immediately, if 4366 * "S SystemLiteral" is not detected. From a purely parsing 4367 * point of view that's a nice mess. 4368 */ 4369 const xmlChar *ptr; 4370 GROW; 4371 4372 ptr = CUR_PTR; 4373 if (!IS_BLANK_CH(*ptr)) return(NULL); 4374 4375 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 4376 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 4377 } 4378 SKIP_BLANKS; 4379 URI = xmlParseSystemLiteral(ctxt); 4380 if (URI == NULL) { 4381 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 4382 } 4383 } 4384 return(URI); 4385} 4386 4387/** 4388 * xmlParseCommentComplex: 4389 * @ctxt: an XML parser context 4390 * @buf: the already parsed part of the buffer 4391 * @len: number of bytes filles in the buffer 4392 * @size: allocated size of the buffer 4393 * 4394 * Skip an XML (SGML) comment <!-- .... 
--> 4395 * The spec says that "For compatibility, the string "--" (double-hyphen) 4396 * must not occur within comments. " 4397 * This is the slow routine in case the accelerator for ascii didn't work 4398 * 4399 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4400 */ 4401static void 4402xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 4403 int q, ql; 4404 int r, rl; 4405 int cur, l; 4406 int count = 0; 4407 int inputid; 4408 4409 inputid = ctxt->input->id; 4410 4411 if (buf == NULL) { 4412 len = 0; 4413 size = XML_PARSER_BUFFER_SIZE; 4414 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4415 if (buf == NULL) { 4416 xmlErrMemory(ctxt, NULL); 4417 return; 4418 } 4419 } 4420 GROW; /* Assure there's enough input data */ 4421 q = CUR_CHAR(ql); 4422 if (q == 0) 4423 goto not_terminated; 4424 if (!IS_CHAR(q)) { 4425 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4426 "xmlParseComment: invalid xmlChar value %d\n", 4427 q); 4428 xmlFree (buf); 4429 return; 4430 } 4431 NEXTL(ql); 4432 r = CUR_CHAR(rl); 4433 if (r == 0) 4434 goto not_terminated; 4435 if (!IS_CHAR(r)) { 4436 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4437 "xmlParseComment: invalid xmlChar value %d\n", 4438 q); 4439 xmlFree (buf); 4440 return; 4441 } 4442 NEXTL(rl); 4443 cur = CUR_CHAR(l); 4444 if (cur == 0) 4445 goto not_terminated; 4446 while (IS_CHAR(cur) && /* checked */ 4447 ((cur != '>') || 4448 (r != '-') || (q != '-'))) { 4449 if ((r == '-') && (q == '-')) { 4450 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 4451 } 4452 if (len + 5 >= size) { 4453 xmlChar *new_buf; 4454 size *= 2; 4455 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4456 if (new_buf == NULL) { 4457 xmlFree (buf); 4458 xmlErrMemory(ctxt, NULL); 4459 return; 4460 } 4461 buf = new_buf; 4462 } 4463 COPY_BUF(ql,buf,len,q); 4464 q = r; 4465 ql = rl; 4466 r = cur; 4467 rl = l; 4468 4469 count++; 4470 if (count > 50) { 4471 GROW; 4472 count = 0; 4473 } 4474 
NEXTL(l); 4475 cur = CUR_CHAR(l); 4476 if (cur == 0) { 4477 SHRINK; 4478 GROW; 4479 cur = CUR_CHAR(l); 4480 } 4481 } 4482 buf[len] = 0; 4483 if (cur == 0) { 4484 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4485 "Comment not terminated \n<!--%.50s\n", buf); 4486 } else if (!IS_CHAR(cur)) { 4487 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 4488 "xmlParseComment: invalid xmlChar value %d\n", 4489 cur); 4490 } else { 4491 if (inputid != ctxt->input->id) { 4492 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4493 "Comment doesn't start and stop in the same entity\n"); 4494 } 4495 NEXT; 4496 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4497 (!ctxt->disableSAX)) 4498 ctxt->sax->comment(ctxt->userData, buf); 4499 } 4500 xmlFree(buf); 4501 return; 4502not_terminated: 4503 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4504 "Comment not terminated\n", NULL); 4505 xmlFree(buf); 4506 return; 4507} 4508 4509/** 4510 * xmlParseComment: 4511 * @ctxt: an XML parser context 4512 * 4513 * Skip an XML (SGML) comment <!-- .... --> 4514 * The spec says that "For compatibility, the string "--" (double-hyphen) 4515 * must not occur within comments. " 4516 * 4517 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 4518 */ 4519void 4520xmlParseComment(xmlParserCtxtPtr ctxt) { 4521 xmlChar *buf = NULL; 4522 int size = XML_PARSER_BUFFER_SIZE; 4523 int len = 0; 4524 xmlParserInputState state; 4525 const xmlChar *in; 4526 int nbchar = 0, ccol; 4527 int inputid; 4528 4529 /* 4530 * Check that there is a comment right here. 4531 */ 4532 if ((RAW != '<') || (NXT(1) != '!') || 4533 (NXT(2) != '-') || (NXT(3) != '-')) return; 4534 state = ctxt->instate; 4535 ctxt->instate = XML_PARSER_COMMENT; 4536 inputid = ctxt->input->id; 4537 SKIP(4); 4538 SHRINK; 4539 GROW; 4540 4541 /* 4542 * Accelerated common case where input don't need to be 4543 * modified before passing it to the handler. 
4544 */ 4545 in = ctxt->input->cur; 4546 do { 4547 if (*in == 0xA) { 4548 do { 4549 ctxt->input->line++; ctxt->input->col = 1; 4550 in++; 4551 } while (*in == 0xA); 4552 } 4553get_more: 4554 ccol = ctxt->input->col; 4555 while (((*in > '-') && (*in <= 0x7F)) || 4556 ((*in >= 0x20) && (*in < '-')) || 4557 (*in == 0x09)) { 4558 in++; 4559 ccol++; 4560 } 4561 ctxt->input->col = ccol; 4562 if (*in == 0xA) { 4563 do { 4564 ctxt->input->line++; ctxt->input->col = 1; 4565 in++; 4566 } while (*in == 0xA); 4567 goto get_more; 4568 } 4569 nbchar = in - ctxt->input->cur; 4570 /* 4571 * save current set of data 4572 */ 4573 if (nbchar > 0) { 4574 if ((ctxt->sax != NULL) && 4575 (ctxt->sax->comment != NULL)) { 4576 if (buf == NULL) { 4577 if ((*in == '-') && (in[1] == '-')) 4578 size = nbchar + 1; 4579 else 4580 size = XML_PARSER_BUFFER_SIZE + nbchar; 4581 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4582 if (buf == NULL) { 4583 xmlErrMemory(ctxt, NULL); 4584 ctxt->instate = state; 4585 return; 4586 } 4587 len = 0; 4588 } else if (len + nbchar + 1 >= size) { 4589 xmlChar *new_buf; 4590 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 4591 new_buf = (xmlChar *) xmlRealloc(buf, 4592 size * sizeof(xmlChar)); 4593 if (new_buf == NULL) { 4594 xmlFree (buf); 4595 xmlErrMemory(ctxt, NULL); 4596 ctxt->instate = state; 4597 return; 4598 } 4599 buf = new_buf; 4600 } 4601 memcpy(&buf[len], ctxt->input->cur, nbchar); 4602 len += nbchar; 4603 buf[len] = 0; 4604 } 4605 } 4606 ctxt->input->cur = in; 4607 if (*in == 0xA) { 4608 in++; 4609 ctxt->input->line++; ctxt->input->col = 1; 4610 } 4611 if (*in == 0xD) { 4612 in++; 4613 if (*in == 0xA) { 4614 ctxt->input->cur = in; 4615 in++; 4616 ctxt->input->line++; ctxt->input->col = 1; 4617 continue; /* while */ 4618 } 4619 in--; 4620 } 4621 SHRINK; 4622 GROW; 4623 in = ctxt->input->cur; 4624 if (*in == '-') { 4625 if (in[1] == '-') { 4626 if (in[2] == '>') { 4627 if (ctxt->input->id != inputid) { 4628 xmlFatalErrMsg(ctxt, 
XML_ERR_ENTITY_BOUNDARY, 4629 "comment doesn't start and stop in the same entity\n"); 4630 } 4631 SKIP(3); 4632 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 4633 (!ctxt->disableSAX)) { 4634 if (buf != NULL) 4635 ctxt->sax->comment(ctxt->userData, buf); 4636 else 4637 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4638 } 4639 if (buf != NULL) 4640 xmlFree(buf); 4641 ctxt->instate = state; 4642 return; 4643 } 4644 if (buf != NULL) 4645 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4646 "Comment not terminated \n<!--%.50s\n", 4647 buf); 4648 else 4649 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4650 "Comment not terminated \n", NULL); 4651 in++; 4652 ctxt->input->col++; 4653 } 4654 in++; 4655 ctxt->input->col++; 4656 goto get_more; 4657 } 4658 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4659 xmlParseCommentComplex(ctxt, buf, len, size); 4660 ctxt->instate = state; 4661 return; 4662} 4663 4664 4665/** 4666 * xmlParsePITarget: 4667 * @ctxt: an XML parser context 4668 * 4669 * parse the name of a PI 4670 * 4671 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4672 * 4673 * Returns the PITarget name or NULL 4674 */ 4675 4676const xmlChar * 4677xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4678 const xmlChar *name; 4679 4680 name = xmlParseName(ctxt); 4681 if ((name != NULL) && 4682 ((name[0] == 'x') || (name[0] == 'X')) && 4683 ((name[1] == 'm') || (name[1] == 'M')) && 4684 ((name[2] == 'l') || (name[2] == 'L'))) { 4685 int i; 4686 if ((name[0] == 'x') && (name[1] == 'm') && 4687 (name[2] == 'l') && (name[3] == 0)) { 4688 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4689 "XML declaration allowed only at the start of the document\n"); 4690 return(name); 4691 } else if (name[3] == 0) { 4692 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4693 return(name); 4694 } 4695 for (i = 0;;i++) { 4696 if (xmlW3CPIs[i] == NULL) break; 4697 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4698 return(name); 4699 
} 4700 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4701 "xmlParsePITarget: invalid name prefix 'xml'\n", 4702 NULL, NULL); 4703 } 4704 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { 4705 xmlNsErr(ctxt, XML_NS_ERR_COLON, 4706 "colon are forbidden from PI names '%s'\n", name, NULL, NULL); 4707 } 4708 return(name); 4709} 4710 4711#ifdef LIBXML_CATALOG_ENABLED 4712/** 4713 * xmlParseCatalogPI: 4714 * @ctxt: an XML parser context 4715 * @catalog: the PI value string 4716 * 4717 * parse an XML Catalog Processing Instruction. 4718 * 4719 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4720 * 4721 * Occurs only if allowed by the user and if happening in the Misc 4722 * part of the document before any doctype informations 4723 * This will add the given catalog to the parsing context in order 4724 * to be used if there is a resolution need further down in the document 4725 */ 4726 4727static void 4728xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4729 xmlChar *URL = NULL; 4730 const xmlChar *tmp, *base; 4731 xmlChar marker; 4732 4733 tmp = catalog; 4734 while (IS_BLANK_CH(*tmp)) tmp++; 4735 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4736 goto error; 4737 tmp += 7; 4738 while (IS_BLANK_CH(*tmp)) tmp++; 4739 if (*tmp != '=') { 4740 return; 4741 } 4742 tmp++; 4743 while (IS_BLANK_CH(*tmp)) tmp++; 4744 marker = *tmp; 4745 if ((marker != '\'') && (marker != '"')) 4746 goto error; 4747 tmp++; 4748 base = tmp; 4749 while ((*tmp != 0) && (*tmp != marker)) tmp++; 4750 if (*tmp == 0) 4751 goto error; 4752 URL = xmlStrndup(base, tmp - base); 4753 tmp++; 4754 while (IS_BLANK_CH(*tmp)) tmp++; 4755 if (*tmp != 0) 4756 goto error; 4757 4758 if (URL != NULL) { 4759 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 4760 xmlFree(URL); 4761 } 4762 return; 4763 4764error: 4765 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4766 "Catalog PI syntax error: %s\n", 4767 catalog, NULL); 4768 if (URL != NULL) 4769 xmlFree(URL); 4770} 4771#endif 4772 
4773/** 4774 * xmlParsePI: 4775 * @ctxt: an XML parser context 4776 * 4777 * parse an XML Processing Instruction. 4778 * 4779 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4780 * 4781 * The processing is transfered to SAX once parsed. 4782 */ 4783 4784void 4785xmlParsePI(xmlParserCtxtPtr ctxt) { 4786 xmlChar *buf = NULL; 4787 int len = 0; 4788 int size = XML_PARSER_BUFFER_SIZE; 4789 int cur, l; 4790 const xmlChar *target; 4791 xmlParserInputState state; 4792 int count = 0; 4793 4794 if ((RAW == '<') && (NXT(1) == '?')) { 4795 xmlParserInputPtr input = ctxt->input; 4796 state = ctxt->instate; 4797 ctxt->instate = XML_PARSER_PI; 4798 /* 4799 * this is a Processing Instruction. 4800 */ 4801 SKIP(2); 4802 SHRINK; 4803 4804 /* 4805 * Parse the target name and check for special support like 4806 * namespace. 4807 */ 4808 target = xmlParsePITarget(ctxt); 4809 if (target != NULL) { 4810 if ((RAW == '?') && (NXT(1) == '>')) { 4811 if (input != ctxt->input) { 4812 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4813 "PI declaration doesn't start and stop in the same entity\n"); 4814 } 4815 SKIP(2); 4816 4817 /* 4818 * SAX: PI detected. 
4819 */ 4820 if ((ctxt->sax) && (!ctxt->disableSAX) && 4821 (ctxt->sax->processingInstruction != NULL)) 4822 ctxt->sax->processingInstruction(ctxt->userData, 4823 target, NULL); 4824 ctxt->instate = state; 4825 return; 4826 } 4827 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4828 if (buf == NULL) { 4829 xmlErrMemory(ctxt, NULL); 4830 ctxt->instate = state; 4831 return; 4832 } 4833 cur = CUR; 4834 if (!IS_BLANK(cur)) { 4835 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4836 "ParsePI: PI %s space expected\n", target); 4837 } 4838 SKIP_BLANKS; 4839 cur = CUR_CHAR(l); 4840 while (IS_CHAR(cur) && /* checked */ 4841 ((cur != '?') || (NXT(1) != '>'))) { 4842 if (len + 5 >= size) { 4843 xmlChar *tmp; 4844 4845 size *= 2; 4846 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4847 if (tmp == NULL) { 4848 xmlErrMemory(ctxt, NULL); 4849 xmlFree(buf); 4850 ctxt->instate = state; 4851 return; 4852 } 4853 buf = tmp; 4854 } 4855 count++; 4856 if (count > 50) { 4857 GROW; 4858 count = 0; 4859 } 4860 COPY_BUF(l,buf,len,cur); 4861 NEXTL(l); 4862 cur = CUR_CHAR(l); 4863 if (cur == 0) { 4864 SHRINK; 4865 GROW; 4866 cur = CUR_CHAR(l); 4867 } 4868 } 4869 buf[len] = 0; 4870 if (cur != '?') { 4871 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 4872 "ParsePI: PI %s never end ...\n", target); 4873 } else { 4874 if (input != ctxt->input) { 4875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4876 "PI declaration doesn't start and stop in the same entity\n"); 4877 } 4878 SKIP(2); 4879 4880#ifdef LIBXML_CATALOG_ENABLED 4881 if (((state == XML_PARSER_MISC) || 4882 (state == XML_PARSER_START)) && 4883 (xmlStrEqual(target, XML_CATALOG_PI))) { 4884 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 4885 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 4886 (allow == XML_CATA_ALLOW_ALL)) 4887 xmlParseCatalogPI(ctxt, buf); 4888 } 4889#endif 4890 4891 4892 /* 4893 * SAX: PI detected. 
4894 */ 4895 if ((ctxt->sax) && (!ctxt->disableSAX) && 4896 (ctxt->sax->processingInstruction != NULL)) 4897 ctxt->sax->processingInstruction(ctxt->userData, 4898 target, buf); 4899 } 4900 xmlFree(buf); 4901 } else { 4902 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 4903 } 4904 ctxt->instate = state; 4905 } 4906} 4907 4908/** 4909 * xmlParseNotationDecl: 4910 * @ctxt: an XML parser context 4911 * 4912 * parse a notation declaration 4913 * 4914 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 4915 * 4916 * Hence there is actually 3 choices: 4917 * 'PUBLIC' S PubidLiteral 4918 * 'PUBLIC' S PubidLiteral S SystemLiteral 4919 * and 'SYSTEM' S SystemLiteral 4920 * 4921 * See the NOTE on xmlParseExternalID(). 4922 */ 4923 4924void 4925xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 4926 const xmlChar *name; 4927 xmlChar *Pubid; 4928 xmlChar *Systemid; 4929 4930 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4931 xmlParserInputPtr input = ctxt->input; 4932 SHRINK; 4933 SKIP(10); 4934 if (!IS_BLANK_CH(CUR)) { 4935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4936 "Space required after '<!NOTATION'\n"); 4937 return; 4938 } 4939 SKIP_BLANKS; 4940 4941 name = xmlParseName(ctxt); 4942 if (name == NULL) { 4943 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4944 return; 4945 } 4946 if (!IS_BLANK_CH(CUR)) { 4947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4948 "Space required after the NOTATION name'\n"); 4949 return; 4950 } 4951 if (xmlStrchr(name, ':') != NULL) { 4952 xmlNsErr(ctxt, XML_NS_ERR_COLON, 4953 "colon are forbidden from notation names '%s'\n", 4954 name, NULL, NULL); 4955 } 4956 SKIP_BLANKS; 4957 4958 /* 4959 * Parse the IDs. 
4960 */ 4961 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 4962 SKIP_BLANKS; 4963 4964 if (RAW == '>') { 4965 if (input != ctxt->input) { 4966 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4967 "Notation declaration doesn't start and stop in the same entity\n"); 4968 } 4969 NEXT; 4970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4971 (ctxt->sax->notationDecl != NULL)) 4972 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 4973 } else { 4974 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4975 } 4976 if (Systemid != NULL) xmlFree(Systemid); 4977 if (Pubid != NULL) xmlFree(Pubid); 4978 } 4979} 4980 4981/** 4982 * xmlParseEntityDecl: 4983 * @ctxt: an XML parser context 4984 * 4985 * parse <!ENTITY declarations 4986 * 4987 * [70] EntityDecl ::= GEDecl | PEDecl 4988 * 4989 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 4990 * 4991 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 4992 * 4993 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 4994 * 4995 * [74] PEDef ::= EntityValue | ExternalID 4996 * 4997 * [76] NDataDecl ::= S 'NDATA' S Name 4998 * 4999 * [ VC: Notation Declared ] 5000 * The Name must match the declared name of a notation. 
5001 */ 5002 5003void 5004xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 5005 const xmlChar *name = NULL; 5006 xmlChar *value = NULL; 5007 xmlChar *URI = NULL, *literal = NULL; 5008 const xmlChar *ndata = NULL; 5009 int isParameter = 0; 5010 xmlChar *orig = NULL; 5011 int skipped; 5012 5013 /* GROW; done in the caller */ 5014 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 5015 xmlParserInputPtr input = ctxt->input; 5016 SHRINK; 5017 SKIP(8); 5018 skipped = SKIP_BLANKS; 5019 if (skipped == 0) { 5020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5021 "Space required after '<!ENTITY'\n"); 5022 } 5023 5024 if (RAW == '%') { 5025 NEXT; 5026 skipped = SKIP_BLANKS; 5027 if (skipped == 0) { 5028 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5029 "Space required after '%'\n"); 5030 } 5031 isParameter = 1; 5032 } 5033 5034 name = xmlParseName(ctxt); 5035 if (name == NULL) { 5036 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5037 "xmlParseEntityDecl: no name\n"); 5038 return; 5039 } 5040 if (xmlStrchr(name, ':') != NULL) { 5041 xmlNsErr(ctxt, XML_NS_ERR_COLON, 5042 "colon are forbidden from entities names '%s'\n", 5043 name, NULL, NULL); 5044 } 5045 skipped = SKIP_BLANKS; 5046 if (skipped == 0) { 5047 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5048 "Space required after the entity name\n"); 5049 } 5050 5051 ctxt->instate = XML_PARSER_ENTITY_DECL; 5052 /* 5053 * handle the various case of definitions... 
5054 */ 5055 if (isParameter) { 5056 if ((RAW == '"') || (RAW == '\'')) { 5057 value = xmlParseEntityValue(ctxt, &orig); 5058 if (value) { 5059 if ((ctxt->sax != NULL) && 5060 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5061 ctxt->sax->entityDecl(ctxt->userData, name, 5062 XML_INTERNAL_PARAMETER_ENTITY, 5063 NULL, NULL, value); 5064 } 5065 } else { 5066 URI = xmlParseExternalID(ctxt, &literal, 1); 5067 if ((URI == NULL) && (literal == NULL)) { 5068 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5069 } 5070 if (URI) { 5071 xmlURIPtr uri; 5072 5073 uri = xmlParseURI((const char *) URI); 5074 if (uri == NULL) { 5075 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5076 "Invalid URI: %s\n", URI); 5077 /* 5078 * This really ought to be a well formedness error 5079 * but the XML Core WG decided otherwise c.f. issue 5080 * E26 of the XML erratas. 5081 */ 5082 } else { 5083 if (uri->fragment != NULL) { 5084 /* 5085 * Okay this is foolish to block those but not 5086 * invalid URIs. 5087 */ 5088 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5089 } else { 5090 if ((ctxt->sax != NULL) && 5091 (!ctxt->disableSAX) && 5092 (ctxt->sax->entityDecl != NULL)) 5093 ctxt->sax->entityDecl(ctxt->userData, name, 5094 XML_EXTERNAL_PARAMETER_ENTITY, 5095 literal, URI, NULL); 5096 } 5097 xmlFreeURI(uri); 5098 } 5099 } 5100 } 5101 } else { 5102 if ((RAW == '"') || (RAW == '\'')) { 5103 value = xmlParseEntityValue(ctxt, &orig); 5104 if ((ctxt->sax != NULL) && 5105 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5106 ctxt->sax->entityDecl(ctxt->userData, name, 5107 XML_INTERNAL_GENERAL_ENTITY, 5108 NULL, NULL, value); 5109 /* 5110 * For expat compatibility in SAX mode. 
5111 */ 5112 if ((ctxt->myDoc == NULL) || 5113 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 5114 if (ctxt->myDoc == NULL) { 5115 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5116 if (ctxt->myDoc == NULL) { 5117 xmlErrMemory(ctxt, "New Doc failed"); 5118 return; 5119 } 5120 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5121 } 5122 if (ctxt->myDoc->intSubset == NULL) 5123 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5124 BAD_CAST "fake", NULL, NULL); 5125 5126 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 5127 NULL, NULL, value); 5128 } 5129 } else { 5130 URI = xmlParseExternalID(ctxt, &literal, 1); 5131 if ((URI == NULL) && (literal == NULL)) { 5132 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 5133 } 5134 if (URI) { 5135 xmlURIPtr uri; 5136 5137 uri = xmlParseURI((const char *)URI); 5138 if (uri == NULL) { 5139 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 5140 "Invalid URI: %s\n", URI); 5141 /* 5142 * This really ought to be a well formedness error 5143 * but the XML Core WG decided otherwise c.f. issue 5144 * E26 of the XML erratas. 5145 */ 5146 } else { 5147 if (uri->fragment != NULL) { 5148 /* 5149 * Okay this is foolish to block those but not 5150 * invalid URIs. 
5151 */ 5152 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 5153 } 5154 xmlFreeURI(uri); 5155 } 5156 } 5157 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 5158 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5159 "Space required before 'NDATA'\n"); 5160 } 5161 SKIP_BLANKS; 5162 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 5163 SKIP(5); 5164 if (!IS_BLANK_CH(CUR)) { 5165 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5166 "Space required after 'NDATA'\n"); 5167 } 5168 SKIP_BLANKS; 5169 ndata = xmlParseName(ctxt); 5170 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5171 (ctxt->sax->unparsedEntityDecl != NULL)) 5172 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 5173 literal, URI, ndata); 5174 } else { 5175 if ((ctxt->sax != NULL) && 5176 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 5177 ctxt->sax->entityDecl(ctxt->userData, name, 5178 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5179 literal, URI, NULL); 5180 /* 5181 * For expat compatibility in SAX mode. 5182 * assuming the entity repalcement was asked for 5183 */ 5184 if ((ctxt->replaceEntities != 0) && 5185 ((ctxt->myDoc == NULL) || 5186 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 5187 if (ctxt->myDoc == NULL) { 5188 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 5189 if (ctxt->myDoc == NULL) { 5190 xmlErrMemory(ctxt, "New Doc failed"); 5191 return; 5192 } 5193 ctxt->myDoc->properties = XML_DOC_INTERNAL; 5194 } 5195 5196 if (ctxt->myDoc->intSubset == NULL) 5197 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 5198 BAD_CAST "fake", NULL, NULL); 5199 xmlSAX2EntityDecl(ctxt, name, 5200 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 5201 literal, URI, NULL); 5202 } 5203 } 5204 } 5205 } 5206 SKIP_BLANKS; 5207 if (RAW != '>') { 5208 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 5209 "xmlParseEntityDecl: entity %s not terminated\n", name); 5210 } else { 5211 if (input != ctxt->input) { 5212 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5213 "Entity declaration doesn't start and stop in the same entity\n"); 5214 } 
5215 NEXT; 5216 } 5217 if (orig != NULL) { 5218 /* 5219 * Ugly mechanism to save the raw entity value. 5220 */ 5221 xmlEntityPtr cur = NULL; 5222 5223 if (isParameter) { 5224 if ((ctxt->sax != NULL) && 5225 (ctxt->sax->getParameterEntity != NULL)) 5226 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 5227 } else { 5228 if ((ctxt->sax != NULL) && 5229 (ctxt->sax->getEntity != NULL)) 5230 cur = ctxt->sax->getEntity(ctxt->userData, name); 5231 if ((cur == NULL) && (ctxt->userData==ctxt)) { 5232 cur = xmlSAX2GetEntity(ctxt, name); 5233 } 5234 } 5235 if (cur != NULL) { 5236 if (cur->orig != NULL) 5237 xmlFree(orig); 5238 else 5239 cur->orig = orig; 5240 } else 5241 xmlFree(orig); 5242 } 5243 if (value != NULL) xmlFree(value); 5244 if (URI != NULL) xmlFree(URI); 5245 if (literal != NULL) xmlFree(literal); 5246 } 5247} 5248 5249/** 5250 * xmlParseDefaultDecl: 5251 * @ctxt: an XML parser context 5252 * @value: Receive a possible fixed default value for the attribute 5253 * 5254 * Parse an attribute default declaration 5255 * 5256 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 5257 * 5258 * [ VC: Required Attribute ] 5259 * if the default declaration is the keyword #REQUIRED, then the 5260 * attribute must be specified for all elements of the type in the 5261 * attribute-list declaration. 5262 * 5263 * [ VC: Attribute Default Legal ] 5264 * The declared default value must meet the lexical constraints of 5265 * the declared attribute type c.f. xmlValidateAttributeDecl() 5266 * 5267 * [ VC: Fixed Attribute Default ] 5268 * if an attribute has a default value declared with the #FIXED 5269 * keyword, instances of that attribute must match the default value. 5270 * 5271 * [ WFC: No < in Attribute Values ] 5272 * handled in xmlParseAttValue() 5273 * 5274 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 5275 * or XML_ATTRIBUTE_FIXED. 
5276 */ 5277 5278int 5279xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 5280 int val; 5281 xmlChar *ret; 5282 5283 *value = NULL; 5284 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 5285 SKIP(9); 5286 return(XML_ATTRIBUTE_REQUIRED); 5287 } 5288 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 5289 SKIP(8); 5290 return(XML_ATTRIBUTE_IMPLIED); 5291 } 5292 val = XML_ATTRIBUTE_NONE; 5293 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 5294 SKIP(6); 5295 val = XML_ATTRIBUTE_FIXED; 5296 if (!IS_BLANK_CH(CUR)) { 5297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5298 "Space required after '#FIXED'\n"); 5299 } 5300 SKIP_BLANKS; 5301 } 5302 ret = xmlParseAttValue(ctxt); 5303 ctxt->instate = XML_PARSER_DTD; 5304 if (ret == NULL) { 5305 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 5306 "Attribute default value declaration error\n"); 5307 } else 5308 *value = ret; 5309 return(val); 5310} 5311 5312/** 5313 * xmlParseNotationType: 5314 * @ctxt: an XML parser context 5315 * 5316 * parse an Notation attribute type. 5317 * 5318 * Note: the leading 'NOTATION' S part has already being parsed... 5319 * 5320 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 5321 * 5322 * [ VC: Notation Attributes ] 5323 * Values of this type must match one of the notation names included 5324 * in the declaration; all notation names in the declaration must be declared. 
5325 * 5326 * Returns: the notation attribute tree built while parsing 5327 */ 5328 5329xmlEnumerationPtr 5330xmlParseNotationType(xmlParserCtxtPtr ctxt) { 5331 const xmlChar *name; 5332 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5333 5334 if (RAW != '(') { 5335 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 5336 return(NULL); 5337 } 5338 SHRINK; 5339 do { 5340 NEXT; 5341 SKIP_BLANKS; 5342 name = xmlParseName(ctxt); 5343 if (name == NULL) { 5344 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5345 "Name expected in NOTATION declaration\n"); 5346 xmlFreeEnumeration(ret); 5347 return(NULL); 5348 } 5349 tmp = ret; 5350 while (tmp != NULL) { 5351 if (xmlStrEqual(name, tmp->name)) { 5352 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5353 "standalone: attribute notation value token %s duplicated\n", 5354 name, NULL); 5355 if (!xmlDictOwns(ctxt->dict, name)) 5356 xmlFree((xmlChar *) name); 5357 break; 5358 } 5359 tmp = tmp->next; 5360 } 5361 if (tmp == NULL) { 5362 cur = xmlCreateEnumeration(name); 5363 if (cur == NULL) { 5364 xmlFreeEnumeration(ret); 5365 return(NULL); 5366 } 5367 if (last == NULL) ret = last = cur; 5368 else { 5369 last->next = cur; 5370 last = cur; 5371 } 5372 } 5373 SKIP_BLANKS; 5374 } while (RAW == '|'); 5375 if (RAW != ')') { 5376 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 5377 xmlFreeEnumeration(ret); 5378 return(NULL); 5379 } 5380 NEXT; 5381 return(ret); 5382} 5383 5384/** 5385 * xmlParseEnumerationType: 5386 * @ctxt: an XML parser context 5387 * 5388 * parse an Enumeration attribute type. 5389 * 5390 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 5391 * 5392 * [ VC: Enumeration ] 5393 * Values of this type must match one of the Nmtoken tokens in 5394 * the declaration 5395 * 5396 * Returns: the enumeration attribute tree built while parsing 5397 */ 5398 5399xmlEnumerationPtr 5400xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 5401 xmlChar *name; 5402 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp; 5403 5404 if (RAW != '(') { 5405 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 5406 return(NULL); 5407 } 5408 SHRINK; 5409 do { 5410 NEXT; 5411 SKIP_BLANKS; 5412 name = xmlParseNmtoken(ctxt); 5413 if (name == NULL) { 5414 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 5415 return(ret); 5416 } 5417 tmp = ret; 5418 while (tmp != NULL) { 5419 if (xmlStrEqual(name, tmp->name)) { 5420 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN, 5421 "standalone: attribute enumeration value token %s duplicated\n", 5422 name, NULL); 5423 if (!xmlDictOwns(ctxt->dict, name)) 5424 xmlFree(name); 5425 break; 5426 } 5427 tmp = tmp->next; 5428 } 5429 if (tmp == NULL) { 5430 cur = xmlCreateEnumeration(name); 5431 if (!xmlDictOwns(ctxt->dict, name)) 5432 xmlFree(name); 5433 if (cur == NULL) { 5434 xmlFreeEnumeration(ret); 5435 return(NULL); 5436 } 5437 if (last == NULL) ret = last = cur; 5438 else { 5439 last->next = cur; 5440 last = cur; 5441 } 5442 } 5443 SKIP_BLANKS; 5444 } while (RAW == '|'); 5445 if (RAW != ')') { 5446 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 5447 return(ret); 5448 } 5449 NEXT; 5450 return(ret); 5451} 5452 5453/** 5454 * xmlParseEnumeratedType: 5455 * @ctxt: an XML parser context 5456 * @tree: the enumeration tree built while parsing 5457 * 5458 * parse an Enumerated attribute type. 5459 * 5460 * [57] EnumeratedType ::= NotationType | Enumeration 5461 * 5462 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 5463 * 5464 * 5465 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 5466 */ 5467 5468int 5469xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5470 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 5471 SKIP(8); 5472 if (!IS_BLANK_CH(CUR)) { 5473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5474 "Space required after 'NOTATION'\n"); 5475 return(0); 5476 } 5477 SKIP_BLANKS; 5478 *tree = xmlParseNotationType(ctxt); 5479 if (*tree == NULL) return(0); 5480 return(XML_ATTRIBUTE_NOTATION); 5481 } 5482 *tree = xmlParseEnumerationType(ctxt); 5483 if (*tree == NULL) return(0); 5484 return(XML_ATTRIBUTE_ENUMERATION); 5485} 5486 5487/** 5488 * xmlParseAttributeType: 5489 * @ctxt: an XML parser context 5490 * @tree: the enumeration tree built while parsing 5491 * 5492 * parse the Attribute list def for an element 5493 * 5494 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 5495 * 5496 * [55] StringType ::= 'CDATA' 5497 * 5498 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 5499 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 5500 * 5501 * Validity constraints for attribute values syntax are checked in 5502 * xmlValidateAttributeValue() 5503 * 5504 * [ VC: ID ] 5505 * Values of type ID must match the Name production. A name must not 5506 * appear more than once in an XML document as a value of this type; 5507 * i.e., ID values must uniquely identify the elements which bear them. 5508 * 5509 * [ VC: One ID per Element Type ] 5510 * No element type may have more than one ID attribute specified. 5511 * 5512 * [ VC: ID Attribute Default ] 5513 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 5514 * 5515 * [ VC: IDREF ] 5516 * Values of type IDREF must match the Name production, and values 5517 * of type IDREFS must match Names; each IDREF Name must match the value 5518 * of an ID attribute on some element in the XML document; i.e. 
IDREF 5519 * values must match the value of some ID attribute. 5520 * 5521 * [ VC: Entity Name ] 5522 * Values of type ENTITY must match the Name production, values 5523 * of type ENTITIES must match Names; each Entity Name must match the 5524 * name of an unparsed entity declared in the DTD. 5525 * 5526 * [ VC: Name Token ] 5527 * Values of type NMTOKEN must match the Nmtoken production; values 5528 * of type NMTOKENS must match Nmtokens. 5529 * 5530 * Returns the attribute type 5531 */ 5532int 5533xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 5534 SHRINK; 5535 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 5536 SKIP(5); 5537 return(XML_ATTRIBUTE_CDATA); 5538 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 5539 SKIP(6); 5540 return(XML_ATTRIBUTE_IDREFS); 5541 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 5542 SKIP(5); 5543 return(XML_ATTRIBUTE_IDREF); 5544 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 5545 SKIP(2); 5546 return(XML_ATTRIBUTE_ID); 5547 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 5548 SKIP(6); 5549 return(XML_ATTRIBUTE_ENTITY); 5550 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 5551 SKIP(8); 5552 return(XML_ATTRIBUTE_ENTITIES); 5553 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 5554 SKIP(8); 5555 return(XML_ATTRIBUTE_NMTOKENS); 5556 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 5557 SKIP(7); 5558 return(XML_ATTRIBUTE_NMTOKEN); 5559 } 5560 return(xmlParseEnumeratedType(ctxt, tree)); 5561} 5562 5563/** 5564 * xmlParseAttributeListDecl: 5565 * @ctxt: an XML parser context 5566 * 5567 * : parse the Attribute list def for an element 5568 * 5569 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 5570 * 5571 * [53] AttDef ::= S Name S AttType S DefaultDecl 5572 * 5573 */ 5574void 5575xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 5576 const xmlChar *elemName; 5577 const xmlChar *attrName; 5578 xmlEnumerationPtr tree; 5579 5580 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 5581 xmlParserInputPtr input = ctxt->input; 5582 5583 SKIP(9); 5584 if (!IS_BLANK_CH(CUR)) { 5585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5586 "Space required after '<!ATTLIST'\n"); 5587 } 5588 SKIP_BLANKS; 5589 elemName = xmlParseName(ctxt); 5590 if (elemName == NULL) { 5591 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5592 "ATTLIST: no name for Element\n"); 5593 return; 5594 } 5595 SKIP_BLANKS; 5596 GROW; 5597 while (RAW != '>') { 5598 const xmlChar *check = CUR_PTR; 5599 int type; 5600 int def; 5601 xmlChar *defaultValue = NULL; 5602 5603 GROW; 5604 tree = NULL; 5605 attrName = xmlParseName(ctxt); 5606 if (attrName == NULL) { 5607 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5608 "ATTLIST: no name for Attribute\n"); 5609 break; 5610 } 5611 GROW; 5612 if (!IS_BLANK_CH(CUR)) { 5613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5614 "Space required after the attribute name\n"); 5615 break; 5616 } 5617 SKIP_BLANKS; 5618 5619 type = xmlParseAttributeType(ctxt, &tree); 5620 if (type <= 0) { 5621 break; 5622 } 5623 5624 GROW; 5625 if (!IS_BLANK_CH(CUR)) { 5626 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5627 "Space required after the attribute type\n"); 5628 if (tree != NULL) 5629 xmlFreeEnumeration(tree); 5630 break; 5631 } 5632 SKIP_BLANKS; 5633 5634 def = xmlParseDefaultDecl(ctxt, &defaultValue); 5635 if (def <= 0) { 5636 if (defaultValue != NULL) 5637 xmlFree(defaultValue); 5638 if (tree != NULL) 5639 xmlFreeEnumeration(tree); 5640 break; 5641 } 5642 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL)) 5643 xmlAttrNormalizeSpace(defaultValue, defaultValue); 5644 5645 GROW; 5646 if (RAW != '>') { 5647 if (!IS_BLANK_CH(CUR)) { 5648 xmlFatalErrMsg(ctxt, 
XML_ERR_SPACE_REQUIRED, 5649 "Space required after the attribute default value\n"); 5650 if (defaultValue != NULL) 5651 xmlFree(defaultValue); 5652 if (tree != NULL) 5653 xmlFreeEnumeration(tree); 5654 break; 5655 } 5656 SKIP_BLANKS; 5657 } 5658 if (check == CUR_PTR) { 5659 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 5660 "in xmlParseAttributeListDecl\n"); 5661 if (defaultValue != NULL) 5662 xmlFree(defaultValue); 5663 if (tree != NULL) 5664 xmlFreeEnumeration(tree); 5665 break; 5666 } 5667 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5668 (ctxt->sax->attributeDecl != NULL)) 5669 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 5670 type, def, defaultValue, tree); 5671 else if (tree != NULL) 5672 xmlFreeEnumeration(tree); 5673 5674 if ((ctxt->sax2) && (defaultValue != NULL) && 5675 (def != XML_ATTRIBUTE_IMPLIED) && 5676 (def != XML_ATTRIBUTE_REQUIRED)) { 5677 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 5678 } 5679 if (ctxt->sax2) { 5680 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 5681 } 5682 if (defaultValue != NULL) 5683 xmlFree(defaultValue); 5684 GROW; 5685 } 5686 if (RAW == '>') { 5687 if (input != ctxt->input) { 5688 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5689 "Attribute list declaration doesn't start and stop in the same entity\n", 5690 NULL, NULL); 5691 } 5692 NEXT; 5693 } 5694 } 5695} 5696 5697/** 5698 * xmlParseElementMixedContentDecl: 5699 * @ctxt: an XML parser context 5700 * @inputchk: the input used for the current entity, needed for boundary checks 5701 * 5702 * parse the declaration for a Mixed Element content 5703 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5704 * 5705 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 5706 * '(' S? '#PCDATA' S? ')' 5707 * 5708 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 5709 * 5710 * [ VC: No Duplicate Types ] 5711 * The same name must not appear more than once in a single 5712 * mixed-content declaration. 
5713 * 5714 * returns: the list of the xmlElementContentPtr describing the element choices 5715 */ 5716xmlElementContentPtr 5717xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 5718 xmlElementContentPtr ret = NULL, cur = NULL, n; 5719 const xmlChar *elem = NULL; 5720 5721 GROW; 5722 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5723 SKIP(7); 5724 SKIP_BLANKS; 5725 SHRINK; 5726 if (RAW == ')') { 5727 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5728 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5729"Element content declaration doesn't start and stop in the same entity\n", 5730 NULL, NULL); 5731 } 5732 NEXT; 5733 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5734 if (ret == NULL) 5735 return(NULL); 5736 if (RAW == '*') { 5737 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5738 NEXT; 5739 } 5740 return(ret); 5741 } 5742 if ((RAW == '(') || (RAW == '|')) { 5743 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5744 if (ret == NULL) return(NULL); 5745 } 5746 while (RAW == '|') { 5747 NEXT; 5748 if (elem == NULL) { 5749 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5750 if (ret == NULL) return(NULL); 5751 ret->c1 = cur; 5752 if (cur != NULL) 5753 cur->parent = ret; 5754 cur = ret; 5755 } else { 5756 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5757 if (n == NULL) return(NULL); 5758 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5759 if (n->c1 != NULL) 5760 n->c1->parent = n; 5761 cur->c2 = n; 5762 if (n != NULL) 5763 n->parent = cur; 5764 cur = n; 5765 } 5766 SKIP_BLANKS; 5767 elem = xmlParseName(ctxt); 5768 if (elem == NULL) { 5769 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5770 "xmlParseElementMixedContentDecl : Name expected\n"); 5771 xmlFreeDocElementContent(ctxt->myDoc, cur); 5772 return(NULL); 5773 } 5774 SKIP_BLANKS; 5775 GROW; 5776 } 5777 if ((RAW == ')') && (NXT(1) == 
'*')) { 5778 if (elem != NULL) { 5779 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem, 5780 XML_ELEMENT_CONTENT_ELEMENT); 5781 if (cur->c2 != NULL) 5782 cur->c2->parent = cur; 5783 } 5784 if (ret != NULL) 5785 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5786 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5787 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5788"Element content declaration doesn't start and stop in the same entity\n", 5789 NULL, NULL); 5790 } 5791 SKIP(2); 5792 } else { 5793 xmlFreeDocElementContent(ctxt->myDoc, ret); 5794 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 5795 return(NULL); 5796 } 5797 5798 } else { 5799 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 5800 } 5801 return(ret); 5802} 5803 5804/** 5805 * xmlParseElementChildrenContentDeclPriv: 5806 * @ctxt: an XML parser context 5807 * @inputchk: the input used for the current entity, needed for boundary checks 5808 * @depth: the level of recursion 5809 * 5810 * parse the declaration for a Mixed Element content 5811 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5812 * 5813 * 5814 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 5815 * 5816 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 5817 * 5818 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 5819 * 5820 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 5821 * 5822 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 5823 * TODO Parameter-entity replacement text must be properly nested 5824 * with parenthesized groups. That is to say, if either of the 5825 * opening or closing parentheses in a choice, seq, or Mixed 5826 * construct is contained in the replacement text for a parameter 5827 * entity, both must be contained in the same replacement text. 
For 5828 * interoperability, if a parameter-entity reference appears in a 5829 * choice, seq, or Mixed construct, its replacement text should not 5830 * be empty, and neither the first nor last non-blank character of 5831 * the replacement text should be a connector (| or ,). 5832 * 5833 * Returns the tree of xmlElementContentPtr describing the element 5834 * hierarchy. 5835 */ 5836static xmlElementContentPtr 5837xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, 5838 int depth) { 5839 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5840 const xmlChar *elem; 5841 xmlChar type = 0; 5842 5843 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || 5844 (depth > 2048)) { 5845 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, 5846"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n", 5847 depth); 5848 return(NULL); 5849 } 5850 SKIP_BLANKS; 5851 GROW; 5852 if (RAW == '(') { 5853 int inputid = ctxt->input->id; 5854 5855 /* Recurse on first child */ 5856 NEXT; 5857 SKIP_BLANKS; 5858 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 5859 depth + 1); 5860 SKIP_BLANKS; 5861 GROW; 5862 } else { 5863 elem = xmlParseName(ctxt); 5864 if (elem == NULL) { 5865 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5866 return(NULL); 5867 } 5868 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5869 if (cur == NULL) { 5870 xmlErrMemory(ctxt, NULL); 5871 return(NULL); 5872 } 5873 GROW; 5874 if (RAW == '?') { 5875 cur->ocur = XML_ELEMENT_CONTENT_OPT; 5876 NEXT; 5877 } else if (RAW == '*') { 5878 cur->ocur = XML_ELEMENT_CONTENT_MULT; 5879 NEXT; 5880 } else if (RAW == '+') { 5881 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 5882 NEXT; 5883 } else { 5884 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 5885 } 5886 GROW; 5887 } 5888 SKIP_BLANKS; 5889 SHRINK; 5890 while (RAW != ')') { 5891 /* 5892 * Each loop we parse one separator and one element. 
5893 */ 5894 if (RAW == ',') { 5895 if (type == 0) type = CUR; 5896 5897 /* 5898 * Detect "Name | Name , Name" error 5899 */ 5900 else if (type != CUR) { 5901 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5902 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5903 type); 5904 if ((last != NULL) && (last != ret)) 5905 xmlFreeDocElementContent(ctxt->myDoc, last); 5906 if (ret != NULL) 5907 xmlFreeDocElementContent(ctxt->myDoc, ret); 5908 return(NULL); 5909 } 5910 NEXT; 5911 5912 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 5913 if (op == NULL) { 5914 if ((last != NULL) && (last != ret)) 5915 xmlFreeDocElementContent(ctxt->myDoc, last); 5916 xmlFreeDocElementContent(ctxt->myDoc, ret); 5917 return(NULL); 5918 } 5919 if (last == NULL) { 5920 op->c1 = ret; 5921 if (ret != NULL) 5922 ret->parent = op; 5923 ret = cur = op; 5924 } else { 5925 cur->c2 = op; 5926 if (op != NULL) 5927 op->parent = cur; 5928 op->c1 = last; 5929 if (last != NULL) 5930 last->parent = op; 5931 cur =op; 5932 last = NULL; 5933 } 5934 } else if (RAW == '|') { 5935 if (type == 0) type = CUR; 5936 5937 /* 5938 * Detect "Name , Name | Name" error 5939 */ 5940 else if (type != CUR) { 5941 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5942 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5943 type); 5944 if ((last != NULL) && (last != ret)) 5945 xmlFreeDocElementContent(ctxt->myDoc, last); 5946 if (ret != NULL) 5947 xmlFreeDocElementContent(ctxt->myDoc, ret); 5948 return(NULL); 5949 } 5950 NEXT; 5951 5952 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5953 if (op == NULL) { 5954 if ((last != NULL) && (last != ret)) 5955 xmlFreeDocElementContent(ctxt->myDoc, last); 5956 if (ret != NULL) 5957 xmlFreeDocElementContent(ctxt->myDoc, ret); 5958 return(NULL); 5959 } 5960 if (last == NULL) { 5961 op->c1 = ret; 5962 if (ret != NULL) 5963 ret->parent = op; 5964 ret = cur = op; 5965 } else { 5966 cur->c2 = op; 5967 if (op != NULL) 5968 
op->parent = cur; 5969 op->c1 = last; 5970 if (last != NULL) 5971 last->parent = op; 5972 cur =op; 5973 last = NULL; 5974 } 5975 } else { 5976 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 5977 if ((last != NULL) && (last != ret)) 5978 xmlFreeDocElementContent(ctxt->myDoc, last); 5979 if (ret != NULL) 5980 xmlFreeDocElementContent(ctxt->myDoc, ret); 5981 return(NULL); 5982 } 5983 GROW; 5984 SKIP_BLANKS; 5985 GROW; 5986 if (RAW == '(') { 5987 int inputid = ctxt->input->id; 5988 /* Recurse on second child */ 5989 NEXT; 5990 SKIP_BLANKS; 5991 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 5992 depth + 1); 5993 SKIP_BLANKS; 5994 } else { 5995 elem = xmlParseName(ctxt); 5996 if (elem == NULL) { 5997 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5998 if (ret != NULL) 5999 xmlFreeDocElementContent(ctxt->myDoc, ret); 6000 return(NULL); 6001 } 6002 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 6003 if (last == NULL) { 6004 if (ret != NULL) 6005 xmlFreeDocElementContent(ctxt->myDoc, ret); 6006 return(NULL); 6007 } 6008 if (RAW == '?') { 6009 last->ocur = XML_ELEMENT_CONTENT_OPT; 6010 NEXT; 6011 } else if (RAW == '*') { 6012 last->ocur = XML_ELEMENT_CONTENT_MULT; 6013 NEXT; 6014 } else if (RAW == '+') { 6015 last->ocur = XML_ELEMENT_CONTENT_PLUS; 6016 NEXT; 6017 } else { 6018 last->ocur = XML_ELEMENT_CONTENT_ONCE; 6019 } 6020 } 6021 SKIP_BLANKS; 6022 GROW; 6023 } 6024 if ((cur != NULL) && (last != NULL)) { 6025 cur->c2 = last; 6026 if (last != NULL) 6027 last->parent = cur; 6028 } 6029 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 6030 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6031"Element content declaration doesn't start and stop in the same entity\n", 6032 NULL, NULL); 6033 } 6034 NEXT; 6035 if (RAW == '?') { 6036 if (ret != NULL) { 6037 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 6038 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6039 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6040 else 
6041 ret->ocur = XML_ELEMENT_CONTENT_OPT; 6042 } 6043 NEXT; 6044 } else if (RAW == '*') { 6045 if (ret != NULL) { 6046 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6047 cur = ret; 6048 /* 6049 * Some normalization: 6050 * (a | b* | c?)* == (a | b | c)* 6051 */ 6052 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6053 if ((cur->c1 != NULL) && 6054 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6055 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 6056 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6057 if ((cur->c2 != NULL) && 6058 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6059 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 6060 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6061 cur = cur->c2; 6062 } 6063 } 6064 NEXT; 6065 } else if (RAW == '+') { 6066 if (ret != NULL) { 6067 int found = 0; 6068 6069 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 6070 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 6071 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6072 else 6073 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 6074 /* 6075 * Some normalization: 6076 * (a | b*)+ == (a | b)* 6077 * (a | b?)+ == (a | b)* 6078 */ 6079 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 6080 if ((cur->c1 != NULL) && 6081 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 6082 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 6083 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 6084 found = 1; 6085 } 6086 if ((cur->c2 != NULL) && 6087 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 6088 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 6089 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 6090 found = 1; 6091 } 6092 cur = cur->c2; 6093 } 6094 if (found) 6095 ret->ocur = XML_ELEMENT_CONTENT_MULT; 6096 } 6097 NEXT; 6098 } 6099 return(ret); 6100} 6101 6102/** 6103 * xmlParseElementChildrenContentDecl: 6104 * @ctxt: an XML parser context 6105 * @inputchk: the input used for the current entity, needed for boundary checks 6106 * 6107 * parse the declaration for a Mixed Element content 6108 * The leading '(' and spaces have 
been skipped in xmlParseElementContentDecl 6109 * 6110 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 6111 * 6112 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 6113 * 6114 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 6115 * 6116 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 6117 * 6118 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 6119 * TODO Parameter-entity replacement text must be properly nested 6120 * with parenthesized groups. That is to say, if either of the 6121 * opening or closing parentheses in a choice, seq, or Mixed 6122 * construct is contained in the replacement text for a parameter 6123 * entity, both must be contained in the same replacement text. For 6124 * interoperability, if a parameter-entity reference appears in a 6125 * choice, seq, or Mixed construct, its replacement text should not 6126 * be empty, and neither the first nor last non-blank character of 6127 * the replacement text should be a connector (| or ,). 6128 * 6129 * Returns the tree of xmlElementContentPtr describing the element 6130 * hierarchy. 6131 */ 6132xmlElementContentPtr 6133xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 6134 /* stub left for API/ABI compat */ 6135 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); 6136} 6137 6138/** 6139 * xmlParseElementContentDecl: 6140 * @ctxt: an XML parser context 6141 * @name: the name of the element being defined. 
6142 * @result: the Element Content pointer will be stored here if any 6143 * 6144 * parse the declaration for an Element content either Mixed or Children, 6145 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 6146 * 6147 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 6148 * 6149 * returns: the type of element content XML_ELEMENT_TYPE_xxx 6150 */ 6151 6152int 6153xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 6154 xmlElementContentPtr *result) { 6155 6156 xmlElementContentPtr tree = NULL; 6157 int inputid = ctxt->input->id; 6158 int res; 6159 6160 *result = NULL; 6161 6162 if (RAW != '(') { 6163 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6164 "xmlParseElementContentDecl : %s '(' expected\n", name); 6165 return(-1); 6166 } 6167 NEXT; 6168 GROW; 6169 SKIP_BLANKS; 6170 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 6171 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 6172 res = XML_ELEMENT_TYPE_MIXED; 6173 } else { 6174 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1); 6175 res = XML_ELEMENT_TYPE_ELEMENT; 6176 } 6177 SKIP_BLANKS; 6178 *result = tree; 6179 return(res); 6180} 6181 6182/** 6183 * xmlParseElementDecl: 6184 * @ctxt: an XML parser context 6185 * 6186 * parse an Element declaration. 6187 * 6188 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 6189 * 6190 * [ VC: Unique Element Type Declaration ] 6191 * No element type may be declared more than once 6192 * 6193 * Returns the type of the element, or -1 in case of error 6194 */ 6195int 6196xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 6197 const xmlChar *name; 6198 int ret = -1; 6199 xmlElementContentPtr content = NULL; 6200 6201 /* GROW; done in the caller */ 6202 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 6203 xmlParserInputPtr input = ctxt->input; 6204 6205 SKIP(9); 6206 if (!IS_BLANK_CH(CUR)) { 6207 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6208 "Space required after 'ELEMENT'\n"); 6209 } 6210 SKIP_BLANKS; 6211 name = xmlParseName(ctxt); 6212 if (name == NULL) { 6213 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6214 "xmlParseElementDecl: no name for Element\n"); 6215 return(-1); 6216 } 6217 while ((RAW == 0) && (ctxt->inputNr > 1)) 6218 xmlPopInput(ctxt); 6219 if (!IS_BLANK_CH(CUR)) { 6220 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6221 "Space required after the element name\n"); 6222 } 6223 SKIP_BLANKS; 6224 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 6225 SKIP(5); 6226 /* 6227 * Element must always be empty. 6228 */ 6229 ret = XML_ELEMENT_TYPE_EMPTY; 6230 } else if ((RAW == 'A') && (NXT(1) == 'N') && 6231 (NXT(2) == 'Y')) { 6232 SKIP(3); 6233 /* 6234 * Element is a generic container. 6235 */ 6236 ret = XML_ELEMENT_TYPE_ANY; 6237 } else if (RAW == '(') { 6238 ret = xmlParseElementContentDecl(ctxt, name, &content); 6239 } else { 6240 /* 6241 * [ WFC: PEs in Internal Subset ] error handling. 
6242 */ 6243 if ((RAW == '%') && (ctxt->external == 0) && 6244 (ctxt->inputNr == 1)) { 6245 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 6246 "PEReference: forbidden within markup decl in internal subset\n"); 6247 } else { 6248 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 6249 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 6250 } 6251 return(-1); 6252 } 6253 6254 SKIP_BLANKS; 6255 /* 6256 * Pop-up of finished entities. 6257 */ 6258 while ((RAW == 0) && (ctxt->inputNr > 1)) 6259 xmlPopInput(ctxt); 6260 SKIP_BLANKS; 6261 6262 if (RAW != '>') { 6263 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 6264 if (content != NULL) { 6265 xmlFreeDocElementContent(ctxt->myDoc, content); 6266 } 6267 } else { 6268 if (input != ctxt->input) { 6269 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 6270 "Element declaration doesn't start and stop in the same entity\n"); 6271 } 6272 6273 NEXT; 6274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 6275 (ctxt->sax->elementDecl != NULL)) { 6276 if (content != NULL) 6277 content->parent = NULL; 6278 ctxt->sax->elementDecl(ctxt->userData, name, ret, 6279 content); 6280 if ((content != NULL) && (content->parent == NULL)) { 6281 /* 6282 * this is a trick: if xmlAddElementDecl is called, 6283 * instead of copying the full tree it is plugged directly 6284 * if called from the parser. Avoid duplicating the 6285 * interfaces or change the API/ABI 6286 */ 6287 xmlFreeDocElementContent(ctxt->myDoc, content); 6288 } 6289 } else if (content != NULL) { 6290 xmlFreeDocElementContent(ctxt->myDoc, content); 6291 } 6292 } 6293 } 6294 return(ret); 6295} 6296 6297/** 6298 * xmlParseConditionalSections 6299 * @ctxt: an XML parser context 6300 * 6301 * [61] conditionalSect ::= includeSect | ignoreSect 6302 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 6303 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 6304 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 6305 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 6306 */ 6307 6308static void 6309xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 6310 int id = ctxt->input->id; 6311 6312 SKIP(3); 6313 SKIP_BLANKS; 6314 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 6315 SKIP(7); 6316 SKIP_BLANKS; 6317 if (RAW != '[') { 6318 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6319 } else { 6320 if (ctxt->input->id != id) { 6321 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6322 "All markup of the conditional section is not in the same entity\n", 6323 NULL, NULL); 6324 } 6325 NEXT; 6326 } 6327 if (xmlParserDebugEntities) { 6328 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6329 xmlGenericError(xmlGenericErrorContext, 6330 "%s(%d): ", ctxt->input->filename, 6331 ctxt->input->line); 6332 xmlGenericError(xmlGenericErrorContext, 6333 "Entering INCLUDE Conditional Section\n"); 6334 } 6335 6336 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 6337 (NXT(2) != '>'))) { 6338 const xmlChar *check = CUR_PTR; 6339 unsigned int cons = ctxt->input->consumed; 6340 6341 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6342 xmlParseConditionalSections(ctxt); 6343 } else if (IS_BLANK_CH(CUR)) { 6344 NEXT; 6345 } else if (RAW == '%') { 6346 xmlParsePEReference(ctxt); 6347 } else 6348 xmlParseMarkupDecl(ctxt); 6349 6350 /* 6351 * Pop-up of finished entities. 
6352 */ 6353 while ((RAW == 0) && (ctxt->inputNr > 1)) 6354 xmlPopInput(ctxt); 6355 6356 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6357 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6358 break; 6359 } 6360 } 6361 if (xmlParserDebugEntities) { 6362 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6363 xmlGenericError(xmlGenericErrorContext, 6364 "%s(%d): ", ctxt->input->filename, 6365 ctxt->input->line); 6366 xmlGenericError(xmlGenericErrorContext, 6367 "Leaving INCLUDE Conditional Section\n"); 6368 } 6369 6370 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 6371 int state; 6372 xmlParserInputState instate; 6373 int depth = 0; 6374 6375 SKIP(6); 6376 SKIP_BLANKS; 6377 if (RAW != '[') { 6378 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 6379 } else { 6380 if (ctxt->input->id != id) { 6381 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6382 "All markup of the conditional section is not in the same entity\n", 6383 NULL, NULL); 6384 } 6385 NEXT; 6386 } 6387 if (xmlParserDebugEntities) { 6388 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6389 xmlGenericError(xmlGenericErrorContext, 6390 "%s(%d): ", ctxt->input->filename, 6391 ctxt->input->line); 6392 xmlGenericError(xmlGenericErrorContext, 6393 "Entering IGNORE Conditional Section\n"); 6394 } 6395 6396 /* 6397 * Parse up to the end of the conditional section 6398 * But disable SAX event generating DTD building in the meantime 6399 */ 6400 state = ctxt->disableSAX; 6401 instate = ctxt->instate; 6402 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 6403 ctxt->instate = XML_PARSER_IGNORE; 6404 6405 while ((depth >= 0) && (RAW != 0)) { 6406 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6407 depth++; 6408 SKIP(3); 6409 continue; 6410 } 6411 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 6412 if (--depth >= 0) SKIP(3); 6413 continue; 6414 } 6415 NEXT; 6416 continue; 6417 } 6418 6419 ctxt->disableSAX = state; 6420 ctxt->instate = instate; 6421 6422 
if (xmlParserDebugEntities) { 6423 if ((ctxt->input != NULL) && (ctxt->input->filename)) 6424 xmlGenericError(xmlGenericErrorContext, 6425 "%s(%d): ", ctxt->input->filename, 6426 ctxt->input->line); 6427 xmlGenericError(xmlGenericErrorContext, 6428 "Leaving IGNORE Conditional Section\n"); 6429 } 6430 6431 } else { 6432 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 6433 } 6434 6435 if (RAW == 0) 6436 SHRINK; 6437 6438 if (RAW == 0) { 6439 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 6440 } else { 6441 if (ctxt->input->id != id) { 6442 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 6443 "All markup of the conditional section is not in the same entity\n", 6444 NULL, NULL); 6445 } 6446 SKIP(3); 6447 } 6448} 6449 6450/** 6451 * xmlParseMarkupDecl: 6452 * @ctxt: an XML parser context 6453 * 6454 * parse Markup declarations 6455 * 6456 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 6457 * NotationDecl | PI | Comment 6458 * 6459 * [ VC: Proper Declaration/PE Nesting ] 6460 * Parameter-entity replacement text must be properly nested with 6461 * markup declarations. That is to say, if either the first character 6462 * or the last character of a markup declaration (markupdecl above) is 6463 * contained in the replacement text for a parameter-entity reference, 6464 * both must be contained in the same replacement text. 6465 * 6466 * [ WFC: PEs in Internal Subset ] 6467 * In the internal DTD subset, parameter-entity references can occur 6468 * only where markup declarations can occur, not within markup declarations. 6469 * (This does not apply to references that occur in external parameter 6470 * entities or to the external subset.) 
6471 */ 6472void 6473xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 6474 GROW; 6475 if (CUR == '<') { 6476 if (NXT(1) == '!') { 6477 switch (NXT(2)) { 6478 case 'E': 6479 if (NXT(3) == 'L') 6480 xmlParseElementDecl(ctxt); 6481 else if (NXT(3) == 'N') 6482 xmlParseEntityDecl(ctxt); 6483 break; 6484 case 'A': 6485 xmlParseAttributeListDecl(ctxt); 6486 break; 6487 case 'N': 6488 xmlParseNotationDecl(ctxt); 6489 break; 6490 case '-': 6491 xmlParseComment(ctxt); 6492 break; 6493 default: 6494 /* there is an error but it will be detected later */ 6495 break; 6496 } 6497 } else if (NXT(1) == '?') { 6498 xmlParsePI(ctxt); 6499 } 6500 } 6501 /* 6502 * This is only for internal subset. On external entities, 6503 * the replacement is done before parsing stage 6504 */ 6505 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 6506 xmlParsePEReference(ctxt); 6507 6508 /* 6509 * Conditional sections are allowed from entities included 6510 * by PE References in the internal subset. 6511 */ 6512 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 6513 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6514 xmlParseConditionalSections(ctxt); 6515 } 6516 } 6517 6518 ctxt->instate = XML_PARSER_DTD; 6519} 6520 6521/** 6522 * xmlParseTextDecl: 6523 * @ctxt: an XML parser context 6524 * 6525 * parse an XML declaration header for external entities 6526 * 6527 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 6528 */ 6529 6530void 6531xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 6532 xmlChar *version; 6533 const xmlChar *encoding; 6534 6535 /* 6536 * We know that '<?xml' is here. 6537 */ 6538 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 6539 SKIP(5); 6540 } else { 6541 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 6542 return; 6543 } 6544 6545 if (!IS_BLANK_CH(CUR)) { 6546 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6547 "Space needed after '<?xml'\n"); 6548 } 6549 SKIP_BLANKS; 6550 6551 /* 6552 * We may have the VersionInfo here. 
6553 */ 6554 version = xmlParseVersionInfo(ctxt); 6555 if (version == NULL) 6556 version = xmlCharStrdup(XML_DEFAULT_VERSION); 6557 else { 6558 if (!IS_BLANK_CH(CUR)) { 6559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 6560 "Space needed here\n"); 6561 } 6562 } 6563 ctxt->input->version = version; 6564 6565 /* 6566 * We must have the encoding declaration 6567 */ 6568 encoding = xmlParseEncodingDecl(ctxt); 6569 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6570 /* 6571 * The XML REC instructs us to stop parsing right here 6572 */ 6573 return; 6574 } 6575 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 6576 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 6577 "Missing encoding in text declaration\n"); 6578 } 6579 6580 SKIP_BLANKS; 6581 if ((RAW == '?') && (NXT(1) == '>')) { 6582 SKIP(2); 6583 } else if (RAW == '>') { 6584 /* Deprecated old WD ... */ 6585 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6586 NEXT; 6587 } else { 6588 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 6589 MOVETO_ENDTAG(CUR_PTR); 6590 NEXT; 6591 } 6592} 6593 6594/** 6595 * xmlParseExternalSubset: 6596 * @ctxt: an XML parser context 6597 * @ExternalID: the external identifier 6598 * @SystemID: the system identifier (or URL) 6599 * 6600 * parse Markup declarations from an external subset 6601 * 6602 * [30] extSubset ::= textDecl? 
extSubsetDecl 6603 * 6604 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 6605 */ 6606void 6607xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 6608 const xmlChar *SystemID) { 6609 xmlDetectSAX2(ctxt); 6610 GROW; 6611 6612 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 6613 (ctxt->input->end - ctxt->input->cur >= 4)) { 6614 xmlChar start[4]; 6615 xmlCharEncoding enc; 6616 6617 start[0] = RAW; 6618 start[1] = NXT(1); 6619 start[2] = NXT(2); 6620 start[3] = NXT(3); 6621 enc = xmlDetectCharEncoding(start, 4); 6622 if (enc != XML_CHAR_ENCODING_NONE) 6623 xmlSwitchEncoding(ctxt, enc); 6624 } 6625 6626 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 6627 xmlParseTextDecl(ctxt); 6628 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 6629 /* 6630 * The XML REC instructs us to stop parsing right here 6631 */ 6632 ctxt->instate = XML_PARSER_EOF; 6633 return; 6634 } 6635 } 6636 if (ctxt->myDoc == NULL) { 6637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 6638 if (ctxt->myDoc == NULL) { 6639 xmlErrMemory(ctxt, "New Doc failed"); 6640 return; 6641 } 6642 ctxt->myDoc->properties = XML_DOC_INTERNAL; 6643 } 6644 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 6645 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 6646 6647 ctxt->instate = XML_PARSER_DTD; 6648 ctxt->external = 1; 6649 while (((RAW == '<') && (NXT(1) == '?')) || 6650 ((RAW == '<') && (NXT(1) == '!')) || 6651 (RAW == '%') || IS_BLANK_CH(CUR)) { 6652 const xmlChar *check = CUR_PTR; 6653 unsigned int cons = ctxt->input->consumed; 6654 6655 GROW; 6656 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 6657 xmlParseConditionalSections(ctxt); 6658 } else if (IS_BLANK_CH(CUR)) { 6659 NEXT; 6660 } else if (RAW == '%') { 6661 xmlParsePEReference(ctxt); 6662 } else 6663 xmlParseMarkupDecl(ctxt); 6664 6665 /* 6666 * Pop-up of finished entities. 
6667 */ 6668 while ((RAW == 0) && (ctxt->inputNr > 1)) 6669 xmlPopInput(ctxt); 6670 6671 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6672 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6673 break; 6674 } 6675 } 6676 6677 if (RAW != 0) { 6678 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 6679 } 6680 6681} 6682 6683/** 6684 * xmlParseReference: 6685 * @ctxt: an XML parser context 6686 * 6687 * parse and handle entity references in content, depending on the SAX 6688 * interface, this may end-up in a call to character() if this is a 6689 * CharRef, a predefined entity, if there is no reference() callback. 6690 * or if the parser was asked to switch to that mode. 6691 * 6692 * [67] Reference ::= EntityRef | CharRef 6693 */ 6694void 6695xmlParseReference(xmlParserCtxtPtr ctxt) { 6696 xmlEntityPtr ent; 6697 xmlChar *val; 6698 int was_checked; 6699 xmlNodePtr list = NULL; 6700 xmlParserErrors ret = XML_ERR_OK; 6701 6702 6703 if (RAW != '&') 6704 return; 6705 6706 /* 6707 * Simple case of a CharRef 6708 */ 6709 if (NXT(1) == '#') { 6710 int i = 0; 6711 xmlChar out[10]; 6712 int hex = NXT(2); 6713 int value = xmlParseCharRef(ctxt); 6714 6715 if (value == 0) 6716 return; 6717 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 6718 /* 6719 * So we are using non-UTF-8 buffers 6720 * Check that the char fit on 8bits, if not 6721 * generate a CharRef. 
6722 */ 6723 if (value <= 0xFF) { 6724 out[0] = value; 6725 out[1] = 0; 6726 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6727 (!ctxt->disableSAX)) 6728 ctxt->sax->characters(ctxt->userData, out, 1); 6729 } else { 6730 if ((hex == 'x') || (hex == 'X')) 6731 snprintf((char *)out, sizeof(out), "#x%X", value); 6732 else 6733 snprintf((char *)out, sizeof(out), "#%d", value); 6734 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6735 (!ctxt->disableSAX)) 6736 ctxt->sax->reference(ctxt->userData, out); 6737 } 6738 } else { 6739 /* 6740 * Just encode the value in UTF-8 6741 */ 6742 COPY_BUF(0 ,out, i, value); 6743 out[i] = 0; 6744 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6745 (!ctxt->disableSAX)) 6746 ctxt->sax->characters(ctxt->userData, out, i); 6747 } 6748 return; 6749 } 6750 6751 /* 6752 * We are seeing an entity reference 6753 */ 6754 ent = xmlParseEntityRef(ctxt); 6755 if (ent == NULL) return; 6756 if (!ctxt->wellFormed) 6757 return; 6758 was_checked = ent->checked; 6759 6760 /* special case of predefined entities */ 6761 if ((ent->name == NULL) || 6762 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 6763 val = ent->content; 6764 if (val == NULL) return; 6765 /* 6766 * inline the entity. 6767 */ 6768 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6769 (!ctxt->disableSAX)) 6770 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6771 return; 6772 } 6773 6774 /* 6775 * The first reference to the entity trigger a parsing phase 6776 * where the ent->children is filled with the result from 6777 * the parsing. 6778 */ 6779 if (ent->checked == 0) { 6780 unsigned long oldnbent = ctxt->nbentities; 6781 6782 /* 6783 * This is a bit hackish but this seems the best 6784 * way to make sure both SAX and DOM entity support 6785 * behaves okay. 
6786 */ 6787 void *user_data; 6788 if (ctxt->userData == ctxt) 6789 user_data = NULL; 6790 else 6791 user_data = ctxt->userData; 6792 6793 /* 6794 * Check that this entity is well formed 6795 * 4.3.2: An internal general parsed entity is well-formed 6796 * if its replacement text matches the production labeled 6797 * content. 6798 */ 6799 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6800 ctxt->depth++; 6801 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content, 6802 user_data, &list); 6803 ctxt->depth--; 6804 6805 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6806 ctxt->depth++; 6807 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax, 6808 user_data, ctxt->depth, ent->URI, 6809 ent->ExternalID, &list); 6810 ctxt->depth--; 6811 } else { 6812 ret = XML_ERR_ENTITY_PE_INTERNAL; 6813 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6814 "invalid entity type found\n", NULL); 6815 } 6816 6817 /* 6818 * Store the number of entities needing parsing for this entity 6819 * content and do checkings 6820 */ 6821 ent->checked = ctxt->nbentities - oldnbent; 6822 if (ret == XML_ERR_ENTITY_LOOP) { 6823 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6824 xmlFreeNodeList(list); 6825 return; 6826 } 6827 if (xmlParserEntityCheck(ctxt, 0, ent)) { 6828 xmlFreeNodeList(list); 6829 return; 6830 } 6831 6832 if ((ret == XML_ERR_OK) && (list != NULL)) { 6833 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6834 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6835 (ent->children == NULL)) { 6836 ent->children = list; 6837 if (ctxt->replaceEntities) { 6838 /* 6839 * Prune it directly in the generated document 6840 * except for single text nodes. 
6841 */ 6842 if (((list->type == XML_TEXT_NODE) && 6843 (list->next == NULL)) || 6844 (ctxt->parseMode == XML_PARSE_READER)) { 6845 list->parent = (xmlNodePtr) ent; 6846 list = NULL; 6847 ent->owner = 1; 6848 } else { 6849 ent->owner = 0; 6850 while (list != NULL) { 6851 list->parent = (xmlNodePtr) ctxt->node; 6852 list->doc = ctxt->myDoc; 6853 if (list->next == NULL) 6854 ent->last = list; 6855 list = list->next; 6856 } 6857 list = ent->children; 6858#ifdef LIBXML_LEGACY_ENABLED 6859 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6860 xmlAddEntityReference(ent, list, NULL); 6861#endif /* LIBXML_LEGACY_ENABLED */ 6862 } 6863 } else { 6864 ent->owner = 1; 6865 while (list != NULL) { 6866 list->parent = (xmlNodePtr) ent; 6867 if (list->next == NULL) 6868 ent->last = list; 6869 list = list->next; 6870 } 6871 } 6872 } else { 6873 xmlFreeNodeList(list); 6874 list = NULL; 6875 } 6876 } else if ((ret != XML_ERR_OK) && 6877 (ret != XML_WAR_UNDECLARED_ENTITY)) { 6878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6879 "Entity '%s' failed to parse\n", ent->name); 6880 } else if (list != NULL) { 6881 xmlFreeNodeList(list); 6882 list = NULL; 6883 } 6884 if (ent->checked == 0) 6885 ent->checked = 1; 6886 } else if (ent->checked != 1) { 6887 ctxt->nbentities += ent->checked; 6888 } 6889 6890 /* 6891 * Now that the entity content has been gathered 6892 * provide it to the application, this can take different forms based 6893 * on the parsing modes. 6894 */ 6895 if (ent->children == NULL) { 6896 /* 6897 * Probably running in SAX mode and the callbacks don't 6898 * build the entity content. So unless we already went 6899 * though parsing for first checking go though the entity 6900 * content to generate callbacks associated to the entity 6901 */ 6902 if (was_checked != 0) { 6903 void *user_data; 6904 /* 6905 * This is a bit hackish but this seems the best 6906 * way to make sure both SAX and DOM entity support 6907 * behaves okay. 
6908 */ 6909 if (ctxt->userData == ctxt) 6910 user_data = NULL; 6911 else 6912 user_data = ctxt->userData; 6913 6914 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6915 ctxt->depth++; 6916 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6917 ent->content, user_data, NULL); 6918 ctxt->depth--; 6919 } else if (ent->etype == 6920 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6921 ctxt->depth++; 6922 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 6923 ctxt->sax, user_data, ctxt->depth, 6924 ent->URI, ent->ExternalID, NULL); 6925 ctxt->depth--; 6926 } else { 6927 ret = XML_ERR_ENTITY_PE_INTERNAL; 6928 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6929 "invalid entity type found\n", NULL); 6930 } 6931 if (ret == XML_ERR_ENTITY_LOOP) { 6932 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6933 return; 6934 } 6935 } 6936 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6937 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6938 /* 6939 * Entity reference callback comes second, it's somewhat 6940 * superfluous but a compatibility to historical behaviour 6941 */ 6942 ctxt->sax->reference(ctxt->userData, ent->name); 6943 } 6944 return; 6945 } 6946 6947 /* 6948 * If we didn't get any children for the entity being built 6949 */ 6950 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6951 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6952 /* 6953 * Create a node. 6954 */ 6955 ctxt->sax->reference(ctxt->userData, ent->name); 6956 return; 6957 } 6958 6959 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 6960 /* 6961 * There is a problem on the handling of _private for entities 6962 * (bug 155816): Should we copy the content of the field from 6963 * the entity (possibly overwriting some value set by the user 6964 * when a copy is created), should we leave it alone, or should 6965 * we try to take care of different situations? The problem 6966 * is exacerbated by the usage of this field by the xmlReader. 
6967 * To fix this bug, we look at _private on the created node 6968 * and, if it's NULL, we copy in whatever was in the entity. 6969 * If it's not NULL we leave it alone. This is somewhat of a 6970 * hack - maybe we should have further tests to determine 6971 * what to do. 6972 */ 6973 if ((ctxt->node != NULL) && (ent->children != NULL)) { 6974 /* 6975 * Seems we are generating the DOM content, do 6976 * a simple tree copy for all references except the first 6977 * In the first occurrence list contains the replacement. 6978 * progressive == 2 means we are operating on the Reader 6979 * and since nodes are discarded we must copy all the time. 6980 */ 6981 if (((list == NULL) && (ent->owner == 0)) || 6982 (ctxt->parseMode == XML_PARSE_READER)) { 6983 xmlNodePtr nw = NULL, cur, firstChild = NULL; 6984 6985 /* 6986 * when operating on a reader, the entities definitions 6987 * are always owning the entities subtree. 6988 if (ctxt->parseMode == XML_PARSE_READER) 6989 ent->owner = 1; 6990 */ 6991 6992 cur = ent->children; 6993 while (cur != NULL) { 6994 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6995 if (nw != NULL) { 6996 if (nw->_private == NULL) 6997 nw->_private = cur->_private; 6998 if (firstChild == NULL){ 6999 firstChild = nw; 7000 } 7001 nw = xmlAddChild(ctxt->node, nw); 7002 } 7003 if (cur == ent->last) { 7004 /* 7005 * needed to detect some strange empty 7006 * node cases in the reader tests 7007 */ 7008 if ((ctxt->parseMode == XML_PARSE_READER) && 7009 (nw != NULL) && 7010 (nw->type == XML_ELEMENT_NODE) && 7011 (nw->children == NULL)) 7012 nw->extra = 1; 7013 7014 break; 7015 } 7016 cur = cur->next; 7017 } 7018#ifdef LIBXML_LEGACY_ENABLED 7019 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7020 xmlAddEntityReference(ent, firstChild, nw); 7021#endif /* LIBXML_LEGACY_ENABLED */ 7022 } else if (list == NULL) { 7023 xmlNodePtr nw = NULL, cur, next, last, 7024 firstChild = NULL; 7025 /* 7026 * Copy the entity child list and make it the new 7027 * entity child 
list. The goal is to make sure any 7028 * ID or REF referenced will be the one from the 7029 * document content and not the entity copy. 7030 */ 7031 cur = ent->children; 7032 ent->children = NULL; 7033 last = ent->last; 7034 ent->last = NULL; 7035 while (cur != NULL) { 7036 next = cur->next; 7037 cur->next = NULL; 7038 cur->parent = NULL; 7039 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 7040 if (nw != NULL) { 7041 if (nw->_private == NULL) 7042 nw->_private = cur->_private; 7043 if (firstChild == NULL){ 7044 firstChild = cur; 7045 } 7046 xmlAddChild((xmlNodePtr) ent, nw); 7047 xmlAddChild(ctxt->node, cur); 7048 } 7049 if (cur == last) 7050 break; 7051 cur = next; 7052 } 7053 if (ent->owner == 0) 7054 ent->owner = 1; 7055#ifdef LIBXML_LEGACY_ENABLED 7056 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 7057 xmlAddEntityReference(ent, firstChild, nw); 7058#endif /* LIBXML_LEGACY_ENABLED */ 7059 } else { 7060 const xmlChar *nbktext; 7061 7062 /* 7063 * the name change is to avoid coalescing of the 7064 * node with a possible previous text one which 7065 * would make ent->children a dangling pointer 7066 */ 7067 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 7068 -1); 7069 if (ent->children->type == XML_TEXT_NODE) 7070 ent->children->name = nbktext; 7071 if ((ent->last != ent->children) && 7072 (ent->last->type == XML_TEXT_NODE)) 7073 ent->last->name = nbktext; 7074 xmlAddChildList(ctxt->node, ent->children); 7075 } 7076 7077 /* 7078 * This is to avoid a nasty side effect, see 7079 * characters() in SAX.c 7080 */ 7081 ctxt->nodemem = 0; 7082 ctxt->nodelen = 0; 7083 return; 7084 } 7085 } 7086} 7087 7088/** 7089 * xmlParseEntityRef: 7090 * @ctxt: an XML parser context 7091 * 7092 * parse ENTITY references declarations 7093 * 7094 * [68] EntityRef ::= '&' Name ';' 7095 * 7096 * [ WFC: Entity Declared ] 7097 * In a document without any DTD, a document with only an internal DTD 7098 * subset which contains no parameter entity references, or a document 7099 * 
with "standalone='yes'", the Name given in the entity reference 7100 * must match that in an entity declaration, except that well-formed 7101 * documents need not declare any of the following entities: amp, lt, 7102 * gt, apos, quot. The declaration of a parameter entity must precede 7103 * any reference to it. Similarly, the declaration of a general entity 7104 * must precede any reference to it which appears in a default value in an 7105 * attribute-list declaration. Note that if entities are declared in the 7106 * external subset or in external parameter entities, a non-validating 7107 * processor is not obligated to read and process their declarations; 7108 * for such documents, the rule that an entity must be declared is a 7109 * well-formedness constraint only if standalone='yes'. 7110 * 7111 * [ WFC: Parsed Entity ] 7112 * An entity reference must not contain the name of an unparsed entity 7113 * 7114 * Returns the xmlEntityPtr if found, or NULL otherwise. 7115 */ 7116xmlEntityPtr 7117xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 7118 const xmlChar *name; 7119 xmlEntityPtr ent = NULL; 7120 7121 GROW; 7122 7123 if (RAW != '&') 7124 return(NULL); 7125 NEXT; 7126 name = xmlParseName(ctxt); 7127 if (name == NULL) { 7128 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7129 "xmlParseEntityRef: no name\n"); 7130 return(NULL); 7131 } 7132 if (RAW != ';') { 7133 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7134 return(NULL); 7135 } 7136 NEXT; 7137 7138 /* 7139 * Predefined entites override any extra definition 7140 */ 7141 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7142 ent = xmlGetPredefinedEntity(name); 7143 if (ent != NULL) 7144 return(ent); 7145 } 7146 7147 /* 7148 * Increate the number of entity references parsed 7149 */ 7150 ctxt->nbentities++; 7151 7152 /* 7153 * Ask first SAX for entity resolution, otherwise try the 7154 * entities which may have stored in the parser context. 
7155 */ 7156 if (ctxt->sax != NULL) { 7157 if (ctxt->sax->getEntity != NULL) 7158 ent = ctxt->sax->getEntity(ctxt->userData, name); 7159 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7160 (ctxt->options & XML_PARSE_OLDSAX)) 7161 ent = xmlGetPredefinedEntity(name); 7162 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 7163 (ctxt->userData==ctxt)) { 7164 ent = xmlSAX2GetEntity(ctxt, name); 7165 } 7166 } 7167 /* 7168 * [ WFC: Entity Declared ] 7169 * In a document without any DTD, a document with only an 7170 * internal DTD subset which contains no parameter entity 7171 * references, or a document with "standalone='yes'", the 7172 * Name given in the entity reference must match that in an 7173 * entity declaration, except that well-formed documents 7174 * need not declare any of the following entities: amp, lt, 7175 * gt, apos, quot. 7176 * The declaration of a parameter entity must precede any 7177 * reference to it. 7178 * Similarly, the declaration of a general entity must 7179 * precede any reference to it which appears in a default 7180 * value in an attribute-list declaration. Note that if 7181 * entities are declared in the external subset or in 7182 * external parameter entities, a non-validating processor 7183 * is not obligated to read and process their declarations; 7184 * for such documents, the rule that an entity must be 7185 * declared is a well-formedness constraint only if 7186 * standalone='yes'. 
7187 */ 7188 if (ent == NULL) { 7189 if ((ctxt->standalone == 1) || 7190 ((ctxt->hasExternalSubset == 0) && 7191 (ctxt->hasPErefs == 0))) { 7192 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7193 "Entity '%s' not defined\n", name); 7194 } else { 7195 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7196 "Entity '%s' not defined\n", name); 7197 if ((ctxt->inSubset == 0) && 7198 (ctxt->sax != NULL) && 7199 (ctxt->sax->reference != NULL)) { 7200 ctxt->sax->reference(ctxt->userData, name); 7201 } 7202 } 7203 ctxt->valid = 0; 7204 } 7205 7206 /* 7207 * [ WFC: Parsed Entity ] 7208 * An entity reference must not contain the name of an 7209 * unparsed entity 7210 */ 7211 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7212 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7213 "Entity reference to unparsed entity %s\n", name); 7214 } 7215 7216 /* 7217 * [ WFC: No External Entity References ] 7218 * Attribute values cannot contain direct or indirect 7219 * entity references to external entities. 7220 */ 7221 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7222 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7223 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7224 "Attribute references external entity '%s'\n", name); 7225 } 7226 /* 7227 * [ WFC: No < in Attribute Values ] 7228 * The replacement text of any entity referred to directly or 7229 * indirectly in an attribute value (other than "<") must 7230 * not contain a <. 7231 */ 7232 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7233 (ent != NULL) && (ent->content != NULL) && 7234 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7235 (xmlStrchr(ent->content, '<'))) { 7236 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7237 "'<' in entity '%s' is not allowed in attributes values\n", name); 7238 } 7239 7240 /* 7241 * Internal check, no parameter entities here ... 
7242 */ 7243 else { 7244 switch (ent->etype) { 7245 case XML_INTERNAL_PARAMETER_ENTITY: 7246 case XML_EXTERNAL_PARAMETER_ENTITY: 7247 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7248 "Attempt to reference the parameter entity '%s'\n", 7249 name); 7250 break; 7251 default: 7252 break; 7253 } 7254 } 7255 7256 /* 7257 * [ WFC: No Recursion ] 7258 * A parsed entity must not contain a recursive reference 7259 * to itself, either directly or indirectly. 7260 * Done somewhere else 7261 */ 7262 return(ent); 7263} 7264 7265/** 7266 * xmlParseStringEntityRef: 7267 * @ctxt: an XML parser context 7268 * @str: a pointer to an index in the string 7269 * 7270 * parse ENTITY references declarations, but this version parses it from 7271 * a string value. 7272 * 7273 * [68] EntityRef ::= '&' Name ';' 7274 * 7275 * [ WFC: Entity Declared ] 7276 * In a document without any DTD, a document with only an internal DTD 7277 * subset which contains no parameter entity references, or a document 7278 * with "standalone='yes'", the Name given in the entity reference 7279 * must match that in an entity declaration, except that well-formed 7280 * documents need not declare any of the following entities: amp, lt, 7281 * gt, apos, quot. The declaration of a parameter entity must precede 7282 * any reference to it. Similarly, the declaration of a general entity 7283 * must precede any reference to it which appears in a default value in an 7284 * attribute-list declaration. Note that if entities are declared in the 7285 * external subset or in external parameter entities, a non-validating 7286 * processor is not obligated to read and process their declarations; 7287 * for such documents, the rule that an entity must be declared is a 7288 * well-formedness constraint only if standalone='yes'. 7289 * 7290 * [ WFC: Parsed Entity ] 7291 * An entity reference must not contain the name of an unparsed entity 7292 * 7293 * Returns the xmlEntityPtr if found, or NULL otherwise. 
The str pointer 7294 * is updated to the current location in the string. 7295 */ 7296static xmlEntityPtr 7297xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 7298 xmlChar *name; 7299 const xmlChar *ptr; 7300 xmlChar cur; 7301 xmlEntityPtr ent = NULL; 7302 7303 if ((str == NULL) || (*str == NULL)) 7304 return(NULL); 7305 ptr = *str; 7306 cur = *ptr; 7307 if (cur != '&') 7308 return(NULL); 7309 7310 ptr++; 7311 name = xmlParseStringName(ctxt, &ptr); 7312 if (name == NULL) { 7313 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7314 "xmlParseStringEntityRef: no name\n"); 7315 *str = ptr; 7316 return(NULL); 7317 } 7318 if (*ptr != ';') { 7319 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7320 xmlFree(name); 7321 *str = ptr; 7322 return(NULL); 7323 } 7324 ptr++; 7325 7326 7327 /* 7328 * Predefined entites override any extra definition 7329 */ 7330 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) { 7331 ent = xmlGetPredefinedEntity(name); 7332 if (ent != NULL) { 7333 xmlFree(name); 7334 *str = ptr; 7335 return(ent); 7336 } 7337 } 7338 7339 /* 7340 * Increate the number of entity references parsed 7341 */ 7342 ctxt->nbentities++; 7343 7344 /* 7345 * Ask first SAX for entity resolution, otherwise try the 7346 * entities which may have stored in the parser context. 
7347 */ 7348 if (ctxt->sax != NULL) { 7349 if (ctxt->sax->getEntity != NULL) 7350 ent = ctxt->sax->getEntity(ctxt->userData, name); 7351 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX)) 7352 ent = xmlGetPredefinedEntity(name); 7353 if ((ent == NULL) && (ctxt->userData==ctxt)) { 7354 ent = xmlSAX2GetEntity(ctxt, name); 7355 } 7356 } 7357 7358 /* 7359 * [ WFC: Entity Declared ] 7360 * In a document without any DTD, a document with only an 7361 * internal DTD subset which contains no parameter entity 7362 * references, or a document with "standalone='yes'", the 7363 * Name given in the entity reference must match that in an 7364 * entity declaration, except that well-formed documents 7365 * need not declare any of the following entities: amp, lt, 7366 * gt, apos, quot. 7367 * The declaration of a parameter entity must precede any 7368 * reference to it. 7369 * Similarly, the declaration of a general entity must 7370 * precede any reference to it which appears in a default 7371 * value in an attribute-list declaration. Note that if 7372 * entities are declared in the external subset or in 7373 * external parameter entities, a non-validating processor 7374 * is not obligated to read and process their declarations; 7375 * for such documents, the rule that an entity must be 7376 * declared is a well-formedness constraint only if 7377 * standalone='yes'. 7378 */ 7379 if (ent == NULL) { 7380 if ((ctxt->standalone == 1) || 7381 ((ctxt->hasExternalSubset == 0) && 7382 (ctxt->hasPErefs == 0))) { 7383 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7384 "Entity '%s' not defined\n", name); 7385 } else { 7386 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 7387 "Entity '%s' not defined\n", 7388 name); 7389 } 7390 /* TODO ? 
check regressions ctxt->valid = 0; */ 7391 } 7392 7393 /* 7394 * [ WFC: Parsed Entity ] 7395 * An entity reference must not contain the name of an 7396 * unparsed entity 7397 */ 7398 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 7399 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 7400 "Entity reference to unparsed entity %s\n", name); 7401 } 7402 7403 /* 7404 * [ WFC: No External Entity References ] 7405 * Attribute values cannot contain direct or indirect 7406 * entity references to external entities. 7407 */ 7408 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7409 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 7410 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 7411 "Attribute references external entity '%s'\n", name); 7412 } 7413 /* 7414 * [ WFC: No < in Attribute Values ] 7415 * The replacement text of any entity referred to directly or 7416 * indirectly in an attribute value (other than "<") must 7417 * not contain a <. 7418 */ 7419 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 7420 (ent != NULL) && (ent->content != NULL) && 7421 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 7422 (xmlStrchr(ent->content, '<'))) { 7423 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 7424 "'<' in entity '%s' is not allowed in attributes values\n", 7425 name); 7426 } 7427 7428 /* 7429 * Internal check, no parameter entities here ... 7430 */ 7431 else { 7432 switch (ent->etype) { 7433 case XML_INTERNAL_PARAMETER_ENTITY: 7434 case XML_EXTERNAL_PARAMETER_ENTITY: 7435 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 7436 "Attempt to reference the parameter entity '%s'\n", 7437 name); 7438 break; 7439 default: 7440 break; 7441 } 7442 } 7443 7444 /* 7445 * [ WFC: No Recursion ] 7446 * A parsed entity must not contain a recursive reference 7447 * to itself, either directly or indirectly. 
7448 * Done somewhere else 7449 */ 7450 7451 xmlFree(name); 7452 *str = ptr; 7453 return(ent); 7454} 7455 7456/** 7457 * xmlParsePEReference: 7458 * @ctxt: an XML parser context 7459 * 7460 * parse PEReference declarations 7461 * The entity content is handled directly by pushing it's content as 7462 * a new input stream. 7463 * 7464 * [69] PEReference ::= '%' Name ';' 7465 * 7466 * [ WFC: No Recursion ] 7467 * A parsed entity must not contain a recursive 7468 * reference to itself, either directly or indirectly. 7469 * 7470 * [ WFC: Entity Declared ] 7471 * In a document without any DTD, a document with only an internal DTD 7472 * subset which contains no parameter entity references, or a document 7473 * with "standalone='yes'", ... ... The declaration of a parameter 7474 * entity must precede any reference to it... 7475 * 7476 * [ VC: Entity Declared ] 7477 * In a document with an external subset or external parameter entities 7478 * with "standalone='no'", ... ... The declaration of a parameter entity 7479 * must precede any reference to it... 7480 * 7481 * [ WFC: In DTD ] 7482 * Parameter-entity references may only appear in the DTD. 7483 * NOTE: misleading but this is handled. 
7484 */ 7485void 7486xmlParsePEReference(xmlParserCtxtPtr ctxt) 7487{ 7488 const xmlChar *name; 7489 xmlEntityPtr entity = NULL; 7490 xmlParserInputPtr input; 7491 7492 if (RAW != '%') 7493 return; 7494 NEXT; 7495 name = xmlParseName(ctxt); 7496 if (name == NULL) { 7497 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7498 "xmlParsePEReference: no name\n"); 7499 return; 7500 } 7501 if (RAW != ';') { 7502 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7503 return; 7504 } 7505 7506 NEXT; 7507 7508 /* 7509 * Increate the number of entity references parsed 7510 */ 7511 ctxt->nbentities++; 7512 7513 /* 7514 * Request the entity from SAX 7515 */ 7516 if ((ctxt->sax != NULL) && 7517 (ctxt->sax->getParameterEntity != NULL)) 7518 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7519 name); 7520 if (entity == NULL) { 7521 /* 7522 * [ WFC: Entity Declared ] 7523 * In a document without any DTD, a document with only an 7524 * internal DTD subset which contains no parameter entity 7525 * references, or a document with "standalone='yes'", ... 7526 * ... The declaration of a parameter entity must precede 7527 * any reference to it... 7528 */ 7529 if ((ctxt->standalone == 1) || 7530 ((ctxt->hasExternalSubset == 0) && 7531 (ctxt->hasPErefs == 0))) { 7532 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7533 "PEReference: %%%s; not found\n", 7534 name); 7535 } else { 7536 /* 7537 * [ VC: Entity Declared ] 7538 * In a document with an external subset or external 7539 * parameter entities with "standalone='no'", ... 7540 * ... The declaration of a parameter entity must 7541 * precede any reference to it... 
7542 */ 7543 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7544 "PEReference: %%%s; not found\n", 7545 name, NULL); 7546 ctxt->valid = 0; 7547 } 7548 } else { 7549 /* 7550 * Internal checking in case the entity quest barfed 7551 */ 7552 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7553 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7554 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7555 "Internal: %%%s; is not a parameter entity\n", 7556 name, NULL); 7557 } else if (ctxt->input->free != deallocblankswrapper) { 7558 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 7559 if (xmlPushInput(ctxt, input) < 0) 7560 return; 7561 } else { 7562 /* 7563 * TODO !!! 7564 * handle the extra spaces added before and after 7565 * c.f. http://www.w3.org/TR/REC-xml#as-PE 7566 */ 7567 input = xmlNewEntityInputStream(ctxt, entity); 7568 if (xmlPushInput(ctxt, input) < 0) 7569 return; 7570 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 7571 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 7572 (IS_BLANK_CH(NXT(5)))) { 7573 xmlParseTextDecl(ctxt); 7574 if (ctxt->errNo == 7575 XML_ERR_UNSUPPORTED_ENCODING) { 7576 /* 7577 * The XML REC instructs us to stop parsing 7578 * right here 7579 */ 7580 ctxt->instate = XML_PARSER_EOF; 7581 return; 7582 } 7583 } 7584 } 7585 } 7586 ctxt->hasPErefs = 1; 7587} 7588 7589/** 7590 * xmlLoadEntityContent: 7591 * @ctxt: an XML parser context 7592 * @entity: an unloaded system entity 7593 * 7594 * Load the original content of the given system entity from the 7595 * ExternalID/SystemID given. 
This is to be used for Included in Literal 7596 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references 7597 * 7598 * Returns 0 in case of success and -1 in case of failure 7599 */ 7600static int 7601xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 7602 xmlParserInputPtr input; 7603 xmlBufferPtr buf; 7604 int l, c; 7605 int count = 0; 7606 7607 if ((ctxt == NULL) || (entity == NULL) || 7608 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && 7609 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) || 7610 (entity->content != NULL)) { 7611 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7612 "xmlLoadEntityContent parameter error"); 7613 return(-1); 7614 } 7615 7616 if (xmlParserDebugEntities) 7617 xmlGenericError(xmlGenericErrorContext, 7618 "Reading %s entity content input\n", entity->name); 7619 7620 buf = xmlBufferCreate(); 7621 if (buf == NULL) { 7622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7623 "xmlLoadEntityContent parameter error"); 7624 return(-1); 7625 } 7626 7627 input = xmlNewEntityInputStream(ctxt, entity); 7628 if (input == NULL) { 7629 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7630 "xmlLoadEntityContent input error"); 7631 xmlBufferFree(buf); 7632 return(-1); 7633 } 7634 7635 /* 7636 * Push the entity as the current input, read char by char 7637 * saving to the buffer until the end of the entity or an error 7638 */ 7639 if (xmlPushInput(ctxt, input) < 0) { 7640 xmlBufferFree(buf); 7641 return(-1); 7642 } 7643 7644 GROW; 7645 c = CUR_CHAR(l); 7646 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) && 7647 (IS_CHAR(c))) { 7648 xmlBufferAdd(buf, ctxt->input->cur, l); 7649 if (count++ > 100) { 7650 count = 0; 7651 GROW; 7652 } 7653 NEXTL(l); 7654 c = CUR_CHAR(l); 7655 } 7656 7657 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { 7658 xmlPopInput(ctxt); 7659 } else if (!IS_CHAR(c)) { 7660 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 7661 "xmlLoadEntityContent: invalid char 
value %d\n", 7662 c); 7663 xmlBufferFree(buf); 7664 return(-1); 7665 } 7666 entity->content = buf->content; 7667 buf->content = NULL; 7668 xmlBufferFree(buf); 7669 7670 return(0); 7671} 7672 7673/** 7674 * xmlParseStringPEReference: 7675 * @ctxt: an XML parser context 7676 * @str: a pointer to an index in the string 7677 * 7678 * parse PEReference declarations 7679 * 7680 * [69] PEReference ::= '%' Name ';' 7681 * 7682 * [ WFC: No Recursion ] 7683 * A parsed entity must not contain a recursive 7684 * reference to itself, either directly or indirectly. 7685 * 7686 * [ WFC: Entity Declared ] 7687 * In a document without any DTD, a document with only an internal DTD 7688 * subset which contains no parameter entity references, or a document 7689 * with "standalone='yes'", ... ... The declaration of a parameter 7690 * entity must precede any reference to it... 7691 * 7692 * [ VC: Entity Declared ] 7693 * In a document with an external subset or external parameter entities 7694 * with "standalone='no'", ... ... The declaration of a parameter entity 7695 * must precede any reference to it... 7696 * 7697 * [ WFC: In DTD ] 7698 * Parameter-entity references may only appear in the DTD. 7699 * NOTE: misleading but this is handled. 7700 * 7701 * Returns the string of the entity content. 
7702 * str is updated to the current value of the index 7703 */ 7704static xmlEntityPtr 7705xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 7706 const xmlChar *ptr; 7707 xmlChar cur; 7708 xmlChar *name; 7709 xmlEntityPtr entity = NULL; 7710 7711 if ((str == NULL) || (*str == NULL)) return(NULL); 7712 ptr = *str; 7713 cur = *ptr; 7714 if (cur != '%') 7715 return(NULL); 7716 ptr++; 7717 name = xmlParseStringName(ctxt, &ptr); 7718 if (name == NULL) { 7719 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7720 "xmlParseStringPEReference: no name\n"); 7721 *str = ptr; 7722 return(NULL); 7723 } 7724 cur = *ptr; 7725 if (cur != ';') { 7726 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 7727 xmlFree(name); 7728 *str = ptr; 7729 return(NULL); 7730 } 7731 ptr++; 7732 7733 /* 7734 * Increate the number of entity references parsed 7735 */ 7736 ctxt->nbentities++; 7737 7738 /* 7739 * Request the entity from SAX 7740 */ 7741 if ((ctxt->sax != NULL) && 7742 (ctxt->sax->getParameterEntity != NULL)) 7743 entity = ctxt->sax->getParameterEntity(ctxt->userData, 7744 name); 7745 if (entity == NULL) { 7746 /* 7747 * [ WFC: Entity Declared ] 7748 * In a document without any DTD, a document with only an 7749 * internal DTD subset which contains no parameter entity 7750 * references, or a document with "standalone='yes'", ... 7751 * ... The declaration of a parameter entity must precede 7752 * any reference to it... 7753 */ 7754 if ((ctxt->standalone == 1) || 7755 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { 7756 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 7757 "PEReference: %%%s; not found\n", name); 7758 } else { 7759 /* 7760 * [ VC: Entity Declared ] 7761 * In a document with an external subset or external 7762 * parameter entities with "standalone='no'", ... 7763 * ... The declaration of a parameter entity must 7764 * precede any reference to it... 
7765 */ 7766 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7767 "PEReference: %%%s; not found\n", 7768 name, NULL); 7769 ctxt->valid = 0; 7770 } 7771 } else { 7772 /* 7773 * Internal checking in case the entity quest barfed 7774 */ 7775 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 7776 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 7777 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 7778 "%%%s; is not a parameter entity\n", 7779 name, NULL); 7780 } 7781 } 7782 ctxt->hasPErefs = 1; 7783 xmlFree(name); 7784 *str = ptr; 7785 return(entity); 7786} 7787 7788/** 7789 * xmlParseDocTypeDecl: 7790 * @ctxt: an XML parser context 7791 * 7792 * parse a DOCTYPE declaration 7793 * 7794 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 7795 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7796 * 7797 * [ VC: Root Element Type ] 7798 * The Name in the document type declaration must match the element 7799 * type of the root element. 7800 */ 7801 7802void 7803xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 7804 const xmlChar *name = NULL; 7805 xmlChar *ExternalID = NULL; 7806 xmlChar *URI = NULL; 7807 7808 /* 7809 * We know that '<!DOCTYPE' has been detected. 7810 */ 7811 SKIP(9); 7812 7813 SKIP_BLANKS; 7814 7815 /* 7816 * Parse the DOCTYPE name. 7817 */ 7818 name = xmlParseName(ctxt); 7819 if (name == NULL) { 7820 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7821 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 7822 } 7823 ctxt->intSubName = name; 7824 7825 SKIP_BLANKS; 7826 7827 /* 7828 * Check for SystemID and ExternalID 7829 */ 7830 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 7831 7832 if ((URI != NULL) || (ExternalID != NULL)) { 7833 ctxt->hasExternalSubset = 1; 7834 } 7835 ctxt->extSubURI = URI; 7836 ctxt->extSubSystem = ExternalID; 7837 7838 SKIP_BLANKS; 7839 7840 /* 7841 * Create and update the internal subset. 
7842 */ 7843 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 7844 (!ctxt->disableSAX)) 7845 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 7846 7847 /* 7848 * Is there any internal subset declarations ? 7849 * they are handled separately in xmlParseInternalSubset() 7850 */ 7851 if (RAW == '[') 7852 return; 7853 7854 /* 7855 * We should be at the end of the DOCTYPE declaration. 7856 */ 7857 if (RAW != '>') { 7858 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7859 } 7860 NEXT; 7861} 7862 7863/** 7864 * xmlParseInternalSubset: 7865 * @ctxt: an XML parser context 7866 * 7867 * parse the internal subset declaration 7868 * 7869 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 7870 */ 7871 7872static void 7873xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 7874 /* 7875 * Is there any DTD definition ? 7876 */ 7877 if (RAW == '[') { 7878 ctxt->instate = XML_PARSER_DTD; 7879 NEXT; 7880 /* 7881 * Parse the succession of Markup declarations and 7882 * PEReferences. 7883 * Subsequence (markupdecl | PEReference | S)* 7884 */ 7885 while (RAW != ']') { 7886 const xmlChar *check = CUR_PTR; 7887 unsigned int cons = ctxt->input->consumed; 7888 7889 SKIP_BLANKS; 7890 xmlParseMarkupDecl(ctxt); 7891 xmlParsePEReference(ctxt); 7892 7893 /* 7894 * Pop-up of finished entities. 7895 */ 7896 while ((RAW == 0) && (ctxt->inputNr > 1)) 7897 xmlPopInput(ctxt); 7898 7899 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 7900 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 7901 "xmlParseInternalSubset: error detected in Markup declaration\n"); 7902 break; 7903 } 7904 } 7905 if (RAW == ']') { 7906 NEXT; 7907 SKIP_BLANKS; 7908 } 7909 } 7910 7911 /* 7912 * We should be at the end of the DOCTYPE declaration. 
7913 */ 7914 if (RAW != '>') { 7915 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 7916 } 7917 NEXT; 7918} 7919 7920#ifdef LIBXML_SAX1_ENABLED 7921/** 7922 * xmlParseAttribute: 7923 * @ctxt: an XML parser context 7924 * @value: a xmlChar ** used to store the value of the attribute 7925 * 7926 * parse an attribute 7927 * 7928 * [41] Attribute ::= Name Eq AttValue 7929 * 7930 * [ WFC: No External Entity References ] 7931 * Attribute values cannot contain direct or indirect entity references 7932 * to external entities. 7933 * 7934 * [ WFC: No < in Attribute Values ] 7935 * The replacement text of any entity referred to directly or indirectly in 7936 * an attribute value (other than "<") must not contain a <. 7937 * 7938 * [ VC: Attribute Value Type ] 7939 * The attribute must have been declared; the value must be of the type 7940 * declared for it. 7941 * 7942 * [25] Eq ::= S? '=' S? 7943 * 7944 * With namespace: 7945 * 7946 * [NS 11] Attribute ::= QName Eq AttValue 7947 * 7948 * Also the case QName == xmlns:??? is handled independently as a namespace 7949 * definition. 7950 * 7951 * Returns the attribute name, and the value in *value. 
7952 */ 7953 7954const xmlChar * 7955xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 7956 const xmlChar *name; 7957 xmlChar *val; 7958 7959 *value = NULL; 7960 GROW; 7961 name = xmlParseName(ctxt); 7962 if (name == NULL) { 7963 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7964 "error parsing attribute name\n"); 7965 return(NULL); 7966 } 7967 7968 /* 7969 * read the value 7970 */ 7971 SKIP_BLANKS; 7972 if (RAW == '=') { 7973 NEXT; 7974 SKIP_BLANKS; 7975 val = xmlParseAttValue(ctxt); 7976 ctxt->instate = XML_PARSER_CONTENT; 7977 } else { 7978 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7979 "Specification mandate value for attribute %s\n", name); 7980 return(NULL); 7981 } 7982 7983 /* 7984 * Check that xml:lang conforms to the specification 7985 * No more registered as an error, just generate a warning now 7986 * since this was deprecated in XML second edition 7987 */ 7988 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7989 if (!xmlCheckLanguageID(val)) { 7990 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7991 "Malformed value for xml:lang : %s\n", 7992 val, NULL); 7993 } 7994 } 7995 7996 /* 7997 * Check that xml:space conforms to the specification 7998 */ 7999 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 8000 if (xmlStrEqual(val, BAD_CAST "default")) 8001 *(ctxt->space) = 0; 8002 else if (xmlStrEqual(val, BAD_CAST "preserve")) 8003 *(ctxt->space) = 1; 8004 else { 8005 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8006"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8007 val, NULL); 8008 } 8009 } 8010 8011 *value = val; 8012 return(name); 8013} 8014 8015/** 8016 * xmlParseStartTag: 8017 * @ctxt: an XML parser context 8018 * 8019 * parse a start of tag either for rule element or 8020 * EmptyElement. In both case we don't parse the tag closing chars. 8021 * 8022 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8023 * 8024 * [ WFC: Unique Att Spec ] 8025 * No attribute name may appear more than once in the same start-tag or 8026 * empty-element tag. 8027 * 8028 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8029 * 8030 * [ WFC: Unique Att Spec ] 8031 * No attribute name may appear more than once in the same start-tag or 8032 * empty-element tag. 8033 * 8034 * With namespace: 8035 * 8036 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8037 * 8038 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8039 * 8040 * Returns the element name parsed 8041 */ 8042 8043const xmlChar * 8044xmlParseStartTag(xmlParserCtxtPtr ctxt) { 8045 const xmlChar *name; 8046 const xmlChar *attname; 8047 xmlChar *attvalue; 8048 const xmlChar **atts = ctxt->atts; 8049 int nbatts = 0; 8050 int maxatts = ctxt->maxatts; 8051 int i; 8052 8053 if (RAW != '<') return(NULL); 8054 NEXT1; 8055 8056 name = xmlParseName(ctxt); 8057 if (name == NULL) { 8058 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8059 "xmlParseStartTag: invalid element name\n"); 8060 return(NULL); 8061 } 8062 8063 /* 8064 * Now parse the attributes, it ends up with the ending 8065 * 8066 * (S Attribute)* S? 8067 */ 8068 SKIP_BLANKS; 8069 GROW; 8070 8071 while ((RAW != '>') && 8072 ((RAW != '/') || (NXT(1) != '>')) && 8073 (IS_BYTE_CHAR(RAW))) { 8074 const xmlChar *q = CUR_PTR; 8075 unsigned int cons = ctxt->input->consumed; 8076 8077 attname = xmlParseAttribute(ctxt, &attvalue); 8078 if ((attname != NULL) && (attvalue != NULL)) { 8079 /* 8080 * [ WFC: Unique Att Spec ] 8081 * No attribute name may appear more than once in the same 8082 * start-tag or empty-element tag. 
8083 */ 8084 for (i = 0; i < nbatts;i += 2) { 8085 if (xmlStrEqual(atts[i], attname)) { 8086 xmlErrAttributeDup(ctxt, NULL, attname); 8087 xmlFree(attvalue); 8088 goto failed; 8089 } 8090 } 8091 /* 8092 * Add the pair to atts 8093 */ 8094 if (atts == NULL) { 8095 maxatts = 22; /* allow for 10 attrs by default */ 8096 atts = (const xmlChar **) 8097 xmlMalloc(maxatts * sizeof(xmlChar *)); 8098 if (atts == NULL) { 8099 xmlErrMemory(ctxt, NULL); 8100 if (attvalue != NULL) 8101 xmlFree(attvalue); 8102 goto failed; 8103 } 8104 ctxt->atts = atts; 8105 ctxt->maxatts = maxatts; 8106 } else if (nbatts + 4 > maxatts) { 8107 const xmlChar **n; 8108 8109 maxatts *= 2; 8110 n = (const xmlChar **) xmlRealloc((void *) atts, 8111 maxatts * sizeof(const xmlChar *)); 8112 if (n == NULL) { 8113 xmlErrMemory(ctxt, NULL); 8114 if (attvalue != NULL) 8115 xmlFree(attvalue); 8116 goto failed; 8117 } 8118 atts = n; 8119 ctxt->atts = atts; 8120 ctxt->maxatts = maxatts; 8121 } 8122 atts[nbatts++] = attname; 8123 atts[nbatts++] = attvalue; 8124 atts[nbatts] = NULL; 8125 atts[nbatts + 1] = NULL; 8126 } else { 8127 if (attvalue != NULL) 8128 xmlFree(attvalue); 8129 } 8130 8131failed: 8132 8133 GROW 8134 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8135 break; 8136 if (!IS_BLANK_CH(RAW)) { 8137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8138 "attributes construct error\n"); 8139 } 8140 SKIP_BLANKS; 8141 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8142 (attname == NULL) && (attvalue == NULL)) { 8143 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 8144 "xmlParseStartTag: problem parsing attributes\n"); 8145 break; 8146 } 8147 SHRINK; 8148 GROW; 8149 } 8150 8151 /* 8152 * SAX: Start of Element ! 
8153 */ 8154 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 8155 (!ctxt->disableSAX)) { 8156 if (nbatts > 0) 8157 ctxt->sax->startElement(ctxt->userData, name, atts); 8158 else 8159 ctxt->sax->startElement(ctxt->userData, name, NULL); 8160 } 8161 8162 if (atts != NULL) { 8163 /* Free only the content strings */ 8164 for (i = 1;i < nbatts;i+=2) 8165 if (atts[i] != NULL) 8166 xmlFree((xmlChar *) atts[i]); 8167 } 8168 return(name); 8169} 8170 8171/** 8172 * xmlParseEndTag1: 8173 * @ctxt: an XML parser context 8174 * @line: line of the start tag 8175 * @nsNr: number of namespaces on the start tag 8176 * 8177 * parse an end of tag 8178 * 8179 * [42] ETag ::= '</' Name S? '>' 8180 * 8181 * With namespace 8182 * 8183 * [NS 9] ETag ::= '</' QName S? '>' 8184 */ 8185 8186static void 8187xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 8188 const xmlChar *name; 8189 8190 GROW; 8191 if ((RAW != '<') || (NXT(1) != '/')) { 8192 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 8193 "xmlParseEndTag: '</' not found\n"); 8194 return; 8195 } 8196 SKIP(2); 8197 8198 name = xmlParseNameAndCompare(ctxt,ctxt->name); 8199 8200 /* 8201 * We should definitely be at the ending "S? '>'" part 8202 */ 8203 GROW; 8204 SKIP_BLANKS; 8205 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8206 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8207 } else 8208 NEXT1; 8209 8210 /* 8211 * [ WFC: Element Type Match ] 8212 * The Name in an element's end-tag must match the element type in the 8213 * start-tag. 
8214 * 8215 */ 8216 if (name != (xmlChar*)1) { 8217 if (name == NULL) name = BAD_CAST "unparseable"; 8218 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8219 "Opening and ending tag mismatch: %s line %d and %s\n", 8220 ctxt->name, line, name); 8221 } 8222 8223 /* 8224 * SAX: End of Tag 8225 */ 8226 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8227 (!ctxt->disableSAX)) 8228 ctxt->sax->endElement(ctxt->userData, ctxt->name); 8229 8230 namePop(ctxt); 8231 spacePop(ctxt); 8232 return; 8233} 8234 8235/** 8236 * xmlParseEndTag: 8237 * @ctxt: an XML parser context 8238 * 8239 * parse an end of tag 8240 * 8241 * [42] ETag ::= '</' Name S? '>' 8242 * 8243 * With namespace 8244 * 8245 * [NS 9] ETag ::= '</' QName S? '>' 8246 */ 8247 8248void 8249xmlParseEndTag(xmlParserCtxtPtr ctxt) { 8250 xmlParseEndTag1(ctxt, 0); 8251} 8252#endif /* LIBXML_SAX1_ENABLED */ 8253 8254/************************************************************************ 8255 * * 8256 * SAX 2 specific operations * 8257 * * 8258 ************************************************************************/ 8259 8260/* 8261 * xmlGetNamespace: 8262 * @ctxt: an XML parser context 8263 * @prefix: the prefix to lookup 8264 * 8265 * Lookup the namespace name for the @prefix (which ca be NULL) 8266 * The prefix must come from the @ctxt->dict dictionnary 8267 * 8268 * Returns the namespace name or NULL if not bound 8269 */ 8270static const xmlChar * 8271xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 8272 int i; 8273 8274 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 8275 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 8276 if (ctxt->nsTab[i] == prefix) { 8277 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 8278 return(NULL); 8279 return(ctxt->nsTab[i + 1]); 8280 } 8281 return(NULL); 8282} 8283 8284/** 8285 * xmlParseQName: 8286 * @ctxt: an XML parser context 8287 * @prefix: pointer to store the prefix part 8288 * 8289 * parse an XML Namespace QName 8290 * 8291 * [6] QName ::= 
(Prefix ':')? LocalPart 8292 * [7] Prefix ::= NCName 8293 * [8] LocalPart ::= NCName 8294 * 8295 * Returns the Name parsed or NULL 8296 */ 8297 8298static const xmlChar * 8299xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 8300 const xmlChar *l, *p; 8301 8302 GROW; 8303 8304 l = xmlParseNCName(ctxt); 8305 if (l == NULL) { 8306 if (CUR == ':') { 8307 l = xmlParseName(ctxt); 8308 if (l != NULL) { 8309 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8310 "Failed to parse QName '%s'\n", l, NULL, NULL); 8311 *prefix = NULL; 8312 return(l); 8313 } 8314 } 8315 return(NULL); 8316 } 8317 if (CUR == ':') { 8318 NEXT; 8319 p = l; 8320 l = xmlParseNCName(ctxt); 8321 if (l == NULL) { 8322 xmlChar *tmp; 8323 8324 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8325 "Failed to parse QName '%s:'\n", p, NULL, NULL); 8326 l = xmlParseNmtoken(ctxt); 8327 if (l == NULL) 8328 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 8329 else { 8330 tmp = xmlBuildQName(l, p, NULL, 0); 8331 xmlFree((char *)l); 8332 } 8333 p = xmlDictLookup(ctxt->dict, tmp, -1); 8334 if (tmp != NULL) xmlFree(tmp); 8335 *prefix = NULL; 8336 return(p); 8337 } 8338 if (CUR == ':') { 8339 xmlChar *tmp; 8340 8341 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 8342 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 8343 NEXT; 8344 tmp = (xmlChar *) xmlParseName(ctxt); 8345 if (tmp != NULL) { 8346 tmp = xmlBuildQName(tmp, l, NULL, 0); 8347 l = xmlDictLookup(ctxt->dict, tmp, -1); 8348 if (tmp != NULL) xmlFree(tmp); 8349 *prefix = p; 8350 return(l); 8351 } 8352 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 8353 l = xmlDictLookup(ctxt->dict, tmp, -1); 8354 if (tmp != NULL) xmlFree(tmp); 8355 *prefix = p; 8356 return(l); 8357 } 8358 *prefix = p; 8359 } else 8360 *prefix = NULL; 8361 return(l); 8362} 8363 8364/** 8365 * xmlParseQNameAndCompare: 8366 * @ctxt: an XML parser context 8367 * @name: the localname 8368 * @prefix: the prefix, if any. 
8369 * 8370 * parse an XML name and compares for match 8371 * (specialized for endtag parsing) 8372 * 8373 * Returns NULL for an illegal name, (xmlChar*) 1 for success 8374 * and the name for mismatch 8375 */ 8376 8377static const xmlChar * 8378xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 8379 xmlChar const *prefix) { 8380 const xmlChar *cmp; 8381 const xmlChar *in; 8382 const xmlChar *ret; 8383 const xmlChar *prefix2; 8384 8385 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 8386 8387 GROW; 8388 in = ctxt->input->cur; 8389 8390 cmp = prefix; 8391 while (*in != 0 && *in == *cmp) { 8392 ++in; 8393 ++cmp; 8394 } 8395 if ((*cmp == 0) && (*in == ':')) { 8396 in++; 8397 cmp = name; 8398 while (*in != 0 && *in == *cmp) { 8399 ++in; 8400 ++cmp; 8401 } 8402 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 8403 /* success */ 8404 ctxt->input->cur = in; 8405 return((const xmlChar*) 1); 8406 } 8407 } 8408 /* 8409 * all strings coms from the dictionary, equality can be done directly 8410 */ 8411 ret = xmlParseQName (ctxt, &prefix2); 8412 if ((ret == name) && (prefix == prefix2)) 8413 return((const xmlChar*) 1); 8414 return ret; 8415} 8416 8417/** 8418 * xmlParseAttValueInternal: 8419 * @ctxt: an XML parser context 8420 * @len: attribute len result 8421 * @alloc: whether the attribute was reallocated as a new string 8422 * @normalize: if 1 then further non-CDATA normalization must be done 8423 * 8424 * parse a value for an attribute. 8425 * NOTE: if no normalization is needed, the routine will return pointers 8426 * directly from the data buffer. 
8427 * 8428 * 3.3.3 Attribute-Value Normalization: 8429 * Before the value of an attribute is passed to the application or 8430 * checked for validity, the XML processor must normalize it as follows: 8431 * - a character reference is processed by appending the referenced 8432 * character to the attribute value 8433 * - an entity reference is processed by recursively processing the 8434 * replacement text of the entity 8435 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 8436 * appending #x20 to the normalized value, except that only a single 8437 * #x20 is appended for a "#xD#xA" sequence that is part of an external 8438 * parsed entity or the literal entity value of an internal parsed entity 8439 * - other characters are processed by appending them to the normalized value 8440 * If the declared value is not CDATA, then the XML processor must further 8441 * process the normalized attribute value by discarding any leading and 8442 * trailing space (#x20) characters, and by replacing sequences of space 8443 * (#x20) characters by a single space (#x20) character. 8444 * All attributes for which no declaration has been read should be treated 8445 * by a non-validating parser as if declared CDATA. 8446 * 8447 * Returns the AttValue parsed or NULL. The value has to be freed by the 8448 * caller if it was copied, this can be detected by val[*len] == 0. 
8449 */ 8450 8451static xmlChar * 8452xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 8453 int normalize) 8454{ 8455 xmlChar limit = 0; 8456 const xmlChar *in = NULL, *start, *end, *last; 8457 xmlChar *ret = NULL; 8458 8459 GROW; 8460 in = (xmlChar *) CUR_PTR; 8461 if (*in != '"' && *in != '\'') { 8462 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 8463 return (NULL); 8464 } 8465 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 8466 8467 /* 8468 * try to handle in this routine the most common case where no 8469 * allocation of a new string is required and where content is 8470 * pure ASCII. 8471 */ 8472 limit = *in++; 8473 end = ctxt->input->end; 8474 start = in; 8475 if (in >= end) { 8476 const xmlChar *oldbase = ctxt->input->base; 8477 GROW; 8478 if (oldbase != ctxt->input->base) { 8479 long delta = ctxt->input->base - oldbase; 8480 start = start + delta; 8481 in = in + delta; 8482 } 8483 end = ctxt->input->end; 8484 } 8485 if (normalize) { 8486 /* 8487 * Skip any leading spaces 8488 */ 8489 while ((in < end) && (*in != limit) && 8490 ((*in == 0x20) || (*in == 0x9) || 8491 (*in == 0xA) || (*in == 0xD))) { 8492 in++; 8493 start = in; 8494 if (in >= end) { 8495 const xmlChar *oldbase = ctxt->input->base; 8496 GROW; 8497 if (oldbase != ctxt->input->base) { 8498 long delta = ctxt->input->base - oldbase; 8499 start = start + delta; 8500 in = in + delta; 8501 } 8502 end = ctxt->input->end; 8503 } 8504 } 8505 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8506 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8507 if ((*in++ == 0x20) && (*in == 0x20)) break; 8508 if (in >= end) { 8509 const xmlChar *oldbase = ctxt->input->base; 8510 GROW; 8511 if (oldbase != ctxt->input->base) { 8512 long delta = ctxt->input->base - oldbase; 8513 start = start + delta; 8514 in = in + delta; 8515 } 8516 end = ctxt->input->end; 8517 } 8518 } 8519 last = in; 8520 /* 8521 * skip the trailing blanks 8522 */ 8523 while ((last[-1] == 0x20) && (last > start)) 
last--; 8524 while ((in < end) && (*in != limit) && 8525 ((*in == 0x20) || (*in == 0x9) || 8526 (*in == 0xA) || (*in == 0xD))) { 8527 in++; 8528 if (in >= end) { 8529 const xmlChar *oldbase = ctxt->input->base; 8530 GROW; 8531 if (oldbase != ctxt->input->base) { 8532 long delta = ctxt->input->base - oldbase; 8533 start = start + delta; 8534 in = in + delta; 8535 last = last + delta; 8536 } 8537 end = ctxt->input->end; 8538 } 8539 } 8540 if (*in != limit) goto need_complex; 8541 } else { 8542 while ((in < end) && (*in != limit) && (*in >= 0x20) && 8543 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 8544 in++; 8545 if (in >= end) { 8546 const xmlChar *oldbase = ctxt->input->base; 8547 GROW; 8548 if (oldbase != ctxt->input->base) { 8549 long delta = ctxt->input->base - oldbase; 8550 start = start + delta; 8551 in = in + delta; 8552 } 8553 end = ctxt->input->end; 8554 } 8555 } 8556 last = in; 8557 if (*in != limit) goto need_complex; 8558 } 8559 in++; 8560 if (len != NULL) { 8561 *len = last - start; 8562 ret = (xmlChar *) start; 8563 } else { 8564 if (alloc) *alloc = 1; 8565 ret = xmlStrndup(start, last - start); 8566 } 8567 CUR_PTR = in; 8568 if (alloc) *alloc = 0; 8569 return ret; 8570need_complex: 8571 if (alloc) *alloc = 1; 8572 return xmlParseAttValueComplex(ctxt, len, normalize); 8573} 8574 8575/** 8576 * xmlParseAttribute2: 8577 * @ctxt: an XML parser context 8578 * @pref: the element prefix 8579 * @elem: the element name 8580 * @prefix: a xmlChar ** used to store the value of the attribute prefix 8581 * @value: a xmlChar ** used to store the value of the attribute 8582 * @len: an int * to save the length of the attribute 8583 * @alloc: an int * to indicate if the attribute was allocated 8584 * 8585 * parse an attribute in the new SAX2 framework. 8586 * 8587 * Returns the attribute name, and the value in *value, . 
8588 */ 8589 8590static const xmlChar * 8591xmlParseAttribute2(xmlParserCtxtPtr ctxt, 8592 const xmlChar * pref, const xmlChar * elem, 8593 const xmlChar ** prefix, xmlChar ** value, 8594 int *len, int *alloc) 8595{ 8596 const xmlChar *name; 8597 xmlChar *val, *internal_val = NULL; 8598 int normalize = 0; 8599 8600 *value = NULL; 8601 GROW; 8602 name = xmlParseQName(ctxt, prefix); 8603 if (name == NULL) { 8604 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8605 "error parsing attribute name\n"); 8606 return (NULL); 8607 } 8608 8609 /* 8610 * get the type if needed 8611 */ 8612 if (ctxt->attsSpecial != NULL) { 8613 int type; 8614 8615 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 8616 pref, elem, *prefix, name); 8617 if (type != 0) 8618 normalize = 1; 8619 } 8620 8621 /* 8622 * read the value 8623 */ 8624 SKIP_BLANKS; 8625 if (RAW == '=') { 8626 NEXT; 8627 SKIP_BLANKS; 8628 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 8629 if (normalize) { 8630 /* 8631 * Sometimes a second normalisation pass for spaces is needed 8632 * but that only happens if charrefs or entities refernces 8633 * have been used in the attribute value, i.e. the attribute 8634 * value have been extracted in an allocated string already. 
8635 */ 8636 if (*alloc) { 8637 const xmlChar *val2; 8638 8639 val2 = xmlAttrNormalizeSpace2(ctxt, val, len); 8640 if ((val2 != NULL) && (val2 != val)) { 8641 xmlFree(val); 8642 val = (xmlChar *) val2; 8643 } 8644 } 8645 } 8646 ctxt->instate = XML_PARSER_CONTENT; 8647 } else { 8648 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 8649 "Specification mandate value for attribute %s\n", 8650 name); 8651 return (NULL); 8652 } 8653 8654 if (*prefix == ctxt->str_xml) { 8655 /* 8656 * Check that xml:lang conforms to the specification 8657 * No more registered as an error, just generate a warning now 8658 * since this was deprecated in XML second edition 8659 */ 8660 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 8661 internal_val = xmlStrndup(val, *len); 8662 if (!xmlCheckLanguageID(internal_val)) { 8663 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 8664 "Malformed value for xml:lang : %s\n", 8665 internal_val, NULL); 8666 } 8667 } 8668 8669 /* 8670 * Check that xml:space conforms to the specification 8671 */ 8672 if (xmlStrEqual(name, BAD_CAST "space")) { 8673 internal_val = xmlStrndup(val, *len); 8674 if (xmlStrEqual(internal_val, BAD_CAST "default")) 8675 *(ctxt->space) = 0; 8676 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 8677 *(ctxt->space) = 1; 8678 else { 8679 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 8680 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 8681 internal_val, NULL); 8682 } 8683 } 8684 if (internal_val) { 8685 xmlFree(internal_val); 8686 } 8687 } 8688 8689 *value = val; 8690 return (name); 8691} 8692/** 8693 * xmlParseStartTag2: 8694 * @ctxt: an XML parser context 8695 * 8696 * parse a start of tag either for rule element or 8697 * EmptyElement. In both case we don't parse the tag closing chars. 8698 * This routine is called when running SAX2 parsing 8699 * 8700 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 8701 * 8702 * [ WFC: Unique Att Spec ] 8703 * No attribute name may appear more than once in the same start-tag or 8704 * empty-element tag. 8705 * 8706 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 8707 * 8708 * [ WFC: Unique Att Spec ] 8709 * No attribute name may appear more than once in the same start-tag or 8710 * empty-element tag. 8711 * 8712 * With namespace: 8713 * 8714 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 8715 * 8716 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 8717 * 8718 * Returns the element name parsed 8719 */ 8720 8721static const xmlChar * 8722xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 8723 const xmlChar **URI, int *tlen) { 8724 const xmlChar *localname; 8725 const xmlChar *prefix; 8726 const xmlChar *attname; 8727 const xmlChar *aprefix; 8728 const xmlChar *nsname; 8729 xmlChar *attvalue; 8730 const xmlChar **atts = ctxt->atts; 8731 int maxatts = ctxt->maxatts; 8732 int nratts, nbatts, nbdef; 8733 int i, j, nbNs, attval, oldline, oldcol; 8734 const xmlChar *base; 8735 unsigned long cur; 8736 int nsNr = ctxt->nsNr; 8737 8738 if (RAW != '<') return(NULL); 8739 NEXT1; 8740 8741 /* 8742 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 8743 * point since the attribute values may be stored as pointers to 8744 * the buffer and calling SHRINK would destroy them ! 8745 * The Shrinking is only possible once the full set of attribute 8746 * callbacks have been done. 8747 */ 8748reparse: 8749 SHRINK; 8750 base = ctxt->input->base; 8751 cur = ctxt->input->cur - ctxt->input->base; 8752 oldline = ctxt->input->line; 8753 oldcol = ctxt->input->col; 8754 nbatts = 0; 8755 nratts = 0; 8756 nbdef = 0; 8757 nbNs = 0; 8758 attval = 0; 8759 /* Forget any namespaces added during an earlier parse of this element. 
*/ 8760 ctxt->nsNr = nsNr; 8761 8762 localname = xmlParseQName(ctxt, &prefix); 8763 if (localname == NULL) { 8764 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 8765 "StartTag: invalid element name\n"); 8766 return(NULL); 8767 } 8768 *tlen = ctxt->input->cur - ctxt->input->base - cur; 8769 8770 /* 8771 * Now parse the attributes, it ends up with the ending 8772 * 8773 * (S Attribute)* S? 8774 */ 8775 SKIP_BLANKS; 8776 GROW; 8777 if (ctxt->input->base != base) goto base_changed; 8778 8779 while ((RAW != '>') && 8780 ((RAW != '/') || (NXT(1) != '>')) && 8781 (IS_BYTE_CHAR(RAW))) { 8782 const xmlChar *q = CUR_PTR; 8783 unsigned int cons = ctxt->input->consumed; 8784 int len = -1, alloc = 0; 8785 8786 attname = xmlParseAttribute2(ctxt, prefix, localname, 8787 &aprefix, &attvalue, &len, &alloc); 8788 if (ctxt->input->base != base) { 8789 if ((attvalue != NULL) && (alloc != 0)) 8790 xmlFree(attvalue); 8791 attvalue = NULL; 8792 goto base_changed; 8793 } 8794 if ((attname != NULL) && (attvalue != NULL)) { 8795 if (len < 0) len = xmlStrlen(attvalue); 8796 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8797 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8798 xmlURIPtr uri; 8799 8800 if (*URL != 0) { 8801 uri = xmlParseURI((const char *) URL); 8802 if (uri == NULL) { 8803 xmlNsErr(ctxt, XML_WAR_NS_URI, 8804 "xmlns: '%s' is not a valid URI\n", 8805 URL, NULL, NULL); 8806 } else { 8807 if (uri->scheme == NULL) { 8808 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8809 "xmlns: URI %s is not absolute\n", 8810 URL, NULL, NULL); 8811 } 8812 xmlFreeURI(uri); 8813 } 8814 if (URL == ctxt->str_xml_ns) { 8815 if (attname != ctxt->str_xml) { 8816 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8817 "xml namespace URI cannot be the default namespace\n", 8818 NULL, NULL, NULL); 8819 } 8820 goto skip_default_ns; 8821 } 8822 if ((len == 29) && 8823 (xmlStrEqual(URL, 8824 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 8825 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8826 "reuse of the 
xmlns namespace name is forbidden\n", 8827 NULL, NULL, NULL); 8828 goto skip_default_ns; 8829 } 8830 } 8831 /* 8832 * check that it's not a defined namespace 8833 */ 8834 for (j = 1;j <= nbNs;j++) 8835 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8836 break; 8837 if (j <= nbNs) 8838 xmlErrAttributeDup(ctxt, NULL, attname); 8839 else 8840 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 8841skip_default_ns: 8842 if (alloc != 0) xmlFree(attvalue); 8843 SKIP_BLANKS; 8844 continue; 8845 } 8846 if (aprefix == ctxt->str_xmlns) { 8847 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 8848 xmlURIPtr uri; 8849 8850 if (attname == ctxt->str_xml) { 8851 if (URL != ctxt->str_xml_ns) { 8852 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8853 "xml namespace prefix mapped to wrong URI\n", 8854 NULL, NULL, NULL); 8855 } 8856 /* 8857 * Do not keep a namespace definition node 8858 */ 8859 goto skip_ns; 8860 } 8861 if (URL == ctxt->str_xml_ns) { 8862 if (attname != ctxt->str_xml) { 8863 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8864 "xml namespace URI mapped to wrong prefix\n", 8865 NULL, NULL, NULL); 8866 } 8867 goto skip_ns; 8868 } 8869 if (attname == ctxt->str_xmlns) { 8870 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8871 "redefinition of the xmlns prefix is forbidden\n", 8872 NULL, NULL, NULL); 8873 goto skip_ns; 8874 } 8875 if ((len == 29) && 8876 (xmlStrEqual(URL, 8877 BAD_CAST "http://www.w3.org/2000/xmlns/"))) { 8878 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8879 "reuse of the xmlns namespace name is forbidden\n", 8880 NULL, NULL, NULL); 8881 goto skip_ns; 8882 } 8883 if ((URL == NULL) || (URL[0] == 0)) { 8884 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 8885 "xmlns:%s: Empty XML namespace is not allowed\n", 8886 attname, NULL, NULL); 8887 goto skip_ns; 8888 } else { 8889 uri = xmlParseURI((const char *) URL); 8890 if (uri == NULL) { 8891 xmlNsErr(ctxt, XML_WAR_NS_URI, 8892 "xmlns:%s: '%s' is not a valid URI\n", 8893 attname, URL, NULL); 8894 } else { 8895 if ((ctxt->pedantic) && 
(uri->scheme == NULL)) { 8896 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE, 8897 "xmlns:%s: URI %s is not absolute\n", 8898 attname, URL, NULL); 8899 } 8900 xmlFreeURI(uri); 8901 } 8902 } 8903 8904 /* 8905 * check that it's not a defined namespace 8906 */ 8907 for (j = 1;j <= nbNs;j++) 8908 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8909 break; 8910 if (j <= nbNs) 8911 xmlErrAttributeDup(ctxt, aprefix, attname); 8912 else 8913 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 8914skip_ns: 8915 if (alloc != 0) xmlFree(attvalue); 8916 SKIP_BLANKS; 8917 if (ctxt->input->base != base) goto base_changed; 8918 continue; 8919 } 8920 8921 /* 8922 * Add the pair to atts 8923 */ 8924 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8925 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8926 if (attvalue[len] == 0) 8927 xmlFree(attvalue); 8928 goto failed; 8929 } 8930 maxatts = ctxt->maxatts; 8931 atts = ctxt->atts; 8932 } 8933 ctxt->attallocs[nratts++] = alloc; 8934 atts[nbatts++] = attname; 8935 atts[nbatts++] = aprefix; 8936 atts[nbatts++] = NULL; /* the URI will be fetched later */ 8937 atts[nbatts++] = attvalue; 8938 attvalue += len; 8939 atts[nbatts++] = attvalue; 8940 /* 8941 * tag if some deallocation is needed 8942 */ 8943 if (alloc != 0) attval = 1; 8944 } else { 8945 if ((attvalue != NULL) && (attvalue[len] == 0)) 8946 xmlFree(attvalue); 8947 } 8948 8949failed: 8950 8951 GROW 8952 if (ctxt->input->base != base) goto base_changed; 8953 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8954 break; 8955 if (!IS_BLANK_CH(RAW)) { 8956 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8957 "attributes construct error\n"); 8958 break; 8959 } 8960 SKIP_BLANKS; 8961 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8962 (attname == NULL) && (attvalue == NULL)) { 8963 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8964 "xmlParseStartTag: problem parsing attributes\n"); 8965 break; 8966 } 8967 GROW; 8968 if (ctxt->input->base != base) goto base_changed; 8969 } 8970 8971 /* 8972 * 
The attributes defaulting 8973 */ 8974 if (ctxt->attsDefault != NULL) { 8975 xmlDefAttrsPtr defaults; 8976 8977 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 8978 if (defaults != NULL) { 8979 for (i = 0;i < defaults->nbAttrs;i++) { 8980 attname = defaults->values[5 * i]; 8981 aprefix = defaults->values[5 * i + 1]; 8982 8983 /* 8984 * special work for namespaces defaulted defs 8985 */ 8986 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8987 /* 8988 * check that it's not a defined namespace 8989 */ 8990 for (j = 1;j <= nbNs;j++) 8991 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8992 break; 8993 if (j <= nbNs) continue; 8994 8995 nsname = xmlGetNamespace(ctxt, NULL); 8996 if (nsname != defaults->values[5 * i + 2]) { 8997 if (nsPush(ctxt, NULL, 8998 defaults->values[5 * i + 2]) > 0) 8999 nbNs++; 9000 } 9001 } else if (aprefix == ctxt->str_xmlns) { 9002 /* 9003 * check that it's not a defined namespace 9004 */ 9005 for (j = 1;j <= nbNs;j++) 9006 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 9007 break; 9008 if (j <= nbNs) continue; 9009 9010 nsname = xmlGetNamespace(ctxt, attname); 9011 if (nsname != defaults->values[2]) { 9012 if (nsPush(ctxt, attname, 9013 defaults->values[5 * i + 2]) > 0) 9014 nbNs++; 9015 } 9016 } else { 9017 /* 9018 * check that it's not a defined attribute 9019 */ 9020 for (j = 0;j < nbatts;j+=5) { 9021 if ((attname == atts[j]) && (aprefix == atts[j+1])) 9022 break; 9023 } 9024 if (j < nbatts) continue; 9025 9026 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 9027 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 9028 return(NULL); 9029 } 9030 maxatts = ctxt->maxatts; 9031 atts = ctxt->atts; 9032 } 9033 atts[nbatts++] = attname; 9034 atts[nbatts++] = aprefix; 9035 if (aprefix == NULL) 9036 atts[nbatts++] = NULL; 9037 else 9038 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 9039 atts[nbatts++] = defaults->values[5 * i + 2]; 9040 atts[nbatts++] = defaults->values[5 * i + 3]; 9041 if ((ctxt->standalone == 1) && 9042 
(defaults->values[5 * i + 4] != NULL)) { 9043 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED, 9044 "standalone: attribute %s on %s defaulted from external subset\n", 9045 attname, localname); 9046 } 9047 nbdef++; 9048 } 9049 } 9050 } 9051 } 9052 9053 /* 9054 * The attributes checkings 9055 */ 9056 for (i = 0; i < nbatts;i += 5) { 9057 /* 9058 * The default namespace does not apply to attribute names. 9059 */ 9060 if (atts[i + 1] != NULL) { 9061 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 9062 if (nsname == NULL) { 9063 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9064 "Namespace prefix %s for %s on %s is not defined\n", 9065 atts[i + 1], atts[i], localname); 9066 } 9067 atts[i + 2] = nsname; 9068 } else 9069 nsname = NULL; 9070 /* 9071 * [ WFC: Unique Att Spec ] 9072 * No attribute name may appear more than once in the same 9073 * start-tag or empty-element tag. 9074 * As extended by the Namespace in XML REC. 9075 */ 9076 for (j = 0; j < i;j += 5) { 9077 if (atts[i] == atts[j]) { 9078 if (atts[i+1] == atts[j+1]) { 9079 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 9080 break; 9081 } 9082 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 9083 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 9084 "Namespaced Attribute %s in '%s' redefined\n", 9085 atts[i], nsname, NULL); 9086 break; 9087 } 9088 } 9089 } 9090 } 9091 9092 nsname = xmlGetNamespace(ctxt, prefix); 9093 if ((prefix != NULL) && (nsname == NULL)) { 9094 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 9095 "Namespace prefix %s on %s is not defined\n", 9096 prefix, localname, NULL); 9097 } 9098 *pref = prefix; 9099 *URI = nsname; 9100 9101 /* 9102 * SAX: Start of Element ! 
9103 */ 9104 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 9105 (!ctxt->disableSAX)) { 9106 if (nbNs > 0) 9107 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9108 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 9109 nbatts / 5, nbdef, atts); 9110 else 9111 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 9112 nsname, 0, NULL, nbatts / 5, nbdef, atts); 9113 } 9114 9115 /* 9116 * Free up attribute allocated strings if needed 9117 */ 9118 if (attval != 0) { 9119 for (i = 3,j = 0; j < nratts;i += 5,j++) 9120 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9121 xmlFree((xmlChar *) atts[i]); 9122 } 9123 9124 return(localname); 9125 9126base_changed: 9127 /* 9128 * the attribute strings are valid iif the base didn't changed 9129 */ 9130 if (attval != 0) { 9131 for (i = 3,j = 0; j < nratts;i += 5,j++) 9132 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 9133 xmlFree((xmlChar *) atts[i]); 9134 } 9135 ctxt->input->cur = ctxt->input->base + cur; 9136 ctxt->input->line = oldline; 9137 ctxt->input->col = oldcol; 9138 if (ctxt->wellFormed == 1) { 9139 goto reparse; 9140 } 9141 return(NULL); 9142} 9143 9144/** 9145 * xmlParseEndTag2: 9146 * @ctxt: an XML parser context 9147 * @line: line of the start tag 9148 * @nsNr: number of namespaces on the start tag 9149 * 9150 * parse an end of tag 9151 * 9152 * [42] ETag ::= '</' Name S? '>' 9153 * 9154 * With namespace 9155 * 9156 * [NS 9] ETag ::= '</' QName S? 
'>' 9157 */ 9158 9159static void 9160xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 9161 const xmlChar *URI, int line, int nsNr, int tlen) { 9162 const xmlChar *name; 9163 9164 GROW; 9165 if ((RAW != '<') || (NXT(1) != '/')) { 9166 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 9167 return; 9168 } 9169 SKIP(2); 9170 9171 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 9172 if (ctxt->input->cur[tlen] == '>') { 9173 ctxt->input->cur += tlen + 1; 9174 goto done; 9175 } 9176 ctxt->input->cur += tlen; 9177 name = (xmlChar*)1; 9178 } else { 9179 if (prefix == NULL) 9180 name = xmlParseNameAndCompare(ctxt, ctxt->name); 9181 else 9182 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 9183 } 9184 9185 /* 9186 * We should definitely be at the ending "S? '>'" part 9187 */ 9188 GROW; 9189 SKIP_BLANKS; 9190 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 9191 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 9192 } else 9193 NEXT1; 9194 9195 /* 9196 * [ WFC: Element Type Match ] 9197 * The Name in an element's end-tag must match the element type in the 9198 * start-tag. 9199 * 9200 */ 9201 if (name != (xmlChar*)1) { 9202 if (name == NULL) name = BAD_CAST "unparseable"; 9203 if ((line == 0) && (ctxt->node != NULL)) 9204 line = ctxt->node->line; 9205 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 9206 "Opening and ending tag mismatch: %s line %d and %s\n", 9207 ctxt->name, line, name); 9208 } 9209 9210 /* 9211 * SAX: End of Tag 9212 */ 9213done: 9214 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 9215 (!ctxt->disableSAX)) 9216 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 9217 9218 spacePop(ctxt); 9219 if (nsNr != 0) 9220 nsPop(ctxt, nsNr); 9221 return; 9222} 9223 9224/** 9225 * xmlParseCDSect: 9226 * @ctxt: an XML parser context 9227 * 9228 * Parse escaped pure raw content. 
*
 * [18] CDSect ::= CDStart CData CDEnd
 *
 * [19] CDStart ::= '<![CDATA['
 *
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
 *
 * [21] CDEnd ::= ']]>'
 *
 * Returns immediately, consuming nothing, when the input is not positioned
 * on '<![CDATA['. Otherwise the section content is accumulated in a
 * temporary buffer and delivered through the SAX cdataBlock callback, or
 * through characters when no cdataBlock callback is installed.
 */
void
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    /*
     * r, s and cur form a three character sliding window over the input
     * (rl, sl and l are the values filled in by CUR_CHAR for each of them).
     */
    int r, rl;
    int s, sl;
    int cur, l;
    int count = 0;

    /* Check 2.6.0 was NXT(0) not RAW */
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
        SKIP(9);
    } else
        return;

    ctxt->instate = XML_PARSER_CDATA_SECTION;
    /* prime the window with the first two characters */
    r = CUR_CHAR(rl);
    if (!IS_CHAR(r)) {
        xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
        ctxt->instate = XML_PARSER_CONTENT;
        return;
    }
    NEXTL(rl);
    s = CUR_CHAR(sl);
    if (!IS_CHAR(s)) {
        xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
        ctxt->instate = XML_PARSER_CONTENT;
        return;
    }
    NEXTL(sl);
    cur = CUR_CHAR(l);
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return;
    }
    /*
     * Copy the oldest character of the window (r) and slide, until the
     * window reads "]]>" or an invalid character is met. When the loop
     * stops on "]]>" the two ']' are still in r and s, so they never
     * reach the buffer.
     */
    while (IS_CHAR(cur) &&
           ((r != ']') || (s != ']') || (cur != '>'))) {
        /* grow before the copy so there is slack left in the buffer */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlFree(buf);
                xmlErrMemory(ctxt, NULL);
                return;
            }
            buf = tmp;
        }
        COPY_BUF(rl,buf,len,r);
        r = s;
        rl = sl;
        s = cur;
        sl = l;
        count++;
        /* only try to refill the input every 50 characters */
        if (count > 50) {
            GROW;
            count = 0;
        }
        NEXTL(l);
        cur = CUR_CHAR(l);
    }
    buf[len] = 0;
    ctxt->instate = XML_PARSER_CONTENT;
    /* the loop stopped on something other than the closing "]]>" */
    if (cur != '>') {
        xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
                             "CData section not finished\n%.50s\n", buf);
        xmlFree(buf);
        return;
    }
    NEXTL(l);

    /*
     * OK the buffer is to be consumed as cdata.
     */
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
        if (ctxt->sax->cdataBlock != NULL)
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
        else if (ctxt->sax->characters != NULL)
            ctxt->sax->characters(ctxt->userData, buf, len);
    }
    xmlFree(buf);
}

/**
 * xmlParseContent:
 * @ctxt:  an XML parser context
 *
 * Parse a content:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The loop runs until the end of the input, a "</" sequence, or the parser
 * entering the XML_PARSER_EOF state. Each iteration dispatches on the
 * characters at the current position.
 */

void
xmlParseContent(xmlParserCtxtPtr ctxt) {
    GROW;
    while ((RAW != 0) &&
           ((RAW != '<') || (NXT(1) != '/')) &&
           (ctxt->instate != XML_PARSER_EOF)) {
        /* position snapshot, used below to detect an iteration without progress */
        const xmlChar *test = CUR_PTR;
        unsigned int cons = ctxt->input->consumed;
        const xmlChar *cur = ctxt->input->cur;

        /*
         * First case : a Processing Instruction.
         */
        if ((*cur == '<') && (cur[1] == '?')) {
            xmlParsePI(ctxt);
        }

        /*
         * Second case : a CDSection
         */
        /* 2.6.0 test was *cur not RAW */
        else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
            xmlParseCDSect(ctxt);
        }

        /*
         * Third case :  a comment
         */
        else if ((*cur == '<') && (NXT(1) == '!') &&
                 (NXT(2) == '-') && (NXT(3) == '-')) {
            xmlParseComment(ctxt);
            ctxt->instate = XML_PARSER_CONTENT;
        }

        /*
         * Fourth case :  a sub-element.
         */
        else if (*cur == '<') {
            xmlParseElement(ctxt);
        }

        /*
         * Fifth case : a reference. If it has not been resolved,
         *    parsing returns its Name, create the node
         */

        else if (*cur == '&') {
            xmlParseReference(ctxt);
        }

        /*
         * Last case, text. Note that References are handled directly.
         */
        else {
            xmlParseCharData(ctxt, 0);
        }

        GROW;
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);
        SHRINK;

        /*
         * Neither the consumed counter nor the current pointer moved:
         * stop here rather than loop forever on the same input.
         */
        if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                        "detected an error in element content\n");
            ctxt->instate = XML_PARSER_EOF;
            break;
        }
    }
}

/**
 * xmlParseElement:
 * @ctxt:  an XML parser context
 *
 * parse an XML element, this is highly recursive
 *
 * [39] element ::= EmptyElemTag | STag content ETag
 *
 * [ WFC: Element Type Match ]
 * The Name in an element's end-tag must match the element type in the
 * start-tag.
 *
 * The namespace stack depth is sampled on entry; every exit path taken
 * after the start tag was parsed pops whatever the start tag pushed.
 */

void
xmlParseElement(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    const xmlChar *prefix = NULL;
    const xmlChar *URI = NULL;
    xmlParserNodeInfo node_info;
    int line, tlen;
    xmlNodePtr ret;
    /* namespace stack depth before this element's own declarations */
    int nsNr = ctxt->nsNr;

    /* refuse to recurse past xmlParserMaxDepth unless XML_PARSE_HUGE is set */
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
                          xmlParserMaxDepth);
        ctxt->instate = XML_PARSER_EOF;
        return;
    }

    /* Capture start position */
    if (ctxt->record_info) {
        node_info.begin_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
        node_info.begin_line = ctxt->input->line;
    }

    /*
     * Push the space value for this element: -1 when the stack is empty
     * or its top is -2, otherwise a copy of the current top.
     */
    if (ctxt->spaceNr == 0)
        spacePush(ctxt, -1);
    else if (*ctxt->space == -2)
        spacePush(ctxt, -1);
    else
        spacePush(ctxt, *ctxt->space);

    line = ctxt->input->line;
    /* without SAX1 support compiled in, the SAX2 start tag parser is always used */
#ifdef LIBXML_SAX1_ENABLED
    if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED
    else
        name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
    if (name == NULL) {
        spacePop(ctxt);
        return;
    }
    namePush(ctxt, name);
    /* node current after the start tag; recorded in node_info on exit */
    ret = ctxt->node;

#ifdef LIBXML_VALID_ENABLED
    /*
     * [ VC: Root Element Type ]
     * The Name in the document type declaration must match the element
     * type of the root element.
     */
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
#endif /* LIBXML_VALID_ENABLED */

    /*
     * Check for an Empty Element.
     */
    if ((RAW == '/') && (NXT(1) == '>')) {
        SKIP(2);
        if (ctxt->sax2) {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
#ifdef LIBXML_SAX1_ENABLED
        } else {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
        }
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);
        if ( ret != NULL && ctxt->record_info ) {
           node_info.end_pos = ctxt->input->consumed +
                              (CUR_PTR - ctxt->input->base);
           node_info.end_line = ctxt->input->line;
           node_info.node = ret;
           xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return;
    }
    if (RAW == '>') {
        NEXT1;
    } else {
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
                     "Couldn't find end of Start Tag %s line %d\n",
                                name, line, NULL);

        /*
         * end of parsing of this node.
         */
        nodePop(ctxt);
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);

        /*
         * Capture end position and add node
         */
        if ( ret != NULL && ctxt->record_info ) {
           node_info.end_pos = ctxt->input->consumed +
                              (CUR_PTR - ctxt->input->base);
           node_info.end_line = ctxt->input->line;
           node_info.node = ret;
           xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return;
    }

    /*
     * Parse the content of the element:
     */
    xmlParseContent(ctxt);
    if (!IS_BYTE_CHAR(RAW)) {
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
         "Premature end of data in tag %s line %d\n",
                                name, line, NULL);

        /*
         * end of parsing of this node.
         * NOTE(review): unlike the other exits, no node info is recorded here.
         */
        nodePop(ctxt);
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);
        return;
    }

    /*
     * parse the end of tag: '</' should be here.
     */
    if (ctxt->sax2) {
        xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
        namePop(ctxt);
    }
#ifdef LIBXML_SAX1_ENABLED
      else
        xmlParseEndTag1(ctxt, line);
#endif /* LIBXML_SAX1_ENABLED */

    /*
     * Capture end position and add node
     */
    if ( ret != NULL && ctxt->record_info ) {
       node_info.end_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
       node_info.end_line = ctxt->input->line;
       node_info.node = ret;
       xmlParserAddNodeInfo(ctxt, &node_info);
    }
}

/**
 * xmlParseVersionNum:
 * @ctxt:  an XML parser context
 *
 * parse the XML version value.
 *
 * [26] VersionNum ::= '1.'
[0-9]+ 9590 * 9591 * In practice allow [0-9].[0-9]+ at that level 9592 * 9593 * Returns the string giving the XML version number, or NULL 9594 */ 9595xmlChar * 9596xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 9597 xmlChar *buf = NULL; 9598 int len = 0; 9599 int size = 10; 9600 xmlChar cur; 9601 9602 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9603 if (buf == NULL) { 9604 xmlErrMemory(ctxt, NULL); 9605 return(NULL); 9606 } 9607 cur = CUR; 9608 if (!((cur >= '0') && (cur <= '9'))) { 9609 xmlFree(buf); 9610 return(NULL); 9611 } 9612 buf[len++] = cur; 9613 NEXT; 9614 cur=CUR; 9615 if (cur != '.') { 9616 xmlFree(buf); 9617 return(NULL); 9618 } 9619 buf[len++] = cur; 9620 NEXT; 9621 cur=CUR; 9622 while ((cur >= '0') && (cur <= '9')) { 9623 if (len + 1 >= size) { 9624 xmlChar *tmp; 9625 9626 size *= 2; 9627 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9628 if (tmp == NULL) { 9629 xmlFree(buf); 9630 xmlErrMemory(ctxt, NULL); 9631 return(NULL); 9632 } 9633 buf = tmp; 9634 } 9635 buf[len++] = cur; 9636 NEXT; 9637 cur=CUR; 9638 } 9639 buf[len] = 0; 9640 return(buf); 9641} 9642 9643/** 9644 * xmlParseVersionInfo: 9645 * @ctxt: an XML parser context 9646 * 9647 * parse the XML version. 9648 * 9649 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 9650 * 9651 * [25] Eq ::= S? '=' S? 9652 * 9653 * Returns the version string, e.g. 
"1.0" 9654 */ 9655 9656xmlChar * 9657xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 9658 xmlChar *version = NULL; 9659 9660 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 9661 SKIP(7); 9662 SKIP_BLANKS; 9663 if (RAW != '=') { 9664 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9665 return(NULL); 9666 } 9667 NEXT; 9668 SKIP_BLANKS; 9669 if (RAW == '"') { 9670 NEXT; 9671 version = xmlParseVersionNum(ctxt); 9672 if (RAW != '"') { 9673 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9674 } else 9675 NEXT; 9676 } else if (RAW == '\''){ 9677 NEXT; 9678 version = xmlParseVersionNum(ctxt); 9679 if (RAW != '\'') { 9680 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9681 } else 9682 NEXT; 9683 } else { 9684 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9685 } 9686 } 9687 return(version); 9688} 9689 9690/** 9691 * xmlParseEncName: 9692 * @ctxt: an XML parser context 9693 * 9694 * parse the XML encoding name 9695 * 9696 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 9697 * 9698 * Returns the encoding name value or NULL 9699 */ 9700xmlChar * 9701xmlParseEncName(xmlParserCtxtPtr ctxt) { 9702 xmlChar *buf = NULL; 9703 int len = 0; 9704 int size = 10; 9705 xmlChar cur; 9706 9707 cur = CUR; 9708 if (((cur >= 'a') && (cur <= 'z')) || 9709 ((cur >= 'A') && (cur <= 'Z'))) { 9710 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 9711 if (buf == NULL) { 9712 xmlErrMemory(ctxt, NULL); 9713 return(NULL); 9714 } 9715 9716 buf[len++] = cur; 9717 NEXT; 9718 cur = CUR; 9719 while (((cur >= 'a') && (cur <= 'z')) || 9720 ((cur >= 'A') && (cur <= 'Z')) || 9721 ((cur >= '0') && (cur <= '9')) || 9722 (cur == '.') || (cur == '_') || 9723 (cur == '-')) { 9724 if (len + 1 >= size) { 9725 xmlChar *tmp; 9726 9727 size *= 2; 9728 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 9729 if (tmp == NULL) { 9730 xmlErrMemory(ctxt, NULL); 9731 xmlFree(buf); 9732 return(NULL); 9733 } 9734 buf = tmp; 9735 } 9736 buf[len++] = cur; 9737 NEXT; 9738 cur = CUR; 9739 if 
(cur == 0) { 9740 SHRINK; 9741 GROW; 9742 cur = CUR; 9743 } 9744 } 9745 buf[len] = 0; 9746 } else { 9747 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 9748 } 9749 return(buf); 9750} 9751 9752/** 9753 * xmlParseEncodingDecl: 9754 * @ctxt: an XML parser context 9755 * 9756 * parse the XML encoding declaration 9757 * 9758 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 9759 * 9760 * this setups the conversion filters. 9761 * 9762 * Returns the encoding value or NULL 9763 */ 9764 9765const xmlChar * 9766xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 9767 xmlChar *encoding = NULL; 9768 9769 SKIP_BLANKS; 9770 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 9771 SKIP(8); 9772 SKIP_BLANKS; 9773 if (RAW != '=') { 9774 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9775 return(NULL); 9776 } 9777 NEXT; 9778 SKIP_BLANKS; 9779 if (RAW == '"') { 9780 NEXT; 9781 encoding = xmlParseEncName(ctxt); 9782 if (RAW != '"') { 9783 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9784 } else 9785 NEXT; 9786 } else if (RAW == '\''){ 9787 NEXT; 9788 encoding = xmlParseEncName(ctxt); 9789 if (RAW != '\'') { 9790 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9791 } else 9792 NEXT; 9793 } else { 9794 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9795 } 9796 /* 9797 * UTF-16 encoding stwich has already taken place at this stage, 9798 * more over the little-endian/big-endian selection is already done 9799 */ 9800 if ((encoding != NULL) && 9801 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 9802 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 9803 /* 9804 * If no encoding was passed to the parser, that we are 9805 * using UTF-16 and no decoder is present i.e. 
the 9806 * document is apparently UTF-8 compatible, then raise an 9807 * encoding mismatch fatal error 9808 */ 9809 if ((ctxt->encoding == NULL) && 9810 (ctxt->input->buf != NULL) && 9811 (ctxt->input->buf->encoder == NULL)) { 9812 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING, 9813 "Document labelled UTF-16 but has UTF-8 content\n"); 9814 } 9815 if (ctxt->encoding != NULL) 9816 xmlFree((xmlChar *) ctxt->encoding); 9817 ctxt->encoding = encoding; 9818 } 9819 /* 9820 * UTF-8 encoding is handled natively 9821 */ 9822 else if ((encoding != NULL) && 9823 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 9824 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 9825 if (ctxt->encoding != NULL) 9826 xmlFree((xmlChar *) ctxt->encoding); 9827 ctxt->encoding = encoding; 9828 } 9829 else if (encoding != NULL) { 9830 xmlCharEncodingHandlerPtr handler; 9831 9832 if (ctxt->input->encoding != NULL) 9833 xmlFree((xmlChar *) ctxt->input->encoding); 9834 ctxt->input->encoding = encoding; 9835 9836 handler = xmlFindCharEncodingHandler((const char *) encoding); 9837 if (handler != NULL) { 9838 xmlSwitchToEncoding(ctxt, handler); 9839 } else { 9840 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 9841 "Unsupported encoding %s\n", encoding); 9842 return(NULL); 9843 } 9844 } 9845 } 9846 return(encoding); 9847} 9848 9849/** 9850 * xmlParseSDDecl: 9851 * @ctxt: an XML parser context 9852 * 9853 * parse the XML standalone declaration 9854 * 9855 * [32] SDDecl ::= S 'standalone' Eq 9856 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 9857 * 9858 * [ VC: Standalone Document Declaration ] 9859 * TODO The standalone document declaration must have the value "no" 9860 * if any external markup declarations contain declarations of: 9861 * - attributes with default values, if elements to which these 9862 * attributes apply appear in the document without specifications 9863 * of values for these attributes, or 9864 * - entities (other than amp, lt, gt, apos, quot), if references 9865 * to 
those entities appear in the document, or 9866 * - attributes with values subject to normalization, where the 9867 * attribute appears in the document with a value which will change 9868 * as a result of normalization, or 9869 * - element types with element content, if white space occurs directly 9870 * within any instance of those types. 9871 * 9872 * Returns: 9873 * 1 if standalone="yes" 9874 * 0 if standalone="no" 9875 * -2 if standalone attribute is missing or invalid 9876 * (A standalone value of -2 means that the XML declaration was found, 9877 * but no value was specified for the standalone attribute). 9878 */ 9879 9880int 9881xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 9882 int standalone = -2; 9883 9884 SKIP_BLANKS; 9885 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 9886 SKIP(10); 9887 SKIP_BLANKS; 9888 if (RAW != '=') { 9889 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 9890 return(standalone); 9891 } 9892 NEXT; 9893 SKIP_BLANKS; 9894 if (RAW == '\''){ 9895 NEXT; 9896 if ((RAW == 'n') && (NXT(1) == 'o')) { 9897 standalone = 0; 9898 SKIP(2); 9899 } else if ((RAW == 'y') && (NXT(1) == 'e') && 9900 (NXT(2) == 's')) { 9901 standalone = 1; 9902 SKIP(3); 9903 } else { 9904 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 9905 } 9906 if (RAW != '\'') { 9907 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9908 } else 9909 NEXT; 9910 } else if (RAW == '"'){ 9911 NEXT; 9912 if ((RAW == 'n') && (NXT(1) == 'o')) { 9913 standalone = 0; 9914 SKIP(2); 9915 } else if ((RAW == 'y') && (NXT(1) == 'e') && 9916 (NXT(2) == 's')) { 9917 standalone = 1; 9918 SKIP(3); 9919 } else { 9920 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 9921 } 9922 if (RAW != '"') { 9923 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 9924 } else 9925 NEXT; 9926 } else { 9927 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 9928 } 9929 } 9930 return(standalone); 9931} 9932 9933/** 9934 * xmlParseXMLDecl: 9935 * @ctxt: an XML parser context 9936 * 9937 * 
parse an XML declaration header 9938 * 9939 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 9940 */ 9941 9942void 9943xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 9944 xmlChar *version; 9945 9946 /* 9947 * This value for standalone indicates that the document has an 9948 * XML declaration but it does not have a standalone attribute. 9949 * It will be overwritten later if a standalone attribute is found. 9950 */ 9951 ctxt->input->standalone = -2; 9952 9953 /* 9954 * We know that '<?xml' is here. 9955 */ 9956 SKIP(5); 9957 9958 if (!IS_BLANK_CH(RAW)) { 9959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9960 "Blank needed after '<?xml'\n"); 9961 } 9962 SKIP_BLANKS; 9963 9964 /* 9965 * We must have the VersionInfo here. 9966 */ 9967 version = xmlParseVersionInfo(ctxt); 9968 if (version == NULL) { 9969 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 9970 } else { 9971 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 9972 /* 9973 * Changed here for XML-1.0 5th edition 9974 */ 9975 if (ctxt->options & XML_PARSE_OLD10) { 9976 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 9977 "Unsupported version '%s'\n", 9978 version); 9979 } else { 9980 if ((version[0] == '1') && ((version[1] == '.'))) { 9981 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 9982 "Unsupported version '%s'\n", 9983 version, NULL); 9984 } else { 9985 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, 9986 "Unsupported version '%s'\n", 9987 version); 9988 } 9989 } 9990 } 9991 if (ctxt->version != NULL) 9992 xmlFree((void *) ctxt->version); 9993 ctxt->version = version; 9994 } 9995 9996 /* 9997 * We may have the encoding declaration 9998 */ 9999 if (!IS_BLANK_CH(RAW)) { 10000 if ((RAW == '?') && (NXT(1) == '>')) { 10001 SKIP(2); 10002 return; 10003 } 10004 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10005 } 10006 xmlParseEncodingDecl(ctxt); 10007 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10008 /* 10009 * The XML REC instructs us to stop 
parsing right here 10010 */ 10011 return; 10012 } 10013 10014 /* 10015 * We may have the standalone status. 10016 */ 10017 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 10018 if ((RAW == '?') && (NXT(1) == '>')) { 10019 SKIP(2); 10020 return; 10021 } 10022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 10023 } 10024 10025 /* 10026 * We can grow the input buffer freely at that point 10027 */ 10028 GROW; 10029 10030 SKIP_BLANKS; 10031 ctxt->input->standalone = xmlParseSDDecl(ctxt); 10032 10033 SKIP_BLANKS; 10034 if ((RAW == '?') && (NXT(1) == '>')) { 10035 SKIP(2); 10036 } else if (RAW == '>') { 10037 /* Deprecated old WD ... */ 10038 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10039 NEXT; 10040 } else { 10041 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 10042 MOVETO_ENDTAG(CUR_PTR); 10043 NEXT; 10044 } 10045} 10046 10047/** 10048 * xmlParseMisc: 10049 * @ctxt: an XML parser context 10050 * 10051 * parse an XML Misc* optional field. 10052 * 10053 * [27] Misc ::= Comment | PI | S 10054 */ 10055 10056void 10057xmlParseMisc(xmlParserCtxtPtr ctxt) { 10058 while (((RAW == '<') && (NXT(1) == '?')) || 10059 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 10060 IS_BLANK_CH(CUR)) { 10061 if ((RAW == '<') && (NXT(1) == '?')) { 10062 xmlParsePI(ctxt); 10063 } else if (IS_BLANK_CH(CUR)) { 10064 NEXT; 10065 } else 10066 xmlParseComment(ctxt); 10067 } 10068} 10069 10070/** 10071 * xmlParseDocument: 10072 * @ctxt: an XML parser context 10073 * 10074 * parse an XML document (and build a tree if using the standard SAX 10075 * interface). 10076 * 10077 * [1] document ::= prolog element Misc* 10078 * 10079 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 10080 * 10081 * Returns 0, -1 in case of error. the parser context is augmented 10082 * as a result of the parsing. 
*/

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    GROW;

    /*
     * SAX: detecting the level.
     */
    xmlDetectSAX2(ctxt);

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * NOTE(review): the enum constant is cast to a pointer here, so this
     * presumably is a "no encoding set yet" (NULL) test - confirm.
     */
    if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
        /*
         * Get the 4 first bytes and decode the charset
         * if enc != XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines.
         */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(&start[0], 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* the error is only reported here; parsing carries on below */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     * do not GROW here to avoid the detected encoder to decode more
     * than just the first line, unless the amount of data is really
     * too small to hold "<?xml version="1.0" encoding="foo"
     */
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
       GROW;
    }
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /* no XML declaration: assume the default version */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * The Misc part of the Prolog
     */
    GROW;
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {

        /* inSubset: 1 while in the internal subset, 2 for the external one */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
        }

        /*
         * Create and update the external subset.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        ctxt->inSubset = 0;

        xmlCleanSpecialAttr(ctxt);

        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                       "Start tag expected, '<' not found\n");
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* anything left after the trailing Misc is an error */
        if (RAW != 0) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    /*
     * Remove locally kept entity definitions if the tree was not built
     */
    if ((ctxt->myDoc != NULL) &&
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }

    /* record on the document what the parse established about it */
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
        if (ctxt->valid)
            ctxt->myDoc->properties |= XML_DOC_DTDVALID;
        if (ctxt->nsWellFormed)
            ctxt->myDoc->properties |= XML_DOC_NSVALID;
        if (ctxt->options & XML_PARSE_OLD10)
            ctxt->myDoc->properties |= XML_DOC_OLD10;
    }
    if (! ctxt->wellFormed) {
        ctxt->valid = 0;
        return(-1);
    }
    return(0);
}

/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * parse a general parsed entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * Validation and external subset loading are switched off on the
 * context before the content is parsed.
 *
 * Returns 0, -1 in case of error. the parser context is augmented
 *                as a result of the parsing.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    xmlDefaultSAXHandlerInit();

    xmlDetectSAX2(ctxt);

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the 4 first bytes and decode the charset
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* the error is only reported here; parsing carries on below */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        /* no declaration: assume the default version */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);

    /*
     * xmlParseContent stops on "</" or on the end of the input; either
     * an unmatched end tag or leftover data is an error for an entity.
     */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}

#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
 *									*
 *		Progressive parsing interfaces				*
 *									*
 ************************************************************************/

/**
 * xmlParseLookupSequence:
 * @ctxt:  an XML parser context
 * @first:  the first char to lookup
 * @next:  the next char to lookup or zero
 * @third:  the next char to lookup or zero
 *
 * Try to find if a sequence (first, next, third) or  just (first next) or
 * (first) is available in the input stream.
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
 * to avoid rescanning sequences of bytes, it DOES change the state of the
 * parser, do not use liberally.
 *
 * Returns the offset of the sequence relative to the current parsing
 *         point if the full sequence is available, -1 otherwise.
 */
static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
                       xmlChar next, xmlChar third) {
    int base, len;
    xmlParserInputPtr in;
    const xmlChar *buf;

    in = ctxt->input;
    if (in == NULL) return(-1);
    base = in->cur - in->base;
    if (base < 0) return(-1);
    /* resume where the previous failed lookup stopped */
    if (ctxt->checkIndex > base)
        base = ctxt->checkIndex;
    if (in->buf == NULL) {
        buf = in->base;
        len = in->length;
    } else {
        buf = in->buf->buffer->content;
        len = in->buf->buffer->use;
    }
    /* take into account the sequence length */
    if (third) len -= 2;
    else if (next) len --;
    for (;base < len;base++) {
        if (buf[base] == first) {
            if (third != 0) {
                if ((buf[base + 1] != next) ||
                    (buf[base + 2] != third)) continue;
            } else if (next != 0) {
                if (buf[base + 1] != next) continue;
            }
            /* found: the next lookup starts from the current position again */
            ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
            if (next == 0)
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c' found at %d\n",
                        first, base);
            else if (third == 0)
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c%c' found at %d\n",
                        first, next, base);
            else
                xmlGenericError(xmlGenericErrorContext,
                        "PP: lookup '%c%c%c' found at %d\n",
                        first, next, third, base);
#endif
            return(base - (in->cur - in->base));
        }
    }
    /* not found: remember how far the scan went */
    ctxt->checkIndex = base;
#ifdef DEBUG_PUSH
    if (next == 0)
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c' failed\n", first);
    else if (third == 0)
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c%c' failed\n", first, next);
    else
        xmlGenericError(xmlGenericErrorContext,
                "PP: lookup '%c%c%c' failed\n", first, next, third);
#endif
    return(-1);
}

/**
 * xmlParseGetLasts:
 * @ctxt:  an XML parser context
 * @lastlt:  pointer to store the last '<' from the input
 * @lastgt:  pointer to store the last '>' from the input
 *
 * Lookup the last < and > in the current chunk
 */
static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
                 const xmlChar **lastgt) {
    const xmlChar *tmp;

    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
        xmlGenericError(xmlGenericErrorContext,
                    "Internal error: xmlParseGetLasts\n");
        return;
    }
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
        tmp = ctxt->input->end;
        tmp--;
        while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
        if (tmp < ctxt->input->base) {
            *lastlt = NULL;
            *lastgt = NULL;
        } else {
            *lastlt = tmp;
            tmp++;
            while ((tmp < ctxt->input->end) && (*tmp != '>')) {
                if (*tmp == '\'') {
                    tmp++;
                    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
                    if (tmp < ctxt->input->end) tmp++;
                } else
if (*tmp == '"') { 10475 tmp++; 10476 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 10477 if (tmp < ctxt->input->end) tmp++; 10478 } else 10479 tmp++; 10480 } 10481 if (tmp < ctxt->input->end) 10482 *lastgt = tmp; 10483 else { 10484 tmp = *lastlt; 10485 tmp--; 10486 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 10487 if (tmp >= ctxt->input->base) 10488 *lastgt = tmp; 10489 else 10490 *lastgt = NULL; 10491 } 10492 } 10493 } else { 10494 *lastlt = NULL; 10495 *lastgt = NULL; 10496 } 10497} 10498/** 10499 * xmlCheckCdataPush: 10500 * @cur: pointer to the bock of characters 10501 * @len: length of the block in bytes 10502 * 10503 * Check that the block of characters is okay as SCdata content [20] 10504 * 10505 * Returns the number of bytes to pass if okay, a negative index where an 10506 * UTF-8 error occured otherwise 10507 */ 10508static int 10509xmlCheckCdataPush(const xmlChar *utf, int len) { 10510 int ix; 10511 unsigned char c; 10512 int codepoint; 10513 10514 if ((utf == NULL) || (len <= 0)) 10515 return(0); 10516 10517 for (ix = 0; ix < len;) { /* string is 0-terminated */ 10518 c = utf[ix]; 10519 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 10520 if (c >= 0x20) 10521 ix++; 10522 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 10523 ix++; 10524 else 10525 return(-ix); 10526 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 10527 if (ix + 2 > len) return(ix); 10528 if ((utf[ix+1] & 0xc0 ) != 0x80) 10529 return(-ix); 10530 codepoint = (utf[ix] & 0x1f) << 6; 10531 codepoint |= utf[ix+1] & 0x3f; 10532 if (!xmlIsCharQ(codepoint)) 10533 return(-ix); 10534 ix += 2; 10535 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 10536 if (ix + 3 > len) return(ix); 10537 if (((utf[ix+1] & 0xc0) != 0x80) || 10538 ((utf[ix+2] & 0xc0) != 0x80)) 10539 return(-ix); 10540 codepoint = (utf[ix] & 0xf) << 12; 10541 codepoint |= (utf[ix+1] & 0x3f) << 6; 10542 codepoint |= utf[ix+2] & 0x3f; 10543 if 
(!xmlIsCharQ(codepoint)) 10544 return(-ix); 10545 ix += 3; 10546 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 10547 if (ix + 4 > len) return(ix); 10548 if (((utf[ix+1] & 0xc0) != 0x80) || 10549 ((utf[ix+2] & 0xc0) != 0x80) || 10550 ((utf[ix+3] & 0xc0) != 0x80)) 10551 return(-ix); 10552 codepoint = (utf[ix] & 0x7) << 18; 10553 codepoint |= (utf[ix+1] & 0x3f) << 12; 10554 codepoint |= (utf[ix+2] & 0x3f) << 6; 10555 codepoint |= utf[ix+3] & 0x3f; 10556 if (!xmlIsCharQ(codepoint)) 10557 return(-ix); 10558 ix += 4; 10559 } else /* unknown encoding */ 10560 return(-ix); 10561 } 10562 return(ix); 10563} 10564 10565/** 10566 * xmlParseTryOrFinish: 10567 * @ctxt: an XML parser context 10568 * @terminate: last chunk indicator 10569 * 10570 * Try to progress on parsing 10571 * 10572 * Returns zero if no parsing was possible 10573 */ 10574static int 10575xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 10576 int ret = 0; 10577 int avail, tlen; 10578 xmlChar cur, next; 10579 const xmlChar *lastlt, *lastgt; 10580 10581 if (ctxt->input == NULL) 10582 return(0); 10583 10584#ifdef DEBUG_PUSH 10585 switch (ctxt->instate) { 10586 case XML_PARSER_EOF: 10587 xmlGenericError(xmlGenericErrorContext, 10588 "PP: try EOF\n"); break; 10589 case XML_PARSER_START: 10590 xmlGenericError(xmlGenericErrorContext, 10591 "PP: try START\n"); break; 10592 case XML_PARSER_MISC: 10593 xmlGenericError(xmlGenericErrorContext, 10594 "PP: try MISC\n");break; 10595 case XML_PARSER_COMMENT: 10596 xmlGenericError(xmlGenericErrorContext, 10597 "PP: try COMMENT\n");break; 10598 case XML_PARSER_PROLOG: 10599 xmlGenericError(xmlGenericErrorContext, 10600 "PP: try PROLOG\n");break; 10601 case XML_PARSER_START_TAG: 10602 xmlGenericError(xmlGenericErrorContext, 10603 "PP: try START_TAG\n");break; 10604 case XML_PARSER_CONTENT: 10605 xmlGenericError(xmlGenericErrorContext, 10606 "PP: try CONTENT\n");break; 10607 case XML_PARSER_CDATA_SECTION: 10608 
xmlGenericError(xmlGenericErrorContext, 10609 "PP: try CDATA_SECTION\n");break; 10610 case XML_PARSER_END_TAG: 10611 xmlGenericError(xmlGenericErrorContext, 10612 "PP: try END_TAG\n");break; 10613 case XML_PARSER_ENTITY_DECL: 10614 xmlGenericError(xmlGenericErrorContext, 10615 "PP: try ENTITY_DECL\n");break; 10616 case XML_PARSER_ENTITY_VALUE: 10617 xmlGenericError(xmlGenericErrorContext, 10618 "PP: try ENTITY_VALUE\n");break; 10619 case XML_PARSER_ATTRIBUTE_VALUE: 10620 xmlGenericError(xmlGenericErrorContext, 10621 "PP: try ATTRIBUTE_VALUE\n");break; 10622 case XML_PARSER_DTD: 10623 xmlGenericError(xmlGenericErrorContext, 10624 "PP: try DTD\n");break; 10625 case XML_PARSER_EPILOG: 10626 xmlGenericError(xmlGenericErrorContext, 10627 "PP: try EPILOG\n");break; 10628 case XML_PARSER_PI: 10629 xmlGenericError(xmlGenericErrorContext, 10630 "PP: try PI\n");break; 10631 case XML_PARSER_IGNORE: 10632 xmlGenericError(xmlGenericErrorContext, 10633 "PP: try IGNORE\n");break; 10634 } 10635#endif 10636 10637 if ((ctxt->input != NULL) && 10638 (ctxt->input->cur - ctxt->input->base > 4096)) { 10639 xmlSHRINK(ctxt); 10640 ctxt->checkIndex = 0; 10641 } 10642 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10643 10644 while (1) { 10645 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10646 return(0); 10647 10648 10649 /* 10650 * Pop-up of finished entities. 10651 */ 10652 while ((RAW == 0) && (ctxt->inputNr > 1)) 10653 xmlPopInput(ctxt); 10654 10655 if (ctxt->input == NULL) break; 10656 if (ctxt->input->buf == NULL) 10657 avail = ctxt->input->length - 10658 (ctxt->input->cur - ctxt->input->base); 10659 else { 10660 /* 10661 * If we are operating on converted input, try to flush 10662 * remainng chars to avoid them stalling in the non-converted 10663 * buffer. 
10664 */ 10665 if ((ctxt->input->buf->raw != NULL) && 10666 (ctxt->input->buf->raw->use > 0)) { 10667 int base = ctxt->input->base - 10668 ctxt->input->buf->buffer->content; 10669 int current = ctxt->input->cur - ctxt->input->base; 10670 10671 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 10672 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10673 ctxt->input->cur = ctxt->input->base + current; 10674 ctxt->input->end = 10675 &ctxt->input->buf->buffer->content[ 10676 ctxt->input->buf->buffer->use]; 10677 } 10678 avail = ctxt->input->buf->buffer->use - 10679 (ctxt->input->cur - ctxt->input->base); 10680 } 10681 if (avail < 1) 10682 goto done; 10683 switch (ctxt->instate) { 10684 case XML_PARSER_EOF: 10685 /* 10686 * Document parsing is done ! 10687 */ 10688 goto done; 10689 case XML_PARSER_START: 10690 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 10691 xmlChar start[4]; 10692 xmlCharEncoding enc; 10693 10694 /* 10695 * Very first chars read from the document flow. 10696 */ 10697 if (avail < 4) 10698 goto done; 10699 10700 /* 10701 * Get the 4 first bytes and decode the charset 10702 * if enc != XML_CHAR_ENCODING_NONE 10703 * plug some encoding conversion routines, 10704 * else xmlSwitchEncoding will set to (default) 10705 * UTF8. 
10706 */ 10707 start[0] = RAW; 10708 start[1] = NXT(1); 10709 start[2] = NXT(2); 10710 start[3] = NXT(3); 10711 enc = xmlDetectCharEncoding(start, 4); 10712 xmlSwitchEncoding(ctxt, enc); 10713 break; 10714 } 10715 10716 if (avail < 2) 10717 goto done; 10718 cur = ctxt->input->cur[0]; 10719 next = ctxt->input->cur[1]; 10720 if (cur == 0) { 10721 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10722 ctxt->sax->setDocumentLocator(ctxt->userData, 10723 &xmlDefaultSAXLocator); 10724 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10725 ctxt->instate = XML_PARSER_EOF; 10726#ifdef DEBUG_PUSH 10727 xmlGenericError(xmlGenericErrorContext, 10728 "PP: entering EOF\n"); 10729#endif 10730 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10731 ctxt->sax->endDocument(ctxt->userData); 10732 goto done; 10733 } 10734 if ((cur == '<') && (next == '?')) { 10735 /* PI or XML decl */ 10736 if (avail < 5) return(ret); 10737 if ((!terminate) && 10738 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10739 return(ret); 10740 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10741 ctxt->sax->setDocumentLocator(ctxt->userData, 10742 &xmlDefaultSAXLocator); 10743 if ((ctxt->input->cur[2] == 'x') && 10744 (ctxt->input->cur[3] == 'm') && 10745 (ctxt->input->cur[4] == 'l') && 10746 (IS_BLANK_CH(ctxt->input->cur[5]))) { 10747 ret += 5; 10748#ifdef DEBUG_PUSH 10749 xmlGenericError(xmlGenericErrorContext, 10750 "PP: Parsing XML Decl\n"); 10751#endif 10752 xmlParseXMLDecl(ctxt); 10753 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 10754 /* 10755 * The XML REC instructs us to stop parsing right 10756 * here 10757 */ 10758 ctxt->instate = XML_PARSER_EOF; 10759 return(0); 10760 } 10761 ctxt->standalone = ctxt->input->standalone; 10762 if ((ctxt->encoding == NULL) && 10763 (ctxt->input->encoding != NULL)) 10764 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 10765 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10766 (!ctxt->disableSAX)) 10767 ctxt->sax->startDocument(ctxt->userData); 
10768 ctxt->instate = XML_PARSER_MISC; 10769#ifdef DEBUG_PUSH 10770 xmlGenericError(xmlGenericErrorContext, 10771 "PP: entering MISC\n"); 10772#endif 10773 } else { 10774 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10775 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10776 (!ctxt->disableSAX)) 10777 ctxt->sax->startDocument(ctxt->userData); 10778 ctxt->instate = XML_PARSER_MISC; 10779#ifdef DEBUG_PUSH 10780 xmlGenericError(xmlGenericErrorContext, 10781 "PP: entering MISC\n"); 10782#endif 10783 } 10784 } else { 10785 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 10786 ctxt->sax->setDocumentLocator(ctxt->userData, 10787 &xmlDefaultSAXLocator); 10788 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 10789 if (ctxt->version == NULL) { 10790 xmlErrMemory(ctxt, NULL); 10791 break; 10792 } 10793 if ((ctxt->sax) && (ctxt->sax->startDocument) && 10794 (!ctxt->disableSAX)) 10795 ctxt->sax->startDocument(ctxt->userData); 10796 ctxt->instate = XML_PARSER_MISC; 10797#ifdef DEBUG_PUSH 10798 xmlGenericError(xmlGenericErrorContext, 10799 "PP: entering MISC\n"); 10800#endif 10801 } 10802 break; 10803 case XML_PARSER_START_TAG: { 10804 const xmlChar *name; 10805 const xmlChar *prefix = NULL; 10806 const xmlChar *URI = NULL; 10807 int nsNr = ctxt->nsNr; 10808 10809 if ((avail < 2) && (ctxt->inputNr == 1)) 10810 goto done; 10811 cur = ctxt->input->cur[0]; 10812 if (cur != '<') { 10813 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 10814 ctxt->instate = XML_PARSER_EOF; 10815 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10816 ctxt->sax->endDocument(ctxt->userData); 10817 goto done; 10818 } 10819 if (!terminate) { 10820 if (ctxt->progressive) { 10821 /* > can be found unescaped in attribute values */ 10822 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10823 goto done; 10824 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10825 goto done; 10826 } 10827 } 10828 if (ctxt->spaceNr == 0) 10829 spacePush(ctxt, -1); 10830 else if (*ctxt->space == 
-2) 10831 spacePush(ctxt, -1); 10832 else 10833 spacePush(ctxt, *ctxt->space); 10834#ifdef LIBXML_SAX1_ENABLED 10835 if (ctxt->sax2) 10836#endif /* LIBXML_SAX1_ENABLED */ 10837 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 10838#ifdef LIBXML_SAX1_ENABLED 10839 else 10840 name = xmlParseStartTag(ctxt); 10841#endif /* LIBXML_SAX1_ENABLED */ 10842 if (name == NULL) { 10843 spacePop(ctxt); 10844 ctxt->instate = XML_PARSER_EOF; 10845 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10846 ctxt->sax->endDocument(ctxt->userData); 10847 goto done; 10848 } 10849#ifdef LIBXML_VALID_ENABLED 10850 /* 10851 * [ VC: Root Element Type ] 10852 * The Name in the document type declaration must match 10853 * the element type of the root element. 10854 */ 10855 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 10856 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 10857 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 10858#endif /* LIBXML_VALID_ENABLED */ 10859 10860 /* 10861 * Check for an Empty Element. 
10862 */ 10863 if ((RAW == '/') && (NXT(1) == '>')) { 10864 SKIP(2); 10865 10866 if (ctxt->sax2) { 10867 if ((ctxt->sax != NULL) && 10868 (ctxt->sax->endElementNs != NULL) && 10869 (!ctxt->disableSAX)) 10870 ctxt->sax->endElementNs(ctxt->userData, name, 10871 prefix, URI); 10872 if (ctxt->nsNr - nsNr > 0) 10873 nsPop(ctxt, ctxt->nsNr - nsNr); 10874#ifdef LIBXML_SAX1_ENABLED 10875 } else { 10876 if ((ctxt->sax != NULL) && 10877 (ctxt->sax->endElement != NULL) && 10878 (!ctxt->disableSAX)) 10879 ctxt->sax->endElement(ctxt->userData, name); 10880#endif /* LIBXML_SAX1_ENABLED */ 10881 } 10882 spacePop(ctxt); 10883 if (ctxt->nameNr == 0) { 10884 ctxt->instate = XML_PARSER_EPILOG; 10885 } else { 10886 ctxt->instate = XML_PARSER_CONTENT; 10887 } 10888 break; 10889 } 10890 if (RAW == '>') { 10891 NEXT; 10892 } else { 10893 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 10894 "Couldn't find end of Start Tag %s\n", 10895 name); 10896 nodePop(ctxt); 10897 spacePop(ctxt); 10898 } 10899 if (ctxt->sax2) 10900 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 10901#ifdef LIBXML_SAX1_ENABLED 10902 else 10903 namePush(ctxt, name); 10904#endif /* LIBXML_SAX1_ENABLED */ 10905 10906 ctxt->instate = XML_PARSER_CONTENT; 10907 break; 10908 } 10909 case XML_PARSER_CONTENT: { 10910 const xmlChar *test; 10911 unsigned int cons; 10912 if ((avail < 2) && (ctxt->inputNr == 1)) 10913 goto done; 10914 cur = ctxt->input->cur[0]; 10915 next = ctxt->input->cur[1]; 10916 10917 test = CUR_PTR; 10918 cons = ctxt->input->consumed; 10919 if ((cur == '<') && (next == '/')) { 10920 ctxt->instate = XML_PARSER_END_TAG; 10921 break; 10922 } else if ((cur == '<') && (next == '?')) { 10923 if ((!terminate) && 10924 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10925 goto done; 10926 xmlParsePI(ctxt); 10927 } else if ((cur == '<') && (next != '!')) { 10928 ctxt->instate = XML_PARSER_START_TAG; 10929 break; 10930 } else if ((cur == '<') && (next == '!') && 10931 (ctxt->input->cur[2] == '-') && 10932 
(ctxt->input->cur[3] == '-')) { 10933 int term; 10934 10935 if (avail < 4) 10936 goto done; 10937 ctxt->input->cur += 4; 10938 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 10939 ctxt->input->cur -= 4; 10940 if ((!terminate) && (term < 0)) 10941 goto done; 10942 xmlParseComment(ctxt); 10943 ctxt->instate = XML_PARSER_CONTENT; 10944 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 10945 (ctxt->input->cur[2] == '[') && 10946 (ctxt->input->cur[3] == 'C') && 10947 (ctxt->input->cur[4] == 'D') && 10948 (ctxt->input->cur[5] == 'A') && 10949 (ctxt->input->cur[6] == 'T') && 10950 (ctxt->input->cur[7] == 'A') && 10951 (ctxt->input->cur[8] == '[')) { 10952 SKIP(9); 10953 ctxt->instate = XML_PARSER_CDATA_SECTION; 10954 break; 10955 } else if ((cur == '<') && (next == '!') && 10956 (avail < 9)) { 10957 goto done; 10958 } else if (cur == '&') { 10959 if ((!terminate) && 10960 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 10961 goto done; 10962 xmlParseReference(ctxt); 10963 } else { 10964 /* TODO Avoid the extra copy, handle directly !!! */ 10965 /* 10966 * Goal of the following test is: 10967 * - minimize calls to the SAX 'character' callback 10968 * when they are mergeable 10969 * - handle an problem for isBlank when we only parse 10970 * a sequence of blank chars and the next one is 10971 * not available to check against '<' presence. 10972 * - tries to homogenize the differences in SAX 10973 * callbacks between the push and pull versions 10974 * of the parser. 10975 */ 10976 if ((ctxt->inputNr == 1) && 10977 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 10978 if (!terminate) { 10979 if (ctxt->progressive) { 10980 if ((lastlt == NULL) || 10981 (ctxt->input->cur > lastlt)) 10982 goto done; 10983 } else if (xmlParseLookupSequence(ctxt, 10984 '<', 0, 0) < 0) { 10985 goto done; 10986 } 10987 } 10988 } 10989 ctxt->checkIndex = 0; 10990 xmlParseCharData(ctxt, 0); 10991 } 10992 /* 10993 * Pop-up of finished entities. 
10994 */ 10995 while ((RAW == 0) && (ctxt->inputNr > 1)) 10996 xmlPopInput(ctxt); 10997 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10999 "detected an error in element content\n"); 11000 ctxt->instate = XML_PARSER_EOF; 11001 break; 11002 } 11003 break; 11004 } 11005 case XML_PARSER_END_TAG: 11006 if (avail < 2) 11007 goto done; 11008 if (!terminate) { 11009 if (ctxt->progressive) { 11010 /* > can be found unescaped in attribute values */ 11011 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 11012 goto done; 11013 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 11014 goto done; 11015 } 11016 } 11017 if (ctxt->sax2) { 11018 xmlParseEndTag2(ctxt, 11019 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 11020 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 11021 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 11022 nameNsPop(ctxt); 11023 } 11024#ifdef LIBXML_SAX1_ENABLED 11025 else 11026 xmlParseEndTag1(ctxt, 0); 11027#endif /* LIBXML_SAX1_ENABLED */ 11028 if (ctxt->nameNr == 0) { 11029 ctxt->instate = XML_PARSER_EPILOG; 11030 } else { 11031 ctxt->instate = XML_PARSER_CONTENT; 11032 } 11033 break; 11034 case XML_PARSER_CDATA_SECTION: { 11035 /* 11036 * The Push mode need to have the SAX callback for 11037 * cdataBlock merge back contiguous callbacks. 
11038 */ 11039 int base; 11040 11041 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 11042 if (base < 0) { 11043 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 11044 int tmp; 11045 11046 tmp = xmlCheckCdataPush(ctxt->input->cur, 11047 XML_PARSER_BIG_BUFFER_SIZE); 11048 if (tmp < 0) { 11049 tmp = -tmp; 11050 ctxt->input->cur += tmp; 11051 goto encoding_error; 11052 } 11053 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 11054 if (ctxt->sax->cdataBlock != NULL) 11055 ctxt->sax->cdataBlock(ctxt->userData, 11056 ctxt->input->cur, tmp); 11057 else if (ctxt->sax->characters != NULL) 11058 ctxt->sax->characters(ctxt->userData, 11059 ctxt->input->cur, tmp); 11060 } 11061 SKIPL(tmp); 11062 ctxt->checkIndex = 0; 11063 } 11064 goto done; 11065 } else { 11066 int tmp; 11067 11068 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 11069 if ((tmp < 0) || (tmp != base)) { 11070 tmp = -tmp; 11071 ctxt->input->cur += tmp; 11072 goto encoding_error; 11073 } 11074 if ((ctxt->sax != NULL) && (base == 0) && 11075 (ctxt->sax->cdataBlock != NULL) && 11076 (!ctxt->disableSAX)) { 11077 /* 11078 * Special case to provide identical behaviour 11079 * between pull and push parsers on enpty CDATA 11080 * sections 11081 */ 11082 if ((ctxt->input->cur - ctxt->input->base >= 9) && 11083 (!strncmp((const char *)&ctxt->input->cur[-9], 11084 "<![CDATA[", 9))) 11085 ctxt->sax->cdataBlock(ctxt->userData, 11086 BAD_CAST "", 0); 11087 } else if ((ctxt->sax != NULL) && (base > 0) && 11088 (!ctxt->disableSAX)) { 11089 if (ctxt->sax->cdataBlock != NULL) 11090 ctxt->sax->cdataBlock(ctxt->userData, 11091 ctxt->input->cur, base); 11092 else if (ctxt->sax->characters != NULL) 11093 ctxt->sax->characters(ctxt->userData, 11094 ctxt->input->cur, base); 11095 } 11096 SKIPL(base + 3); 11097 ctxt->checkIndex = 0; 11098 ctxt->instate = XML_PARSER_CONTENT; 11099#ifdef DEBUG_PUSH 11100 xmlGenericError(xmlGenericErrorContext, 11101 "PP: entering CONTENT\n"); 11102#endif 11103 } 11104 break; 11105 } 11106 case 
XML_PARSER_MISC: 11107 SKIP_BLANKS; 11108 if (ctxt->input->buf == NULL) 11109 avail = ctxt->input->length - 11110 (ctxt->input->cur - ctxt->input->base); 11111 else 11112 avail = ctxt->input->buf->buffer->use - 11113 (ctxt->input->cur - ctxt->input->base); 11114 if (avail < 2) 11115 goto done; 11116 cur = ctxt->input->cur[0]; 11117 next = ctxt->input->cur[1]; 11118 if ((cur == '<') && (next == '?')) { 11119 if ((!terminate) && 11120 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11121 goto done; 11122#ifdef DEBUG_PUSH 11123 xmlGenericError(xmlGenericErrorContext, 11124 "PP: Parsing PI\n"); 11125#endif 11126 xmlParsePI(ctxt); 11127 ctxt->checkIndex = 0; 11128 } else if ((cur == '<') && (next == '!') && 11129 (ctxt->input->cur[2] == '-') && 11130 (ctxt->input->cur[3] == '-')) { 11131 if ((!terminate) && 11132 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11133 goto done; 11134#ifdef DEBUG_PUSH 11135 xmlGenericError(xmlGenericErrorContext, 11136 "PP: Parsing Comment\n"); 11137#endif 11138 xmlParseComment(ctxt); 11139 ctxt->instate = XML_PARSER_MISC; 11140 ctxt->checkIndex = 0; 11141 } else if ((cur == '<') && (next == '!') && 11142 (ctxt->input->cur[2] == 'D') && 11143 (ctxt->input->cur[3] == 'O') && 11144 (ctxt->input->cur[4] == 'C') && 11145 (ctxt->input->cur[5] == 'T') && 11146 (ctxt->input->cur[6] == 'Y') && 11147 (ctxt->input->cur[7] == 'P') && 11148 (ctxt->input->cur[8] == 'E')) { 11149 if ((!terminate) && 11150 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 11151 goto done; 11152#ifdef DEBUG_PUSH 11153 xmlGenericError(xmlGenericErrorContext, 11154 "PP: Parsing internal subset\n"); 11155#endif 11156 ctxt->inSubset = 1; 11157 xmlParseDocTypeDecl(ctxt); 11158 if (RAW == '[') { 11159 ctxt->instate = XML_PARSER_DTD; 11160#ifdef DEBUG_PUSH 11161 xmlGenericError(xmlGenericErrorContext, 11162 "PP: entering DTD\n"); 11163#endif 11164 } else { 11165 /* 11166 * Create and update the external subset. 
11167 */ 11168 ctxt->inSubset = 2; 11169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11170 (ctxt->sax->externalSubset != NULL)) 11171 ctxt->sax->externalSubset(ctxt->userData, 11172 ctxt->intSubName, ctxt->extSubSystem, 11173 ctxt->extSubURI); 11174 ctxt->inSubset = 0; 11175 xmlCleanSpecialAttr(ctxt); 11176 ctxt->instate = XML_PARSER_PROLOG; 11177#ifdef DEBUG_PUSH 11178 xmlGenericError(xmlGenericErrorContext, 11179 "PP: entering PROLOG\n"); 11180#endif 11181 } 11182 } else if ((cur == '<') && (next == '!') && 11183 (avail < 9)) { 11184 goto done; 11185 } else { 11186 ctxt->instate = XML_PARSER_START_TAG; 11187 ctxt->progressive = 1; 11188 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11189#ifdef DEBUG_PUSH 11190 xmlGenericError(xmlGenericErrorContext, 11191 "PP: entering START_TAG\n"); 11192#endif 11193 } 11194 break; 11195 case XML_PARSER_PROLOG: 11196 SKIP_BLANKS; 11197 if (ctxt->input->buf == NULL) 11198 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11199 else 11200 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11201 if (avail < 2) 11202 goto done; 11203 cur = ctxt->input->cur[0]; 11204 next = ctxt->input->cur[1]; 11205 if ((cur == '<') && (next == '?')) { 11206 if ((!terminate) && 11207 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11208 goto done; 11209#ifdef DEBUG_PUSH 11210 xmlGenericError(xmlGenericErrorContext, 11211 "PP: Parsing PI\n"); 11212#endif 11213 xmlParsePI(ctxt); 11214 } else if ((cur == '<') && (next == '!') && 11215 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11216 if ((!terminate) && 11217 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11218 goto done; 11219#ifdef DEBUG_PUSH 11220 xmlGenericError(xmlGenericErrorContext, 11221 "PP: Parsing Comment\n"); 11222#endif 11223 xmlParseComment(ctxt); 11224 ctxt->instate = XML_PARSER_PROLOG; 11225 } else if ((cur == '<') && (next == '!') && 11226 (avail < 4)) { 11227 goto done; 11228 } else { 11229 ctxt->instate = 
XML_PARSER_START_TAG; 11230 if (ctxt->progressive == 0) 11231 ctxt->progressive = 1; 11232 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 11233#ifdef DEBUG_PUSH 11234 xmlGenericError(xmlGenericErrorContext, 11235 "PP: entering START_TAG\n"); 11236#endif 11237 } 11238 break; 11239 case XML_PARSER_EPILOG: 11240 SKIP_BLANKS; 11241 if (ctxt->input->buf == NULL) 11242 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 11243 else 11244 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 11245 if (avail < 2) 11246 goto done; 11247 cur = ctxt->input->cur[0]; 11248 next = ctxt->input->cur[1]; 11249 if ((cur == '<') && (next == '?')) { 11250 if ((!terminate) && 11251 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 11252 goto done; 11253#ifdef DEBUG_PUSH 11254 xmlGenericError(xmlGenericErrorContext, 11255 "PP: Parsing PI\n"); 11256#endif 11257 xmlParsePI(ctxt); 11258 ctxt->instate = XML_PARSER_EPILOG; 11259 } else if ((cur == '<') && (next == '!') && 11260 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 11261 if ((!terminate) && 11262 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 11263 goto done; 11264#ifdef DEBUG_PUSH 11265 xmlGenericError(xmlGenericErrorContext, 11266 "PP: Parsing Comment\n"); 11267#endif 11268 xmlParseComment(ctxt); 11269 ctxt->instate = XML_PARSER_EPILOG; 11270 } else if ((cur == '<') && (next == '!') && 11271 (avail < 4)) { 11272 goto done; 11273 } else { 11274 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11275 ctxt->instate = XML_PARSER_EOF; 11276#ifdef DEBUG_PUSH 11277 xmlGenericError(xmlGenericErrorContext, 11278 "PP: entering EOF\n"); 11279#endif 11280 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11281 ctxt->sax->endDocument(ctxt->userData); 11282 goto done; 11283 } 11284 break; 11285 case XML_PARSER_DTD: { 11286 /* 11287 * Sorry but progressive parsing of the internal subset 11288 * is not expected to be supported. 
We first check that 11289 * the full content of the internal subset is available and 11290 * the parsing is launched only at that point. 11291 * Internal subset ends up with "']' S? '>'" in an unescaped 11292 * section and not in a ']]>' sequence which are conditional 11293 * sections (whoever argued to keep that crap in XML deserve 11294 * a place in hell !). 11295 */ 11296 int base, i; 11297 xmlChar *buf; 11298 xmlChar quote = 0; 11299 11300 base = ctxt->input->cur - ctxt->input->base; 11301 if (base < 0) return(0); 11302 if (ctxt->checkIndex > base) 11303 base = ctxt->checkIndex; 11304 buf = ctxt->input->buf->buffer->content; 11305 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 11306 base++) { 11307 if (quote != 0) { 11308 if (buf[base] == quote) 11309 quote = 0; 11310 continue; 11311 } 11312 if ((quote == 0) && (buf[base] == '<')) { 11313 int found = 0; 11314 /* special handling of comments */ 11315 if (((unsigned int) base + 4 < 11316 ctxt->input->buf->buffer->use) && 11317 (buf[base + 1] == '!') && 11318 (buf[base + 2] == '-') && 11319 (buf[base + 3] == '-')) { 11320 for (;(unsigned int) base + 3 < 11321 ctxt->input->buf->buffer->use; base++) { 11322 if ((buf[base] == '-') && 11323 (buf[base + 1] == '-') && 11324 (buf[base + 2] == '>')) { 11325 found = 1; 11326 base += 2; 11327 break; 11328 } 11329 } 11330 if (!found) { 11331#if 0 11332 fprintf(stderr, "unfinished comment\n"); 11333#endif 11334 break; /* for */ 11335 } 11336 continue; 11337 } 11338 } 11339 if (buf[base] == '"') { 11340 quote = '"'; 11341 continue; 11342 } 11343 if (buf[base] == '\'') { 11344 quote = '\''; 11345 continue; 11346 } 11347 if (buf[base] == ']') { 11348#if 0 11349 fprintf(stderr, "%c%c%c%c: ", buf[base], 11350 buf[base + 1], buf[base + 2], buf[base + 3]); 11351#endif 11352 if ((unsigned int) base +1 >= 11353 ctxt->input->buf->buffer->use) 11354 break; 11355 if (buf[base + 1] == ']') { 11356 /* conditional crap, skip both ']' ! 
*/ 11357 base++; 11358 continue; 11359 } 11360 for (i = 1; 11361 (unsigned int) base + i < ctxt->input->buf->buffer->use; 11362 i++) { 11363 if (buf[base + i] == '>') { 11364#if 0 11365 fprintf(stderr, "found\n"); 11366#endif 11367 goto found_end_int_subset; 11368 } 11369 if (!IS_BLANK_CH(buf[base + i])) { 11370#if 0 11371 fprintf(stderr, "not found\n"); 11372#endif 11373 goto not_end_of_int_subset; 11374 } 11375 } 11376#if 0 11377 fprintf(stderr, "end of stream\n"); 11378#endif 11379 break; 11380 11381 } 11382not_end_of_int_subset: 11383 continue; /* for */ 11384 } 11385 /* 11386 * We didn't found the end of the Internal subset 11387 */ 11388#ifdef DEBUG_PUSH 11389 if (next == 0) 11390 xmlGenericError(xmlGenericErrorContext, 11391 "PP: lookup of int subset end filed\n"); 11392#endif 11393 goto done; 11394 11395found_end_int_subset: 11396 xmlParseInternalSubset(ctxt); 11397 ctxt->inSubset = 2; 11398 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 11399 (ctxt->sax->externalSubset != NULL)) 11400 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 11401 ctxt->extSubSystem, ctxt->extSubURI); 11402 ctxt->inSubset = 0; 11403 xmlCleanSpecialAttr(ctxt); 11404 ctxt->instate = XML_PARSER_PROLOG; 11405 ctxt->checkIndex = 0; 11406#ifdef DEBUG_PUSH 11407 xmlGenericError(xmlGenericErrorContext, 11408 "PP: entering PROLOG\n"); 11409#endif 11410 break; 11411 } 11412 case XML_PARSER_COMMENT: 11413 xmlGenericError(xmlGenericErrorContext, 11414 "PP: internal error, state == COMMENT\n"); 11415 ctxt->instate = XML_PARSER_CONTENT; 11416#ifdef DEBUG_PUSH 11417 xmlGenericError(xmlGenericErrorContext, 11418 "PP: entering CONTENT\n"); 11419#endif 11420 break; 11421 case XML_PARSER_IGNORE: 11422 xmlGenericError(xmlGenericErrorContext, 11423 "PP: internal error, state == IGNORE"); 11424 ctxt->instate = XML_PARSER_DTD; 11425#ifdef DEBUG_PUSH 11426 xmlGenericError(xmlGenericErrorContext, 11427 "PP: entering DTD\n"); 11428#endif 11429 break; 11430 case XML_PARSER_PI: 11431 
xmlGenericError(xmlGenericErrorContext, 11432 "PP: internal error, state == PI\n"); 11433 ctxt->instate = XML_PARSER_CONTENT; 11434#ifdef DEBUG_PUSH 11435 xmlGenericError(xmlGenericErrorContext, 11436 "PP: entering CONTENT\n"); 11437#endif 11438 break; 11439 case XML_PARSER_ENTITY_DECL: 11440 xmlGenericError(xmlGenericErrorContext, 11441 "PP: internal error, state == ENTITY_DECL\n"); 11442 ctxt->instate = XML_PARSER_DTD; 11443#ifdef DEBUG_PUSH 11444 xmlGenericError(xmlGenericErrorContext, 11445 "PP: entering DTD\n"); 11446#endif 11447 break; 11448 case XML_PARSER_ENTITY_VALUE: 11449 xmlGenericError(xmlGenericErrorContext, 11450 "PP: internal error, state == ENTITY_VALUE\n"); 11451 ctxt->instate = XML_PARSER_CONTENT; 11452#ifdef DEBUG_PUSH 11453 xmlGenericError(xmlGenericErrorContext, 11454 "PP: entering DTD\n"); 11455#endif 11456 break; 11457 case XML_PARSER_ATTRIBUTE_VALUE: 11458 xmlGenericError(xmlGenericErrorContext, 11459 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 11460 ctxt->instate = XML_PARSER_START_TAG; 11461#ifdef DEBUG_PUSH 11462 xmlGenericError(xmlGenericErrorContext, 11463 "PP: entering START_TAG\n"); 11464#endif 11465 break; 11466 case XML_PARSER_SYSTEM_LITERAL: 11467 xmlGenericError(xmlGenericErrorContext, 11468 "PP: internal error, state == SYSTEM_LITERAL\n"); 11469 ctxt->instate = XML_PARSER_START_TAG; 11470#ifdef DEBUG_PUSH 11471 xmlGenericError(xmlGenericErrorContext, 11472 "PP: entering START_TAG\n"); 11473#endif 11474 break; 11475 case XML_PARSER_PUBLIC_LITERAL: 11476 xmlGenericError(xmlGenericErrorContext, 11477 "PP: internal error, state == PUBLIC_LITERAL\n"); 11478 ctxt->instate = XML_PARSER_START_TAG; 11479#ifdef DEBUG_PUSH 11480 xmlGenericError(xmlGenericErrorContext, 11481 "PP: entering START_TAG\n"); 11482#endif 11483 break; 11484 } 11485 } 11486done: 11487#ifdef DEBUG_PUSH 11488 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 11489#endif 11490 return(ret); 11491encoding_error: 11492 { 11493 char buffer[150]; 11494 
11495 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 11496 ctxt->input->cur[0], ctxt->input->cur[1], 11497 ctxt->input->cur[2], ctxt->input->cur[3]); 11498 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 11499 "Input is not proper UTF-8, indicate encoding !\n%s", 11500 BAD_CAST buffer, NULL); 11501 } 11502 return(0); 11503} 11504 11505/** 11506 * xmlParseChunk: 11507 * @ctxt: an XML parser context 11508 * @chunk: an char array 11509 * @size: the size in byte of the chunk 11510 * @terminate: last chunk indicator 11511 * 11512 * Parse a Chunk of memory 11513 * 11514 * Returns zero if no error, the xmlParserErrors otherwise. 11515 */ 11516int 11517xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 11518 int terminate) { 11519 int end_in_lf = 0; 11520 int remain = 0; 11521 11522 if (ctxt == NULL) 11523 return(XML_ERR_INTERNAL_ERROR); 11524 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11525 return(ctxt->errNo); 11526 if (ctxt->instate == XML_PARSER_START) 11527 xmlDetectSAX2(ctxt); 11528 if ((size > 0) && (chunk != NULL) && (!terminate) && 11529 (chunk[size - 1] == '\r')) { 11530 end_in_lf = 1; 11531 size--; 11532 } 11533 11534xmldecl_done: 11535 11536 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 11537 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 11538 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11539 int cur = ctxt->input->cur - ctxt->input->base; 11540 int res; 11541 11542 /* 11543 * Specific handling if we autodetected an encoding, we should not 11544 * push more than the first line ... 
which depend on the encoding 11545 * And only push the rest once the final encoding was detected 11546 */ 11547 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) && 11548 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) { 11549 unsigned int len = 45; 11550 11551 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11552 BAD_CAST "UTF-16")) || 11553 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11554 BAD_CAST "UTF16"))) 11555 len = 90; 11556 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11557 BAD_CAST "UCS-4")) || 11558 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, 11559 BAD_CAST "UCS4"))) 11560 len = 180; 11561 11562 if (ctxt->input->buf->rawconsumed < len) 11563 len -= ctxt->input->buf->rawconsumed; 11564 11565 /* 11566 * Change size for reading the initial declaration only 11567 * if size is greater than len. Otherwise, memmove in xmlBufferAdd 11568 * will blindly copy extra bytes from memory. 11569 */ 11570 if (size > len) { 11571 remain = size - len; 11572 size = len; 11573 } else { 11574 remain = 0; 11575 } 11576 } 11577 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11578 if (res < 0) { 11579 ctxt->errNo = XML_PARSER_EOF; 11580 ctxt->disableSAX = 1; 11581 return (XML_PARSER_EOF); 11582 } 11583 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11584 ctxt->input->cur = ctxt->input->base + cur; 11585 ctxt->input->end = 11586 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11587#ifdef DEBUG_PUSH 11588 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11589#endif 11590 11591 } else if (ctxt->instate != XML_PARSER_EOF) { 11592 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 11593 xmlParserInputBufferPtr in = ctxt->input->buf; 11594 if ((in->encoder != NULL) && (in->buffer != NULL) && 11595 (in->raw != NULL)) { 11596 int nbchars; 11597 11598 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 11599 if 
(nbchars < 0) { 11600 /* TODO 2.6.0 */ 11601 xmlGenericError(xmlGenericErrorContext, 11602 "xmlParseChunk: encoder error\n"); 11603 return(XML_ERR_INVALID_ENCODING); 11604 } 11605 } 11606 } 11607 } 11608 if (remain != 0) 11609 xmlParseTryOrFinish(ctxt, 0); 11610 else 11611 xmlParseTryOrFinish(ctxt, terminate); 11612 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 11613 return(ctxt->errNo); 11614 11615 if (remain != 0) { 11616 chunk += size; 11617 size = remain; 11618 remain = 0; 11619 goto xmldecl_done; 11620 } 11621 if ((end_in_lf == 1) && (ctxt->input != NULL) && 11622 (ctxt->input->buf != NULL)) { 11623 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 11624 } 11625 if (terminate) { 11626 /* 11627 * Check for termination 11628 */ 11629 int avail = 0; 11630 11631 if (ctxt->input != NULL) { 11632 if (ctxt->input->buf == NULL) 11633 avail = ctxt->input->length - 11634 (ctxt->input->cur - ctxt->input->base); 11635 else 11636 avail = ctxt->input->buf->buffer->use - 11637 (ctxt->input->cur - ctxt->input->base); 11638 } 11639 11640 if ((ctxt->instate != XML_PARSER_EOF) && 11641 (ctxt->instate != XML_PARSER_EPILOG)) { 11642 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11643 } 11644 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 11645 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 11646 } 11647 if (ctxt->instate != XML_PARSER_EOF) { 11648 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 11649 ctxt->sax->endDocument(ctxt->userData); 11650 } 11651 ctxt->instate = XML_PARSER_EOF; 11652 } 11653 return((xmlParserErrors) ctxt->errNo); 11654} 11655 11656/************************************************************************ 11657 * * 11658 * I/O front end functions to the parser * 11659 * * 11660 ************************************************************************/ 11661 11662/** 11663 * xmlCreatePushParserCtxt: 11664 * @sax: a SAX handler 11665 * @user_data: The user data returned on SAX callbacks 11666 * @chunk: a pointer to an array of 
chars 11667 * @size: number of chars in the array 11668 * @filename: an optional file name or URI 11669 * 11670 * Create a parser context for using the XML parser in push mode. 11671 * If @buffer and @size are non-NULL, the data is used to detect 11672 * the encoding. The remaining characters will be parsed so they 11673 * don't need to be fed in again through xmlParseChunk. 11674 * To allow content encoding detection, @size should be >= 4 11675 * The value of @filename is used for fetching external entities 11676 * and error/warning reports. 11677 * 11678 * Returns the new parser context or NULL 11679 */ 11680 11681xmlParserCtxtPtr 11682xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11683 const char *chunk, int size, const char *filename) { 11684 xmlParserCtxtPtr ctxt; 11685 xmlParserInputPtr inputStream; 11686 xmlParserInputBufferPtr buf; 11687 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 11688 11689 /* 11690 * plug some encoding conversion routines 11691 */ 11692 if ((chunk != NULL) && (size >= 4)) 11693 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 11694 11695 buf = xmlAllocParserInputBuffer(enc); 11696 if (buf == NULL) return(NULL); 11697 11698 ctxt = xmlNewParserCtxt(); 11699 if (ctxt == NULL) { 11700 xmlErrMemory(NULL, "creating parser: out of memory\n"); 11701 xmlFreeParserInputBuffer(buf); 11702 return(NULL); 11703 } 11704 ctxt->dictNames = 1; 11705 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 11706 if (ctxt->pushTab == NULL) { 11707 xmlErrMemory(ctxt, NULL); 11708 xmlFreeParserInputBuffer(buf); 11709 xmlFreeParserCtxt(ctxt); 11710 return(NULL); 11711 } 11712 if (sax != NULL) { 11713#ifdef LIBXML_SAX1_ENABLED 11714 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11715#endif /* LIBXML_SAX1_ENABLED */ 11716 xmlFree(ctxt->sax); 11717 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11718 if (ctxt->sax == NULL) { 11719 xmlErrMemory(ctxt, NULL); 11720 
xmlFreeParserInputBuffer(buf); 11721 xmlFreeParserCtxt(ctxt); 11722 return(NULL); 11723 } 11724 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11725 if (sax->initialized == XML_SAX2_MAGIC) 11726 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11727 else 11728 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11729 if (user_data != NULL) 11730 ctxt->userData = user_data; 11731 } 11732 if (filename == NULL) { 11733 ctxt->directory = NULL; 11734 } else { 11735 ctxt->directory = xmlParserGetDirectory(filename); 11736 } 11737 11738 inputStream = xmlNewInputStream(ctxt); 11739 if (inputStream == NULL) { 11740 xmlFreeParserCtxt(ctxt); 11741 xmlFreeParserInputBuffer(buf); 11742 return(NULL); 11743 } 11744 11745 if (filename == NULL) 11746 inputStream->filename = NULL; 11747 else { 11748 inputStream->filename = (char *) 11749 xmlCanonicPath((const xmlChar *) filename); 11750 if (inputStream->filename == NULL) { 11751 xmlFreeParserCtxt(ctxt); 11752 xmlFreeParserInputBuffer(buf); 11753 return(NULL); 11754 } 11755 } 11756 inputStream->buf = buf; 11757 inputStream->base = inputStream->buf->buffer->content; 11758 inputStream->cur = inputStream->buf->buffer->content; 11759 inputStream->end = 11760 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 11761 11762 inputPush(ctxt, inputStream); 11763 11764 /* 11765 * If the caller didn't provide an initial 'chunk' for determining 11766 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 11767 * that it can be automatically determined later 11768 */ 11769 if ((size == 0) || (chunk == NULL)) { 11770 ctxt->charset = XML_CHAR_ENCODING_NONE; 11771 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 11772 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 11773 int cur = ctxt->input->cur - ctxt->input->base; 11774 11775 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 11776 11777 ctxt->input->base = ctxt->input->buf->buffer->content + base; 11778 ctxt->input->cur = 
ctxt->input->base + cur; 11779 ctxt->input->end = 11780 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 11781#ifdef DEBUG_PUSH 11782 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 11783#endif 11784 } 11785 11786 if (enc != XML_CHAR_ENCODING_NONE) { 11787 xmlSwitchEncoding(ctxt, enc); 11788 } 11789 11790 return(ctxt); 11791} 11792#endif /* LIBXML_PUSH_ENABLED */ 11793 11794/** 11795 * xmlStopParser: 11796 * @ctxt: an XML parser context 11797 * 11798 * Blocks further parser processing 11799 */ 11800void 11801xmlStopParser(xmlParserCtxtPtr ctxt) { 11802 if (ctxt == NULL) 11803 return; 11804 ctxt->instate = XML_PARSER_EOF; 11805 ctxt->disableSAX = 1; 11806 if (ctxt->input != NULL) { 11807 ctxt->input->cur = BAD_CAST""; 11808 ctxt->input->base = ctxt->input->cur; 11809 } 11810} 11811 11812/** 11813 * xmlCreateIOParserCtxt: 11814 * @sax: a SAX handler 11815 * @user_data: The user data returned on SAX callbacks 11816 * @ioread: an I/O read function 11817 * @ioclose: an I/O close function 11818 * @ioctx: an I/O handler 11819 * @enc: the charset encoding if known 11820 * 11821 * Create a parser context for using the XML parser with an existing 11822 * I/O stream 11823 * 11824 * Returns the new parser context or NULL 11825 */ 11826xmlParserCtxtPtr 11827xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 11828 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 11829 void *ioctx, xmlCharEncoding enc) { 11830 xmlParserCtxtPtr ctxt; 11831 xmlParserInputPtr inputStream; 11832 xmlParserInputBufferPtr buf; 11833 11834 if (ioread == NULL) return(NULL); 11835 11836 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 11837 if (buf == NULL) return(NULL); 11838 11839 ctxt = xmlNewParserCtxt(); 11840 if (ctxt == NULL) { 11841 xmlFreeParserInputBuffer(buf); 11842 return(NULL); 11843 } 11844 if (sax != NULL) { 11845#ifdef LIBXML_SAX1_ENABLED 11846 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 11847#endif /* 
LIBXML_SAX1_ENABLED */ 11848 xmlFree(ctxt->sax); 11849 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 11850 if (ctxt->sax == NULL) { 11851 xmlErrMemory(ctxt, NULL); 11852 xmlFreeParserCtxt(ctxt); 11853 return(NULL); 11854 } 11855 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 11856 if (sax->initialized == XML_SAX2_MAGIC) 11857 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 11858 else 11859 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 11860 if (user_data != NULL) 11861 ctxt->userData = user_data; 11862 } 11863 11864 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 11865 if (inputStream == NULL) { 11866 xmlFreeParserCtxt(ctxt); 11867 return(NULL); 11868 } 11869 inputPush(ctxt, inputStream); 11870 11871 return(ctxt); 11872} 11873 11874#ifdef LIBXML_VALID_ENABLED 11875/************************************************************************ 11876 * * 11877 * Front ends when parsing a DTD * 11878 * * 11879 ************************************************************************/ 11880 11881/** 11882 * xmlIOParseDTD: 11883 * @sax: the SAX handler block or NULL 11884 * @input: an Input Buffer 11885 * @enc: the charset encoding if known 11886 * 11887 * Load and parse a DTD 11888 * 11889 * Returns the resulting xmlDtdPtr or NULL in case of error. 11890 * @input will be freed by the function in any case. 
11891 */ 11892 11893xmlDtdPtr 11894xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 11895 xmlCharEncoding enc) { 11896 xmlDtdPtr ret = NULL; 11897 xmlParserCtxtPtr ctxt; 11898 xmlParserInputPtr pinput = NULL; 11899 xmlChar start[4]; 11900 11901 if (input == NULL) 11902 return(NULL); 11903 11904 ctxt = xmlNewParserCtxt(); 11905 if (ctxt == NULL) { 11906 xmlFreeParserInputBuffer(input); 11907 return(NULL); 11908 } 11909 11910 /* 11911 * Set-up the SAX context 11912 */ 11913 if (sax != NULL) { 11914 if (ctxt->sax != NULL) 11915 xmlFree(ctxt->sax); 11916 ctxt->sax = sax; 11917 ctxt->userData = ctxt; 11918 } 11919 xmlDetectSAX2(ctxt); 11920 11921 /* 11922 * generate a parser input from the I/O handler 11923 */ 11924 11925 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 11926 if (pinput == NULL) { 11927 if (sax != NULL) ctxt->sax = NULL; 11928 xmlFreeParserInputBuffer(input); 11929 xmlFreeParserCtxt(ctxt); 11930 return(NULL); 11931 } 11932 11933 /* 11934 * plug some encoding conversion routines here. 11935 */ 11936 if (xmlPushInput(ctxt, pinput) < 0) { 11937 if (sax != NULL) ctxt->sax = NULL; 11938 xmlFreeParserCtxt(ctxt); 11939 return(NULL); 11940 } 11941 if (enc != XML_CHAR_ENCODING_NONE) { 11942 xmlSwitchEncoding(ctxt, enc); 11943 } 11944 11945 pinput->filename = NULL; 11946 pinput->line = 1; 11947 pinput->col = 1; 11948 pinput->base = ctxt->input->cur; 11949 pinput->cur = ctxt->input->cur; 11950 pinput->free = NULL; 11951 11952 /* 11953 * let's parse that entity knowing it's an external subset. 
11954 */ 11955 ctxt->inSubset = 2; 11956 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 11957 if (ctxt->myDoc == NULL) { 11958 xmlErrMemory(ctxt, "New Doc failed"); 11959 return(NULL); 11960 } 11961 ctxt->myDoc->properties = XML_DOC_INTERNAL; 11962 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 11963 BAD_CAST "none", BAD_CAST "none"); 11964 11965 if ((enc == XML_CHAR_ENCODING_NONE) && 11966 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 11967 /* 11968 * Get the 4 first bytes and decode the charset 11969 * if enc != XML_CHAR_ENCODING_NONE 11970 * plug some encoding conversion routines. 11971 */ 11972 start[0] = RAW; 11973 start[1] = NXT(1); 11974 start[2] = NXT(2); 11975 start[3] = NXT(3); 11976 enc = xmlDetectCharEncoding(start, 4); 11977 if (enc != XML_CHAR_ENCODING_NONE) { 11978 xmlSwitchEncoding(ctxt, enc); 11979 } 11980 } 11981 11982 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 11983 11984 if (ctxt->myDoc != NULL) { 11985 if (ctxt->wellFormed) { 11986 ret = ctxt->myDoc->extSubset; 11987 ctxt->myDoc->extSubset = NULL; 11988 if (ret != NULL) { 11989 xmlNodePtr tmp; 11990 11991 ret->doc = NULL; 11992 tmp = ret->children; 11993 while (tmp != NULL) { 11994 tmp->doc = NULL; 11995 tmp = tmp->next; 11996 } 11997 } 11998 } else { 11999 ret = NULL; 12000 } 12001 xmlFreeDoc(ctxt->myDoc); 12002 ctxt->myDoc = NULL; 12003 } 12004 if (sax != NULL) ctxt->sax = NULL; 12005 xmlFreeParserCtxt(ctxt); 12006 12007 return(ret); 12008} 12009 12010/** 12011 * xmlSAXParseDTD: 12012 * @sax: the SAX handler block 12013 * @ExternalID: a NAME* containing the External ID of the DTD 12014 * @SystemID: a NAME* containing the URL to the DTD 12015 * 12016 * Load and parse an external subset. 12017 * 12018 * Returns the resulting xmlDtdPtr or NULL in case of error. 
12019 */ 12020 12021xmlDtdPtr 12022xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 12023 const xmlChar *SystemID) { 12024 xmlDtdPtr ret = NULL; 12025 xmlParserCtxtPtr ctxt; 12026 xmlParserInputPtr input = NULL; 12027 xmlCharEncoding enc; 12028 xmlChar* systemIdCanonic; 12029 12030 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 12031 12032 ctxt = xmlNewParserCtxt(); 12033 if (ctxt == NULL) { 12034 return(NULL); 12035 } 12036 12037 /* 12038 * Set-up the SAX context 12039 */ 12040 if (sax != NULL) { 12041 if (ctxt->sax != NULL) 12042 xmlFree(ctxt->sax); 12043 ctxt->sax = sax; 12044 ctxt->userData = ctxt; 12045 } 12046 12047 /* 12048 * Canonicalise the system ID 12049 */ 12050 systemIdCanonic = xmlCanonicPath(SystemID); 12051 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 12052 xmlFreeParserCtxt(ctxt); 12053 return(NULL); 12054 } 12055 12056 /* 12057 * Ask the Entity resolver to load the damn thing 12058 */ 12059 12060 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 12061 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 12062 systemIdCanonic); 12063 if (input == NULL) { 12064 if (sax != NULL) ctxt->sax = NULL; 12065 xmlFreeParserCtxt(ctxt); 12066 if (systemIdCanonic != NULL) 12067 xmlFree(systemIdCanonic); 12068 return(NULL); 12069 } 12070 12071 /* 12072 * plug some encoding conversion routines here. 
12073 */ 12074 if (xmlPushInput(ctxt, input) < 0) { 12075 if (sax != NULL) ctxt->sax = NULL; 12076 xmlFreeParserCtxt(ctxt); 12077 if (systemIdCanonic != NULL) 12078 xmlFree(systemIdCanonic); 12079 return(NULL); 12080 } 12081 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12082 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 12083 xmlSwitchEncoding(ctxt, enc); 12084 } 12085 12086 if (input->filename == NULL) 12087 input->filename = (char *) systemIdCanonic; 12088 else 12089 xmlFree(systemIdCanonic); 12090 input->line = 1; 12091 input->col = 1; 12092 input->base = ctxt->input->cur; 12093 input->cur = ctxt->input->cur; 12094 input->free = NULL; 12095 12096 /* 12097 * let's parse that entity knowing it's an external subset. 12098 */ 12099 ctxt->inSubset = 2; 12100 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 12101 if (ctxt->myDoc == NULL) { 12102 xmlErrMemory(ctxt, "New Doc failed"); 12103 if (sax != NULL) ctxt->sax = NULL; 12104 xmlFreeParserCtxt(ctxt); 12105 return(NULL); 12106 } 12107 ctxt->myDoc->properties = XML_DOC_INTERNAL; 12108 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 12109 ExternalID, SystemID); 12110 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 12111 12112 if (ctxt->myDoc != NULL) { 12113 if (ctxt->wellFormed) { 12114 ret = ctxt->myDoc->extSubset; 12115 ctxt->myDoc->extSubset = NULL; 12116 if (ret != NULL) { 12117 xmlNodePtr tmp; 12118 12119 ret->doc = NULL; 12120 tmp = ret->children; 12121 while (tmp != NULL) { 12122 tmp->doc = NULL; 12123 tmp = tmp->next; 12124 } 12125 } 12126 } else { 12127 ret = NULL; 12128 } 12129 xmlFreeDoc(ctxt->myDoc); 12130 ctxt->myDoc = NULL; 12131 } 12132 if (sax != NULL) ctxt->sax = NULL; 12133 xmlFreeParserCtxt(ctxt); 12134 12135 return(ret); 12136} 12137 12138 12139/** 12140 * xmlParseDTD: 12141 * @ExternalID: a NAME* containing the External ID of the DTD 12142 * @SystemID: a NAME* containing the URL to the DTD 12143 * 12144 * Load and parse an external subset. 
12145 * 12146 * Returns the resulting xmlDtdPtr or NULL in case of error. 12147 */ 12148 12149xmlDtdPtr 12150xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 12151 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 12152} 12153#endif /* LIBXML_VALID_ENABLED */ 12154 12155/************************************************************************ 12156 * * 12157 * Front ends when parsing an Entity * 12158 * * 12159 ************************************************************************/ 12160 12161/** 12162 * xmlParseCtxtExternalEntity: 12163 * @ctx: the existing parsing context 12164 * @URL: the URL for the entity to load 12165 * @ID: the System ID for the entity to load 12166 * @lst: the return value for the set of parsed nodes 12167 * 12168 * Parse an external general entity within an existing parsing context 12169 * An external general parsed entity is well-formed if it matches the 12170 * production labeled extParsedEnt. 12171 * 12172 * [78] extParsedEnt ::= TextDecl? 
content 12173 * 12174 * Returns 0 if the entity is well formed, -1 in case of args problem and 12175 * the parser error code otherwise 12176 */ 12177 12178int 12179xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 12180 const xmlChar *ID, xmlNodePtr *lst) { 12181 xmlParserCtxtPtr ctxt; 12182 xmlDocPtr newDoc; 12183 xmlNodePtr newRoot; 12184 xmlSAXHandlerPtr oldsax = NULL; 12185 int ret = 0; 12186 xmlChar start[4]; 12187 xmlCharEncoding enc; 12188 12189 if (ctx == NULL) return(-1); 12190 12191 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) || 12192 (ctx->depth > 1024)) { 12193 return(XML_ERR_ENTITY_LOOP); 12194 } 12195 12196 if (lst != NULL) 12197 *lst = NULL; 12198 if ((URL == NULL) && (ID == NULL)) 12199 return(-1); 12200 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 12201 return(-1); 12202 12203 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx); 12204 if (ctxt == NULL) { 12205 return(-1); 12206 } 12207 12208 oldsax = ctxt->sax; 12209 ctxt->sax = ctx->sax; 12210 xmlDetectSAX2(ctxt); 12211 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12212 if (newDoc == NULL) { 12213 xmlFreeParserCtxt(ctxt); 12214 return(-1); 12215 } 12216 newDoc->properties = XML_DOC_INTERNAL; 12217 if (ctx->myDoc->dict) { 12218 newDoc->dict = ctx->myDoc->dict; 12219 xmlDictReference(newDoc->dict); 12220 } 12221 if (ctx->myDoc != NULL) { 12222 newDoc->intSubset = ctx->myDoc->intSubset; 12223 newDoc->extSubset = ctx->myDoc->extSubset; 12224 } 12225 if (ctx->myDoc->URL != NULL) { 12226 newDoc->URL = xmlStrdup(ctx->myDoc->URL); 12227 } 12228 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12229 if (newRoot == NULL) { 12230 ctxt->sax = oldsax; 12231 xmlFreeParserCtxt(ctxt); 12232 newDoc->intSubset = NULL; 12233 newDoc->extSubset = NULL; 12234 xmlFreeDoc(newDoc); 12235 return(-1); 12236 } 12237 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12238 nodePush(ctxt, newDoc->children); 12239 if (ctx->myDoc == NULL) { 12240 
ctxt->myDoc = newDoc; 12241 } else { 12242 ctxt->myDoc = ctx->myDoc; 12243 newDoc->children->doc = ctx->myDoc; 12244 } 12245 12246 /* 12247 * Get the 4 first bytes and decode the charset 12248 * if enc != XML_CHAR_ENCODING_NONE 12249 * plug some encoding conversion routines. 12250 */ 12251 GROW 12252 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12253 start[0] = RAW; 12254 start[1] = NXT(1); 12255 start[2] = NXT(2); 12256 start[3] = NXT(3); 12257 enc = xmlDetectCharEncoding(start, 4); 12258 if (enc != XML_CHAR_ENCODING_NONE) { 12259 xmlSwitchEncoding(ctxt, enc); 12260 } 12261 } 12262 12263 /* 12264 * Parse a possible text declaration first 12265 */ 12266 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12267 xmlParseTextDecl(ctxt); 12268 /* 12269 * An XML-1.0 document can't reference an entity not XML-1.0 12270 */ 12271 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) && 12272 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) { 12273 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH, 12274 "Version mismatch between document and entity\n"); 12275 } 12276 } 12277 12278 /* 12279 * Doing validity checking on chunk doesn't make sense 12280 */ 12281 ctxt->instate = XML_PARSER_CONTENT; 12282 ctxt->validate = ctx->validate; 12283 ctxt->valid = ctx->valid; 12284 ctxt->loadsubset = ctx->loadsubset; 12285 ctxt->depth = ctx->depth + 1; 12286 ctxt->replaceEntities = ctx->replaceEntities; 12287 if (ctxt->validate) { 12288 ctxt->vctxt.error = ctx->vctxt.error; 12289 ctxt->vctxt.warning = ctx->vctxt.warning; 12290 } else { 12291 ctxt->vctxt.error = NULL; 12292 ctxt->vctxt.warning = NULL; 12293 } 12294 ctxt->vctxt.nodeTab = NULL; 12295 ctxt->vctxt.nodeNr = 0; 12296 ctxt->vctxt.nodeMax = 0; 12297 ctxt->vctxt.node = NULL; 12298 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12299 ctxt->dict = ctx->dict; 12300 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12301 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12302 
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12303 ctxt->dictNames = ctx->dictNames; 12304 ctxt->attsDefault = ctx->attsDefault; 12305 ctxt->attsSpecial = ctx->attsSpecial; 12306 ctxt->linenumbers = ctx->linenumbers; 12307 12308 xmlParseContent(ctxt); 12309 12310 ctx->validate = ctxt->validate; 12311 ctx->valid = ctxt->valid; 12312 if ((RAW == '<') && (NXT(1) == '/')) { 12313 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12314 } else if (RAW != 0) { 12315 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12316 } 12317 if (ctxt->node != newDoc->children) { 12318 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12319 } 12320 12321 if (!ctxt->wellFormed) { 12322 if (ctxt->errNo == 0) 12323 ret = 1; 12324 else 12325 ret = ctxt->errNo; 12326 } else { 12327 if (lst != NULL) { 12328 xmlNodePtr cur; 12329 12330 /* 12331 * Return the newly created nodeset after unlinking it from 12332 * they pseudo parent. 12333 */ 12334 cur = newDoc->children->children; 12335 *lst = cur; 12336 while (cur != NULL) { 12337 cur->parent = NULL; 12338 cur = cur->next; 12339 } 12340 newDoc->children->children = NULL; 12341 } 12342 ret = 0; 12343 } 12344 ctxt->sax = oldsax; 12345 ctxt->dict = NULL; 12346 ctxt->attsDefault = NULL; 12347 ctxt->attsSpecial = NULL; 12348 xmlFreeParserCtxt(ctxt); 12349 newDoc->intSubset = NULL; 12350 newDoc->extSubset = NULL; 12351 xmlFreeDoc(newDoc); 12352 12353 return(ret); 12354} 12355 12356/** 12357 * xmlParseExternalEntityPrivate: 12358 * @doc: the document the chunk pertains to 12359 * @oldctxt: the previous parser context if available 12360 * @sax: the SAX handler bloc (possibly NULL) 12361 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12362 * @depth: Used for loop detection, use 0 12363 * @URL: the URL for the entity to load 12364 * @ID: the System ID for the entity to load 12365 * @list: the return value for the set of parsed nodes 12366 * 12367 * Private version of xmlParseExternalEntity() 12368 * 12369 
* Returns 0 if the entity is well formed, -1 in case of args problem and 12370 * the parser error code otherwise 12371 */ 12372 12373static xmlParserErrors 12374xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 12375 xmlSAXHandlerPtr sax, 12376 void *user_data, int depth, const xmlChar *URL, 12377 const xmlChar *ID, xmlNodePtr *list) { 12378 xmlParserCtxtPtr ctxt; 12379 xmlDocPtr newDoc; 12380 xmlNodePtr newRoot; 12381 xmlSAXHandlerPtr oldsax = NULL; 12382 xmlParserErrors ret = XML_ERR_OK; 12383 xmlChar start[4]; 12384 xmlCharEncoding enc; 12385 12386 if (((depth > 40) && 12387 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) || 12388 (depth > 1024)) { 12389 return(XML_ERR_ENTITY_LOOP); 12390 } 12391 12392 if (list != NULL) 12393 *list = NULL; 12394 if ((URL == NULL) && (ID == NULL)) 12395 return(XML_ERR_INTERNAL_ERROR); 12396 if (doc == NULL) 12397 return(XML_ERR_INTERNAL_ERROR); 12398 12399 12400 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt); 12401 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12402 ctxt->userData = ctxt; 12403 if (oldctxt != NULL) { 12404 ctxt->_private = oldctxt->_private; 12405 ctxt->loadsubset = oldctxt->loadsubset; 12406 ctxt->validate = oldctxt->validate; 12407 ctxt->external = oldctxt->external; 12408 ctxt->record_info = oldctxt->record_info; 12409 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 12410 ctxt->node_seq.length = oldctxt->node_seq.length; 12411 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 12412 } else { 12413 /* 12414 * Doing validity checking on chunk without context 12415 * doesn't make sense 12416 */ 12417 ctxt->_private = NULL; 12418 ctxt->validate = 0; 12419 ctxt->external = 2; 12420 ctxt->loadsubset = 0; 12421 } 12422 if (sax != NULL) { 12423 oldsax = ctxt->sax; 12424 ctxt->sax = sax; 12425 if (user_data != NULL) 12426 ctxt->userData = user_data; 12427 } 12428 xmlDetectSAX2(ctxt); 12429 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12430 if (newDoc == NULL) { 
12431 ctxt->node_seq.maximum = 0; 12432 ctxt->node_seq.length = 0; 12433 ctxt->node_seq.buffer = NULL; 12434 xmlFreeParserCtxt(ctxt); 12435 return(XML_ERR_INTERNAL_ERROR); 12436 } 12437 newDoc->properties = XML_DOC_INTERNAL; 12438 newDoc->intSubset = doc->intSubset; 12439 newDoc->extSubset = doc->extSubset; 12440 newDoc->dict = doc->dict; 12441 xmlDictReference(newDoc->dict); 12442 12443 if (doc->URL != NULL) { 12444 newDoc->URL = xmlStrdup(doc->URL); 12445 } 12446 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 12447 if (newRoot == NULL) { 12448 if (sax != NULL) 12449 ctxt->sax = oldsax; 12450 ctxt->node_seq.maximum = 0; 12451 ctxt->node_seq.length = 0; 12452 ctxt->node_seq.buffer = NULL; 12453 xmlFreeParserCtxt(ctxt); 12454 newDoc->intSubset = NULL; 12455 newDoc->extSubset = NULL; 12456 xmlFreeDoc(newDoc); 12457 return(XML_ERR_INTERNAL_ERROR); 12458 } 12459 xmlAddChild((xmlNodePtr) newDoc, newRoot); 12460 nodePush(ctxt, newDoc->children); 12461 ctxt->myDoc = doc; 12462 newRoot->doc = doc; 12463 12464 /* 12465 * Get the 4 first bytes and decode the charset 12466 * if enc != XML_CHAR_ENCODING_NONE 12467 * plug some encoding conversion routines. 
12468 */ 12469 GROW; 12470 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 12471 start[0] = RAW; 12472 start[1] = NXT(1); 12473 start[2] = NXT(2); 12474 start[3] = NXT(3); 12475 enc = xmlDetectCharEncoding(start, 4); 12476 if (enc != XML_CHAR_ENCODING_NONE) { 12477 xmlSwitchEncoding(ctxt, enc); 12478 } 12479 } 12480 12481 /* 12482 * Parse a possible text declaration first 12483 */ 12484 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 12485 xmlParseTextDecl(ctxt); 12486 } 12487 12488 ctxt->instate = XML_PARSER_CONTENT; 12489 ctxt->depth = depth; 12490 12491 xmlParseContent(ctxt); 12492 12493 if ((RAW == '<') && (NXT(1) == '/')) { 12494 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12495 } else if (RAW != 0) { 12496 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12497 } 12498 if (ctxt->node != newDoc->children) { 12499 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12500 } 12501 12502 if (!ctxt->wellFormed) { 12503 if (ctxt->errNo == 0) 12504 ret = XML_ERR_INTERNAL_ERROR; 12505 else 12506 ret = (xmlParserErrors)ctxt->errNo; 12507 } else { 12508 if (list != NULL) { 12509 xmlNodePtr cur; 12510 12511 /* 12512 * Return the newly created nodeset after unlinking it from 12513 * they pseudo parent. 12514 */ 12515 cur = newDoc->children->children; 12516 *list = cur; 12517 while (cur != NULL) { 12518 cur->parent = NULL; 12519 cur = cur->next; 12520 } 12521 newDoc->children->children = NULL; 12522 } 12523 ret = XML_ERR_OK; 12524 } 12525 12526 /* 12527 * Record in the parent context the number of entities replacement 12528 * done when parsing that reference. 
12529 */ 12530 if (oldctxt != NULL) 12531 oldctxt->nbentities += ctxt->nbentities; 12532 12533 /* 12534 * Also record the size of the entity parsed 12535 */ 12536 if (ctxt->input != NULL) { 12537 oldctxt->sizeentities += ctxt->input->consumed; 12538 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base); 12539 } 12540 /* 12541 * And record the last error if any 12542 */ 12543 if (ctxt->lastError.code != XML_ERR_OK) 12544 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12545 12546 if (sax != NULL) 12547 ctxt->sax = oldsax; 12548 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 12549 oldctxt->node_seq.length = ctxt->node_seq.length; 12550 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 12551 ctxt->node_seq.maximum = 0; 12552 ctxt->node_seq.length = 0; 12553 ctxt->node_seq.buffer = NULL; 12554 xmlFreeParserCtxt(ctxt); 12555 newDoc->intSubset = NULL; 12556 newDoc->extSubset = NULL; 12557 xmlFreeDoc(newDoc); 12558 12559 return(ret); 12560} 12561 12562#ifdef LIBXML_SAX1_ENABLED 12563/** 12564 * xmlParseExternalEntity: 12565 * @doc: the document the chunk pertains to 12566 * @sax: the SAX handler bloc (possibly NULL) 12567 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12568 * @depth: Used for loop detection, use 0 12569 * @URL: the URL for the entity to load 12570 * @ID: the System ID for the entity to load 12571 * @lst: the return value for the set of parsed nodes 12572 * 12573 * Parse an external general entity 12574 * An external general parsed entity is well-formed if it matches the 12575 * production labeled extParsedEnt. 12576 * 12577 * [78] extParsedEnt ::= TextDecl? 
content 12578 * 12579 * Returns 0 if the entity is well formed, -1 in case of args problem and 12580 * the parser error code otherwise 12581 */ 12582 12583int 12584xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 12585 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 12586 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 12587 ID, lst)); 12588} 12589 12590/** 12591 * xmlParseBalancedChunkMemory: 12592 * @doc: the document the chunk pertains to 12593 * @sax: the SAX handler bloc (possibly NULL) 12594 * @user_data: The user data returned on SAX callbacks (possibly NULL) 12595 * @depth: Used for loop detection, use 0 12596 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12597 * @lst: the return value for the set of parsed nodes 12598 * 12599 * Parse a well-balanced chunk of an XML document 12600 * called by the parser 12601 * The allowed sequence for the Well Balanced Chunk is the one defined by 12602 * the content production in the XML grammar: 12603 * 12604 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12605 * 12606 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 12607 * the parser error code otherwise 12608 */ 12609 12610int 12611xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 12612 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 12613 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 12614 depth, string, lst, 0 ); 12615} 12616#endif /* LIBXML_SAX1_ENABLED */ 12617 12618/** 12619 * xmlParseBalancedChunkMemoryInternal: 12620 * @oldctxt: the existing parsing context 12621 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 12622 * @user_data: the user data field for the parser context 12623 * @lst: the return value for the set of parsed nodes 12624 * 12625 * 12626 * Parse a well-balanced chunk of an XML document 12627 * called by the parser 12628 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 12629 * the content production in the XML grammar: 12630 * 12631 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12632 * 12633 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12634 * error code otherwise 12635 * 12636 * In case recover is set to 1, the nodelist will not be empty even if 12637 * the parsed chunk is not well balanced. 12638 */ 12639static xmlParserErrors 12640xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 12641 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 12642 xmlParserCtxtPtr ctxt; 12643 xmlDocPtr newDoc = NULL; 12644 xmlNodePtr newRoot; 12645 xmlSAXHandlerPtr oldsax = NULL; 12646 xmlNodePtr content = NULL; 12647 xmlNodePtr last = NULL; 12648 int size; 12649 xmlParserErrors ret = XML_ERR_OK; 12650#ifdef SAX2 12651 int i; 12652#endif 12653 12654 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) || 12655 (oldctxt->depth > 1024)) { 12656 return(XML_ERR_ENTITY_LOOP); 12657 } 12658 12659 12660 if (lst != NULL) 12661 *lst = NULL; 12662 if (string == NULL) 12663 return(XML_ERR_INTERNAL_ERROR); 12664 12665 size = xmlStrlen(string); 12666 12667 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 12668 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 12669 if (user_data != NULL) 12670 ctxt->userData = user_data; 12671 else 12672 ctxt->userData = ctxt; 12673 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 12674 ctxt->dict = oldctxt->dict; 12675 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 12676 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 12677 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 12678 12679#ifdef SAX2 12680 /* propagate namespaces down the entity */ 12681 for (i = 0;i < oldctxt->nsNr;i += 2) { 12682 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]); 12683 } 12684#endif 12685 12686 oldsax = ctxt->sax; 12687 
ctxt->sax = oldctxt->sax; 12688 xmlDetectSAX2(ctxt); 12689 ctxt->replaceEntities = oldctxt->replaceEntities; 12690 ctxt->options = oldctxt->options; 12691 12692 ctxt->_private = oldctxt->_private; 12693 if (oldctxt->myDoc == NULL) { 12694 newDoc = xmlNewDoc(BAD_CAST "1.0"); 12695 if (newDoc == NULL) { 12696 ctxt->sax = oldsax; 12697 ctxt->dict = NULL; 12698 xmlFreeParserCtxt(ctxt); 12699 return(XML_ERR_INTERNAL_ERROR); 12700 } 12701 newDoc->properties = XML_DOC_INTERNAL; 12702 newDoc->dict = ctxt->dict; 12703 xmlDictReference(newDoc->dict); 12704 ctxt->myDoc = newDoc; 12705 } else { 12706 ctxt->myDoc = oldctxt->myDoc; 12707 content = ctxt->myDoc->children; 12708 last = ctxt->myDoc->last; 12709 } 12710 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 12711 if (newRoot == NULL) { 12712 ctxt->sax = oldsax; 12713 ctxt->dict = NULL; 12714 xmlFreeParserCtxt(ctxt); 12715 if (newDoc != NULL) { 12716 xmlFreeDoc(newDoc); 12717 } 12718 return(XML_ERR_INTERNAL_ERROR); 12719 } 12720 ctxt->myDoc->children = NULL; 12721 ctxt->myDoc->last = NULL; 12722 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 12723 nodePush(ctxt, ctxt->myDoc->children); 12724 ctxt->instate = XML_PARSER_CONTENT; 12725 ctxt->depth = oldctxt->depth + 1; 12726 12727 ctxt->validate = 0; 12728 ctxt->loadsubset = oldctxt->loadsubset; 12729 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 12730 /* 12731 * ID/IDREF registration will be done in xmlValidateElement below 12732 */ 12733 ctxt->loadsubset |= XML_SKIP_IDS; 12734 } 12735 ctxt->dictNames = oldctxt->dictNames; 12736 ctxt->attsDefault = oldctxt->attsDefault; 12737 ctxt->attsSpecial = oldctxt->attsSpecial; 12738 12739 xmlParseContent(ctxt); 12740 if ((RAW == '<') && (NXT(1) == '/')) { 12741 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12742 } else if (RAW != 0) { 12743 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12744 } 12745 if (ctxt->node != ctxt->myDoc->children) { 12746 xmlFatalErr(ctxt, 
XML_ERR_NOT_WELL_BALANCED, NULL); 12747 } 12748 12749 if (!ctxt->wellFormed) { 12750 if (ctxt->errNo == 0) 12751 ret = XML_ERR_INTERNAL_ERROR; 12752 else 12753 ret = (xmlParserErrors)ctxt->errNo; 12754 } else { 12755 ret = XML_ERR_OK; 12756 } 12757 12758 if ((lst != NULL) && (ret == XML_ERR_OK)) { 12759 xmlNodePtr cur; 12760 12761 /* 12762 * Return the newly created nodeset after unlinking it from 12763 * they pseudo parent. 12764 */ 12765 cur = ctxt->myDoc->children->children; 12766 *lst = cur; 12767 while (cur != NULL) { 12768#ifdef LIBXML_VALID_ENABLED 12769 if ((oldctxt->validate) && (oldctxt->wellFormed) && 12770 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 12771 (cur->type == XML_ELEMENT_NODE)) { 12772 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 12773 oldctxt->myDoc, cur); 12774 } 12775#endif /* LIBXML_VALID_ENABLED */ 12776 cur->parent = NULL; 12777 cur = cur->next; 12778 } 12779 ctxt->myDoc->children->children = NULL; 12780 } 12781 if (ctxt->myDoc != NULL) { 12782 xmlFreeNode(ctxt->myDoc->children); 12783 ctxt->myDoc->children = content; 12784 ctxt->myDoc->last = last; 12785 } 12786 12787 /* 12788 * Record in the parent context the number of entities replacement 12789 * done when parsing that reference. 
12790 */ 12791 if (oldctxt != NULL) 12792 oldctxt->nbentities += ctxt->nbentities; 12793 12794 /* 12795 * Also record the last error if any 12796 */ 12797 if (ctxt->lastError.code != XML_ERR_OK) 12798 xmlCopyError(&ctxt->lastError, &oldctxt->lastError); 12799 12800 ctxt->sax = oldsax; 12801 ctxt->dict = NULL; 12802 ctxt->attsDefault = NULL; 12803 ctxt->attsSpecial = NULL; 12804 xmlFreeParserCtxt(ctxt); 12805 if (newDoc != NULL) { 12806 xmlFreeDoc(newDoc); 12807 } 12808 12809 return(ret); 12810} 12811 12812/** 12813 * xmlParseInNodeContext: 12814 * @node: the context node 12815 * @data: the input string 12816 * @datalen: the input string length in bytes 12817 * @options: a combination of xmlParserOption 12818 * @lst: the return value for the set of parsed nodes 12819 * 12820 * Parse a well-balanced chunk of an XML document 12821 * within the context (DTD, namespaces, etc ...) of the given node. 12822 * 12823 * The allowed sequence for the data is a Well Balanced Chunk defined by 12824 * the content production in the XML grammar: 12825 * 12826 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 12827 * 12828 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 12829 * error code otherwise 12830 */ 12831xmlParserErrors 12832xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 12833 int options, xmlNodePtr *lst) { 12834#ifdef SAX2 12835 xmlParserCtxtPtr ctxt; 12836 xmlDocPtr doc = NULL; 12837 xmlNodePtr fake, cur; 12838 int nsnr = 0; 12839 12840 xmlParserErrors ret = XML_ERR_OK; 12841 12842 /* 12843 * check all input parameters, grab the document 12844 */ 12845 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 12846 return(XML_ERR_INTERNAL_ERROR); 12847 switch (node->type) { 12848 case XML_ELEMENT_NODE: 12849 case XML_ATTRIBUTE_NODE: 12850 case XML_TEXT_NODE: 12851 case XML_CDATA_SECTION_NODE: 12852 case XML_ENTITY_REF_NODE: 12853 case XML_PI_NODE: 12854 case XML_COMMENT_NODE: 12855 case 
XML_DOCUMENT_NODE: 12856 case XML_HTML_DOCUMENT_NODE: 12857 break; 12858 default: 12859 return(XML_ERR_INTERNAL_ERROR); 12860 12861 } 12862 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 12863 (node->type != XML_DOCUMENT_NODE) && 12864 (node->type != XML_HTML_DOCUMENT_NODE)) 12865 node = node->parent; 12866 if (node == NULL) 12867 return(XML_ERR_INTERNAL_ERROR); 12868 if (node->type == XML_ELEMENT_NODE) 12869 doc = node->doc; 12870 else 12871 doc = (xmlDocPtr) node; 12872 if (doc == NULL) 12873 return(XML_ERR_INTERNAL_ERROR); 12874 12875 /* 12876 * allocate a context and set-up everything not related to the 12877 * node position in the tree 12878 */ 12879 if (doc->type == XML_DOCUMENT_NODE) 12880 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 12881#ifdef LIBXML_HTML_ENABLED 12882 else if (doc->type == XML_HTML_DOCUMENT_NODE) { 12883 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen); 12884 /* 12885 * When parsing in context, it makes no sense to add implied 12886 * elements like html/body/etc... 12887 */ 12888 options |= HTML_PARSE_NOIMPLIED; 12889 } 12890#endif 12891 else 12892 return(XML_ERR_INTERNAL_ERROR); 12893 12894 if (ctxt == NULL) 12895 return(XML_ERR_NO_MEMORY); 12896 12897 /* 12898 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 12899 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 12900 * we must wait until the last moment to free the original one. 
12901 */ 12902 if (doc->dict != NULL) { 12903 if (ctxt->dict != NULL) 12904 xmlDictFree(ctxt->dict); 12905 ctxt->dict = doc->dict; 12906 } else 12907 options |= XML_PARSE_NODICT; 12908 12909 if (doc->encoding != NULL) { 12910 xmlCharEncodingHandlerPtr hdlr; 12911 12912 if (ctxt->encoding != NULL) 12913 xmlFree((xmlChar *) ctxt->encoding); 12914 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding); 12915 12916 hdlr = xmlFindCharEncodingHandler(doc->encoding); 12917 if (hdlr != NULL) { 12918 xmlSwitchToEncoding(ctxt, hdlr); 12919 } else { 12920 return(XML_ERR_UNSUPPORTED_ENCODING); 12921 } 12922 } 12923 12924 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 12925 xmlDetectSAX2(ctxt); 12926 ctxt->myDoc = doc; 12927 12928 fake = xmlNewComment(NULL); 12929 if (fake == NULL) { 12930 xmlFreeParserCtxt(ctxt); 12931 return(XML_ERR_NO_MEMORY); 12932 } 12933 xmlAddChild(node, fake); 12934 12935 if (node->type == XML_ELEMENT_NODE) { 12936 nodePush(ctxt, node); 12937 /* 12938 * initialize the SAX2 namespaces stack 12939 */ 12940 cur = node; 12941 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 12942 xmlNsPtr ns = cur->nsDef; 12943 const xmlChar *iprefix, *ihref; 12944 12945 while (ns != NULL) { 12946 if (ctxt->dict) { 12947 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 12948 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 12949 } else { 12950 iprefix = ns->prefix; 12951 ihref = ns->href; 12952 } 12953 12954 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 12955 nsPush(ctxt, iprefix, ihref); 12956 nsnr++; 12957 } 12958 ns = ns->next; 12959 } 12960 cur = cur->parent; 12961 } 12962 ctxt->instate = XML_PARSER_CONTENT; 12963 } 12964 12965 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 12966 /* 12967 * ID/IDREF registration will be done in xmlValidateElement below 12968 */ 12969 ctxt->loadsubset |= XML_SKIP_IDS; 12970 } 12971 12972#ifdef LIBXML_HTML_ENABLED 12973 if (doc->type == XML_HTML_DOCUMENT_NODE) 12974 __htmlParseContent(ctxt); 12975 else 
12976#endif 12977 xmlParseContent(ctxt); 12978 12979 nsPop(ctxt, nsnr); 12980 if ((RAW == '<') && (NXT(1) == '/')) { 12981 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12982 } else if (RAW != 0) { 12983 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12984 } 12985 if ((ctxt->node != NULL) && (ctxt->node != node)) { 12986 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12987 ctxt->wellFormed = 0; 12988 } 12989 12990 if (!ctxt->wellFormed) { 12991 if (ctxt->errNo == 0) 12992 ret = XML_ERR_INTERNAL_ERROR; 12993 else 12994 ret = (xmlParserErrors)ctxt->errNo; 12995 } else { 12996 ret = XML_ERR_OK; 12997 } 12998 12999 /* 13000 * Return the newly created nodeset after unlinking it from 13001 * the pseudo sibling. 13002 */ 13003 13004 cur = fake->next; 13005 fake->next = NULL; 13006 node->last = fake; 13007 13008 if (cur != NULL) { 13009 cur->prev = NULL; 13010 } 13011 13012 *lst = cur; 13013 13014 while (cur != NULL) { 13015 cur->parent = NULL; 13016 cur = cur->next; 13017 } 13018 13019 xmlUnlinkNode(fake); 13020 xmlFreeNode(fake); 13021 13022 13023 if (ret != XML_ERR_OK) { 13024 xmlFreeNodeList(*lst); 13025 *lst = NULL; 13026 } 13027 13028 if (doc->dict != NULL) 13029 ctxt->dict = NULL; 13030 xmlFreeParserCtxt(ctxt); 13031 13032 return(ret); 13033#else /* !SAX2 */ 13034 return(XML_ERR_INTERNAL_ERROR); 13035#endif 13036} 13037 13038#ifdef LIBXML_SAX1_ENABLED 13039/** 13040 * xmlParseBalancedChunkMemoryRecover: 13041 * @doc: the document the chunk pertains to 13042 * @sax: the SAX handler bloc (possibly NULL) 13043 * @user_data: The user data returned on SAX callbacks (possibly NULL) 13044 * @depth: Used for loop detection, use 0 13045 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 13046 * @lst: the return value for the set of parsed nodes 13047 * @recover: return nodes even if the data is broken (use 0) 13048 * 13049 * 13050 * Parse a well-balanced chunk of an XML document 13051 * called by the parser 13052 * The allowed sequence for the 
Well Balanced Chunk is the one defined by 13053 * the content production in the XML grammar: 13054 * 13055 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 13056 * 13057 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 13058 * the parser error code otherwise 13059 * 13060 * In case recover is set to 1, the nodelist will not be empty even if 13061 * the parsed chunk is not well balanced, assuming the parsing succeeded to 13062 * some extent. 13063 */ 13064int 13065xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 13066 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 13067 int recover) { 13068 xmlParserCtxtPtr ctxt; 13069 xmlDocPtr newDoc; 13070 xmlSAXHandlerPtr oldsax = NULL; 13071 xmlNodePtr content, newRoot; 13072 int size; 13073 int ret = 0; 13074 13075 if (depth > 40) { 13076 return(XML_ERR_ENTITY_LOOP); 13077 } 13078 13079 13080 if (lst != NULL) 13081 *lst = NULL; 13082 if (string == NULL) 13083 return(-1); 13084 13085 size = xmlStrlen(string); 13086 13087 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 13088 if (ctxt == NULL) return(-1); 13089 ctxt->userData = ctxt; 13090 if (sax != NULL) { 13091 oldsax = ctxt->sax; 13092 ctxt->sax = sax; 13093 if (user_data != NULL) 13094 ctxt->userData = user_data; 13095 } 13096 newDoc = xmlNewDoc(BAD_CAST "1.0"); 13097 if (newDoc == NULL) { 13098 xmlFreeParserCtxt(ctxt); 13099 return(-1); 13100 } 13101 newDoc->properties = XML_DOC_INTERNAL; 13102 if ((doc != NULL) && (doc->dict != NULL)) { 13103 xmlDictFree(ctxt->dict); 13104 ctxt->dict = doc->dict; 13105 xmlDictReference(ctxt->dict); 13106 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 13107 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 13108 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 13109 ctxt->dictNames = 1; 13110 } else { 13111 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); 13112 } 13113 if (doc 
!= NULL) { 13114 newDoc->intSubset = doc->intSubset; 13115 newDoc->extSubset = doc->extSubset; 13116 } 13117 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 13118 if (newRoot == NULL) { 13119 if (sax != NULL) 13120 ctxt->sax = oldsax; 13121 xmlFreeParserCtxt(ctxt); 13122 newDoc->intSubset = NULL; 13123 newDoc->extSubset = NULL; 13124 xmlFreeDoc(newDoc); 13125 return(-1); 13126 } 13127 xmlAddChild((xmlNodePtr) newDoc, newRoot); 13128 nodePush(ctxt, newRoot); 13129 if (doc == NULL) { 13130 ctxt->myDoc = newDoc; 13131 } else { 13132 ctxt->myDoc = newDoc; 13133 newDoc->children->doc = doc; 13134 /* Ensure that doc has XML spec namespace */ 13135 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 13136 newDoc->oldNs = doc->oldNs; 13137 } 13138 ctxt->instate = XML_PARSER_CONTENT; 13139 ctxt->depth = depth; 13140 13141 /* 13142 * Doing validity checking on chunk doesn't make sense 13143 */ 13144 ctxt->validate = 0; 13145 ctxt->loadsubset = 0; 13146 xmlDetectSAX2(ctxt); 13147 13148 if ( doc != NULL ){ 13149 content = doc->children; 13150 doc->children = NULL; 13151 xmlParseContent(ctxt); 13152 doc->children = content; 13153 } 13154 else { 13155 xmlParseContent(ctxt); 13156 } 13157 if ((RAW == '<') && (NXT(1) == '/')) { 13158 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13159 } else if (RAW != 0) { 13160 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 13161 } 13162 if (ctxt->node != newDoc->children) { 13163 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 13164 } 13165 13166 if (!ctxt->wellFormed) { 13167 if (ctxt->errNo == 0) 13168 ret = 1; 13169 else 13170 ret = ctxt->errNo; 13171 } else { 13172 ret = 0; 13173 } 13174 13175 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 13176 xmlNodePtr cur; 13177 13178 /* 13179 * Return the newly created nodeset after unlinking it from 13180 * they pseudo parent. 
13181 */ 13182 cur = newDoc->children->children; 13183 *lst = cur; 13184 while (cur != NULL) { 13185 xmlSetTreeDoc(cur, doc); 13186 cur->parent = NULL; 13187 cur = cur->next; 13188 } 13189 newDoc->children->children = NULL; 13190 } 13191 13192 if (sax != NULL) 13193 ctxt->sax = oldsax; 13194 xmlFreeParserCtxt(ctxt); 13195 newDoc->intSubset = NULL; 13196 newDoc->extSubset = NULL; 13197 newDoc->oldNs = NULL; 13198 xmlFreeDoc(newDoc); 13199 13200 return(ret); 13201} 13202 13203/** 13204 * xmlSAXParseEntity: 13205 * @sax: the SAX handler block 13206 * @filename: the filename 13207 * 13208 * parse an XML external entity out of context and build a tree. 13209 * It use the given SAX function block to handle the parsing callback. 13210 * If sax is NULL, fallback to the default DOM tree building routines. 13211 * 13212 * [78] extParsedEnt ::= TextDecl? content 13213 * 13214 * This correspond to a "Well Balanced" chunk 13215 * 13216 * Returns the resulting document tree 13217 */ 13218 13219xmlDocPtr 13220xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 13221 xmlDocPtr ret; 13222 xmlParserCtxtPtr ctxt; 13223 13224 ctxt = xmlCreateFileParserCtxt(filename); 13225 if (ctxt == NULL) { 13226 return(NULL); 13227 } 13228 if (sax != NULL) { 13229 if (ctxt->sax != NULL) 13230 xmlFree(ctxt->sax); 13231 ctxt->sax = sax; 13232 ctxt->userData = NULL; 13233 } 13234 13235 xmlParseExtParsedEnt(ctxt); 13236 13237 if (ctxt->wellFormed) 13238 ret = ctxt->myDoc; 13239 else { 13240 ret = NULL; 13241 xmlFreeDoc(ctxt->myDoc); 13242 ctxt->myDoc = NULL; 13243 } 13244 if (sax != NULL) 13245 ctxt->sax = NULL; 13246 xmlFreeParserCtxt(ctxt); 13247 13248 return(ret); 13249} 13250 13251/** 13252 * xmlParseEntity: 13253 * @filename: the filename 13254 * 13255 * parse an XML external entity out of context and build a tree. 13256 * 13257 * [78] extParsedEnt ::= TextDecl? 
content 13258 * 13259 * This correspond to a "Well Balanced" chunk 13260 * 13261 * Returns the resulting document tree 13262 */ 13263 13264xmlDocPtr 13265xmlParseEntity(const char *filename) { 13266 return(xmlSAXParseEntity(NULL, filename)); 13267} 13268#endif /* LIBXML_SAX1_ENABLED */ 13269 13270/** 13271 * xmlCreateEntityParserCtxtInternal: 13272 * @URL: the entity URL 13273 * @ID: the entity PUBLIC ID 13274 * @base: a possible base for the target URI 13275 * @pctx: parser context used to set options on new context 13276 * 13277 * Create a parser context for an external entity 13278 * Automatic support for ZLIB/Compress compressed document is provided 13279 * by default if found at compile-time. 13280 * 13281 * Returns the new parser context or NULL 13282 */ 13283static xmlParserCtxtPtr 13284xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, 13285 const xmlChar *base, xmlParserCtxtPtr pctx) { 13286 xmlParserCtxtPtr ctxt; 13287 xmlParserInputPtr inputStream; 13288 char *directory = NULL; 13289 xmlChar *uri; 13290 13291 ctxt = xmlNewParserCtxt(); 13292 if (ctxt == NULL) { 13293 return(NULL); 13294 } 13295 13296 if (pctx != NULL) { 13297 ctxt->options = pctx->options; 13298 ctxt->_private = pctx->_private; 13299 } 13300 13301 uri = xmlBuildURI(URL, base); 13302 13303 if (uri == NULL) { 13304 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 13305 if (inputStream == NULL) { 13306 xmlFreeParserCtxt(ctxt); 13307 return(NULL); 13308 } 13309 13310 inputPush(ctxt, inputStream); 13311 13312 if ((ctxt->directory == NULL) && (directory == NULL)) 13313 directory = xmlParserGetDirectory((char *)URL); 13314 if ((ctxt->directory == NULL) && (directory != NULL)) 13315 ctxt->directory = directory; 13316 } else { 13317 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 13318 if (inputStream == NULL) { 13319 xmlFree(uri); 13320 xmlFreeParserCtxt(ctxt); 13321 return(NULL); 13322 } 13323 13324 inputPush(ctxt, inputStream); 13325 
13326 if ((ctxt->directory == NULL) && (directory == NULL)) 13327 directory = xmlParserGetDirectory((char *)uri); 13328 if ((ctxt->directory == NULL) && (directory != NULL)) 13329 ctxt->directory = directory; 13330 xmlFree(uri); 13331 } 13332 return(ctxt); 13333} 13334 13335/** 13336 * xmlCreateEntityParserCtxt: 13337 * @URL: the entity URL 13338 * @ID: the entity PUBLIC ID 13339 * @base: a possible base for the target URI 13340 * 13341 * Create a parser context for an external entity 13342 * Automatic support for ZLIB/Compress compressed document is provided 13343 * by default if found at compile-time. 13344 * 13345 * Returns the new parser context or NULL 13346 */ 13347xmlParserCtxtPtr 13348xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 13349 const xmlChar *base) { 13350 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL); 13351 13352} 13353 13354/************************************************************************ 13355 * * 13356 * Front ends when parsing from a file * 13357 * * 13358 ************************************************************************/ 13359 13360/** 13361 * xmlCreateURLParserCtxt: 13362 * @filename: the filename or URL 13363 * @options: a combination of xmlParserOption 13364 * 13365 * Create a parser context for a file or URL content. 
13366 * Automatic support for ZLIB/Compress compressed document is provided 13367 * by default if found at compile-time and for file accesses 13368 * 13369 * Returns the new parser context or NULL 13370 */ 13371xmlParserCtxtPtr 13372xmlCreateURLParserCtxt(const char *filename, int options) 13373{ 13374 xmlParserCtxtPtr ctxt; 13375 xmlParserInputPtr inputStream; 13376 char *directory = NULL; 13377 13378 ctxt = xmlNewParserCtxt(); 13379 if (ctxt == NULL) { 13380 xmlErrMemory(NULL, "cannot allocate parser context"); 13381 return(NULL); 13382 } 13383 13384 if (options) 13385 xmlCtxtUseOptionsInternal(ctxt, options, NULL); 13386 ctxt->linenumbers = 1; 13387 13388 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 13389 if (inputStream == NULL) { 13390 xmlFreeParserCtxt(ctxt); 13391 return(NULL); 13392 } 13393 13394 inputPush(ctxt, inputStream); 13395 if ((ctxt->directory == NULL) && (directory == NULL)) 13396 directory = xmlParserGetDirectory(filename); 13397 if ((ctxt->directory == NULL) && (directory != NULL)) 13398 ctxt->directory = directory; 13399 13400 return(ctxt); 13401} 13402 13403/** 13404 * xmlCreateFileParserCtxt: 13405 * @filename: the filename 13406 * 13407 * Create a parser context for a file content. 13408 * Automatic support for ZLIB/Compress compressed document is provided 13409 * by default if found at compile-time. 13410 * 13411 * Returns the new parser context or NULL 13412 */ 13413xmlParserCtxtPtr 13414xmlCreateFileParserCtxt(const char *filename) 13415{ 13416 return(xmlCreateURLParserCtxt(filename, 0)); 13417} 13418 13419#ifdef LIBXML_SAX1_ENABLED 13420/** 13421 * xmlSAXParseFileWithData: 13422 * @sax: the SAX handler block 13423 * @filename: the filename 13424 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13425 * documents 13426 * @data: the userdata 13427 * 13428 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13429 * compressed document is provided by default if found at compile-time. 
13430 * It use the given SAX function block to handle the parsing callback. 13431 * If sax is NULL, fallback to the default DOM tree building routines. 13432 * 13433 * User data (void *) is stored within the parser context in the 13434 * context's _private member, so it is available nearly everywhere in libxml 13435 * 13436 * Returns the resulting document tree 13437 */ 13438 13439xmlDocPtr 13440xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 13441 int recovery, void *data) { 13442 xmlDocPtr ret; 13443 xmlParserCtxtPtr ctxt; 13444 13445 xmlInitParser(); 13446 13447 ctxt = xmlCreateFileParserCtxt(filename); 13448 if (ctxt == NULL) { 13449 return(NULL); 13450 } 13451 if (sax != NULL) { 13452 if (ctxt->sax != NULL) 13453 xmlFree(ctxt->sax); 13454 ctxt->sax = sax; 13455 } 13456 xmlDetectSAX2(ctxt); 13457 if (data!=NULL) { 13458 ctxt->_private = data; 13459 } 13460 13461 if (ctxt->directory == NULL) 13462 ctxt->directory = xmlParserGetDirectory(filename); 13463 13464 ctxt->recovery = recovery; 13465 13466 xmlParseDocument(ctxt); 13467 13468 if ((ctxt->wellFormed) || recovery) { 13469 ret = ctxt->myDoc; 13470 if (ret != NULL) { 13471 if (ctxt->input->buf->compressed > 0) 13472 ret->compression = 9; 13473 else 13474 ret->compression = ctxt->input->buf->compressed; 13475 } 13476 } 13477 else { 13478 ret = NULL; 13479 xmlFreeDoc(ctxt->myDoc); 13480 ctxt->myDoc = NULL; 13481 } 13482 if (sax != NULL) 13483 ctxt->sax = NULL; 13484 xmlFreeParserCtxt(ctxt); 13485 13486 return(ret); 13487} 13488 13489/** 13490 * xmlSAXParseFile: 13491 * @sax: the SAX handler block 13492 * @filename: the filename 13493 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13494 * documents 13495 * 13496 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13497 * compressed document is provided by default if found at compile-time. 13498 * It use the given SAX function block to handle the parsing callback. 
13499 * If sax is NULL, fallback to the default DOM tree building routines. 13500 * 13501 * Returns the resulting document tree 13502 */ 13503 13504xmlDocPtr 13505xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 13506 int recovery) { 13507 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 13508} 13509 13510/** 13511 * xmlRecoverDoc: 13512 * @cur: a pointer to an array of xmlChar 13513 * 13514 * parse an XML in-memory document and build a tree. 13515 * In the case the document is not Well Formed, a attempt to build a 13516 * tree is tried anyway 13517 * 13518 * Returns the resulting document tree or NULL in case of failure 13519 */ 13520 13521xmlDocPtr 13522xmlRecoverDoc(const xmlChar *cur) { 13523 return(xmlSAXParseDoc(NULL, cur, 1)); 13524} 13525 13526/** 13527 * xmlParseFile: 13528 * @filename: the filename 13529 * 13530 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13531 * compressed document is provided by default if found at compile-time. 13532 * 13533 * Returns the resulting document tree if the file was wellformed, 13534 * NULL otherwise. 13535 */ 13536 13537xmlDocPtr 13538xmlParseFile(const char *filename) { 13539 return(xmlSAXParseFile(NULL, filename, 0)); 13540} 13541 13542/** 13543 * xmlRecoverFile: 13544 * @filename: the filename 13545 * 13546 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 13547 * compressed document is provided by default if found at compile-time. 
13548 * In the case the document is not Well Formed, it attempts to build 13549 * a tree anyway 13550 * 13551 * Returns the resulting document tree or NULL in case of failure 13552 */ 13553 13554xmlDocPtr 13555xmlRecoverFile(const char *filename) { 13556 return(xmlSAXParseFile(NULL, filename, 1)); 13557} 13558 13559 13560/** 13561 * xmlSetupParserForBuffer: 13562 * @ctxt: an XML parser context 13563 * @buffer: a xmlChar * buffer 13564 * @filename: a file name 13565 * 13566 * Setup the parser context to parse a new buffer; Clears any prior 13567 * contents from the parser context. The buffer parameter must not be 13568 * NULL, but the filename parameter can be 13569 */ 13570void 13571xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 13572 const char* filename) 13573{ 13574 xmlParserInputPtr input; 13575 13576 if ((ctxt == NULL) || (buffer == NULL)) 13577 return; 13578 13579 input = xmlNewInputStream(ctxt); 13580 if (input == NULL) { 13581 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 13582 xmlClearParserCtxt(ctxt); 13583 return; 13584 } 13585 13586 xmlClearParserCtxt(ctxt); 13587 if (filename != NULL) 13588 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 13589 input->base = buffer; 13590 input->cur = buffer; 13591 input->end = &buffer[xmlStrlen(buffer)]; 13592 inputPush(ctxt, input); 13593} 13594 13595/** 13596 * xmlSAXUserParseFile: 13597 * @sax: a SAX handler 13598 * @user_data: The user data returned on SAX callbacks 13599 * @filename: a file name 13600 * 13601 * parse an XML file and call the given SAX handler routines. 
13602 * Automatic support for ZLIB/Compress compressed document is provided 13603 * 13604 * Returns 0 in case of success or a error number otherwise 13605 */ 13606int 13607xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 13608 const char *filename) { 13609 int ret = 0; 13610 xmlParserCtxtPtr ctxt; 13611 13612 ctxt = xmlCreateFileParserCtxt(filename); 13613 if (ctxt == NULL) return -1; 13614 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13615 xmlFree(ctxt->sax); 13616 ctxt->sax = sax; 13617 xmlDetectSAX2(ctxt); 13618 13619 if (user_data != NULL) 13620 ctxt->userData = user_data; 13621 13622 xmlParseDocument(ctxt); 13623 13624 if (ctxt->wellFormed) 13625 ret = 0; 13626 else { 13627 if (ctxt->errNo != 0) 13628 ret = ctxt->errNo; 13629 else 13630 ret = -1; 13631 } 13632 if (sax != NULL) 13633 ctxt->sax = NULL; 13634 if (ctxt->myDoc != NULL) { 13635 xmlFreeDoc(ctxt->myDoc); 13636 ctxt->myDoc = NULL; 13637 } 13638 xmlFreeParserCtxt(ctxt); 13639 13640 return ret; 13641} 13642#endif /* LIBXML_SAX1_ENABLED */ 13643 13644/************************************************************************ 13645 * * 13646 * Front ends when parsing from memory * 13647 * * 13648 ************************************************************************/ 13649 13650/** 13651 * xmlCreateMemoryParserCtxt: 13652 * @buffer: a pointer to a char array 13653 * @size: the size of the array 13654 * 13655 * Create a parser context for an XML in-memory document. 
13656 * 13657 * Returns the new parser context or NULL 13658 */ 13659xmlParserCtxtPtr 13660xmlCreateMemoryParserCtxt(const char *buffer, int size) { 13661 xmlParserCtxtPtr ctxt; 13662 xmlParserInputPtr input; 13663 xmlParserInputBufferPtr buf; 13664 13665 if (buffer == NULL) 13666 return(NULL); 13667 if (size <= 0) 13668 return(NULL); 13669 13670 ctxt = xmlNewParserCtxt(); 13671 if (ctxt == NULL) 13672 return(NULL); 13673 13674 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 13675 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13676 if (buf == NULL) { 13677 xmlFreeParserCtxt(ctxt); 13678 return(NULL); 13679 } 13680 13681 input = xmlNewInputStream(ctxt); 13682 if (input == NULL) { 13683 xmlFreeParserInputBuffer(buf); 13684 xmlFreeParserCtxt(ctxt); 13685 return(NULL); 13686 } 13687 13688 input->filename = NULL; 13689 input->buf = buf; 13690 input->base = input->buf->buffer->content; 13691 input->cur = input->buf->buffer->content; 13692 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 13693 13694 inputPush(ctxt, input); 13695 return(ctxt); 13696} 13697 13698#ifdef LIBXML_SAX1_ENABLED 13699/** 13700 * xmlSAXParseMemoryWithData: 13701 * @sax: the SAX handler block 13702 * @buffer: an pointer to a char array 13703 * @size: the size of the array 13704 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13705 * documents 13706 * @data: the userdata 13707 * 13708 * parse an XML in-memory block and use the given SAX function block 13709 * to handle the parsing callback. If sax is NULL, fallback to the default 13710 * DOM tree building routines. 
13711 * 13712 * User data (void *) is stored within the parser context in the 13713 * context's _private member, so it is available nearly everywhere in libxml 13714 * 13715 * Returns the resulting document tree 13716 */ 13717 13718xmlDocPtr 13719xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 13720 int size, int recovery, void *data) { 13721 xmlDocPtr ret; 13722 xmlParserCtxtPtr ctxt; 13723 13724 xmlInitParser(); 13725 13726 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13727 if (ctxt == NULL) return(NULL); 13728 if (sax != NULL) { 13729 if (ctxt->sax != NULL) 13730 xmlFree(ctxt->sax); 13731 ctxt->sax = sax; 13732 } 13733 xmlDetectSAX2(ctxt); 13734 if (data!=NULL) { 13735 ctxt->_private=data; 13736 } 13737 13738 ctxt->recovery = recovery; 13739 13740 xmlParseDocument(ctxt); 13741 13742 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13743 else { 13744 ret = NULL; 13745 xmlFreeDoc(ctxt->myDoc); 13746 ctxt->myDoc = NULL; 13747 } 13748 if (sax != NULL) 13749 ctxt->sax = NULL; 13750 xmlFreeParserCtxt(ctxt); 13751 13752 return(ret); 13753} 13754 13755/** 13756 * xmlSAXParseMemory: 13757 * @sax: the SAX handler block 13758 * @buffer: an pointer to a char array 13759 * @size: the size of the array 13760 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 13761 * documents 13762 * 13763 * parse an XML in-memory block and use the given SAX function block 13764 * to handle the parsing callback. If sax is NULL, fallback to the default 13765 * DOM tree building routines. 13766 * 13767 * Returns the resulting document tree 13768 */ 13769xmlDocPtr 13770xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 13771 int size, int recovery) { 13772 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 13773} 13774 13775/** 13776 * xmlParseMemory: 13777 * @buffer: an pointer to a char array 13778 * @size: the size of the array 13779 * 13780 * parse an XML in-memory block and build a tree. 
13781 * 13782 * Returns the resulting document tree 13783 */ 13784 13785xmlDocPtr xmlParseMemory(const char *buffer, int size) { 13786 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 13787} 13788 13789/** 13790 * xmlRecoverMemory: 13791 * @buffer: an pointer to a char array 13792 * @size: the size of the array 13793 * 13794 * parse an XML in-memory block and build a tree. 13795 * In the case the document is not Well Formed, an attempt to 13796 * build a tree is tried anyway 13797 * 13798 * Returns the resulting document tree or NULL in case of error 13799 */ 13800 13801xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 13802 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 13803} 13804 13805/** 13806 * xmlSAXUserParseMemory: 13807 * @sax: a SAX handler 13808 * @user_data: The user data returned on SAX callbacks 13809 * @buffer: an in-memory XML document input 13810 * @size: the length of the XML document in bytes 13811 * 13812 * A better SAX parsing routine. 13813 * parse an XML in-memory buffer and call the given SAX handler routines. 
13814 * 13815 * Returns 0 in case of success or a error number otherwise 13816 */ 13817int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 13818 const char *buffer, int size) { 13819 int ret = 0; 13820 xmlParserCtxtPtr ctxt; 13821 13822 xmlInitParser(); 13823 13824 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13825 if (ctxt == NULL) return -1; 13826 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 13827 xmlFree(ctxt->sax); 13828 ctxt->sax = sax; 13829 xmlDetectSAX2(ctxt); 13830 13831 if (user_data != NULL) 13832 ctxt->userData = user_data; 13833 13834 xmlParseDocument(ctxt); 13835 13836 if (ctxt->wellFormed) 13837 ret = 0; 13838 else { 13839 if (ctxt->errNo != 0) 13840 ret = ctxt->errNo; 13841 else 13842 ret = -1; 13843 } 13844 if (sax != NULL) 13845 ctxt->sax = NULL; 13846 if (ctxt->myDoc != NULL) { 13847 xmlFreeDoc(ctxt->myDoc); 13848 ctxt->myDoc = NULL; 13849 } 13850 xmlFreeParserCtxt(ctxt); 13851 13852 return ret; 13853} 13854#endif /* LIBXML_SAX1_ENABLED */ 13855 13856/** 13857 * xmlCreateDocParserCtxt: 13858 * @cur: a pointer to an array of xmlChar 13859 * 13860 * Creates a parser context for an XML in-memory document. 13861 * 13862 * Returns the new parser context or NULL 13863 */ 13864xmlParserCtxtPtr 13865xmlCreateDocParserCtxt(const xmlChar *cur) { 13866 int len; 13867 13868 if (cur == NULL) 13869 return(NULL); 13870 len = xmlStrlen(cur); 13871 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 13872} 13873 13874#ifdef LIBXML_SAX1_ENABLED 13875/** 13876 * xmlSAXParseDoc: 13877 * @sax: the SAX handler block 13878 * @cur: a pointer to an array of xmlChar 13879 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 13880 * documents 13881 * 13882 * parse an XML in-memory document and build a tree. 13883 * It use the given SAX function block to handle the parsing callback. 13884 * If sax is NULL, fallback to the default DOM tree building routines. 
13885 * 13886 * Returns the resulting document tree 13887 */ 13888 13889xmlDocPtr 13890xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 13891 xmlDocPtr ret; 13892 xmlParserCtxtPtr ctxt; 13893 xmlSAXHandlerPtr oldsax = NULL; 13894 13895 if (cur == NULL) return(NULL); 13896 13897 13898 ctxt = xmlCreateDocParserCtxt(cur); 13899 if (ctxt == NULL) return(NULL); 13900 if (sax != NULL) { 13901 oldsax = ctxt->sax; 13902 ctxt->sax = sax; 13903 ctxt->userData = NULL; 13904 } 13905 xmlDetectSAX2(ctxt); 13906 13907 xmlParseDocument(ctxt); 13908 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 13909 else { 13910 ret = NULL; 13911 xmlFreeDoc(ctxt->myDoc); 13912 ctxt->myDoc = NULL; 13913 } 13914 if (sax != NULL) 13915 ctxt->sax = oldsax; 13916 xmlFreeParserCtxt(ctxt); 13917 13918 return(ret); 13919} 13920 13921/** 13922 * xmlParseDoc: 13923 * @cur: a pointer to an array of xmlChar 13924 * 13925 * parse an XML in-memory document and build a tree. 13926 * 13927 * Returns the resulting document tree 13928 */ 13929 13930xmlDocPtr 13931xmlParseDoc(const xmlChar *cur) { 13932 return(xmlSAXParseDoc(NULL, cur, 0)); 13933} 13934#endif /* LIBXML_SAX1_ENABLED */ 13935 13936#ifdef LIBXML_LEGACY_ENABLED 13937/************************************************************************ 13938 * * 13939 * Specific function to keep track of entities references * 13940 * and used by the XSLT debugger * 13941 * * 13942 ************************************************************************/ 13943 13944static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 13945 13946/** 13947 * xmlAddEntityReference: 13948 * @ent : A valid entity 13949 * @firstNode : A valid first node for children of entity 13950 * @lastNode : A valid last node of children entity 13951 * 13952 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 13953 */ 13954static void 13955xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 13956 xmlNodePtr lastNode) 13957{ 13958 
if (xmlEntityRefFunc != NULL) { 13959 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 13960 } 13961} 13962 13963 13964/** 13965 * xmlSetEntityReferenceFunc: 13966 * @func: A valid function 13967 * 13968 * Set the function to call call back when a xml reference has been made 13969 */ 13970void 13971xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 13972{ 13973 xmlEntityRefFunc = func; 13974} 13975#endif /* LIBXML_LEGACY_ENABLED */ 13976 13977/************************************************************************ 13978 * * 13979 * Miscellaneous * 13980 * * 13981 ************************************************************************/ 13982 13983#ifdef LIBXML_XPATH_ENABLED 13984#include <libxml/xpath.h> 13985#endif 13986 13987extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 13988static int xmlParserInitialized = 0; 13989 13990/** 13991 * xmlInitParser: 13992 * 13993 * Initialization function for the XML parser. 13994 * This is not reentrant. Call once before processing in case of 13995 * use in multithreaded programs. 
 */

void
xmlInitParser(void) {
    /* Fast path: nothing to do once the flag has been set. */
    if (xmlParserInitialized != 0)
	return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support the flag is tested a second time between the
     * global init mutex lock/unlock calls, before doing the real work.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
	xmlInitGlobals();
	xmlInitThreads();
	/* Only (re)install the default generic error handler when no
	 * other handler has been set. */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitMemory();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	/* Set last, once every sub-module above has been initialised. */
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}

/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice.
In case of doubt abstain 14051 * from calling this function or do it just before calling exit() 14052 * to avoid leak reports from valgrind ! 14053 */ 14054 14055void 14056xmlCleanupParser(void) { 14057 if (!xmlParserInitialized) 14058 return; 14059 14060 xmlCleanupCharEncodingHandlers(); 14061#ifdef LIBXML_CATALOG_ENABLED 14062 xmlCatalogCleanup(); 14063#endif 14064 xmlDictCleanup(); 14065 xmlCleanupInputCallbacks(); 14066#ifdef LIBXML_OUTPUT_ENABLED 14067 xmlCleanupOutputCallbacks(); 14068#endif 14069#ifdef LIBXML_SCHEMAS_ENABLED 14070 xmlSchemaCleanupTypes(); 14071 xmlRelaxNGCleanupTypes(); 14072#endif 14073 xmlCleanupGlobals(); 14074 xmlResetLastError(); 14075 xmlCleanupThreads(); /* must be last if called not from the main thread */ 14076 xmlCleanupMemory(); 14077 xmlParserInitialized = 0; 14078} 14079 14080/************************************************************************ 14081 * * 14082 * New set (2.6.0) of simpler and more flexible APIs * 14083 * * 14084 ************************************************************************/ 14085 14086/** 14087 * DICT_FREE: 14088 * @str: a string 14089 * 14090 * Free a string if it is not owned by the "dict" dictionnary in the 14091 * current scope 14092 */ 14093#define DICT_FREE(str) \ 14094 if ((str) && ((!dict) || \ 14095 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 14096 xmlFree((char *)(str)); 14097 14098/** 14099 * xmlCtxtReset: 14100 * @ctxt: an XML parser context 14101 * 14102 * Reset a parser context 14103 */ 14104void 14105xmlCtxtReset(xmlParserCtxtPtr ctxt) 14106{ 14107 xmlParserInputPtr input; 14108 xmlDictPtr dict; 14109 14110 if (ctxt == NULL) 14111 return; 14112 14113 dict = ctxt->dict; 14114 14115 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 14116 xmlFreeInputStream(input); 14117 } 14118 ctxt->inputNr = 0; 14119 ctxt->input = NULL; 14120 14121 ctxt->spaceNr = 0; 14122 if (ctxt->spaceTab != NULL) { 14123 ctxt->spaceTab[0] = -1; 14124 ctxt->space = &ctxt->spaceTab[0]; 14125 
} else { 14126 ctxt->space = NULL; 14127 } 14128 14129 14130 ctxt->nodeNr = 0; 14131 ctxt->node = NULL; 14132 14133 ctxt->nameNr = 0; 14134 ctxt->name = NULL; 14135 14136 DICT_FREE(ctxt->version); 14137 ctxt->version = NULL; 14138 DICT_FREE(ctxt->encoding); 14139 ctxt->encoding = NULL; 14140 DICT_FREE(ctxt->directory); 14141 ctxt->directory = NULL; 14142 DICT_FREE(ctxt->extSubURI); 14143 ctxt->extSubURI = NULL; 14144 DICT_FREE(ctxt->extSubSystem); 14145 ctxt->extSubSystem = NULL; 14146 if (ctxt->myDoc != NULL) 14147 xmlFreeDoc(ctxt->myDoc); 14148 ctxt->myDoc = NULL; 14149 14150 ctxt->standalone = -1; 14151 ctxt->hasExternalSubset = 0; 14152 ctxt->hasPErefs = 0; 14153 ctxt->html = 0; 14154 ctxt->external = 0; 14155 ctxt->instate = XML_PARSER_START; 14156 ctxt->token = 0; 14157 14158 ctxt->wellFormed = 1; 14159 ctxt->nsWellFormed = 1; 14160 ctxt->disableSAX = 0; 14161 ctxt->valid = 1; 14162#if 0 14163 ctxt->vctxt.userData = ctxt; 14164 ctxt->vctxt.error = xmlParserValidityError; 14165 ctxt->vctxt.warning = xmlParserValidityWarning; 14166#endif 14167 ctxt->record_info = 0; 14168 ctxt->nbChars = 0; 14169 ctxt->checkIndex = 0; 14170 ctxt->inSubset = 0; 14171 ctxt->errNo = XML_ERR_OK; 14172 ctxt->depth = 0; 14173 ctxt->charset = XML_CHAR_ENCODING_UTF8; 14174 ctxt->catalogs = NULL; 14175 ctxt->nbentities = 0; 14176 ctxt->sizeentities = 0; 14177 xmlInitNodeInfoSeq(&ctxt->node_seq); 14178 14179 if (ctxt->attsDefault != NULL) { 14180 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 14181 ctxt->attsDefault = NULL; 14182 } 14183 if (ctxt->attsSpecial != NULL) { 14184 xmlHashFree(ctxt->attsSpecial, NULL); 14185 ctxt->attsSpecial = NULL; 14186 } 14187 14188#ifdef LIBXML_CATALOG_ENABLED 14189 if (ctxt->catalogs != NULL) 14190 xmlCatalogFreeLocal(ctxt->catalogs); 14191#endif 14192 if (ctxt->lastError.code != XML_ERR_OK) 14193 xmlResetError(&ctxt->lastError); 14194} 14195 14196/** 14197 * xmlCtxtResetPush: 14198 * @ctxt: an XML parser context 14199 * @chunk: a pointer 
to an array of chars 14200 * @size: number of chars in the array 14201 * @filename: an optional file name or URI 14202 * @encoding: the document encoding, or NULL 14203 * 14204 * Reset a push parser context 14205 * 14206 * Returns 0 in case of success and 1 in case of error 14207 */ 14208int 14209xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 14210 int size, const char *filename, const char *encoding) 14211{ 14212 xmlParserInputPtr inputStream; 14213 xmlParserInputBufferPtr buf; 14214 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 14215 14216 if (ctxt == NULL) 14217 return(1); 14218 14219 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 14220 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 14221 14222 buf = xmlAllocParserInputBuffer(enc); 14223 if (buf == NULL) 14224 return(1); 14225 14226 if (ctxt == NULL) { 14227 xmlFreeParserInputBuffer(buf); 14228 return(1); 14229 } 14230 14231 xmlCtxtReset(ctxt); 14232 14233 if (ctxt->pushTab == NULL) { 14234 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 14235 sizeof(xmlChar *)); 14236 if (ctxt->pushTab == NULL) { 14237 xmlErrMemory(ctxt, NULL); 14238 xmlFreeParserInputBuffer(buf); 14239 return(1); 14240 } 14241 } 14242 14243 if (filename == NULL) { 14244 ctxt->directory = NULL; 14245 } else { 14246 ctxt->directory = xmlParserGetDirectory(filename); 14247 } 14248 14249 inputStream = xmlNewInputStream(ctxt); 14250 if (inputStream == NULL) { 14251 xmlFreeParserInputBuffer(buf); 14252 return(1); 14253 } 14254 14255 if (filename == NULL) 14256 inputStream->filename = NULL; 14257 else 14258 inputStream->filename = (char *) 14259 xmlCanonicPath((const xmlChar *) filename); 14260 inputStream->buf = buf; 14261 inputStream->base = inputStream->buf->buffer->content; 14262 inputStream->cur = inputStream->buf->buffer->content; 14263 inputStream->end = 14264 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 14265 14266 inputPush(ctxt, inputStream); 14267 14268 if ((size > 0) && 
(chunk != NULL) && (ctxt->input != NULL) && 14269 (ctxt->input->buf != NULL)) { 14270 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 14271 int cur = ctxt->input->cur - ctxt->input->base; 14272 14273 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 14274 14275 ctxt->input->base = ctxt->input->buf->buffer->content + base; 14276 ctxt->input->cur = ctxt->input->base + cur; 14277 ctxt->input->end = 14278 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 14279 use]; 14280#ifdef DEBUG_PUSH 14281 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 14282#endif 14283 } 14284 14285 if (encoding != NULL) { 14286 xmlCharEncodingHandlerPtr hdlr; 14287 14288 if (ctxt->encoding != NULL) 14289 xmlFree((xmlChar *) ctxt->encoding); 14290 ctxt->encoding = xmlStrdup((const xmlChar *) encoding); 14291 14292 hdlr = xmlFindCharEncodingHandler(encoding); 14293 if (hdlr != NULL) { 14294 xmlSwitchToEncoding(ctxt, hdlr); 14295 } else { 14296 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 14297 "Unsupported encoding %s\n", BAD_CAST encoding); 14298 } 14299 } else if (enc != XML_CHAR_ENCODING_NONE) { 14300 xmlSwitchEncoding(ctxt, enc); 14301 } 14302 14303 return(0); 14304} 14305 14306 14307/** 14308 * xmlCtxtUseOptionsInternal: 14309 * @ctxt: an XML parser context 14310 * @options: a combination of xmlParserOption 14311 * @encoding: the user provided encoding to use 14312 * 14313 * Applies the options to the parser context 14314 * 14315 * Returns 0 in case of success, the set of unknown or unimplemented options 14316 * in case of error. 
 */
static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
{
    /* A NULL context is reported as -1 rather than as an option mask. */
    if (ctxt == NULL)
        return(-1);
    /* A user supplied encoding replaces any encoding already stored. */
    if (encoding != NULL) {
        if (ctxt->encoding != NULL)
	    xmlFree((xmlChar *) ctxt->encoding);
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
    }
    /*
     * Each recognised flag is removed from @options once handled, so the
     * value left at the end is the set of bits that were not recognised.
     * Most flags are also recorded in ctxt->options.
     */
    if (options & XML_PARSE_RECOVER) {
        ctxt->recovery = 1;
        options -= XML_PARSE_RECOVER;
	ctxt->options |= XML_PARSE_RECOVER;
    } else
        ctxt->recovery = 0;
    /* loadsubset is (re)assigned here; DTDATTR below only ORs into it. */
    if (options & XML_PARSE_DTDLOAD) {
        ctxt->loadsubset = XML_DETECT_IDS;
        options -= XML_PARSE_DTDLOAD;
	ctxt->options |= XML_PARSE_DTDLOAD;
    } else
        ctxt->loadsubset = 0;
    if (options & XML_PARSE_DTDATTR) {
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
        options -= XML_PARSE_DTDATTR;
	ctxt->options |= XML_PARSE_DTDATTR;
    }
    if (options & XML_PARSE_NOENT) {
        ctxt->replaceEntities = 1;
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
        options -= XML_PARSE_NOENT;
	ctxt->options |= XML_PARSE_NOENT;
    } else
        ctxt->replaceEntities = 0;
    if (options & XML_PARSE_PEDANTIC) {
        ctxt->pedantic = 1;
        options -= XML_PARSE_PEDANTIC;
	ctxt->options |= XML_PARSE_PEDANTIC;
    } else
        ctxt->pedantic = 0;
    if (options & XML_PARSE_NOBLANKS) {
        ctxt->keepBlanks = 0;
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
        options -= XML_PARSE_NOBLANKS;
	ctxt->options |= XML_PARSE_NOBLANKS;
    } else
        ctxt->keepBlanks = 1;
    if (options & XML_PARSE_DTDVALID) {
        ctxt->validate = 1;
	/* NOWARNING/NOERROR are still set in @options at this point, so
	 * they can also silence the validation context callbacks. */
        if (options & XML_PARSE_NOWARNING)
            ctxt->vctxt.warning = NULL;
        if (options & XML_PARSE_NOERROR)
            ctxt->vctxt.error = NULL;
        options -= XML_PARSE_DTDVALID;
	ctxt->options |= XML_PARSE_DTDVALID;
    } else
        ctxt->validate = 0;
    /* NOWARNING and NOERROR only clear the SAX callbacks; unlike the
     * other flags they are not recorded in ctxt->options. */
    if (options & XML_PARSE_NOWARNING) {
        ctxt->sax->warning = NULL;
        options -= XML_PARSE_NOWARNING;
    }
    if (options & XML_PARSE_NOERROR) {
        ctxt->sax->error = NULL;
        ctxt->sax->fatalError = NULL;
        options -= XML_PARSE_NOERROR;
    }
#ifdef LIBXML_SAX1_ENABLED
    /* SAX1 mode: install the element callbacks and drop the Ns ones. */
    if (options & XML_PARSE_SAX1) {
        ctxt->sax->startElement = xmlSAX2StartElement;
        ctxt->sax->endElement = xmlSAX2EndElement;
        ctxt->sax->startElementNs = NULL;
        ctxt->sax->endElementNs = NULL;
        ctxt->sax->initialized = 1;
        options -= XML_PARSE_SAX1;
	ctxt->options |= XML_PARSE_SAX1;
    }
#endif /* LIBXML_SAX1_ENABLED */
    if (options & XML_PARSE_NODICT) {
        ctxt->dictNames = 0;
        options -= XML_PARSE_NODICT;
	ctxt->options |= XML_PARSE_NODICT;
    } else {
        ctxt->dictNames = 1;
    }
    if (options & XML_PARSE_NOCDATA) {
        ctxt->sax->cdataBlock = NULL;
        options -= XML_PARSE_NOCDATA;
	ctxt->options |= XML_PARSE_NOCDATA;
    }
    /* The remaining flags are only recorded in ctxt->options here. */
    if (options & XML_PARSE_NSCLEAN) {
	ctxt->options |= XML_PARSE_NSCLEAN;
        options -= XML_PARSE_NSCLEAN;
    }
    if (options & XML_PARSE_NONET) {
	ctxt->options |= XML_PARSE_NONET;
        options -= XML_PARSE_NONET;
    }
    if (options & XML_PARSE_COMPACT) {
	ctxt->options |= XML_PARSE_COMPACT;
        options -= XML_PARSE_COMPACT;
    }
    if (options & XML_PARSE_OLD10) {
	ctxt->options |= XML_PARSE_OLD10;
        options -= XML_PARSE_OLD10;
    }
    if (options & XML_PARSE_NOBASEFIX) {
	ctxt->options |= XML_PARSE_NOBASEFIX;
        options -= XML_PARSE_NOBASEFIX;
    }
    if (options & XML_PARSE_HUGE) {
	ctxt->options |= XML_PARSE_HUGE;
        options -= XML_PARSE_HUGE;
    }
    if (options & XML_PARSE_OLDSAX) {
	ctxt->options |= XML_PARSE_OLDSAX;
        options -= XML_PARSE_OLDSAX;
    }
    /* Line numbers are always enabled, whatever the options. */
    ctxt->linenumbers = 1;
    return (options);
}

/**
 * xmlCtxtUseOptions:
 * @ctxt: an XML parser context
 * @options:  a combination of
xmlParserOption 14443 * 14444 * Applies the options to the parser context 14445 * 14446 * Returns 0 in case of success, the set of unknown or unimplemented options 14447 * in case of error. 14448 */ 14449int 14450xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 14451{ 14452 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); 14453} 14454 14455/** 14456 * xmlDoRead: 14457 * @ctxt: an XML parser context 14458 * @URL: the base URL to use for the document 14459 * @encoding: the document encoding, or NULL 14460 * @options: a combination of xmlParserOption 14461 * @reuse: keep the context for reuse 14462 * 14463 * Common front-end for the xmlRead functions 14464 * 14465 * Returns the resulting document tree or NULL 14466 */ 14467static xmlDocPtr 14468xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 14469 int options, int reuse) 14470{ 14471 xmlDocPtr ret; 14472 14473 xmlCtxtUseOptionsInternal(ctxt, options, encoding); 14474 if (encoding != NULL) { 14475 xmlCharEncodingHandlerPtr hdlr; 14476 14477 hdlr = xmlFindCharEncodingHandler(encoding); 14478 if (hdlr != NULL) 14479 xmlSwitchToEncoding(ctxt, hdlr); 14480 } 14481 if ((URL != NULL) && (ctxt->input != NULL) && 14482 (ctxt->input->filename == NULL)) 14483 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 14484 xmlParseDocument(ctxt); 14485 if ((ctxt->wellFormed) || ctxt->recovery) 14486 ret = ctxt->myDoc; 14487 else { 14488 ret = NULL; 14489 if (ctxt->myDoc != NULL) { 14490 xmlFreeDoc(ctxt->myDoc); 14491 } 14492 } 14493 ctxt->myDoc = NULL; 14494 if (!reuse) { 14495 xmlFreeParserCtxt(ctxt); 14496 } 14497 14498 return (ret); 14499} 14500 14501/** 14502 * xmlReadDoc: 14503 * @cur: a pointer to a zero terminated string 14504 * @URL: the base URL to use for the document 14505 * @encoding: the document encoding, or NULL 14506 * @options: a combination of xmlParserOption 14507 * 14508 * parse an XML in-memory document and build a tree. 
14509 * 14510 * Returns the resulting document tree 14511 */ 14512xmlDocPtr 14513xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 14514{ 14515 xmlParserCtxtPtr ctxt; 14516 14517 if (cur == NULL) 14518 return (NULL); 14519 14520 ctxt = xmlCreateDocParserCtxt(cur); 14521 if (ctxt == NULL) 14522 return (NULL); 14523 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14524} 14525 14526/** 14527 * xmlReadFile: 14528 * @filename: a file or URL 14529 * @encoding: the document encoding, or NULL 14530 * @options: a combination of xmlParserOption 14531 * 14532 * parse an XML file from the filesystem or the network. 14533 * 14534 * Returns the resulting document tree 14535 */ 14536xmlDocPtr 14537xmlReadFile(const char *filename, const char *encoding, int options) 14538{ 14539 xmlParserCtxtPtr ctxt; 14540 14541 ctxt = xmlCreateURLParserCtxt(filename, options); 14542 if (ctxt == NULL) 14543 return (NULL); 14544 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 14545} 14546 14547/** 14548 * xmlReadMemory: 14549 * @buffer: a pointer to a char array 14550 * @size: the size of the array 14551 * @URL: the base URL to use for the document 14552 * @encoding: the document encoding, or NULL 14553 * @options: a combination of xmlParserOption 14554 * 14555 * parse an XML in-memory document and build a tree. 
14556 * 14557 * Returns the resulting document tree 14558 */ 14559xmlDocPtr 14560xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 14561{ 14562 xmlParserCtxtPtr ctxt; 14563 14564 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 14565 if (ctxt == NULL) 14566 return (NULL); 14567 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14568} 14569 14570/** 14571 * xmlReadFd: 14572 * @fd: an open file descriptor 14573 * @URL: the base URL to use for the document 14574 * @encoding: the document encoding, or NULL 14575 * @options: a combination of xmlParserOption 14576 * 14577 * parse an XML from a file descriptor and build a tree. 14578 * NOTE that the file descriptor will not be closed when the 14579 * reader is closed or reset. 14580 * 14581 * Returns the resulting document tree 14582 */ 14583xmlDocPtr 14584xmlReadFd(int fd, const char *URL, const char *encoding, int options) 14585{ 14586 xmlParserCtxtPtr ctxt; 14587 xmlParserInputBufferPtr input; 14588 xmlParserInputPtr stream; 14589 14590 if (fd < 0) 14591 return (NULL); 14592 14593 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14594 if (input == NULL) 14595 return (NULL); 14596 input->closecallback = NULL; 14597 ctxt = xmlNewParserCtxt(); 14598 if (ctxt == NULL) { 14599 xmlFreeParserInputBuffer(input); 14600 return (NULL); 14601 } 14602 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14603 if (stream == NULL) { 14604 xmlFreeParserInputBuffer(input); 14605 xmlFreeParserCtxt(ctxt); 14606 return (NULL); 14607 } 14608 inputPush(ctxt, stream); 14609 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14610} 14611 14612/** 14613 * xmlReadIO: 14614 * @ioread: an I/O read function 14615 * @ioclose: an I/O close function 14616 * @ioctx: an I/O handler 14617 * @URL: the base URL to use for the document 14618 * @encoding: the document encoding, or NULL 14619 * @options: a combination of xmlParserOption 14620 * 14621 * parse an XML document from 
I/O functions and source and build a tree. 14622 * 14623 * Returns the resulting document tree 14624 */ 14625xmlDocPtr 14626xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 14627 void *ioctx, const char *URL, const char *encoding, int options) 14628{ 14629 xmlParserCtxtPtr ctxt; 14630 xmlParserInputBufferPtr input; 14631 xmlParserInputPtr stream; 14632 14633 if (ioread == NULL) 14634 return (NULL); 14635 14636 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14637 XML_CHAR_ENCODING_NONE); 14638 if (input == NULL) 14639 return (NULL); 14640 ctxt = xmlNewParserCtxt(); 14641 if (ctxt == NULL) { 14642 xmlFreeParserInputBuffer(input); 14643 return (NULL); 14644 } 14645 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14646 if (stream == NULL) { 14647 xmlFreeParserInputBuffer(input); 14648 xmlFreeParserCtxt(ctxt); 14649 return (NULL); 14650 } 14651 inputPush(ctxt, stream); 14652 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 14653} 14654 14655/** 14656 * xmlCtxtReadDoc: 14657 * @ctxt: an XML parser context 14658 * @cur: a pointer to a zero terminated string 14659 * @URL: the base URL to use for the document 14660 * @encoding: the document encoding, or NULL 14661 * @options: a combination of xmlParserOption 14662 * 14663 * parse an XML in-memory document and build a tree. 
14664 * This reuses the existing @ctxt parser context 14665 * 14666 * Returns the resulting document tree 14667 */ 14668xmlDocPtr 14669xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 14670 const char *URL, const char *encoding, int options) 14671{ 14672 xmlParserInputPtr stream; 14673 14674 if (cur == NULL) 14675 return (NULL); 14676 if (ctxt == NULL) 14677 return (NULL); 14678 14679 xmlCtxtReset(ctxt); 14680 14681 stream = xmlNewStringInputStream(ctxt, cur); 14682 if (stream == NULL) { 14683 return (NULL); 14684 } 14685 inputPush(ctxt, stream); 14686 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14687} 14688 14689/** 14690 * xmlCtxtReadFile: 14691 * @ctxt: an XML parser context 14692 * @filename: a file or URL 14693 * @encoding: the document encoding, or NULL 14694 * @options: a combination of xmlParserOption 14695 * 14696 * parse an XML file from the filesystem or the network. 14697 * This reuses the existing @ctxt parser context 14698 * 14699 * Returns the resulting document tree 14700 */ 14701xmlDocPtr 14702xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 14703 const char *encoding, int options) 14704{ 14705 xmlParserInputPtr stream; 14706 14707 if (filename == NULL) 14708 return (NULL); 14709 if (ctxt == NULL) 14710 return (NULL); 14711 14712 xmlCtxtReset(ctxt); 14713 14714 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 14715 if (stream == NULL) { 14716 return (NULL); 14717 } 14718 inputPush(ctxt, stream); 14719 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 14720} 14721 14722/** 14723 * xmlCtxtReadMemory: 14724 * @ctxt: an XML parser context 14725 * @buffer: a pointer to a char array 14726 * @size: the size of the array 14727 * @URL: the base URL to use for the document 14728 * @encoding: the document encoding, or NULL 14729 * @options: a combination of xmlParserOption 14730 * 14731 * parse an XML in-memory document and build a tree. 
14732 * This reuses the existing @ctxt parser context 14733 * 14734 * Returns the resulting document tree 14735 */ 14736xmlDocPtr 14737xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 14738 const char *URL, const char *encoding, int options) 14739{ 14740 xmlParserInputBufferPtr input; 14741 xmlParserInputPtr stream; 14742 14743 if (ctxt == NULL) 14744 return (NULL); 14745 if (buffer == NULL) 14746 return (NULL); 14747 14748 xmlCtxtReset(ctxt); 14749 14750 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 14751 if (input == NULL) { 14752 return(NULL); 14753 } 14754 14755 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14756 if (stream == NULL) { 14757 xmlFreeParserInputBuffer(input); 14758 return(NULL); 14759 } 14760 14761 inputPush(ctxt, stream); 14762 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14763} 14764 14765/** 14766 * xmlCtxtReadFd: 14767 * @ctxt: an XML parser context 14768 * @fd: an open file descriptor 14769 * @URL: the base URL to use for the document 14770 * @encoding: the document encoding, or NULL 14771 * @options: a combination of xmlParserOption 14772 * 14773 * parse an XML from a file descriptor and build a tree. 14774 * This reuses the existing @ctxt parser context 14775 * NOTE that the file descriptor will not be closed when the 14776 * reader is closed or reset. 
14777 * 14778 * Returns the resulting document tree 14779 */ 14780xmlDocPtr 14781xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 14782 const char *URL, const char *encoding, int options) 14783{ 14784 xmlParserInputBufferPtr input; 14785 xmlParserInputPtr stream; 14786 14787 if (fd < 0) 14788 return (NULL); 14789 if (ctxt == NULL) 14790 return (NULL); 14791 14792 xmlCtxtReset(ctxt); 14793 14794 14795 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 14796 if (input == NULL) 14797 return (NULL); 14798 input->closecallback = NULL; 14799 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14800 if (stream == NULL) { 14801 xmlFreeParserInputBuffer(input); 14802 return (NULL); 14803 } 14804 inputPush(ctxt, stream); 14805 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14806} 14807 14808/** 14809 * xmlCtxtReadIO: 14810 * @ctxt: an XML parser context 14811 * @ioread: an I/O read function 14812 * @ioclose: an I/O close function 14813 * @ioctx: an I/O handler 14814 * @URL: the base URL to use for the document 14815 * @encoding: the document encoding, or NULL 14816 * @options: a combination of xmlParserOption 14817 * 14818 * parse an XML document from I/O functions and source and build a tree. 
14819 * This reuses the existing @ctxt parser context 14820 * 14821 * Returns the resulting document tree 14822 */ 14823xmlDocPtr 14824xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 14825 xmlInputCloseCallback ioclose, void *ioctx, 14826 const char *URL, 14827 const char *encoding, int options) 14828{ 14829 xmlParserInputBufferPtr input; 14830 xmlParserInputPtr stream; 14831 14832 if (ioread == NULL) 14833 return (NULL); 14834 if (ctxt == NULL) 14835 return (NULL); 14836 14837 xmlCtxtReset(ctxt); 14838 14839 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 14840 XML_CHAR_ENCODING_NONE); 14841 if (input == NULL) 14842 return (NULL); 14843 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 14844 if (stream == NULL) { 14845 xmlFreeParserInputBuffer(input); 14846 return (NULL); 14847 } 14848 inputPush(ctxt, stream); 14849 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 14850} 14851 14852#define bottom_parser 14853#include "elfgcchack.h" 14854