/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
29 * 30 * daniel@veillard.com 31 */ 32 33#define IN_LIBXML 34#include "libxml.h" 35 36#if defined(WIN32) && !defined (__CYGWIN__) 37#define XML_DIR_SEP '\\' 38#else 39#define XML_DIR_SEP '/' 40#endif 41 42#include <stdlib.h> 43#include <string.h> 44#include <stdarg.h> 45#include <libxml/xmlmemory.h> 46#include <libxml/threads.h> 47#include <libxml/globals.h> 48#include <libxml/tree.h> 49#include <libxml/parser.h> 50#include <libxml/parserInternals.h> 51#include <libxml/valid.h> 52#include <libxml/entities.h> 53#include <libxml/xmlerror.h> 54#include <libxml/encoding.h> 55#include <libxml/xmlIO.h> 56#include <libxml/uri.h> 57#ifdef LIBXML_CATALOG_ENABLED 58#include <libxml/catalog.h> 59#endif 60#ifdef LIBXML_SCHEMAS_ENABLED 61#include <libxml/xmlschemastypes.h> 62#include <libxml/relaxng.h> 63#endif 64#ifdef HAVE_CTYPE_H 65#include <ctype.h> 66#endif 67#ifdef HAVE_STDLIB_H 68#include <stdlib.h> 69#endif 70#ifdef HAVE_SYS_STAT_H 71#include <sys/stat.h> 72#endif 73#ifdef HAVE_FCNTL_H 74#include <fcntl.h> 75#endif 76#ifdef HAVE_UNISTD_H 77#include <unistd.h> 78#endif 79#ifdef HAVE_ZLIB_H 80#include <zlib.h> 81#endif 82 83/** 84 * xmlParserMaxDepth: 85 * 86 * arbitrary depth limit for the XML documents that we allow to 87 * process. This is not a limitation of the parser but a safety 88 * boundary feature. 
89 */ 90unsigned int xmlParserMaxDepth = 1024; 91 92#define SAX2 1 93 94#define XML_PARSER_BIG_BUFFER_SIZE 300 95#define XML_PARSER_BUFFER_SIZE 100 96 97#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" 98 99/* 100 * List of XML prefixed PI allowed by W3C specs 101 */ 102 103static const char *xmlW3CPIs[] = { 104 "xml-stylesheet", 105 NULL 106}; 107 108 109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ 110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, 111 const xmlChar **str); 112 113static xmlParserErrors 114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 115 xmlSAXHandlerPtr sax, 116 void *user_data, int depth, const xmlChar *URL, 117 const xmlChar *ID, xmlNodePtr *list); 118 119#ifdef LIBXML_LEGACY_ENABLED 120static void 121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 122 xmlNodePtr lastNode); 123#endif /* LIBXML_LEGACY_ENABLED */ 124 125static xmlParserErrors 126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 127 const xmlChar *string, void *user_data, xmlNodePtr *lst); 128 129/************************************************************************ 130 * * 131 * Some factorized error routines * 132 * * 133 ************************************************************************/ 134 135/** 136 * xmlErrAttributeDup: 137 * @ctxt: an XML parser context 138 * @prefix: the attribute prefix 139 * @localname: the attribute localname 140 * 141 * Handle a redefinition of attribute error 142 */ 143static void 144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, 145 const xmlChar * localname) 146{ 147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 148 (ctxt->instate == XML_PARSER_EOF)) 149 return; 150 if (ctxt != NULL) 151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; 152 if (prefix == NULL) 153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 154 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 155 (const char *) localname, NULL, NULL, 0, 0, 156 
"Attribute %s redefined\n", localname); 157 else 158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 159 ctxt->errNo, XML_ERR_FATAL, NULL, 0, 160 (const char *) prefix, (const char *) localname, 161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, 162 localname); 163 if (ctxt != NULL) { 164 ctxt->wellFormed = 0; 165 if (ctxt->recovery == 0) 166 ctxt->disableSAX = 1; 167 } 168} 169 170/** 171 * xmlFatalErr: 172 * @ctxt: an XML parser context 173 * @error: the error number 174 * @extra: extra information string 175 * 176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 177 */ 178static void 179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) 180{ 181 const char *errmsg; 182 183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 184 (ctxt->instate == XML_PARSER_EOF)) 185 return; 186 switch (error) { 187 case XML_ERR_INVALID_HEX_CHARREF: 188 errmsg = "CharRef: invalid hexadecimal value\n"; 189 break; 190 case XML_ERR_INVALID_DEC_CHARREF: 191 errmsg = "CharRef: invalid decimal value\n"; 192 break; 193 case XML_ERR_INVALID_CHARREF: 194 errmsg = "CharRef: invalid value\n"; 195 break; 196 case XML_ERR_INTERNAL_ERROR: 197 errmsg = "internal error"; 198 break; 199 case XML_ERR_PEREF_AT_EOF: 200 errmsg = "PEReference at end of document\n"; 201 break; 202 case XML_ERR_PEREF_IN_PROLOG: 203 errmsg = "PEReference in prolog\n"; 204 break; 205 case XML_ERR_PEREF_IN_EPILOG: 206 errmsg = "PEReference in epilog\n"; 207 break; 208 case XML_ERR_PEREF_NO_NAME: 209 errmsg = "PEReference: no name\n"; 210 break; 211 case XML_ERR_PEREF_SEMICOL_MISSING: 212 errmsg = "PEReference: expecting ';'\n"; 213 break; 214 case XML_ERR_ENTITY_LOOP: 215 errmsg = "Detected an entity reference loop\n"; 216 break; 217 case XML_ERR_ENTITY_NOT_STARTED: 218 errmsg = "EntityValue: \" or ' expected\n"; 219 break; 220 case XML_ERR_ENTITY_PE_INTERNAL: 221 errmsg = "PEReferences forbidden in internal subset\n"; 222 break; 223 case 
XML_ERR_ENTITY_NOT_FINISHED: 224 errmsg = "EntityValue: \" or ' expected\n"; 225 break; 226 case XML_ERR_ATTRIBUTE_NOT_STARTED: 227 errmsg = "AttValue: \" or ' expected\n"; 228 break; 229 case XML_ERR_LT_IN_ATTRIBUTE: 230 errmsg = "Unescaped '<' not allowed in attributes values\n"; 231 break; 232 case XML_ERR_LITERAL_NOT_STARTED: 233 errmsg = "SystemLiteral \" or ' expected\n"; 234 break; 235 case XML_ERR_LITERAL_NOT_FINISHED: 236 errmsg = "Unfinished System or Public ID \" or ' expected\n"; 237 break; 238 case XML_ERR_MISPLACED_CDATA_END: 239 errmsg = "Sequence ']]>' not allowed in content\n"; 240 break; 241 case XML_ERR_URI_REQUIRED: 242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; 243 break; 244 case XML_ERR_PUBID_REQUIRED: 245 errmsg = "PUBLIC, the Public Identifier is missing\n"; 246 break; 247 case XML_ERR_HYPHEN_IN_COMMENT: 248 errmsg = "Comment must not contain '--' (double-hyphen)\n"; 249 break; 250 case XML_ERR_PI_NOT_STARTED: 251 errmsg = "xmlParsePI : no target name\n"; 252 break; 253 case XML_ERR_RESERVED_XML_NAME: 254 errmsg = "Invalid PI name\n"; 255 break; 256 case XML_ERR_NOTATION_NOT_STARTED: 257 errmsg = "NOTATION: Name expected here\n"; 258 break; 259 case XML_ERR_NOTATION_NOT_FINISHED: 260 errmsg = "'>' required to close NOTATION declaration\n"; 261 break; 262 case XML_ERR_VALUE_REQUIRED: 263 errmsg = "Entity value required\n"; 264 break; 265 case XML_ERR_URI_FRAGMENT: 266 errmsg = "Fragment not allowed"; 267 break; 268 case XML_ERR_ATTLIST_NOT_STARTED: 269 errmsg = "'(' required to start ATTLIST enumeration\n"; 270 break; 271 case XML_ERR_NMTOKEN_REQUIRED: 272 errmsg = "NmToken expected in ATTLIST enumeration\n"; 273 break; 274 case XML_ERR_ATTLIST_NOT_FINISHED: 275 errmsg = "')' required to finish ATTLIST enumeration\n"; 276 break; 277 case XML_ERR_MIXED_NOT_STARTED: 278 errmsg = "MixedContentDecl : '|' or ')*' expected\n"; 279 break; 280 case XML_ERR_PCDATA_REQUIRED: 281 errmsg = "MixedContentDecl : '#PCDATA' expected\n"; 282 break; 
283 case XML_ERR_ELEMCONTENT_NOT_STARTED: 284 errmsg = "ContentDecl : Name or '(' expected\n"; 285 break; 286 case XML_ERR_ELEMCONTENT_NOT_FINISHED: 287 errmsg = "ContentDecl : ',' '|' or ')' expected\n"; 288 break; 289 case XML_ERR_PEREF_IN_INT_SUBSET: 290 errmsg = 291 "PEReference: forbidden within markup decl in internal subset\n"; 292 break; 293 case XML_ERR_GT_REQUIRED: 294 errmsg = "expected '>'\n"; 295 break; 296 case XML_ERR_CONDSEC_INVALID: 297 errmsg = "XML conditional section '[' expected\n"; 298 break; 299 case XML_ERR_EXT_SUBSET_NOT_FINISHED: 300 errmsg = "Content error in the external subset\n"; 301 break; 302 case XML_ERR_CONDSEC_INVALID_KEYWORD: 303 errmsg = 304 "conditional section INCLUDE or IGNORE keyword expected\n"; 305 break; 306 case XML_ERR_CONDSEC_NOT_FINISHED: 307 errmsg = "XML conditional section not closed\n"; 308 break; 309 case XML_ERR_XMLDECL_NOT_STARTED: 310 errmsg = "Text declaration '<?xml' required\n"; 311 break; 312 case XML_ERR_XMLDECL_NOT_FINISHED: 313 errmsg = "parsing XML declaration: '?>' expected\n"; 314 break; 315 case XML_ERR_EXT_ENTITY_STANDALONE: 316 errmsg = "external parsed entities cannot be standalone\n"; 317 break; 318 case XML_ERR_ENTITYREF_SEMICOL_MISSING: 319 errmsg = "EntityRef: expecting ';'\n"; 320 break; 321 case XML_ERR_DOCTYPE_NOT_FINISHED: 322 errmsg = "DOCTYPE improperly terminated\n"; 323 break; 324 case XML_ERR_LTSLASH_REQUIRED: 325 errmsg = "EndTag: '</' not found\n"; 326 break; 327 case XML_ERR_EQUAL_REQUIRED: 328 errmsg = "expected '='\n"; 329 break; 330 case XML_ERR_STRING_NOT_CLOSED: 331 errmsg = "String not closed expecting \" or '\n"; 332 break; 333 case XML_ERR_STRING_NOT_STARTED: 334 errmsg = "String not started expecting ' or \"\n"; 335 break; 336 case XML_ERR_ENCODING_NAME: 337 errmsg = "Invalid XML encoding name\n"; 338 break; 339 case XML_ERR_STANDALONE_VALUE: 340 errmsg = "standalone accepts only 'yes' or 'no'\n"; 341 break; 342 case XML_ERR_DOCUMENT_EMPTY: 343 errmsg = "Document is 
empty\n"; 344 break; 345 case XML_ERR_DOCUMENT_END: 346 errmsg = "Extra content at the end of the document\n"; 347 break; 348 case XML_ERR_NOT_WELL_BALANCED: 349 errmsg = "chunk is not well balanced\n"; 350 break; 351 case XML_ERR_EXTRA_CONTENT: 352 errmsg = "extra content at the end of well balanced chunk\n"; 353 break; 354 case XML_ERR_VERSION_MISSING: 355 errmsg = "Malformed declaration expecting version\n"; 356 break; 357#if 0 358 case: 359 errmsg = "\n"; 360 break; 361#endif 362 default: 363 errmsg = "Unregistered error message\n"; 364 } 365 if (ctxt != NULL) 366 ctxt->errNo = error; 367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, 369 info); 370 if (ctxt != NULL) { 371 ctxt->wellFormed = 0; 372 if (ctxt->recovery == 0) 373 ctxt->disableSAX = 1; 374 } 375} 376 377/** 378 * xmlFatalErrMsg: 379 * @ctxt: an XML parser context 380 * @error: the error number 381 * @msg: the error message 382 * 383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 384 */ 385static void 386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 387 const char *msg) 388{ 389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 390 (ctxt->instate == XML_PARSER_EOF)) 391 return; 392 if (ctxt != NULL) 393 ctxt->errNo = error; 394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, 395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); 396 if (ctxt != NULL) { 397 ctxt->wellFormed = 0; 398 if (ctxt->recovery == 0) 399 ctxt->disableSAX = 1; 400 } 401} 402 403/** 404 * xmlWarningMsg: 405 * @ctxt: an XML parser context 406 * @error: the error number 407 * @msg: the error message 408 * @str1: extra data 409 * @str2: extra data 410 * 411 * Handle a warning. 
412 */ 413static void 414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, 415 const char *msg, const xmlChar *str1, const xmlChar *str2) 416{ 417 xmlStructuredErrorFunc schannel = NULL; 418 419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 420 (ctxt->instate == XML_PARSER_EOF)) 421 return; 422 if ((ctxt != NULL) && (ctxt->sax != NULL) && 423 (ctxt->sax->initialized == XML_SAX2_MAGIC)) 424 schannel = ctxt->sax->serror; 425 __xmlRaiseError(schannel, 426 (ctxt->sax) ? ctxt->sax->warning : NULL, 427 ctxt->userData, 428 ctxt, NULL, XML_FROM_PARSER, error, 429 XML_ERR_WARNING, NULL, 0, 430 (const char *) str1, (const char *) str2, NULL, 0, 0, 431 msg, (const char *) str1, (const char *) str2); 432} 433 434/** 435 * xmlValidityError: 436 * @ctxt: an XML parser context 437 * @error: the error number 438 * @msg: the error message 439 * @str1: extra data 440 * 441 * Handle a validity error. 442 */ 443static void 444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, 445 const char *msg, const xmlChar *str1) 446{ 447 xmlStructuredErrorFunc schannel = NULL; 448 449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 450 (ctxt->instate == XML_PARSER_EOF)) 451 return; 452 if (ctxt != NULL) { 453 ctxt->errNo = error; 454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) 455 schannel = ctxt->sax->serror; 456 } 457 __xmlRaiseError(schannel, 458 ctxt->vctxt.error, ctxt->vctxt.userData, 459 ctxt, NULL, XML_FROM_DTD, error, 460 XML_ERR_ERROR, NULL, 0, (const char *) str1, 461 NULL, NULL, 0, 0, 462 msg, (const char *) str1); 463 if (ctxt != NULL) { 464 ctxt->valid = 0; 465 } 466} 467 468/** 469 * xmlFatalErrMsgInt: 470 * @ctxt: an XML parser context 471 * @error: the error number 472 * @msg: the error message 473 * @val: an integer value 474 * 475 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 476 */ 477static void 478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 479 const char *msg, int val) 480{ 481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 482 (ctxt->instate == XML_PARSER_EOF)) 483 return; 484 if (ctxt != NULL) 485 ctxt->errNo = error; 486 __xmlRaiseError(NULL, NULL, NULL, 487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 489 if (ctxt != NULL) { 490 ctxt->wellFormed = 0; 491 if (ctxt->recovery == 0) 492 ctxt->disableSAX = 1; 493 } 494} 495 496/** 497 * xmlFatalErrMsgStrIntStr: 498 * @ctxt: an XML parser context 499 * @error: the error number 500 * @msg: the error message 501 * @str1: an string info 502 * @val: an integer value 503 * @str2: an string info 504 * 505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 506 */ 507static void 508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 509 const char *msg, const xmlChar *str1, int val, 510 const xmlChar *str2) 511{ 512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 513 (ctxt->instate == XML_PARSER_EOF)) 514 return; 515 if (ctxt != NULL) 516 ctxt->errNo = error; 517 __xmlRaiseError(NULL, NULL, NULL, 518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 519 NULL, 0, (const char *) str1, (const char *) str2, 520 NULL, val, 0, msg, str1, val, str2); 521 if (ctxt != NULL) { 522 ctxt->wellFormed = 0; 523 if (ctxt->recovery == 0) 524 ctxt->disableSAX = 1; 525 } 526} 527 528/** 529 * xmlFatalErrMsgStr: 530 * @ctxt: an XML parser context 531 * @error: the error number 532 * @msg: the error message 533 * @val: a string value 534 * 535 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints 536 */ 537static void 538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 539 const char *msg, const xmlChar * val) 540{ 541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 542 (ctxt->instate == XML_PARSER_EOF)) 543 return; 544 if (ctxt != NULL) 545 ctxt->errNo = error; 546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 547 XML_FROM_PARSER, error, XML_ERR_FATAL, 548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 549 val); 550 if (ctxt != NULL) { 551 ctxt->wellFormed = 0; 552 if (ctxt->recovery == 0) 553 ctxt->disableSAX = 1; 554 } 555} 556 557/** 558 * xmlErrMsgStr: 559 * @ctxt: an XML parser context 560 * @error: the error number 561 * @msg: the error message 562 * @val: a string value 563 * 564 * Handle a non fatal parser error 565 */ 566static void 567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 568 const char *msg, const xmlChar * val) 569{ 570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 571 (ctxt->instate == XML_PARSER_EOF)) 572 return; 573 if (ctxt != NULL) 574 ctxt->errNo = error; 575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, 576 XML_FROM_PARSER, error, XML_ERR_ERROR, 577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, 578 val); 579} 580 581/** 582 * xmlNsErr: 583 * @ctxt: an XML parser context 584 * @error: the error number 585 * @msg: the message 586 * @info1: extra information string 587 * @info2: extra information string 588 * 589 * Handle a fatal parser error, i.e. 
violating Well-Formedness constraints
 */
static void
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
         const char *msg,
         const xmlChar * info1, const xmlChar * info2,
         const xmlChar * info3)
{
    if (ctxt != NULL) {
        /* Parsing already stopped with SAX disabled: report nothing. */
        if ((ctxt->instate == XML_PARSER_EOF) && (ctxt->disableSAX != 0))
            return;
        ctxt->errNo = error;
    }

    /* Raised in the namespace domain at XML_ERR_ERROR level. */
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
                    (const char *) info2, (const char *) info3, 0, 0, msg,
                    info1, info2, info3);

    /* Only the namespace well-formedness flag is cleared. */
    if (ctxt != NULL)
        ctxt->nsWellFormed = 0;
}

/************************************************************************
 *                                                                      *
 *              Library wide options                                    *
 *                                                                      *
 ************************************************************************/

/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists, zero (0) if the
 * feature does not exist or an unknown feature is requested.
625 */ 626int 627xmlHasFeature(xmlFeature feature) 628{ 629 switch (feature) { 630 case XML_WITH_THREAD: 631#ifdef LIBXML_THREAD_ENABLED 632 return(1); 633#else 634 return(0); 635#endif 636 case XML_WITH_TREE: 637#ifdef LIBXML_TREE_ENABLED 638 return(1); 639#else 640 return(0); 641#endif 642 case XML_WITH_OUTPUT: 643#ifdef LIBXML_OUTPUT_ENABLED 644 return(1); 645#else 646 return(0); 647#endif 648 case XML_WITH_PUSH: 649#ifdef LIBXML_PUSH_ENABLED 650 return(1); 651#else 652 return(0); 653#endif 654 case XML_WITH_READER: 655#ifdef LIBXML_READER_ENABLED 656 return(1); 657#else 658 return(0); 659#endif 660 case XML_WITH_PATTERN: 661#ifdef LIBXML_PATTERN_ENABLED 662 return(1); 663#else 664 return(0); 665#endif 666 case XML_WITH_WRITER: 667#ifdef LIBXML_WRITER_ENABLED 668 return(1); 669#else 670 return(0); 671#endif 672 case XML_WITH_SAX1: 673#ifdef LIBXML_SAX1_ENABLED 674 return(1); 675#else 676 return(0); 677#endif 678 case XML_WITH_FTP: 679#ifdef LIBXML_FTP_ENABLED 680 return(1); 681#else 682 return(0); 683#endif 684 case XML_WITH_HTTP: 685#ifdef LIBXML_HTTP_ENABLED 686 return(1); 687#else 688 return(0); 689#endif 690 case XML_WITH_VALID: 691#ifdef LIBXML_VALID_ENABLED 692 return(1); 693#else 694 return(0); 695#endif 696 case XML_WITH_HTML: 697#ifdef LIBXML_HTML_ENABLED 698 return(1); 699#else 700 return(0); 701#endif 702 case XML_WITH_LEGACY: 703#ifdef LIBXML_LEGACY_ENABLED 704 return(1); 705#else 706 return(0); 707#endif 708 case XML_WITH_C14N: 709#ifdef LIBXML_C14N_ENABLED 710 return(1); 711#else 712 return(0); 713#endif 714 case XML_WITH_CATALOG: 715#ifdef LIBXML_CATALOG_ENABLED 716 return(1); 717#else 718 return(0); 719#endif 720 case XML_WITH_XPATH: 721#ifdef LIBXML_XPATH_ENABLED 722 return(1); 723#else 724 return(0); 725#endif 726 case XML_WITH_XPTR: 727#ifdef LIBXML_XPTR_ENABLED 728 return(1); 729#else 730 return(0); 731#endif 732 case XML_WITH_XINCLUDE: 733#ifdef LIBXML_XINCLUDE_ENABLED 734 return(1); 735#else 736 return(0); 737#endif 738 case XML_WITH_ICONV: 
739#ifdef LIBXML_ICONV_ENABLED 740 return(1); 741#else 742 return(0); 743#endif 744 case XML_WITH_ISO8859X: 745#ifdef LIBXML_ISO8859X_ENABLED 746 return(1); 747#else 748 return(0); 749#endif 750 case XML_WITH_UNICODE: 751#ifdef LIBXML_UNICODE_ENABLED 752 return(1); 753#else 754 return(0); 755#endif 756 case XML_WITH_REGEXP: 757#ifdef LIBXML_REGEXP_ENABLED 758 return(1); 759#else 760 return(0); 761#endif 762 case XML_WITH_AUTOMATA: 763#ifdef LIBXML_AUTOMATA_ENABLED 764 return(1); 765#else 766 return(0); 767#endif 768 case XML_WITH_EXPR: 769#ifdef LIBXML_EXPR_ENABLED 770 return(1); 771#else 772 return(0); 773#endif 774 case XML_WITH_SCHEMAS: 775#ifdef LIBXML_SCHEMAS_ENABLED 776 return(1); 777#else 778 return(0); 779#endif 780 case XML_WITH_SCHEMATRON: 781#ifdef LIBXML_SCHEMATRON_ENABLED 782 return(1); 783#else 784 return(0); 785#endif 786 case XML_WITH_MODULES: 787#ifdef LIBXML_MODULES_ENABLED 788 return(1); 789#else 790 return(0); 791#endif 792 case XML_WITH_DEBUG: 793#ifdef LIBXML_DEBUG_ENABLED 794 return(1); 795#else 796 return(0); 797#endif 798 case XML_WITH_DEBUG_MEM: 799#ifdef DEBUG_MEMORY_LOCATION 800 return(1); 801#else 802 return(0); 803#endif 804 case XML_WITH_DEBUG_RUN: 805#ifdef LIBXML_DEBUG_RUNTIME 806 return(1); 807#else 808 return(0); 809#endif 810 case XML_WITH_ZLIB: 811#ifdef LIBXML_ZLIB_ENABLED 812 return(1); 813#else 814 return(0); 815#endif 816 default: 817 break; 818 } 819 return(0); 820} 821 822/************************************************************************ 823 * * 824 * SAX2 defaulted attributes handling * 825 * * 826 ************************************************************************/ 827 828/** 829 * xmlDetectSAX2: 830 * @ctxt: an XML parser context 831 * 832 * Do the SAX2 detection and specific intialization 833 */ 834static void 835xmlDetectSAX2(xmlParserCtxtPtr ctxt) { 836 if (ctxt == NULL) return; 837#ifdef LIBXML_SAX1_ENABLED 838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && 839 
((ctxt->sax->startElementNs != NULL) || 840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; 841#else 842 ctxt->sax2 = 1; 843#endif /* LIBXML_SAX1_ENABLED */ 844 845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || 849 (ctxt->str_xml_ns == NULL)) { 850 xmlErrMemory(ctxt, NULL); 851 } 852} 853 854typedef struct _xmlDefAttrs xmlDefAttrs; 855typedef xmlDefAttrs *xmlDefAttrsPtr; 856struct _xmlDefAttrs { 857 int nbAttrs; /* number of defaulted attributes on that element */ 858 int maxAttrs; /* the size of the array */ 859 const xmlChar *values[4]; /* array of localname/prefix/values */ 860}; 861 862/** 863 * xmlAddDefAttrs: 864 * @ctxt: an XML parser context 865 * @fullname: the element fullname 866 * @fullattr: the attribute fullname 867 * @value: the attribute value 868 * 869 * Add a defaulted attribute for an element 870 */ 871static void 872xmlAddDefAttrs(xmlParserCtxtPtr ctxt, 873 const xmlChar *fullname, 874 const xmlChar *fullattr, 875 const xmlChar *value) { 876 xmlDefAttrsPtr defaults; 877 int len; 878 const xmlChar *name; 879 const xmlChar *prefix; 880 881 if (ctxt->attsDefault == NULL) { 882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); 883 if (ctxt->attsDefault == NULL) 884 goto mem_error; 885 } 886 887 /* 888 * split the element name into prefix:localname , the string found 889 * are within the DTD and then not associated to namespace names. 
890 */ 891 name = xmlSplitQName3(fullname, &len); 892 if (name == NULL) { 893 name = xmlDictLookup(ctxt->dict, fullname, -1); 894 prefix = NULL; 895 } else { 896 name = xmlDictLookup(ctxt->dict, name, -1); 897 prefix = xmlDictLookup(ctxt->dict, fullname, len); 898 } 899 900 /* 901 * make sure there is some storage 902 */ 903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); 904 if (defaults == NULL) { 905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + 906 (4 * 4) * sizeof(const xmlChar *)); 907 if (defaults == NULL) 908 goto mem_error; 909 defaults->nbAttrs = 0; 910 defaults->maxAttrs = 4; 911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 912 } else if (defaults->nbAttrs >= defaults->maxAttrs) { 913 xmlDefAttrsPtr temp; 914 915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + 916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); 917 if (temp == NULL) 918 goto mem_error; 919 defaults = temp; 920 defaults->maxAttrs *= 2; 921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); 922 } 923 924 /* 925 * Split the element name into prefix:localname , the string found 926 * are within the DTD and hen not associated to namespace names. 
927 */ 928 name = xmlSplitQName3(fullattr, &len); 929 if (name == NULL) { 930 name = xmlDictLookup(ctxt->dict, fullattr, -1); 931 prefix = NULL; 932 } else { 933 name = xmlDictLookup(ctxt->dict, name, -1); 934 prefix = xmlDictLookup(ctxt->dict, fullattr, len); 935 } 936 937 defaults->values[4 * defaults->nbAttrs] = name; 938 defaults->values[4 * defaults->nbAttrs + 1] = prefix; 939 /* intern the string and precompute the end */ 940 len = xmlStrlen(value); 941 value = xmlDictLookup(ctxt->dict, value, len); 942 defaults->values[4 * defaults->nbAttrs + 2] = value; 943 defaults->values[4 * defaults->nbAttrs + 3] = value + len; 944 defaults->nbAttrs++; 945 946 return; 947 948mem_error: 949 xmlErrMemory(ctxt, NULL); 950 return; 951} 952 953/** 954 * xmlAddSpecialAttr: 955 * @ctxt: an XML parser context 956 * @fullname: the element fullname 957 * @fullattr: the attribute fullname 958 * @type: the attribute type 959 * 960 * Register that this attribute is not CDATA 961 */ 962static void 963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, 964 const xmlChar *fullname, 965 const xmlChar *fullattr, 966 int type) 967{ 968 if (ctxt->attsSpecial == NULL) { 969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); 970 if (ctxt->attsSpecial == NULL) 971 goto mem_error; 972 } 973 974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, 975 (void *) (long) type); 976 return; 977 978mem_error: 979 xmlErrMemory(ctxt, NULL); 980 return; 981} 982 983/** 984 * xmlCheckLanguageID: 985 * @lang: pointer to the string value 986 * 987 * Checks that the value conforms to the LanguageID production: 988 * 989 * NOTE: this is somewhat deprecated, those productions were removed from 990 * the XML Second edition. 
991 * 992 * [33] LanguageID ::= Langcode ('-' Subcode)* 993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode 994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) 995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ 996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ 997 * [38] Subcode ::= ([a-z] | [A-Z])+ 998 * 999 * Returns 1 if correct 0 otherwise 1000 **/ 1001int 1002xmlCheckLanguageID(const xmlChar * lang) 1003{ 1004 const xmlChar *cur = lang; 1005 1006 if (cur == NULL) 1007 return (0); 1008 if (((cur[0] == 'i') && (cur[1] == '-')) || 1009 ((cur[0] == 'I') && (cur[1] == '-'))) { 1010 /* 1011 * IANA code 1012 */ 1013 cur += 2; 1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1015 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1016 cur++; 1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) || 1018 ((cur[0] == 'X') && (cur[1] == '-'))) { 1019 /* 1020 * User code 1021 */ 1022 cur += 2; 1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1024 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1025 cur++; 1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) { 1028 /* 1029 * ISO639 1030 */ 1031 cur++; 1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1033 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1034 cur++; 1035 else 1036 return (0); 1037 } else 1038 return (0); 1039 while (cur[0] != 0) { /* non input consuming */ 1040 if (cur[0] != '-') 1041 return (0); 1042 cur++; 1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || 1044 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1045 cur++; 1046 else 1047 return (0); 1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ 1049 ((cur[0] >= 'a') && (cur[0] <= 'z'))) 1050 cur++; 1051 } 1052 return (1); 1053} 1054 1055/************************************************************************ 1056 * * 1057 * Parser stacks related functions and macros * 1058 * * 1059 
************************************************************************/ 1060 1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, 1062 const xmlChar ** str); 1063 1064#ifdef SAX2 1065/** 1066 * nsPush: 1067 * @ctxt: an XML parser context 1068 * @prefix: the namespace prefix or NULL 1069 * @URL: the namespace name 1070 * 1071 * Pushes a new parser namespace on top of the ns stack 1072 * 1073 * Returns -1 in case of error, -2 if the namespace should be discarded 1074 * and the index in the stack otherwise. 1075 */ 1076static int 1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) 1078{ 1079 if (ctxt->options & XML_PARSE_NSCLEAN) { 1080 int i; 1081 for (i = 0;i < ctxt->nsNr;i += 2) { 1082 if (ctxt->nsTab[i] == prefix) { 1083 /* in scope */ 1084 if (ctxt->nsTab[i + 1] == URL) 1085 return(-2); 1086 /* out of scope keep it */ 1087 break; 1088 } 1089 } 1090 } 1091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { 1092 ctxt->nsMax = 10; 1093 ctxt->nsNr = 0; 1094 ctxt->nsTab = (const xmlChar **) 1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); 1096 if (ctxt->nsTab == NULL) { 1097 xmlErrMemory(ctxt, NULL); 1098 ctxt->nsMax = 0; 1099 return (-1); 1100 } 1101 } else if (ctxt->nsNr >= ctxt->nsMax) { 1102 ctxt->nsMax *= 2; 1103 ctxt->nsTab = (const xmlChar **) 1104 xmlRealloc((char *) ctxt->nsTab, 1105 ctxt->nsMax * sizeof(ctxt->nsTab[0])); 1106 if (ctxt->nsTab == NULL) { 1107 xmlErrMemory(ctxt, NULL); 1108 ctxt->nsMax /= 2; 1109 return (-1); 1110 } 1111 } 1112 ctxt->nsTab[ctxt->nsNr++] = prefix; 1113 ctxt->nsTab[ctxt->nsNr++] = URL; 1114 return (ctxt->nsNr); 1115} 1116/** 1117 * nsPop: 1118 * @ctxt: an XML parser context 1119 * @nr: the number to pop 1120 * 1121 * Pops the top @nr parser prefix/namespace from the ns stack 1122 * 1123 * Returns the number of namespaces removed 1124 */ 1125static int 1126nsPop(xmlParserCtxtPtr ctxt, int nr) 1127{ 1128 int i; 1129 1130 if (ctxt->nsTab == NULL) return(0); 1131 if (ctxt->nsNr < nr) { 1132 
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); 1133 nr = ctxt->nsNr; 1134 } 1135 if (ctxt->nsNr <= 0) 1136 return (0); 1137 1138 for (i = 0;i < nr;i++) { 1139 ctxt->nsNr--; 1140 ctxt->nsTab[ctxt->nsNr] = NULL; 1141 } 1142 return(nr); 1143} 1144#endif 1145 1146static int 1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { 1148 const xmlChar **atts; 1149 int *attallocs; 1150 int maxatts; 1151 1152 if (ctxt->atts == NULL) { 1153 maxatts = 55; /* allow for 10 attrs by default */ 1154 atts = (const xmlChar **) 1155 xmlMalloc(maxatts * sizeof(xmlChar *)); 1156 if (atts == NULL) goto mem_error; 1157 ctxt->atts = atts; 1158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); 1159 if (attallocs == NULL) goto mem_error; 1160 ctxt->attallocs = attallocs; 1161 ctxt->maxatts = maxatts; 1162 } else if (nr + 5 > ctxt->maxatts) { 1163 maxatts = (nr + 5) * 2; 1164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, 1165 maxatts * sizeof(const xmlChar *)); 1166 if (atts == NULL) goto mem_error; 1167 ctxt->atts = atts; 1168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, 1169 (maxatts / 5) * sizeof(int)); 1170 if (attallocs == NULL) goto mem_error; 1171 ctxt->attallocs = attallocs; 1172 ctxt->maxatts = maxatts; 1173 } 1174 return(ctxt->maxatts); 1175mem_error: 1176 xmlErrMemory(ctxt, NULL); 1177 return(-1); 1178} 1179 1180/** 1181 * inputPush: 1182 * @ctxt: an XML parser context 1183 * @value: the parser input 1184 * 1185 * Pushes a new parser input on top of the input stack 1186 * 1187 * Returns 0 in case of error, the index in the stack otherwise 1188 */ 1189int 1190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) 1191{ 1192 if ((ctxt == NULL) || (value == NULL)) 1193 return(0); 1194 if (ctxt->inputNr >= ctxt->inputMax) { 1195 ctxt->inputMax *= 2; 1196 ctxt->inputTab = 1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, 1198 ctxt->inputMax * 1199 sizeof(ctxt->inputTab[0])); 1200 if (ctxt->inputTab == NULL) { 1201 
xmlErrMemory(ctxt, NULL); 1202 return (0); 1203 } 1204 } 1205 ctxt->inputTab[ctxt->inputNr] = value; 1206 ctxt->input = value; 1207 return (ctxt->inputNr++); 1208} 1209/** 1210 * inputPop: 1211 * @ctxt: an XML parser context 1212 * 1213 * Pops the top parser input from the input stack 1214 * 1215 * Returns the input just removed 1216 */ 1217xmlParserInputPtr 1218inputPop(xmlParserCtxtPtr ctxt) 1219{ 1220 xmlParserInputPtr ret; 1221 1222 if (ctxt == NULL) 1223 return(NULL); 1224 if (ctxt->inputNr <= 0) 1225 return (NULL); 1226 ctxt->inputNr--; 1227 if (ctxt->inputNr > 0) 1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; 1229 else 1230 ctxt->input = NULL; 1231 ret = ctxt->inputTab[ctxt->inputNr]; 1232 ctxt->inputTab[ctxt->inputNr] = NULL; 1233 return (ret); 1234} 1235/** 1236 * nodePush: 1237 * @ctxt: an XML parser context 1238 * @value: the element node 1239 * 1240 * Pushes a new element node on top of the node stack 1241 * 1242 * Returns 0 in case of error, the index in the stack otherwise 1243 */ 1244int 1245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) 1246{ 1247 if (ctxt == NULL) return(0); 1248 if (ctxt->nodeNr >= ctxt->nodeMax) { 1249 xmlNodePtr *tmp; 1250 1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, 1252 ctxt->nodeMax * 2 * 1253 sizeof(ctxt->nodeTab[0])); 1254 if (tmp == NULL) { 1255 xmlErrMemory(ctxt, NULL); 1256 return (0); 1257 } 1258 ctxt->nodeTab = tmp; 1259 ctxt->nodeMax *= 2; 1260 } 1261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { 1262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 1263 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 1264 xmlParserMaxDepth); 1265 ctxt->instate = XML_PARSER_EOF; 1266 return(0); 1267 } 1268 ctxt->nodeTab[ctxt->nodeNr] = value; 1269 ctxt->node = value; 1270 return (ctxt->nodeNr++); 1271} 1272/** 1273 * nodePop: 1274 * @ctxt: an XML parser context 1275 * 1276 * Pops the top element node from the node stack 1277 * 1278 * Returns the node just removed 1279 */ 1280xmlNodePtr 
1281nodePop(xmlParserCtxtPtr ctxt) 1282{ 1283 xmlNodePtr ret; 1284 1285 if (ctxt == NULL) return(NULL); 1286 if (ctxt->nodeNr <= 0) 1287 return (NULL); 1288 ctxt->nodeNr--; 1289 if (ctxt->nodeNr > 0) 1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; 1291 else 1292 ctxt->node = NULL; 1293 ret = ctxt->nodeTab[ctxt->nodeNr]; 1294 ctxt->nodeTab[ctxt->nodeNr] = NULL; 1295 return (ret); 1296} 1297 1298#ifdef LIBXML_PUSH_ENABLED 1299/** 1300 * nameNsPush: 1301 * @ctxt: an XML parser context 1302 * @value: the element name 1303 * @prefix: the element prefix 1304 * @URI: the element namespace name 1305 * 1306 * Pushes a new element name/prefix/URL on top of the name stack 1307 * 1308 * Returns -1 in case of error, the index in the stack otherwise 1309 */ 1310static int 1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, 1312 const xmlChar *prefix, const xmlChar *URI, int nsNr) 1313{ 1314 if (ctxt->nameNr >= ctxt->nameMax) { 1315 const xmlChar * *tmp; 1316 void **tmp2; 1317 ctxt->nameMax *= 2; 1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1319 ctxt->nameMax * 1320 sizeof(ctxt->nameTab[0])); 1321 if (tmp == NULL) { 1322 ctxt->nameMax /= 2; 1323 goto mem_error; 1324 } 1325 ctxt->nameTab = tmp; 1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, 1327 ctxt->nameMax * 3 * 1328 sizeof(ctxt->pushTab[0])); 1329 if (tmp2 == NULL) { 1330 ctxt->nameMax /= 2; 1331 goto mem_error; 1332 } 1333 ctxt->pushTab = tmp2; 1334 } 1335 ctxt->nameTab[ctxt->nameNr] = value; 1336 ctxt->name = value; 1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; 1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; 1339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; 1340 return (ctxt->nameNr++); 1341mem_error: 1342 xmlErrMemory(ctxt, NULL); 1343 return (-1); 1344} 1345/** 1346 * nameNsPop: 1347 * @ctxt: an XML parser context 1348 * 1349 * Pops the top element/prefix/URI name from the name stack 1350 * 1351 * Returns the name just removed 1352 */ 
1353static const xmlChar * 1354nameNsPop(xmlParserCtxtPtr ctxt) 1355{ 1356 const xmlChar *ret; 1357 1358 if (ctxt->nameNr <= 0) 1359 return (NULL); 1360 ctxt->nameNr--; 1361 if (ctxt->nameNr > 0) 1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1363 else 1364 ctxt->name = NULL; 1365 ret = ctxt->nameTab[ctxt->nameNr]; 1366 ctxt->nameTab[ctxt->nameNr] = NULL; 1367 return (ret); 1368} 1369#endif /* LIBXML_PUSH_ENABLED */ 1370 1371/** 1372 * namePush: 1373 * @ctxt: an XML parser context 1374 * @value: the element name 1375 * 1376 * Pushes a new element name on top of the name stack 1377 * 1378 * Returns -1 in case of error, the index in the stack otherwise 1379 */ 1380int 1381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) 1382{ 1383 if (ctxt == NULL) return (-1); 1384 1385 if (ctxt->nameNr >= ctxt->nameMax) { 1386 const xmlChar * *tmp; 1387 ctxt->nameMax *= 2; 1388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, 1389 ctxt->nameMax * 1390 sizeof(ctxt->nameTab[0])); 1391 if (tmp == NULL) { 1392 ctxt->nameMax /= 2; 1393 goto mem_error; 1394 } 1395 ctxt->nameTab = tmp; 1396 } 1397 ctxt->nameTab[ctxt->nameNr] = value; 1398 ctxt->name = value; 1399 return (ctxt->nameNr++); 1400mem_error: 1401 xmlErrMemory(ctxt, NULL); 1402 return (-1); 1403} 1404/** 1405 * namePop: 1406 * @ctxt: an XML parser context 1407 * 1408 * Pops the top element name from the name stack 1409 * 1410 * Returns the name just removed 1411 */ 1412const xmlChar * 1413namePop(xmlParserCtxtPtr ctxt) 1414{ 1415 const xmlChar *ret; 1416 1417 if ((ctxt == NULL) || (ctxt->nameNr <= 0)) 1418 return (NULL); 1419 ctxt->nameNr--; 1420 if (ctxt->nameNr > 0) 1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; 1422 else 1423 ctxt->name = NULL; 1424 ret = ctxt->nameTab[ctxt->nameNr]; 1425 ctxt->nameTab[ctxt->nameNr] = NULL; 1426 return (ret); 1427} 1428 1429static int spacePush(xmlParserCtxtPtr ctxt, int val) { 1430 if (ctxt->spaceNr >= ctxt->spaceMax) { 1431 ctxt->spaceMax *= 2; 1432 
ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, 1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); 1434 if (ctxt->spaceTab == NULL) { 1435 xmlErrMemory(ctxt, NULL); 1436 return(0); 1437 } 1438 } 1439 ctxt->spaceTab[ctxt->spaceNr] = val; 1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; 1441 return(ctxt->spaceNr++); 1442} 1443 1444static int spacePop(xmlParserCtxtPtr ctxt) { 1445 int ret; 1446 if (ctxt->spaceNr <= 0) return(0); 1447 ctxt->spaceNr--; 1448 if (ctxt->spaceNr > 0) 1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; 1450 else 1451 ctxt->space = &ctxt->spaceTab[0]; 1452 ret = ctxt->spaceTab[ctxt->spaceNr]; 1453 ctxt->spaceTab[ctxt->spaceNr] = -1; 1454 return(ret); 1455} 1456 1457/* 1458 * Macros for accessing the content. Those should be used only by the parser, 1459 * and not exported. 1460 * 1461 * Dirty macros, i.e. one often need to make assumption on the context to 1462 * use them 1463 * 1464 * CUR_PTR return the current pointer to the xmlChar to be parsed. 1465 * To be used with extreme caution since operations consuming 1466 * characters may move the input buffer to a different location ! 1467 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled 1468 * This should be used internally by the parser 1469 * only to compare to ASCII values otherwise it would break when 1470 * running with UTF-8 encoding. 1471 * RAW same as CUR but in the input buffer, bypass any token 1472 * extraction that may have been done 1473 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only 1474 * to compare on ASCII based substring. 1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined 1476 * strings without newlines within the parser. 1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII 1478 * defined char within the parser. 
1479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding 1480 * 1481 * NEXT Skip to the next character, this does the proper decoding 1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly. 1483 * NEXTL(l) Skip the current unicode character of l xmlChars long. 1484 * CUR_CHAR(l) returns the current unicode character (int), set l 1485 * to the number of xmlChars used for the encoding [0-5]. 1486 * CUR_SCHAR same but operate on a string instead of the context 1487 * COPY_BUF copy the current unicode char to the target buffer, increment 1488 * the index 1489 * GROW, SHRINK handling of input buffers 1490 */ 1491 1492#define RAW (*ctxt->input->cur) 1493#define CUR (*ctxt->input->cur) 1494#define NXT(val) ctxt->input->cur[(val)] 1495#define CUR_PTR ctxt->input->cur 1496 1497#define CMP4( s, c1, c2, c3, c4 ) \ 1498 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ 1499 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) 1500#define CMP5( s, c1, c2, c3, c4, c5 ) \ 1501 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) 1502#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ 1503 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) 1504#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ 1505 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) 1506#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ 1507 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) 1508#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ 1509 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ 1510 ((unsigned char *) s)[ 8 ] == c9 ) 1511#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ 1512 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ 1513 ((unsigned char *) s)[ 9 ] == c10 ) 1514 1515#define SKIP(val) do { \ 1516 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ 1517 if (*ctxt->input->cur == '%') 
xmlParserHandlePEReference(ctxt); \ 1518 if ((*ctxt->input->cur == 0) && \ 1519 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1520 xmlPopInput(ctxt); \ 1521 } while (0) 1522 1523#define SKIPL(val) do { \ 1524 int skipl; \ 1525 for(skipl=0; skipl<val; skipl++) { \ 1526 if (*(ctxt->input->cur) == '\n') { \ 1527 ctxt->input->line++; ctxt->input->col = 1; \ 1528 } else ctxt->input->col++; \ 1529 ctxt->nbChars++; \ 1530 ctxt->input->cur++; \ 1531 } \ 1532 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ 1533 if ((*ctxt->input->cur == 0) && \ 1534 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ 1535 xmlPopInput(ctxt); \ 1536 } while (0) 1537 1538#define SHRINK if ((ctxt->progressive == 0) && \ 1539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 1541 xmlSHRINK (ctxt); 1542 1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) { 1544 xmlParserInputShrink(ctxt->input); 1545 if ((*ctxt->input->cur == 0) && 1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1547 xmlPopInput(ctxt); 1548 } 1549 1550#define GROW if ((ctxt->progressive == 0) && \ 1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 1552 xmlGROW (ctxt); 1553 1554static void xmlGROW (xmlParserCtxtPtr ctxt) { 1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1556 if ((*ctxt->input->cur == 0) && 1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1558 xmlPopInput(ctxt); 1559} 1560 1561#define SKIP_BLANKS xmlSkipBlankChars(ctxt) 1562 1563#define NEXT xmlNextChar(ctxt) 1564 1565#define NEXT1 { \ 1566 ctxt->input->col++; \ 1567 ctxt->input->cur++; \ 1568 ctxt->nbChars++; \ 1569 if (*ctxt->input->cur == 0) \ 1570 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ 1571 } 1572 1573#define NEXTL(l) do { \ 1574 if (*(ctxt->input->cur) == '\n') { \ 1575 ctxt->input->line++; ctxt->input->col = 1; \ 1576 } else ctxt->input->col++; \ 1577 ctxt->input->cur += l; \ 1578 if (*ctxt->input->cur == '%') 
xmlParserHandlePEReference(ctxt); \ 1579 } while (0) 1580 1581#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) 1582#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) 1583 1584#define COPY_BUF(l,b,i,v) \ 1585 if (l == 1) b[i++] = (xmlChar) v; \ 1586 else i += xmlCopyCharMultiByte(&b[i],v) 1587 1588/** 1589 * xmlSkipBlankChars: 1590 * @ctxt: the XML parser context 1591 * 1592 * skip all blanks character found at that point in the input streams. 1593 * It pops up finished entities in the process if allowable at that point. 1594 * 1595 * Returns the number of space chars skipped 1596 */ 1597 1598int 1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { 1600 int res = 0; 1601 1602 /* 1603 * It's Okay to use CUR/NEXT here since all the blanks are on 1604 * the ASCII range. 1605 */ 1606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { 1607 const xmlChar *cur; 1608 /* 1609 * if we are in the document content, go really fast 1610 */ 1611 cur = ctxt->input->cur; 1612 while (IS_BLANK_CH(*cur)) { 1613 if (*cur == '\n') { 1614 ctxt->input->line++; ctxt->input->col = 1; 1615 } 1616 cur++; 1617 res++; 1618 if (*cur == 0) { 1619 ctxt->input->cur = cur; 1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 1621 cur = ctxt->input->cur; 1622 } 1623 } 1624 ctxt->input->cur = cur; 1625 } else { 1626 int cur; 1627 do { 1628 cur = CUR; 1629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */ 1630 NEXT; 1631 cur = CUR; 1632 res++; 1633 } 1634 while ((cur == 0) && (ctxt->inputNr > 1) && 1635 (ctxt->instate != XML_PARSER_COMMENT)) { 1636 xmlPopInput(ctxt); 1637 cur = CUR; 1638 } 1639 /* 1640 * Need to handle support of entities branching here 1641 */ 1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); 1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ 1644 } 1645 return(res); 1646} 1647 1648/************************************************************************ 1649 * * 1650 * Commodity functions to handle entities * 1651 * * 1652 
************************************************************************/ 1653 1654/** 1655 * xmlPopInput: 1656 * @ctxt: an XML parser context 1657 * 1658 * xmlPopInput: the current input pointed by ctxt->input came to an end 1659 * pop it and return the next char. 1660 * 1661 * Returns the current xmlChar in the parser context 1662 */ 1663xmlChar 1664xmlPopInput(xmlParserCtxtPtr ctxt) { 1665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); 1666 if (xmlParserDebugEntities) 1667 xmlGenericError(xmlGenericErrorContext, 1668 "Popping input %d\n", ctxt->inputNr); 1669 xmlFreeInputStream(inputPop(ctxt)); 1670 if ((*ctxt->input->cur == 0) && 1671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 1672 return(xmlPopInput(ctxt)); 1673 return(CUR); 1674} 1675 1676/** 1677 * xmlPushInput: 1678 * @ctxt: an XML parser context 1679 * @input: an XML parser input fragment (entity, XML fragment ...). 1680 * 1681 * xmlPushInput: switch to a new input stream which is stacked on top 1682 * of the previous one(s). 1683 */ 1684void 1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { 1686 if (input == NULL) return; 1687 1688 if (xmlParserDebugEntities) { 1689 if ((ctxt->input != NULL) && (ctxt->input->filename)) 1690 xmlGenericError(xmlGenericErrorContext, 1691 "%s(%d): ", ctxt->input->filename, 1692 ctxt->input->line); 1693 xmlGenericError(xmlGenericErrorContext, 1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); 1695 } 1696 inputPush(ctxt, input); 1697 GROW; 1698} 1699 1700/** 1701 * xmlParseCharRef: 1702 * @ctxt: an XML parser context 1703 * 1704 * parse Reference declarations 1705 * 1706 * [66] CharRef ::= '&#' [0-9]+ ';' | 1707 * '&#x' [0-9a-fA-F]+ ';' 1708 * 1709 * [ WFC: Legal Character ] 1710 * Characters referred to using character references must match the 1711 * production for Char. 
1712 * 1713 * Returns the value parsed (as an int), 0 in case of error 1714 */ 1715int 1716xmlParseCharRef(xmlParserCtxtPtr ctxt) { 1717 unsigned int val = 0; 1718 int count = 0; 1719 unsigned int outofrange = 0; 1720 1721 /* 1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here 1723 */ 1724 if ((RAW == '&') && (NXT(1) == '#') && 1725 (NXT(2) == 'x')) { 1726 SKIP(3); 1727 GROW; 1728 while (RAW != ';') { /* loop blocked by count */ 1729 if (count++ > 20) { 1730 count = 0; 1731 GROW; 1732 } 1733 if ((RAW >= '0') && (RAW <= '9')) 1734 val = val * 16 + (CUR - '0'); 1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) 1736 val = val * 16 + (CUR - 'a') + 10; 1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) 1738 val = val * 16 + (CUR - 'A') + 10; 1739 else { 1740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1741 val = 0; 1742 break; 1743 } 1744 if (val > 0x10FFFF) 1745 outofrange = val; 1746 1747 NEXT; 1748 count++; 1749 } 1750 if (RAW == ';') { 1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1752 ctxt->input->col++; 1753 ctxt->nbChars ++; 1754 ctxt->input->cur++; 1755 } 1756 } else if ((RAW == '&') && (NXT(1) == '#')) { 1757 SKIP(2); 1758 GROW; 1759 while (RAW != ';') { /* loop blocked by count */ 1760 if (count++ > 20) { 1761 count = 0; 1762 GROW; 1763 } 1764 if ((RAW >= '0') && (RAW <= '9')) 1765 val = val * 10 + (CUR - '0'); 1766 else { 1767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); 1768 val = 0; 1769 break; 1770 } 1771 if (val > 0x10FFFF) 1772 outofrange = val; 1773 1774 NEXT; 1775 count++; 1776 } 1777 if (RAW == ';') { 1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */ 1779 ctxt->input->col++; 1780 ctxt->nbChars ++; 1781 ctxt->input->cur++; 1782 } 1783 } else { 1784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1785 } 1786 1787 /* 1788 * [ WFC: Legal Character ] 1789 * Characters referred to using character references must match the 1790 * production for Char. 
1791 */ 1792 if ((IS_CHAR(val) && (outofrange == 0))) { 1793 return(val); 1794 } else { 1795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1796 "xmlParseCharRef: invalid xmlChar value %d\n", 1797 val); 1798 } 1799 return(0); 1800} 1801 1802/** 1803 * xmlParseStringCharRef: 1804 * @ctxt: an XML parser context 1805 * @str: a pointer to an index in the string 1806 * 1807 * parse Reference declarations, variant parsing from a string rather 1808 * than an an input flow. 1809 * 1810 * [66] CharRef ::= '&#' [0-9]+ ';' | 1811 * '&#x' [0-9a-fA-F]+ ';' 1812 * 1813 * [ WFC: Legal Character ] 1814 * Characters referred to using character references must match the 1815 * production for Char. 1816 * 1817 * Returns the value parsed (as an int), 0 in case of error, str will be 1818 * updated to the current value of the index 1819 */ 1820static int 1821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { 1822 const xmlChar *ptr; 1823 xmlChar cur; 1824 unsigned int val = 0; 1825 unsigned int outofrange = 0; 1826 1827 if ((str == NULL) || (*str == NULL)) return(0); 1828 ptr = *str; 1829 cur = *ptr; 1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { 1831 ptr += 3; 1832 cur = *ptr; 1833 while (cur != ';') { /* Non input consuming loop */ 1834 if ((cur >= '0') && (cur <= '9')) 1835 val = val * 16 + (cur - '0'); 1836 else if ((cur >= 'a') && (cur <= 'f')) 1837 val = val * 16 + (cur - 'a') + 10; 1838 else if ((cur >= 'A') && (cur <= 'F')) 1839 val = val * 16 + (cur - 'A') + 10; 1840 else { 1841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); 1842 val = 0; 1843 break; 1844 } 1845 if (val > 0x10FFFF) 1846 outofrange = val; 1847 1848 ptr++; 1849 cur = *ptr; 1850 } 1851 if (cur == ';') 1852 ptr++; 1853 } else if ((cur == '&') && (ptr[1] == '#')){ 1854 ptr += 2; 1855 cur = *ptr; 1856 while (cur != ';') { /* Non input consuming loops */ 1857 if ((cur >= '0') && (cur <= '9')) 1858 val = val * 10 + (cur - '0'); 1859 else { 1860 xmlFatalErr(ctxt, 
XML_ERR_INVALID_DEC_CHARREF, NULL); 1861 val = 0; 1862 break; 1863 } 1864 if (val > 0x10FFFF) 1865 outofrange = val; 1866 1867 ptr++; 1868 cur = *ptr; 1869 } 1870 if (cur == ';') 1871 ptr++; 1872 } else { 1873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); 1874 return(0); 1875 } 1876 *str = ptr; 1877 1878 /* 1879 * [ WFC: Legal Character ] 1880 * Characters referred to using character references must match the 1881 * production for Char. 1882 */ 1883 if ((IS_CHAR(val) && (outofrange == 0))) { 1884 return(val); 1885 } else { 1886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 1887 "xmlParseStringCharRef: invalid xmlChar value %d\n", 1888 val); 1889 } 1890 return(0); 1891} 1892 1893/** 1894 * xmlNewBlanksWrapperInputStream: 1895 * @ctxt: an XML parser context 1896 * @entity: an Entity pointer 1897 * 1898 * Create a new input stream for wrapping 1899 * blanks around a PEReference 1900 * 1901 * Returns the new input stream or NULL 1902 */ 1903 1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} 1905 1906static xmlParserInputPtr 1907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 1908 xmlParserInputPtr input; 1909 xmlChar *buffer; 1910 size_t length; 1911 if (entity == NULL) { 1912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 1913 "xmlNewBlanksWrapperInputStream entity\n"); 1914 return(NULL); 1915 } 1916 if (xmlParserDebugEntities) 1917 xmlGenericError(xmlGenericErrorContext, 1918 "new blanks wrapper for entity: %s\n", entity->name); 1919 input = xmlNewInputStream(ctxt); 1920 if (input == NULL) { 1921 return(NULL); 1922 } 1923 length = xmlStrlen(entity->name) + 5; 1924 buffer = xmlMallocAtomic(length); 1925 if (buffer == NULL) { 1926 xmlErrMemory(ctxt, NULL); 1927 return(NULL); 1928 } 1929 buffer [0] = ' '; 1930 buffer [1] = '%'; 1931 buffer [length-3] = ';'; 1932 buffer [length-2] = ' '; 1933 buffer [length-1] = 0; 1934 memcpy(buffer + 2, entity->name, length - 5); 1935 input->free = deallocblankswrapper; 1936 input->base = 
buffer; 1937 input->cur = buffer; 1938 input->length = length; 1939 input->end = &buffer[length]; 1940 return(input); 1941} 1942 1943/** 1944 * xmlParserHandlePEReference: 1945 * @ctxt: the parser context 1946 * 1947 * [69] PEReference ::= '%' Name ';' 1948 * 1949 * [ WFC: No Recursion ] 1950 * A parsed entity must not contain a recursive 1951 * reference to itself, either directly or indirectly. 1952 * 1953 * [ WFC: Entity Declared ] 1954 * In a document without any DTD, a document with only an internal DTD 1955 * subset which contains no parameter entity references, or a document 1956 * with "standalone='yes'", ... ... The declaration of a parameter 1957 * entity must precede any reference to it... 1958 * 1959 * [ VC: Entity Declared ] 1960 * In a document with an external subset or external parameter entities 1961 * with "standalone='no'", ... ... The declaration of a parameter entity 1962 * must precede any reference to it... 1963 * 1964 * [ WFC: In DTD ] 1965 * Parameter-entity references may only appear in the DTD. 1966 * NOTE: misleading but this is handled. 1967 * 1968 * A PEReference may have been detected in the current input stream 1969 * the handling is done accordingly to 1970 * http://www.w3.org/TR/REC-xml#entproc 1971 * i.e. 
1972 * - Included in literal in entity values 1973 * - Included as Parameter Entity reference within DTDs 1974 */ 1975void 1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { 1977 const xmlChar *name; 1978 xmlEntityPtr entity = NULL; 1979 xmlParserInputPtr input; 1980 1981 if (RAW != '%') return; 1982 switch(ctxt->instate) { 1983 case XML_PARSER_CDATA_SECTION: 1984 return; 1985 case XML_PARSER_COMMENT: 1986 return; 1987 case XML_PARSER_START_TAG: 1988 return; 1989 case XML_PARSER_END_TAG: 1990 return; 1991 case XML_PARSER_EOF: 1992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); 1993 return; 1994 case XML_PARSER_PROLOG: 1995 case XML_PARSER_START: 1996 case XML_PARSER_MISC: 1997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); 1998 return; 1999 case XML_PARSER_ENTITY_DECL: 2000 case XML_PARSER_CONTENT: 2001 case XML_PARSER_ATTRIBUTE_VALUE: 2002 case XML_PARSER_PI: 2003 case XML_PARSER_SYSTEM_LITERAL: 2004 case XML_PARSER_PUBLIC_LITERAL: 2005 /* we just ignore it there */ 2006 return; 2007 case XML_PARSER_EPILOG: 2008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); 2009 return; 2010 case XML_PARSER_ENTITY_VALUE: 2011 /* 2012 * NOTE: in the case of entity values, we don't do the 2013 * substitution here since we need the literal 2014 * entity value to be able to save the internal 2015 * subset of the document. 2016 * This will be handled by xmlStringDecodeEntities 2017 */ 2018 return; 2019 case XML_PARSER_DTD: 2020 /* 2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset] 2022 * In the internal DTD subset, parameter-entity references 2023 * can occur only where markup declarations can occur, not 2024 * within markup declarations. 
2025 * In that case this is handled in xmlParseMarkupDecl 2026 */ 2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 2028 return; 2029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) 2030 return; 2031 break; 2032 case XML_PARSER_IGNORE: 2033 return; 2034 } 2035 2036 NEXT; 2037 name = xmlParseName(ctxt); 2038 if (xmlParserDebugEntities) 2039 xmlGenericError(xmlGenericErrorContext, 2040 "PEReference: %s\n", name); 2041 if (name == NULL) { 2042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); 2043 } else { 2044 if (RAW == ';') { 2045 NEXT; 2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) 2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name); 2048 if (entity == NULL) { 2049 2050 /* 2051 * [ WFC: Entity Declared ] 2052 * In a document without any DTD, a document with only an 2053 * internal DTD subset which contains no parameter entity 2054 * references, or a document with "standalone='yes'", ... 2055 * ... The declaration of a parameter entity must precede 2056 * any reference to it... 2057 */ 2058 if ((ctxt->standalone == 1) || 2059 ((ctxt->hasExternalSubset == 0) && 2060 (ctxt->hasPErefs == 0))) { 2061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 2062 "PEReference: %%%s; not found\n", name); 2063 } else { 2064 /* 2065 * [ VC: Entity Declared ] 2066 * In a document with an external subset or external 2067 * parameter entities with "standalone='no'", ... 2068 * ... The declaration of a parameter entity must precede 2069 * any reference to it... 
2070 */ 2071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { 2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, 2073 "PEReference: %%%s; not found\n", 2074 name); 2075 } else 2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 2077 "PEReference: %%%s; not found\n", 2078 name, NULL); 2079 ctxt->valid = 0; 2080 } 2081 } else if (ctxt->input->free != deallocblankswrapper) { 2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity); 2083 xmlPushInput(ctxt, input); 2084 } else { 2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || 2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { 2087 xmlChar start[4]; 2088 xmlCharEncoding enc; 2089 2090 /* 2091 * handle the extra spaces added before and after 2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE 2093 * this is done independently. 2094 */ 2095 input = xmlNewEntityInputStream(ctxt, entity); 2096 xmlPushInput(ctxt, input); 2097 2098 /* 2099 * Get the 4 first bytes and decode the charset 2100 * if enc != XML_CHAR_ENCODING_NONE 2101 * plug some encoding conversion routines. 2102 * Note that, since we may have some non-UTF8 2103 * encoding (like UTF16, bug 135229), the 'length' 2104 * is not known, but we can calculate based upon 2105 * the amount of data in the buffer. 
2106 */ 2107 GROW 2108 if ((ctxt->input->end - ctxt->input->cur)>=4) { 2109 start[0] = RAW; 2110 start[1] = NXT(1); 2111 start[2] = NXT(2); 2112 start[3] = NXT(3); 2113 enc = xmlDetectCharEncoding(start, 4); 2114 if (enc != XML_CHAR_ENCODING_NONE) { 2115 xmlSwitchEncoding(ctxt, enc); 2116 } 2117 } 2118 2119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 2120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && 2121 (IS_BLANK_CH(NXT(5)))) { 2122 xmlParseTextDecl(ctxt); 2123 } 2124 } else { 2125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 2126 "PEReference: %s is not a parameter entity\n", 2127 name); 2128 } 2129 } 2130 } else { 2131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); 2132 } 2133 } 2134} 2135 2136/* 2137 * Macro used to grow the current buffer. 2138 */ 2139#define growBuffer(buffer) { \ 2140 xmlChar *tmp; \ 2141 buffer##_size *= 2; \ 2142 tmp = (xmlChar *) \ 2143 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ 2144 if (tmp == NULL) goto mem_error; \ 2145 buffer = tmp; \ 2146} 2147 2148/** 2149 * xmlStringLenDecodeEntities: 2150 * @ctxt: the parser context 2151 * @str: the input string 2152 * @len: the string length 2153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF 2154 * @end: an end marker xmlChar, 0 if none 2155 * @end2: an end marker xmlChar, 0 if none 2156 * @end3: an end marker xmlChar, 0 if none 2157 * 2158 * Takes a entity string content and process to do the adequate substitutions. 2159 * 2160 * [67] Reference ::= EntityRef | CharRef 2161 * 2162 * [69] PEReference ::= '%' Name ';' 2163 * 2164 * Returns A newly allocated string with the substitution done. The caller 2165 * must deallocate it ! 
2166 */ 2167xmlChar * 2168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2169 int what, xmlChar end, xmlChar end2, xmlChar end3) { 2170 xmlChar *buffer = NULL; 2171 int buffer_size = 0; 2172 2173 xmlChar *current = NULL; 2174 const xmlChar *last; 2175 xmlEntityPtr ent; 2176 int c,l; 2177 int nbchars = 0; 2178 2179 if ((ctxt == NULL) || (str == NULL) || (len < 0)) 2180 return(NULL); 2181 last = str + len; 2182 2183 if (ctxt->depth > 40) { 2184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 2185 return(NULL); 2186 } 2187 2188 /* 2189 * allocate a translation buffer. 2190 */ 2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE; 2192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); 2193 if (buffer == NULL) goto mem_error; 2194 2195 /* 2196 * OK loop until we reach one of the ending char or a size limit. 2197 * we are operating on already parsed values. 2198 */ 2199 if (str < last) 2200 c = CUR_SCHAR(str, l); 2201 else 2202 c = 0; 2203 while ((c != 0) && (c != end) && /* non input consuming loop */ 2204 (c != end2) && (c != end3)) { 2205 2206 if (c == 0) break; 2207 if ((c == '&') && (str[1] == '#')) { 2208 int val = xmlParseStringCharRef(ctxt, &str); 2209 if (val != 0) { 2210 COPY_BUF(0,buffer,nbchars,val); 2211 } 2212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2213 growBuffer(buffer); 2214 } 2215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { 2216 if (xmlParserDebugEntities) 2217 xmlGenericError(xmlGenericErrorContext, 2218 "String decoding Entity Reference: %.30s\n", 2219 str); 2220 ent = xmlParseStringEntityRef(ctxt, &str); 2221 if ((ent != NULL) && 2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 2223 if (ent->content != NULL) { 2224 COPY_BUF(0,buffer,nbchars,ent->content[0]); 2225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2226 growBuffer(buffer); 2227 } 2228 } else { 2229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 2230 "predefined entity has no content\n"); 2231 } 2232 } else if 
((ent != NULL) && (ent->content != NULL)) { 2233 xmlChar *rep; 2234 2235 ctxt->depth++; 2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2237 0, 0, 0); 2238 ctxt->depth--; 2239 if (rep != NULL) { 2240 current = rep; 2241 while (*current != 0) { /* non input consuming loop */ 2242 buffer[nbchars++] = *current++; 2243 if (nbchars > 2244 buffer_size - XML_PARSER_BUFFER_SIZE) { 2245 growBuffer(buffer); 2246 } 2247 } 2248 xmlFree(rep); 2249 } 2250 } else if (ent != NULL) { 2251 int i = xmlStrlen(ent->name); 2252 const xmlChar *cur = ent->name; 2253 2254 buffer[nbchars++] = '&'; 2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { 2256 growBuffer(buffer); 2257 } 2258 for (;i > 0;i--) 2259 buffer[nbchars++] = *cur++; 2260 buffer[nbchars++] = ';'; 2261 } 2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { 2263 if (xmlParserDebugEntities) 2264 xmlGenericError(xmlGenericErrorContext, 2265 "String decoding PE Reference: %.30s\n", str); 2266 ent = xmlParseStringPEReference(ctxt, &str); 2267 if (ent != NULL) { 2268 xmlChar *rep; 2269 2270 ctxt->depth++; 2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what, 2272 0, 0, 0); 2273 ctxt->depth--; 2274 if (rep != NULL) { 2275 current = rep; 2276 while (*current != 0) { /* non input consuming loop */ 2277 buffer[nbchars++] = *current++; 2278 if (nbchars > 2279 buffer_size - XML_PARSER_BUFFER_SIZE) { 2280 growBuffer(buffer); 2281 } 2282 } 2283 xmlFree(rep); 2284 } 2285 } 2286 } else { 2287 COPY_BUF(l,buffer,nbchars,c); 2288 str += l; 2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { 2290 growBuffer(buffer); 2291 } 2292 } 2293 if (str < last) 2294 c = CUR_SCHAR(str, l); 2295 else 2296 c = 0; 2297 } 2298 buffer[nbchars++] = 0; 2299 return(buffer); 2300 2301mem_error: 2302 xmlErrMemory(ctxt, NULL); 2303 return(NULL); 2304} 2305 2306/** 2307 * xmlStringDecodeEntities: 2308 * @ctxt: the parser context 2309 * @str: the input string 2310 * @what: combination of XML_SUBSTITUTE_REF and 
XML_SUBSTITUTE_PEREF 2311 * @end: an end marker xmlChar, 0 if none 2312 * @end2: an end marker xmlChar, 0 if none 2313 * @end3: an end marker xmlChar, 0 if none 2314 * 2315 * Takes a entity string content and process to do the adequate substitutions. 2316 * 2317 * [67] Reference ::= EntityRef | CharRef 2318 * 2319 * [69] PEReference ::= '%' Name ';' 2320 * 2321 * Returns A newly allocated string with the substitution done. The caller 2322 * must deallocate it ! 2323 */ 2324xmlChar * 2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, 2326 xmlChar end, xmlChar end2, xmlChar end3) { 2327 if ((ctxt == NULL) || (str == NULL)) return(NULL); 2328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, 2329 end, end2, end3)); 2330} 2331 2332/************************************************************************ 2333 * * 2334 * Commodity functions, cleanup needed ? * 2335 * * 2336 ************************************************************************/ 2337 2338/** 2339 * areBlanks: 2340 * @ctxt: an XML parser context 2341 * @str: a xmlChar * 2342 * @len: the size of @str 2343 * @blank_chars: we know the chars are blanks 2344 * 2345 * Is this a sequence of blank chars that one can ignore ? 2346 * 2347 * Returns 1 if ignorable 0 otherwise. 2348 */ 2349 2350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, 2351 int blank_chars) { 2352 int i, ret; 2353 xmlNodePtr lastChild; 2354 2355 /* 2356 * Don't spend time trying to differentiate them, the same callback is 2357 * used ! 2358 */ 2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) 2360 return(0); 2361 2362 /* 2363 * Check for xml:space value. 
2364 */ 2365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || 2366 (*(ctxt->space) == -2)) 2367 return(0); 2368 2369 /* 2370 * Check that the string is made of blanks 2371 */ 2372 if (blank_chars == 0) { 2373 for (i = 0;i < len;i++) 2374 if (!(IS_BLANK_CH(str[i]))) return(0); 2375 } 2376 2377 /* 2378 * Look if the element is mixed content in the DTD if available 2379 */ 2380 if (ctxt->node == NULL) return(0); 2381 if (ctxt->myDoc != NULL) { 2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); 2383 if (ret == 0) return(1); 2384 if (ret == 1) return(0); 2385 } 2386 2387 /* 2388 * Otherwise, heuristic :-\ 2389 */ 2390 if ((RAW != '<') && (RAW != 0xD)) return(0); 2391 if ((ctxt->node->children == NULL) && 2392 (RAW == '<') && (NXT(1) == '/')) return(0); 2393 2394 lastChild = xmlGetLastChild(ctxt->node); 2395 if (lastChild == NULL) { 2396 if ((ctxt->node->type != XML_ELEMENT_NODE) && 2397 (ctxt->node->content != NULL)) return(0); 2398 } else if (xmlNodeIsText(lastChild)) 2399 return(0); 2400 else if ((ctxt->node->children != NULL) && 2401 (xmlNodeIsText(ctxt->node->children))) 2402 return(0); 2403 return(1); 2404} 2405 2406/************************************************************************ 2407 * * 2408 * Extra stuff for namespace support * 2409 * Relates to http://www.w3.org/TR/WD-xml-names * 2410 * * 2411 ************************************************************************/ 2412 2413/** 2414 * xmlSplitQName: 2415 * @ctxt: an XML parser context 2416 * @name: an XML parser context 2417 * @prefix: a xmlChar ** 2418 * 2419 * parse an UTF8 encoded XML qualified name string 2420 * 2421 * [NS 5] QName ::= (Prefix ':')? LocalPart 2422 * 2423 * [NS 6] Prefix ::= NCName 2424 * 2425 * [NS 7] LocalPart ::= NCName 2426 * 2427 * Returns the local part, and prefix is updated 2428 * to get the Prefix if any. 
2429 */ 2430 2431xmlChar * 2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { 2433 xmlChar buf[XML_MAX_NAMELEN + 5]; 2434 xmlChar *buffer = NULL; 2435 int len = 0; 2436 int max = XML_MAX_NAMELEN; 2437 xmlChar *ret = NULL; 2438 const xmlChar *cur = name; 2439 int c; 2440 2441 if (prefix == NULL) return(NULL); 2442 *prefix = NULL; 2443 2444 if (cur == NULL) return(NULL); 2445 2446#ifndef XML_XML_NAMESPACE 2447 /* xml: prefix is not really a namespace */ 2448 if ((cur[0] == 'x') && (cur[1] == 'm') && 2449 (cur[2] == 'l') && (cur[3] == ':')) 2450 return(xmlStrdup(name)); 2451#endif 2452 2453 /* nasty but well=formed */ 2454 if (cur[0] == ':') 2455 return(xmlStrdup(name)); 2456 2457 c = *cur++; 2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ 2459 buf[len++] = c; 2460 c = *cur++; 2461 } 2462 if (len >= max) { 2463 /* 2464 * Okay someone managed to make a huge name, so he's ready to pay 2465 * for the processing speed. 2466 */ 2467 max = len * 2; 2468 2469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2470 if (buffer == NULL) { 2471 xmlErrMemory(ctxt, NULL); 2472 return(NULL); 2473 } 2474 memcpy(buffer, buf, len); 2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */ 2476 if (len + 10 > max) { 2477 xmlChar *tmp; 2478 2479 max *= 2; 2480 tmp = (xmlChar *) xmlRealloc(buffer, 2481 max * sizeof(xmlChar)); 2482 if (tmp == NULL) { 2483 xmlFree(tmp); 2484 xmlErrMemory(ctxt, NULL); 2485 return(NULL); 2486 } 2487 buffer = tmp; 2488 } 2489 buffer[len++] = c; 2490 c = *cur++; 2491 } 2492 buffer[len] = 0; 2493 } 2494 2495 if ((c == ':') && (*cur == 0)) { 2496 if (buffer != NULL) 2497 xmlFree(buffer); 2498 *prefix = NULL; 2499 return(xmlStrdup(name)); 2500 } 2501 2502 if (buffer == NULL) 2503 ret = xmlStrndup(buf, len); 2504 else { 2505 ret = buffer; 2506 buffer = NULL; 2507 max = XML_MAX_NAMELEN; 2508 } 2509 2510 2511 if (c == ':') { 2512 c = *cur; 2513 *prefix = ret; 2514 if (c == 0) { 2515 
return(xmlStrndup(BAD_CAST "", 0)); 2516 } 2517 len = 0; 2518 2519 /* 2520 * Check that the first character is proper to start 2521 * a new name 2522 */ 2523 if (!(((c >= 0x61) && (c <= 0x7A)) || 2524 ((c >= 0x41) && (c <= 0x5A)) || 2525 (c == '_') || (c == ':'))) { 2526 int l; 2527 int first = CUR_SCHAR(cur, l); 2528 2529 if (!IS_LETTER(first) && (first != '_')) { 2530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, 2531 "Name %s is not XML Namespace compliant\n", 2532 name); 2533 } 2534 } 2535 cur++; 2536 2537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */ 2538 buf[len++] = c; 2539 c = *cur++; 2540 } 2541 if (len >= max) { 2542 /* 2543 * Okay someone managed to make a huge name, so he's ready to pay 2544 * for the processing speed. 2545 */ 2546 max = len * 2; 2547 2548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2549 if (buffer == NULL) { 2550 xmlErrMemory(ctxt, NULL); 2551 return(NULL); 2552 } 2553 memcpy(buffer, buf, len); 2554 while (c != 0) { /* tested bigname2.xml */ 2555 if (len + 10 > max) { 2556 xmlChar *tmp; 2557 2558 max *= 2; 2559 tmp = (xmlChar *) xmlRealloc(buffer, 2560 max * sizeof(xmlChar)); 2561 if (tmp == NULL) { 2562 xmlErrMemory(ctxt, NULL); 2563 xmlFree(buffer); 2564 return(NULL); 2565 } 2566 buffer = tmp; 2567 } 2568 buffer[len++] = c; 2569 c = *cur++; 2570 } 2571 buffer[len] = 0; 2572 } 2573 2574 if (buffer == NULL) 2575 ret = xmlStrndup(buf, len); 2576 else { 2577 ret = buffer; 2578 } 2579 } 2580 2581 return(ret); 2582} 2583 2584/************************************************************************ 2585 * * 2586 * The parser itself * 2587 * Relates to http://www.w3.org/TR/REC-xml * 2588 * * 2589 ************************************************************************/ 2590 2591static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); 2592static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, 2593 int *len, int *alloc, int normalize); 2594 2595/** 2596 * xmlParseName: 2597 * @ctxt: an XML parser 
context 2598 * 2599 * parse an XML name. 2600 * 2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2602 * CombiningChar | Extender 2603 * 2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2605 * 2606 * [6] Names ::= Name (#x20 Name)* 2607 * 2608 * Returns the Name parsed or NULL 2609 */ 2610 2611const xmlChar * 2612xmlParseName(xmlParserCtxtPtr ctxt) { 2613 const xmlChar *in; 2614 const xmlChar *ret; 2615 int count = 0; 2616 2617 GROW; 2618 2619 /* 2620 * Accelerator for simple ASCII names 2621 */ 2622 in = ctxt->input->cur; 2623 if (((*in >= 0x61) && (*in <= 0x7A)) || 2624 ((*in >= 0x41) && (*in <= 0x5A)) || 2625 (*in == '_') || (*in == ':')) { 2626 in++; 2627 while (((*in >= 0x61) && (*in <= 0x7A)) || 2628 ((*in >= 0x41) && (*in <= 0x5A)) || 2629 ((*in >= 0x30) && (*in <= 0x39)) || 2630 (*in == '_') || (*in == '-') || 2631 (*in == ':') || (*in == '.')) 2632 in++; 2633 if ((*in > 0) && (*in < 0x80)) { 2634 count = in - ctxt->input->cur; 2635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 2636 ctxt->input->cur = in; 2637 ctxt->nbChars += count; 2638 ctxt->input->col += count; 2639 if (ret == NULL) 2640 xmlErrMemory(ctxt, NULL); 2641 return(ret); 2642 } 2643 } 2644 return(xmlParseNameComplex(ctxt)); 2645} 2646 2647/** 2648 * xmlParseNameAndCompare: 2649 * @ctxt: an XML parser context 2650 * 2651 * parse an XML name and compares for match 2652 * (specialized for endtag parsing) 2653 * 2654 * Returns NULL for an illegal name, (xmlChar*) 1 for success 2655 * and the name for mismatch 2656 */ 2657 2658static const xmlChar * 2659xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { 2660 register const xmlChar *cmp = other; 2661 register const xmlChar *in; 2662 const xmlChar *ret; 2663 2664 GROW; 2665 2666 in = ctxt->input->cur; 2667 while (*in != 0 && *in == *cmp) { 2668 ++in; 2669 ++cmp; 2670 ctxt->input->col++; 2671 } 2672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 2673 /* success */ 2674 ctxt->input->cur = in; 
2675 return (const xmlChar*) 1; 2676 } 2677 /* failure (or end of input buffer), check with full function */ 2678 ret = xmlParseName (ctxt); 2679 /* strings coming from the dictionnary direct compare possible */ 2680 if (ret == other) { 2681 return (const xmlChar*) 1; 2682 } 2683 return ret; 2684} 2685 2686static const xmlChar * 2687xmlParseNameComplex(xmlParserCtxtPtr ctxt) { 2688 int len = 0, l; 2689 int c; 2690 int count = 0; 2691 2692 /* 2693 * Handler for more complex cases 2694 */ 2695 GROW; 2696 c = CUR_CHAR(l); 2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 2698 (!IS_LETTER(c) && (c != '_') && 2699 (c != ':'))) { 2700 return(NULL); 2701 } 2702 2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 2704 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2705 (c == '.') || (c == '-') || 2706 (c == '_') || (c == ':') || 2707 (IS_COMBINING(c)) || 2708 (IS_EXTENDER(c)))) { 2709 if (count++ > 100) { 2710 count = 0; 2711 GROW; 2712 } 2713 len += l; 2714 NEXTL(l); 2715 c = CUR_CHAR(l); 2716 } 2717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) 2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); 2719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 2720} 2721 2722/** 2723 * xmlParseStringName: 2724 * @ctxt: an XML parser context 2725 * @str: a pointer to the string pointer (IN/OUT) 2726 * 2727 * parse an XML name. 2728 * 2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | 2730 * CombiningChar | Extender 2731 * 2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)* 2733 * 2734 * [6] Names ::= Name (#x20 Name)* 2735 * 2736 * Returns the Name parsed or NULL. The @str pointer 2737 * is updated to the current location in the string. 
2738 */ 2739 2740static xmlChar * 2741xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { 2742 xmlChar buf[XML_MAX_NAMELEN + 5]; 2743 const xmlChar *cur = *str; 2744 int len = 0, l; 2745 int c; 2746 2747 c = CUR_SCHAR(cur, l); 2748 if (!IS_LETTER(c) && (c != '_') && 2749 (c != ':')) { 2750 return(NULL); 2751 } 2752 2753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ 2754 (c == '.') || (c == '-') || 2755 (c == '_') || (c == ':') || 2756 (IS_COMBINING(c)) || 2757 (IS_EXTENDER(c))) { 2758 COPY_BUF(l,buf,len,c); 2759 cur += l; 2760 c = CUR_SCHAR(cur, l); 2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ 2762 /* 2763 * Okay someone managed to make a huge name, so he's ready to pay 2764 * for the processing speed. 2765 */ 2766 xmlChar *buffer; 2767 int max = len * 2; 2768 2769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2770 if (buffer == NULL) { 2771 xmlErrMemory(ctxt, NULL); 2772 return(NULL); 2773 } 2774 memcpy(buffer, buf, len); 2775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2776 /* test bigentname.xml */ 2777 (c == '.') || (c == '-') || 2778 (c == '_') || (c == ':') || 2779 (IS_COMBINING(c)) || 2780 (IS_EXTENDER(c))) { 2781 if (len + 10 > max) { 2782 xmlChar *tmp; 2783 max *= 2; 2784 tmp = (xmlChar *) xmlRealloc(buffer, 2785 max * sizeof(xmlChar)); 2786 if (tmp == NULL) { 2787 xmlErrMemory(ctxt, NULL); 2788 xmlFree(buffer); 2789 return(NULL); 2790 } 2791 buffer = tmp; 2792 } 2793 COPY_BUF(l,buffer,len,c); 2794 cur += l; 2795 c = CUR_SCHAR(cur, l); 2796 } 2797 buffer[len] = 0; 2798 *str = cur; 2799 return(buffer); 2800 } 2801 } 2802 *str = cur; 2803 return(xmlStrndup(buf, len)); 2804} 2805 2806/** 2807 * xmlParseNmtoken: 2808 * @ctxt: an XML parser context 2809 * 2810 * parse an XML Nmtoken. 
2811 * 2812 * [7] Nmtoken ::= (NameChar)+ 2813 * 2814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 2815 * 2816 * Returns the Nmtoken parsed or NULL 2817 */ 2818 2819xmlChar * 2820xmlParseNmtoken(xmlParserCtxtPtr ctxt) { 2821 xmlChar buf[XML_MAX_NAMELEN + 5]; 2822 int len = 0, l; 2823 int c; 2824 int count = 0; 2825 2826 GROW; 2827 c = CUR_CHAR(l); 2828 2829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2830 (c == '.') || (c == '-') || 2831 (c == '_') || (c == ':') || 2832 (IS_COMBINING(c)) || 2833 (IS_EXTENDER(c))) { 2834 if (count++ > 100) { 2835 count = 0; 2836 GROW; 2837 } 2838 COPY_BUF(l,buf,len,c); 2839 NEXTL(l); 2840 c = CUR_CHAR(l); 2841 if (len >= XML_MAX_NAMELEN) { 2842 /* 2843 * Okay someone managed to make a huge token, so he's ready to pay 2844 * for the processing speed. 2845 */ 2846 xmlChar *buffer; 2847 int max = len * 2; 2848 2849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); 2850 if (buffer == NULL) { 2851 xmlErrMemory(ctxt, NULL); 2852 return(NULL); 2853 } 2854 memcpy(buffer, buf, len); 2855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ 2856 (c == '.') || (c == '-') || 2857 (c == '_') || (c == ':') || 2858 (IS_COMBINING(c)) || 2859 (IS_EXTENDER(c))) { 2860 if (count++ > 100) { 2861 count = 0; 2862 GROW; 2863 } 2864 if (len + 10 > max) { 2865 xmlChar *tmp; 2866 2867 max *= 2; 2868 tmp = (xmlChar *) xmlRealloc(buffer, 2869 max * sizeof(xmlChar)); 2870 if (tmp == NULL) { 2871 xmlErrMemory(ctxt, NULL); 2872 xmlFree(buffer); 2873 return(NULL); 2874 } 2875 buffer = tmp; 2876 } 2877 COPY_BUF(l,buffer,len,c); 2878 NEXTL(l); 2879 c = CUR_CHAR(l); 2880 } 2881 buffer[len] = 0; 2882 return(buffer); 2883 } 2884 } 2885 if (len == 0) 2886 return(NULL); 2887 return(xmlStrndup(buf, len)); 2888} 2889 2890/** 2891 * xmlParseEntityValue: 2892 * @ctxt: an XML parser context 2893 * @orig: if non-NULL store a copy of the original entity value 2894 * 2895 * parse a value for ENTITY declarations 2896 * 2897 * [9] 
EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | 2898 * "'" ([^%&'] | PEReference | Reference)* "'" 2899 * 2900 * Returns the EntityValue parsed with reference substituted or NULL 2901 */ 2902 2903xmlChar * 2904xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { 2905 xmlChar *buf = NULL; 2906 int len = 0; 2907 int size = XML_PARSER_BUFFER_SIZE; 2908 int c, l; 2909 xmlChar stop; 2910 xmlChar *ret = NULL; 2911 const xmlChar *cur = NULL; 2912 xmlParserInputPtr input; 2913 2914 if (RAW == '"') stop = '"'; 2915 else if (RAW == '\'') stop = '\''; 2916 else { 2917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); 2918 return(NULL); 2919 } 2920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 2921 if (buf == NULL) { 2922 xmlErrMemory(ctxt, NULL); 2923 return(NULL); 2924 } 2925 2926 /* 2927 * The content of the entity definition is copied in a buffer. 2928 */ 2929 2930 ctxt->instate = XML_PARSER_ENTITY_VALUE; 2931 input = ctxt->input; 2932 GROW; 2933 NEXT; 2934 c = CUR_CHAR(l); 2935 /* 2936 * NOTE: 4.4.5 Included in Literal 2937 * When a parameter entity reference appears in a literal entity 2938 * value, ... a single or double quote character in the replacement 2939 * text is always treated as a normal data character and will not 2940 * terminate the literal. 2941 * In practice it means we stop the loop only when back at parsing 2942 * the initial entity and the quote is found 2943 */ 2944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */ 2945 (ctxt->input != input))) { 2946 if (len + 5 >= size) { 2947 xmlChar *tmp; 2948 2949 size *= 2; 2950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 2951 if (tmp == NULL) { 2952 xmlErrMemory(ctxt, NULL); 2953 xmlFree(buf); 2954 return(NULL); 2955 } 2956 buf = tmp; 2957 } 2958 COPY_BUF(l,buf,len,c); 2959 NEXTL(l); 2960 /* 2961 * Pop-up of finished entities. 
2962 */ 2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ 2964 xmlPopInput(ctxt); 2965 2966 GROW; 2967 c = CUR_CHAR(l); 2968 if (c == 0) { 2969 GROW; 2970 c = CUR_CHAR(l); 2971 } 2972 } 2973 buf[len] = 0; 2974 2975 /* 2976 * Raise problem w.r.t. '&' and '%' being used in non-entities 2977 * reference constructs. Note Charref will be handled in 2978 * xmlStringDecodeEntities() 2979 */ 2980 cur = buf; 2981 while (*cur != 0) { /* non input consuming */ 2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { 2983 xmlChar *name; 2984 xmlChar tmp = *cur; 2985 2986 cur++; 2987 name = xmlParseStringName(ctxt, &cur); 2988 if ((name == NULL) || (*cur != ';')) { 2989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, 2990 "EntityValue: '%c' forbidden except for entities references\n", 2991 tmp); 2992 } 2993 if ((tmp == '%') && (ctxt->inSubset == 1) && 2994 (ctxt->inputNr == 1)) { 2995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); 2996 } 2997 if (name != NULL) 2998 xmlFree(name); 2999 if (*cur == 0) 3000 break; 3001 } 3002 cur++; 3003 } 3004 3005 /* 3006 * Then PEReference entities are substituted. 3007 */ 3008 if (c != stop) { 3009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); 3010 xmlFree(buf); 3011 } else { 3012 NEXT; 3013 /* 3014 * NOTE: 4.4.7 Bypassed 3015 * When a general entity reference appears in the EntityValue in 3016 * an entity declaration, it is bypassed and left as is. 3017 * so XML_SUBSTITUTE_REF is not set here. 
3018 */ 3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 3020 0, 0, 0); 3021 if (orig != NULL) 3022 *orig = buf; 3023 else 3024 xmlFree(buf); 3025 } 3026 3027 return(ret); 3028} 3029 3030/** 3031 * xmlParseAttValueComplex: 3032 * @ctxt: an XML parser context 3033 * @len: the resulting attribute len 3034 * @normalize: wether to apply the inner normalization 3035 * 3036 * parse a value for an attribute, this is the fallback function 3037 * of xmlParseAttValue() when the attribute parsing requires handling 3038 * of non-ASCII characters, or normalization compaction. 3039 * 3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 3041 */ 3042static xmlChar * 3043xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { 3044 xmlChar limit = 0; 3045 xmlChar *buf = NULL; 3046 int len = 0; 3047 int buf_size = 0; 3048 int c, l, in_space = 0; 3049 xmlChar *current = NULL; 3050 xmlEntityPtr ent; 3051 3052 if (NXT(0) == '"') { 3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3054 limit = '"'; 3055 NEXT; 3056 } else if (NXT(0) == '\'') { 3057 limit = '\''; 3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 3059 NEXT; 3060 } else { 3061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 3062 return(NULL); 3063 } 3064 3065 /* 3066 * allocate a translation buffer. 3067 */ 3068 buf_size = XML_PARSER_BUFFER_SIZE; 3069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); 3070 if (buf == NULL) goto mem_error; 3071 3072 /* 3073 * OK loop until we reach one of the ending char or a size limit. 
3074 */ 3075 c = CUR_CHAR(l); 3076 while ((NXT(0) != limit) && /* checked */ 3077 (c != '<')) { 3078 if (c == 0) break; 3079 if (c == '&') { 3080 in_space = 0; 3081 if (NXT(1) == '#') { 3082 int val = xmlParseCharRef(ctxt); 3083 3084 if (val == '&') { 3085 if (ctxt->replaceEntities) { 3086 if (len > buf_size - 10) { 3087 growBuffer(buf); 3088 } 3089 buf[len++] = '&'; 3090 } else { 3091 /* 3092 * The reparsing will be done in xmlStringGetNodeList() 3093 * called by the attribute() function in SAX.c 3094 */ 3095 if (len > buf_size - 10) { 3096 growBuffer(buf); 3097 } 3098 buf[len++] = '&'; 3099 buf[len++] = '#'; 3100 buf[len++] = '3'; 3101 buf[len++] = '8'; 3102 buf[len++] = ';'; 3103 } 3104 } else { 3105 if (len > buf_size - 10) { 3106 growBuffer(buf); 3107 } 3108 len += xmlCopyChar(0, &buf[len], val); 3109 } 3110 } else { 3111 ent = xmlParseEntityRef(ctxt); 3112 if ((ent != NULL) && 3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { 3114 if (len > buf_size - 10) { 3115 growBuffer(buf); 3116 } 3117 if ((ctxt->replaceEntities == 0) && 3118 (ent->content[0] == '&')) { 3119 buf[len++] = '&'; 3120 buf[len++] = '#'; 3121 buf[len++] = '3'; 3122 buf[len++] = '8'; 3123 buf[len++] = ';'; 3124 } else { 3125 buf[len++] = ent->content[0]; 3126 } 3127 } else if ((ent != NULL) && 3128 (ctxt->replaceEntities != 0)) { 3129 xmlChar *rep; 3130 3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { 3132 rep = xmlStringDecodeEntities(ctxt, ent->content, 3133 XML_SUBSTITUTE_REF, 3134 0, 0, 0); 3135 if (rep != NULL) { 3136 current = rep; 3137 while (*current != 0) { /* non input consuming */ 3138 buf[len++] = *current++; 3139 if (len > buf_size - 10) { 3140 growBuffer(buf); 3141 } 3142 } 3143 xmlFree(rep); 3144 } 3145 } else { 3146 if (len > buf_size - 10) { 3147 growBuffer(buf); 3148 } 3149 if (ent->content != NULL) 3150 buf[len++] = ent->content[0]; 3151 } 3152 } else if (ent != NULL) { 3153 int i = xmlStrlen(ent->name); 3154 const xmlChar *cur = ent->name; 3155 3156 /* 3157 * 
This may look absurd but is needed to detect 3158 * entities problems 3159 */ 3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && 3161 (ent->content != NULL)) { 3162 xmlChar *rep; 3163 rep = xmlStringDecodeEntities(ctxt, ent->content, 3164 XML_SUBSTITUTE_REF, 0, 0, 0); 3165 if (rep != NULL) 3166 xmlFree(rep); 3167 } 3168 3169 /* 3170 * Just output the reference 3171 */ 3172 buf[len++] = '&'; 3173 if (len > buf_size - i - 10) { 3174 growBuffer(buf); 3175 } 3176 for (;i > 0;i--) 3177 buf[len++] = *cur++; 3178 buf[len++] = ';'; 3179 } 3180 } 3181 } else { 3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { 3183 if ((len != 0) || (!normalize)) { 3184 if ((!normalize) || (!in_space)) { 3185 COPY_BUF(l,buf,len,0x20); 3186 if (len > buf_size - 10) { 3187 growBuffer(buf); 3188 } 3189 } 3190 in_space = 1; 3191 } 3192 } else { 3193 in_space = 0; 3194 COPY_BUF(l,buf,len,c); 3195 if (len > buf_size - 10) { 3196 growBuffer(buf); 3197 } 3198 } 3199 NEXTL(l); 3200 } 3201 GROW; 3202 c = CUR_CHAR(l); 3203 } 3204 if ((in_space) && (normalize)) { 3205 while (buf[len - 1] == 0x20) len--; 3206 } 3207 buf[len] = 0; 3208 if (RAW == '<') { 3209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); 3210 } else if (RAW != limit) { 3211 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, 3212 "AttValue: ' expected\n"); 3213 } else 3214 NEXT; 3215 if (attlen != NULL) *attlen = len; 3216 return(buf); 3217 3218mem_error: 3219 xmlErrMemory(ctxt, NULL); 3220 return(NULL); 3221} 3222 3223/** 3224 * xmlParseAttValue: 3225 * @ctxt: an XML parser context 3226 * 3227 * parse a value for an attribute 3228 * Note: the parser won't do substitution of entities here, this 3229 * will be handled later in xmlStringGetNodeList 3230 * 3231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | 3232 * "'" ([^<&'] | Reference)* "'" 3233 * 3234 * 3.3.3 Attribute-Value Normalization: 3235 * Before the value of an attribute is passed to the application or 3236 * checked for validity, the XML 
processor must normalize it as follows: 3237 * - a character reference is processed by appending the referenced 3238 * character to the attribute value 3239 * - an entity reference is processed by recursively processing the 3240 * replacement text of the entity 3241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 3242 * appending #x20 to the normalized value, except that only a single 3243 * #x20 is appended for a "#xD#xA" sequence that is part of an external 3244 * parsed entity or the literal entity value of an internal parsed entity 3245 * - other characters are processed by appending them to the normalized value 3246 * If the declared value is not CDATA, then the XML processor must further 3247 * process the normalized attribute value by discarding any leading and 3248 * trailing space (#x20) characters, and by replacing sequences of space 3249 * (#x20) characters by a single space (#x20) character. 3250 * All attributes for which no declaration has been read should be treated 3251 * by a non-validating parser as if declared CDATA. 3252 * 3253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
3254 */ 3255 3256 3257xmlChar * 3258xmlParseAttValue(xmlParserCtxtPtr ctxt) { 3259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); 3260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); 3261} 3262 3263/** 3264 * xmlParseSystemLiteral: 3265 * @ctxt: an XML parser context 3266 * 3267 * parse an XML Literal 3268 * 3269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 3270 * 3271 * Returns the SystemLiteral parsed or NULL 3272 */ 3273 3274xmlChar * 3275xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { 3276 xmlChar *buf = NULL; 3277 int len = 0; 3278 int size = XML_PARSER_BUFFER_SIZE; 3279 int cur, l; 3280 xmlChar stop; 3281 int state = ctxt->instate; 3282 int count = 0; 3283 3284 SHRINK; 3285 if (RAW == '"') { 3286 NEXT; 3287 stop = '"'; 3288 } else if (RAW == '\'') { 3289 NEXT; 3290 stop = '\''; 3291 } else { 3292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3293 return(NULL); 3294 } 3295 3296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3297 if (buf == NULL) { 3298 xmlErrMemory(ctxt, NULL); 3299 return(NULL); 3300 } 3301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL; 3302 cur = CUR_CHAR(l); 3303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ 3304 if (len + 5 >= size) { 3305 xmlChar *tmp; 3306 3307 size *= 2; 3308 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3309 if (tmp == NULL) { 3310 xmlFree(buf); 3311 xmlErrMemory(ctxt, NULL); 3312 ctxt->instate = (xmlParserInputState) state; 3313 return(NULL); 3314 } 3315 buf = tmp; 3316 } 3317 count++; 3318 if (count > 50) { 3319 GROW; 3320 count = 0; 3321 } 3322 COPY_BUF(l,buf,len,cur); 3323 NEXTL(l); 3324 cur = CUR_CHAR(l); 3325 if (cur == 0) { 3326 GROW; 3327 SHRINK; 3328 cur = CUR_CHAR(l); 3329 } 3330 } 3331 buf[len] = 0; 3332 ctxt->instate = (xmlParserInputState) state; 3333 if (!IS_CHAR(cur)) { 3334 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3335 } else { 3336 NEXT; 3337 } 3338 return(buf); 3339} 3340 3341/** 3342 * xmlParsePubidLiteral: 3343 * 
@ctxt: an XML parser context 3344 * 3345 * parse an XML public literal 3346 * 3347 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 3348 * 3349 * Returns the PubidLiteral parsed or NULL. 3350 */ 3351 3352xmlChar * 3353xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { 3354 xmlChar *buf = NULL; 3355 int len = 0; 3356 int size = XML_PARSER_BUFFER_SIZE; 3357 xmlChar cur; 3358 xmlChar stop; 3359 int count = 0; 3360 xmlParserInputState oldstate = ctxt->instate; 3361 3362 SHRINK; 3363 if (RAW == '"') { 3364 NEXT; 3365 stop = '"'; 3366 } else if (RAW == '\'') { 3367 NEXT; 3368 stop = '\''; 3369 } else { 3370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); 3371 return(NULL); 3372 } 3373 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3374 if (buf == NULL) { 3375 xmlErrMemory(ctxt, NULL); 3376 return(NULL); 3377 } 3378 ctxt->instate = XML_PARSER_PUBLIC_LITERAL; 3379 cur = CUR; 3380 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ 3381 if (len + 1 >= size) { 3382 xmlChar *tmp; 3383 3384 size *= 2; 3385 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3386 if (tmp == NULL) { 3387 xmlErrMemory(ctxt, NULL); 3388 xmlFree(buf); 3389 return(NULL); 3390 } 3391 buf = tmp; 3392 } 3393 buf[len++] = cur; 3394 count++; 3395 if (count > 50) { 3396 GROW; 3397 count = 0; 3398 } 3399 NEXT; 3400 cur = CUR; 3401 if (cur == 0) { 3402 GROW; 3403 SHRINK; 3404 cur = CUR; 3405 } 3406 } 3407 buf[len] = 0; 3408 if (cur != stop) { 3409 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); 3410 } else { 3411 NEXT; 3412 } 3413 ctxt->instate = oldstate; 3414 return(buf); 3415} 3416 3417void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); 3418 3419/* 3420 * used for the test in the inner loop of the char data testing 3421 */ 3422static const unsigned char test_char_data[256] = { 3423 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3424 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ 3425 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 3426 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3427 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ 3428 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 3429 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 3430 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ 3431 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 3432 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 3433 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 3434 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ 3435 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 3436 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 3437 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 3438 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 3439 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ 3440 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3441 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3442 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3443 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3445 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3448 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3449 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3450 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3451 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3452 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3453 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3454 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 3455}; 3456 3457/** 3458 * xmlParseCharData: 3459 * @ctxt: an XML parser context 3460 * @cdata: int indicating whether we are within a CDATA section 3461 * 3462 * parse a CharData section. 3463 * if we are within a CDATA section ']]>' marks an end of section. 
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands slices of it to the SAX characters / ignorableWhitespace
 * callbacks without copying. The fast path stops at '<' or '&'. When it
 * meets a byte it cannot handle (or when @cdata is non-zero) the work is
 * delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;          /* scanning cursor inside the input buffer */
    int nbchar = 0;             /* number of bytes in the pending slice */
    int line = ctxt->input->line;   /* position snapshot, refreshed after */
    int col = ctxt->input->col;     /* each slice delivered to SAX */
    int ccol;                   /* local column counter for the inner loop */

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* First pass: a run made only of spaces and line feeds. */
            while (*in == 0x20) in++;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces/line feeds were seen before the next
                 * markup: deliver them (if any) and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * Distinct callbacks are registered, so
                         * areBlanks() decides which one gets the run.
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Second pass: skip every byte flagged in test_char_data,
             * counting columns in a local variable.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in plain character data. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* A lone ']' is ordinary data: keep scanning. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver what has been scanned since input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The slice starts with a blank and distinct
                     * callbacks are registered: let areBlanks() decide.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* Remember the position reached after this slice. */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR LF: move input->cur onto the LF so that the CR is
                 * left out of the next slice, then resume scanning. A
                 * lone CR is left for the code below.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            /* Markup or a reference ends the character data. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /* Refresh the buffer and reload the cursor from input->cur. */
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Slow path: put back the line/column recorded after the last
     * delivered slice and let the generic routine handle the rest.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}

/**
 * xmlParseCharDataComplex:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.this is the fallback function
 * of xmlParseCharData() when
the parsing requires handling 3617 * of non-ASCII characters. 3618 */ 3619void 3620xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { 3621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; 3622 int nbchar = 0; 3623 int cur, l; 3624 int count = 0; 3625 3626 SHRINK; 3627 GROW; 3628 cur = CUR_CHAR(l); 3629 while ((cur != '<') && /* checked */ 3630 (cur != '&') && 3631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { 3632 if ((cur == ']') && (NXT(1) == ']') && 3633 (NXT(2) == '>')) { 3634 if (cdata) break; 3635 else { 3636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); 3637 } 3638 } 3639 COPY_BUF(l,buf,nbchar,cur); 3640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { 3641 buf[nbchar] = 0; 3642 3643 /* 3644 * OK the segment is to be consumed as chars. 3645 */ 3646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3647 if (areBlanks(ctxt, buf, nbchar, 0)) { 3648 if (ctxt->sax->ignorableWhitespace != NULL) 3649 ctxt->sax->ignorableWhitespace(ctxt->userData, 3650 buf, nbchar); 3651 } else { 3652 if (ctxt->sax->characters != NULL) 3653 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3654 if ((ctxt->sax->characters != 3655 ctxt->sax->ignorableWhitespace) && 3656 (*ctxt->space == -1)) 3657 *ctxt->space = -2; 3658 } 3659 } 3660 nbchar = 0; 3661 } 3662 count++; 3663 if (count > 50) { 3664 GROW; 3665 count = 0; 3666 } 3667 NEXTL(l); 3668 cur = CUR_CHAR(l); 3669 } 3670 if (nbchar != 0) { 3671 buf[nbchar] = 0; 3672 /* 3673 * OK the segment is to be consumed as chars. 
3674 */ 3675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3676 if (areBlanks(ctxt, buf, nbchar, 0)) { 3677 if (ctxt->sax->ignorableWhitespace != NULL) 3678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 3679 } else { 3680 if (ctxt->sax->characters != NULL) 3681 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3682 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && 3683 (*ctxt->space == -1)) 3684 *ctxt->space = -2; 3685 } 3686 } 3687 } 3688 if ((cur != 0) && (!IS_CHAR(cur))) { 3689 /* Generate the error and skip the offending character */ 3690 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, 3691 "PCDATA invalid Char value %d\n", 3692 cur); 3693 NEXTL(l); 3694 } 3695} 3696 3697/** 3698 * xmlParseExternalID: 3699 * @ctxt: an XML parser context 3700 * @publicID: a xmlChar** receiving PubidLiteral 3701 * @strict: indicate whether we should restrict parsing to only 3702 * production [75], see NOTE below 3703 * 3704 * Parse an External ID or a Public ID 3705 * 3706 * NOTE: Productions [75] and [83] interact badly since [75] can generate 3707 * 'PUBLIC' S PubidLiteral S SystemLiteral 3708 * 3709 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 3710 * | 'PUBLIC' S PubidLiteral S SystemLiteral 3711 * 3712 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 3713 * 3714 * Returns the function returns SystemLiteral and in the second 3715 * case publicID receives PubidLiteral, is strict is off 3716 * it is possible to return NULL and have publicID set. 
3717 */ 3718 3719xmlChar * 3720xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { 3721 xmlChar *URI = NULL; 3722 3723 SHRINK; 3724 3725 *publicID = NULL; 3726 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { 3727 SKIP(6); 3728 if (!IS_BLANK_CH(CUR)) { 3729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3730 "Space required after 'SYSTEM'\n"); 3731 } 3732 SKIP_BLANKS; 3733 URI = xmlParseSystemLiteral(ctxt); 3734 if (URI == NULL) { 3735 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3736 } 3737 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { 3738 SKIP(6); 3739 if (!IS_BLANK_CH(CUR)) { 3740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3741 "Space required after 'PUBLIC'\n"); 3742 } 3743 SKIP_BLANKS; 3744 *publicID = xmlParsePubidLiteral(ctxt); 3745 if (*publicID == NULL) { 3746 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); 3747 } 3748 if (strict) { 3749 /* 3750 * We don't handle [83] so "S SystemLiteral" is required. 3751 */ 3752 if (!IS_BLANK_CH(CUR)) { 3753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 3754 "Space required after the Public Identifier\n"); 3755 } 3756 } else { 3757 /* 3758 * We handle [83] so we return immediately, if 3759 * "S SystemLiteral" is not detected. From a purely parsing 3760 * point of view that's a nice mess. 3761 */ 3762 const xmlChar *ptr; 3763 GROW; 3764 3765 ptr = CUR_PTR; 3766 if (!IS_BLANK_CH(*ptr)) return(NULL); 3767 3768 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ 3769 if ((*ptr != '\'') && (*ptr != '"')) return(NULL); 3770 } 3771 SKIP_BLANKS; 3772 URI = xmlParseSystemLiteral(ctxt); 3773 if (URI == NULL) { 3774 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); 3775 } 3776 } 3777 return(URI); 3778} 3779 3780/** 3781 * xmlParseCommentComplex: 3782 * @ctxt: an XML parser context 3783 * @buf: the already parsed part of the buffer 3784 * @len: number of bytes filles in the buffer 3785 * @size: allocated size of the buffer 3786 * 3787 * Skip an XML (SGML) comment <!-- .... 
--> 3788 * The spec says that "For compatibility, the string "--" (double-hyphen) 3789 * must not occur within comments. " 3790 * This is the slow routine in case the accelerator for ascii didn't work 3791 * 3792 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3793 */ 3794static void 3795xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) { 3796 int q, ql; 3797 int r, rl; 3798 int cur, l; 3799 xmlParserInputPtr input = ctxt->input; 3800 int count = 0; 3801 3802 if (buf == NULL) { 3803 len = 0; 3804 size = XML_PARSER_BUFFER_SIZE; 3805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3806 if (buf == NULL) { 3807 xmlErrMemory(ctxt, NULL); 3808 return; 3809 } 3810 } 3811 q = CUR_CHAR(ql); 3812 if (q == 0) 3813 goto not_terminated; 3814 NEXTL(ql); 3815 r = CUR_CHAR(rl); 3816 if (r == 0) 3817 goto not_terminated; 3818 NEXTL(rl); 3819 cur = CUR_CHAR(l); 3820 if (cur == 0) 3821 goto not_terminated; 3822 while (IS_CHAR(cur) && /* checked */ 3823 ((cur != '>') || 3824 (r != '-') || (q != '-'))) { 3825 if ((r == '-') && (q == '-')) { 3826 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); 3827 } 3828 if (len + 5 >= size) { 3829 xmlChar *new_buf; 3830 size *= 2; 3831 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3832 if (new_buf == NULL) { 3833 xmlFree (buf); 3834 xmlErrMemory(ctxt, NULL); 3835 return; 3836 } 3837 buf = new_buf; 3838 } 3839 COPY_BUF(ql,buf,len,q); 3840 q = r; 3841 ql = rl; 3842 r = cur; 3843 rl = l; 3844 3845 count++; 3846 if (count > 50) { 3847 GROW; 3848 count = 0; 3849 } 3850 NEXTL(l); 3851 cur = CUR_CHAR(l); 3852 if (cur == 0) { 3853 SHRINK; 3854 GROW; 3855 cur = CUR_CHAR(l); 3856 } 3857 } 3858 buf[len] = 0; 3859 if (!IS_CHAR(cur)) { 3860 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3861 "Comment not terminated \n<!--%.50s\n", buf); 3862 xmlFree(buf); 3863 } else { 3864 if (input != ctxt->input) { 3865 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 3866 "Comment doesn't 
start and stop in the same entity\n"); 3867 } 3868 NEXT; 3869 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 3870 (!ctxt->disableSAX)) 3871 ctxt->sax->comment(ctxt->userData, buf); 3872 xmlFree(buf); 3873 } 3874 return; 3875not_terminated: 3876 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 3877 "Comment not terminated\n", NULL); 3878 xmlFree(buf); 3879} 3880/** 3881 * xmlParseComment: 3882 * @ctxt: an XML parser context 3883 * 3884 * Skip an XML (SGML) comment <!-- .... --> 3885 * The spec says that "For compatibility, the string "--" (double-hyphen) 3886 * must not occur within comments. " 3887 * 3888 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 3889 */ 3890void 3891xmlParseComment(xmlParserCtxtPtr ctxt) { 3892 xmlChar *buf = NULL; 3893 int size = XML_PARSER_BUFFER_SIZE; 3894 int len = 0; 3895 xmlParserInputState state; 3896 const xmlChar *in; 3897 int nbchar = 0, ccol; 3898 3899 /* 3900 * Check that there is a comment right here. 3901 */ 3902 if ((RAW != '<') || (NXT(1) != '!') || 3903 (NXT(2) != '-') || (NXT(3) != '-')) return; 3904 3905 state = ctxt->instate; 3906 ctxt->instate = XML_PARSER_COMMENT; 3907 SKIP(4); 3908 SHRINK; 3909 GROW; 3910 3911 /* 3912 * Accelerated common case where input don't need to be 3913 * modified before passing it to the handler. 
3914 */ 3915 in = ctxt->input->cur; 3916 do { 3917 if (*in == 0xA) { 3918 do { 3919 ctxt->input->line++; ctxt->input->col = 1; 3920 in++; 3921 } while (*in == 0xA); 3922 } 3923get_more: 3924 ccol = ctxt->input->col; 3925 while (((*in > '-') && (*in <= 0x7F)) || 3926 ((*in >= 0x20) && (*in < '-')) || 3927 (*in == 0x09)) { 3928 in++; 3929 ccol++; 3930 } 3931 ctxt->input->col = ccol; 3932 if (*in == 0xA) { 3933 do { 3934 ctxt->input->line++; ctxt->input->col = 1; 3935 in++; 3936 } while (*in == 0xA); 3937 goto get_more; 3938 } 3939 nbchar = in - ctxt->input->cur; 3940 /* 3941 * save current set of data 3942 */ 3943 if (nbchar > 0) { 3944 if ((ctxt->sax != NULL) && 3945 (ctxt->sax->comment != NULL)) { 3946 if (buf == NULL) { 3947 if ((*in == '-') && (in[1] == '-')) 3948 size = nbchar + 1; 3949 else 3950 size = XML_PARSER_BUFFER_SIZE + nbchar; 3951 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3952 if (buf == NULL) { 3953 xmlErrMemory(ctxt, NULL); 3954 ctxt->instate = state; 3955 return; 3956 } 3957 len = 0; 3958 } else if (len + nbchar + 1 >= size) { 3959 xmlChar *new_buf; 3960 size += len + nbchar + XML_PARSER_BUFFER_SIZE; 3961 new_buf = (xmlChar *) xmlRealloc(buf, 3962 size * sizeof(xmlChar)); 3963 if (new_buf == NULL) { 3964 xmlFree (buf); 3965 xmlErrMemory(ctxt, NULL); 3966 ctxt->instate = state; 3967 return; 3968 } 3969 buf = new_buf; 3970 } 3971 memcpy(&buf[len], ctxt->input->cur, nbchar); 3972 len += nbchar; 3973 buf[len] = 0; 3974 } 3975 } 3976 ctxt->input->cur = in; 3977 if (*in == 0xA) { 3978 in++; 3979 ctxt->input->line++; ctxt->input->col = 1; 3980 } 3981 if (*in == 0xD) { 3982 in++; 3983 if (*in == 0xA) { 3984 ctxt->input->cur = in; 3985 in++; 3986 ctxt->input->line++; ctxt->input->col = 1; 3987 continue; /* while */ 3988 } 3989 in--; 3990 } 3991 SHRINK; 3992 GROW; 3993 in = ctxt->input->cur; 3994 if (*in == '-') { 3995 if (in[1] == '-') { 3996 if (in[2] == '>') { 3997 SKIP(3); 3998 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 
3999 (!ctxt->disableSAX)) { 4000 if (buf != NULL) 4001 ctxt->sax->comment(ctxt->userData, buf); 4002 else 4003 ctxt->sax->comment(ctxt->userData, BAD_CAST ""); 4004 } 4005 if (buf != NULL) 4006 xmlFree(buf); 4007 ctxt->instate = state; 4008 return; 4009 } 4010 if (buf != NULL) 4011 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4012 "Comment not terminated \n<!--%.50s\n", 4013 buf); 4014 else 4015 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, 4016 "Comment not terminated \n", NULL); 4017 in++; 4018 ctxt->input->col++; 4019 } 4020 in++; 4021 ctxt->input->col++; 4022 goto get_more; 4023 } 4024 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); 4025 xmlParseCommentComplex(ctxt, buf, len, size); 4026 ctxt->instate = state; 4027 return; 4028} 4029 4030 4031/** 4032 * xmlParsePITarget: 4033 * @ctxt: an XML parser context 4034 * 4035 * parse the name of a PI 4036 * 4037 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 4038 * 4039 * Returns the PITarget name or NULL 4040 */ 4041 4042const xmlChar * 4043xmlParsePITarget(xmlParserCtxtPtr ctxt) { 4044 const xmlChar *name; 4045 4046 name = xmlParseName(ctxt); 4047 if ((name != NULL) && 4048 ((name[0] == 'x') || (name[0] == 'X')) && 4049 ((name[1] == 'm') || (name[1] == 'M')) && 4050 ((name[2] == 'l') || (name[2] == 'L'))) { 4051 int i; 4052 if ((name[0] == 'x') && (name[1] == 'm') && 4053 (name[2] == 'l') && (name[3] == 0)) { 4054 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4055 "XML declaration allowed only at the start of the document\n"); 4056 return(name); 4057 } else if (name[3] == 0) { 4058 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); 4059 return(name); 4060 } 4061 for (i = 0;;i++) { 4062 if (xmlW3CPIs[i] == NULL) break; 4063 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) 4064 return(name); 4065 } 4066 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, 4067 "xmlParsePITarget: invalid name prefix 'xml'\n", 4068 NULL, NULL); 4069 } 4070 return(name); 4071} 4072 
4073#ifdef LIBXML_CATALOG_ENABLED 4074/** 4075 * xmlParseCatalogPI: 4076 * @ctxt: an XML parser context 4077 * @catalog: the PI value string 4078 * 4079 * parse an XML Catalog Processing Instruction. 4080 * 4081 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> 4082 * 4083 * Occurs only if allowed by the user and if happening in the Misc 4084 * part of the document before any doctype informations 4085 * This will add the given catalog to the parsing context in order 4086 * to be used if there is a resolution need further down in the document 4087 */ 4088 4089static void 4090xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { 4091 xmlChar *URL = NULL; 4092 const xmlChar *tmp, *base; 4093 xmlChar marker; 4094 4095 tmp = catalog; 4096 while (IS_BLANK_CH(*tmp)) tmp++; 4097 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) 4098 goto error; 4099 tmp += 7; 4100 while (IS_BLANK_CH(*tmp)) tmp++; 4101 if (*tmp != '=') { 4102 return; 4103 } 4104 tmp++; 4105 while (IS_BLANK_CH(*tmp)) tmp++; 4106 marker = *tmp; 4107 if ((marker != '\'') && (marker != '"')) 4108 goto error; 4109 tmp++; 4110 base = tmp; 4111 while ((*tmp != 0) && (*tmp != marker)) tmp++; 4112 if (*tmp == 0) 4113 goto error; 4114 URL = xmlStrndup(base, tmp - base); 4115 tmp++; 4116 while (IS_BLANK_CH(*tmp)) tmp++; 4117 if (*tmp != 0) 4118 goto error; 4119 4120 if (URL != NULL) { 4121 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); 4122 xmlFree(URL); 4123 } 4124 return; 4125 4126error: 4127 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, 4128 "Catalog PI syntax error: %s\n", 4129 catalog, NULL); 4130 if (URL != NULL) 4131 xmlFree(URL); 4132} 4133#endif 4134 4135/** 4136 * xmlParsePI: 4137 * @ctxt: an XML parser context 4138 * 4139 * parse an XML Processing Instruction. 4140 * 4141 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 4142 * 4143 * The processing is transfered to SAX once parsed. 
4144 */ 4145 4146void 4147xmlParsePI(xmlParserCtxtPtr ctxt) { 4148 xmlChar *buf = NULL; 4149 int len = 0; 4150 int size = XML_PARSER_BUFFER_SIZE; 4151 int cur, l; 4152 const xmlChar *target; 4153 xmlParserInputState state; 4154 int count = 0; 4155 4156 if ((RAW == '<') && (NXT(1) == '?')) { 4157 xmlParserInputPtr input = ctxt->input; 4158 state = ctxt->instate; 4159 ctxt->instate = XML_PARSER_PI; 4160 /* 4161 * this is a Processing Instruction. 4162 */ 4163 SKIP(2); 4164 SHRINK; 4165 4166 /* 4167 * Parse the target name and check for special support like 4168 * namespace. 4169 */ 4170 target = xmlParsePITarget(ctxt); 4171 if (target != NULL) { 4172 if ((RAW == '?') && (NXT(1) == '>')) { 4173 if (input != ctxt->input) { 4174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4175 "PI declaration doesn't start and stop in the same entity\n"); 4176 } 4177 SKIP(2); 4178 4179 /* 4180 * SAX: PI detected. 4181 */ 4182 if ((ctxt->sax) && (!ctxt->disableSAX) && 4183 (ctxt->sax->processingInstruction != NULL)) 4184 ctxt->sax->processingInstruction(ctxt->userData, 4185 target, NULL); 4186 ctxt->instate = state; 4187 return; 4188 } 4189 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 4190 if (buf == NULL) { 4191 xmlErrMemory(ctxt, NULL); 4192 ctxt->instate = state; 4193 return; 4194 } 4195 cur = CUR; 4196 if (!IS_BLANK(cur)) { 4197 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, 4198 "ParsePI: PI %s space expected\n", target); 4199 } 4200 SKIP_BLANKS; 4201 cur = CUR_CHAR(l); 4202 while (IS_CHAR(cur) && /* checked */ 4203 ((cur != '?') || (NXT(1) != '>'))) { 4204 if (len + 5 >= size) { 4205 xmlChar *tmp; 4206 4207 size *= 2; 4208 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 4209 if (tmp == NULL) { 4210 xmlErrMemory(ctxt, NULL); 4211 xmlFree(buf); 4212 ctxt->instate = state; 4213 return; 4214 } 4215 buf = tmp; 4216 } 4217 count++; 4218 if (count > 50) { 4219 GROW; 4220 count = 0; 4221 } 4222 COPY_BUF(l,buf,len,cur); 4223 NEXTL(l); 4224 cur = CUR_CHAR(l); 4225 
if (cur == 0) { 4226 SHRINK; 4227 GROW; 4228 cur = CUR_CHAR(l); 4229 } 4230 } 4231 buf[len] = 0; 4232 if (cur != '?') { 4233 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, 4234 "ParsePI: PI %s never end ...\n", target); 4235 } else { 4236 if (input != ctxt->input) { 4237 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4238 "PI declaration doesn't start and stop in the same entity\n"); 4239 } 4240 SKIP(2); 4241 4242#ifdef LIBXML_CATALOG_ENABLED 4243 if (((state == XML_PARSER_MISC) || 4244 (state == XML_PARSER_START)) && 4245 (xmlStrEqual(target, XML_CATALOG_PI))) { 4246 xmlCatalogAllow allow = xmlCatalogGetDefaults(); 4247 if ((allow == XML_CATA_ALLOW_DOCUMENT) || 4248 (allow == XML_CATA_ALLOW_ALL)) 4249 xmlParseCatalogPI(ctxt, buf); 4250 } 4251#endif 4252 4253 4254 /* 4255 * SAX: PI detected. 4256 */ 4257 if ((ctxt->sax) && (!ctxt->disableSAX) && 4258 (ctxt->sax->processingInstruction != NULL)) 4259 ctxt->sax->processingInstruction(ctxt->userData, 4260 target, buf); 4261 } 4262 xmlFree(buf); 4263 } else { 4264 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL); 4265 } 4266 ctxt->instate = state; 4267 } 4268} 4269 4270/** 4271 * xmlParseNotationDecl: 4272 * @ctxt: an XML parser context 4273 * 4274 * parse a notation declaration 4275 * 4276 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 4277 * 4278 * Hence there is actually 3 choices: 4279 * 'PUBLIC' S PubidLiteral 4280 * 'PUBLIC' S PubidLiteral S SystemLiteral 4281 * and 'SYSTEM' S SystemLiteral 4282 * 4283 * See the NOTE on xmlParseExternalID(). 
4284 */ 4285 4286void 4287xmlParseNotationDecl(xmlParserCtxtPtr ctxt) { 4288 const xmlChar *name; 4289 xmlChar *Pubid; 4290 xmlChar *Systemid; 4291 4292 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4293 xmlParserInputPtr input = ctxt->input; 4294 SHRINK; 4295 SKIP(10); 4296 if (!IS_BLANK_CH(CUR)) { 4297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4298 "Space required after '<!NOTATION'\n"); 4299 return; 4300 } 4301 SKIP_BLANKS; 4302 4303 name = xmlParseName(ctxt); 4304 if (name == NULL) { 4305 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4306 return; 4307 } 4308 if (!IS_BLANK_CH(CUR)) { 4309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4310 "Space required after the NOTATION name'\n"); 4311 return; 4312 } 4313 SKIP_BLANKS; 4314 4315 /* 4316 * Parse the IDs. 4317 */ 4318 Systemid = xmlParseExternalID(ctxt, &Pubid, 0); 4319 SKIP_BLANKS; 4320 4321 if (RAW == '>') { 4322 if (input != ctxt->input) { 4323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4324 "Notation declaration doesn't start and stop in the same entity\n"); 4325 } 4326 NEXT; 4327 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4328 (ctxt->sax->notationDecl != NULL)) 4329 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid); 4330 } else { 4331 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4332 } 4333 if (Systemid != NULL) xmlFree(Systemid); 4334 if (Pubid != NULL) xmlFree(Pubid); 4335 } 4336} 4337 4338/** 4339 * xmlParseEntityDecl: 4340 * @ctxt: an XML parser context 4341 * 4342 * parse <!ENTITY declarations 4343 * 4344 * [70] EntityDecl ::= GEDecl | PEDecl 4345 * 4346 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 4347 * 4348 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 4349 * 4350 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
4351 * 4352 * [74] PEDef ::= EntityValue | ExternalID 4353 * 4354 * [76] NDataDecl ::= S 'NDATA' S Name 4355 * 4356 * [ VC: Notation Declared ] 4357 * The Name must match the declared name of a notation. 4358 */ 4359 4360void 4361xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { 4362 const xmlChar *name = NULL; 4363 xmlChar *value = NULL; 4364 xmlChar *URI = NULL, *literal = NULL; 4365 const xmlChar *ndata = NULL; 4366 int isParameter = 0; 4367 xmlChar *orig = NULL; 4368 int skipped; 4369 4370 /* GROW; done in the caller */ 4371 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) { 4372 xmlParserInputPtr input = ctxt->input; 4373 SHRINK; 4374 SKIP(8); 4375 skipped = SKIP_BLANKS; 4376 if (skipped == 0) { 4377 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4378 "Space required after '<!ENTITY'\n"); 4379 } 4380 4381 if (RAW == '%') { 4382 NEXT; 4383 skipped = SKIP_BLANKS; 4384 if (skipped == 0) { 4385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4386 "Space required after '%'\n"); 4387 } 4388 isParameter = 1; 4389 } 4390 4391 name = xmlParseName(ctxt); 4392 if (name == NULL) { 4393 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4394 "xmlParseEntityDecl: no name\n"); 4395 return; 4396 } 4397 skipped = SKIP_BLANKS; 4398 if (skipped == 0) { 4399 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4400 "Space required after the entity name\n"); 4401 } 4402 4403 ctxt->instate = XML_PARSER_ENTITY_DECL; 4404 /* 4405 * handle the various case of definitions... 
4406 */ 4407 if (isParameter) { 4408 if ((RAW == '"') || (RAW == '\'')) { 4409 value = xmlParseEntityValue(ctxt, &orig); 4410 if (value) { 4411 if ((ctxt->sax != NULL) && 4412 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4413 ctxt->sax->entityDecl(ctxt->userData, name, 4414 XML_INTERNAL_PARAMETER_ENTITY, 4415 NULL, NULL, value); 4416 } 4417 } else { 4418 URI = xmlParseExternalID(ctxt, &literal, 1); 4419 if ((URI == NULL) && (literal == NULL)) { 4420 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4421 } 4422 if (URI) { 4423 xmlURIPtr uri; 4424 4425 uri = xmlParseURI((const char *) URI); 4426 if (uri == NULL) { 4427 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4428 "Invalid URI: %s\n", URI); 4429 /* 4430 * This really ought to be a well formedness error 4431 * but the XML Core WG decided otherwise c.f. issue 4432 * E26 of the XML erratas. 4433 */ 4434 } else { 4435 if (uri->fragment != NULL) { 4436 /* 4437 * Okay this is foolish to block those but not 4438 * invalid URIs. 4439 */ 4440 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4441 } else { 4442 if ((ctxt->sax != NULL) && 4443 (!ctxt->disableSAX) && 4444 (ctxt->sax->entityDecl != NULL)) 4445 ctxt->sax->entityDecl(ctxt->userData, name, 4446 XML_EXTERNAL_PARAMETER_ENTITY, 4447 literal, URI, NULL); 4448 } 4449 xmlFreeURI(uri); 4450 } 4451 } 4452 } 4453 } else { 4454 if ((RAW == '"') || (RAW == '\'')) { 4455 value = xmlParseEntityValue(ctxt, &orig); 4456 if ((ctxt->sax != NULL) && 4457 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4458 ctxt->sax->entityDecl(ctxt->userData, name, 4459 XML_INTERNAL_GENERAL_ENTITY, 4460 NULL, NULL, value); 4461 /* 4462 * For expat compatibility in SAX mode. 
4463 */ 4464 if ((ctxt->myDoc == NULL) || 4465 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 4466 if (ctxt->myDoc == NULL) { 4467 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4468 } 4469 if (ctxt->myDoc->intSubset == NULL) 4470 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4471 BAD_CAST "fake", NULL, NULL); 4472 4473 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY, 4474 NULL, NULL, value); 4475 } 4476 } else { 4477 URI = xmlParseExternalID(ctxt, &literal, 1); 4478 if ((URI == NULL) && (literal == NULL)) { 4479 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL); 4480 } 4481 if (URI) { 4482 xmlURIPtr uri; 4483 4484 uri = xmlParseURI((const char *)URI); 4485 if (uri == NULL) { 4486 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI, 4487 "Invalid URI: %s\n", URI); 4488 /* 4489 * This really ought to be a well formedness error 4490 * but the XML Core WG decided otherwise c.f. issue 4491 * E26 of the XML erratas. 4492 */ 4493 } else { 4494 if (uri->fragment != NULL) { 4495 /* 4496 * Okay this is foolish to block those but not 4497 * invalid URIs. 
4498 */ 4499 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL); 4500 } 4501 xmlFreeURI(uri); 4502 } 4503 } 4504 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) { 4505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4506 "Space required before 'NDATA'\n"); 4507 } 4508 SKIP_BLANKS; 4509 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) { 4510 SKIP(5); 4511 if (!IS_BLANK_CH(CUR)) { 4512 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4513 "Space required after 'NDATA'\n"); 4514 } 4515 SKIP_BLANKS; 4516 ndata = xmlParseName(ctxt); 4517 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4518 (ctxt->sax->unparsedEntityDecl != NULL)) 4519 ctxt->sax->unparsedEntityDecl(ctxt->userData, name, 4520 literal, URI, ndata); 4521 } else { 4522 if ((ctxt->sax != NULL) && 4523 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) 4524 ctxt->sax->entityDecl(ctxt->userData, name, 4525 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4526 literal, URI, NULL); 4527 /* 4528 * For expat compatibility in SAX mode. 4529 * assuming the entity repalcement was asked for 4530 */ 4531 if ((ctxt->replaceEntities != 0) && 4532 ((ctxt->myDoc == NULL) || 4533 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) { 4534 if (ctxt->myDoc == NULL) { 4535 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE); 4536 } 4537 4538 if (ctxt->myDoc->intSubset == NULL) 4539 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc, 4540 BAD_CAST "fake", NULL, NULL); 4541 xmlSAX2EntityDecl(ctxt, name, 4542 XML_EXTERNAL_GENERAL_PARSED_ENTITY, 4543 literal, URI, NULL); 4544 } 4545 } 4546 } 4547 } 4548 SKIP_BLANKS; 4549 if (RAW != '>') { 4550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, 4551 "xmlParseEntityDecl: entity %s not terminated\n", name); 4552 } else { 4553 if (input != ctxt->input) { 4554 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4555 "Entity declaration doesn't start and stop in the same entity\n"); 4556 } 4557 NEXT; 4558 } 4559 if (orig != NULL) { 4560 /* 4561 * Ugly mechanism to save the raw entity value. 
4562 */ 4563 xmlEntityPtr cur = NULL; 4564 4565 if (isParameter) { 4566 if ((ctxt->sax != NULL) && 4567 (ctxt->sax->getParameterEntity != NULL)) 4568 cur = ctxt->sax->getParameterEntity(ctxt->userData, name); 4569 } else { 4570 if ((ctxt->sax != NULL) && 4571 (ctxt->sax->getEntity != NULL)) 4572 cur = ctxt->sax->getEntity(ctxt->userData, name); 4573 if ((cur == NULL) && (ctxt->userData==ctxt)) { 4574 cur = xmlSAX2GetEntity(ctxt, name); 4575 } 4576 } 4577 if (cur != NULL) { 4578 if (cur->orig != NULL) 4579 xmlFree(orig); 4580 else 4581 cur->orig = orig; 4582 } else 4583 xmlFree(orig); 4584 } 4585 if (value != NULL) xmlFree(value); 4586 if (URI != NULL) xmlFree(URI); 4587 if (literal != NULL) xmlFree(literal); 4588 } 4589} 4590 4591/** 4592 * xmlParseDefaultDecl: 4593 * @ctxt: an XML parser context 4594 * @value: Receive a possible fixed default value for the attribute 4595 * 4596 * Parse an attribute default declaration 4597 * 4598 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) 4599 * 4600 * [ VC: Required Attribute ] 4601 * if the default declaration is the keyword #REQUIRED, then the 4602 * attribute must be specified for all elements of the type in the 4603 * attribute-list declaration. 4604 * 4605 * [ VC: Attribute Default Legal ] 4606 * The declared default value must meet the lexical constraints of 4607 * the declared attribute type c.f. xmlValidateAttributeDecl() 4608 * 4609 * [ VC: Fixed Attribute Default ] 4610 * if an attribute has a default value declared with the #FIXED 4611 * keyword, instances of that attribute must match the default value. 4612 * 4613 * [ WFC: No < in Attribute Values ] 4614 * handled in xmlParseAttValue() 4615 * 4616 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED 4617 * or XML_ATTRIBUTE_FIXED. 
4618 */ 4619 4620int 4621xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { 4622 int val; 4623 xmlChar *ret; 4624 4625 *value = NULL; 4626 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) { 4627 SKIP(9); 4628 return(XML_ATTRIBUTE_REQUIRED); 4629 } 4630 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) { 4631 SKIP(8); 4632 return(XML_ATTRIBUTE_IMPLIED); 4633 } 4634 val = XML_ATTRIBUTE_NONE; 4635 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) { 4636 SKIP(6); 4637 val = XML_ATTRIBUTE_FIXED; 4638 if (!IS_BLANK_CH(CUR)) { 4639 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4640 "Space required after '#FIXED'\n"); 4641 } 4642 SKIP_BLANKS; 4643 } 4644 ret = xmlParseAttValue(ctxt); 4645 ctxt->instate = XML_PARSER_DTD; 4646 if (ret == NULL) { 4647 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo, 4648 "Attribute default value declaration error\n"); 4649 } else 4650 *value = ret; 4651 return(val); 4652} 4653 4654/** 4655 * xmlParseNotationType: 4656 * @ctxt: an XML parser context 4657 * 4658 * parse an Notation attribute type. 4659 * 4660 * Note: the leading 'NOTATION' S part has already being parsed... 4661 * 4662 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 4663 * 4664 * [ VC: Notation Attributes ] 4665 * Values of this type must match one of the notation names included 4666 * in the declaration; all notation names in the declaration must be declared. 
4667 * 4668 * Returns: the notation attribute tree built while parsing 4669 */ 4670 4671xmlEnumerationPtr 4672xmlParseNotationType(xmlParserCtxtPtr ctxt) { 4673 const xmlChar *name; 4674 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4675 4676 if (RAW != '(') { 4677 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL); 4678 return(NULL); 4679 } 4680 SHRINK; 4681 do { 4682 NEXT; 4683 SKIP_BLANKS; 4684 name = xmlParseName(ctxt); 4685 if (name == NULL) { 4686 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4687 "Name expected in NOTATION declaration\n"); 4688 return(ret); 4689 } 4690 cur = xmlCreateEnumeration(name); 4691 if (cur == NULL) return(ret); 4692 if (last == NULL) ret = last = cur; 4693 else { 4694 last->next = cur; 4695 last = cur; 4696 } 4697 SKIP_BLANKS; 4698 } while (RAW == '|'); 4699 if (RAW != ')') { 4700 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL); 4701 if ((last != NULL) && (last != ret)) 4702 xmlFreeEnumeration(last); 4703 return(ret); 4704 } 4705 NEXT; 4706 return(ret); 4707} 4708 4709/** 4710 * xmlParseEnumerationType: 4711 * @ctxt: an XML parser context 4712 * 4713 * parse an Enumeration attribute type. 4714 * 4715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? 
')' 4716 * 4717 * [ VC: Enumeration ] 4718 * Values of this type must match one of the Nmtoken tokens in 4719 * the declaration 4720 * 4721 * Returns: the enumeration attribute tree built while parsing 4722 */ 4723 4724xmlEnumerationPtr 4725xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { 4726 xmlChar *name; 4727 xmlEnumerationPtr ret = NULL, last = NULL, cur; 4728 4729 if (RAW != '(') { 4730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL); 4731 return(NULL); 4732 } 4733 SHRINK; 4734 do { 4735 NEXT; 4736 SKIP_BLANKS; 4737 name = xmlParseNmtoken(ctxt); 4738 if (name == NULL) { 4739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL); 4740 return(ret); 4741 } 4742 cur = xmlCreateEnumeration(name); 4743 xmlFree(name); 4744 if (cur == NULL) return(ret); 4745 if (last == NULL) ret = last = cur; 4746 else { 4747 last->next = cur; 4748 last = cur; 4749 } 4750 SKIP_BLANKS; 4751 } while (RAW == '|'); 4752 if (RAW != ')') { 4753 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL); 4754 return(ret); 4755 } 4756 NEXT; 4757 return(ret); 4758} 4759 4760/** 4761 * xmlParseEnumeratedType: 4762 * @ctxt: an XML parser context 4763 * @tree: the enumeration tree built while parsing 4764 * 4765 * parse an Enumerated attribute type. 4766 * 4767 * [57] EnumeratedType ::= NotationType | Enumeration 4768 * 4769 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? 
')' 4770 * 4771 * 4772 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION 4773 */ 4774 4775int 4776xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4777 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) { 4778 SKIP(8); 4779 if (!IS_BLANK_CH(CUR)) { 4780 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4781 "Space required after 'NOTATION'\n"); 4782 return(0); 4783 } 4784 SKIP_BLANKS; 4785 *tree = xmlParseNotationType(ctxt); 4786 if (*tree == NULL) return(0); 4787 return(XML_ATTRIBUTE_NOTATION); 4788 } 4789 *tree = xmlParseEnumerationType(ctxt); 4790 if (*tree == NULL) return(0); 4791 return(XML_ATTRIBUTE_ENUMERATION); 4792} 4793 4794/** 4795 * xmlParseAttributeType: 4796 * @ctxt: an XML parser context 4797 * @tree: the enumeration tree built while parsing 4798 * 4799 * parse the Attribute list def for an element 4800 * 4801 * [54] AttType ::= StringType | TokenizedType | EnumeratedType 4802 * 4803 * [55] StringType ::= 'CDATA' 4804 * 4805 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 4806 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' 4807 * 4808 * Validity constraints for attribute values syntax are checked in 4809 * xmlValidateAttributeValue() 4810 * 4811 * [ VC: ID ] 4812 * Values of type ID must match the Name production. A name must not 4813 * appear more than once in an XML document as a value of this type; 4814 * i.e., ID values must uniquely identify the elements which bear them. 4815 * 4816 * [ VC: One ID per Element Type ] 4817 * No element type may have more than one ID attribute specified. 4818 * 4819 * [ VC: ID Attribute Default ] 4820 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED. 4821 * 4822 * [ VC: IDREF ] 4823 * Values of type IDREF must match the Name production, and values 4824 * of type IDREFS must match Names; each IDREF Name must match the value 4825 * of an ID attribute on some element in the XML document; i.e. 
IDREF 4826 * values must match the value of some ID attribute. 4827 * 4828 * [ VC: Entity Name ] 4829 * Values of type ENTITY must match the Name production, values 4830 * of type ENTITIES must match Names; each Entity Name must match the 4831 * name of an unparsed entity declared in the DTD. 4832 * 4833 * [ VC: Name Token ] 4834 * Values of type NMTOKEN must match the Nmtoken production; values 4835 * of type NMTOKENS must match Nmtokens. 4836 * 4837 * Returns the attribute type 4838 */ 4839int 4840xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { 4841 SHRINK; 4842 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) { 4843 SKIP(5); 4844 return(XML_ATTRIBUTE_CDATA); 4845 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) { 4846 SKIP(6); 4847 return(XML_ATTRIBUTE_IDREFS); 4848 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) { 4849 SKIP(5); 4850 return(XML_ATTRIBUTE_IDREF); 4851 } else if ((RAW == 'I') && (NXT(1) == 'D')) { 4852 SKIP(2); 4853 return(XML_ATTRIBUTE_ID); 4854 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) { 4855 SKIP(6); 4856 return(XML_ATTRIBUTE_ENTITY); 4857 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) { 4858 SKIP(8); 4859 return(XML_ATTRIBUTE_ENTITIES); 4860 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) { 4861 SKIP(8); 4862 return(XML_ATTRIBUTE_NMTOKENS); 4863 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) { 4864 SKIP(7); 4865 return(XML_ATTRIBUTE_NMTOKEN); 4866 } 4867 return(xmlParseEnumeratedType(ctxt, tree)); 4868} 4869 4870/** 4871 * xmlParseAttributeListDecl: 4872 * @ctxt: an XML parser context 4873 * 4874 * : parse the Attribute list def for an element 4875 * 4876 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? 
'>' 4877 * 4878 * [53] AttDef ::= S Name S AttType S DefaultDecl 4879 * 4880 */ 4881void 4882xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { 4883 const xmlChar *elemName; 4884 const xmlChar *attrName; 4885 xmlEnumerationPtr tree; 4886 4887 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) { 4888 xmlParserInputPtr input = ctxt->input; 4889 4890 SKIP(9); 4891 if (!IS_BLANK_CH(CUR)) { 4892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4893 "Space required after '<!ATTLIST'\n"); 4894 } 4895 SKIP_BLANKS; 4896 elemName = xmlParseName(ctxt); 4897 if (elemName == NULL) { 4898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4899 "ATTLIST: no name for Element\n"); 4900 return; 4901 } 4902 SKIP_BLANKS; 4903 GROW; 4904 while (RAW != '>') { 4905 const xmlChar *check = CUR_PTR; 4906 int type; 4907 int def; 4908 xmlChar *defaultValue = NULL; 4909 4910 GROW; 4911 tree = NULL; 4912 attrName = xmlParseName(ctxt); 4913 if (attrName == NULL) { 4914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 4915 "ATTLIST: no name for Attribute\n"); 4916 break; 4917 } 4918 GROW; 4919 if (!IS_BLANK_CH(CUR)) { 4920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4921 "Space required after the attribute name\n"); 4922 break; 4923 } 4924 SKIP_BLANKS; 4925 4926 type = xmlParseAttributeType(ctxt, &tree); 4927 if (type <= 0) { 4928 break; 4929 } 4930 4931 GROW; 4932 if (!IS_BLANK_CH(CUR)) { 4933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4934 "Space required after the attribute type\n"); 4935 if (tree != NULL) 4936 xmlFreeEnumeration(tree); 4937 break; 4938 } 4939 SKIP_BLANKS; 4940 4941 def = xmlParseDefaultDecl(ctxt, &defaultValue); 4942 if (def <= 0) { 4943 if (defaultValue != NULL) 4944 xmlFree(defaultValue); 4945 if (tree != NULL) 4946 xmlFreeEnumeration(tree); 4947 break; 4948 } 4949 4950 GROW; 4951 if (RAW != '>') { 4952 if (!IS_BLANK_CH(CUR)) { 4953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 4954 "Space required after the attribute default value\n"); 4955 if (defaultValue != NULL) 4956 
xmlFree(defaultValue); 4957 if (tree != NULL) 4958 xmlFreeEnumeration(tree); 4959 break; 4960 } 4961 SKIP_BLANKS; 4962 } 4963 if (check == CUR_PTR) { 4964 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 4965 "in xmlParseAttributeListDecl\n"); 4966 if (defaultValue != NULL) 4967 xmlFree(defaultValue); 4968 if (tree != NULL) 4969 xmlFreeEnumeration(tree); 4970 break; 4971 } 4972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 4973 (ctxt->sax->attributeDecl != NULL)) 4974 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName, 4975 type, def, defaultValue, tree); 4976 else if (tree != NULL) 4977 xmlFreeEnumeration(tree); 4978 4979 if ((ctxt->sax2) && (defaultValue != NULL) && 4980 (def != XML_ATTRIBUTE_IMPLIED) && 4981 (def != XML_ATTRIBUTE_REQUIRED)) { 4982 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue); 4983 } 4984 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) { 4985 xmlAddSpecialAttr(ctxt, elemName, attrName, type); 4986 } 4987 if (defaultValue != NULL) 4988 xmlFree(defaultValue); 4989 GROW; 4990 } 4991 if (RAW == '>') { 4992 if (input != ctxt->input) { 4993 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 4994 "Attribute list declaration doesn't start and stop in the same entity\n"); 4995 } 4996 NEXT; 4997 } 4998 } 4999} 5000 5001/** 5002 * xmlParseElementMixedContentDecl: 5003 * @ctxt: an XML parser context 5004 * @inputchk: the input used for the current entity, needed for boundary checks 5005 * 5006 * parse the declaration for a Mixed Element content 5007 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5008 * 5009 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | 5010 * '(' S? '#PCDATA' S? ')' 5011 * 5012 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49]) 5013 * 5014 * [ VC: No Duplicate Types ] 5015 * The same name must not appear more than once in a single 5016 * mixed-content declaration. 
5017 * 5018 * returns: the list of the xmlElementContentPtr describing the element choices 5019 */ 5020xmlElementContentPtr 5021xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { 5022 xmlElementContentPtr ret = NULL, cur = NULL, n; 5023 const xmlChar *elem = NULL; 5024 5025 GROW; 5026 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5027 SKIP(7); 5028 SKIP_BLANKS; 5029 SHRINK; 5030 if (RAW == ')') { 5031 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5032 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5033"Element content declaration doesn't start and stop in the same entity\n", 5034 NULL); 5035 } 5036 NEXT; 5037 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5038 if (RAW == '*') { 5039 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5040 NEXT; 5041 } 5042 return(ret); 5043 } 5044 if ((RAW == '(') || (RAW == '|')) { 5045 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA); 5046 if (ret == NULL) return(NULL); 5047 } 5048 while (RAW == '|') { 5049 NEXT; 5050 if (elem == NULL) { 5051 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5052 if (ret == NULL) return(NULL); 5053 ret->c1 = cur; 5054 if (cur != NULL) 5055 cur->parent = ret; 5056 cur = ret; 5057 } else { 5058 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5059 if (n == NULL) return(NULL); 5060 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5061 if (n->c1 != NULL) 5062 n->c1->parent = n; 5063 cur->c2 = n; 5064 if (n != NULL) 5065 n->parent = cur; 5066 cur = n; 5067 } 5068 SKIP_BLANKS; 5069 elem = xmlParseName(ctxt); 5070 if (elem == NULL) { 5071 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5072 "xmlParseElementMixedContentDecl : Name expected\n"); 5073 xmlFreeDocElementContent(ctxt->myDoc, cur); 5074 return(NULL); 5075 } 5076 SKIP_BLANKS; 5077 GROW; 5078 } 5079 if ((RAW == ')') && (NXT(1) == '*')) { 5080 if (elem != NULL) { 5081 cur->c2 = 
xmlNewDocElementContent(ctxt->myDoc, elem, 5082 XML_ELEMENT_CONTENT_ELEMENT); 5083 if (cur->c2 != NULL) 5084 cur->c2->parent = cur; 5085 } 5086 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5087 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5088 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5089"Element content declaration doesn't start and stop in the same entity\n", 5090 NULL); 5091 } 5092 SKIP(2); 5093 } else { 5094 xmlFreeDocElementContent(ctxt->myDoc, ret); 5095 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL); 5096 return(NULL); 5097 } 5098 5099 } else { 5100 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); 5101 } 5102 return(ret); 5103} 5104 5105/** 5106 * xmlParseElementChildrenContentDecl: 5107 * @ctxt: an XML parser context 5108 * @inputchk: the input used for the current entity, needed for boundary checks 5109 * 5110 * parse the declaration for a Mixed Element content 5111 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl 5112 * 5113 * 5114 * [47] children ::= (choice | seq) ('?' | '*' | '+')? 5115 * 5116 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 5117 * 5118 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 5119 * 5120 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 5121 * 5122 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50] 5123 * TODO Parameter-entity replacement text must be properly nested 5124 * with parenthesized groups. That is to say, if either of the 5125 * opening or closing parentheses in a choice, seq, or Mixed 5126 * construct is contained in the replacement text for a parameter 5127 * entity, both must be contained in the same replacement text. For 5128 * interoperability, if a parameter-entity reference appears in a 5129 * choice, seq, or Mixed construct, its replacement text should not 5130 * be empty, and neither the first nor last non-blank character of 5131 * the replacement text should be a connector (| or ,). 
5132 * 5133 * Returns the tree of xmlElementContentPtr describing the element 5134 * hierarchy. 5135 */ 5136xmlElementContentPtr 5137xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) { 5138 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; 5139 const xmlChar *elem; 5140 xmlChar type = 0; 5141 5142 SKIP_BLANKS; 5143 GROW; 5144 if (RAW == '(') { 5145 int inputid = ctxt->input->id; 5146 5147 /* Recurse on first child */ 5148 NEXT; 5149 SKIP_BLANKS; 5150 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid); 5151 SKIP_BLANKS; 5152 GROW; 5153 } else { 5154 elem = xmlParseName(ctxt); 5155 if (elem == NULL) { 5156 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5157 return(NULL); 5158 } 5159 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5160 if (cur == NULL) { 5161 xmlErrMemory(ctxt, NULL); 5162 return(NULL); 5163 } 5164 GROW; 5165 if (RAW == '?') { 5166 cur->ocur = XML_ELEMENT_CONTENT_OPT; 5167 NEXT; 5168 } else if (RAW == '*') { 5169 cur->ocur = XML_ELEMENT_CONTENT_MULT; 5170 NEXT; 5171 } else if (RAW == '+') { 5172 cur->ocur = XML_ELEMENT_CONTENT_PLUS; 5173 NEXT; 5174 } else { 5175 cur->ocur = XML_ELEMENT_CONTENT_ONCE; 5176 } 5177 GROW; 5178 } 5179 SKIP_BLANKS; 5180 SHRINK; 5181 while (RAW != ')') { 5182 /* 5183 * Each loop we parse one separator and one element. 
5184 */ 5185 if (RAW == ',') { 5186 if (type == 0) type = CUR; 5187 5188 /* 5189 * Detect "Name | Name , Name" error 5190 */ 5191 else if (type != CUR) { 5192 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5193 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5194 type); 5195 if ((last != NULL) && (last != ret)) 5196 xmlFreeDocElementContent(ctxt->myDoc, last); 5197 if (ret != NULL) 5198 xmlFreeDocElementContent(ctxt->myDoc, ret); 5199 return(NULL); 5200 } 5201 NEXT; 5202 5203 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ); 5204 if (op == NULL) { 5205 if ((last != NULL) && (last != ret)) 5206 xmlFreeDocElementContent(ctxt->myDoc, last); 5207 xmlFreeDocElementContent(ctxt->myDoc, ret); 5208 return(NULL); 5209 } 5210 if (last == NULL) { 5211 op->c1 = ret; 5212 if (ret != NULL) 5213 ret->parent = op; 5214 ret = cur = op; 5215 } else { 5216 cur->c2 = op; 5217 if (op != NULL) 5218 op->parent = cur; 5219 op->c1 = last; 5220 if (last != NULL) 5221 last->parent = op; 5222 cur =op; 5223 last = NULL; 5224 } 5225 } else if (RAW == '|') { 5226 if (type == 0) type = CUR; 5227 5228 /* 5229 * Detect "Name , Name | Name" error 5230 */ 5231 else if (type != CUR) { 5232 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED, 5233 "xmlParseElementChildrenContentDecl : '%c' expected\n", 5234 type); 5235 if ((last != NULL) && (last != ret)) 5236 xmlFreeDocElementContent(ctxt->myDoc, last); 5237 if (ret != NULL) 5238 xmlFreeDocElementContent(ctxt->myDoc, ret); 5239 return(NULL); 5240 } 5241 NEXT; 5242 5243 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); 5244 if (op == NULL) { 5245 if ((last != NULL) && (last != ret)) 5246 xmlFreeDocElementContent(ctxt->myDoc, last); 5247 if (ret != NULL) 5248 xmlFreeDocElementContent(ctxt->myDoc, ret); 5249 return(NULL); 5250 } 5251 if (last == NULL) { 5252 op->c1 = ret; 5253 if (ret != NULL) 5254 ret->parent = op; 5255 ret = cur = op; 5256 } else { 5257 cur->c2 = op; 5258 if (op != NULL) 5259 
op->parent = cur; 5260 op->c1 = last; 5261 if (last != NULL) 5262 last->parent = op; 5263 cur =op; 5264 last = NULL; 5265 } 5266 } else { 5267 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL); 5268 if (ret != NULL) 5269 xmlFreeDocElementContent(ctxt->myDoc, ret); 5270 return(NULL); 5271 } 5272 GROW; 5273 SKIP_BLANKS; 5274 GROW; 5275 if (RAW == '(') { 5276 int inputid = ctxt->input->id; 5277 /* Recurse on second child */ 5278 NEXT; 5279 SKIP_BLANKS; 5280 last = xmlParseElementChildrenContentDecl(ctxt, inputid); 5281 SKIP_BLANKS; 5282 } else { 5283 elem = xmlParseName(ctxt); 5284 if (elem == NULL) { 5285 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL); 5286 if (ret != NULL) 5287 xmlFreeDocElementContent(ctxt->myDoc, ret); 5288 return(NULL); 5289 } 5290 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); 5291 if (RAW == '?') { 5292 last->ocur = XML_ELEMENT_CONTENT_OPT; 5293 NEXT; 5294 } else if (RAW == '*') { 5295 last->ocur = XML_ELEMENT_CONTENT_MULT; 5296 NEXT; 5297 } else if (RAW == '+') { 5298 last->ocur = XML_ELEMENT_CONTENT_PLUS; 5299 NEXT; 5300 } else { 5301 last->ocur = XML_ELEMENT_CONTENT_ONCE; 5302 } 5303 } 5304 SKIP_BLANKS; 5305 GROW; 5306 } 5307 if ((cur != NULL) && (last != NULL)) { 5308 cur->c2 = last; 5309 if (last != NULL) 5310 last->parent = cur; 5311 } 5312 if ((ctxt->validate) && (ctxt->input->id != inputchk)) { 5313 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY, 5314"Element content declaration doesn't start and stop in the same entity\n", 5315 NULL); 5316 } 5317 NEXT; 5318 if (RAW == '?') { 5319 if (ret != NULL) { 5320 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) || 5321 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 5322 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5323 else 5324 ret->ocur = XML_ELEMENT_CONTENT_OPT; 5325 } 5326 NEXT; 5327 } else if (RAW == '*') { 5328 if (ret != NULL) { 5329 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5330 cur = ret; 5331 /* 5332 * Some normalization: 5333 * (a | b* | c?)* == (a | b | 
c)* 5334 */ 5335 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 5336 if ((cur->c1 != NULL) && 5337 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5338 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) 5339 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5340 if ((cur->c2 != NULL) && 5341 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5342 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) 5343 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5344 cur = cur->c2; 5345 } 5346 } 5347 NEXT; 5348 } else if (RAW == '+') { 5349 if (ret != NULL) { 5350 int found = 0; 5351 5352 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) || 5353 (ret->ocur == XML_ELEMENT_CONTENT_MULT)) 5354 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5355 else 5356 ret->ocur = XML_ELEMENT_CONTENT_PLUS; 5357 /* 5358 * Some normalization: 5359 * (a | b*)+ == (a | b)* 5360 * (a | b?)+ == (a | b)* 5361 */ 5362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { 5363 if ((cur->c1 != NULL) && 5364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) || 5365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) { 5366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE; 5367 found = 1; 5368 } 5369 if ((cur->c2 != NULL) && 5370 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) || 5371 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) { 5372 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE; 5373 found = 1; 5374 } 5375 cur = cur->c2; 5376 } 5377 if (found) 5378 ret->ocur = XML_ELEMENT_CONTENT_MULT; 5379 } 5380 NEXT; 5381 } 5382 return(ret); 5383} 5384 5385/** 5386 * xmlParseElementContentDecl: 5387 * @ctxt: an XML parser context 5388 * @name: the name of the element being defined. 
5389 * @result: the Element Content pointer will be stored here if any 5390 * 5391 * parse the declaration for an Element content either Mixed or Children, 5392 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl 5393 * 5394 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 5395 * 5396 * returns: the type of element content XML_ELEMENT_TYPE_xxx 5397 */ 5398 5399int 5400xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, 5401 xmlElementContentPtr *result) { 5402 5403 xmlElementContentPtr tree = NULL; 5404 int inputid = ctxt->input->id; 5405 int res; 5406 5407 *result = NULL; 5408 5409 if (RAW != '(') { 5410 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5411 "xmlParseElementContentDecl : %s '(' expected\n", name); 5412 return(-1); 5413 } 5414 NEXT; 5415 GROW; 5416 SKIP_BLANKS; 5417 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) { 5418 tree = xmlParseElementMixedContentDecl(ctxt, inputid); 5419 res = XML_ELEMENT_TYPE_MIXED; 5420 } else { 5421 tree = xmlParseElementChildrenContentDecl(ctxt, inputid); 5422 res = XML_ELEMENT_TYPE_ELEMENT; 5423 } 5424 SKIP_BLANKS; 5425 *result = tree; 5426 return(res); 5427} 5428 5429/** 5430 * xmlParseElementDecl: 5431 * @ctxt: an XML parser context 5432 * 5433 * parse an Element declaration. 5434 * 5435 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? 
'>' 5436 * 5437 * [ VC: Unique Element Type Declaration ] 5438 * No element type may be declared more than once 5439 * 5440 * Returns the type of the element, or -1 in case of error 5441 */ 5442int 5443xmlParseElementDecl(xmlParserCtxtPtr ctxt) { 5444 const xmlChar *name; 5445 int ret = -1; 5446 xmlElementContentPtr content = NULL; 5447 5448 /* GROW; done in the caller */ 5449 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) { 5450 xmlParserInputPtr input = ctxt->input; 5451 5452 SKIP(9); 5453 if (!IS_BLANK_CH(CUR)) { 5454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5455 "Space required after 'ELEMENT'\n"); 5456 } 5457 SKIP_BLANKS; 5458 name = xmlParseName(ctxt); 5459 if (name == NULL) { 5460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 5461 "xmlParseElementDecl: no name for Element\n"); 5462 return(-1); 5463 } 5464 while ((RAW == 0) && (ctxt->inputNr > 1)) 5465 xmlPopInput(ctxt); 5466 if (!IS_BLANK_CH(CUR)) { 5467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5468 "Space required after the element name\n"); 5469 } 5470 SKIP_BLANKS; 5471 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) { 5472 SKIP(5); 5473 /* 5474 * Element must always be empty. 5475 */ 5476 ret = XML_ELEMENT_TYPE_EMPTY; 5477 } else if ((RAW == 'A') && (NXT(1) == 'N') && 5478 (NXT(2) == 'Y')) { 5479 SKIP(3); 5480 /* 5481 * Element is a generic container. 5482 */ 5483 ret = XML_ELEMENT_TYPE_ANY; 5484 } else if (RAW == '(') { 5485 ret = xmlParseElementContentDecl(ctxt, name, &content); 5486 } else { 5487 /* 5488 * [ WFC: PEs in Internal Subset ] error handling. 
5489 */ 5490 if ((RAW == '%') && (ctxt->external == 0) && 5491 (ctxt->inputNr == 1)) { 5492 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, 5493 "PEReference: forbidden within markup decl in internal subset\n"); 5494 } else { 5495 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, 5496 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); 5497 } 5498 return(-1); 5499 } 5500 5501 SKIP_BLANKS; 5502 /* 5503 * Pop-up of finished entities. 5504 */ 5505 while ((RAW == 0) && (ctxt->inputNr > 1)) 5506 xmlPopInput(ctxt); 5507 SKIP_BLANKS; 5508 5509 if (RAW != '>') { 5510 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 5511 if (content != NULL) { 5512 xmlFreeDocElementContent(ctxt->myDoc, content); 5513 } 5514 } else { 5515 if (input != ctxt->input) { 5516 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, 5517 "Element declaration doesn't start and stop in the same entity\n"); 5518 } 5519 5520 NEXT; 5521 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 5522 (ctxt->sax->elementDecl != NULL)) { 5523 if (content != NULL) 5524 content->parent = NULL; 5525 ctxt->sax->elementDecl(ctxt->userData, name, ret, 5526 content); 5527 if ((content != NULL) && (content->parent == NULL)) { 5528 /* 5529 * this is a trick: if xmlAddElementDecl is called, 5530 * instead of copying the full tree it is plugged directly 5531 * if called from the parser. Avoid duplicating the 5532 * interfaces or change the API/ABI 5533 */ 5534 xmlFreeDocElementContent(ctxt->myDoc, content); 5535 } 5536 } else if (content != NULL) { 5537 xmlFreeDocElementContent(ctxt->myDoc, content); 5538 } 5539 } 5540 } 5541 return(ret); 5542} 5543 5544/** 5545 * xmlParseConditionalSections 5546 * @ctxt: an XML parser context 5547 * 5548 * [61] conditionalSect ::= includeSect | ignoreSect 5549 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' 5550 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? 
'[' ignoreSectContents* ']]>' 5551 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* 5552 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 5553 */ 5554 5555static void 5556xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { 5557 SKIP(3); 5558 SKIP_BLANKS; 5559 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) { 5560 SKIP(7); 5561 SKIP_BLANKS; 5562 if (RAW != '[') { 5563 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5564 } else { 5565 NEXT; 5566 } 5567 if (xmlParserDebugEntities) { 5568 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5569 xmlGenericError(xmlGenericErrorContext, 5570 "%s(%d): ", ctxt->input->filename, 5571 ctxt->input->line); 5572 xmlGenericError(xmlGenericErrorContext, 5573 "Entering INCLUDE Conditional Section\n"); 5574 } 5575 5576 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || 5577 (NXT(2) != '>'))) { 5578 const xmlChar *check = CUR_PTR; 5579 unsigned int cons = ctxt->input->consumed; 5580 5581 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5582 xmlParseConditionalSections(ctxt); 5583 } else if (IS_BLANK_CH(CUR)) { 5584 NEXT; 5585 } else if (RAW == '%') { 5586 xmlParsePEReference(ctxt); 5587 } else 5588 xmlParseMarkupDecl(ctxt); 5589 5590 /* 5591 * Pop-up of finished entities. 
5592 */ 5593 while ((RAW == 0) && (ctxt->inputNr > 1)) 5594 xmlPopInput(ctxt); 5595 5596 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5597 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5598 break; 5599 } 5600 } 5601 if (xmlParserDebugEntities) { 5602 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5603 xmlGenericError(xmlGenericErrorContext, 5604 "%s(%d): ", ctxt->input->filename, 5605 ctxt->input->line); 5606 xmlGenericError(xmlGenericErrorContext, 5607 "Leaving INCLUDE Conditional Section\n"); 5608 } 5609 5610 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) { 5611 int state; 5612 xmlParserInputState instate; 5613 int depth = 0; 5614 5615 SKIP(6); 5616 SKIP_BLANKS; 5617 if (RAW != '[') { 5618 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL); 5619 } else { 5620 NEXT; 5621 } 5622 if (xmlParserDebugEntities) { 5623 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5624 xmlGenericError(xmlGenericErrorContext, 5625 "%s(%d): ", ctxt->input->filename, 5626 ctxt->input->line); 5627 xmlGenericError(xmlGenericErrorContext, 5628 "Entering IGNORE Conditional Section\n"); 5629 } 5630 5631 /* 5632 * Parse up to the end of the conditional section 5633 * But disable SAX event generating DTD building in the meantime 5634 */ 5635 state = ctxt->disableSAX; 5636 instate = ctxt->instate; 5637 if (ctxt->recovery == 0) ctxt->disableSAX = 1; 5638 ctxt->instate = XML_PARSER_IGNORE; 5639 5640 while ((depth >= 0) && (RAW != 0)) { 5641 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5642 depth++; 5643 SKIP(3); 5644 continue; 5645 } 5646 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { 5647 if (--depth >= 0) SKIP(3); 5648 continue; 5649 } 5650 NEXT; 5651 continue; 5652 } 5653 5654 ctxt->disableSAX = state; 5655 ctxt->instate = instate; 5656 5657 if (xmlParserDebugEntities) { 5658 if ((ctxt->input != NULL) && (ctxt->input->filename)) 5659 xmlGenericError(xmlGenericErrorContext, 5660 "%s(%d): ", ctxt->input->filename, 5661 
ctxt->input->line); 5662 xmlGenericError(xmlGenericErrorContext, 5663 "Leaving IGNORE Conditional Section\n"); 5664 } 5665 5666 } else { 5667 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL); 5668 } 5669 5670 if (RAW == 0) 5671 SHRINK; 5672 5673 if (RAW == 0) { 5674 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL); 5675 } else { 5676 SKIP(3); 5677 } 5678} 5679 5680/** 5681 * xmlParseMarkupDecl: 5682 * @ctxt: an XML parser context 5683 * 5684 * parse Markup declarations 5685 * 5686 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | 5687 * NotationDecl | PI | Comment 5688 * 5689 * [ VC: Proper Declaration/PE Nesting ] 5690 * Parameter-entity replacement text must be properly nested with 5691 * markup declarations. That is to say, if either the first character 5692 * or the last character of a markup declaration (markupdecl above) is 5693 * contained in the replacement text for a parameter-entity reference, 5694 * both must be contained in the same replacement text. 5695 * 5696 * [ WFC: PEs in Internal Subset ] 5697 * In the internal DTD subset, parameter-entity references can occur 5698 * only where markup declarations can occur, not within markup declarations. 5699 * (This does not apply to references that occur in external parameter 5700 * entities or to the external subset.) 
5701 */ 5702void 5703xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { 5704 GROW; 5705 if (CUR == '<') { 5706 if (NXT(1) == '!') { 5707 switch (NXT(2)) { 5708 case 'E': 5709 if (NXT(3) == 'L') 5710 xmlParseElementDecl(ctxt); 5711 else if (NXT(3) == 'N') 5712 xmlParseEntityDecl(ctxt); 5713 break; 5714 case 'A': 5715 xmlParseAttributeListDecl(ctxt); 5716 break; 5717 case 'N': 5718 xmlParseNotationDecl(ctxt); 5719 break; 5720 case '-': 5721 xmlParseComment(ctxt); 5722 break; 5723 default: 5724 /* there is an error but it will be detected later */ 5725 break; 5726 } 5727 } else if (NXT(1) == '?') { 5728 xmlParsePI(ctxt); 5729 } 5730 } 5731 /* 5732 * This is only for internal subset. On external entities, 5733 * the replacement is done before parsing stage 5734 */ 5735 if ((ctxt->external == 0) && (ctxt->inputNr == 1)) 5736 xmlParsePEReference(ctxt); 5737 5738 /* 5739 * Conditional sections are allowed from entities included 5740 * by PE References in the internal subset. 5741 */ 5742 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) { 5743 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5744 xmlParseConditionalSections(ctxt); 5745 } 5746 } 5747 5748 ctxt->instate = XML_PARSER_DTD; 5749} 5750 5751/** 5752 * xmlParseTextDecl: 5753 * @ctxt: an XML parser context 5754 * 5755 * parse an XML declaration header for external entities 5756 * 5757 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 5758 * 5759 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ? 5760 */ 5761 5762void 5763xmlParseTextDecl(xmlParserCtxtPtr ctxt) { 5764 xmlChar *version; 5765 const xmlChar *encoding; 5766 5767 /* 5768 * We know that '<?xml' is here. 
5769 */ 5770 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 5771 SKIP(5); 5772 } else { 5773 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL); 5774 return; 5775 } 5776 5777 if (!IS_BLANK_CH(CUR)) { 5778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5779 "Space needed after '<?xml'\n"); 5780 } 5781 SKIP_BLANKS; 5782 5783 /* 5784 * We may have the VersionInfo here. 5785 */ 5786 version = xmlParseVersionInfo(ctxt); 5787 if (version == NULL) 5788 version = xmlCharStrdup(XML_DEFAULT_VERSION); 5789 else { 5790 if (!IS_BLANK_CH(CUR)) { 5791 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 5792 "Space needed here\n"); 5793 } 5794 } 5795 ctxt->input->version = version; 5796 5797 /* 5798 * We must have the encoding declaration 5799 */ 5800 encoding = xmlParseEncodingDecl(ctxt); 5801 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5802 /* 5803 * The XML REC instructs us to stop parsing right here 5804 */ 5805 return; 5806 } 5807 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { 5808 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING, 5809 "Missing encoding in text declaration\n"); 5810 } 5811 5812 SKIP_BLANKS; 5813 if ((RAW == '?') && (NXT(1) == '>')) { 5814 SKIP(2); 5815 } else if (RAW == '>') { 5816 /* Deprecated old WD ... */ 5817 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5818 NEXT; 5819 } else { 5820 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 5821 MOVETO_ENDTAG(CUR_PTR); 5822 NEXT; 5823 } 5824} 5825 5826/** 5827 * xmlParseExternalSubset: 5828 * @ctxt: an XML parser context 5829 * @ExternalID: the external identifier 5830 * @SystemID: the system identifier (or URL) 5831 * 5832 * parse Markup declarations from an external subset 5833 * 5834 * [30] extSubset ::= textDecl? 
extSubsetDecl 5835 * 5836 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) * 5837 */ 5838void 5839xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, 5840 const xmlChar *SystemID) { 5841 xmlDetectSAX2(ctxt); 5842 GROW; 5843 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { 5844 xmlParseTextDecl(ctxt); 5845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 5846 /* 5847 * The XML REC instructs us to stop parsing right here 5848 */ 5849 ctxt->instate = XML_PARSER_EOF; 5850 return; 5851 } 5852 } 5853 if (ctxt->myDoc == NULL) { 5854 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 5855 } 5856 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL)) 5857 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID); 5858 5859 ctxt->instate = XML_PARSER_DTD; 5860 ctxt->external = 1; 5861 while (((RAW == '<') && (NXT(1) == '?')) || 5862 ((RAW == '<') && (NXT(1) == '!')) || 5863 (RAW == '%') || IS_BLANK_CH(CUR)) { 5864 const xmlChar *check = CUR_PTR; 5865 unsigned int cons = ctxt->input->consumed; 5866 5867 GROW; 5868 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { 5869 xmlParseConditionalSections(ctxt); 5870 } else if (IS_BLANK_CH(CUR)) { 5871 NEXT; 5872 } else if (RAW == '%') { 5873 xmlParsePEReference(ctxt); 5874 } else 5875 xmlParseMarkupDecl(ctxt); 5876 5877 /* 5878 * Pop-up of finished entities. 
5879 */ 5880 while ((RAW == 0) && (ctxt->inputNr > 1)) 5881 xmlPopInput(ctxt); 5882 5883 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 5884 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5885 break; 5886 } 5887 } 5888 5889 if (RAW != 0) { 5890 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL); 5891 } 5892 5893} 5894 5895/** 5896 * xmlParseReference: 5897 * @ctxt: an XML parser context 5898 * 5899 * parse and handle entity references in content, depending on the SAX 5900 * interface, this may end-up in a call to character() if this is a 5901 * CharRef, a predefined entity, if there is no reference() callback. 5902 * or if the parser was asked to switch to that mode. 5903 * 5904 * [67] Reference ::= EntityRef | CharRef 5905 */ 5906void 5907xmlParseReference(xmlParserCtxtPtr ctxt) { 5908 xmlEntityPtr ent; 5909 xmlChar *val; 5910 if (RAW != '&') return; 5911 5912 if (NXT(1) == '#') { 5913 int i = 0; 5914 xmlChar out[10]; 5915 int hex = NXT(2); 5916 int value = xmlParseCharRef(ctxt); 5917 5918 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { 5919 /* 5920 * So we are using non-UTF-8 buffers 5921 * Check that the char fit on 8bits, if not 5922 * generate a CharRef. 
5923 */ 5924 if (value <= 0xFF) { 5925 out[0] = value; 5926 out[1] = 0; 5927 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5928 (!ctxt->disableSAX)) 5929 ctxt->sax->characters(ctxt->userData, out, 1); 5930 } else { 5931 if ((hex == 'x') || (hex == 'X')) 5932 snprintf((char *)out, sizeof(out), "#x%X", value); 5933 else 5934 snprintf((char *)out, sizeof(out), "#%d", value); 5935 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 5936 (!ctxt->disableSAX)) 5937 ctxt->sax->reference(ctxt->userData, out); 5938 } 5939 } else { 5940 /* 5941 * Just encode the value in UTF-8 5942 */ 5943 COPY_BUF(0 ,out, i, value); 5944 out[i] = 0; 5945 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 5946 (!ctxt->disableSAX)) 5947 ctxt->sax->characters(ctxt->userData, out, i); 5948 } 5949 } else { 5950 int was_checked; 5951 5952 ent = xmlParseEntityRef(ctxt); 5953 if (ent == NULL) return; 5954 if (!ctxt->wellFormed) 5955 return; 5956 was_checked = ent->checked; 5957 if ((ent->name != NULL) && 5958 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { 5959 xmlNodePtr list = NULL; 5960 xmlParserErrors ret = XML_ERR_OK; 5961 5962 5963 /* 5964 * The first reference to the entity trigger a parsing phase 5965 * where the ent->children is filled with the result from 5966 * the parsing. 5967 */ 5968 if (ent->checked == 0) { 5969 xmlChar *value; 5970 5971 value = ent->content; 5972 5973 /* 5974 * Check that this entity is well formed 5975 */ 5976 if ((value != NULL) && (value[0] != 0) && 5977 (value[1] == 0) && (value[0] == '<') && 5978 (xmlStrEqual(ent->name, BAD_CAST "lt"))) { 5979 /* 5980 * DONE: get definite answer on this !!! 5981 * Lots of entity decls are used to declare a single 5982 * char 5983 * <!ENTITY lt "<"> 5984 * Which seems to be valid since 5985 * 2.4: The ampersand character (&) and the left angle 5986 * bracket (<) may appear in their literal form only 5987 * when used ... 
They are also legal within the literal 5988 * entity value of an internal entity declaration;i 5989 * see "4.3.2 Well-Formed Parsed Entities". 5990 * IMHO 2.4 and 4.3.2 are directly in contradiction. 5991 * Looking at the OASIS test suite and James Clark 5992 * tests, this is broken. However the XML REC uses 5993 * it. Is the XML REC not well-formed ???? 5994 * This is a hack to avoid this problem 5995 * 5996 * ANSWER: since lt gt amp .. are already defined, 5997 * this is a redefinition and hence the fact that the 5998 * content is not well balanced is not a Wf error, this 5999 * is lousy but acceptable. 6000 */ 6001 list = xmlNewDocText(ctxt->myDoc, value); 6002 if (list != NULL) { 6003 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) && 6004 (ent->children == NULL)) { 6005 ent->children = list; 6006 ent->last = list; 6007 ent->owner = 1; 6008 list->parent = (xmlNodePtr) ent; 6009 } else { 6010 xmlFreeNodeList(list); 6011 } 6012 } else if (list != NULL) { 6013 xmlFreeNodeList(list); 6014 } 6015 } else { 6016 /* 6017 * 4.3.2: An internal general parsed entity is well-formed 6018 * if its replacement text matches the production labeled 6019 * content. 6020 */ 6021 6022 void *user_data; 6023 /* 6024 * This is a bit hackish but this seems the best 6025 * way to make sure both SAX and DOM entity support 6026 * behaves okay. 
6027 */ 6028 if (ctxt->userData == ctxt) 6029 user_data = NULL; 6030 else 6031 user_data = ctxt->userData; 6032 6033 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6034 ctxt->depth++; 6035 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6036 value, user_data, &list); 6037 ctxt->depth--; 6038 } else if (ent->etype == 6039 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6040 ctxt->depth++; 6041 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 6042 ctxt->sax, user_data, ctxt->depth, 6043 ent->URI, ent->ExternalID, &list); 6044 ctxt->depth--; 6045 } else { 6046 ret = XML_ERR_ENTITY_PE_INTERNAL; 6047 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6048 "invalid entity type found\n", NULL); 6049 } 6050 if (ret == XML_ERR_ENTITY_LOOP) { 6051 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6052 return; 6053 } else if ((ret == XML_ERR_OK) && (list != NULL)) { 6054 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) || 6055 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&& 6056 (ent->children == NULL)) { 6057 ent->children = list; 6058 if (ctxt->replaceEntities) { 6059 /* 6060 * Prune it directly in the generated document 6061 * except for single text nodes. 
6062 */ 6063 if (((list->type == XML_TEXT_NODE) && 6064 (list->next == NULL)) || 6065 (ctxt->parseMode == XML_PARSE_READER)) { 6066 list->parent = (xmlNodePtr) ent; 6067 list = NULL; 6068 ent->owner = 1; 6069 } else { 6070 ent->owner = 0; 6071 while (list != NULL) { 6072 list->parent = (xmlNodePtr) ctxt->node; 6073 list->doc = ctxt->myDoc; 6074 if (list->next == NULL) 6075 ent->last = list; 6076 list = list->next; 6077 } 6078 list = ent->children; 6079#ifdef LIBXML_LEGACY_ENABLED 6080 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6081 xmlAddEntityReference(ent, list, NULL); 6082#endif /* LIBXML_LEGACY_ENABLED */ 6083 } 6084 } else { 6085 ent->owner = 1; 6086 while (list != NULL) { 6087 list->parent = (xmlNodePtr) ent; 6088 if (list->next == NULL) 6089 ent->last = list; 6090 list = list->next; 6091 } 6092 } 6093 } else { 6094 xmlFreeNodeList(list); 6095 list = NULL; 6096 } 6097 } else if ((ret != XML_ERR_OK) && 6098 (ret != XML_WAR_UNDECLARED_ENTITY)) { 6099 xmlFatalErr(ctxt, ret, NULL); 6100 } else if (list != NULL) { 6101 xmlFreeNodeList(list); 6102 list = NULL; 6103 } 6104 } 6105 ent->checked = 1; 6106 } 6107 6108 if (ent->children == NULL) { 6109 /* 6110 * Probably running in SAX mode and the callbacks don't 6111 * build the entity content. So unless we already went 6112 * though parsing for first checking go though the entity 6113 * content to generate callbacks associated to the entity 6114 */ 6115 if (was_checked == 1) { 6116 void *user_data; 6117 /* 6118 * This is a bit hackish but this seems the best 6119 * way to make sure both SAX and DOM entity support 6120 * behaves okay. 
6121 */ 6122 if (ctxt->userData == ctxt) 6123 user_data = NULL; 6124 else 6125 user_data = ctxt->userData; 6126 6127 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { 6128 ctxt->depth++; 6129 ret = xmlParseBalancedChunkMemoryInternal(ctxt, 6130 ent->content, user_data, NULL); 6131 ctxt->depth--; 6132 } else if (ent->etype == 6133 XML_EXTERNAL_GENERAL_PARSED_ENTITY) { 6134 ctxt->depth++; 6135 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, 6136 ctxt->sax, user_data, ctxt->depth, 6137 ent->URI, ent->ExternalID, NULL); 6138 ctxt->depth--; 6139 } else { 6140 ret = XML_ERR_ENTITY_PE_INTERNAL; 6141 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, 6142 "invalid entity type found\n", NULL); 6143 } 6144 if (ret == XML_ERR_ENTITY_LOOP) { 6145 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); 6146 return; 6147 } 6148 } 6149 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6150 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6151 /* 6152 * Entity reference callback comes second, it's somewhat 6153 * superfluous but a compatibility to historical behaviour 6154 */ 6155 ctxt->sax->reference(ctxt->userData, ent->name); 6156 } 6157 return; 6158 } 6159 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && 6160 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { 6161 /* 6162 * Create a node. 6163 */ 6164 ctxt->sax->reference(ctxt->userData, ent->name); 6165 return; 6166 } 6167 if ((ctxt->replaceEntities) || (ent->children == NULL)) { 6168 /* 6169 * There is a problem on the handling of _private for entities 6170 * (bug 155816): Should we copy the content of the field from 6171 * the entity (possibly overwriting some value set by the user 6172 * when a copy is created), should we leave it alone, or should 6173 * we try to take care of different situations? The problem 6174 * is exacerbated by the usage of this field by the xmlReader. 6175 * To fix this bug, we look at _private on the created node 6176 * and, if it's NULL, we copy in whatever was in the entity. 
6177 * If it's not NULL we leave it alone. This is somewhat of a 6178 * hack - maybe we should have further tests to determine 6179 * what to do. 6180 */ 6181 if ((ctxt->node != NULL) && (ent->children != NULL)) { 6182 /* 6183 * Seems we are generating the DOM content, do 6184 * a simple tree copy for all references except the first 6185 * In the first occurrence list contains the replacement. 6186 * progressive == 2 means we are operating on the Reader 6187 * and since nodes are discarded we must copy all the time. 6188 */ 6189 if (((list == NULL) && (ent->owner == 0)) || 6190 (ctxt->parseMode == XML_PARSE_READER)) { 6191 xmlNodePtr nw = NULL, cur, firstChild = NULL; 6192 6193 /* 6194 * when operating on a reader, the entities definitions 6195 * are always owning the entities subtree. 6196 if (ctxt->parseMode == XML_PARSE_READER) 6197 ent->owner = 1; 6198 */ 6199 6200 cur = ent->children; 6201 while (cur != NULL) { 6202 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6203 if (nw != NULL) { 6204 if (nw->_private == NULL) 6205 nw->_private = cur->_private; 6206 if (firstChild == NULL){ 6207 firstChild = nw; 6208 } 6209 nw = xmlAddChild(ctxt->node, nw); 6210 } 6211 if (cur == ent->last) { 6212 /* 6213 * needed to detect some strange empty 6214 * node cases in the reader tests 6215 */ 6216 if ((ctxt->parseMode == XML_PARSE_READER) && 6217 (nw != NULL) && 6218 (nw->type == XML_ELEMENT_NODE) && 6219 (nw->children == NULL)) 6220 nw->extra = 1; 6221 6222 break; 6223 } 6224 cur = cur->next; 6225 } 6226#ifdef LIBXML_LEGACY_ENABLED 6227 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6228 xmlAddEntityReference(ent, firstChild, nw); 6229#endif /* LIBXML_LEGACY_ENABLED */ 6230 } else if (list == NULL) { 6231 xmlNodePtr nw = NULL, cur, next, last, 6232 firstChild = NULL; 6233 /* 6234 * Copy the entity child list and make it the new 6235 * entity child list. 
The goal is to make sure any 6236 * ID or REF referenced will be the one from the 6237 * document content and not the entity copy. 6238 */ 6239 cur = ent->children; 6240 ent->children = NULL; 6241 last = ent->last; 6242 ent->last = NULL; 6243 while (cur != NULL) { 6244 next = cur->next; 6245 cur->next = NULL; 6246 cur->parent = NULL; 6247 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1); 6248 if (nw != NULL) { 6249 if (nw->_private == NULL) 6250 nw->_private = cur->_private; 6251 if (firstChild == NULL){ 6252 firstChild = cur; 6253 } 6254 xmlAddChild((xmlNodePtr) ent, nw); 6255 xmlAddChild(ctxt->node, cur); 6256 } 6257 if (cur == last) 6258 break; 6259 cur = next; 6260 } 6261 ent->owner = 1; 6262#ifdef LIBXML_LEGACY_ENABLED 6263 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) 6264 xmlAddEntityReference(ent, firstChild, nw); 6265#endif /* LIBXML_LEGACY_ENABLED */ 6266 } else { 6267 const xmlChar *nbktext; 6268 6269 /* 6270 * the name change is to avoid coalescing of the 6271 * node with a possible previous text one which 6272 * would make ent->children a dangling pointer 6273 */ 6274 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", 6275 -1); 6276 if (ent->children->type == XML_TEXT_NODE) 6277 ent->children->name = nbktext; 6278 if ((ent->last != ent->children) && 6279 (ent->last->type == XML_TEXT_NODE)) 6280 ent->last->name = nbktext; 6281 xmlAddChildList(ctxt->node, ent->children); 6282 } 6283 6284 /* 6285 * This is to avoid a nasty side effect, see 6286 * characters() in SAX.c 6287 */ 6288 ctxt->nodemem = 0; 6289 ctxt->nodelen = 0; 6290 return; 6291 } 6292 } 6293 } else { 6294 val = ent->content; 6295 if (val == NULL) return; 6296 /* 6297 * inline the entity. 
6298 */ 6299 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && 6300 (!ctxt->disableSAX)) 6301 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); 6302 } 6303 } 6304} 6305 6306/** 6307 * xmlParseEntityRef: 6308 * @ctxt: an XML parser context 6309 * 6310 * parse ENTITY references declarations 6311 * 6312 * [68] EntityRef ::= '&' Name ';' 6313 * 6314 * [ WFC: Entity Declared ] 6315 * In a document without any DTD, a document with only an internal DTD 6316 * subset which contains no parameter entity references, or a document 6317 * with "standalone='yes'", the Name given in the entity reference 6318 * must match that in an entity declaration, except that well-formed 6319 * documents need not declare any of the following entities: amp, lt, 6320 * gt, apos, quot. The declaration of a parameter entity must precede 6321 * any reference to it. Similarly, the declaration of a general entity 6322 * must precede any reference to it which appears in a default value in an 6323 * attribute-list declaration. Note that if entities are declared in the 6324 * external subset or in external parameter entities, a non-validating 6325 * processor is not obligated to read and process their declarations; 6326 * for such documents, the rule that an entity must be declared is a 6327 * well-formedness constraint only if standalone='yes'. 6328 * 6329 * [ WFC: Parsed Entity ] 6330 * An entity reference must not contain the name of an unparsed entity 6331 * 6332 * Returns the xmlEntityPtr if found, or NULL otherwise. 6333 */ 6334xmlEntityPtr 6335xmlParseEntityRef(xmlParserCtxtPtr ctxt) { 6336 const xmlChar *name; 6337 xmlEntityPtr ent = NULL; 6338 6339 GROW; 6340 6341 if (RAW == '&') { 6342 NEXT; 6343 name = xmlParseName(ctxt); 6344 if (name == NULL) { 6345 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6346 "xmlParseEntityRef: no name\n"); 6347 } else { 6348 if (RAW == ';') { 6349 NEXT; 6350 /* 6351 * Ask first SAX for entity resolution, otherwise try the 6352 * predefined set. 
6353 */ 6354 if (ctxt->sax != NULL) { 6355 if (ctxt->sax->getEntity != NULL) 6356 ent = ctxt->sax->getEntity(ctxt->userData, name); 6357 if ((ctxt->wellFormed == 1 ) && (ent == NULL)) 6358 ent = xmlGetPredefinedEntity(name); 6359 if ((ctxt->wellFormed == 1 ) && (ent == NULL) && 6360 (ctxt->userData==ctxt)) { 6361 ent = xmlSAX2GetEntity(ctxt, name); 6362 } 6363 } 6364 /* 6365 * [ WFC: Entity Declared ] 6366 * In a document without any DTD, a document with only an 6367 * internal DTD subset which contains no parameter entity 6368 * references, or a document with "standalone='yes'", the 6369 * Name given in the entity reference must match that in an 6370 * entity declaration, except that well-formed documents 6371 * need not declare any of the following entities: amp, lt, 6372 * gt, apos, quot. 6373 * The declaration of a parameter entity must precede any 6374 * reference to it. 6375 * Similarly, the declaration of a general entity must 6376 * precede any reference to it which appears in a default 6377 * value in an attribute-list declaration. Note that if 6378 * entities are declared in the external subset or in 6379 * external parameter entities, a non-validating processor 6380 * is not obligated to read and process their declarations; 6381 * for such documents, the rule that an entity must be 6382 * declared is a well-formedness constraint only if 6383 * standalone='yes'. 
6384 */ 6385 if (ent == NULL) { 6386 if ((ctxt->standalone == 1) || 6387 ((ctxt->hasExternalSubset == 0) && 6388 (ctxt->hasPErefs == 0))) { 6389 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6390 "Entity '%s' not defined\n", name); 6391 } else { 6392 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6393 "Entity '%s' not defined\n", name); 6394 if ((ctxt->inSubset == 0) && 6395 (ctxt->sax != NULL) && 6396 (ctxt->sax->reference != NULL)) { 6397 ctxt->sax->reference(ctxt->userData, name); 6398 } 6399 } 6400 ctxt->valid = 0; 6401 } 6402 6403 /* 6404 * [ WFC: Parsed Entity ] 6405 * An entity reference must not contain the name of an 6406 * unparsed entity 6407 */ 6408 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6409 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6410 "Entity reference to unparsed entity %s\n", name); 6411 } 6412 6413 /* 6414 * [ WFC: No External Entity References ] 6415 * Attribute values cannot contain direct or indirect 6416 * entity references to external entities. 6417 */ 6418 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6419 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6420 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6421 "Attribute references external entity '%s'\n", name); 6422 } 6423 /* 6424 * [ WFC: No < in Attribute Values ] 6425 * The replacement text of any entity referred to directly or 6426 * indirectly in an attribute value (other than "<") must 6427 * not contain a <. 6428 */ 6429 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6430 (ent != NULL) && 6431 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6432 (ent->content != NULL) && 6433 (xmlStrchr(ent->content, '<'))) { 6434 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6435 "'<' in entity '%s' is not allowed in attributes values\n", name); 6436 } 6437 6438 /* 6439 * Internal check, no parameter entities here ... 
6440 */ 6441 else { 6442 switch (ent->etype) { 6443 case XML_INTERNAL_PARAMETER_ENTITY: 6444 case XML_EXTERNAL_PARAMETER_ENTITY: 6445 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6446 "Attempt to reference the parameter entity '%s'\n", 6447 name); 6448 break; 6449 default: 6450 break; 6451 } 6452 } 6453 6454 /* 6455 * [ WFC: No Recursion ] 6456 * A parsed entity must not contain a recursive reference 6457 * to itself, either directly or indirectly. 6458 * Done somewhere else 6459 */ 6460 6461 } else { 6462 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6463 } 6464 } 6465 } 6466 return(ent); 6467} 6468 6469/** 6470 * xmlParseStringEntityRef: 6471 * @ctxt: an XML parser context 6472 * @str: a pointer to an index in the string 6473 * 6474 * parse ENTITY references declarations, but this version parses it from 6475 * a string value. 6476 * 6477 * [68] EntityRef ::= '&' Name ';' 6478 * 6479 * [ WFC: Entity Declared ] 6480 * In a document without any DTD, a document with only an internal DTD 6481 * subset which contains no parameter entity references, or a document 6482 * with "standalone='yes'", the Name given in the entity reference 6483 * must match that in an entity declaration, except that well-formed 6484 * documents need not declare any of the following entities: amp, lt, 6485 * gt, apos, quot. The declaration of a parameter entity must precede 6486 * any reference to it. Similarly, the declaration of a general entity 6487 * must precede any reference to it which appears in a default value in an 6488 * attribute-list declaration. Note that if entities are declared in the 6489 * external subset or in external parameter entities, a non-validating 6490 * processor is not obligated to read and process their declarations; 6491 * for such documents, the rule that an entity must be declared is a 6492 * well-formedness constraint only if standalone='yes'. 
6493 * 6494 * [ WFC: Parsed Entity ] 6495 * An entity reference must not contain the name of an unparsed entity 6496 * 6497 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer 6498 * is updated to the current location in the string. 6499 */ 6500xmlEntityPtr 6501xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { 6502 xmlChar *name; 6503 const xmlChar *ptr; 6504 xmlChar cur; 6505 xmlEntityPtr ent = NULL; 6506 6507 if ((str == NULL) || (*str == NULL)) 6508 return(NULL); 6509 ptr = *str; 6510 cur = *ptr; 6511 if (cur == '&') { 6512 ptr++; 6513 cur = *ptr; 6514 name = xmlParseStringName(ctxt, &ptr); 6515 if (name == NULL) { 6516 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6517 "xmlParseStringEntityRef: no name\n"); 6518 } else { 6519 if (*ptr == ';') { 6520 ptr++; 6521 /* 6522 * Ask first SAX for entity resolution, otherwise try the 6523 * predefined set. 6524 */ 6525 if (ctxt->sax != NULL) { 6526 if (ctxt->sax->getEntity != NULL) 6527 ent = ctxt->sax->getEntity(ctxt->userData, name); 6528 if (ent == NULL) 6529 ent = xmlGetPredefinedEntity(name); 6530 if ((ent == NULL) && (ctxt->userData==ctxt)) { 6531 ent = xmlSAX2GetEntity(ctxt, name); 6532 } 6533 } 6534 /* 6535 * [ WFC: Entity Declared ] 6536 * In a document without any DTD, a document with only an 6537 * internal DTD subset which contains no parameter entity 6538 * references, or a document with "standalone='yes'", the 6539 * Name given in the entity reference must match that in an 6540 * entity declaration, except that well-formed documents 6541 * need not declare any of the following entities: amp, lt, 6542 * gt, apos, quot. 6543 * The declaration of a parameter entity must precede any 6544 * reference to it. 6545 * Similarly, the declaration of a general entity must 6546 * precede any reference to it which appears in a default 6547 * value in an attribute-list declaration. 
Note that if 6548 * entities are declared in the external subset or in 6549 * external parameter entities, a non-validating processor 6550 * is not obligated to read and process their declarations; 6551 * for such documents, the rule that an entity must be 6552 * declared is a well-formedness constraint only if 6553 * standalone='yes'. 6554 */ 6555 if (ent == NULL) { 6556 if ((ctxt->standalone == 1) || 6557 ((ctxt->hasExternalSubset == 0) && 6558 (ctxt->hasPErefs == 0))) { 6559 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6560 "Entity '%s' not defined\n", name); 6561 } else { 6562 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY, 6563 "Entity '%s' not defined\n", 6564 name); 6565 } 6566 /* TODO ? check regressions ctxt->valid = 0; */ 6567 } 6568 6569 /* 6570 * [ WFC: Parsed Entity ] 6571 * An entity reference must not contain the name of an 6572 * unparsed entity 6573 */ 6574 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { 6575 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY, 6576 "Entity reference to unparsed entity %s\n", name); 6577 } 6578 6579 /* 6580 * [ WFC: No External Entity References ] 6581 * Attribute values cannot contain direct or indirect 6582 * entity references to external entities. 6583 */ 6584 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6585 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { 6586 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL, 6587 "Attribute references external entity '%s'\n", name); 6588 } 6589 /* 6590 * [ WFC: No < in Attribute Values ] 6591 * The replacement text of any entity referred to directly or 6592 * indirectly in an attribute value (other than "<") must 6593 * not contain a <. 
6594 */ 6595 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) && 6596 (ent != NULL) && 6597 (!xmlStrEqual(ent->name, BAD_CAST "lt")) && 6598 (ent->content != NULL) && 6599 (xmlStrchr(ent->content, '<'))) { 6600 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, 6601 "'<' in entity '%s' is not allowed in attributes values\n", 6602 name); 6603 } 6604 6605 /* 6606 * Internal check, no parameter entities here ... 6607 */ 6608 else { 6609 switch (ent->etype) { 6610 case XML_INTERNAL_PARAMETER_ENTITY: 6611 case XML_EXTERNAL_PARAMETER_ENTITY: 6612 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, 6613 "Attempt to reference the parameter entity '%s'\n", 6614 name); 6615 break; 6616 default: 6617 break; 6618 } 6619 } 6620 6621 /* 6622 * [ WFC: No Recursion ] 6623 * A parsed entity must not contain a recursive reference 6624 * to itself, either directly or indirectly. 6625 * Done somewhere else 6626 */ 6627 6628 } else { 6629 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6630 } 6631 xmlFree(name); 6632 } 6633 } 6634 *str = ptr; 6635 return(ent); 6636} 6637 6638/** 6639 * xmlParsePEReference: 6640 * @ctxt: an XML parser context 6641 * 6642 * parse PEReference declarations 6643 * The entity content is handled directly by pushing it's content as 6644 * a new input stream. 6645 * 6646 * [69] PEReference ::= '%' Name ';' 6647 * 6648 * [ WFC: No Recursion ] 6649 * A parsed entity must not contain a recursive 6650 * reference to itself, either directly or indirectly. 6651 * 6652 * [ WFC: Entity Declared ] 6653 * In a document without any DTD, a document with only an internal DTD 6654 * subset which contains no parameter entity references, or a document 6655 * with "standalone='yes'", ... ... The declaration of a parameter 6656 * entity must precede any reference to it... 6657 * 6658 * [ VC: Entity Declared ] 6659 * In a document with an external subset or external parameter entities 6660 * with "standalone='no'", ... ... 
The declaration of a parameter entity 6661 * must precede any reference to it... 6662 * 6663 * [ WFC: In DTD ] 6664 * Parameter-entity references may only appear in the DTD. 6665 * NOTE: misleading but this is handled. 6666 */ 6667void 6668xmlParsePEReference(xmlParserCtxtPtr ctxt) 6669{ 6670 const xmlChar *name; 6671 xmlEntityPtr entity = NULL; 6672 xmlParserInputPtr input; 6673 6674 if (RAW == '%') { 6675 NEXT; 6676 name = xmlParseName(ctxt); 6677 if (name == NULL) { 6678 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6679 "xmlParsePEReference: no name\n"); 6680 } else { 6681 if (RAW == ';') { 6682 NEXT; 6683 if ((ctxt->sax != NULL) && 6684 (ctxt->sax->getParameterEntity != NULL)) 6685 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6686 name); 6687 if (entity == NULL) { 6688 /* 6689 * [ WFC: Entity Declared ] 6690 * In a document without any DTD, a document with only an 6691 * internal DTD subset which contains no parameter entity 6692 * references, or a document with "standalone='yes'", ... 6693 * ... The declaration of a parameter entity must precede 6694 * any reference to it... 6695 */ 6696 if ((ctxt->standalone == 1) || 6697 ((ctxt->hasExternalSubset == 0) && 6698 (ctxt->hasPErefs == 0))) { 6699 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6700 "PEReference: %%%s; not found\n", 6701 name); 6702 } else { 6703 /* 6704 * [ VC: Entity Declared ] 6705 * In a document with an external subset or external 6706 * parameter entities with "standalone='no'", ... 6707 * ... The declaration of a parameter entity must 6708 * precede any reference to it... 
6709 */ 6710 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6711 "PEReference: %%%s; not found\n", 6712 name, NULL); 6713 ctxt->valid = 0; 6714 } 6715 } else { 6716 /* 6717 * Internal checking in case the entity quest barfed 6718 */ 6719 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6720 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6721 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6722 "Internal: %%%s; is not a parameter entity\n", 6723 name, NULL); 6724 } else if (ctxt->input->free != deallocblankswrapper) { 6725 input = 6726 xmlNewBlanksWrapperInputStream(ctxt, entity); 6727 xmlPushInput(ctxt, input); 6728 } else { 6729 /* 6730 * TODO !!! 6731 * handle the extra spaces added before and after 6732 * c.f. http://www.w3.org/TR/REC-xml#as-PE 6733 */ 6734 input = xmlNewEntityInputStream(ctxt, entity); 6735 xmlPushInput(ctxt, input); 6736 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && 6737 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && 6738 (IS_BLANK_CH(NXT(5)))) { 6739 xmlParseTextDecl(ctxt); 6740 if (ctxt->errNo == 6741 XML_ERR_UNSUPPORTED_ENCODING) { 6742 /* 6743 * The XML REC instructs us to stop parsing 6744 * right here 6745 */ 6746 ctxt->instate = XML_PARSER_EOF; 6747 return; 6748 } 6749 } 6750 } 6751 } 6752 ctxt->hasPErefs = 1; 6753 } else { 6754 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6755 } 6756 } 6757 } 6758} 6759 6760/** 6761 * xmlParseStringPEReference: 6762 * @ctxt: an XML parser context 6763 * @str: a pointer to an index in the string 6764 * 6765 * parse PEReference declarations 6766 * 6767 * [69] PEReference ::= '%' Name ';' 6768 * 6769 * [ WFC: No Recursion ] 6770 * A parsed entity must not contain a recursive 6771 * reference to itself, either directly or indirectly. 6772 * 6773 * [ WFC: Entity Declared ] 6774 * In a document without any DTD, a document with only an internal DTD 6775 * subset which contains no parameter entity references, or a document 6776 * with "standalone='yes'", ... ... 
The declaration of a parameter 6777 * entity must precede any reference to it... 6778 * 6779 * [ VC: Entity Declared ] 6780 * In a document with an external subset or external parameter entities 6781 * with "standalone='no'", ... ... The declaration of a parameter entity 6782 * must precede any reference to it... 6783 * 6784 * [ WFC: In DTD ] 6785 * Parameter-entity references may only appear in the DTD. 6786 * NOTE: misleading but this is handled. 6787 * 6788 * Returns the string of the entity content. 6789 * str is updated to the current value of the index 6790 */ 6791xmlEntityPtr 6792xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) { 6793 const xmlChar *ptr; 6794 xmlChar cur; 6795 xmlChar *name; 6796 xmlEntityPtr entity = NULL; 6797 6798 if ((str == NULL) || (*str == NULL)) return(NULL); 6799 ptr = *str; 6800 cur = *ptr; 6801 if (cur == '%') { 6802 ptr++; 6803 cur = *ptr; 6804 name = xmlParseStringName(ctxt, &ptr); 6805 if (name == NULL) { 6806 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6807 "xmlParseStringPEReference: no name\n"); 6808 } else { 6809 cur = *ptr; 6810 if (cur == ';') { 6811 ptr++; 6812 cur = *ptr; 6813 if ((ctxt->sax != NULL) && 6814 (ctxt->sax->getParameterEntity != NULL)) 6815 entity = ctxt->sax->getParameterEntity(ctxt->userData, 6816 name); 6817 if (entity == NULL) { 6818 /* 6819 * [ WFC: Entity Declared ] 6820 * In a document without any DTD, a document with only an 6821 * internal DTD subset which contains no parameter entity 6822 * references, or a document with "standalone='yes'", ... 6823 * ... The declaration of a parameter entity must precede 6824 * any reference to it... 
6825 */ 6826 if ((ctxt->standalone == 1) || 6827 ((ctxt->hasExternalSubset == 0) && 6828 (ctxt->hasPErefs == 0))) { 6829 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, 6830 "PEReference: %%%s; not found\n", name); 6831 } else { 6832 /* 6833 * [ VC: Entity Declared ] 6834 * In a document with an external subset or external 6835 * parameter entities with "standalone='no'", ... 6836 * ... The declaration of a parameter entity must 6837 * precede any reference to it... 6838 */ 6839 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6840 "PEReference: %%%s; not found\n", 6841 name, NULL); 6842 ctxt->valid = 0; 6843 } 6844 } else { 6845 /* 6846 * Internal checking in case the entity quest barfed 6847 */ 6848 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && 6849 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { 6850 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, 6851 "%%%s; is not a parameter entity\n", 6852 name, NULL); 6853 } 6854 } 6855 ctxt->hasPErefs = 1; 6856 } else { 6857 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); 6858 } 6859 xmlFree(name); 6860 } 6861 } 6862 *str = ptr; 6863 return(entity); 6864} 6865 6866/** 6867 * xmlParseDocTypeDecl: 6868 * @ctxt: an XML parser context 6869 * 6870 * parse a DOCTYPE declaration 6871 * 6872 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? 6873 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6874 * 6875 * [ VC: Root Element Type ] 6876 * The Name in the document type declaration must match the element 6877 * type of the root element. 6878 */ 6879 6880void 6881xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { 6882 const xmlChar *name = NULL; 6883 xmlChar *ExternalID = NULL; 6884 xmlChar *URI = NULL; 6885 6886 /* 6887 * We know that '<!DOCTYPE' has been detected. 6888 */ 6889 SKIP(9); 6890 6891 SKIP_BLANKS; 6892 6893 /* 6894 * Parse the DOCTYPE name. 
6895 */ 6896 name = xmlParseName(ctxt); 6897 if (name == NULL) { 6898 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 6899 "xmlParseDocTypeDecl : no DOCTYPE name !\n"); 6900 } 6901 ctxt->intSubName = name; 6902 6903 SKIP_BLANKS; 6904 6905 /* 6906 * Check for SystemID and ExternalID 6907 */ 6908 URI = xmlParseExternalID(ctxt, &ExternalID, 1); 6909 6910 if ((URI != NULL) || (ExternalID != NULL)) { 6911 ctxt->hasExternalSubset = 1; 6912 } 6913 ctxt->extSubURI = URI; 6914 ctxt->extSubSystem = ExternalID; 6915 6916 SKIP_BLANKS; 6917 6918 /* 6919 * Create and update the internal subset. 6920 */ 6921 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && 6922 (!ctxt->disableSAX)) 6923 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); 6924 6925 /* 6926 * Is there any internal subset declarations ? 6927 * they are handled separately in xmlParseInternalSubset() 6928 */ 6929 if (RAW == '[') 6930 return; 6931 6932 /* 6933 * We should be at the end of the DOCTYPE declaration. 6934 */ 6935 if (RAW != '>') { 6936 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6937 } 6938 NEXT; 6939} 6940 6941/** 6942 * xmlParseInternalSubset: 6943 * @ctxt: an XML parser context 6944 * 6945 * parse the internal subset declaration 6946 * 6947 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 6948 */ 6949 6950static void 6951xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { 6952 /* 6953 * Is there any DTD definition ? 6954 */ 6955 if (RAW == '[') { 6956 ctxt->instate = XML_PARSER_DTD; 6957 NEXT; 6958 /* 6959 * Parse the succession of Markup declarations and 6960 * PEReferences. 6961 * Subsequence (markupdecl | PEReference | S)* 6962 */ 6963 while (RAW != ']') { 6964 const xmlChar *check = CUR_PTR; 6965 unsigned int cons = ctxt->input->consumed; 6966 6967 SKIP_BLANKS; 6968 xmlParseMarkupDecl(ctxt); 6969 xmlParsePEReference(ctxt); 6970 6971 /* 6972 * Pop-up of finished entities. 
6973 */ 6974 while ((RAW == 0) && (ctxt->inputNr > 1)) 6975 xmlPopInput(ctxt); 6976 6977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { 6978 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 6979 "xmlParseInternalSubset: error detected in Markup declaration\n"); 6980 break; 6981 } 6982 } 6983 if (RAW == ']') { 6984 NEXT; 6985 SKIP_BLANKS; 6986 } 6987 } 6988 6989 /* 6990 * We should be at the end of the DOCTYPE declaration. 6991 */ 6992 if (RAW != '>') { 6993 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL); 6994 } 6995 NEXT; 6996} 6997 6998#ifdef LIBXML_SAX1_ENABLED 6999/** 7000 * xmlParseAttribute: 7001 * @ctxt: an XML parser context 7002 * @value: a xmlChar ** used to store the value of the attribute 7003 * 7004 * parse an attribute 7005 * 7006 * [41] Attribute ::= Name Eq AttValue 7007 * 7008 * [ WFC: No External Entity References ] 7009 * Attribute values cannot contain direct or indirect entity references 7010 * to external entities. 7011 * 7012 * [ WFC: No < in Attribute Values ] 7013 * The replacement text of any entity referred to directly or indirectly in 7014 * an attribute value (other than "<") must not contain a <. 7015 * 7016 * [ VC: Attribute Value Type ] 7017 * The attribute must have been declared; the value must be of the type 7018 * declared for it. 7019 * 7020 * [25] Eq ::= S? '=' S? 7021 * 7022 * With namespace: 7023 * 7024 * [NS 11] Attribute ::= QName Eq AttValue 7025 * 7026 * Also the case QName == xmlns:??? is handled independently as a namespace 7027 * definition. 7028 * 7029 * Returns the attribute name, and the value in *value. 
7030 */ 7031 7032const xmlChar * 7033xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { 7034 const xmlChar *name; 7035 xmlChar *val; 7036 7037 *value = NULL; 7038 GROW; 7039 name = xmlParseName(ctxt); 7040 if (name == NULL) { 7041 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7042 "error parsing attribute name\n"); 7043 return(NULL); 7044 } 7045 7046 /* 7047 * read the value 7048 */ 7049 SKIP_BLANKS; 7050 if (RAW == '=') { 7051 NEXT; 7052 SKIP_BLANKS; 7053 val = xmlParseAttValue(ctxt); 7054 ctxt->instate = XML_PARSER_CONTENT; 7055 } else { 7056 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7057 "Specification mandate value for attribute %s\n", name); 7058 return(NULL); 7059 } 7060 7061 /* 7062 * Check that xml:lang conforms to the specification 7063 * No more registered as an error, just generate a warning now 7064 * since this was deprecated in XML second edition 7065 */ 7066 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) { 7067 if (!xmlCheckLanguageID(val)) { 7068 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7069 "Malformed value for xml:lang : %s\n", 7070 val, NULL); 7071 } 7072 } 7073 7074 /* 7075 * Check that xml:space conforms to the specification 7076 */ 7077 if (xmlStrEqual(name, BAD_CAST "xml:space")) { 7078 if (xmlStrEqual(val, BAD_CAST "default")) 7079 *(ctxt->space) = 0; 7080 else if (xmlStrEqual(val, BAD_CAST "preserve")) 7081 *(ctxt->space) = 1; 7082 else { 7083 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 7084"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 7085 val, NULL); 7086 } 7087 } 7088 7089 *value = val; 7090 return(name); 7091} 7092 7093/** 7094 * xmlParseStartTag: 7095 * @ctxt: an XML parser context 7096 * 7097 * parse a start of tag either for rule element or 7098 * EmptyElement. In both case we don't parse the tag closing chars. 7099 * 7100 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 7101 * 7102 * [ WFC: Unique Att Spec ] 7103 * No attribute name may appear more than once in the same start-tag or 7104 * empty-element tag. 7105 * 7106 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7107 * 7108 * [ WFC: Unique Att Spec ] 7109 * No attribute name may appear more than once in the same start-tag or 7110 * empty-element tag. 7111 * 7112 * With namespace: 7113 * 7114 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7115 * 7116 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' 7117 * 7118 * Returns the element name parsed 7119 */ 7120 7121const xmlChar * 7122xmlParseStartTag(xmlParserCtxtPtr ctxt) { 7123 const xmlChar *name; 7124 const xmlChar *attname; 7125 xmlChar *attvalue; 7126 const xmlChar **atts = ctxt->atts; 7127 int nbatts = 0; 7128 int maxatts = ctxt->maxatts; 7129 int i; 7130 7131 if (RAW != '<') return(NULL); 7132 NEXT1; 7133 7134 name = xmlParseName(ctxt); 7135 if (name == NULL) { 7136 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7137 "xmlParseStartTag: invalid element name\n"); 7138 return(NULL); 7139 } 7140 7141 /* 7142 * Now parse the attributes, it ends up with the ending 7143 * 7144 * (S Attribute)* S? 7145 */ 7146 SKIP_BLANKS; 7147 GROW; 7148 7149 while ((RAW != '>') && 7150 ((RAW != '/') || (NXT(1) != '>')) && 7151 (IS_BYTE_CHAR(RAW))) { 7152 const xmlChar *q = CUR_PTR; 7153 unsigned int cons = ctxt->input->consumed; 7154 7155 attname = xmlParseAttribute(ctxt, &attvalue); 7156 if ((attname != NULL) && (attvalue != NULL)) { 7157 /* 7158 * [ WFC: Unique Att Spec ] 7159 * No attribute name may appear more than once in the same 7160 * start-tag or empty-element tag. 
7161 */ 7162 for (i = 0; i < nbatts;i += 2) { 7163 if (xmlStrEqual(atts[i], attname)) { 7164 xmlErrAttributeDup(ctxt, NULL, attname); 7165 xmlFree(attvalue); 7166 goto failed; 7167 } 7168 } 7169 /* 7170 * Add the pair to atts 7171 */ 7172 if (atts == NULL) { 7173 maxatts = 22; /* allow for 10 attrs by default */ 7174 atts = (const xmlChar **) 7175 xmlMalloc(maxatts * sizeof(xmlChar *)); 7176 if (atts == NULL) { 7177 xmlErrMemory(ctxt, NULL); 7178 if (attvalue != NULL) 7179 xmlFree(attvalue); 7180 goto failed; 7181 } 7182 ctxt->atts = atts; 7183 ctxt->maxatts = maxatts; 7184 } else if (nbatts + 4 > maxatts) { 7185 const xmlChar **n; 7186 7187 maxatts *= 2; 7188 n = (const xmlChar **) xmlRealloc((void *) atts, 7189 maxatts * sizeof(const xmlChar *)); 7190 if (n == NULL) { 7191 xmlErrMemory(ctxt, NULL); 7192 if (attvalue != NULL) 7193 xmlFree(attvalue); 7194 goto failed; 7195 } 7196 atts = n; 7197 ctxt->atts = atts; 7198 ctxt->maxatts = maxatts; 7199 } 7200 atts[nbatts++] = attname; 7201 atts[nbatts++] = attvalue; 7202 atts[nbatts] = NULL; 7203 atts[nbatts + 1] = NULL; 7204 } else { 7205 if (attvalue != NULL) 7206 xmlFree(attvalue); 7207 } 7208 7209failed: 7210 7211 GROW 7212 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 7213 break; 7214 if (!IS_BLANK_CH(RAW)) { 7215 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 7216 "attributes construct error\n"); 7217 } 7218 SKIP_BLANKS; 7219 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 7220 (attname == NULL) && (attvalue == NULL)) { 7221 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, 7222 "xmlParseStartTag: problem parsing attributes\n"); 7223 break; 7224 } 7225 SHRINK; 7226 GROW; 7227 } 7228 7229 /* 7230 * SAX: Start of Element ! 
7231 */ 7232 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) && 7233 (!ctxt->disableSAX)) { 7234 if (nbatts > 0) 7235 ctxt->sax->startElement(ctxt->userData, name, atts); 7236 else 7237 ctxt->sax->startElement(ctxt->userData, name, NULL); 7238 } 7239 7240 if (atts != NULL) { 7241 /* Free only the content strings */ 7242 for (i = 1;i < nbatts;i+=2) 7243 if (atts[i] != NULL) 7244 xmlFree((xmlChar *) atts[i]); 7245 } 7246 return(name); 7247} 7248 7249/** 7250 * xmlParseEndTag1: 7251 * @ctxt: an XML parser context 7252 * @line: line of the start tag 7253 * @nsNr: number of namespaces on the start tag 7254 * 7255 * parse an end of tag 7256 * 7257 * [42] ETag ::= '</' Name S? '>' 7258 * 7259 * With namespace 7260 * 7261 * [NS 9] ETag ::= '</' QName S? '>' 7262 */ 7263 7264static void 7265xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) { 7266 const xmlChar *name; 7267 7268 GROW; 7269 if ((RAW != '<') || (NXT(1) != '/')) { 7270 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED, 7271 "xmlParseEndTag: '</' not found\n"); 7272 return; 7273 } 7274 SKIP(2); 7275 7276 name = xmlParseNameAndCompare(ctxt,ctxt->name); 7277 7278 /* 7279 * We should definitely be at the ending "S? '>'" part 7280 */ 7281 GROW; 7282 SKIP_BLANKS; 7283 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 7284 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 7285 } else 7286 NEXT1; 7287 7288 /* 7289 * [ WFC: Element Type Match ] 7290 * The Name in an element's end-tag must match the element type in the 7291 * start-tag. 
7292 * 7293 */ 7294 if (name != (xmlChar*)1) { 7295 if (name == NULL) name = BAD_CAST "unparseable"; 7296 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 7297 "Opening and ending tag mismatch: %s line %d and %s\n", 7298 ctxt->name, line, name); 7299 } 7300 7301 /* 7302 * SAX: End of Tag 7303 */ 7304 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 7305 (!ctxt->disableSAX)) 7306 ctxt->sax->endElement(ctxt->userData, ctxt->name); 7307 7308 namePop(ctxt); 7309 spacePop(ctxt); 7310 return; 7311} 7312 7313/** 7314 * xmlParseEndTag: 7315 * @ctxt: an XML parser context 7316 * 7317 * parse an end of tag 7318 * 7319 * [42] ETag ::= '</' Name S? '>' 7320 * 7321 * With namespace 7322 * 7323 * [NS 9] ETag ::= '</' QName S? '>' 7324 */ 7325 7326void 7327xmlParseEndTag(xmlParserCtxtPtr ctxt) { 7328 xmlParseEndTag1(ctxt, 0); 7329} 7330#endif /* LIBXML_SAX1_ENABLED */ 7331 7332/************************************************************************ 7333 * * 7334 * SAX 2 specific operations * 7335 * * 7336 ************************************************************************/ 7337 7338static const xmlChar * 7339xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { 7340 int len = 0, l; 7341 int c; 7342 int count = 0; 7343 7344 /* 7345 * Handler for more complex cases 7346 */ 7347 GROW; 7348 c = CUR_CHAR(l); 7349 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 7350 (!IS_LETTER(c) && (c != '_'))) { 7351 return(NULL); 7352 } 7353 7354 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 7355 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 7356 (c == '.') || (c == '-') || (c == '_') || 7357 (IS_COMBINING(c)) || 7358 (IS_EXTENDER(c)))) { 7359 if (count++ > 100) { 7360 count = 0; 7361 GROW; 7362 } 7363 len += l; 7364 NEXTL(l); 7365 c = CUR_CHAR(l); 7366 } 7367 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 7368} 7369 7370/* 7371 * xmlGetNamespace: 7372 * @ctxt: an XML parser context 7373 * @prefix: the prefix to lookup 7374 * 
7375 * Lookup the namespace name for the @prefix (which ca be NULL) 7376 * The prefix must come from the @ctxt->dict dictionnary 7377 * 7378 * Returns the namespace name or NULL if not bound 7379 */ 7380static const xmlChar * 7381xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { 7382 int i; 7383 7384 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns); 7385 for (i = ctxt->nsNr - 2;i >= 0;i-=2) 7386 if (ctxt->nsTab[i] == prefix) { 7387 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0)) 7388 return(NULL); 7389 return(ctxt->nsTab[i + 1]); 7390 } 7391 return(NULL); 7392} 7393 7394/** 7395 * xmlParseNCName: 7396 * @ctxt: an XML parser context 7397 * @len: lenght of the string parsed 7398 * 7399 * parse an XML name. 7400 * 7401 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | 7402 * CombiningChar | Extender 7403 * 7404 * [5NS] NCName ::= (Letter | '_') (NCNameChar)* 7405 * 7406 * Returns the Name parsed or NULL 7407 */ 7408 7409static const xmlChar * 7410xmlParseNCName(xmlParserCtxtPtr ctxt) { 7411 const xmlChar *in; 7412 const xmlChar *ret; 7413 int count = 0; 7414 7415 /* 7416 * Accelerator for simple ASCII names 7417 */ 7418 in = ctxt->input->cur; 7419 if (((*in >= 0x61) && (*in <= 0x7A)) || 7420 ((*in >= 0x41) && (*in <= 0x5A)) || 7421 (*in == '_')) { 7422 in++; 7423 while (((*in >= 0x61) && (*in <= 0x7A)) || 7424 ((*in >= 0x41) && (*in <= 0x5A)) || 7425 ((*in >= 0x30) && (*in <= 0x39)) || 7426 (*in == '_') || (*in == '-') || 7427 (*in == '.')) 7428 in++; 7429 if ((*in > 0) && (*in < 0x80)) { 7430 count = in - ctxt->input->cur; 7431 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 7432 ctxt->input->cur = in; 7433 ctxt->nbChars += count; 7434 ctxt->input->col += count; 7435 if (ret == NULL) { 7436 xmlErrMemory(ctxt, NULL); 7437 } 7438 return(ret); 7439 } 7440 } 7441 return(xmlParseNCNameComplex(ctxt)); 7442} 7443 7444/** 7445 * xmlParseQName: 7446 * @ctxt: an XML parser context 7447 * @prefix: pointer to store the prefix part 7448 * 
7449 * parse an XML Namespace QName 7450 * 7451 * [6] QName ::= (Prefix ':')? LocalPart 7452 * [7] Prefix ::= NCName 7453 * [8] LocalPart ::= NCName 7454 * 7455 * Returns the Name parsed or NULL 7456 */ 7457 7458static const xmlChar * 7459xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) { 7460 const xmlChar *l, *p; 7461 7462 GROW; 7463 7464 l = xmlParseNCName(ctxt); 7465 if (l == NULL) { 7466 if (CUR == ':') { 7467 l = xmlParseName(ctxt); 7468 if (l != NULL) { 7469 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7470 "Failed to parse QName '%s'\n", l, NULL, NULL); 7471 *prefix = NULL; 7472 return(l); 7473 } 7474 } 7475 return(NULL); 7476 } 7477 if (CUR == ':') { 7478 NEXT; 7479 p = l; 7480 l = xmlParseNCName(ctxt); 7481 if (l == NULL) { 7482 xmlChar *tmp; 7483 7484 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7485 "Failed to parse QName '%s:'\n", p, NULL, NULL); 7486 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0); 7487 p = xmlDictLookup(ctxt->dict, tmp, -1); 7488 if (tmp != NULL) xmlFree(tmp); 7489 *prefix = NULL; 7490 return(p); 7491 } 7492 if (CUR == ':') { 7493 xmlChar *tmp; 7494 7495 xmlNsErr(ctxt, XML_NS_ERR_QNAME, 7496 "Failed to parse QName '%s:%s:'\n", p, l, NULL); 7497 NEXT; 7498 tmp = (xmlChar *) xmlParseName(ctxt); 7499 if (tmp != NULL) { 7500 tmp = xmlBuildQName(tmp, l, NULL, 0); 7501 l = xmlDictLookup(ctxt->dict, tmp, -1); 7502 if (tmp != NULL) xmlFree(tmp); 7503 *prefix = p; 7504 return(l); 7505 } 7506 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0); 7507 l = xmlDictLookup(ctxt->dict, tmp, -1); 7508 if (tmp != NULL) xmlFree(tmp); 7509 *prefix = p; 7510 return(l); 7511 } 7512 *prefix = p; 7513 } else 7514 *prefix = NULL; 7515 return(l); 7516} 7517 7518/** 7519 * xmlParseQNameAndCompare: 7520 * @ctxt: an XML parser context 7521 * @name: the localname 7522 * @prefix: the prefix, if any. 
7523 * 7524 * parse an XML name and compares for match 7525 * (specialized for endtag parsing) 7526 * 7527 * Returns NULL for an illegal name, (xmlChar*) 1 for success 7528 * and the name for mismatch 7529 */ 7530 7531static const xmlChar * 7532xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, 7533 xmlChar const *prefix) { 7534 const xmlChar *cmp = name; 7535 const xmlChar *in; 7536 const xmlChar *ret; 7537 const xmlChar *prefix2; 7538 7539 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name)); 7540 7541 GROW; 7542 in = ctxt->input->cur; 7543 7544 cmp = prefix; 7545 while (*in != 0 && *in == *cmp) { 7546 ++in; 7547 ++cmp; 7548 } 7549 if ((*cmp == 0) && (*in == ':')) { 7550 in++; 7551 cmp = name; 7552 while (*in != 0 && *in == *cmp) { 7553 ++in; 7554 ++cmp; 7555 } 7556 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { 7557 /* success */ 7558 ctxt->input->cur = in; 7559 return((const xmlChar*) 1); 7560 } 7561 } 7562 /* 7563 * all strings coms from the dictionary, equality can be done directly 7564 */ 7565 ret = xmlParseQName (ctxt, &prefix2); 7566 if ((ret == name) && (prefix == prefix2)) 7567 return((const xmlChar*) 1); 7568 return ret; 7569} 7570 7571/** 7572 * xmlParseAttValueInternal: 7573 * @ctxt: an XML parser context 7574 * @len: attribute len result 7575 * @alloc: whether the attribute was reallocated as a new string 7576 * @normalize: if 1 then further non-CDATA normalization must be done 7577 * 7578 * parse a value for an attribute. 7579 * NOTE: if no normalization is needed, the routine will return pointers 7580 * directly from the data buffer. 
7581 * 7582 * 3.3.3 Attribute-Value Normalization: 7583 * Before the value of an attribute is passed to the application or 7584 * checked for validity, the XML processor must normalize it as follows: 7585 * - a character reference is processed by appending the referenced 7586 * character to the attribute value 7587 * - an entity reference is processed by recursively processing the 7588 * replacement text of the entity 7589 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by 7590 * appending #x20 to the normalized value, except that only a single 7591 * #x20 is appended for a "#xD#xA" sequence that is part of an external 7592 * parsed entity or the literal entity value of an internal parsed entity 7593 * - other characters are processed by appending them to the normalized value 7594 * If the declared value is not CDATA, then the XML processor must further 7595 * process the normalized attribute value by discarding any leading and 7596 * trailing space (#x20) characters, and by replacing sequences of space 7597 * (#x20) characters by a single space (#x20) character. 7598 * All attributes for which no declaration has been read should be treated 7599 * by a non-validating parser as if declared CDATA. 7600 * 7601 * Returns the AttValue parsed or NULL. The value has to be freed by the 7602 * caller if it was copied, this can be detected by val[*len] == 0. 
7603 */ 7604 7605static xmlChar * 7606xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, 7607 int normalize) 7608{ 7609 xmlChar limit = 0; 7610 const xmlChar *in = NULL, *start, *end, *last; 7611 xmlChar *ret = NULL; 7612 7613 GROW; 7614 in = (xmlChar *) CUR_PTR; 7615 if (*in != '"' && *in != '\'') { 7616 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); 7617 return (NULL); 7618 } 7619 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; 7620 7621 /* 7622 * try to handle in this routine the most common case where no 7623 * allocation of a new string is required and where content is 7624 * pure ASCII. 7625 */ 7626 limit = *in++; 7627 end = ctxt->input->end; 7628 start = in; 7629 if (in >= end) { 7630 const xmlChar *oldbase = ctxt->input->base; 7631 GROW; 7632 if (oldbase != ctxt->input->base) { 7633 long delta = ctxt->input->base - oldbase; 7634 start = start + delta; 7635 in = in + delta; 7636 } 7637 end = ctxt->input->end; 7638 } 7639 if (normalize) { 7640 /* 7641 * Skip any leading spaces 7642 */ 7643 while ((in < end) && (*in != limit) && 7644 ((*in == 0x20) || (*in == 0x9) || 7645 (*in == 0xA) || (*in == 0xD))) { 7646 in++; 7647 start = in; 7648 if (in >= end) { 7649 const xmlChar *oldbase = ctxt->input->base; 7650 GROW; 7651 if (oldbase != ctxt->input->base) { 7652 long delta = ctxt->input->base - oldbase; 7653 start = start + delta; 7654 in = in + delta; 7655 } 7656 end = ctxt->input->end; 7657 } 7658 } 7659 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7660 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7661 if ((*in++ == 0x20) && (*in == 0x20)) break; 7662 if (in >= end) { 7663 const xmlChar *oldbase = ctxt->input->base; 7664 GROW; 7665 if (oldbase != ctxt->input->base) { 7666 long delta = ctxt->input->base - oldbase; 7667 start = start + delta; 7668 in = in + delta; 7669 } 7670 end = ctxt->input->end; 7671 } 7672 } 7673 last = in; 7674 /* 7675 * skip the trailing blanks 7676 */ 7677 while ((last[-1] == 0x20) && (last > start)) 
last--; 7678 while ((in < end) && (*in != limit) && 7679 ((*in == 0x20) || (*in == 0x9) || 7680 (*in == 0xA) || (*in == 0xD))) { 7681 in++; 7682 if (in >= end) { 7683 const xmlChar *oldbase = ctxt->input->base; 7684 GROW; 7685 if (oldbase != ctxt->input->base) { 7686 long delta = ctxt->input->base - oldbase; 7687 start = start + delta; 7688 in = in + delta; 7689 last = last + delta; 7690 } 7691 end = ctxt->input->end; 7692 } 7693 } 7694 if (*in != limit) goto need_complex; 7695 } else { 7696 while ((in < end) && (*in != limit) && (*in >= 0x20) && 7697 (*in <= 0x7f) && (*in != '&') && (*in != '<')) { 7698 in++; 7699 if (in >= end) { 7700 const xmlChar *oldbase = ctxt->input->base; 7701 GROW; 7702 if (oldbase != ctxt->input->base) { 7703 long delta = ctxt->input->base - oldbase; 7704 start = start + delta; 7705 in = in + delta; 7706 } 7707 end = ctxt->input->end; 7708 } 7709 } 7710 last = in; 7711 if (*in != limit) goto need_complex; 7712 } 7713 in++; 7714 if (len != NULL) { 7715 *len = last - start; 7716 ret = (xmlChar *) start; 7717 } else { 7718 if (alloc) *alloc = 1; 7719 ret = xmlStrndup(start, last - start); 7720 } 7721 CUR_PTR = in; 7722 if (alloc) *alloc = 0; 7723 return ret; 7724need_complex: 7725 if (alloc) *alloc = 1; 7726 return xmlParseAttValueComplex(ctxt, len, normalize); 7727} 7728 7729/** 7730 * xmlParseAttribute2: 7731 * @ctxt: an XML parser context 7732 * @pref: the element prefix 7733 * @elem: the element name 7734 * @prefix: a xmlChar ** used to store the value of the attribute prefix 7735 * @value: a xmlChar ** used to store the value of the attribute 7736 * @len: an int * to save the length of the attribute 7737 * @alloc: an int * to indicate if the attribute was allocated 7738 * 7739 * parse an attribute in the new SAX2 framework. 7740 * 7741 * Returns the attribute name, and the value in *value, . 
7742 */ 7743 7744static const xmlChar * 7745xmlParseAttribute2(xmlParserCtxtPtr ctxt, 7746 const xmlChar *pref, const xmlChar *elem, 7747 const xmlChar **prefix, xmlChar **value, 7748 int *len, int *alloc) { 7749 const xmlChar *name; 7750 xmlChar *val, *internal_val = NULL; 7751 int normalize = 0; 7752 7753 *value = NULL; 7754 GROW; 7755 name = xmlParseQName(ctxt, prefix); 7756 if (name == NULL) { 7757 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7758 "error parsing attribute name\n"); 7759 return(NULL); 7760 } 7761 7762 /* 7763 * get the type if needed 7764 */ 7765 if (ctxt->attsSpecial != NULL) { 7766 int type; 7767 7768 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial, 7769 pref, elem, *prefix, name); 7770 if (type != 0) normalize = 1; 7771 } 7772 7773 /* 7774 * read the value 7775 */ 7776 SKIP_BLANKS; 7777 if (RAW == '=') { 7778 NEXT; 7779 SKIP_BLANKS; 7780 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); 7781 ctxt->instate = XML_PARSER_CONTENT; 7782 } else { 7783 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE, 7784 "Specification mandate value for attribute %s\n", name); 7785 return(NULL); 7786 } 7787 7788 if (*prefix == ctxt->str_xml) { 7789 /* 7790 * Check that xml:lang conforms to the specification 7791 * No more registered as an error, just generate a warning now 7792 * since this was deprecated in XML second edition 7793 */ 7794 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) { 7795 internal_val = xmlStrndup(val, *len); 7796 if (!xmlCheckLanguageID(internal_val)) { 7797 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE, 7798 "Malformed value for xml:lang : %s\n", 7799 internal_val, NULL); 7800 } 7801 } 7802 7803 /* 7804 * Check that xml:space conforms to the specification 7805 */ 7806 if (xmlStrEqual(name, BAD_CAST "space")) { 7807 internal_val = xmlStrndup(val, *len); 7808 if (xmlStrEqual(internal_val, BAD_CAST "default")) 7809 *(ctxt->space) = 0; 7810 else if (xmlStrEqual(internal_val, BAD_CAST "preserve")) 7811 
*(ctxt->space) = 1; 7812 else { 7813 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE, 7814"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n", 7815 internal_val, NULL); 7816 } 7817 } 7818 if (internal_val) { 7819 xmlFree(internal_val); 7820 } 7821 } 7822 7823 *value = val; 7824 return(name); 7825} 7826 7827/** 7828 * xmlParseStartTag2: 7829 * @ctxt: an XML parser context 7830 * 7831 * parse a start of tag either for rule element or 7832 * EmptyElement. In both case we don't parse the tag closing chars. 7833 * This routine is called when running SAX2 parsing 7834 * 7835 * [40] STag ::= '<' Name (S Attribute)* S? '>' 7836 * 7837 * [ WFC: Unique Att Spec ] 7838 * No attribute name may appear more than once in the same start-tag or 7839 * empty-element tag. 7840 * 7841 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 7842 * 7843 * [ WFC: Unique Att Spec ] 7844 * No attribute name may appear more than once in the same start-tag or 7845 * empty-element tag. 7846 * 7847 * With namespace: 7848 * 7849 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' 7850 * 7851 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? 
'/>' 7852 * 7853 * Returns the element name parsed 7854 */ 7855 7856static const xmlChar * 7857xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, 7858 const xmlChar **URI, int *tlen) { 7859 const xmlChar *localname; 7860 const xmlChar *prefix; 7861 const xmlChar *attname; 7862 const xmlChar *aprefix; 7863 const xmlChar *nsname; 7864 xmlChar *attvalue; 7865 const xmlChar **atts = ctxt->atts; 7866 int maxatts = ctxt->maxatts; 7867 int nratts, nbatts, nbdef; 7868 int i, j, nbNs, attval, oldline, oldcol; 7869 const xmlChar *base; 7870 unsigned long cur; 7871 int nsNr = ctxt->nsNr; 7872 7873 if (RAW != '<') return(NULL); 7874 NEXT1; 7875 7876 /* 7877 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that 7878 * point since the attribute values may be stored as pointers to 7879 * the buffer and calling SHRINK would destroy them ! 7880 * The Shrinking is only possible once the full set of attribute 7881 * callbacks have been done. 7882 */ 7883reparse: 7884 SHRINK; 7885 base = ctxt->input->base; 7886 cur = ctxt->input->cur - ctxt->input->base; 7887 oldline = ctxt->input->line; 7888 oldcol = ctxt->input->col; 7889 nbatts = 0; 7890 nratts = 0; 7891 nbdef = 0; 7892 nbNs = 0; 7893 attval = 0; 7894 /* Forget any namespaces added during an earlier parse of this element. */ 7895 ctxt->nsNr = nsNr; 7896 7897 localname = xmlParseQName(ctxt, &prefix); 7898 if (localname == NULL) { 7899 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, 7900 "StartTag: invalid element name\n"); 7901 return(NULL); 7902 } 7903 *tlen = ctxt->input->cur - ctxt->input->base - cur; 7904 7905 /* 7906 * Now parse the attributes, it ends up with the ending 7907 * 7908 * (S Attribute)* S? 
7909 */ 7910 SKIP_BLANKS; 7911 GROW; 7912 if (ctxt->input->base != base) goto base_changed; 7913 7914 while ((RAW != '>') && 7915 ((RAW != '/') || (NXT(1) != '>')) && 7916 (IS_BYTE_CHAR(RAW))) { 7917 const xmlChar *q = CUR_PTR; 7918 unsigned int cons = ctxt->input->consumed; 7919 int len = -1, alloc = 0; 7920 7921 attname = xmlParseAttribute2(ctxt, prefix, localname, 7922 &aprefix, &attvalue, &len, &alloc); 7923 if (ctxt->input->base != base) { 7924 if ((attvalue != NULL) && (alloc != 0)) 7925 xmlFree(attvalue); 7926 attvalue = NULL; 7927 goto base_changed; 7928 } 7929 if ((attname != NULL) && (attvalue != NULL)) { 7930 if (len < 0) len = xmlStrlen(attvalue); 7931 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 7932 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7933 xmlURIPtr uri; 7934 7935 if (*URL != 0) { 7936 uri = xmlParseURI((const char *) URL); 7937 if (uri == NULL) { 7938 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 7939 "xmlns: %s not a valid URI\n", 7940 URL, NULL); 7941 } else { 7942 if (uri->scheme == NULL) { 7943 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 7944 "xmlns: URI %s is not absolute\n", 7945 URL, NULL); 7946 } 7947 xmlFreeURI(uri); 7948 } 7949 } 7950 /* 7951 * check that it's not a defined namespace 7952 */ 7953 for (j = 1;j <= nbNs;j++) 7954 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 7955 break; 7956 if (j <= nbNs) 7957 xmlErrAttributeDup(ctxt, NULL, attname); 7958 else 7959 if (nsPush(ctxt, NULL, URL) > 0) nbNs++; 7960 if (alloc != 0) xmlFree(attvalue); 7961 SKIP_BLANKS; 7962 continue; 7963 } 7964 if (aprefix == ctxt->str_xmlns) { 7965 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len); 7966 xmlURIPtr uri; 7967 7968 if (attname == ctxt->str_xml) { 7969 if (URL != ctxt->str_xml_ns) { 7970 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE, 7971 "xml namespace prefix mapped to wrong URI\n", 7972 NULL, NULL, NULL); 7973 } 7974 /* 7975 * Do not keep a namespace definition node 7976 */ 7977 if (alloc != 0) xmlFree(attvalue); 
7978 SKIP_BLANKS; 7979 continue; 7980 } 7981 uri = xmlParseURI((const char *) URL); 7982 if (uri == NULL) { 7983 xmlWarningMsg(ctxt, XML_WAR_NS_URI, 7984 "xmlns:%s: '%s' is not a valid URI\n", 7985 attname, URL); 7986 } else { 7987 if ((ctxt->pedantic) && (uri->scheme == NULL)) { 7988 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE, 7989 "xmlns:%s: URI %s is not absolute\n", 7990 attname, URL); 7991 } 7992 xmlFreeURI(uri); 7993 } 7994 7995 /* 7996 * check that it's not a defined namespace 7997 */ 7998 for (j = 1;j <= nbNs;j++) 7999 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8000 break; 8001 if (j <= nbNs) 8002 xmlErrAttributeDup(ctxt, aprefix, attname); 8003 else 8004 if (nsPush(ctxt, attname, URL) > 0) nbNs++; 8005 if (alloc != 0) xmlFree(attvalue); 8006 SKIP_BLANKS; 8007 if (ctxt->input->base != base) goto base_changed; 8008 continue; 8009 } 8010 8011 /* 8012 * Add the pair to atts 8013 */ 8014 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8015 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8016 if (attvalue[len] == 0) 8017 xmlFree(attvalue); 8018 goto failed; 8019 } 8020 maxatts = ctxt->maxatts; 8021 atts = ctxt->atts; 8022 } 8023 ctxt->attallocs[nratts++] = alloc; 8024 atts[nbatts++] = attname; 8025 atts[nbatts++] = aprefix; 8026 atts[nbatts++] = NULL; /* the URI will be fetched later */ 8027 atts[nbatts++] = attvalue; 8028 attvalue += len; 8029 atts[nbatts++] = attvalue; 8030 /* 8031 * tag if some deallocation is needed 8032 */ 8033 if (alloc != 0) attval = 1; 8034 } else { 8035 if ((attvalue != NULL) && (attvalue[len] == 0)) 8036 xmlFree(attvalue); 8037 } 8038 8039failed: 8040 8041 GROW 8042 if (ctxt->input->base != base) goto base_changed; 8043 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>')))) 8044 break; 8045 if (!IS_BLANK_CH(RAW)) { 8046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 8047 "attributes construct error\n"); 8048 break; 8049 } 8050 SKIP_BLANKS; 8051 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) && 8052 (attname == NULL) && 
(attvalue == NULL)) { 8053 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8054 "xmlParseStartTag: problem parsing attributes\n"); 8055 break; 8056 } 8057 GROW; 8058 if (ctxt->input->base != base) goto base_changed; 8059 } 8060 8061 /* 8062 * The attributes defaulting 8063 */ 8064 if (ctxt->attsDefault != NULL) { 8065 xmlDefAttrsPtr defaults; 8066 8067 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix); 8068 if (defaults != NULL) { 8069 for (i = 0;i < defaults->nbAttrs;i++) { 8070 attname = defaults->values[4 * i]; 8071 aprefix = defaults->values[4 * i + 1]; 8072 8073 /* 8074 * special work for namespaces defaulted defs 8075 */ 8076 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) { 8077 /* 8078 * check that it's not a defined namespace 8079 */ 8080 for (j = 1;j <= nbNs;j++) 8081 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL) 8082 break; 8083 if (j <= nbNs) continue; 8084 8085 nsname = xmlGetNamespace(ctxt, NULL); 8086 if (nsname != defaults->values[4 * i + 2]) { 8087 if (nsPush(ctxt, NULL, 8088 defaults->values[4 * i + 2]) > 0) 8089 nbNs++; 8090 } 8091 } else if (aprefix == ctxt->str_xmlns) { 8092 /* 8093 * check that it's not a defined namespace 8094 */ 8095 for (j = 1;j <= nbNs;j++) 8096 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname) 8097 break; 8098 if (j <= nbNs) continue; 8099 8100 nsname = xmlGetNamespace(ctxt, attname); 8101 if (nsname != defaults->values[2]) { 8102 if (nsPush(ctxt, attname, 8103 defaults->values[4 * i + 2]) > 0) 8104 nbNs++; 8105 } 8106 } else { 8107 /* 8108 * check that it's not a defined attribute 8109 */ 8110 for (j = 0;j < nbatts;j+=5) { 8111 if ((attname == atts[j]) && (aprefix == atts[j+1])) 8112 break; 8113 } 8114 if (j < nbatts) continue; 8115 8116 if ((atts == NULL) || (nbatts + 5 > maxatts)) { 8117 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) { 8118 return(NULL); 8119 } 8120 maxatts = ctxt->maxatts; 8121 atts = ctxt->atts; 8122 } 8123 atts[nbatts++] = attname; 8124 atts[nbatts++] = aprefix; 8125 if (aprefix == NULL) 
8126 atts[nbatts++] = NULL; 8127 else 8128 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix); 8129 atts[nbatts++] = defaults->values[4 * i + 2]; 8130 atts[nbatts++] = defaults->values[4 * i + 3]; 8131 nbdef++; 8132 } 8133 } 8134 } 8135 } 8136 8137 /* 8138 * The attributes checkings 8139 */ 8140 for (i = 0; i < nbatts;i += 5) { 8141 /* 8142 * The default namespace does not apply to attribute names. 8143 */ 8144 if (atts[i + 1] != NULL) { 8145 nsname = xmlGetNamespace(ctxt, atts[i + 1]); 8146 if (nsname == NULL) { 8147 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 8148 "Namespace prefix %s for %s on %s is not defined\n", 8149 atts[i + 1], atts[i], localname); 8150 } 8151 atts[i + 2] = nsname; 8152 } else 8153 nsname = NULL; 8154 /* 8155 * [ WFC: Unique Att Spec ] 8156 * No attribute name may appear more than once in the same 8157 * start-tag or empty-element tag. 8158 * As extended by the Namespace in XML REC. 8159 */ 8160 for (j = 0; j < i;j += 5) { 8161 if (atts[i] == atts[j]) { 8162 if (atts[i+1] == atts[j+1]) { 8163 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]); 8164 break; 8165 } 8166 if ((nsname != NULL) && (atts[j + 2] == nsname)) { 8167 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED, 8168 "Namespaced Attribute %s in '%s' redefined\n", 8169 atts[i], nsname, NULL); 8170 break; 8171 } 8172 } 8173 } 8174 } 8175 8176 nsname = xmlGetNamespace(ctxt, prefix); 8177 if ((prefix != NULL) && (nsname == NULL)) { 8178 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, 8179 "Namespace prefix %s on %s is not defined\n", 8180 prefix, localname, NULL); 8181 } 8182 *pref = prefix; 8183 *URI = nsname; 8184 8185 /* 8186 * SAX: Start of Element ! 
8187 */ 8188 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) && 8189 (!ctxt->disableSAX)) { 8190 if (nbNs > 0) 8191 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 8192 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs], 8193 nbatts / 5, nbdef, atts); 8194 else 8195 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, 8196 nsname, 0, NULL, nbatts / 5, nbdef, atts); 8197 } 8198 8199 /* 8200 * Free up attribute allocated strings if needed 8201 */ 8202 if (attval != 0) { 8203 for (i = 3,j = 0; j < nratts;i += 5,j++) 8204 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 8205 xmlFree((xmlChar *) atts[i]); 8206 } 8207 8208 return(localname); 8209 8210base_changed: 8211 /* 8212 * the attribute strings are valid iif the base didn't changed 8213 */ 8214 if (attval != 0) { 8215 for (i = 3,j = 0; j < nratts;i += 5,j++) 8216 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL)) 8217 xmlFree((xmlChar *) atts[i]); 8218 } 8219 ctxt->input->cur = ctxt->input->base + cur; 8220 ctxt->input->line = oldline; 8221 ctxt->input->col = oldcol; 8222 if (ctxt->wellFormed == 1) { 8223 goto reparse; 8224 } 8225 return(NULL); 8226} 8227 8228/** 8229 * xmlParseEndTag2: 8230 * @ctxt: an XML parser context 8231 * @line: line of the start tag 8232 * @nsNr: number of namespaces on the start tag 8233 * 8234 * parse an end of tag 8235 * 8236 * [42] ETag ::= '</' Name S? '>' 8237 * 8238 * With namespace 8239 * 8240 * [NS 9] ETag ::= '</' QName S? 
'>' 8241 */ 8242 8243static void 8244xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, 8245 const xmlChar *URI, int line, int nsNr, int tlen) { 8246 const xmlChar *name; 8247 8248 GROW; 8249 if ((RAW != '<') || (NXT(1) != '/')) { 8250 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL); 8251 return; 8252 } 8253 SKIP(2); 8254 8255 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { 8256 if (ctxt->input->cur[tlen] == '>') { 8257 ctxt->input->cur += tlen + 1; 8258 goto done; 8259 } 8260 ctxt->input->cur += tlen; 8261 name = (xmlChar*)1; 8262 } else { 8263 if (prefix == NULL) 8264 name = xmlParseNameAndCompare(ctxt, ctxt->name); 8265 else 8266 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); 8267 } 8268 8269 /* 8270 * We should definitely be at the ending "S? '>'" part 8271 */ 8272 GROW; 8273 SKIP_BLANKS; 8274 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) { 8275 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL); 8276 } else 8277 NEXT1; 8278 8279 /* 8280 * [ WFC: Element Type Match ] 8281 * The Name in an element's end-tag must match the element type in the 8282 * start-tag. 8283 * 8284 */ 8285 if (name != (xmlChar*)1) { 8286 if (name == NULL) name = BAD_CAST "unparseable"; 8287 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, 8288 "Opening and ending tag mismatch: %s line %d and %s\n", 8289 ctxt->name, line, name); 8290 } 8291 8292 /* 8293 * SAX: End of Tag 8294 */ 8295done: 8296 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8297 (!ctxt->disableSAX)) 8298 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); 8299 8300 spacePop(ctxt); 8301 if (nsNr != 0) 8302 nsPop(ctxt, nsNr); 8303 return; 8304} 8305 8306/** 8307 * xmlParseCDSect: 8308 * @ctxt: an XML parser context 8309 * 8310 * Parse escaped pure raw content. 
8311 * 8312 * [18] CDSect ::= CDStart CData CDEnd 8313 * 8314 * [19] CDStart ::= '<![CDATA[' 8315 * 8316 * [20] Data ::= (Char* - (Char* ']]>' Char*)) 8317 * 8318 * [21] CDEnd ::= ']]>' 8319 */ 8320void 8321xmlParseCDSect(xmlParserCtxtPtr ctxt) { 8322 xmlChar *buf = NULL; 8323 int len = 0; 8324 int size = XML_PARSER_BUFFER_SIZE; 8325 int r, rl; 8326 int s, sl; 8327 int cur, l; 8328 int count = 0; 8329 8330 /* Check 2.6.0 was NXT(0) not RAW */ 8331 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 8332 SKIP(9); 8333 } else 8334 return; 8335 8336 ctxt->instate = XML_PARSER_CDATA_SECTION; 8337 r = CUR_CHAR(rl); 8338 if (!IS_CHAR(r)) { 8339 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 8340 ctxt->instate = XML_PARSER_CONTENT; 8341 return; 8342 } 8343 NEXTL(rl); 8344 s = CUR_CHAR(sl); 8345 if (!IS_CHAR(s)) { 8346 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL); 8347 ctxt->instate = XML_PARSER_CONTENT; 8348 return; 8349 } 8350 NEXTL(sl); 8351 cur = CUR_CHAR(l); 8352 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8353 if (buf == NULL) { 8354 xmlErrMemory(ctxt, NULL); 8355 return; 8356 } 8357 while (IS_CHAR(cur) && 8358 ((r != ']') || (s != ']') || (cur != '>'))) { 8359 if (len + 5 >= size) { 8360 xmlChar *tmp; 8361 8362 size *= 2; 8363 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8364 if (tmp == NULL) { 8365 xmlFree(buf); 8366 xmlErrMemory(ctxt, NULL); 8367 return; 8368 } 8369 buf = tmp; 8370 } 8371 COPY_BUF(rl,buf,len,r); 8372 r = s; 8373 rl = sl; 8374 s = cur; 8375 sl = l; 8376 count++; 8377 if (count > 50) { 8378 GROW; 8379 count = 0; 8380 } 8381 NEXTL(l); 8382 cur = CUR_CHAR(l); 8383 } 8384 buf[len] = 0; 8385 ctxt->instate = XML_PARSER_CONTENT; 8386 if (cur != '>') { 8387 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED, 8388 "CData section not finished\n%.50s\n", buf); 8389 xmlFree(buf); 8390 return; 8391 } 8392 NEXTL(l); 8393 8394 /* 8395 * OK the buffer is to be consumed as cdata. 
8396 */ 8397 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 8398 if (ctxt->sax->cdataBlock != NULL) 8399 ctxt->sax->cdataBlock(ctxt->userData, buf, len); 8400 else if (ctxt->sax->characters != NULL) 8401 ctxt->sax->characters(ctxt->userData, buf, len); 8402 } 8403 xmlFree(buf); 8404} 8405 8406/** 8407 * xmlParseContent: 8408 * @ctxt: an XML parser context 8409 * 8410 * Parse a content: 8411 * 8412 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 8413 */ 8414 8415void 8416xmlParseContent(xmlParserCtxtPtr ctxt) { 8417 GROW; 8418 while ((RAW != 0) && 8419 ((RAW != '<') || (NXT(1) != '/')) && 8420 (ctxt->instate != XML_PARSER_EOF)) { 8421 const xmlChar *test = CUR_PTR; 8422 unsigned int cons = ctxt->input->consumed; 8423 const xmlChar *cur = ctxt->input->cur; 8424 8425 /* 8426 * First case : a Processing Instruction. 8427 */ 8428 if ((*cur == '<') && (cur[1] == '?')) { 8429 xmlParsePI(ctxt); 8430 } 8431 8432 /* 8433 * Second case : a CDSection 8434 */ 8435 /* 2.6.0 test was *cur not RAW */ 8436 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) { 8437 xmlParseCDSect(ctxt); 8438 } 8439 8440 /* 8441 * Third case : a comment 8442 */ 8443 else if ((*cur == '<') && (NXT(1) == '!') && 8444 (NXT(2) == '-') && (NXT(3) == '-')) { 8445 xmlParseComment(ctxt); 8446 ctxt->instate = XML_PARSER_CONTENT; 8447 } 8448 8449 /* 8450 * Fourth case : a sub-element. 8451 */ 8452 else if (*cur == '<') { 8453 xmlParseElement(ctxt); 8454 } 8455 8456 /* 8457 * Fifth case : a reference. If if has not been resolved, 8458 * parsing returns it's Name, create the node 8459 */ 8460 8461 else if (*cur == '&') { 8462 xmlParseReference(ctxt); 8463 } 8464 8465 /* 8466 * Last case, text. Note that References are handled directly. 8467 */ 8468 else { 8469 xmlParseCharData(ctxt, 0); 8470 } 8471 8472 GROW; 8473 /* 8474 * Pop-up of finished entities. 
8475 */ 8476 while ((RAW == 0) && (ctxt->inputNr > 1)) 8477 xmlPopInput(ctxt); 8478 SHRINK; 8479 8480 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 8481 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 8482 "detected an error in element content\n"); 8483 ctxt->instate = XML_PARSER_EOF; 8484 break; 8485 } 8486 } 8487} 8488 8489/** 8490 * xmlParseElement: 8491 * @ctxt: an XML parser context 8492 * 8493 * parse an XML element, this is highly recursive 8494 * 8495 * [39] element ::= EmptyElemTag | STag content ETag 8496 * 8497 * [ WFC: Element Type Match ] 8498 * The Name in an element's end-tag must match the element type in the 8499 * start-tag. 8500 * 8501 */ 8502 8503void 8504xmlParseElement(xmlParserCtxtPtr ctxt) { 8505 const xmlChar *name; 8506 const xmlChar *prefix; 8507 const xmlChar *URI; 8508 xmlParserNodeInfo node_info; 8509 int line, tlen; 8510 xmlNodePtr ret; 8511 int nsNr = ctxt->nsNr; 8512 8513 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) { 8514 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, 8515 "Excessive depth in document: change xmlParserMaxDepth = %d\n", 8516 xmlParserMaxDepth); 8517 ctxt->instate = XML_PARSER_EOF; 8518 return; 8519 } 8520 8521 /* Capture start position */ 8522 if (ctxt->record_info) { 8523 node_info.begin_pos = ctxt->input->consumed + 8524 (CUR_PTR - ctxt->input->base); 8525 node_info.begin_line = ctxt->input->line; 8526 } 8527 8528 if (ctxt->spaceNr == 0) 8529 spacePush(ctxt, -1); 8530 else if (*ctxt->space == -2) 8531 spacePush(ctxt, -1); 8532 else 8533 spacePush(ctxt, *ctxt->space); 8534 8535 line = ctxt->input->line; 8536#ifdef LIBXML_SAX1_ENABLED 8537 if (ctxt->sax2) 8538#endif /* LIBXML_SAX1_ENABLED */ 8539 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 8540#ifdef LIBXML_SAX1_ENABLED 8541 else 8542 name = xmlParseStartTag(ctxt); 8543#endif /* LIBXML_SAX1_ENABLED */ 8544 if (name == NULL) { 8545 spacePop(ctxt); 8546 return; 8547 } 8548 namePush(ctxt, name); 8549 ret = ctxt->node; 8550 8551#ifdef 
LIBXML_VALID_ENABLED 8552 /* 8553 * [ VC: Root Element Type ] 8554 * The Name in the document type declaration must match the element 8555 * type of the root element. 8556 */ 8557 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 8558 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 8559 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 8560#endif /* LIBXML_VALID_ENABLED */ 8561 8562 /* 8563 * Check for an Empty Element. 8564 */ 8565 if ((RAW == '/') && (NXT(1) == '>')) { 8566 SKIP(2); 8567 if (ctxt->sax2) { 8568 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && 8569 (!ctxt->disableSAX)) 8570 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI); 8571#ifdef LIBXML_SAX1_ENABLED 8572 } else { 8573 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) && 8574 (!ctxt->disableSAX)) 8575 ctxt->sax->endElement(ctxt->userData, name); 8576#endif /* LIBXML_SAX1_ENABLED */ 8577 } 8578 namePop(ctxt); 8579 spacePop(ctxt); 8580 if (nsNr != ctxt->nsNr) 8581 nsPop(ctxt, ctxt->nsNr - nsNr); 8582 if ( ret != NULL && ctxt->record_info ) { 8583 node_info.end_pos = ctxt->input->consumed + 8584 (CUR_PTR - ctxt->input->base); 8585 node_info.end_line = ctxt->input->line; 8586 node_info.node = ret; 8587 xmlParserAddNodeInfo(ctxt, &node_info); 8588 } 8589 return; 8590 } 8591 if (RAW == '>') { 8592 NEXT1; 8593 } else { 8594 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED, 8595 "Couldn't find end of Start Tag %s line %d\n", 8596 name, line, NULL); 8597 8598 /* 8599 * end of parsing of this node. 
8600 */ 8601 nodePop(ctxt); 8602 namePop(ctxt); 8603 spacePop(ctxt); 8604 if (nsNr != ctxt->nsNr) 8605 nsPop(ctxt, ctxt->nsNr - nsNr); 8606 8607 /* 8608 * Capture end position and add node 8609 */ 8610 if ( ret != NULL && ctxt->record_info ) { 8611 node_info.end_pos = ctxt->input->consumed + 8612 (CUR_PTR - ctxt->input->base); 8613 node_info.end_line = ctxt->input->line; 8614 node_info.node = ret; 8615 xmlParserAddNodeInfo(ctxt, &node_info); 8616 } 8617 return; 8618 } 8619 8620 /* 8621 * Parse the content of the element: 8622 */ 8623 xmlParseContent(ctxt); 8624 if (!IS_BYTE_CHAR(RAW)) { 8625 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, 8626 "Premature end of data in tag %s line %d\n", 8627 name, line, NULL); 8628 8629 /* 8630 * end of parsing of this node. 8631 */ 8632 nodePop(ctxt); 8633 namePop(ctxt); 8634 spacePop(ctxt); 8635 if (nsNr != ctxt->nsNr) 8636 nsPop(ctxt, ctxt->nsNr - nsNr); 8637 return; 8638 } 8639 8640 /* 8641 * parse the end of tag: '</' should be here. 8642 */ 8643 if (ctxt->sax2) { 8644 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); 8645 namePop(ctxt); 8646 } 8647#ifdef LIBXML_SAX1_ENABLED 8648 else 8649 xmlParseEndTag1(ctxt, line); 8650#endif /* LIBXML_SAX1_ENABLED */ 8651 8652 /* 8653 * Capture end position and add node 8654 */ 8655 if ( ret != NULL && ctxt->record_info ) { 8656 node_info.end_pos = ctxt->input->consumed + 8657 (CUR_PTR - ctxt->input->base); 8658 node_info.end_line = ctxt->input->line; 8659 node_info.node = ret; 8660 xmlParserAddNodeInfo(ctxt, &node_info); 8661 } 8662} 8663 8664/** 8665 * xmlParseVersionNum: 8666 * @ctxt: an XML parser context 8667 * 8668 * parse the XML version value. 
8669 * 8670 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ 8671 * 8672 * Returns the string giving the XML version number, or NULL 8673 */ 8674xmlChar * 8675xmlParseVersionNum(xmlParserCtxtPtr ctxt) { 8676 xmlChar *buf = NULL; 8677 int len = 0; 8678 int size = 10; 8679 xmlChar cur; 8680 8681 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8682 if (buf == NULL) { 8683 xmlErrMemory(ctxt, NULL); 8684 return(NULL); 8685 } 8686 cur = CUR; 8687 while (((cur >= 'a') && (cur <= 'z')) || 8688 ((cur >= 'A') && (cur <= 'Z')) || 8689 ((cur >= '0') && (cur <= '9')) || 8690 (cur == '_') || (cur == '.') || 8691 (cur == ':') || (cur == '-')) { 8692 if (len + 1 >= size) { 8693 xmlChar *tmp; 8694 8695 size *= 2; 8696 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8697 if (tmp == NULL) { 8698 xmlErrMemory(ctxt, NULL); 8699 return(NULL); 8700 } 8701 buf = tmp; 8702 } 8703 buf[len++] = cur; 8704 NEXT; 8705 cur=CUR; 8706 } 8707 buf[len] = 0; 8708 return(buf); 8709} 8710 8711/** 8712 * xmlParseVersionInfo: 8713 * @ctxt: an XML parser context 8714 * 8715 * parse the XML version. 8716 * 8717 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 8718 * 8719 * [25] Eq ::= S? '=' S? 8720 * 8721 * Returns the version string, e.g. 
"1.0" 8722 */ 8723 8724xmlChar * 8725xmlParseVersionInfo(xmlParserCtxtPtr ctxt) { 8726 xmlChar *version = NULL; 8727 8728 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) { 8729 SKIP(7); 8730 SKIP_BLANKS; 8731 if (RAW != '=') { 8732 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8733 return(NULL); 8734 } 8735 NEXT; 8736 SKIP_BLANKS; 8737 if (RAW == '"') { 8738 NEXT; 8739 version = xmlParseVersionNum(ctxt); 8740 if (RAW != '"') { 8741 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8742 } else 8743 NEXT; 8744 } else if (RAW == '\''){ 8745 NEXT; 8746 version = xmlParseVersionNum(ctxt); 8747 if (RAW != '\'') { 8748 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8749 } else 8750 NEXT; 8751 } else { 8752 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8753 } 8754 } 8755 return(version); 8756} 8757 8758/** 8759 * xmlParseEncName: 8760 * @ctxt: an XML parser context 8761 * 8762 * parse the XML encoding name 8763 * 8764 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 8765 * 8766 * Returns the encoding name value or NULL 8767 */ 8768xmlChar * 8769xmlParseEncName(xmlParserCtxtPtr ctxt) { 8770 xmlChar *buf = NULL; 8771 int len = 0; 8772 int size = 10; 8773 xmlChar cur; 8774 8775 cur = CUR; 8776 if (((cur >= 'a') && (cur <= 'z')) || 8777 ((cur >= 'A') && (cur <= 'Z'))) { 8778 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 8779 if (buf == NULL) { 8780 xmlErrMemory(ctxt, NULL); 8781 return(NULL); 8782 } 8783 8784 buf[len++] = cur; 8785 NEXT; 8786 cur = CUR; 8787 while (((cur >= 'a') && (cur <= 'z')) || 8788 ((cur >= 'A') && (cur <= 'Z')) || 8789 ((cur >= '0') && (cur <= '9')) || 8790 (cur == '.') || (cur == '_') || 8791 (cur == '-')) { 8792 if (len + 1 >= size) { 8793 xmlChar *tmp; 8794 8795 size *= 2; 8796 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 8797 if (tmp == NULL) { 8798 xmlErrMemory(ctxt, NULL); 8799 xmlFree(buf); 8800 return(NULL); 8801 } 8802 buf = tmp; 8803 } 8804 buf[len++] = cur; 8805 NEXT; 8806 cur = CUR; 8807 if 
(cur == 0) { 8808 SHRINK; 8809 GROW; 8810 cur = CUR; 8811 } 8812 } 8813 buf[len] = 0; 8814 } else { 8815 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL); 8816 } 8817 return(buf); 8818} 8819 8820/** 8821 * xmlParseEncodingDecl: 8822 * @ctxt: an XML parser context 8823 * 8824 * parse the XML encoding declaration 8825 * 8826 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'") 8827 * 8828 * this setups the conversion filters. 8829 * 8830 * Returns the encoding value or NULL 8831 */ 8832 8833const xmlChar * 8834xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) { 8835 xmlChar *encoding = NULL; 8836 8837 SKIP_BLANKS; 8838 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) { 8839 SKIP(8); 8840 SKIP_BLANKS; 8841 if (RAW != '=') { 8842 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8843 return(NULL); 8844 } 8845 NEXT; 8846 SKIP_BLANKS; 8847 if (RAW == '"') { 8848 NEXT; 8849 encoding = xmlParseEncName(ctxt); 8850 if (RAW != '"') { 8851 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8852 } else 8853 NEXT; 8854 } else if (RAW == '\''){ 8855 NEXT; 8856 encoding = xmlParseEncName(ctxt); 8857 if (RAW != '\'') { 8858 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8859 } else 8860 NEXT; 8861 } else { 8862 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8863 } 8864 /* 8865 * UTF-16 encoding stwich has already taken place at this stage, 8866 * more over the little-endian/big-endian selection is already done 8867 */ 8868 if ((encoding != NULL) && 8869 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) || 8870 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) { 8871 if (ctxt->encoding != NULL) 8872 xmlFree((xmlChar *) ctxt->encoding); 8873 ctxt->encoding = encoding; 8874 } 8875 /* 8876 * UTF-8 encoding is handled natively 8877 */ 8878 else if ((encoding != NULL) && 8879 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) || 8880 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) { 8881 if (ctxt->encoding != NULL) 8882 xmlFree((xmlChar *) ctxt->encoding); 8883 
ctxt->encoding = encoding; 8884 } 8885 else if (encoding != NULL) { 8886 xmlCharEncodingHandlerPtr handler; 8887 8888 if (ctxt->input->encoding != NULL) 8889 xmlFree((xmlChar *) ctxt->input->encoding); 8890 ctxt->input->encoding = encoding; 8891 8892 handler = xmlFindCharEncodingHandler((const char *) encoding); 8893 if (handler != NULL) { 8894 xmlSwitchToEncoding(ctxt, handler); 8895 } else { 8896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 8897 "Unsupported encoding %s\n", encoding); 8898 return(NULL); 8899 } 8900 } 8901 } 8902 return(encoding); 8903} 8904 8905/** 8906 * xmlParseSDDecl: 8907 * @ctxt: an XML parser context 8908 * 8909 * parse the XML standalone declaration 8910 * 8911 * [32] SDDecl ::= S 'standalone' Eq 8912 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"')) 8913 * 8914 * [ VC: Standalone Document Declaration ] 8915 * TODO The standalone document declaration must have the value "no" 8916 * if any external markup declarations contain declarations of: 8917 * - attributes with default values, if elements to which these 8918 * attributes apply appear in the document without specifications 8919 * of values for these attributes, or 8920 * - entities (other than amp, lt, gt, apos, quot), if references 8921 * to those entities appear in the document, or 8922 * - attributes with values subject to normalization, where the 8923 * attribute appears in the document with a value which will change 8924 * as a result of normalization, or 8925 * - element types with element content, if white space occurs directly 8926 * within any instance of those types. 
8927 * 8928 * Returns 1 if standalone, 0 otherwise 8929 */ 8930 8931int 8932xmlParseSDDecl(xmlParserCtxtPtr ctxt) { 8933 int standalone = -1; 8934 8935 SKIP_BLANKS; 8936 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) { 8937 SKIP(10); 8938 SKIP_BLANKS; 8939 if (RAW != '=') { 8940 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL); 8941 return(standalone); 8942 } 8943 NEXT; 8944 SKIP_BLANKS; 8945 if (RAW == '\''){ 8946 NEXT; 8947 if ((RAW == 'n') && (NXT(1) == 'o')) { 8948 standalone = 0; 8949 SKIP(2); 8950 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8951 (NXT(2) == 's')) { 8952 standalone = 1; 8953 SKIP(3); 8954 } else { 8955 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8956 } 8957 if (RAW != '\'') { 8958 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8959 } else 8960 NEXT; 8961 } else if (RAW == '"'){ 8962 NEXT; 8963 if ((RAW == 'n') && (NXT(1) == 'o')) { 8964 standalone = 0; 8965 SKIP(2); 8966 } else if ((RAW == 'y') && (NXT(1) == 'e') && 8967 (NXT(2) == 's')) { 8968 standalone = 1; 8969 SKIP(3); 8970 } else { 8971 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL); 8972 } 8973 if (RAW != '"') { 8974 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL); 8975 } else 8976 NEXT; 8977 } else { 8978 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL); 8979 } 8980 } 8981 return(standalone); 8982} 8983 8984/** 8985 * xmlParseXMLDecl: 8986 * @ctxt: an XML parser context 8987 * 8988 * parse an XML declaration header 8989 * 8990 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 8991 */ 8992 8993void 8994xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { 8995 xmlChar *version; 8996 8997 /* 8998 * This value for standalone indicates that the document has an 8999 * XML declaration but it does not have a standalone attribute. 9000 * It will be overwritten later if a standalone attribute is found. 9001 */ 9002 ctxt->input->standalone = -2; 9003 9004 /* 9005 * We know that '<?xml' is here. 
9006 */ 9007 SKIP(5); 9008 9009 if (!IS_BLANK_CH(RAW)) { 9010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, 9011 "Blank needed after '<?xml'\n"); 9012 } 9013 SKIP_BLANKS; 9014 9015 /* 9016 * We must have the VersionInfo here. 9017 */ 9018 version = xmlParseVersionInfo(ctxt); 9019 if (version == NULL) { 9020 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL); 9021 } else { 9022 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { 9023 /* 9024 * TODO: Blueberry should be detected here 9025 */ 9026 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, 9027 "Unsupported version '%s'\n", 9028 version, NULL); 9029 } 9030 if (ctxt->version != NULL) 9031 xmlFree((void *) ctxt->version); 9032 ctxt->version = version; 9033 } 9034 9035 /* 9036 * We may have the encoding declaration 9037 */ 9038 if (!IS_BLANK_CH(RAW)) { 9039 if ((RAW == '?') && (NXT(1) == '>')) { 9040 SKIP(2); 9041 return; 9042 } 9043 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 9044 } 9045 xmlParseEncodingDecl(ctxt); 9046 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9047 /* 9048 * The XML REC instructs us to stop parsing right here 9049 */ 9050 return; 9051 } 9052 9053 /* 9054 * We may have the standalone status. 9055 */ 9056 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) { 9057 if ((RAW == '?') && (NXT(1) == '>')) { 9058 SKIP(2); 9059 return; 9060 } 9061 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); 9062 } 9063 SKIP_BLANKS; 9064 ctxt->input->standalone = xmlParseSDDecl(ctxt); 9065 9066 SKIP_BLANKS; 9067 if ((RAW == '?') && (NXT(1) == '>')) { 9068 SKIP(2); 9069 } else if (RAW == '>') { 9070 /* Deprecated old WD ... */ 9071 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 9072 NEXT; 9073 } else { 9074 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL); 9075 MOVETO_ENDTAG(CUR_PTR); 9076 NEXT; 9077 } 9078} 9079 9080/** 9081 * xmlParseMisc: 9082 * @ctxt: an XML parser context 9083 * 9084 * parse an XML Misc* optional field. 
9085 * 9086 * [27] Misc ::= Comment | PI | S 9087 */ 9088 9089void 9090xmlParseMisc(xmlParserCtxtPtr ctxt) { 9091 while (((RAW == '<') && (NXT(1) == '?')) || 9092 (CMP4(CUR_PTR, '<', '!', '-', '-')) || 9093 IS_BLANK_CH(CUR)) { 9094 if ((RAW == '<') && (NXT(1) == '?')) { 9095 xmlParsePI(ctxt); 9096 } else if (IS_BLANK_CH(CUR)) { 9097 NEXT; 9098 } else 9099 xmlParseComment(ctxt); 9100 } 9101} 9102 9103/** 9104 * xmlParseDocument: 9105 * @ctxt: an XML parser context 9106 * 9107 * parse an XML document (and build a tree if using the standard SAX 9108 * interface). 9109 * 9110 * [1] document ::= prolog element Misc* 9111 * 9112 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 9113 * 9114 * Returns 0, -1 in case of error. the parser context is augmented 9115 * as a result of the parsing. 9116 */ 9117 9118int 9119xmlParseDocument(xmlParserCtxtPtr ctxt) { 9120 xmlChar start[4]; 9121 xmlCharEncoding enc; 9122 9123 xmlInitParser(); 9124 9125 if ((ctxt == NULL) || (ctxt->input == NULL)) 9126 return(-1); 9127 9128 GROW; 9129 9130 /* 9131 * SAX: detecting the level. 9132 */ 9133 xmlDetectSAX2(ctxt); 9134 9135 /* 9136 * SAX: beginning of the document processing. 9137 */ 9138 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9139 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 9140 9141 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) && 9142 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 9143 /* 9144 * Get the 4 first bytes and decode the charset 9145 * if enc != XML_CHAR_ENCODING_NONE 9146 * plug some encoding conversion routines. 9147 */ 9148 start[0] = RAW; 9149 start[1] = NXT(1); 9150 start[2] = NXT(2); 9151 start[3] = NXT(3); 9152 enc = xmlDetectCharEncoding(&start[0], 4); 9153 if (enc != XML_CHAR_ENCODING_NONE) { 9154 xmlSwitchEncoding(ctxt, enc); 9155 } 9156 } 9157 9158 9159 if (CUR == 0) { 9160 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9161 } 9162 9163 /* 9164 * Check for the XMLDecl in the Prolog. 
9165 */ 9166 GROW; 9167 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 9168 9169 /* 9170 * Note that we will switch encoding on the fly. 9171 */ 9172 xmlParseXMLDecl(ctxt); 9173 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9174 /* 9175 * The XML REC instructs us to stop parsing right here 9176 */ 9177 return(-1); 9178 } 9179 ctxt->standalone = ctxt->input->standalone; 9180 SKIP_BLANKS; 9181 } else { 9182 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9183 } 9184 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 9185 ctxt->sax->startDocument(ctxt->userData); 9186 9187 /* 9188 * The Misc part of the Prolog 9189 */ 9190 GROW; 9191 xmlParseMisc(ctxt); 9192 9193 /* 9194 * Then possibly doc type declaration(s) and more Misc 9195 * (doctypedecl Misc*)? 9196 */ 9197 GROW; 9198 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) { 9199 9200 ctxt->inSubset = 1; 9201 xmlParseDocTypeDecl(ctxt); 9202 if (RAW == '[') { 9203 ctxt->instate = XML_PARSER_DTD; 9204 xmlParseInternalSubset(ctxt); 9205 } 9206 9207 /* 9208 * Create and update the external subset. 
9209 */ 9210 ctxt->inSubset = 2; 9211 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) && 9212 (!ctxt->disableSAX)) 9213 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 9214 ctxt->extSubSystem, ctxt->extSubURI); 9215 ctxt->inSubset = 0; 9216 9217 9218 ctxt->instate = XML_PARSER_PROLOG; 9219 xmlParseMisc(ctxt); 9220 } 9221 9222 /* 9223 * Time to start parsing the tree itself 9224 */ 9225 GROW; 9226 if (RAW != '<') { 9227 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, 9228 "Start tag expected, '<' not found\n"); 9229 } else { 9230 ctxt->instate = XML_PARSER_CONTENT; 9231 xmlParseElement(ctxt); 9232 ctxt->instate = XML_PARSER_EPILOG; 9233 9234 9235 /* 9236 * The Misc part at the end 9237 */ 9238 xmlParseMisc(ctxt); 9239 9240 if (RAW != 0) { 9241 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 9242 } 9243 ctxt->instate = XML_PARSER_EOF; 9244 } 9245 9246 /* 9247 * SAX: end of the document processing. 9248 */ 9249 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9250 ctxt->sax->endDocument(ctxt->userData); 9251 9252 /* 9253 * Remove locally kept entity definitions if the tree was not built 9254 */ 9255 if ((ctxt->myDoc != NULL) && 9256 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) { 9257 xmlFreeDoc(ctxt->myDoc); 9258 ctxt->myDoc = NULL; 9259 } 9260 9261 if (! ctxt->wellFormed) { 9262 ctxt->valid = 0; 9263 return(-1); 9264 } 9265 return(0); 9266} 9267 9268/** 9269 * xmlParseExtParsedEnt: 9270 * @ctxt: an XML parser context 9271 * 9272 * parse a general parsed entity 9273 * An external general parsed entity is well-formed if it matches the 9274 * production labeled extParsedEnt. 9275 * 9276 * [78] extParsedEnt ::= TextDecl? content 9277 * 9278 * Returns 0, -1 in case of error. the parser context is augmented 9279 * as a result of the parsing. 
9280 */ 9281 9282int 9283xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { 9284 xmlChar start[4]; 9285 xmlCharEncoding enc; 9286 9287 if ((ctxt == NULL) || (ctxt->input == NULL)) 9288 return(-1); 9289 9290 xmlDefaultSAXHandlerInit(); 9291 9292 xmlDetectSAX2(ctxt); 9293 9294 GROW; 9295 9296 /* 9297 * SAX: beginning of the document processing. 9298 */ 9299 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9300 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); 9301 9302 /* 9303 * Get the 4 first bytes and decode the charset 9304 * if enc != XML_CHAR_ENCODING_NONE 9305 * plug some encoding conversion routines. 9306 */ 9307 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 9308 start[0] = RAW; 9309 start[1] = NXT(1); 9310 start[2] = NXT(2); 9311 start[3] = NXT(3); 9312 enc = xmlDetectCharEncoding(start, 4); 9313 if (enc != XML_CHAR_ENCODING_NONE) { 9314 xmlSwitchEncoding(ctxt, enc); 9315 } 9316 } 9317 9318 9319 if (CUR == 0) { 9320 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9321 } 9322 9323 /* 9324 * Check for the XMLDecl in the Prolog. 9325 */ 9326 GROW; 9327 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 9328 9329 /* 9330 * Note that we will switch encoding on the fly. 
9331 */ 9332 xmlParseXMLDecl(ctxt); 9333 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9334 /* 9335 * The XML REC instructs us to stop parsing right here 9336 */ 9337 return(-1); 9338 } 9339 SKIP_BLANKS; 9340 } else { 9341 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9342 } 9343 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) 9344 ctxt->sax->startDocument(ctxt->userData); 9345 9346 /* 9347 * Doing validity checking on chunk doesn't make sense 9348 */ 9349 ctxt->instate = XML_PARSER_CONTENT; 9350 ctxt->validate = 0; 9351 ctxt->loadsubset = 0; 9352 ctxt->depth = 0; 9353 9354 xmlParseContent(ctxt); 9355 9356 if ((RAW == '<') && (NXT(1) == '/')) { 9357 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 9358 } else if (RAW != 0) { 9359 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 9360 } 9361 9362 /* 9363 * SAX: end of the document processing. 9364 */ 9365 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9366 ctxt->sax->endDocument(ctxt->userData); 9367 9368 if (! ctxt->wellFormed) return(-1); 9369 return(0); 9370} 9371 9372#ifdef LIBXML_PUSH_ENABLED 9373/************************************************************************ 9374 * * 9375 * Progressive parsing interfaces * 9376 * * 9377 ************************************************************************/ 9378 9379/** 9380 * xmlParseLookupSequence: 9381 * @ctxt: an XML parser context 9382 * @first: the first char to lookup 9383 * @next: the next char to lookup or zero 9384 * @third: the next char to lookup or zero 9385 * 9386 * Try to find if a sequence (first, next, third) or just (first next) or 9387 * (first) is available in the input stream. 9388 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 9389 * to avoid rescanning sequences of bytes, it DOES change the state of the 9390 * parser, do not use liberally. 9391 * 9392 * Returns the index to the current parsing point if the full sequence 9393 * is available, -1 otherwise. 
9394 */ 9395static int 9396xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, 9397 xmlChar next, xmlChar third) { 9398 int base, len; 9399 xmlParserInputPtr in; 9400 const xmlChar *buf; 9401 9402 in = ctxt->input; 9403 if (in == NULL) return(-1); 9404 base = in->cur - in->base; 9405 if (base < 0) return(-1); 9406 if (ctxt->checkIndex > base) 9407 base = ctxt->checkIndex; 9408 if (in->buf == NULL) { 9409 buf = in->base; 9410 len = in->length; 9411 } else { 9412 buf = in->buf->buffer->content; 9413 len = in->buf->buffer->use; 9414 } 9415 /* take into account the sequence length */ 9416 if (third) len -= 2; 9417 else if (next) len --; 9418 for (;base < len;base++) { 9419 if (buf[base] == first) { 9420 if (third != 0) { 9421 if ((buf[base + 1] != next) || 9422 (buf[base + 2] != third)) continue; 9423 } else if (next != 0) { 9424 if (buf[base + 1] != next) continue; 9425 } 9426 ctxt->checkIndex = 0; 9427#ifdef DEBUG_PUSH 9428 if (next == 0) 9429 xmlGenericError(xmlGenericErrorContext, 9430 "PP: lookup '%c' found at %d\n", 9431 first, base); 9432 else if (third == 0) 9433 xmlGenericError(xmlGenericErrorContext, 9434 "PP: lookup '%c%c' found at %d\n", 9435 first, next, base); 9436 else 9437 xmlGenericError(xmlGenericErrorContext, 9438 "PP: lookup '%c%c%c' found at %d\n", 9439 first, next, third, base); 9440#endif 9441 return(base - (in->cur - in->base)); 9442 } 9443 } 9444 ctxt->checkIndex = base; 9445#ifdef DEBUG_PUSH 9446 if (next == 0) 9447 xmlGenericError(xmlGenericErrorContext, 9448 "PP: lookup '%c' failed\n", first); 9449 else if (third == 0) 9450 xmlGenericError(xmlGenericErrorContext, 9451 "PP: lookup '%c%c' failed\n", first, next); 9452 else 9453 xmlGenericError(xmlGenericErrorContext, 9454 "PP: lookup '%c%c%c' failed\n", first, next, third); 9455#endif 9456 return(-1); 9457} 9458 9459/** 9460 * xmlParseGetLasts: 9461 * @ctxt: an XML parser context 9462 * @lastlt: pointer to store the last '<' from the input 9463 * @lastgt: pointer to store the last '>' 
from the input 9464 * 9465 * Lookup the last < and > in the current chunk 9466 */ 9467static void 9468xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt, 9469 const xmlChar **lastgt) { 9470 const xmlChar *tmp; 9471 9472 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) { 9473 xmlGenericError(xmlGenericErrorContext, 9474 "Internal error: xmlParseGetLasts\n"); 9475 return; 9476 } 9477 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) { 9478 tmp = ctxt->input->end; 9479 tmp--; 9480 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--; 9481 if (tmp < ctxt->input->base) { 9482 *lastlt = NULL; 9483 *lastgt = NULL; 9484 } else { 9485 *lastlt = tmp; 9486 tmp++; 9487 while ((tmp < ctxt->input->end) && (*tmp != '>')) { 9488 if (*tmp == '\'') { 9489 tmp++; 9490 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++; 9491 if (tmp < ctxt->input->end) tmp++; 9492 } else if (*tmp == '"') { 9493 tmp++; 9494 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++; 9495 if (tmp < ctxt->input->end) tmp++; 9496 } else 9497 tmp++; 9498 } 9499 if (tmp < ctxt->input->end) 9500 *lastgt = tmp; 9501 else { 9502 tmp = *lastlt; 9503 tmp--; 9504 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--; 9505 if (tmp >= ctxt->input->base) 9506 *lastgt = tmp; 9507 else 9508 *lastgt = NULL; 9509 } 9510 } 9511 } else { 9512 *lastlt = NULL; 9513 *lastgt = NULL; 9514 } 9515} 9516/** 9517 * xmlCheckCdataPush: 9518 * @cur: pointer to the bock of characters 9519 * @len: length of the block in bytes 9520 * 9521 * Check that the block of characters is okay as SCdata content [20] 9522 * 9523 * Returns the number of bytes to pass if okay, a negative index where an 9524 * UTF-8 error occured otherwise 9525 */ 9526static int 9527xmlCheckCdataPush(const xmlChar *utf, int len) { 9528 int ix; 9529 unsigned char c; 9530 int codepoint; 9531 9532 if ((utf == NULL) || (len <= 0)) 9533 return(0); 9534 9535 for (ix = 0; ix < len;) { /* string is 0-terminated */ 9536 c = utf[ix]; 9537 
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */ 9538 if (c >= 0x20) 9539 ix++; 9540 else if ((c == 0xA) || (c == 0xD) || (c == 0x9)) 9541 ix++; 9542 else 9543 return(-ix); 9544 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */ 9545 if (ix + 2 > len) return(ix); 9546 if ((utf[ix+1] & 0xc0 ) != 0x80) 9547 return(-ix); 9548 codepoint = (utf[ix] & 0x1f) << 6; 9549 codepoint |= utf[ix+1] & 0x3f; 9550 if (!xmlIsCharQ(codepoint)) 9551 return(-ix); 9552 ix += 2; 9553 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */ 9554 if (ix + 3 > len) return(ix); 9555 if (((utf[ix+1] & 0xc0) != 0x80) || 9556 ((utf[ix+2] & 0xc0) != 0x80)) 9557 return(-ix); 9558 codepoint = (utf[ix] & 0xf) << 12; 9559 codepoint |= (utf[ix+1] & 0x3f) << 6; 9560 codepoint |= utf[ix+2] & 0x3f; 9561 if (!xmlIsCharQ(codepoint)) 9562 return(-ix); 9563 ix += 3; 9564 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */ 9565 if (ix + 4 > len) return(ix); 9566 if (((utf[ix+1] & 0xc0) != 0x80) || 9567 ((utf[ix+2] & 0xc0) != 0x80) || 9568 ((utf[ix+3] & 0xc0) != 0x80)) 9569 return(-ix); 9570 codepoint = (utf[ix] & 0x7) << 18; 9571 codepoint |= (utf[ix+1] & 0x3f) << 12; 9572 codepoint |= (utf[ix+2] & 0x3f) << 6; 9573 codepoint |= utf[ix+3] & 0x3f; 9574 if (!xmlIsCharQ(codepoint)) 9575 return(-ix); 9576 ix += 4; 9577 } else /* unknown encoding */ 9578 return(-ix); 9579 } 9580 return(ix); 9581} 9582 9583/** 9584 * xmlParseTryOrFinish: 9585 * @ctxt: an XML parser context 9586 * @terminate: last chunk indicator 9587 * 9588 * Try to progress on parsing 9589 * 9590 * Returns zero if no parsing was possible 9591 */ 9592static int 9593xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { 9594 int ret = 0; 9595 int avail, tlen; 9596 xmlChar cur, next; 9597 const xmlChar *lastlt, *lastgt; 9598 9599 if (ctxt->input == NULL) 9600 return(0); 9601 9602#ifdef DEBUG_PUSH 9603 switch (ctxt->instate) { 9604 case XML_PARSER_EOF: 9605 
xmlGenericError(xmlGenericErrorContext, 9606 "PP: try EOF\n"); break; 9607 case XML_PARSER_START: 9608 xmlGenericError(xmlGenericErrorContext, 9609 "PP: try START\n"); break; 9610 case XML_PARSER_MISC: 9611 xmlGenericError(xmlGenericErrorContext, 9612 "PP: try MISC\n");break; 9613 case XML_PARSER_COMMENT: 9614 xmlGenericError(xmlGenericErrorContext, 9615 "PP: try COMMENT\n");break; 9616 case XML_PARSER_PROLOG: 9617 xmlGenericError(xmlGenericErrorContext, 9618 "PP: try PROLOG\n");break; 9619 case XML_PARSER_START_TAG: 9620 xmlGenericError(xmlGenericErrorContext, 9621 "PP: try START_TAG\n");break; 9622 case XML_PARSER_CONTENT: 9623 xmlGenericError(xmlGenericErrorContext, 9624 "PP: try CONTENT\n");break; 9625 case XML_PARSER_CDATA_SECTION: 9626 xmlGenericError(xmlGenericErrorContext, 9627 "PP: try CDATA_SECTION\n");break; 9628 case XML_PARSER_END_TAG: 9629 xmlGenericError(xmlGenericErrorContext, 9630 "PP: try END_TAG\n");break; 9631 case XML_PARSER_ENTITY_DECL: 9632 xmlGenericError(xmlGenericErrorContext, 9633 "PP: try ENTITY_DECL\n");break; 9634 case XML_PARSER_ENTITY_VALUE: 9635 xmlGenericError(xmlGenericErrorContext, 9636 "PP: try ENTITY_VALUE\n");break; 9637 case XML_PARSER_ATTRIBUTE_VALUE: 9638 xmlGenericError(xmlGenericErrorContext, 9639 "PP: try ATTRIBUTE_VALUE\n");break; 9640 case XML_PARSER_DTD: 9641 xmlGenericError(xmlGenericErrorContext, 9642 "PP: try DTD\n");break; 9643 case XML_PARSER_EPILOG: 9644 xmlGenericError(xmlGenericErrorContext, 9645 "PP: try EPILOG\n");break; 9646 case XML_PARSER_PI: 9647 xmlGenericError(xmlGenericErrorContext, 9648 "PP: try PI\n");break; 9649 case XML_PARSER_IGNORE: 9650 xmlGenericError(xmlGenericErrorContext, 9651 "PP: try IGNORE\n");break; 9652 } 9653#endif 9654 9655 if ((ctxt->input != NULL) && 9656 (ctxt->input->cur - ctxt->input->base > 4096)) { 9657 xmlSHRINK(ctxt); 9658 ctxt->checkIndex = 0; 9659 } 9660 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 9661 9662 while (1) { 9663 if ((ctxt->errNo != XML_ERR_OK) && 
(ctxt->disableSAX == 1)) 9664 return(0); 9665 9666 9667 /* 9668 * Pop-up of finished entities. 9669 */ 9670 while ((RAW == 0) && (ctxt->inputNr > 1)) 9671 xmlPopInput(ctxt); 9672 9673 if (ctxt->input == NULL) break; 9674 if (ctxt->input->buf == NULL) 9675 avail = ctxt->input->length - 9676 (ctxt->input->cur - ctxt->input->base); 9677 else { 9678 /* 9679 * If we are operating on converted input, try to flush 9680 * remainng chars to avoid them stalling in the non-converted 9681 * buffer. 9682 */ 9683 if ((ctxt->input->buf->raw != NULL) && 9684 (ctxt->input->buf->raw->use > 0)) { 9685 int base = ctxt->input->base - 9686 ctxt->input->buf->buffer->content; 9687 int current = ctxt->input->cur - ctxt->input->base; 9688 9689 xmlParserInputBufferPush(ctxt->input->buf, 0, ""); 9690 ctxt->input->base = ctxt->input->buf->buffer->content + base; 9691 ctxt->input->cur = ctxt->input->base + current; 9692 ctxt->input->end = 9693 &ctxt->input->buf->buffer->content[ 9694 ctxt->input->buf->buffer->use]; 9695 } 9696 avail = ctxt->input->buf->buffer->use - 9697 (ctxt->input->cur - ctxt->input->base); 9698 } 9699 if (avail < 1) 9700 goto done; 9701 switch (ctxt->instate) { 9702 case XML_PARSER_EOF: 9703 /* 9704 * Document parsing is done ! 9705 */ 9706 goto done; 9707 case XML_PARSER_START: 9708 if (ctxt->charset == XML_CHAR_ENCODING_NONE) { 9709 xmlChar start[4]; 9710 xmlCharEncoding enc; 9711 9712 /* 9713 * Very first chars read from the document flow. 9714 */ 9715 if (avail < 4) 9716 goto done; 9717 9718 /* 9719 * Get the 4 first bytes and decode the charset 9720 * if enc != XML_CHAR_ENCODING_NONE 9721 * plug some encoding conversion routines, 9722 * else xmlSwitchEncoding will set to (default) 9723 * UTF8. 
9724 */ 9725 start[0] = RAW; 9726 start[1] = NXT(1); 9727 start[2] = NXT(2); 9728 start[3] = NXT(3); 9729 enc = xmlDetectCharEncoding(start, 4); 9730 xmlSwitchEncoding(ctxt, enc); 9731 break; 9732 } 9733 9734 if (avail < 2) 9735 goto done; 9736 cur = ctxt->input->cur[0]; 9737 next = ctxt->input->cur[1]; 9738 if (cur == 0) { 9739 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9740 ctxt->sax->setDocumentLocator(ctxt->userData, 9741 &xmlDefaultSAXLocator); 9742 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9743 ctxt->instate = XML_PARSER_EOF; 9744#ifdef DEBUG_PUSH 9745 xmlGenericError(xmlGenericErrorContext, 9746 "PP: entering EOF\n"); 9747#endif 9748 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9749 ctxt->sax->endDocument(ctxt->userData); 9750 goto done; 9751 } 9752 if ((cur == '<') && (next == '?')) { 9753 /* PI or XML decl */ 9754 if (avail < 5) return(ret); 9755 if ((!terminate) && 9756 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9757 return(ret); 9758 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9759 ctxt->sax->setDocumentLocator(ctxt->userData, 9760 &xmlDefaultSAXLocator); 9761 if ((ctxt->input->cur[2] == 'x') && 9762 (ctxt->input->cur[3] == 'm') && 9763 (ctxt->input->cur[4] == 'l') && 9764 (IS_BLANK_CH(ctxt->input->cur[5]))) { 9765 ret += 5; 9766#ifdef DEBUG_PUSH 9767 xmlGenericError(xmlGenericErrorContext, 9768 "PP: Parsing XML Decl\n"); 9769#endif 9770 xmlParseXMLDecl(ctxt); 9771 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { 9772 /* 9773 * The XML REC instructs us to stop parsing right 9774 * here 9775 */ 9776 ctxt->instate = XML_PARSER_EOF; 9777 return(0); 9778 } 9779 ctxt->standalone = ctxt->input->standalone; 9780 if ((ctxt->encoding == NULL) && 9781 (ctxt->input->encoding != NULL)) 9782 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 9783 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9784 (!ctxt->disableSAX)) 9785 ctxt->sax->startDocument(ctxt->userData); 9786 ctxt->instate = XML_PARSER_MISC; 9787#ifdef DEBUG_PUSH 
9788 xmlGenericError(xmlGenericErrorContext, 9789 "PP: entering MISC\n"); 9790#endif 9791 } else { 9792 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9793 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9794 (!ctxt->disableSAX)) 9795 ctxt->sax->startDocument(ctxt->userData); 9796 ctxt->instate = XML_PARSER_MISC; 9797#ifdef DEBUG_PUSH 9798 xmlGenericError(xmlGenericErrorContext, 9799 "PP: entering MISC\n"); 9800#endif 9801 } 9802 } else { 9803 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 9804 ctxt->sax->setDocumentLocator(ctxt->userData, 9805 &xmlDefaultSAXLocator); 9806 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); 9807 if (ctxt->version == NULL) { 9808 xmlErrMemory(ctxt, NULL); 9809 break; 9810 } 9811 if ((ctxt->sax) && (ctxt->sax->startDocument) && 9812 (!ctxt->disableSAX)) 9813 ctxt->sax->startDocument(ctxt->userData); 9814 ctxt->instate = XML_PARSER_MISC; 9815#ifdef DEBUG_PUSH 9816 xmlGenericError(xmlGenericErrorContext, 9817 "PP: entering MISC\n"); 9818#endif 9819 } 9820 break; 9821 case XML_PARSER_START_TAG: { 9822 const xmlChar *name; 9823 const xmlChar *prefix; 9824 const xmlChar *URI; 9825 int nsNr = ctxt->nsNr; 9826 9827 if ((avail < 2) && (ctxt->inputNr == 1)) 9828 goto done; 9829 cur = ctxt->input->cur[0]; 9830 if (cur != '<') { 9831 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); 9832 ctxt->instate = XML_PARSER_EOF; 9833 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9834 ctxt->sax->endDocument(ctxt->userData); 9835 goto done; 9836 } 9837 if (!terminate) { 9838 if (ctxt->progressive) { 9839 /* > can be found unescaped in attribute values */ 9840 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 9841 goto done; 9842 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 9843 goto done; 9844 } 9845 } 9846 if (ctxt->spaceNr == 0) 9847 spacePush(ctxt, -1); 9848 else if (*ctxt->space == -2) 9849 spacePush(ctxt, -1); 9850 else 9851 spacePush(ctxt, *ctxt->space); 9852#ifdef LIBXML_SAX1_ENABLED 9853 if (ctxt->sax2) 9854#endif 
/* LIBXML_SAX1_ENABLED */ 9855 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen); 9856#ifdef LIBXML_SAX1_ENABLED 9857 else 9858 name = xmlParseStartTag(ctxt); 9859#endif /* LIBXML_SAX1_ENABLED */ 9860 if (name == NULL) { 9861 spacePop(ctxt); 9862 ctxt->instate = XML_PARSER_EOF; 9863 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 9864 ctxt->sax->endDocument(ctxt->userData); 9865 goto done; 9866 } 9867#ifdef LIBXML_VALID_ENABLED 9868 /* 9869 * [ VC: Root Element Type ] 9870 * The Name in the document type declaration must match 9871 * the element type of the root element. 9872 */ 9873 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && 9874 ctxt->node && (ctxt->node == ctxt->myDoc->children)) 9875 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); 9876#endif /* LIBXML_VALID_ENABLED */ 9877 9878 /* 9879 * Check for an Empty Element. 9880 */ 9881 if ((RAW == '/') && (NXT(1) == '>')) { 9882 SKIP(2); 9883 9884 if (ctxt->sax2) { 9885 if ((ctxt->sax != NULL) && 9886 (ctxt->sax->endElementNs != NULL) && 9887 (!ctxt->disableSAX)) 9888 ctxt->sax->endElementNs(ctxt->userData, name, 9889 prefix, URI); 9890 if (ctxt->nsNr - nsNr > 0) 9891 nsPop(ctxt, ctxt->nsNr - nsNr); 9892#ifdef LIBXML_SAX1_ENABLED 9893 } else { 9894 if ((ctxt->sax != NULL) && 9895 (ctxt->sax->endElement != NULL) && 9896 (!ctxt->disableSAX)) 9897 ctxt->sax->endElement(ctxt->userData, name); 9898#endif /* LIBXML_SAX1_ENABLED */ 9899 } 9900 spacePop(ctxt); 9901 if (ctxt->nameNr == 0) { 9902 ctxt->instate = XML_PARSER_EPILOG; 9903 } else { 9904 ctxt->instate = XML_PARSER_CONTENT; 9905 } 9906 break; 9907 } 9908 if (RAW == '>') { 9909 NEXT; 9910 } else { 9911 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED, 9912 "Couldn't find end of Start Tag %s\n", 9913 name); 9914 nodePop(ctxt); 9915 spacePop(ctxt); 9916 } 9917 if (ctxt->sax2) 9918 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); 9919#ifdef LIBXML_SAX1_ENABLED 9920 else 9921 namePush(ctxt, name); 9922#endif /* LIBXML_SAX1_ENABLED */ 9923 
9924 ctxt->instate = XML_PARSER_CONTENT; 9925 break; 9926 } 9927 case XML_PARSER_CONTENT: { 9928 const xmlChar *test; 9929 unsigned int cons; 9930 if ((avail < 2) && (ctxt->inputNr == 1)) 9931 goto done; 9932 cur = ctxt->input->cur[0]; 9933 next = ctxt->input->cur[1]; 9934 9935 test = CUR_PTR; 9936 cons = ctxt->input->consumed; 9937 if ((cur == '<') && (next == '/')) { 9938 ctxt->instate = XML_PARSER_END_TAG; 9939 break; 9940 } else if ((cur == '<') && (next == '?')) { 9941 if ((!terminate) && 9942 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 9943 goto done; 9944 xmlParsePI(ctxt); 9945 } else if ((cur == '<') && (next != '!')) { 9946 ctxt->instate = XML_PARSER_START_TAG; 9947 break; 9948 } else if ((cur == '<') && (next == '!') && 9949 (ctxt->input->cur[2] == '-') && 9950 (ctxt->input->cur[3] == '-')) { 9951 int term; 9952 9953 if (avail < 4) 9954 goto done; 9955 ctxt->input->cur += 4; 9956 term = xmlParseLookupSequence(ctxt, '-', '-', '>'); 9957 ctxt->input->cur -= 4; 9958 if ((!terminate) && (term < 0)) 9959 goto done; 9960 xmlParseComment(ctxt); 9961 ctxt->instate = XML_PARSER_CONTENT; 9962 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') && 9963 (ctxt->input->cur[2] == '[') && 9964 (ctxt->input->cur[3] == 'C') && 9965 (ctxt->input->cur[4] == 'D') && 9966 (ctxt->input->cur[5] == 'A') && 9967 (ctxt->input->cur[6] == 'T') && 9968 (ctxt->input->cur[7] == 'A') && 9969 (ctxt->input->cur[8] == '[')) { 9970 SKIP(9); 9971 ctxt->instate = XML_PARSER_CDATA_SECTION; 9972 break; 9973 } else if ((cur == '<') && (next == '!') && 9974 (avail < 9)) { 9975 goto done; 9976 } else if (cur == '&') { 9977 if ((!terminate) && 9978 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)) 9979 goto done; 9980 xmlParseReference(ctxt); 9981 } else { 9982 /* TODO Avoid the extra copy, handle directly !!! 
*/ 9983 /* 9984 * Goal of the following test is: 9985 * - minimize calls to the SAX 'character' callback 9986 * when they are mergeable 9987 * - handle an problem for isBlank when we only parse 9988 * a sequence of blank chars and the next one is 9989 * not available to check against '<' presence. 9990 * - tries to homogenize the differences in SAX 9991 * callbacks between the push and pull versions 9992 * of the parser. 9993 */ 9994 if ((ctxt->inputNr == 1) && 9995 (avail < XML_PARSER_BIG_BUFFER_SIZE)) { 9996 if (!terminate) { 9997 if (ctxt->progressive) { 9998 if ((lastlt == NULL) || 9999 (ctxt->input->cur > lastlt)) 10000 goto done; 10001 } else if (xmlParseLookupSequence(ctxt, 10002 '<', 0, 0) < 0) { 10003 goto done; 10004 } 10005 } 10006 } 10007 ctxt->checkIndex = 0; 10008 xmlParseCharData(ctxt, 0); 10009 } 10010 /* 10011 * Pop-up of finished entities. 10012 */ 10013 while ((RAW == 0) && (ctxt->inputNr > 1)) 10014 xmlPopInput(ctxt); 10015 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { 10016 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, 10017 "detected an error in element content\n"); 10018 ctxt->instate = XML_PARSER_EOF; 10019 break; 10020 } 10021 break; 10022 } 10023 case XML_PARSER_END_TAG: 10024 if (avail < 2) 10025 goto done; 10026 if (!terminate) { 10027 if (ctxt->progressive) { 10028 /* > can be found unescaped in attribute values */ 10029 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt)) 10030 goto done; 10031 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) { 10032 goto done; 10033 } 10034 } 10035 if (ctxt->sax2) { 10036 xmlParseEndTag2(ctxt, 10037 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], 10038 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, 10039 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); 10040 nameNsPop(ctxt); 10041 } 10042#ifdef LIBXML_SAX1_ENABLED 10043 else 10044 xmlParseEndTag1(ctxt, 0); 10045#endif /* LIBXML_SAX1_ENABLED */ 10046 if (ctxt->nameNr == 0) { 10047 ctxt->instate = XML_PARSER_EPILOG; 10048 } else { 
10049 ctxt->instate = XML_PARSER_CONTENT; 10050 } 10051 break; 10052 case XML_PARSER_CDATA_SECTION: { 10053 /* 10054 * The Push mode need to have the SAX callback for 10055 * cdataBlock merge back contiguous callbacks. 10056 */ 10057 int base; 10058 10059 base = xmlParseLookupSequence(ctxt, ']', ']', '>'); 10060 if (base < 0) { 10061 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { 10062 int tmp; 10063 10064 tmp = xmlCheckCdataPush(ctxt->input->cur, 10065 XML_PARSER_BIG_BUFFER_SIZE); 10066 if (tmp < 0) { 10067 tmp = -tmp; 10068 ctxt->input->cur += tmp; 10069 goto encoding_error; 10070 } 10071 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 10072 if (ctxt->sax->cdataBlock != NULL) 10073 ctxt->sax->cdataBlock(ctxt->userData, 10074 ctxt->input->cur, tmp); 10075 else if (ctxt->sax->characters != NULL) 10076 ctxt->sax->characters(ctxt->userData, 10077 ctxt->input->cur, tmp); 10078 } 10079 SKIPL(tmp); 10080 ctxt->checkIndex = 0; 10081 } 10082 goto done; 10083 } else { 10084 int tmp; 10085 10086 tmp = xmlCheckCdataPush(ctxt->input->cur, base); 10087 if ((tmp < 0) || (tmp != base)) { 10088 tmp = -tmp; 10089 ctxt->input->cur += tmp; 10090 goto encoding_error; 10091 } 10092 if ((ctxt->sax != NULL) && (base > 0) && 10093 (!ctxt->disableSAX)) { 10094 if (ctxt->sax->cdataBlock != NULL) 10095 ctxt->sax->cdataBlock(ctxt->userData, 10096 ctxt->input->cur, base); 10097 else if (ctxt->sax->characters != NULL) 10098 ctxt->sax->characters(ctxt->userData, 10099 ctxt->input->cur, base); 10100 } 10101 SKIPL(base + 3); 10102 ctxt->checkIndex = 0; 10103 ctxt->instate = XML_PARSER_CONTENT; 10104#ifdef DEBUG_PUSH 10105 xmlGenericError(xmlGenericErrorContext, 10106 "PP: entering CONTENT\n"); 10107#endif 10108 } 10109 break; 10110 } 10111 case XML_PARSER_MISC: 10112 SKIP_BLANKS; 10113 if (ctxt->input->buf == NULL) 10114 avail = ctxt->input->length - 10115 (ctxt->input->cur - ctxt->input->base); 10116 else 10117 avail = ctxt->input->buf->buffer->use - 10118 (ctxt->input->cur - 
ctxt->input->base); 10119 if (avail < 2) 10120 goto done; 10121 cur = ctxt->input->cur[0]; 10122 next = ctxt->input->cur[1]; 10123 if ((cur == '<') && (next == '?')) { 10124 if ((!terminate) && 10125 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10126 goto done; 10127#ifdef DEBUG_PUSH 10128 xmlGenericError(xmlGenericErrorContext, 10129 "PP: Parsing PI\n"); 10130#endif 10131 xmlParsePI(ctxt); 10132 } else if ((cur == '<') && (next == '!') && 10133 (ctxt->input->cur[2] == '-') && 10134 (ctxt->input->cur[3] == '-')) { 10135 if ((!terminate) && 10136 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10137 goto done; 10138#ifdef DEBUG_PUSH 10139 xmlGenericError(xmlGenericErrorContext, 10140 "PP: Parsing Comment\n"); 10141#endif 10142 xmlParseComment(ctxt); 10143 ctxt->instate = XML_PARSER_MISC; 10144 } else if ((cur == '<') && (next == '!') && 10145 (ctxt->input->cur[2] == 'D') && 10146 (ctxt->input->cur[3] == 'O') && 10147 (ctxt->input->cur[4] == 'C') && 10148 (ctxt->input->cur[5] == 'T') && 10149 (ctxt->input->cur[6] == 'Y') && 10150 (ctxt->input->cur[7] == 'P') && 10151 (ctxt->input->cur[8] == 'E')) { 10152 if ((!terminate) && 10153 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) 10154 goto done; 10155#ifdef DEBUG_PUSH 10156 xmlGenericError(xmlGenericErrorContext, 10157 "PP: Parsing internal subset\n"); 10158#endif 10159 ctxt->inSubset = 1; 10160 xmlParseDocTypeDecl(ctxt); 10161 if (RAW == '[') { 10162 ctxt->instate = XML_PARSER_DTD; 10163#ifdef DEBUG_PUSH 10164 xmlGenericError(xmlGenericErrorContext, 10165 "PP: entering DTD\n"); 10166#endif 10167 } else { 10168 /* 10169 * Create and update the external subset. 
10170 */ 10171 ctxt->inSubset = 2; 10172 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 10173 (ctxt->sax->externalSubset != NULL)) 10174 ctxt->sax->externalSubset(ctxt->userData, 10175 ctxt->intSubName, ctxt->extSubSystem, 10176 ctxt->extSubURI); 10177 ctxt->inSubset = 0; 10178 ctxt->instate = XML_PARSER_PROLOG; 10179#ifdef DEBUG_PUSH 10180 xmlGenericError(xmlGenericErrorContext, 10181 "PP: entering PROLOG\n"); 10182#endif 10183 } 10184 } else if ((cur == '<') && (next == '!') && 10185 (avail < 9)) { 10186 goto done; 10187 } else { 10188 ctxt->instate = XML_PARSER_START_TAG; 10189 ctxt->progressive = 1; 10190 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10191#ifdef DEBUG_PUSH 10192 xmlGenericError(xmlGenericErrorContext, 10193 "PP: entering START_TAG\n"); 10194#endif 10195 } 10196 break; 10197 case XML_PARSER_PROLOG: 10198 SKIP_BLANKS; 10199 if (ctxt->input->buf == NULL) 10200 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 10201 else 10202 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 10203 if (avail < 2) 10204 goto done; 10205 cur = ctxt->input->cur[0]; 10206 next = ctxt->input->cur[1]; 10207 if ((cur == '<') && (next == '?')) { 10208 if ((!terminate) && 10209 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10210 goto done; 10211#ifdef DEBUG_PUSH 10212 xmlGenericError(xmlGenericErrorContext, 10213 "PP: Parsing PI\n"); 10214#endif 10215 xmlParsePI(ctxt); 10216 } else if ((cur == '<') && (next == '!') && 10217 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 10218 if ((!terminate) && 10219 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10220 goto done; 10221#ifdef DEBUG_PUSH 10222 xmlGenericError(xmlGenericErrorContext, 10223 "PP: Parsing Comment\n"); 10224#endif 10225 xmlParseComment(ctxt); 10226 ctxt->instate = XML_PARSER_PROLOG; 10227 } else if ((cur == '<') && (next == '!') && 10228 (avail < 4)) { 10229 goto done; 10230 } else { 10231 ctxt->instate = XML_PARSER_START_TAG; 10232 if 
(ctxt->progressive == 0) 10233 ctxt->progressive = 1; 10234 xmlParseGetLasts(ctxt, &lastlt, &lastgt); 10235#ifdef DEBUG_PUSH 10236 xmlGenericError(xmlGenericErrorContext, 10237 "PP: entering START_TAG\n"); 10238#endif 10239 } 10240 break; 10241 case XML_PARSER_EPILOG: 10242 SKIP_BLANKS; 10243 if (ctxt->input->buf == NULL) 10244 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base); 10245 else 10246 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base); 10247 if (avail < 2) 10248 goto done; 10249 cur = ctxt->input->cur[0]; 10250 next = ctxt->input->cur[1]; 10251 if ((cur == '<') && (next == '?')) { 10252 if ((!terminate) && 10253 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) 10254 goto done; 10255#ifdef DEBUG_PUSH 10256 xmlGenericError(xmlGenericErrorContext, 10257 "PP: Parsing PI\n"); 10258#endif 10259 xmlParsePI(ctxt); 10260 ctxt->instate = XML_PARSER_EPILOG; 10261 } else if ((cur == '<') && (next == '!') && 10262 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) { 10263 if ((!terminate) && 10264 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) 10265 goto done; 10266#ifdef DEBUG_PUSH 10267 xmlGenericError(xmlGenericErrorContext, 10268 "PP: Parsing Comment\n"); 10269#endif 10270 xmlParseComment(ctxt); 10271 ctxt->instate = XML_PARSER_EPILOG; 10272 } else if ((cur == '<') && (next == '!') && 10273 (avail < 4)) { 10274 goto done; 10275 } else { 10276 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10277 ctxt->instate = XML_PARSER_EOF; 10278#ifdef DEBUG_PUSH 10279 xmlGenericError(xmlGenericErrorContext, 10280 "PP: entering EOF\n"); 10281#endif 10282 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10283 ctxt->sax->endDocument(ctxt->userData); 10284 goto done; 10285 } 10286 break; 10287 case XML_PARSER_DTD: { 10288 /* 10289 * Sorry but progressive parsing of the internal subset 10290 * is not expected to be supported. 
We first check that 10291 * the full content of the internal subset is available and 10292 * the parsing is launched only at that point. 10293 * Internal subset ends up with "']' S? '>'" in an unescaped 10294 * section and not in a ']]>' sequence which are conditional 10295 * sections (whoever argued to keep that crap in XML deserve 10296 * a place in hell !). 10297 */ 10298 int base, i; 10299 xmlChar *buf; 10300 xmlChar quote = 0; 10301 10302 base = ctxt->input->cur - ctxt->input->base; 10303 if (base < 0) return(0); 10304 if (ctxt->checkIndex > base) 10305 base = ctxt->checkIndex; 10306 buf = ctxt->input->buf->buffer->content; 10307 for (;(unsigned int) base < ctxt->input->buf->buffer->use; 10308 base++) { 10309 if (quote != 0) { 10310 if (buf[base] == quote) 10311 quote = 0; 10312 continue; 10313 } 10314 if ((quote == 0) && (buf[base] == '<')) { 10315 int found = 0; 10316 /* special handling of comments */ 10317 if (((unsigned int) base + 4 < 10318 ctxt->input->buf->buffer->use) && 10319 (buf[base + 1] == '!') && 10320 (buf[base + 2] == '-') && 10321 (buf[base + 3] == '-')) { 10322 for (;(unsigned int) base + 3 < 10323 ctxt->input->buf->buffer->use; base++) { 10324 if ((buf[base] == '-') && 10325 (buf[base + 1] == '-') && 10326 (buf[base + 2] == '>')) { 10327 found = 1; 10328 base += 2; 10329 break; 10330 } 10331 } 10332 if (!found) { 10333#if 0 10334 fprintf(stderr, "unfinished comment\n"); 10335#endif 10336 break; /* for */ 10337 } 10338 continue; 10339 } 10340 } 10341 if (buf[base] == '"') { 10342 quote = '"'; 10343 continue; 10344 } 10345 if (buf[base] == '\'') { 10346 quote = '\''; 10347 continue; 10348 } 10349 if (buf[base] == ']') { 10350#if 0 10351 fprintf(stderr, "%c%c%c%c: ", buf[base], 10352 buf[base + 1], buf[base + 2], buf[base + 3]); 10353#endif 10354 if ((unsigned int) base +1 >= 10355 ctxt->input->buf->buffer->use) 10356 break; 10357 if (buf[base + 1] == ']') { 10358 /* conditional crap, skip both ']' ! 
*/ 10359 base++; 10360 continue; 10361 } 10362 for (i = 1; 10363 (unsigned int) base + i < ctxt->input->buf->buffer->use; 10364 i++) { 10365 if (buf[base + i] == '>') { 10366#if 0 10367 fprintf(stderr, "found\n"); 10368#endif 10369 goto found_end_int_subset; 10370 } 10371 if (!IS_BLANK_CH(buf[base + i])) { 10372#if 0 10373 fprintf(stderr, "not found\n"); 10374#endif 10375 goto not_end_of_int_subset; 10376 } 10377 } 10378#if 0 10379 fprintf(stderr, "end of stream\n"); 10380#endif 10381 break; 10382 10383 } 10384not_end_of_int_subset: 10385 continue; /* for */ 10386 } 10387 /* 10388 * We didn't found the end of the Internal subset 10389 */ 10390#ifdef DEBUG_PUSH 10391 if (next == 0) 10392 xmlGenericError(xmlGenericErrorContext, 10393 "PP: lookup of int subset end filed\n"); 10394#endif 10395 goto done; 10396 10397found_end_int_subset: 10398 xmlParseInternalSubset(ctxt); 10399 ctxt->inSubset = 2; 10400 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && 10401 (ctxt->sax->externalSubset != NULL)) 10402 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName, 10403 ctxt->extSubSystem, ctxt->extSubURI); 10404 ctxt->inSubset = 0; 10405 ctxt->instate = XML_PARSER_PROLOG; 10406 ctxt->checkIndex = 0; 10407#ifdef DEBUG_PUSH 10408 xmlGenericError(xmlGenericErrorContext, 10409 "PP: entering PROLOG\n"); 10410#endif 10411 break; 10412 } 10413 case XML_PARSER_COMMENT: 10414 xmlGenericError(xmlGenericErrorContext, 10415 "PP: internal error, state == COMMENT\n"); 10416 ctxt->instate = XML_PARSER_CONTENT; 10417#ifdef DEBUG_PUSH 10418 xmlGenericError(xmlGenericErrorContext, 10419 "PP: entering CONTENT\n"); 10420#endif 10421 break; 10422 case XML_PARSER_IGNORE: 10423 xmlGenericError(xmlGenericErrorContext, 10424 "PP: internal error, state == IGNORE"); 10425 ctxt->instate = XML_PARSER_DTD; 10426#ifdef DEBUG_PUSH 10427 xmlGenericError(xmlGenericErrorContext, 10428 "PP: entering DTD\n"); 10429#endif 10430 break; 10431 case XML_PARSER_PI: 10432 xmlGenericError(xmlGenericErrorContext, 
10433 "PP: internal error, state == PI\n"); 10434 ctxt->instate = XML_PARSER_CONTENT; 10435#ifdef DEBUG_PUSH 10436 xmlGenericError(xmlGenericErrorContext, 10437 "PP: entering CONTENT\n"); 10438#endif 10439 break; 10440 case XML_PARSER_ENTITY_DECL: 10441 xmlGenericError(xmlGenericErrorContext, 10442 "PP: internal error, state == ENTITY_DECL\n"); 10443 ctxt->instate = XML_PARSER_DTD; 10444#ifdef DEBUG_PUSH 10445 xmlGenericError(xmlGenericErrorContext, 10446 "PP: entering DTD\n"); 10447#endif 10448 break; 10449 case XML_PARSER_ENTITY_VALUE: 10450 xmlGenericError(xmlGenericErrorContext, 10451 "PP: internal error, state == ENTITY_VALUE\n"); 10452 ctxt->instate = XML_PARSER_CONTENT; 10453#ifdef DEBUG_PUSH 10454 xmlGenericError(xmlGenericErrorContext, 10455 "PP: entering DTD\n"); 10456#endif 10457 break; 10458 case XML_PARSER_ATTRIBUTE_VALUE: 10459 xmlGenericError(xmlGenericErrorContext, 10460 "PP: internal error, state == ATTRIBUTE_VALUE\n"); 10461 ctxt->instate = XML_PARSER_START_TAG; 10462#ifdef DEBUG_PUSH 10463 xmlGenericError(xmlGenericErrorContext, 10464 "PP: entering START_TAG\n"); 10465#endif 10466 break; 10467 case XML_PARSER_SYSTEM_LITERAL: 10468 xmlGenericError(xmlGenericErrorContext, 10469 "PP: internal error, state == SYSTEM_LITERAL\n"); 10470 ctxt->instate = XML_PARSER_START_TAG; 10471#ifdef DEBUG_PUSH 10472 xmlGenericError(xmlGenericErrorContext, 10473 "PP: entering START_TAG\n"); 10474#endif 10475 break; 10476 case XML_PARSER_PUBLIC_LITERAL: 10477 xmlGenericError(xmlGenericErrorContext, 10478 "PP: internal error, state == PUBLIC_LITERAL\n"); 10479 ctxt->instate = XML_PARSER_START_TAG; 10480#ifdef DEBUG_PUSH 10481 xmlGenericError(xmlGenericErrorContext, 10482 "PP: entering START_TAG\n"); 10483#endif 10484 break; 10485 } 10486 } 10487done: 10488#ifdef DEBUG_PUSH 10489 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret); 10490#endif 10491 return(ret); 10492encoding_error: 10493 { 10494 char buffer[150]; 10495 10496 snprintf(buffer, 149, "Bytes: 
0x%02X 0x%02X 0x%02X 0x%02X\n", 10497 ctxt->input->cur[0], ctxt->input->cur[1], 10498 ctxt->input->cur[2], ctxt->input->cur[3]); 10499 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 10500 "Input is not proper UTF-8, indicate encoding !\n%s", 10501 BAD_CAST buffer, NULL); 10502 } 10503 return(0); 10504} 10505 10506/** 10507 * xmlParseChunk: 10508 * @ctxt: an XML parser context 10509 * @chunk: an char array 10510 * @size: the size in byte of the chunk 10511 * @terminate: last chunk indicator 10512 * 10513 * Parse a Chunk of memory 10514 * 10515 * Returns zero if no error, the xmlParserErrors otherwise. 10516 */ 10517int 10518xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, 10519 int terminate) { 10520 int end_in_lf = 0; 10521 10522 if (ctxt == NULL) 10523 return(XML_ERR_INTERNAL_ERROR); 10524 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10525 return(ctxt->errNo); 10526 if (ctxt->instate == XML_PARSER_START) 10527 xmlDetectSAX2(ctxt); 10528 if ((size > 0) && (chunk != NULL) && (!terminate) && 10529 (chunk[size - 1] == '\r')) { 10530 end_in_lf = 1; 10531 size--; 10532 } 10533 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 10534 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 10535 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 10536 int cur = ctxt->input->cur - ctxt->input->base; 10537 int res; 10538 10539 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 10540 if (res < 0) { 10541 ctxt->errNo = XML_PARSER_EOF; 10542 ctxt->disableSAX = 1; 10543 return (XML_PARSER_EOF); 10544 } 10545 ctxt->input->base = ctxt->input->buf->buffer->content + base; 10546 ctxt->input->cur = ctxt->input->base + cur; 10547 ctxt->input->end = 10548 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 10549#ifdef DEBUG_PUSH 10550 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 10551#endif 10552 10553 } else if (ctxt->instate != XML_PARSER_EOF) { 10554 if ((ctxt->input != 
NULL) && ctxt->input->buf != NULL) { 10555 xmlParserInputBufferPtr in = ctxt->input->buf; 10556 if ((in->encoder != NULL) && (in->buffer != NULL) && 10557 (in->raw != NULL)) { 10558 int nbchars; 10559 10560 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 10561 if (nbchars < 0) { 10562 /* TODO 2.6.0 */ 10563 xmlGenericError(xmlGenericErrorContext, 10564 "xmlParseChunk: encoder error\n"); 10565 return(XML_ERR_INVALID_ENCODING); 10566 } 10567 } 10568 } 10569 } 10570 xmlParseTryOrFinish(ctxt, terminate); 10571 if ((end_in_lf == 1) && (ctxt->input != NULL) && 10572 (ctxt->input->buf != NULL)) { 10573 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); 10574 } 10575 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) 10576 return(ctxt->errNo); 10577 if (terminate) { 10578 /* 10579 * Check for termination 10580 */ 10581 int avail = 0; 10582 10583 if (ctxt->input != NULL) { 10584 if (ctxt->input->buf == NULL) 10585 avail = ctxt->input->length - 10586 (ctxt->input->cur - ctxt->input->base); 10587 else 10588 avail = ctxt->input->buf->buffer->use - 10589 (ctxt->input->cur - ctxt->input->base); 10590 } 10591 10592 if ((ctxt->instate != XML_PARSER_EOF) && 10593 (ctxt->instate != XML_PARSER_EPILOG)) { 10594 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10595 } 10596 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) { 10597 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL); 10598 } 10599 if (ctxt->instate != XML_PARSER_EOF) { 10600 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 10601 ctxt->sax->endDocument(ctxt->userData); 10602 } 10603 ctxt->instate = XML_PARSER_EOF; 10604 } 10605 return((xmlParserErrors) ctxt->errNo); 10606} 10607 10608/************************************************************************ 10609 * * 10610 * I/O front end functions to the parser * 10611 * * 10612 ************************************************************************/ 10613 10614/** 10615 * xmlCreatePushParserCtxt: 10616 * @sax: a SAX handler 10617 * 
@user_data: The user data returned on SAX callbacks 10618 * @chunk: a pointer to an array of chars 10619 * @size: number of chars in the array 10620 * @filename: an optional file name or URI 10621 * 10622 * Create a parser context for using the XML parser in push mode. 10623 * If @buffer and @size are non-NULL, the data is used to detect 10624 * the encoding. The remaining characters will be parsed so they 10625 * don't need to be fed in again through xmlParseChunk. 10626 * To allow content encoding detection, @size should be >= 4 10627 * The value of @filename is used for fetching external entities 10628 * and error/warning reports. 10629 * 10630 * Returns the new parser context or NULL 10631 */ 10632 10633xmlParserCtxtPtr 10634xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10635 const char *chunk, int size, const char *filename) { 10636 xmlParserCtxtPtr ctxt; 10637 xmlParserInputPtr inputStream; 10638 xmlParserInputBufferPtr buf; 10639 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 10640 10641 /* 10642 * plug some encoding conversion routines 10643 */ 10644 if ((chunk != NULL) && (size >= 4)) 10645 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 10646 10647 buf = xmlAllocParserInputBuffer(enc); 10648 if (buf == NULL) return(NULL); 10649 10650 ctxt = xmlNewParserCtxt(); 10651 if (ctxt == NULL) { 10652 xmlErrMemory(NULL, "creating parser: out of memory\n"); 10653 xmlFreeParserInputBuffer(buf); 10654 return(NULL); 10655 } 10656 ctxt->dictNames = 1; 10657 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); 10658 if (ctxt->pushTab == NULL) { 10659 xmlErrMemory(ctxt, NULL); 10660 xmlFreeParserInputBuffer(buf); 10661 xmlFreeParserCtxt(ctxt); 10662 return(NULL); 10663 } 10664 if (sax != NULL) { 10665#ifdef LIBXML_SAX1_ENABLED 10666 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 10667#endif /* LIBXML_SAX1_ENABLED */ 10668 xmlFree(ctxt->sax); 10669 ctxt->sax = (xmlSAXHandlerPtr) 
xmlMalloc(sizeof(xmlSAXHandler)); 10670 if (ctxt->sax == NULL) { 10671 xmlErrMemory(ctxt, NULL); 10672 xmlFreeParserInputBuffer(buf); 10673 xmlFreeParserCtxt(ctxt); 10674 return(NULL); 10675 } 10676 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 10677 if (sax->initialized == XML_SAX2_MAGIC) 10678 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 10679 else 10680 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 10681 if (user_data != NULL) 10682 ctxt->userData = user_data; 10683 } 10684 if (filename == NULL) { 10685 ctxt->directory = NULL; 10686 } else { 10687 ctxt->directory = xmlParserGetDirectory(filename); 10688 } 10689 10690 inputStream = xmlNewInputStream(ctxt); 10691 if (inputStream == NULL) { 10692 xmlFreeParserCtxt(ctxt); 10693 xmlFreeParserInputBuffer(buf); 10694 return(NULL); 10695 } 10696 10697 if (filename == NULL) 10698 inputStream->filename = NULL; 10699 else { 10700 inputStream->filename = (char *) 10701 xmlCanonicPath((const xmlChar *) filename); 10702 if (inputStream->filename == NULL) { 10703 xmlFreeParserCtxt(ctxt); 10704 xmlFreeParserInputBuffer(buf); 10705 return(NULL); 10706 } 10707 } 10708 inputStream->buf = buf; 10709 inputStream->base = inputStream->buf->buffer->content; 10710 inputStream->cur = inputStream->buf->buffer->content; 10711 inputStream->end = 10712 &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 10713 10714 inputPush(ctxt, inputStream); 10715 10716 /* 10717 * If the caller didn't provide an initial 'chunk' for determining 10718 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so 10719 * that it can be automatically determined later 10720 */ 10721 if ((size == 0) || (chunk == NULL)) { 10722 ctxt->charset = XML_CHAR_ENCODING_NONE; 10723 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) { 10724 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 10725 int cur = ctxt->input->cur - ctxt->input->base; 10726 10727 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 10728 10729 
ctxt->input->base = ctxt->input->buf->buffer->content + base; 10730 ctxt->input->cur = ctxt->input->base + cur; 10731 ctxt->input->end = 10732 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; 10733#ifdef DEBUG_PUSH 10734 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 10735#endif 10736 } 10737 10738 if (enc != XML_CHAR_ENCODING_NONE) { 10739 xmlSwitchEncoding(ctxt, enc); 10740 } 10741 10742 return(ctxt); 10743} 10744#endif /* LIBXML_PUSH_ENABLED */ 10745 10746/** 10747 * xmlStopParser: 10748 * @ctxt: an XML parser context 10749 * 10750 * Blocks further parser processing 10751 */ 10752void 10753xmlStopParser(xmlParserCtxtPtr ctxt) { 10754 if (ctxt == NULL) 10755 return; 10756 ctxt->instate = XML_PARSER_EOF; 10757 ctxt->disableSAX = 1; 10758 if (ctxt->input != NULL) { 10759 ctxt->input->cur = BAD_CAST""; 10760 ctxt->input->base = ctxt->input->cur; 10761 } 10762} 10763 10764/** 10765 * xmlCreateIOParserCtxt: 10766 * @sax: a SAX handler 10767 * @user_data: The user data returned on SAX callbacks 10768 * @ioread: an I/O read function 10769 * @ioclose: an I/O close function 10770 * @ioctx: an I/O handler 10771 * @enc: the charset encoding if known 10772 * 10773 * Create a parser context for using the XML parser with an existing 10774 * I/O stream 10775 * 10776 * Returns the new parser context or NULL 10777 */ 10778xmlParserCtxtPtr 10779xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, 10780 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 10781 void *ioctx, xmlCharEncoding enc) { 10782 xmlParserCtxtPtr ctxt; 10783 xmlParserInputPtr inputStream; 10784 xmlParserInputBufferPtr buf; 10785 10786 if (ioread == NULL) return(NULL); 10787 10788 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc); 10789 if (buf == NULL) return(NULL); 10790 10791 ctxt = xmlNewParserCtxt(); 10792 if (ctxt == NULL) { 10793 xmlFreeParserInputBuffer(buf); 10794 return(NULL); 10795 } 10796 if (sax != NULL) { 10797#ifdef 
LIBXML_SAX1_ENABLED 10798 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 10799#endif /* LIBXML_SAX1_ENABLED */ 10800 xmlFree(ctxt->sax); 10801 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); 10802 if (ctxt->sax == NULL) { 10803 xmlErrMemory(ctxt, NULL); 10804 xmlFreeParserCtxt(ctxt); 10805 return(NULL); 10806 } 10807 memset(ctxt->sax, 0, sizeof(xmlSAXHandler)); 10808 if (sax->initialized == XML_SAX2_MAGIC) 10809 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); 10810 else 10811 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); 10812 if (user_data != NULL) 10813 ctxt->userData = user_data; 10814 } 10815 10816 inputStream = xmlNewIOInputStream(ctxt, buf, enc); 10817 if (inputStream == NULL) { 10818 xmlFreeParserCtxt(ctxt); 10819 return(NULL); 10820 } 10821 inputPush(ctxt, inputStream); 10822 10823 return(ctxt); 10824} 10825 10826#ifdef LIBXML_VALID_ENABLED 10827/************************************************************************ 10828 * * 10829 * Front ends when parsing a DTD * 10830 * * 10831 ************************************************************************/ 10832 10833/** 10834 * xmlIOParseDTD: 10835 * @sax: the SAX handler block or NULL 10836 * @input: an Input Buffer 10837 * @enc: the charset encoding if known 10838 * 10839 * Load and parse a DTD 10840 * 10841 * Returns the resulting xmlDtdPtr or NULL in case of error. 10842 * @input will be freed by the function in any case. 
10843 */ 10844 10845xmlDtdPtr 10846xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, 10847 xmlCharEncoding enc) { 10848 xmlDtdPtr ret = NULL; 10849 xmlParserCtxtPtr ctxt; 10850 xmlParserInputPtr pinput = NULL; 10851 xmlChar start[4]; 10852 10853 if (input == NULL) 10854 return(NULL); 10855 10856 ctxt = xmlNewParserCtxt(); 10857 if (ctxt == NULL) { 10858 xmlFreeParserInputBuffer(input); 10859 return(NULL); 10860 } 10861 10862 /* 10863 * Set-up the SAX context 10864 */ 10865 if (sax != NULL) { 10866 if (ctxt->sax != NULL) 10867 xmlFree(ctxt->sax); 10868 ctxt->sax = sax; 10869 ctxt->userData = ctxt; 10870 } 10871 xmlDetectSAX2(ctxt); 10872 10873 /* 10874 * generate a parser input from the I/O handler 10875 */ 10876 10877 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 10878 if (pinput == NULL) { 10879 if (sax != NULL) ctxt->sax = NULL; 10880 xmlFreeParserInputBuffer(input); 10881 xmlFreeParserCtxt(ctxt); 10882 return(NULL); 10883 } 10884 10885 /* 10886 * plug some encoding conversion routines here. 10887 */ 10888 xmlPushInput(ctxt, pinput); 10889 if (enc != XML_CHAR_ENCODING_NONE) { 10890 xmlSwitchEncoding(ctxt, enc); 10891 } 10892 10893 pinput->filename = NULL; 10894 pinput->line = 1; 10895 pinput->col = 1; 10896 pinput->base = ctxt->input->cur; 10897 pinput->cur = ctxt->input->cur; 10898 pinput->free = NULL; 10899 10900 /* 10901 * let's parse that entity knowing it's an external subset. 10902 */ 10903 ctxt->inSubset = 2; 10904 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 10905 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 10906 BAD_CAST "none", BAD_CAST "none"); 10907 10908 if ((enc == XML_CHAR_ENCODING_NONE) && 10909 ((ctxt->input->end - ctxt->input->cur) >= 4)) { 10910 /* 10911 * Get the 4 first bytes and decode the charset 10912 * if enc != XML_CHAR_ENCODING_NONE 10913 * plug some encoding conversion routines. 
10914 */ 10915 start[0] = RAW; 10916 start[1] = NXT(1); 10917 start[2] = NXT(2); 10918 start[3] = NXT(3); 10919 enc = xmlDetectCharEncoding(start, 4); 10920 if (enc != XML_CHAR_ENCODING_NONE) { 10921 xmlSwitchEncoding(ctxt, enc); 10922 } 10923 } 10924 10925 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); 10926 10927 if (ctxt->myDoc != NULL) { 10928 if (ctxt->wellFormed) { 10929 ret = ctxt->myDoc->extSubset; 10930 ctxt->myDoc->extSubset = NULL; 10931 if (ret != NULL) { 10932 xmlNodePtr tmp; 10933 10934 ret->doc = NULL; 10935 tmp = ret->children; 10936 while (tmp != NULL) { 10937 tmp->doc = NULL; 10938 tmp = tmp->next; 10939 } 10940 } 10941 } else { 10942 ret = NULL; 10943 } 10944 xmlFreeDoc(ctxt->myDoc); 10945 ctxt->myDoc = NULL; 10946 } 10947 if (sax != NULL) ctxt->sax = NULL; 10948 xmlFreeParserCtxt(ctxt); 10949 10950 return(ret); 10951} 10952 10953/** 10954 * xmlSAXParseDTD: 10955 * @sax: the SAX handler block 10956 * @ExternalID: a NAME* containing the External ID of the DTD 10957 * @SystemID: a NAME* containing the URL to the DTD 10958 * 10959 * Load and parse an external subset. 10960 * 10961 * Returns the resulting xmlDtdPtr or NULL in case of error. 
10962 */ 10963 10964xmlDtdPtr 10965xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, 10966 const xmlChar *SystemID) { 10967 xmlDtdPtr ret = NULL; 10968 xmlParserCtxtPtr ctxt; 10969 xmlParserInputPtr input = NULL; 10970 xmlCharEncoding enc; 10971 xmlChar* systemIdCanonic; 10972 10973 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL); 10974 10975 ctxt = xmlNewParserCtxt(); 10976 if (ctxt == NULL) { 10977 return(NULL); 10978 } 10979 10980 /* 10981 * Set-up the SAX context 10982 */ 10983 if (sax != NULL) { 10984 if (ctxt->sax != NULL) 10985 xmlFree(ctxt->sax); 10986 ctxt->sax = sax; 10987 ctxt->userData = ctxt; 10988 } 10989 10990 /* 10991 * Canonicalise the system ID 10992 */ 10993 systemIdCanonic = xmlCanonicPath(SystemID); 10994 if ((SystemID != NULL) && (systemIdCanonic == NULL)) { 10995 xmlFreeParserCtxt(ctxt); 10996 return(NULL); 10997 } 10998 10999 /* 11000 * Ask the Entity resolver to load the damn thing 11001 */ 11002 11003 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) 11004 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, 11005 systemIdCanonic); 11006 if (input == NULL) { 11007 if (sax != NULL) ctxt->sax = NULL; 11008 xmlFreeParserCtxt(ctxt); 11009 if (systemIdCanonic != NULL) 11010 xmlFree(systemIdCanonic); 11011 return(NULL); 11012 } 11013 11014 /* 11015 * plug some encoding conversion routines here. 11016 */ 11017 xmlPushInput(ctxt, input); 11018 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11019 enc = xmlDetectCharEncoding(ctxt->input->cur, 4); 11020 xmlSwitchEncoding(ctxt, enc); 11021 } 11022 11023 if (input->filename == NULL) 11024 input->filename = (char *) systemIdCanonic; 11025 else 11026 xmlFree(systemIdCanonic); 11027 input->line = 1; 11028 input->col = 1; 11029 input->base = ctxt->input->cur; 11030 input->cur = ctxt->input->cur; 11031 input->free = NULL; 11032 11033 /* 11034 * let's parse that entity knowing it's an external subset. 
11035 */ 11036 ctxt->inSubset = 2; 11037 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); 11038 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", 11039 ExternalID, SystemID); 11040 xmlParseExternalSubset(ctxt, ExternalID, SystemID); 11041 11042 if (ctxt->myDoc != NULL) { 11043 if (ctxt->wellFormed) { 11044 ret = ctxt->myDoc->extSubset; 11045 ctxt->myDoc->extSubset = NULL; 11046 if (ret != NULL) { 11047 xmlNodePtr tmp; 11048 11049 ret->doc = NULL; 11050 tmp = ret->children; 11051 while (tmp != NULL) { 11052 tmp->doc = NULL; 11053 tmp = tmp->next; 11054 } 11055 } 11056 } else { 11057 ret = NULL; 11058 } 11059 xmlFreeDoc(ctxt->myDoc); 11060 ctxt->myDoc = NULL; 11061 } 11062 if (sax != NULL) ctxt->sax = NULL; 11063 xmlFreeParserCtxt(ctxt); 11064 11065 return(ret); 11066} 11067 11068 11069/** 11070 * xmlParseDTD: 11071 * @ExternalID: a NAME* containing the External ID of the DTD 11072 * @SystemID: a NAME* containing the URL to the DTD 11073 * 11074 * Load and parse an external subset. 11075 * 11076 * Returns the resulting xmlDtdPtr or NULL in case of error. 11077 */ 11078 11079xmlDtdPtr 11080xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) { 11081 return(xmlSAXParseDTD(NULL, ExternalID, SystemID)); 11082} 11083#endif /* LIBXML_VALID_ENABLED */ 11084 11085/************************************************************************ 11086 * * 11087 * Front ends when parsing an Entity * 11088 * * 11089 ************************************************************************/ 11090 11091/** 11092 * xmlParseCtxtExternalEntity: 11093 * @ctx: the existing parsing context 11094 * @URL: the URL for the entity to load 11095 * @ID: the System ID for the entity to load 11096 * @lst: the return value for the set of parsed nodes 11097 * 11098 * Parse an external general entity within an existing parsing context 11099 * An external general parsed entity is well-formed if it matches the 11100 * production labeled extParsedEnt. 
11101 * 11102 * [78] extParsedEnt ::= TextDecl? content 11103 * 11104 * Returns 0 if the entity is well formed, -1 in case of args problem and 11105 * the parser error code otherwise 11106 */ 11107 11108int 11109xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, 11110 const xmlChar *ID, xmlNodePtr *lst) { 11111 xmlParserCtxtPtr ctxt; 11112 xmlDocPtr newDoc; 11113 xmlNodePtr newRoot; 11114 xmlSAXHandlerPtr oldsax = NULL; 11115 int ret = 0; 11116 xmlChar start[4]; 11117 xmlCharEncoding enc; 11118 xmlParserInputPtr inputStream; 11119 char *directory = NULL; 11120 11121 if (ctx == NULL) return(-1); 11122 11123 if (ctx->depth > 40) { 11124 return(XML_ERR_ENTITY_LOOP); 11125 } 11126 11127 if (lst != NULL) 11128 *lst = NULL; 11129 if ((URL == NULL) && (ID == NULL)) 11130 return(-1); 11131 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ 11132 return(-1); 11133 11134 ctxt = xmlNewParserCtxt(); 11135 if (ctxt == NULL) { 11136 return(-1); 11137 } 11138 11139 ctxt->userData = ctxt; 11140 ctxt->_private = ctx->_private; 11141 11142 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 11143 if (inputStream == NULL) { 11144 xmlFreeParserCtxt(ctxt); 11145 return(-1); 11146 } 11147 11148 inputPush(ctxt, inputStream); 11149 11150 if ((ctxt->directory == NULL) && (directory == NULL)) 11151 directory = xmlParserGetDirectory((char *)URL); 11152 if ((ctxt->directory == NULL) && (directory != NULL)) 11153 ctxt->directory = directory; 11154 11155 oldsax = ctxt->sax; 11156 ctxt->sax = ctx->sax; 11157 xmlDetectSAX2(ctxt); 11158 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11159 if (newDoc == NULL) { 11160 xmlFreeParserCtxt(ctxt); 11161 return(-1); 11162 } 11163 if (ctx->myDoc->dict) { 11164 newDoc->dict = ctx->myDoc->dict; 11165 xmlDictReference(newDoc->dict); 11166 } 11167 if (ctx->myDoc != NULL) { 11168 newDoc->intSubset = ctx->myDoc->intSubset; 11169 newDoc->extSubset = ctx->myDoc->extSubset; 11170 } 11171 if (ctx->myDoc->URL != NULL) { 11172 
newDoc->URL = xmlStrdup(ctx->myDoc->URL); 11173 } 11174 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11175 if (newRoot == NULL) { 11176 ctxt->sax = oldsax; 11177 xmlFreeParserCtxt(ctxt); 11178 newDoc->intSubset = NULL; 11179 newDoc->extSubset = NULL; 11180 xmlFreeDoc(newDoc); 11181 return(-1); 11182 } 11183 xmlAddChild((xmlNodePtr) newDoc, newRoot); 11184 nodePush(ctxt, newDoc->children); 11185 if (ctx->myDoc == NULL) { 11186 ctxt->myDoc = newDoc; 11187 } else { 11188 ctxt->myDoc = ctx->myDoc; 11189 newDoc->children->doc = ctx->myDoc; 11190 } 11191 11192 /* 11193 * Get the 4 first bytes and decode the charset 11194 * if enc != XML_CHAR_ENCODING_NONE 11195 * plug some encoding conversion routines. 11196 */ 11197 GROW 11198 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11199 start[0] = RAW; 11200 start[1] = NXT(1); 11201 start[2] = NXT(2); 11202 start[3] = NXT(3); 11203 enc = xmlDetectCharEncoding(start, 4); 11204 if (enc != XML_CHAR_ENCODING_NONE) { 11205 xmlSwitchEncoding(ctxt, enc); 11206 } 11207 } 11208 11209 /* 11210 * Parse a possible text declaration first 11211 */ 11212 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11213 xmlParseTextDecl(ctxt); 11214 } 11215 11216 /* 11217 * Doing validity checking on chunk doesn't make sense 11218 */ 11219 ctxt->instate = XML_PARSER_CONTENT; 11220 ctxt->validate = ctx->validate; 11221 ctxt->valid = ctx->valid; 11222 ctxt->loadsubset = ctx->loadsubset; 11223 ctxt->depth = ctx->depth + 1; 11224 ctxt->replaceEntities = ctx->replaceEntities; 11225 if (ctxt->validate) { 11226 ctxt->vctxt.error = ctx->vctxt.error; 11227 ctxt->vctxt.warning = ctx->vctxt.warning; 11228 } else { 11229 ctxt->vctxt.error = NULL; 11230 ctxt->vctxt.warning = NULL; 11231 } 11232 ctxt->vctxt.nodeTab = NULL; 11233 ctxt->vctxt.nodeNr = 0; 11234 ctxt->vctxt.nodeMax = 0; 11235 ctxt->vctxt.node = NULL; 11236 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 11237 ctxt->dict = ctx->dict; 11238 ctxt->str_xml = 
xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 11239 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 11240 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 11241 ctxt->dictNames = ctx->dictNames; 11242 ctxt->attsDefault = ctx->attsDefault; 11243 ctxt->attsSpecial = ctx->attsSpecial; 11244 ctxt->linenumbers = ctx->linenumbers; 11245 11246 xmlParseContent(ctxt); 11247 11248 ctx->validate = ctxt->validate; 11249 ctx->valid = ctxt->valid; 11250 if ((RAW == '<') && (NXT(1) == '/')) { 11251 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11252 } else if (RAW != 0) { 11253 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11254 } 11255 if (ctxt->node != newDoc->children) { 11256 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11257 } 11258 11259 if (!ctxt->wellFormed) { 11260 if (ctxt->errNo == 0) 11261 ret = 1; 11262 else 11263 ret = ctxt->errNo; 11264 } else { 11265 if (lst != NULL) { 11266 xmlNodePtr cur; 11267 11268 /* 11269 * Return the newly created nodeset after unlinking it from 11270 * they pseudo parent. 
11271 */ 11272 cur = newDoc->children->children; 11273 *lst = cur; 11274 while (cur != NULL) { 11275 cur->parent = NULL; 11276 cur = cur->next; 11277 } 11278 newDoc->children->children = NULL; 11279 } 11280 ret = 0; 11281 } 11282 ctxt->sax = oldsax; 11283 ctxt->dict = NULL; 11284 ctxt->attsDefault = NULL; 11285 ctxt->attsSpecial = NULL; 11286 xmlFreeParserCtxt(ctxt); 11287 newDoc->intSubset = NULL; 11288 newDoc->extSubset = NULL; 11289 xmlFreeDoc(newDoc); 11290 11291 return(ret); 11292} 11293 11294/** 11295 * xmlParseExternalEntityPrivate: 11296 * @doc: the document the chunk pertains to 11297 * @oldctxt: the previous parser context if available 11298 * @sax: the SAX handler bloc (possibly NULL) 11299 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11300 * @depth: Used for loop detection, use 0 11301 * @URL: the URL for the entity to load 11302 * @ID: the System ID for the entity to load 11303 * @list: the return value for the set of parsed nodes 11304 * 11305 * Private version of xmlParseExternalEntity() 11306 * 11307 * Returns 0 if the entity is well formed, -1 in case of args problem and 11308 * the parser error code otherwise 11309 */ 11310 11311static xmlParserErrors 11312xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, 11313 xmlSAXHandlerPtr sax, 11314 void *user_data, int depth, const xmlChar *URL, 11315 const xmlChar *ID, xmlNodePtr *list) { 11316 xmlParserCtxtPtr ctxt; 11317 xmlDocPtr newDoc; 11318 xmlNodePtr newRoot; 11319 xmlSAXHandlerPtr oldsax = NULL; 11320 xmlParserErrors ret = XML_ERR_OK; 11321 xmlChar start[4]; 11322 xmlCharEncoding enc; 11323 11324 if (depth > 40) { 11325 return(XML_ERR_ENTITY_LOOP); 11326 } 11327 11328 11329 11330 if (list != NULL) 11331 *list = NULL; 11332 if ((URL == NULL) && (ID == NULL)) 11333 return(XML_ERR_INTERNAL_ERROR); 11334 if (doc == NULL) 11335 return(XML_ERR_INTERNAL_ERROR); 11336 11337 11338 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL); 11339 if (ctxt == NULL) 
return(XML_WAR_UNDECLARED_ENTITY); 11340 ctxt->userData = ctxt; 11341 if (oldctxt != NULL) { 11342 ctxt->_private = oldctxt->_private; 11343 ctxt->loadsubset = oldctxt->loadsubset; 11344 ctxt->validate = oldctxt->validate; 11345 ctxt->external = oldctxt->external; 11346 ctxt->record_info = oldctxt->record_info; 11347 ctxt->node_seq.maximum = oldctxt->node_seq.maximum; 11348 ctxt->node_seq.length = oldctxt->node_seq.length; 11349 ctxt->node_seq.buffer = oldctxt->node_seq.buffer; 11350 } else { 11351 /* 11352 * Doing validity checking on chunk without context 11353 * doesn't make sense 11354 */ 11355 ctxt->_private = NULL; 11356 ctxt->validate = 0; 11357 ctxt->external = 2; 11358 ctxt->loadsubset = 0; 11359 } 11360 if (sax != NULL) { 11361 oldsax = ctxt->sax; 11362 ctxt->sax = sax; 11363 if (user_data != NULL) 11364 ctxt->userData = user_data; 11365 } 11366 xmlDetectSAX2(ctxt); 11367 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11368 if (newDoc == NULL) { 11369 ctxt->node_seq.maximum = 0; 11370 ctxt->node_seq.length = 0; 11371 ctxt->node_seq.buffer = NULL; 11372 xmlFreeParserCtxt(ctxt); 11373 return(XML_ERR_INTERNAL_ERROR); 11374 } 11375 newDoc->intSubset = doc->intSubset; 11376 newDoc->extSubset = doc->extSubset; 11377 newDoc->dict = doc->dict; 11378 xmlDictReference(newDoc->dict); 11379 11380 if (doc->URL != NULL) { 11381 newDoc->URL = xmlStrdup(doc->URL); 11382 } 11383 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11384 if (newRoot == NULL) { 11385 if (sax != NULL) 11386 ctxt->sax = oldsax; 11387 ctxt->node_seq.maximum = 0; 11388 ctxt->node_seq.length = 0; 11389 ctxt->node_seq.buffer = NULL; 11390 xmlFreeParserCtxt(ctxt); 11391 newDoc->intSubset = NULL; 11392 newDoc->extSubset = NULL; 11393 xmlFreeDoc(newDoc); 11394 return(XML_ERR_INTERNAL_ERROR); 11395 } 11396 xmlAddChild((xmlNodePtr) newDoc, newRoot); 11397 nodePush(ctxt, newDoc->children); 11398 ctxt->myDoc = doc; 11399 newRoot->doc = doc; 11400 11401 /* 11402 * Get the 4 first bytes and decode the 
charset 11403 * if enc != XML_CHAR_ENCODING_NONE 11404 * plug some encoding conversion routines. 11405 */ 11406 GROW; 11407 if ((ctxt->input->end - ctxt->input->cur) >= 4) { 11408 start[0] = RAW; 11409 start[1] = NXT(1); 11410 start[2] = NXT(2); 11411 start[3] = NXT(3); 11412 enc = xmlDetectCharEncoding(start, 4); 11413 if (enc != XML_CHAR_ENCODING_NONE) { 11414 xmlSwitchEncoding(ctxt, enc); 11415 } 11416 } 11417 11418 /* 11419 * Parse a possible text declaration first 11420 */ 11421 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) { 11422 xmlParseTextDecl(ctxt); 11423 } 11424 11425 ctxt->instate = XML_PARSER_CONTENT; 11426 ctxt->depth = depth; 11427 11428 xmlParseContent(ctxt); 11429 11430 if ((RAW == '<') && (NXT(1) == '/')) { 11431 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11432 } else if (RAW != 0) { 11433 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11434 } 11435 if (ctxt->node != newDoc->children) { 11436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11437 } 11438 11439 if (!ctxt->wellFormed) { 11440 if (ctxt->errNo == 0) 11441 ret = XML_ERR_INTERNAL_ERROR; 11442 else 11443 ret = (xmlParserErrors)ctxt->errNo; 11444 } else { 11445 if (list != NULL) { 11446 xmlNodePtr cur; 11447 11448 /* 11449 * Return the newly created nodeset after unlinking it from 11450 * they pseudo parent. 
11451 */ 11452 cur = newDoc->children->children; 11453 *list = cur; 11454 while (cur != NULL) { 11455 cur->parent = NULL; 11456 cur = cur->next; 11457 } 11458 newDoc->children->children = NULL; 11459 } 11460 ret = XML_ERR_OK; 11461 } 11462 if (sax != NULL) 11463 ctxt->sax = oldsax; 11464 oldctxt->node_seq.maximum = ctxt->node_seq.maximum; 11465 oldctxt->node_seq.length = ctxt->node_seq.length; 11466 oldctxt->node_seq.buffer = ctxt->node_seq.buffer; 11467 ctxt->node_seq.maximum = 0; 11468 ctxt->node_seq.length = 0; 11469 ctxt->node_seq.buffer = NULL; 11470 xmlFreeParserCtxt(ctxt); 11471 newDoc->intSubset = NULL; 11472 newDoc->extSubset = NULL; 11473 xmlFreeDoc(newDoc); 11474 11475 return(ret); 11476} 11477 11478#ifdef LIBXML_SAX1_ENABLED 11479/** 11480 * xmlParseExternalEntity: 11481 * @doc: the document the chunk pertains to 11482 * @sax: the SAX handler bloc (possibly NULL) 11483 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11484 * @depth: Used for loop detection, use 0 11485 * @URL: the URL for the entity to load 11486 * @ID: the System ID for the entity to load 11487 * @lst: the return value for the set of parsed nodes 11488 * 11489 * Parse an external general entity 11490 * An external general parsed entity is well-formed if it matches the 11491 * production labeled extParsedEnt. 11492 * 11493 * [78] extParsedEnt ::= TextDecl? 
content 11494 * 11495 * Returns 0 if the entity is well formed, -1 in case of args problem and 11496 * the parser error code otherwise 11497 */ 11498 11499int 11500xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, 11501 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) { 11502 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL, 11503 ID, lst)); 11504} 11505 11506/** 11507 * xmlParseBalancedChunkMemory: 11508 * @doc: the document the chunk pertains to 11509 * @sax: the SAX handler bloc (possibly NULL) 11510 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11511 * @depth: Used for loop detection, use 0 11512 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11513 * @lst: the return value for the set of parsed nodes 11514 * 11515 * Parse a well-balanced chunk of an XML document 11516 * called by the parser 11517 * The allowed sequence for the Well Balanced Chunk is the one defined by 11518 * the content production in the XML grammar: 11519 * 11520 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11521 * 11522 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 11523 * the parser error code otherwise 11524 */ 11525 11526int 11527xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax, 11528 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) { 11529 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data, 11530 depth, string, lst, 0 ); 11531} 11532#endif /* LIBXML_SAX1_ENABLED */ 11533 11534/** 11535 * xmlParseBalancedChunkMemoryInternal: 11536 * @oldctxt: the existing parsing context 11537 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11538 * @user_data: the user data field for the parser context 11539 * @lst: the return value for the set of parsed nodes 11540 * 11541 * 11542 * Parse a well-balanced chunk of an XML document 11543 * called by the parser 11544 * The 
allowed sequence for the Well Balanced Chunk is the one defined by 11545 * the content production in the XML grammar: 11546 * 11547 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11548 * 11549 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 11550 * error code otherwise 11551 * 11552 * In case recover is set to 1, the nodelist will not be empty even if 11553 * the parsed chunk is not well balanced. 11554 */ 11555static xmlParserErrors 11556xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, 11557 const xmlChar *string, void *user_data, xmlNodePtr *lst) { 11558 xmlParserCtxtPtr ctxt; 11559 xmlDocPtr newDoc = NULL; 11560 xmlNodePtr newRoot; 11561 xmlSAXHandlerPtr oldsax = NULL; 11562 xmlNodePtr content = NULL; 11563 xmlNodePtr last = NULL; 11564 int size; 11565 xmlParserErrors ret = XML_ERR_OK; 11566 11567 if (oldctxt->depth > 40) { 11568 return(XML_ERR_ENTITY_LOOP); 11569 } 11570 11571 11572 if (lst != NULL) 11573 *lst = NULL; 11574 if (string == NULL) 11575 return(XML_ERR_INTERNAL_ERROR); 11576 11577 size = xmlStrlen(string); 11578 11579 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 11580 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY); 11581 if (user_data != NULL) 11582 ctxt->userData = user_data; 11583 else 11584 ctxt->userData = ctxt; 11585 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 11586 ctxt->dict = oldctxt->dict; 11587 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 11588 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 11589 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 11590 11591 oldsax = ctxt->sax; 11592 ctxt->sax = oldctxt->sax; 11593 xmlDetectSAX2(ctxt); 11594 ctxt->replaceEntities = oldctxt->replaceEntities; 11595 ctxt->options = oldctxt->options; 11596 11597 ctxt->_private = oldctxt->_private; 11598 if (oldctxt->myDoc == NULL) { 11599 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11600 if (newDoc == NULL) { 11601 ctxt->sax = 
oldsax; 11602 ctxt->dict = NULL; 11603 xmlFreeParserCtxt(ctxt); 11604 return(XML_ERR_INTERNAL_ERROR); 11605 } 11606 newDoc->dict = ctxt->dict; 11607 xmlDictReference(newDoc->dict); 11608 ctxt->myDoc = newDoc; 11609 } else { 11610 ctxt->myDoc = oldctxt->myDoc; 11611 content = ctxt->myDoc->children; 11612 last = ctxt->myDoc->last; 11613 } 11614 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL); 11615 if (newRoot == NULL) { 11616 ctxt->sax = oldsax; 11617 ctxt->dict = NULL; 11618 xmlFreeParserCtxt(ctxt); 11619 if (newDoc != NULL) { 11620 xmlFreeDoc(newDoc); 11621 } 11622 return(XML_ERR_INTERNAL_ERROR); 11623 } 11624 ctxt->myDoc->children = NULL; 11625 ctxt->myDoc->last = NULL; 11626 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot); 11627 nodePush(ctxt, ctxt->myDoc->children); 11628 ctxt->instate = XML_PARSER_CONTENT; 11629 ctxt->depth = oldctxt->depth + 1; 11630 11631 ctxt->validate = 0; 11632 ctxt->loadsubset = oldctxt->loadsubset; 11633 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) { 11634 /* 11635 * ID/IDREF registration will be done in xmlValidateElement below 11636 */ 11637 ctxt->loadsubset |= XML_SKIP_IDS; 11638 } 11639 ctxt->dictNames = oldctxt->dictNames; 11640 ctxt->attsDefault = oldctxt->attsDefault; 11641 ctxt->attsSpecial = oldctxt->attsSpecial; 11642 11643 xmlParseContent(ctxt); 11644 if ((RAW == '<') && (NXT(1) == '/')) { 11645 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11646 } else if (RAW != 0) { 11647 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11648 } 11649 if (ctxt->node != ctxt->myDoc->children) { 11650 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11651 } 11652 11653 if (!ctxt->wellFormed) { 11654 if (ctxt->errNo == 0) 11655 ret = XML_ERR_INTERNAL_ERROR; 11656 else 11657 ret = (xmlParserErrors)ctxt->errNo; 11658 } else { 11659 ret = XML_ERR_OK; 11660 } 11661 11662 if ((lst != NULL) && (ret == XML_ERR_OK)) { 11663 xmlNodePtr cur; 11664 11665 /* 11666 * Return the newly created nodeset after 
unlinking it from 11667 * they pseudo parent. 11668 */ 11669 cur = ctxt->myDoc->children->children; 11670 *lst = cur; 11671 while (cur != NULL) { 11672#ifdef LIBXML_VALID_ENABLED 11673 if ((oldctxt->validate) && (oldctxt->wellFormed) && 11674 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && 11675 (cur->type == XML_ELEMENT_NODE)) { 11676 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, 11677 oldctxt->myDoc, cur); 11678 } 11679#endif /* LIBXML_VALID_ENABLED */ 11680 cur->parent = NULL; 11681 cur = cur->next; 11682 } 11683 ctxt->myDoc->children->children = NULL; 11684 } 11685 if (ctxt->myDoc != NULL) { 11686 xmlFreeNode(ctxt->myDoc->children); 11687 ctxt->myDoc->children = content; 11688 ctxt->myDoc->last = last; 11689 } 11690 11691 ctxt->sax = oldsax; 11692 ctxt->dict = NULL; 11693 ctxt->attsDefault = NULL; 11694 ctxt->attsSpecial = NULL; 11695 xmlFreeParserCtxt(ctxt); 11696 if (newDoc != NULL) { 11697 xmlFreeDoc(newDoc); 11698 } 11699 11700 return(ret); 11701} 11702 11703/** 11704 * xmlParseInNodeContext: 11705 * @node: the context node 11706 * @data: the input string 11707 * @datalen: the input string length in bytes 11708 * @options: a combination of xmlParserOption 11709 * @lst: the return value for the set of parsed nodes 11710 * 11711 * Parse a well-balanced chunk of an XML document 11712 * within the context (DTD, namespaces, etc ...) of the given node. 
11713 * 11714 * The allowed sequence for the data is a Well Balanced Chunk defined by 11715 * the content production in the XML grammar: 11716 * 11717 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11718 * 11719 * Returns XML_ERR_OK if the chunk is well balanced, and the parser 11720 * error code otherwise 11721 */ 11722xmlParserErrors 11723xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, 11724 int options, xmlNodePtr *lst) { 11725#ifdef SAX2 11726 xmlParserCtxtPtr ctxt; 11727 xmlDocPtr doc = NULL; 11728 xmlNodePtr fake, cur; 11729 int nsnr = 0; 11730 11731 xmlParserErrors ret = XML_ERR_OK; 11732 11733 /* 11734 * check all input parameters, grab the document 11735 */ 11736 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0)) 11737 return(XML_ERR_INTERNAL_ERROR); 11738 switch (node->type) { 11739 case XML_ELEMENT_NODE: 11740 case XML_ATTRIBUTE_NODE: 11741 case XML_TEXT_NODE: 11742 case XML_CDATA_SECTION_NODE: 11743 case XML_ENTITY_REF_NODE: 11744 case XML_PI_NODE: 11745 case XML_COMMENT_NODE: 11746 case XML_DOCUMENT_NODE: 11747 case XML_HTML_DOCUMENT_NODE: 11748 break; 11749 default: 11750 return(XML_ERR_INTERNAL_ERROR); 11751 11752 } 11753 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) && 11754 (node->type != XML_DOCUMENT_NODE) && 11755 (node->type != XML_HTML_DOCUMENT_NODE)) 11756 node = node->parent; 11757 if (node == NULL) 11758 return(XML_ERR_INTERNAL_ERROR); 11759 if (node->type == XML_ELEMENT_NODE) 11760 doc = node->doc; 11761 else 11762 doc = (xmlDocPtr) node; 11763 if (doc == NULL) 11764 return(XML_ERR_INTERNAL_ERROR); 11765 11766 /* 11767 * allocate a context and set-up everything not related to the 11768 * node position in the tree 11769 */ 11770 if (doc->type == XML_DOCUMENT_NODE) 11771 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen); 11772#ifdef LIBXML_HTML_ENABLED 11773 else if (doc->type == XML_HTML_DOCUMENT_NODE) 11774 ctxt = htmlCreateMemoryParserCtxt((char *) 
data, datalen); 11775#endif 11776 else 11777 return(XML_ERR_INTERNAL_ERROR); 11778 11779 if (ctxt == NULL) 11780 return(XML_ERR_NO_MEMORY); 11781 fake = xmlNewComment(NULL); 11782 if (fake == NULL) { 11783 xmlFreeParserCtxt(ctxt); 11784 return(XML_ERR_NO_MEMORY); 11785 } 11786 xmlAddChild(node, fake); 11787 11788 /* 11789 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set. 11790 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict 11791 * we must wait until the last moment to free the original one. 11792 */ 11793 if (doc->dict != NULL) { 11794 if (ctxt->dict != NULL) 11795 xmlDictFree(ctxt->dict); 11796 ctxt->dict = doc->dict; 11797 } else 11798 options |= XML_PARSE_NODICT; 11799 11800 xmlCtxtUseOptions(ctxt, options); 11801 xmlDetectSAX2(ctxt); 11802 ctxt->myDoc = doc; 11803 11804 if (node->type == XML_ELEMENT_NODE) { 11805 nodePush(ctxt, node); 11806 /* 11807 * initialize the SAX2 namespaces stack 11808 */ 11809 cur = node; 11810 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) { 11811 xmlNsPtr ns = cur->nsDef; 11812 const xmlChar *iprefix, *ihref; 11813 11814 while (ns != NULL) { 11815 if (ctxt->dict) { 11816 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1); 11817 ihref = xmlDictLookup(ctxt->dict, ns->href, -1); 11818 } else { 11819 iprefix = ns->prefix; 11820 ihref = ns->href; 11821 } 11822 11823 if (xmlGetNamespace(ctxt, iprefix) == NULL) { 11824 nsPush(ctxt, iprefix, ihref); 11825 nsnr++; 11826 } 11827 ns = ns->next; 11828 } 11829 cur = cur->parent; 11830 } 11831 ctxt->instate = XML_PARSER_CONTENT; 11832 } 11833 11834 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) { 11835 /* 11836 * ID/IDREF registration will be done in xmlValidateElement below 11837 */ 11838 ctxt->loadsubset |= XML_SKIP_IDS; 11839 } 11840 11841#ifdef LIBXML_HTML_ENABLED 11842 if (doc->type == XML_HTML_DOCUMENT_NODE) 11843 __htmlParseContent(ctxt); 11844 else 11845#endif 11846 xmlParseContent(ctxt); 11847 11848 nsPop(ctxt, nsnr); 11849 if 
((RAW == '<') && (NXT(1) == '/')) { 11850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11851 } else if (RAW != 0) { 11852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 11853 } 11854 if ((ctxt->node != NULL) && (ctxt->node != node)) { 11855 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 11856 ctxt->wellFormed = 0; 11857 } 11858 11859 if (!ctxt->wellFormed) { 11860 if (ctxt->errNo == 0) 11861 ret = XML_ERR_INTERNAL_ERROR; 11862 else 11863 ret = (xmlParserErrors)ctxt->errNo; 11864 } else { 11865 ret = XML_ERR_OK; 11866 } 11867 11868 /* 11869 * Return the newly created nodeset after unlinking it from 11870 * the pseudo sibling. 11871 */ 11872 11873 cur = fake->next; 11874 fake->next = NULL; 11875 node->last = fake; 11876 11877 if (cur != NULL) { 11878 cur->prev = NULL; 11879 } 11880 11881 *lst = cur; 11882 11883 while (cur != NULL) { 11884 cur->parent = NULL; 11885 cur = cur->next; 11886 } 11887 11888 xmlUnlinkNode(fake); 11889 xmlFreeNode(fake); 11890 11891 11892 if (ret != XML_ERR_OK) { 11893 xmlFreeNodeList(*lst); 11894 *lst = NULL; 11895 } 11896 11897 if (doc->dict != NULL) 11898 ctxt->dict = NULL; 11899 xmlFreeParserCtxt(ctxt); 11900 11901 return(ret); 11902#else /* !SAX2 */ 11903 return(XML_ERR_INTERNAL_ERROR); 11904#endif 11905} 11906 11907#ifdef LIBXML_SAX1_ENABLED 11908/** 11909 * xmlParseBalancedChunkMemoryRecover: 11910 * @doc: the document the chunk pertains to 11911 * @sax: the SAX handler bloc (possibly NULL) 11912 * @user_data: The user data returned on SAX callbacks (possibly NULL) 11913 * @depth: Used for loop detection, use 0 11914 * @string: the input string in UTF8 or ISO-Latin (zero terminated) 11915 * @lst: the return value for the set of parsed nodes 11916 * @recover: return nodes even if the data is broken (use 0) 11917 * 11918 * 11919 * Parse a well-balanced chunk of an XML document 11920 * called by the parser 11921 * The allowed sequence for the Well Balanced Chunk is the one defined by 11922 * the content production in the XML 
grammar: 11923 * 11924 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 11925 * 11926 * Returns 0 if the chunk is well balanced, -1 in case of args problem and 11927 * the parser error code otherwise 11928 * 11929 * In case recover is set to 1, the nodelist will not be empty even if 11930 * the parsed chunk is not well balanced. 11931 */ 11932int 11933xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, 11934 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst, 11935 int recover) { 11936 xmlParserCtxtPtr ctxt; 11937 xmlDocPtr newDoc; 11938 xmlSAXHandlerPtr oldsax = NULL; 11939 xmlNodePtr content, newRoot; 11940 int size; 11941 int ret = 0; 11942 11943 if (depth > 40) { 11944 return(XML_ERR_ENTITY_LOOP); 11945 } 11946 11947 11948 if (lst != NULL) 11949 *lst = NULL; 11950 if (string == NULL) 11951 return(-1); 11952 11953 size = xmlStrlen(string); 11954 11955 ctxt = xmlCreateMemoryParserCtxt((char *) string, size); 11956 if (ctxt == NULL) return(-1); 11957 ctxt->userData = ctxt; 11958 if (sax != NULL) { 11959 oldsax = ctxt->sax; 11960 ctxt->sax = sax; 11961 if (user_data != NULL) 11962 ctxt->userData = user_data; 11963 } 11964 newDoc = xmlNewDoc(BAD_CAST "1.0"); 11965 if (newDoc == NULL) { 11966 xmlFreeParserCtxt(ctxt); 11967 return(-1); 11968 } 11969 if ((doc != NULL) && (doc->dict != NULL)) { 11970 xmlDictFree(ctxt->dict); 11971 ctxt->dict = doc->dict; 11972 xmlDictReference(ctxt->dict); 11973 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); 11974 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); 11975 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); 11976 ctxt->dictNames = 1; 11977 } else { 11978 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT); 11979 } 11980 if (doc != NULL) { 11981 newDoc->intSubset = doc->intSubset; 11982 newDoc->extSubset = doc->extSubset; 11983 } 11984 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); 11985 if (newRoot == 
NULL) { 11986 if (sax != NULL) 11987 ctxt->sax = oldsax; 11988 xmlFreeParserCtxt(ctxt); 11989 newDoc->intSubset = NULL; 11990 newDoc->extSubset = NULL; 11991 xmlFreeDoc(newDoc); 11992 return(-1); 11993 } 11994 xmlAddChild((xmlNodePtr) newDoc, newRoot); 11995 nodePush(ctxt, newRoot); 11996 if (doc == NULL) { 11997 ctxt->myDoc = newDoc; 11998 } else { 11999 ctxt->myDoc = newDoc; 12000 newDoc->children->doc = doc; 12001 /* Ensure that doc has XML spec namespace */ 12002 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE); 12003 newDoc->oldNs = doc->oldNs; 12004 } 12005 ctxt->instate = XML_PARSER_CONTENT; 12006 ctxt->depth = depth; 12007 12008 /* 12009 * Doing validity checking on chunk doesn't make sense 12010 */ 12011 ctxt->validate = 0; 12012 ctxt->loadsubset = 0; 12013 xmlDetectSAX2(ctxt); 12014 12015 if ( doc != NULL ){ 12016 content = doc->children; 12017 doc->children = NULL; 12018 xmlParseContent(ctxt); 12019 doc->children = content; 12020 } 12021 else { 12022 xmlParseContent(ctxt); 12023 } 12024 if ((RAW == '<') && (NXT(1) == '/')) { 12025 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12026 } else if (RAW != 0) { 12027 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL); 12028 } 12029 if (ctxt->node != newDoc->children) { 12030 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL); 12031 } 12032 12033 if (!ctxt->wellFormed) { 12034 if (ctxt->errNo == 0) 12035 ret = 1; 12036 else 12037 ret = ctxt->errNo; 12038 } else { 12039 ret = 0; 12040 } 12041 12042 if ((lst != NULL) && ((ret == 0) || (recover == 1))) { 12043 xmlNodePtr cur; 12044 12045 /* 12046 * Return the newly created nodeset after unlinking it from 12047 * they pseudo parent. 
12048 */ 12049 cur = newDoc->children->children; 12050 *lst = cur; 12051 while (cur != NULL) { 12052 xmlSetTreeDoc(cur, doc); 12053 cur->parent = NULL; 12054 cur = cur->next; 12055 } 12056 newDoc->children->children = NULL; 12057 } 12058 12059 if (sax != NULL) 12060 ctxt->sax = oldsax; 12061 xmlFreeParserCtxt(ctxt); 12062 newDoc->intSubset = NULL; 12063 newDoc->extSubset = NULL; 12064 newDoc->oldNs = NULL; 12065 xmlFreeDoc(newDoc); 12066 12067 return(ret); 12068} 12069 12070/** 12071 * xmlSAXParseEntity: 12072 * @sax: the SAX handler block 12073 * @filename: the filename 12074 * 12075 * parse an XML external entity out of context and build a tree. 12076 * It use the given SAX function block to handle the parsing callback. 12077 * If sax is NULL, fallback to the default DOM tree building routines. 12078 * 12079 * [78] extParsedEnt ::= TextDecl? content 12080 * 12081 * This correspond to a "Well Balanced" chunk 12082 * 12083 * Returns the resulting document tree 12084 */ 12085 12086xmlDocPtr 12087xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { 12088 xmlDocPtr ret; 12089 xmlParserCtxtPtr ctxt; 12090 12091 ctxt = xmlCreateFileParserCtxt(filename); 12092 if (ctxt == NULL) { 12093 return(NULL); 12094 } 12095 if (sax != NULL) { 12096 if (ctxt->sax != NULL) 12097 xmlFree(ctxt->sax); 12098 ctxt->sax = sax; 12099 ctxt->userData = NULL; 12100 } 12101 12102 xmlParseExtParsedEnt(ctxt); 12103 12104 if (ctxt->wellFormed) 12105 ret = ctxt->myDoc; 12106 else { 12107 ret = NULL; 12108 xmlFreeDoc(ctxt->myDoc); 12109 ctxt->myDoc = NULL; 12110 } 12111 if (sax != NULL) 12112 ctxt->sax = NULL; 12113 xmlFreeParserCtxt(ctxt); 12114 12115 return(ret); 12116} 12117 12118/** 12119 * xmlParseEntity: 12120 * @filename: the filename 12121 * 12122 * parse an XML external entity out of context and build a tree. 12123 * 12124 * [78] extParsedEnt ::= TextDecl? 
content 12125 * 12126 * This correspond to a "Well Balanced" chunk 12127 * 12128 * Returns the resulting document tree 12129 */ 12130 12131xmlDocPtr 12132xmlParseEntity(const char *filename) { 12133 return(xmlSAXParseEntity(NULL, filename)); 12134} 12135#endif /* LIBXML_SAX1_ENABLED */ 12136 12137/** 12138 * xmlCreateEntityParserCtxt: 12139 * @URL: the entity URL 12140 * @ID: the entity PUBLIC ID 12141 * @base: a possible base for the target URI 12142 * 12143 * Create a parser context for an external entity 12144 * Automatic support for ZLIB/Compress compressed document is provided 12145 * by default if found at compile-time. 12146 * 12147 * Returns the new parser context or NULL 12148 */ 12149xmlParserCtxtPtr 12150xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, 12151 const xmlChar *base) { 12152 xmlParserCtxtPtr ctxt; 12153 xmlParserInputPtr inputStream; 12154 char *directory = NULL; 12155 xmlChar *uri; 12156 12157 ctxt = xmlNewParserCtxt(); 12158 if (ctxt == NULL) { 12159 return(NULL); 12160 } 12161 12162 uri = xmlBuildURI(URL, base); 12163 12164 if (uri == NULL) { 12165 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); 12166 if (inputStream == NULL) { 12167 xmlFreeParserCtxt(ctxt); 12168 return(NULL); 12169 } 12170 12171 inputPush(ctxt, inputStream); 12172 12173 if ((ctxt->directory == NULL) && (directory == NULL)) 12174 directory = xmlParserGetDirectory((char *)URL); 12175 if ((ctxt->directory == NULL) && (directory != NULL)) 12176 ctxt->directory = directory; 12177 } else { 12178 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt); 12179 if (inputStream == NULL) { 12180 xmlFree(uri); 12181 xmlFreeParserCtxt(ctxt); 12182 return(NULL); 12183 } 12184 12185 inputPush(ctxt, inputStream); 12186 12187 if ((ctxt->directory == NULL) && (directory == NULL)) 12188 directory = xmlParserGetDirectory((char *)uri); 12189 if ((ctxt->directory == NULL) && (directory != NULL)) 12190 ctxt->directory = directory; 12191 
xmlFree(uri); 12192 } 12193 return(ctxt); 12194} 12195 12196/************************************************************************ 12197 * * 12198 * Front ends when parsing from a file * 12199 * * 12200 ************************************************************************/ 12201 12202/** 12203 * xmlCreateURLParserCtxt: 12204 * @filename: the filename or URL 12205 * @options: a combination of xmlParserOption 12206 * 12207 * Create a parser context for a file or URL content. 12208 * Automatic support for ZLIB/Compress compressed document is provided 12209 * by default if found at compile-time and for file accesses 12210 * 12211 * Returns the new parser context or NULL 12212 */ 12213xmlParserCtxtPtr 12214xmlCreateURLParserCtxt(const char *filename, int options) 12215{ 12216 xmlParserCtxtPtr ctxt; 12217 xmlParserInputPtr inputStream; 12218 char *directory = NULL; 12219 12220 ctxt = xmlNewParserCtxt(); 12221 if (ctxt == NULL) { 12222 xmlErrMemory(NULL, "cannot allocate parser context"); 12223 return(NULL); 12224 } 12225 12226 if (options) 12227 xmlCtxtUseOptions(ctxt, options); 12228 ctxt->linenumbers = 1; 12229 12230 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); 12231 if (inputStream == NULL) { 12232 xmlFreeParserCtxt(ctxt); 12233 return(NULL); 12234 } 12235 12236 inputPush(ctxt, inputStream); 12237 if ((ctxt->directory == NULL) && (directory == NULL)) 12238 directory = xmlParserGetDirectory(filename); 12239 if ((ctxt->directory == NULL) && (directory != NULL)) 12240 ctxt->directory = directory; 12241 12242 return(ctxt); 12243} 12244 12245/** 12246 * xmlCreateFileParserCtxt: 12247 * @filename: the filename 12248 * 12249 * Create a parser context for a file content. 12250 * Automatic support for ZLIB/Compress compressed document is provided 12251 * by default if found at compile-time. 
12252 * 12253 * Returns the new parser context or NULL 12254 */ 12255xmlParserCtxtPtr 12256xmlCreateFileParserCtxt(const char *filename) 12257{ 12258 return(xmlCreateURLParserCtxt(filename, 0)); 12259} 12260 12261#ifdef LIBXML_SAX1_ENABLED 12262/** 12263 * xmlSAXParseFileWithData: 12264 * @sax: the SAX handler block 12265 * @filename: the filename 12266 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12267 * documents 12268 * @data: the userdata 12269 * 12270 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12271 * compressed document is provided by default if found at compile-time. 12272 * It use the given SAX function block to handle the parsing callback. 12273 * If sax is NULL, fallback to the default DOM tree building routines. 12274 * 12275 * User data (void *) is stored within the parser context in the 12276 * context's _private member, so it is available nearly everywhere in libxml 12277 * 12278 * Returns the resulting document tree 12279 */ 12280 12281xmlDocPtr 12282xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, 12283 int recovery, void *data) { 12284 xmlDocPtr ret; 12285 xmlParserCtxtPtr ctxt; 12286 char *directory = NULL; 12287 12288 xmlInitParser(); 12289 12290 ctxt = xmlCreateFileParserCtxt(filename); 12291 if (ctxt == NULL) { 12292 return(NULL); 12293 } 12294 if (sax != NULL) { 12295 if (ctxt->sax != NULL) 12296 xmlFree(ctxt->sax); 12297 ctxt->sax = sax; 12298 } 12299 xmlDetectSAX2(ctxt); 12300 if (data!=NULL) { 12301 ctxt->_private = data; 12302 } 12303 12304 if ((ctxt->directory == NULL) && (directory == NULL)) 12305 directory = xmlParserGetDirectory(filename); 12306 if ((ctxt->directory == NULL) && (directory != NULL)) 12307 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); 12308 12309 ctxt->recovery = recovery; 12310 12311 xmlParseDocument(ctxt); 12312 12313 if ((ctxt->wellFormed) || recovery) { 12314 ret = ctxt->myDoc; 12315 if (ret != NULL) { 12316 if 
(ctxt->input->buf->compressed > 0) 12317 ret->compression = 9; 12318 else 12319 ret->compression = ctxt->input->buf->compressed; 12320 } 12321 } 12322 else { 12323 ret = NULL; 12324 xmlFreeDoc(ctxt->myDoc); 12325 ctxt->myDoc = NULL; 12326 } 12327 if (sax != NULL) 12328 ctxt->sax = NULL; 12329 xmlFreeParserCtxt(ctxt); 12330 12331 return(ret); 12332} 12333 12334/** 12335 * xmlSAXParseFile: 12336 * @sax: the SAX handler block 12337 * @filename: the filename 12338 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12339 * documents 12340 * 12341 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12342 * compressed document is provided by default if found at compile-time. 12343 * It use the given SAX function block to handle the parsing callback. 12344 * If sax is NULL, fallback to the default DOM tree building routines. 12345 * 12346 * Returns the resulting document tree 12347 */ 12348 12349xmlDocPtr 12350xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, 12351 int recovery) { 12352 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL)); 12353} 12354 12355/** 12356 * xmlRecoverDoc: 12357 * @cur: a pointer to an array of xmlChar 12358 * 12359 * parse an XML in-memory document and build a tree. 12360 * In the case the document is not Well Formed, a tree is built anyway 12361 * 12362 * Returns the resulting document tree 12363 */ 12364 12365xmlDocPtr 12366xmlRecoverDoc(xmlChar *cur) { 12367 return(xmlSAXParseDoc(NULL, cur, 1)); 12368} 12369 12370/** 12371 * xmlParseFile: 12372 * @filename: the filename 12373 * 12374 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12375 * compressed document is provided by default if found at compile-time. 12376 * 12377 * Returns the resulting document tree if the file was wellformed, 12378 * NULL otherwise. 
12379 */ 12380 12381xmlDocPtr 12382xmlParseFile(const char *filename) { 12383 return(xmlSAXParseFile(NULL, filename, 0)); 12384} 12385 12386/** 12387 * xmlRecoverFile: 12388 * @filename: the filename 12389 * 12390 * parse an XML file and build a tree. Automatic support for ZLIB/Compress 12391 * compressed document is provided by default if found at compile-time. 12392 * In the case the document is not Well Formed, a tree is built anyway 12393 * 12394 * Returns the resulting document tree 12395 */ 12396 12397xmlDocPtr 12398xmlRecoverFile(const char *filename) { 12399 return(xmlSAXParseFile(NULL, filename, 1)); 12400} 12401 12402 12403/** 12404 * xmlSetupParserForBuffer: 12405 * @ctxt: an XML parser context 12406 * @buffer: a xmlChar * buffer 12407 * @filename: a file name 12408 * 12409 * Setup the parser context to parse a new buffer; Clears any prior 12410 * contents from the parser context. The buffer parameter must not be 12411 * NULL, but the filename parameter can be 12412 */ 12413void 12414xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, 12415 const char* filename) 12416{ 12417 xmlParserInputPtr input; 12418 12419 if ((ctxt == NULL) || (buffer == NULL)) 12420 return; 12421 12422 input = xmlNewInputStream(ctxt); 12423 if (input == NULL) { 12424 xmlErrMemory(NULL, "parsing new buffer: out of memory\n"); 12425 xmlClearParserCtxt(ctxt); 12426 return; 12427 } 12428 12429 xmlClearParserCtxt(ctxt); 12430 if (filename != NULL) 12431 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); 12432 input->base = buffer; 12433 input->cur = buffer; 12434 input->end = &buffer[xmlStrlen(buffer)]; 12435 inputPush(ctxt, input); 12436} 12437 12438/** 12439 * xmlSAXUserParseFile: 12440 * @sax: a SAX handler 12441 * @user_data: The user data returned on SAX callbacks 12442 * @filename: a file name 12443 * 12444 * parse an XML file and call the given SAX handler routines. 
12445 * Automatic support for ZLIB/Compress compressed document is provided 12446 * 12447 * Returns 0 in case of success or a error number otherwise 12448 */ 12449int 12450xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, 12451 const char *filename) { 12452 int ret = 0; 12453 xmlParserCtxtPtr ctxt; 12454 12455 ctxt = xmlCreateFileParserCtxt(filename); 12456 if (ctxt == NULL) return -1; 12457#ifdef LIBXML_SAX1_ENABLED 12458 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) 12459#endif /* LIBXML_SAX1_ENABLED */ 12460 xmlFree(ctxt->sax); 12461 ctxt->sax = sax; 12462 xmlDetectSAX2(ctxt); 12463 12464 if (user_data != NULL) 12465 ctxt->userData = user_data; 12466 12467 xmlParseDocument(ctxt); 12468 12469 if (ctxt->wellFormed) 12470 ret = 0; 12471 else { 12472 if (ctxt->errNo != 0) 12473 ret = ctxt->errNo; 12474 else 12475 ret = -1; 12476 } 12477 if (sax != NULL) 12478 ctxt->sax = NULL; 12479 if (ctxt->myDoc != NULL) { 12480 xmlFreeDoc(ctxt->myDoc); 12481 ctxt->myDoc = NULL; 12482 } 12483 xmlFreeParserCtxt(ctxt); 12484 12485 return ret; 12486} 12487#endif /* LIBXML_SAX1_ENABLED */ 12488 12489/************************************************************************ 12490 * * 12491 * Front ends when parsing from memory * 12492 * * 12493 ************************************************************************/ 12494 12495/** 12496 * xmlCreateMemoryParserCtxt: 12497 * @buffer: a pointer to a char array 12498 * @size: the size of the array 12499 * 12500 * Create a parser context for an XML in-memory document. 
12501 * 12502 * Returns the new parser context or NULL 12503 */ 12504xmlParserCtxtPtr 12505xmlCreateMemoryParserCtxt(const char *buffer, int size) { 12506 xmlParserCtxtPtr ctxt; 12507 xmlParserInputPtr input; 12508 xmlParserInputBufferPtr buf; 12509 12510 if (buffer == NULL) 12511 return(NULL); 12512 if (size <= 0) 12513 return(NULL); 12514 12515 ctxt = xmlNewParserCtxt(); 12516 if (ctxt == NULL) 12517 return(NULL); 12518 12519 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */ 12520 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 12521 if (buf == NULL) { 12522 xmlFreeParserCtxt(ctxt); 12523 return(NULL); 12524 } 12525 12526 input = xmlNewInputStream(ctxt); 12527 if (input == NULL) { 12528 xmlFreeParserInputBuffer(buf); 12529 xmlFreeParserCtxt(ctxt); 12530 return(NULL); 12531 } 12532 12533 input->filename = NULL; 12534 input->buf = buf; 12535 input->base = input->buf->buffer->content; 12536 input->cur = input->buf->buffer->content; 12537 input->end = &input->buf->buffer->content[input->buf->buffer->use]; 12538 12539 inputPush(ctxt, input); 12540 return(ctxt); 12541} 12542 12543#ifdef LIBXML_SAX1_ENABLED 12544/** 12545 * xmlSAXParseMemoryWithData: 12546 * @sax: the SAX handler block 12547 * @buffer: an pointer to a char array 12548 * @size: the size of the array 12549 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12550 * documents 12551 * @data: the userdata 12552 * 12553 * parse an XML in-memory block and use the given SAX function block 12554 * to handle the parsing callback. If sax is NULL, fallback to the default 12555 * DOM tree building routines. 
12556 * 12557 * User data (void *) is stored within the parser context in the 12558 * context's _private member, so it is available nearly everywhere in libxml 12559 * 12560 * Returns the resulting document tree 12561 */ 12562 12563xmlDocPtr 12564xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, 12565 int size, int recovery, void *data) { 12566 xmlDocPtr ret; 12567 xmlParserCtxtPtr ctxt; 12568 12569 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 12570 if (ctxt == NULL) return(NULL); 12571 if (sax != NULL) { 12572 if (ctxt->sax != NULL) 12573 xmlFree(ctxt->sax); 12574 ctxt->sax = sax; 12575 } 12576 xmlDetectSAX2(ctxt); 12577 if (data!=NULL) { 12578 ctxt->_private=data; 12579 } 12580 12581 ctxt->recovery = recovery; 12582 12583 xmlParseDocument(ctxt); 12584 12585 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 12586 else { 12587 ret = NULL; 12588 xmlFreeDoc(ctxt->myDoc); 12589 ctxt->myDoc = NULL; 12590 } 12591 if (sax != NULL) 12592 ctxt->sax = NULL; 12593 xmlFreeParserCtxt(ctxt); 12594 12595 return(ret); 12596} 12597 12598/** 12599 * xmlSAXParseMemory: 12600 * @sax: the SAX handler block 12601 * @buffer: an pointer to a char array 12602 * @size: the size of the array 12603 * @recovery: work in recovery mode, i.e. tries to read not Well Formed 12604 * documents 12605 * 12606 * parse an XML in-memory block and use the given SAX function block 12607 * to handle the parsing callback. If sax is NULL, fallback to the default 12608 * DOM tree building routines. 12609 * 12610 * Returns the resulting document tree 12611 */ 12612xmlDocPtr 12613xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer, 12614 int size, int recovery) { 12615 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL); 12616} 12617 12618/** 12619 * xmlParseMemory: 12620 * @buffer: an pointer to a char array 12621 * @size: the size of the array 12622 * 12623 * parse an XML in-memory block and build a tree. 
12624 * 12625 * Returns the resulting document tree 12626 */ 12627 12628xmlDocPtr xmlParseMemory(const char *buffer, int size) { 12629 return(xmlSAXParseMemory(NULL, buffer, size, 0)); 12630} 12631 12632/** 12633 * xmlRecoverMemory: 12634 * @buffer: an pointer to a char array 12635 * @size: the size of the array 12636 * 12637 * parse an XML in-memory block and build a tree. 12638 * In the case the document is not Well Formed, a tree is built anyway 12639 * 12640 * Returns the resulting document tree 12641 */ 12642 12643xmlDocPtr xmlRecoverMemory(const char *buffer, int size) { 12644 return(xmlSAXParseMemory(NULL, buffer, size, 1)); 12645} 12646 12647/** 12648 * xmlSAXUserParseMemory: 12649 * @sax: a SAX handler 12650 * @user_data: The user data returned on SAX callbacks 12651 * @buffer: an in-memory XML document input 12652 * @size: the length of the XML document in bytes 12653 * 12654 * A better SAX parsing routine. 12655 * parse an XML in-memory buffer and call the given SAX handler routines. 
12656 * 12657 * Returns 0 in case of success or a error number otherwise 12658 */ 12659int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, 12660 const char *buffer, int size) { 12661 int ret = 0; 12662 xmlParserCtxtPtr ctxt; 12663 xmlSAXHandlerPtr oldsax = NULL; 12664 12665 if (sax == NULL) return -1; 12666 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 12667 if (ctxt == NULL) return -1; 12668 oldsax = ctxt->sax; 12669 ctxt->sax = sax; 12670 xmlDetectSAX2(ctxt); 12671 if (user_data != NULL) 12672 ctxt->userData = user_data; 12673 12674 xmlParseDocument(ctxt); 12675 12676 if (ctxt->wellFormed) 12677 ret = 0; 12678 else { 12679 if (ctxt->errNo != 0) 12680 ret = ctxt->errNo; 12681 else 12682 ret = -1; 12683 } 12684 ctxt->sax = oldsax; 12685 if (ctxt->myDoc != NULL) { 12686 xmlFreeDoc(ctxt->myDoc); 12687 ctxt->myDoc = NULL; 12688 } 12689 xmlFreeParserCtxt(ctxt); 12690 12691 return ret; 12692} 12693#endif /* LIBXML_SAX1_ENABLED */ 12694 12695/** 12696 * xmlCreateDocParserCtxt: 12697 * @cur: a pointer to an array of xmlChar 12698 * 12699 * Creates a parser context for an XML in-memory document. 12700 * 12701 * Returns the new parser context or NULL 12702 */ 12703xmlParserCtxtPtr 12704xmlCreateDocParserCtxt(const xmlChar *cur) { 12705 int len; 12706 12707 if (cur == NULL) 12708 return(NULL); 12709 len = xmlStrlen(cur); 12710 return(xmlCreateMemoryParserCtxt((const char *)cur, len)); 12711} 12712 12713#ifdef LIBXML_SAX1_ENABLED 12714/** 12715 * xmlSAXParseDoc: 12716 * @sax: the SAX handler block 12717 * @cur: a pointer to an array of xmlChar 12718 * @recovery: work in recovery mode, i.e. tries to read no Well Formed 12719 * documents 12720 * 12721 * parse an XML in-memory document and build a tree. 12722 * It use the given SAX function block to handle the parsing callback. 12723 * If sax is NULL, fallback to the default DOM tree building routines. 
12724 * 12725 * Returns the resulting document tree 12726 */ 12727 12728xmlDocPtr 12729xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) { 12730 xmlDocPtr ret; 12731 xmlParserCtxtPtr ctxt; 12732 xmlSAXHandlerPtr oldsax = NULL; 12733 12734 if (cur == NULL) return(NULL); 12735 12736 12737 ctxt = xmlCreateDocParserCtxt(cur); 12738 if (ctxt == NULL) return(NULL); 12739 if (sax != NULL) { 12740 oldsax = ctxt->sax; 12741 ctxt->sax = sax; 12742 ctxt->userData = NULL; 12743 } 12744 xmlDetectSAX2(ctxt); 12745 12746 xmlParseDocument(ctxt); 12747 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; 12748 else { 12749 ret = NULL; 12750 xmlFreeDoc(ctxt->myDoc); 12751 ctxt->myDoc = NULL; 12752 } 12753 if (sax != NULL) 12754 ctxt->sax = oldsax; 12755 xmlFreeParserCtxt(ctxt); 12756 12757 return(ret); 12758} 12759 12760/** 12761 * xmlParseDoc: 12762 * @cur: a pointer to an array of xmlChar 12763 * 12764 * parse an XML in-memory document and build a tree. 12765 * 12766 * Returns the resulting document tree 12767 */ 12768 12769xmlDocPtr 12770xmlParseDoc(const xmlChar *cur) { 12771 return(xmlSAXParseDoc(NULL, cur, 0)); 12772} 12773#endif /* LIBXML_SAX1_ENABLED */ 12774 12775#ifdef LIBXML_LEGACY_ENABLED 12776/************************************************************************ 12777 * * 12778 * Specific function to keep track of entities references * 12779 * and used by the XSLT debugger * 12780 * * 12781 ************************************************************************/ 12782 12783static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; 12784 12785/** 12786 * xmlAddEntityReference: 12787 * @ent : A valid entity 12788 * @firstNode : A valid first node for children of entity 12789 * @lastNode : A valid last node of children entity 12790 * 12791 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY 12792 */ 12793static void 12794xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, 12795 xmlNodePtr lastNode) 12796{ 12797 
if (xmlEntityRefFunc != NULL) { 12798 (*xmlEntityRefFunc) (ent, firstNode, lastNode); 12799 } 12800} 12801 12802 12803/** 12804 * xmlSetEntityReferenceFunc: 12805 * @func: A valid function 12806 * 12807 * Set the function to call call back when a xml reference has been made 12808 */ 12809void 12810xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func) 12811{ 12812 xmlEntityRefFunc = func; 12813} 12814#endif /* LIBXML_LEGACY_ENABLED */ 12815 12816/************************************************************************ 12817 * * 12818 * Miscellaneous * 12819 * * 12820 ************************************************************************/ 12821 12822#ifdef LIBXML_XPATH_ENABLED 12823#include <libxml/xpath.h> 12824#endif 12825 12826extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...); 12827static int xmlParserInitialized = 0; 12828 12829/** 12830 * xmlInitParser: 12831 * 12832 * Initialization function for the XML parser. 12833 * This is not reentrant. Call once before processing in case of 12834 * use in multithreaded programs. 12835 */ 12836 12837void 12838xmlInitParser(void) { 12839 if (xmlParserInitialized != 0) 12840 return; 12841 12842 if ((xmlGenericError == xmlGenericErrorDefaultFunc) || 12843 (xmlGenericError == NULL)) 12844 initGenericErrorDefaultFunc(NULL); 12845 xmlInitGlobals(); 12846 xmlInitThreads(); 12847 xmlInitMemory(); 12848 xmlInitCharEncodingHandlers(); 12849 xmlDefaultSAXHandlerInit(); 12850 xmlRegisterDefaultInputCallbacks(); 12851#ifdef LIBXML_OUTPUT_ENABLED 12852 xmlRegisterDefaultOutputCallbacks(); 12853#endif /* LIBXML_OUTPUT_ENABLED */ 12854#ifdef LIBXML_HTML_ENABLED 12855 htmlInitAutoClose(); 12856 htmlDefaultSAXHandlerInit(); 12857#endif 12858#ifdef LIBXML_XPATH_ENABLED 12859 xmlXPathInit(); 12860#endif 12861 xmlParserInitialized = 1; 12862} 12863 12864/** 12865 * xmlCleanupParser: 12866 * 12867 * Cleanup function for the XML library. 
It tries to reclaim all 12868 * parsing related global memory allocated for the library processing. 12869 * It doesn't deallocate any document related memory. Calling this 12870 * function should not prevent reusing the library but one should 12871 * call xmlCleanupParser() only when the process has 12872 * finished using the library or XML document built with it. 12873 */ 12874 12875void 12876xmlCleanupParser(void) { 12877 if (!xmlParserInitialized) 12878 return; 12879 12880 xmlCleanupCharEncodingHandlers(); 12881#ifdef LIBXML_CATALOG_ENABLED 12882 xmlCatalogCleanup(); 12883#endif 12884 xmlDictCleanup(); 12885 xmlCleanupInputCallbacks(); 12886#ifdef LIBXML_OUTPUT_ENABLED 12887 xmlCleanupOutputCallbacks(); 12888#endif 12889#ifdef LIBXML_SCHEMAS_ENABLED 12890 xmlSchemaCleanupTypes(); 12891 xmlRelaxNGCleanupTypes(); 12892#endif 12893 xmlCleanupGlobals(); 12894 xmlResetLastError(); 12895 xmlCleanupThreads(); /* must be last if called not from the main thread */ 12896 xmlCleanupMemory(); 12897 xmlParserInitialized = 0; 12898} 12899 12900/************************************************************************ 12901 * * 12902 * New set (2.6.0) of simpler and more flexible APIs * 12903 * * 12904 ************************************************************************/ 12905 12906/** 12907 * DICT_FREE: 12908 * @str: a string 12909 * 12910 * Free a string if it is not owned by the "dict" dictionnary in the 12911 * current scope 12912 */ 12913#define DICT_FREE(str) \ 12914 if ((str) && ((!dict) || \ 12915 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ 12916 xmlFree((char *)(str)); 12917 12918/** 12919 * xmlCtxtReset: 12920 * @ctxt: an XML parser context 12921 * 12922 * Reset a parser context 12923 */ 12924void 12925xmlCtxtReset(xmlParserCtxtPtr ctxt) 12926{ 12927 xmlParserInputPtr input; 12928 xmlDictPtr dict; 12929 12930 if (ctxt == NULL) 12931 return; 12932 12933 dict = ctxt->dict; 12934 12935 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 12936 
xmlFreeInputStream(input); 12937 } 12938 ctxt->inputNr = 0; 12939 ctxt->input = NULL; 12940 12941 ctxt->spaceNr = 0; 12942 ctxt->spaceTab[0] = -1; 12943 ctxt->space = &ctxt->spaceTab[0]; 12944 12945 12946 ctxt->nodeNr = 0; 12947 ctxt->node = NULL; 12948 12949 ctxt->nameNr = 0; 12950 ctxt->name = NULL; 12951 12952 DICT_FREE(ctxt->version); 12953 ctxt->version = NULL; 12954 DICT_FREE(ctxt->encoding); 12955 ctxt->encoding = NULL; 12956 DICT_FREE(ctxt->directory); 12957 ctxt->directory = NULL; 12958 DICT_FREE(ctxt->extSubURI); 12959 ctxt->extSubURI = NULL; 12960 DICT_FREE(ctxt->extSubSystem); 12961 ctxt->extSubSystem = NULL; 12962 if (ctxt->myDoc != NULL) 12963 xmlFreeDoc(ctxt->myDoc); 12964 ctxt->myDoc = NULL; 12965 12966 ctxt->standalone = -1; 12967 ctxt->hasExternalSubset = 0; 12968 ctxt->hasPErefs = 0; 12969 ctxt->html = 0; 12970 ctxt->external = 0; 12971 ctxt->instate = XML_PARSER_START; 12972 ctxt->token = 0; 12973 12974 ctxt->wellFormed = 1; 12975 ctxt->nsWellFormed = 1; 12976 ctxt->disableSAX = 0; 12977 ctxt->valid = 1; 12978#if 0 12979 ctxt->vctxt.userData = ctxt; 12980 ctxt->vctxt.error = xmlParserValidityError; 12981 ctxt->vctxt.warning = xmlParserValidityWarning; 12982#endif 12983 ctxt->record_info = 0; 12984 ctxt->nbChars = 0; 12985 ctxt->checkIndex = 0; 12986 ctxt->inSubset = 0; 12987 ctxt->errNo = XML_ERR_OK; 12988 ctxt->depth = 0; 12989 ctxt->charset = XML_CHAR_ENCODING_UTF8; 12990 ctxt->catalogs = NULL; 12991 xmlInitNodeInfoSeq(&ctxt->node_seq); 12992 12993 if (ctxt->attsDefault != NULL) { 12994 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 12995 ctxt->attsDefault = NULL; 12996 } 12997 if (ctxt->attsSpecial != NULL) { 12998 xmlHashFree(ctxt->attsSpecial, NULL); 12999 ctxt->attsSpecial = NULL; 13000 } 13001 13002#ifdef LIBXML_CATALOG_ENABLED 13003 if (ctxt->catalogs != NULL) 13004 xmlCatalogFreeLocal(ctxt->catalogs); 13005#endif 13006 if (ctxt->lastError.code != XML_ERR_OK) 13007 xmlResetError(&ctxt->lastError); 13008} 13009 13010/** 
13011 * xmlCtxtResetPush: 13012 * @ctxt: an XML parser context 13013 * @chunk: a pointer to an array of chars 13014 * @size: number of chars in the array 13015 * @filename: an optional file name or URI 13016 * @encoding: the document encoding, or NULL 13017 * 13018 * Reset a push parser context 13019 * 13020 * Returns 0 in case of success and 1 in case of error 13021 */ 13022int 13023xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, 13024 int size, const char *filename, const char *encoding) 13025{ 13026 xmlParserInputPtr inputStream; 13027 xmlParserInputBufferPtr buf; 13028 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; 13029 13030 if (ctxt == NULL) 13031 return(1); 13032 13033 if ((encoding == NULL) && (chunk != NULL) && (size >= 4)) 13034 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size); 13035 13036 buf = xmlAllocParserInputBuffer(enc); 13037 if (buf == NULL) 13038 return(1); 13039 13040 if (ctxt == NULL) { 13041 xmlFreeParserInputBuffer(buf); 13042 return(1); 13043 } 13044 13045 xmlCtxtReset(ctxt); 13046 13047 if (ctxt->pushTab == NULL) { 13048 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * 13049 sizeof(xmlChar *)); 13050 if (ctxt->pushTab == NULL) { 13051 xmlErrMemory(ctxt, NULL); 13052 xmlFreeParserInputBuffer(buf); 13053 return(1); 13054 } 13055 } 13056 13057 if (filename == NULL) { 13058 ctxt->directory = NULL; 13059 } else { 13060 ctxt->directory = xmlParserGetDirectory(filename); 13061 } 13062 13063 inputStream = xmlNewInputStream(ctxt); 13064 if (inputStream == NULL) { 13065 xmlFreeParserInputBuffer(buf); 13066 return(1); 13067 } 13068 13069 if (filename == NULL) 13070 inputStream->filename = NULL; 13071 else 13072 inputStream->filename = (char *) 13073 xmlCanonicPath((const xmlChar *) filename); 13074 inputStream->buf = buf; 13075 inputStream->base = inputStream->buf->buffer->content; 13076 inputStream->cur = inputStream->buf->buffer->content; 13077 inputStream->end = 13078 
&inputStream->buf->buffer->content[inputStream->buf->buffer->use]; 13079 13080 inputPush(ctxt, inputStream); 13081 13082 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 13083 (ctxt->input->buf != NULL)) { 13084 int base = ctxt->input->base - ctxt->input->buf->buffer->content; 13085 int cur = ctxt->input->cur - ctxt->input->base; 13086 13087 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 13088 13089 ctxt->input->base = ctxt->input->buf->buffer->content + base; 13090 ctxt->input->cur = ctxt->input->base + cur; 13091 ctxt->input->end = 13092 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer-> 13093 use]; 13094#ifdef DEBUG_PUSH 13095 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); 13096#endif 13097 } 13098 13099 if (encoding != NULL) { 13100 xmlCharEncodingHandlerPtr hdlr; 13101 13102 hdlr = xmlFindCharEncodingHandler(encoding); 13103 if (hdlr != NULL) { 13104 xmlSwitchToEncoding(ctxt, hdlr); 13105 } else { 13106 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 13107 "Unsupported encoding %s\n", BAD_CAST encoding); 13108 } 13109 } else if (enc != XML_CHAR_ENCODING_NONE) { 13110 xmlSwitchEncoding(ctxt, enc); 13111 } 13112 13113 return(0); 13114} 13115 13116/** 13117 * xmlCtxtUseOptions: 13118 * @ctxt: an XML parser context 13119 * @options: a combination of xmlParserOption 13120 * 13121 * Applies the options to the parser context 13122 * 13123 * Returns 0 in case of success, the set of unknown or unimplemented options 13124 * in case of error. 
13125 */ 13126int 13127xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) 13128{ 13129 if (ctxt == NULL) 13130 return(-1); 13131 if (options & XML_PARSE_RECOVER) { 13132 ctxt->recovery = 1; 13133 options -= XML_PARSE_RECOVER; 13134 } else 13135 ctxt->recovery = 0; 13136 if (options & XML_PARSE_DTDLOAD) { 13137 ctxt->loadsubset = XML_DETECT_IDS; 13138 options -= XML_PARSE_DTDLOAD; 13139 } else 13140 ctxt->loadsubset = 0; 13141 if (options & XML_PARSE_DTDATTR) { 13142 ctxt->loadsubset |= XML_COMPLETE_ATTRS; 13143 options -= XML_PARSE_DTDATTR; 13144 } 13145 if (options & XML_PARSE_NOENT) { 13146 ctxt->replaceEntities = 1; 13147 /* ctxt->loadsubset |= XML_DETECT_IDS; */ 13148 options -= XML_PARSE_NOENT; 13149 } else 13150 ctxt->replaceEntities = 0; 13151 if (options & XML_PARSE_PEDANTIC) { 13152 ctxt->pedantic = 1; 13153 options -= XML_PARSE_PEDANTIC; 13154 } else 13155 ctxt->pedantic = 0; 13156 if (options & XML_PARSE_NOBLANKS) { 13157 ctxt->keepBlanks = 0; 13158 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 13159 options -= XML_PARSE_NOBLANKS; 13160 } else 13161 ctxt->keepBlanks = 1; 13162 if (options & XML_PARSE_DTDVALID) { 13163 ctxt->validate = 1; 13164 if (options & XML_PARSE_NOWARNING) 13165 ctxt->vctxt.warning = NULL; 13166 if (options & XML_PARSE_NOERROR) 13167 ctxt->vctxt.error = NULL; 13168 options -= XML_PARSE_DTDVALID; 13169 } else 13170 ctxt->validate = 0; 13171 if (options & XML_PARSE_NOWARNING) { 13172 ctxt->sax->warning = NULL; 13173 options -= XML_PARSE_NOWARNING; 13174 } 13175 if (options & XML_PARSE_NOERROR) { 13176 ctxt->sax->error = NULL; 13177 ctxt->sax->fatalError = NULL; 13178 options -= XML_PARSE_NOERROR; 13179 } 13180#ifdef LIBXML_SAX1_ENABLED 13181 if (options & XML_PARSE_SAX1) { 13182 ctxt->sax->startElement = xmlSAX2StartElement; 13183 ctxt->sax->endElement = xmlSAX2EndElement; 13184 ctxt->sax->startElementNs = NULL; 13185 ctxt->sax->endElementNs = NULL; 13186 ctxt->sax->initialized = 1; 13187 options -= XML_PARSE_SAX1; 
13188 } 13189#endif /* LIBXML_SAX1_ENABLED */ 13190 if (options & XML_PARSE_NODICT) { 13191 ctxt->dictNames = 0; 13192 options -= XML_PARSE_NODICT; 13193 } else { 13194 ctxt->dictNames = 1; 13195 } 13196 if (options & XML_PARSE_NOCDATA) { 13197 ctxt->sax->cdataBlock = NULL; 13198 options -= XML_PARSE_NOCDATA; 13199 } 13200 if (options & XML_PARSE_NSCLEAN) { 13201 ctxt->options |= XML_PARSE_NSCLEAN; 13202 options -= XML_PARSE_NSCLEAN; 13203 } 13204 if (options & XML_PARSE_NONET) { 13205 ctxt->options |= XML_PARSE_NONET; 13206 options -= XML_PARSE_NONET; 13207 } 13208 if (options & XML_PARSE_COMPACT) { 13209 ctxt->options |= XML_PARSE_COMPACT; 13210 options -= XML_PARSE_COMPACT; 13211 } 13212 ctxt->linenumbers = 1; 13213 return (options); 13214} 13215 13216/** 13217 * xmlDoRead: 13218 * @ctxt: an XML parser context 13219 * @URL: the base URL to use for the document 13220 * @encoding: the document encoding, or NULL 13221 * @options: a combination of xmlParserOption 13222 * @reuse: keep the context for reuse 13223 * 13224 * Common front-end for the xmlRead functions 13225 * 13226 * Returns the resulting document tree or NULL 13227 */ 13228static xmlDocPtr 13229xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, 13230 int options, int reuse) 13231{ 13232 xmlDocPtr ret; 13233 13234 xmlCtxtUseOptions(ctxt, options); 13235 if (encoding != NULL) { 13236 xmlCharEncodingHandlerPtr hdlr; 13237 13238 hdlr = xmlFindCharEncodingHandler(encoding); 13239 if (hdlr != NULL) 13240 xmlSwitchToEncoding(ctxt, hdlr); 13241 } 13242 if ((URL != NULL) && (ctxt->input != NULL) && 13243 (ctxt->input->filename == NULL)) 13244 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); 13245 xmlParseDocument(ctxt); 13246 if ((ctxt->wellFormed) || ctxt->recovery) 13247 ret = ctxt->myDoc; 13248 else { 13249 ret = NULL; 13250 if (ctxt->myDoc != NULL) { 13251 xmlFreeDoc(ctxt->myDoc); 13252 } 13253 } 13254 ctxt->myDoc = NULL; 13255 if (!reuse) { 13256 
xmlFreeParserCtxt(ctxt); 13257 } 13258 13259 return (ret); 13260} 13261 13262/** 13263 * xmlReadDoc: 13264 * @cur: a pointer to a zero terminated string 13265 * @URL: the base URL to use for the document 13266 * @encoding: the document encoding, or NULL 13267 * @options: a combination of xmlParserOption 13268 * 13269 * parse an XML in-memory document and build a tree. 13270 * 13271 * Returns the resulting document tree 13272 */ 13273xmlDocPtr 13274xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) 13275{ 13276 xmlParserCtxtPtr ctxt; 13277 13278 if (cur == NULL) 13279 return (NULL); 13280 13281 ctxt = xmlCreateDocParserCtxt(cur); 13282 if (ctxt == NULL) 13283 return (NULL); 13284 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13285} 13286 13287/** 13288 * xmlReadFile: 13289 * @filename: a file or URL 13290 * @encoding: the document encoding, or NULL 13291 * @options: a combination of xmlParserOption 13292 * 13293 * parse an XML file from the filesystem or the network. 13294 * 13295 * Returns the resulting document tree 13296 */ 13297xmlDocPtr 13298xmlReadFile(const char *filename, const char *encoding, int options) 13299{ 13300 xmlParserCtxtPtr ctxt; 13301 13302 ctxt = xmlCreateURLParserCtxt(filename, options); 13303 if (ctxt == NULL) 13304 return (NULL); 13305 return (xmlDoRead(ctxt, NULL, encoding, options, 0)); 13306} 13307 13308/** 13309 * xmlReadMemory: 13310 * @buffer: a pointer to a char array 13311 * @size: the size of the array 13312 * @URL: the base URL to use for the document 13313 * @encoding: the document encoding, or NULL 13314 * @options: a combination of xmlParserOption 13315 * 13316 * parse an XML in-memory document and build a tree. 
13317 * 13318 * Returns the resulting document tree 13319 */ 13320xmlDocPtr 13321xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) 13322{ 13323 xmlParserCtxtPtr ctxt; 13324 13325 ctxt = xmlCreateMemoryParserCtxt(buffer, size); 13326 if (ctxt == NULL) 13327 return (NULL); 13328 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13329} 13330 13331/** 13332 * xmlReadFd: 13333 * @fd: an open file descriptor 13334 * @URL: the base URL to use for the document 13335 * @encoding: the document encoding, or NULL 13336 * @options: a combination of xmlParserOption 13337 * 13338 * parse an XML from a file descriptor and build a tree. 13339 * NOTE that the file descriptor will not be closed when the 13340 * reader is closed or reset. 13341 * 13342 * Returns the resulting document tree 13343 */ 13344xmlDocPtr 13345xmlReadFd(int fd, const char *URL, const char *encoding, int options) 13346{ 13347 xmlParserCtxtPtr ctxt; 13348 xmlParserInputBufferPtr input; 13349 xmlParserInputPtr stream; 13350 13351 if (fd < 0) 13352 return (NULL); 13353 13354 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 13355 if (input == NULL) 13356 return (NULL); 13357 input->closecallback = NULL; 13358 ctxt = xmlNewParserCtxt(); 13359 if (ctxt == NULL) { 13360 xmlFreeParserInputBuffer(input); 13361 return (NULL); 13362 } 13363 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13364 if (stream == NULL) { 13365 xmlFreeParserInputBuffer(input); 13366 xmlFreeParserCtxt(ctxt); 13367 return (NULL); 13368 } 13369 inputPush(ctxt, stream); 13370 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13371} 13372 13373/** 13374 * xmlReadIO: 13375 * @ioread: an I/O read function 13376 * @ioclose: an I/O close function 13377 * @ioctx: an I/O handler 13378 * @URL: the base URL to use for the document 13379 * @encoding: the document encoding, or NULL 13380 * @options: a combination of xmlParserOption 13381 * 13382 * parse an XML document from 
I/O functions and source and build a tree. 13383 * 13384 * Returns the resulting document tree 13385 */ 13386xmlDocPtr 13387xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, 13388 void *ioctx, const char *URL, const char *encoding, int options) 13389{ 13390 xmlParserCtxtPtr ctxt; 13391 xmlParserInputBufferPtr input; 13392 xmlParserInputPtr stream; 13393 13394 if (ioread == NULL) 13395 return (NULL); 13396 13397 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 13398 XML_CHAR_ENCODING_NONE); 13399 if (input == NULL) 13400 return (NULL); 13401 ctxt = xmlNewParserCtxt(); 13402 if (ctxt == NULL) { 13403 xmlFreeParserInputBuffer(input); 13404 return (NULL); 13405 } 13406 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13407 if (stream == NULL) { 13408 xmlFreeParserInputBuffer(input); 13409 xmlFreeParserCtxt(ctxt); 13410 return (NULL); 13411 } 13412 inputPush(ctxt, stream); 13413 return (xmlDoRead(ctxt, URL, encoding, options, 0)); 13414} 13415 13416/** 13417 * xmlCtxtReadDoc: 13418 * @ctxt: an XML parser context 13419 * @cur: a pointer to a zero terminated string 13420 * @URL: the base URL to use for the document 13421 * @encoding: the document encoding, or NULL 13422 * @options: a combination of xmlParserOption 13423 * 13424 * parse an XML in-memory document and build a tree. 
13425 * This reuses the existing @ctxt parser context 13426 * 13427 * Returns the resulting document tree 13428 */ 13429xmlDocPtr 13430xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur, 13431 const char *URL, const char *encoding, int options) 13432{ 13433 xmlParserInputPtr stream; 13434 13435 if (cur == NULL) 13436 return (NULL); 13437 if (ctxt == NULL) 13438 return (NULL); 13439 13440 xmlCtxtReset(ctxt); 13441 13442 stream = xmlNewStringInputStream(ctxt, cur); 13443 if (stream == NULL) { 13444 return (NULL); 13445 } 13446 inputPush(ctxt, stream); 13447 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13448} 13449 13450/** 13451 * xmlCtxtReadFile: 13452 * @ctxt: an XML parser context 13453 * @filename: a file or URL 13454 * @encoding: the document encoding, or NULL 13455 * @options: a combination of xmlParserOption 13456 * 13457 * parse an XML file from the filesystem or the network. 13458 * This reuses the existing @ctxt parser context 13459 * 13460 * Returns the resulting document tree 13461 */ 13462xmlDocPtr 13463xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, 13464 const char *encoding, int options) 13465{ 13466 xmlParserInputPtr stream; 13467 13468 if (filename == NULL) 13469 return (NULL); 13470 if (ctxt == NULL) 13471 return (NULL); 13472 13473 xmlCtxtReset(ctxt); 13474 13475 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 13476 if (stream == NULL) { 13477 return (NULL); 13478 } 13479 inputPush(ctxt, stream); 13480 return (xmlDoRead(ctxt, NULL, encoding, options, 1)); 13481} 13482 13483/** 13484 * xmlCtxtReadMemory: 13485 * @ctxt: an XML parser context 13486 * @buffer: a pointer to a char array 13487 * @size: the size of the array 13488 * @URL: the base URL to use for the document 13489 * @encoding: the document encoding, or NULL 13490 * @options: a combination of xmlParserOption 13491 * 13492 * parse an XML in-memory document and build a tree. 
13493 * This reuses the existing @ctxt parser context 13494 * 13495 * Returns the resulting document tree 13496 */ 13497xmlDocPtr 13498xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, 13499 const char *URL, const char *encoding, int options) 13500{ 13501 xmlParserInputBufferPtr input; 13502 xmlParserInputPtr stream; 13503 13504 if (ctxt == NULL) 13505 return (NULL); 13506 if (buffer == NULL) 13507 return (NULL); 13508 13509 xmlCtxtReset(ctxt); 13510 13511 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 13512 if (input == NULL) { 13513 return(NULL); 13514 } 13515 13516 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13517 if (stream == NULL) { 13518 xmlFreeParserInputBuffer(input); 13519 return(NULL); 13520 } 13521 13522 inputPush(ctxt, stream); 13523 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13524} 13525 13526/** 13527 * xmlCtxtReadFd: 13528 * @ctxt: an XML parser context 13529 * @fd: an open file descriptor 13530 * @URL: the base URL to use for the document 13531 * @encoding: the document encoding, or NULL 13532 * @options: a combination of xmlParserOption 13533 * 13534 * parse an XML from a file descriptor and build a tree. 13535 * This reuses the existing @ctxt parser context 13536 * NOTE that the file descriptor will not be closed when the 13537 * reader is closed or reset. 
13538 * 13539 * Returns the resulting document tree 13540 */ 13541xmlDocPtr 13542xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, 13543 const char *URL, const char *encoding, int options) 13544{ 13545 xmlParserInputBufferPtr input; 13546 xmlParserInputPtr stream; 13547 13548 if (fd < 0) 13549 return (NULL); 13550 if (ctxt == NULL) 13551 return (NULL); 13552 13553 xmlCtxtReset(ctxt); 13554 13555 13556 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 13557 if (input == NULL) 13558 return (NULL); 13559 input->closecallback = NULL; 13560 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13561 if (stream == NULL) { 13562 xmlFreeParserInputBuffer(input); 13563 return (NULL); 13564 } 13565 inputPush(ctxt, stream); 13566 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13567} 13568 13569/** 13570 * xmlCtxtReadIO: 13571 * @ctxt: an XML parser context 13572 * @ioread: an I/O read function 13573 * @ioclose: an I/O close function 13574 * @ioctx: an I/O handler 13575 * @URL: the base URL to use for the document 13576 * @encoding: the document encoding, or NULL 13577 * @options: a combination of xmlParserOption 13578 * 13579 * parse an XML document from I/O functions and source and build a tree. 
13580 * This reuses the existing @ctxt parser context 13581 * 13582 * Returns the resulting document tree 13583 */ 13584xmlDocPtr 13585xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, 13586 xmlInputCloseCallback ioclose, void *ioctx, 13587 const char *URL, 13588 const char *encoding, int options) 13589{ 13590 xmlParserInputBufferPtr input; 13591 xmlParserInputPtr stream; 13592 13593 if (ioread == NULL) 13594 return (NULL); 13595 if (ctxt == NULL) 13596 return (NULL); 13597 13598 xmlCtxtReset(ctxt); 13599 13600 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 13601 XML_CHAR_ENCODING_NONE); 13602 if (input == NULL) 13603 return (NULL); 13604 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 13605 if (stream == NULL) { 13606 xmlFreeParserInputBuffer(input); 13607 return (NULL); 13608 } 13609 inputPush(ctxt, stream); 13610 return (xmlDoRead(ctxt, URL, encoding, options, 1)); 13611} 13612 13613#define bottom_parser 13614#include "elfgcchack.h" 13615