1/* 2 * Summary: the core parser module 3 * Description: Interfaces, constants and types related to the XML parser 4 * 5 * Copy: See Copyright for the status of this software. 6 * 7 * Author: Daniel Veillard 8 */ 9 10#ifndef __XML_PARSER_H__ 11#define __XML_PARSER_H__ 12 13#include <stdarg.h> 14 15#include <libxml/xmlversion.h> 16#include <libxml/tree.h> 17#include <libxml/dict.h> 18#include <libxml/hash.h> 19#include <libxml/valid.h> 20#include <libxml/entities.h> 21#include <libxml/xmlerror.h> 22#include <libxml/xmlstring.h> 23 24#ifdef __cplusplus 25extern "C" { 26#endif 27 28/** 29 * XML_DEFAULT_VERSION: 30 * 31 * The default version of XML used: 1.0 32 */ 33#define XML_DEFAULT_VERSION "1.0" 34 35/** 36 * xmlParserInput: 37 * 38 * An xmlParserInput is an input flow for the XML processor. 39 * Each entity parsed is associated an xmlParserInput (except the 40 * few predefined ones). This is the case both for internal entities 41 * - in which case the flow is already completely in memory - or 42 * external entities - in which case we use the buf structure for 43 * progressive reading and I18N conversions to the internal UTF-8 format. 44 */ 45 46/** 47 * xmlParserInputDeallocate: 48 * @str: the string to deallocate 49 * 50 * Callback for freeing some parser input allocations. 51 */ 52typedef void (* xmlParserInputDeallocate)(xmlChar *str); 53 54struct _xmlParserInput { 55 /* Input buffer */ 56 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 57 58 const char *filename; /* The file analyzed, if any */ 59 const char *directory; /* the directory/base of the file */ 60 const xmlChar *base; /* Base of the array to parse */ 61 const xmlChar *cur; /* Current char being parsed */ 62 const xmlChar *end; /* end of the array to parse */ 63 int length; /* length if known */ 64 int line; /* Current line */ 65 int col; /* Current column */ 66 /* 67 * NOTE: consumed is only tested for equality in the parser code, 68 * so even if there is an overflow this should not give troubles 69 * for parsing very large instances. 70 */ 71 unsigned long consumed; /* How many xmlChars already consumed */ 72 xmlParserInputDeallocate free; /* function to deallocate the base */ 73 const xmlChar *encoding; /* the encoding string for entity */ 74 const xmlChar *version; /* the version string for entity */ 75 int standalone; /* Was that entity marked standalone */ 76 int id; /* an unique identifier for the entity */ 77}; 78 79/** 80 * xmlParserNodeInfo: 81 * 82 * The parser can be asked to collect Node informations, i.e. at what 83 * place in the file they were detected. 84 * NOTE: This is off by default and not very well tested. 85 */ 86typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 87typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 88 89struct _xmlParserNodeInfo { 90 const struct _xmlNode* node; 91 /* Position & line # that text that created the node begins & ends on */ 92 unsigned long begin_pos; 93 unsigned long begin_line; 94 unsigned long end_pos; 95 unsigned long end_line; 96}; 97 98typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 99typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 100struct _xmlParserNodeInfoSeq { 101 unsigned long maximum; 102 unsigned long length; 103 xmlParserNodeInfo* buffer; 104}; 105 106/** 107 * xmlParserInputState: 108 * 109 * The parser is now working also as a state based parser. 110 * The recursive one use the state info for entities processing. 111 */ 112typedef enum { 113 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 114 XML_PARSER_START = 0, /* nothing has been parsed */ 115 XML_PARSER_MISC, /* Misc* before int subset */ 116 XML_PARSER_PI, /* Within a processing instruction */ 117 XML_PARSER_DTD, /* within some DTD content */ 118 XML_PARSER_PROLOG, /* Misc* after internal subset */ 119 XML_PARSER_COMMENT, /* within a comment */ 120 XML_PARSER_START_TAG, /* within a start tag */ 121 XML_PARSER_CONTENT, /* within the content */ 122 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 123 XML_PARSER_END_TAG, /* within a closing tag */ 124 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 125 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 126 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 127 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 128 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 129 XML_PARSER_IGNORE, /* within an IGNORED section */ 130 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ 131} xmlParserInputState; 132 133/** 134 * XML_DETECT_IDS: 135 * 136 * Bit in the loadsubset context field to tell to do ID/REFs lookups. 137 * Use it to initialize xmlLoadExtDtdDefaultValue. 138 */ 139#define XML_DETECT_IDS 2 140 141/** 142 * XML_COMPLETE_ATTRS: 143 * 144 * Bit in the loadsubset context field to tell to do complete the 145 * elements attributes lists with the ones defaulted from the DTDs. 146 * Use it to initialize xmlLoadExtDtdDefaultValue. 147 */ 148#define XML_COMPLETE_ATTRS 4 149 150/** 151 * XML_SKIP_IDS: 152 * 153 * Bit in the loadsubset context field to tell to not do ID/REFs registration. 154 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases. 155 */ 156#define XML_SKIP_IDS 8 157 158/** 159 * xmlParserMode: 160 * 161 * A parser can operate in various modes 162 */ 163typedef enum { 164 XML_PARSE_UNKNOWN = 0, 165 XML_PARSE_DOM = 1, 166 XML_PARSE_SAX = 2, 167 XML_PARSE_PUSH_DOM = 3, 168 XML_PARSE_PUSH_SAX = 4, 169 XML_PARSE_READER = 5 170} xmlParserMode; 171 172/** 173 * xmlParserCtxt: 174 * 175 * The parser context. 176 * NOTE This doesn't completely define the parser state, the (current ?) 177 * design of the parser uses recursive function calls since this allow 178 * and easy mapping from the production rules of the specification 179 * to the actual code. The drawback is that the actual function call 180 * also reflect the parser state. However most of the parsing routines 181 * takes as the only argument the parser context pointer, so migrating 182 * to a state based parser for progressive parsing shouldn't be too hard. 183 */ 184struct _xmlParserCtxt { 185 struct _xmlSAXHandler *sax; /* The SAX handler */ 186 void *userData; /* For SAX interface only, used by DOM build */ 187 xmlDocPtr myDoc; /* the document being built */ 188 int wellFormed; /* is the document well formed */ 189 int replaceEntities; /* shall we replace entities ? */ 190 const xmlChar *version; /* the XML version string */ 191 const xmlChar *encoding; /* the declared encoding, if any */ 192 int standalone; /* standalone document */ 193 int html; /* an HTML(1)/Docbook(2) document */ 194 195 /* Input stream stack */ 196 xmlParserInputPtr input; /* Current input stream */ 197 int inputNr; /* Number of current input streams */ 198 int inputMax; /* Max number of input streams */ 199 xmlParserInputPtr *inputTab; /* stack of inputs */ 200 201 /* Node analysis stack only used for DOM building */ 202 xmlNodePtr node; /* Current parsed Node */ 203 int nodeNr; /* Depth of the parsing stack */ 204 int nodeMax; /* Max depth of the parsing stack */ 205 xmlNodePtr *nodeTab; /* array of nodes */ 206 207 int record_info; /* Whether node info should be kept */ 208 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 209 210 int errNo; /* error code */ 211 212 int hasExternalSubset; /* reference and external subset */ 213 int hasPErefs; /* the internal subset has PE refs */ 214 int external; /* are we parsing an external entity */ 215 216 int valid; /* is the document valid */ 217 int validate; /* shall we try to validate ? */ 218 xmlValidCtxt vctxt; /* The validity context */ 219 220 xmlParserInputState instate; /* current type of input */ 221 int token; /* next char look-ahead */ 222 223 char *directory; /* the data directory */ 224 225 /* Node name stack */ 226 const xmlChar *name; /* Current parsed Node */ 227 int nameNr; /* Depth of the parsing stack */ 228 int nameMax; /* Max depth of the parsing stack */ 229 const xmlChar * *nameTab; /* array of nodes */ 230 231 long nbChars; /* number of xmlChar processed */ 232 long checkIndex; /* used by progressive parsing lookup */ 233 int keepBlanks; /* ugly but ... */ 234 int disableSAX; /* SAX callbacks are disabled */ 235 int inSubset; /* Parsing is in int 1/ext 2 subset */ 236 const xmlChar * intSubName; /* name of subset */ 237 xmlChar * extSubURI; /* URI of external subset */ 238 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 239 240 /* xml:space values */ 241 int * space; /* Should the parser preserve spaces */ 242 int spaceNr; /* Depth of the parsing stack */ 243 int spaceMax; /* Max depth of the parsing stack */ 244 int * spaceTab; /* array of space infos */ 245 246 int depth; /* to prevent entity substitution loops */ 247 xmlParserInputPtr entity; /* used to check entities boundaries */ 248 int charset; /* encoding of the in-memory content 249 actually an xmlCharEncoding */ 250 int nodelen; /* Those two fields are there to */ 251 int nodemem; /* Speed up large node parsing */ 252 int pedantic; /* signal pedantic warnings */ 253 void *_private; /* For user data, libxml won't touch it */ 254 255 int loadsubset; /* should the external subset be loaded */ 256 int linenumbers; /* set line number in element content */ 257 void *catalogs; /* document's own catalog */ 258 int recovery; /* run in recovery mode */ 259 int progressive; /* is this a progressive parsing */ 260 xmlDictPtr dict; /* dictionnary for the parser */ 261 const xmlChar * *atts; /* array for the attributes callbacks */ 262 int maxatts; /* the size of the array */ 263 int docdict; /* use strings from dict to build tree */ 264 265 /* 266 * pre-interned strings 267 */ 268 const xmlChar *str_xml; 269 const xmlChar *str_xmlns; 270 const xmlChar *str_xml_ns; 271 272 /* 273 * Everything below is used only by the new SAX mode 274 */ 275 int sax2; /* operating in the new SAX mode */ 276 int nsNr; /* the number of inherited namespaces */ 277 int nsMax; /* the size of the arrays */ 278 const xmlChar * *nsTab; /* the array of prefix/namespace name */ 279 int *attallocs; /* which attribute were allocated */ 280 void * *pushTab; /* array of data for push */ 281 xmlHashTablePtr attsDefault; /* defaulted attributes if any */ 282 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ 283 int nsWellFormed; /* is the document XML Nanespace okay */ 284 int options; /* Extra options */ 285 286 /* 287 * Those fields are needed only for treaming parsing so far 288 */ 289 int dictNames; /* Use dictionary names for the tree */ 290 int freeElemsNr; /* number of freed element nodes */ 291 xmlNodePtr freeElems; /* List of freed element nodes */ 292 int freeAttrsNr; /* number of freed attributes nodes */ 293 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ 294 295 /* 296 * the complete error informations for the last error. 297 */ 298 xmlError lastError; 299 xmlParserMode parseMode; /* the parser mode */ 300 unsigned long nbentities; /* number of entities references */ 301 unsigned long sizeentities; /* size of parsed entities */ 302}; 303 304/** 305 * xmlSAXLocator: 306 * 307 * A SAX Locator. 308 */ 309struct _xmlSAXLocator { 310 const xmlChar *(*getPublicId)(void *ctx); 311 const xmlChar *(*getSystemId)(void *ctx); 312 int (*getLineNumber)(void *ctx); 313 int (*getColumnNumber)(void *ctx); 314}; 315 316/** 317 * xmlSAXHandler: 318 * 319 * A SAX handler is bunch of callbacks called by the parser when processing 320 * of the input generate data or structure informations. 321 */ 322 323/** 324 * resolveEntitySAXFunc: 325 * @ctx: the user data (XML parser context) 326 * @publicId: The public ID of the entity 327 * @systemId: The system ID of the entity 328 * 329 * Callback: 330 * The entity loader, to control the loading of external entities, 331 * the application can either: 332 * - override this resolveEntity() callback in the SAX block 333 * - or better use the xmlSetExternalEntityLoader() function to 334 * set up it's own entity resolution routine 335 * 336 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 337 */ 338typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 339 const xmlChar *publicId, 340 const xmlChar *systemId); 341/** 342 * internalSubsetSAXFunc: 343 * @ctx: the user data (XML parser context) 344 * @name: the root element name 345 * @ExternalID: the external ID 346 * @SystemID: the SYSTEM ID (e.g. filename or URL) 347 * 348 * Callback on internal subset declaration. 349 */ 350typedef void (*internalSubsetSAXFunc) (void *ctx, 351 const xmlChar *name, 352 const xmlChar *ExternalID, 353 const xmlChar *SystemID); 354/** 355 * externalSubsetSAXFunc: 356 * @ctx: the user data (XML parser context) 357 * @name: the root element name 358 * @ExternalID: the external ID 359 * @SystemID: the SYSTEM ID (e.g. filename or URL) 360 * 361 * Callback on external subset declaration. 362 */ 363typedef void (*externalSubsetSAXFunc) (void *ctx, 364 const xmlChar *name, 365 const xmlChar *ExternalID, 366 const xmlChar *SystemID); 367/** 368 * getEntitySAXFunc: 369 * @ctx: the user data (XML parser context) 370 * @name: The entity name 371 * 372 * Get an entity by name. 373 * 374 * Returns the xmlEntityPtr if found. 375 */ 376typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 377 const xmlChar *name); 378/** 379 * getParameterEntitySAXFunc: 380 * @ctx: the user data (XML parser context) 381 * @name: The entity name 382 * 383 * Get a parameter entity by name. 384 * 385 * Returns the xmlEntityPtr if found. 386 */ 387typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 388 const xmlChar *name); 389/** 390 * entityDeclSAXFunc: 391 * @ctx: the user data (XML parser context) 392 * @name: the entity name 393 * @type: the entity type 394 * @publicId: The public ID of the entity 395 * @systemId: The system ID of the entity 396 * @content: the entity value (without processing). 397 * 398 * An entity definition has been parsed. 399 */ 400typedef void (*entityDeclSAXFunc) (void *ctx, 401 const xmlChar *name, 402 int type, 403 const xmlChar *publicId, 404 const xmlChar *systemId, 405 xmlChar *content); 406/** 407 * notationDeclSAXFunc: 408 * @ctx: the user data (XML parser context) 409 * @name: The name of the notation 410 * @publicId: The public ID of the entity 411 * @systemId: The system ID of the entity 412 * 413 * What to do when a notation declaration has been parsed. 414 */ 415typedef void (*notationDeclSAXFunc)(void *ctx, 416 const xmlChar *name, 417 const xmlChar *publicId, 418 const xmlChar *systemId); 419/** 420 * attributeDeclSAXFunc: 421 * @ctx: the user data (XML parser context) 422 * @elem: the name of the element 423 * @fullname: the attribute name 424 * @type: the attribute type 425 * @def: the type of default value 426 * @defaultValue: the attribute default value 427 * @tree: the tree of enumerated value set 428 * 429 * An attribute definition has been parsed. 430 */ 431typedef void (*attributeDeclSAXFunc)(void *ctx, 432 const xmlChar *elem, 433 const xmlChar *fullname, 434 int type, 435 int def, 436 const xmlChar *defaultValue, 437 xmlEnumerationPtr tree); 438/** 439 * elementDeclSAXFunc: 440 * @ctx: the user data (XML parser context) 441 * @name: the element name 442 * @type: the element type 443 * @content: the element value tree 444 * 445 * An element definition has been parsed. 446 */ 447typedef void (*elementDeclSAXFunc)(void *ctx, 448 const xmlChar *name, 449 int type, 450 xmlElementContentPtr content); 451/** 452 * unparsedEntityDeclSAXFunc: 453 * @ctx: the user data (XML parser context) 454 * @name: The name of the entity 455 * @publicId: The public ID of the entity 456 * @systemId: The system ID of the entity 457 * @notationName: the name of the notation 458 * 459 * What to do when an unparsed entity declaration is parsed. 460 */ 461typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 462 const xmlChar *name, 463 const xmlChar *publicId, 464 const xmlChar *systemId, 465 const xmlChar *notationName); 466/** 467 * setDocumentLocatorSAXFunc: 468 * @ctx: the user data (XML parser context) 469 * @loc: A SAX Locator 470 * 471 * Receive the document locator at startup, actually xmlDefaultSAXLocator. 472 * Everything is available on the context, so this is useless in our case. 473 */ 474typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 475 xmlSAXLocatorPtr loc); 476/** 477 * startDocumentSAXFunc: 478 * @ctx: the user data (XML parser context) 479 * 480 * Called when the document start being processed. 481 */ 482typedef void (*startDocumentSAXFunc) (void *ctx); 483/** 484 * endDocumentSAXFunc: 485 * @ctx: the user data (XML parser context) 486 * 487 * Called when the document end has been detected. 488 */ 489typedef void (*endDocumentSAXFunc) (void *ctx); 490/** 491 * startElementSAXFunc: 492 * @ctx: the user data (XML parser context) 493 * @name: The element name, including namespace prefix 494 * @atts: An array of name/value attributes pairs, NULL terminated 495 * 496 * Called when an opening tag has been processed. 497 */ 498typedef void (*startElementSAXFunc) (void *ctx, 499 const xmlChar *name, 500 const xmlChar **atts); 501/** 502 * endElementSAXFunc: 503 * @ctx: the user data (XML parser context) 504 * @name: The element name 505 * 506 * Called when the end of an element has been detected. 507 */ 508typedef void (*endElementSAXFunc) (void *ctx, 509 const xmlChar *name); 510/** 511 * attributeSAXFunc: 512 * @ctx: the user data (XML parser context) 513 * @name: The attribute name, including namespace prefix 514 * @value: The attribute value 515 * 516 * Handle an attribute that has been read by the parser. 517 * The default handling is to convert the attribute into an 518 * DOM subtree and past it in a new xmlAttr element added to 519 * the element. 520 */ 521typedef void (*attributeSAXFunc) (void *ctx, 522 const xmlChar *name, 523 const xmlChar *value); 524/** 525 * referenceSAXFunc: 526 * @ctx: the user data (XML parser context) 527 * @name: The entity name 528 * 529 * Called when an entity reference is detected. 530 */ 531typedef void (*referenceSAXFunc) (void *ctx, 532 const xmlChar *name); 533/** 534 * charactersSAXFunc: 535 * @ctx: the user data (XML parser context) 536 * @ch: a xmlChar string 537 * @len: the number of xmlChar 538 * 539 * Receiving some chars from the parser. 540 */ 541typedef void (*charactersSAXFunc) (void *ctx, 542 const xmlChar *ch, 543 int len); 544/** 545 * ignorableWhitespaceSAXFunc: 546 * @ctx: the user data (XML parser context) 547 * @ch: a xmlChar string 548 * @len: the number of xmlChar 549 * 550 * Receiving some ignorable whitespaces from the parser. 551 * UNUSED: by default the DOM building will use characters. 552 */ 553typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 554 const xmlChar *ch, 555 int len); 556/** 557 * processingInstructionSAXFunc: 558 * @ctx: the user data (XML parser context) 559 * @target: the target name 560 * @data: the PI data's 561 * 562 * A processing instruction has been parsed. 563 */ 564typedef void (*processingInstructionSAXFunc) (void *ctx, 565 const xmlChar *target, 566 const xmlChar *data); 567/** 568 * commentSAXFunc: 569 * @ctx: the user data (XML parser context) 570 * @value: the comment content 571 * 572 * A comment has been parsed. 573 */ 574typedef void (*commentSAXFunc) (void *ctx, 575 const xmlChar *value); 576/** 577 * cdataBlockSAXFunc: 578 * @ctx: the user data (XML parser context) 579 * @value: The pcdata content 580 * @len: the block length 581 * 582 * Called when a pcdata block has been parsed. 583 */ 584typedef void (*cdataBlockSAXFunc) ( 585 void *ctx, 586 const xmlChar *value, 587 int len); 588/** 589 * warningSAXFunc: 590 * @ctx: an XML parser context 591 * @msg: the message to display/transmit 592 * @...: extra parameters for the message display 593 * 594 * Display and format a warning messages, callback. 595 */ 596typedef void (XMLCDECL *warningSAXFunc) (void *ctx, 597 const char *msg, ...); 598/** 599 * errorSAXFunc: 600 * @ctx: an XML parser context 601 * @msg: the message to display/transmit 602 * @...: extra parameters for the message display 603 * 604 * Display and format an error messages, callback. 605 */ 606typedef void (XMLCDECL *errorSAXFunc) (void *ctx, 607 const char *msg, ...); 608/** 609 * fatalErrorSAXFunc: 610 * @ctx: an XML parser context 611 * @msg: the message to display/transmit 612 * @...: extra parameters for the message display 613 * 614 * Display and format fatal error messages, callback. 615 * Note: so far fatalError() SAX callbacks are not used, error() 616 * get all the callbacks for errors. 617 */ 618typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx, 619 const char *msg, ...); 620/** 621 * isStandaloneSAXFunc: 622 * @ctx: the user data (XML parser context) 623 * 624 * Is this document tagged standalone? 625 * 626 * Returns 1 if true 627 */ 628typedef int (*isStandaloneSAXFunc) (void *ctx); 629/** 630 * hasInternalSubsetSAXFunc: 631 * @ctx: the user data (XML parser context) 632 * 633 * Does this document has an internal subset. 634 * 635 * Returns 1 if true 636 */ 637typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 638 639/** 640 * hasExternalSubsetSAXFunc: 641 * @ctx: the user data (XML parser context) 642 * 643 * Does this document has an external subset? 644 * 645 * Returns 1 if true 646 */ 647typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 648 649/************************************************************************ 650 * * 651 * The SAX version 2 API extensions * 652 * * 653 ************************************************************************/ 654/** 655 * XML_SAX2_MAGIC: 656 * 657 * Special constant found in SAX2 blocks initialized fields 658 */ 659#define XML_SAX2_MAGIC 0xDEEDBEAF 660 661/** 662 * startElementNsSAX2Func: 663 * @ctx: the user data (XML parser context) 664 * @localname: the local name of the element 665 * @prefix: the element namespace prefix if available 666 * @URI: the element namespace name if available 667 * @nb_namespaces: number of namespace definitions on that node 668 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions 669 * @nb_attributes: the number of attributes on that node 670 * @nb_defaulted: the number of defaulted attributes. The defaulted 671 * ones are at the end of the array 672 * @attributes: pointer to the array of (localname/prefix/URI/value/end) 673 * attribute values. 674 * 675 * SAX2 callback when an element start has been detected by the parser. 676 * It provides the namespace informations for the element, as well as 677 * the new namespace declarations on the element. 678 */ 679 680typedef void (*startElementNsSAX2Func) (void *ctx, 681 const xmlChar *localname, 682 const xmlChar *prefix, 683 const xmlChar *URI, 684 int nb_namespaces, 685 const xmlChar **namespaces, 686 int nb_attributes, 687 int nb_defaulted, 688 const xmlChar **attributes); 689 690/** 691 * endElementNsSAX2Func: 692 * @ctx: the user data (XML parser context) 693 * @localname: the local name of the element 694 * @prefix: the element namespace prefix if available 695 * @URI: the element namespace name if available 696 * 697 * SAX2 callback when an element end has been detected by the parser. 698 * It provides the namespace informations for the element. 699 */ 700 701typedef void (*endElementNsSAX2Func) (void *ctx, 702 const xmlChar *localname, 703 const xmlChar *prefix, 704 const xmlChar *URI); 705 706 707struct _xmlSAXHandler { 708 internalSubsetSAXFunc internalSubset; 709 isStandaloneSAXFunc isStandalone; 710 hasInternalSubsetSAXFunc hasInternalSubset; 711 hasExternalSubsetSAXFunc hasExternalSubset; 712 resolveEntitySAXFunc resolveEntity; 713 getEntitySAXFunc getEntity; 714 entityDeclSAXFunc entityDecl; 715 notationDeclSAXFunc notationDecl; 716 attributeDeclSAXFunc attributeDecl; 717 elementDeclSAXFunc elementDecl; 718 unparsedEntityDeclSAXFunc unparsedEntityDecl; 719 setDocumentLocatorSAXFunc setDocumentLocator; 720 startDocumentSAXFunc startDocument; 721 endDocumentSAXFunc endDocument; 722 startElementSAXFunc startElement; 723 endElementSAXFunc endElement; 724 referenceSAXFunc reference; 725 charactersSAXFunc characters; 726 ignorableWhitespaceSAXFunc ignorableWhitespace; 727 processingInstructionSAXFunc processingInstruction; 728 commentSAXFunc comment; 729 warningSAXFunc warning; 730 errorSAXFunc error; 731 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 732 getParameterEntitySAXFunc getParameterEntity; 733 cdataBlockSAXFunc cdataBlock; 734 externalSubsetSAXFunc externalSubset; 735 unsigned int initialized; 736 /* The following fields are extensions available only on version 2 */ 737 void *_private; 738 startElementNsSAX2Func startElementNs; 739 endElementNsSAX2Func endElementNs; 740 xmlStructuredErrorFunc serror; 741}; 742 743/* 744 * SAX Version 1 745 */ 746typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1; 747typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr; 748struct _xmlSAXHandlerV1 { 749 internalSubsetSAXFunc internalSubset; 750 isStandaloneSAXFunc isStandalone; 751 hasInternalSubsetSAXFunc hasInternalSubset; 752 hasExternalSubsetSAXFunc hasExternalSubset; 753 resolveEntitySAXFunc resolveEntity; 754 getEntitySAXFunc getEntity; 755 entityDeclSAXFunc entityDecl; 756 notationDeclSAXFunc notationDecl; 757 attributeDeclSAXFunc attributeDecl; 758 elementDeclSAXFunc elementDecl; 759 unparsedEntityDeclSAXFunc unparsedEntityDecl; 760 setDocumentLocatorSAXFunc setDocumentLocator; 761 startDocumentSAXFunc startDocument; 762 endDocumentSAXFunc endDocument; 763 startElementSAXFunc startElement; 764 endElementSAXFunc endElement; 765 referenceSAXFunc reference; 766 charactersSAXFunc characters; 767 ignorableWhitespaceSAXFunc ignorableWhitespace; 768 processingInstructionSAXFunc processingInstruction; 769 commentSAXFunc comment; 770 warningSAXFunc warning; 771 errorSAXFunc error; 772 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 773 getParameterEntitySAXFunc getParameterEntity; 774 cdataBlockSAXFunc cdataBlock; 775 externalSubsetSAXFunc externalSubset; 776 unsigned int initialized; 777}; 778 779 780/** 781 * xmlExternalEntityLoader: 782 * @URL: The System ID of the resource requested 783 * @ID: The Public ID of the resource requested 784 * @context: the XML parser context 785 * 786 * External entity loaders types. 787 * 788 * Returns the entity input parser. 789 */ 790typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, 791 const char *ID, 792 xmlParserCtxtPtr context); 793 794#ifdef __cplusplus 795} 796#endif 797 798#include <libxml/encoding.h> 799#include <libxml/xmlIO.h> 800#include <libxml/globals.h> 801 802#ifdef __cplusplus 803extern "C" { 804#endif 805 806 807/* 808 * Init/Cleanup 809 */ 810XMLPUBFUN void XMLCALL 811 xmlInitParser (void); 812XMLPUBFUN void XMLCALL 813 xmlCleanupParser (void); 814 815/* 816 * Input functions 817 */ 818XMLPUBFUN int XMLCALL 819 xmlParserInputRead (xmlParserInputPtr in, 820 int len); 821XMLPUBFUN int XMLCALL 822 xmlParserInputGrow (xmlParserInputPtr in, 823 int len); 824 825/* 826 * Basic parsing Interfaces 827 */ 828#ifdef LIBXML_SAX1_ENABLED 829XMLPUBFUN xmlDocPtr XMLCALL 830 xmlParseDoc (const xmlChar *cur); 831XMLPUBFUN xmlDocPtr XMLCALL 832 xmlParseFile (const char *filename); 833XMLPUBFUN xmlDocPtr XMLCALL 834 xmlParseMemory (const char *buffer, 835 int size); 836#endif /* LIBXML_SAX1_ENABLED */ 837XMLPUBFUN int XMLCALL 838 xmlSubstituteEntitiesDefault(int val); 839XMLPUBFUN int XMLCALL 840 xmlKeepBlanksDefault (int val); 841XMLPUBFUN void XMLCALL 842 xmlStopParser (xmlParserCtxtPtr ctxt); 843XMLPUBFUN int XMLCALL 844 xmlPedanticParserDefault(int val); 845XMLPUBFUN int XMLCALL 846 xmlLineNumbersDefault (int val); 847 848#ifdef LIBXML_SAX1_ENABLED 849/* 850 * Recovery mode 851 */ 852XMLPUBFUN xmlDocPtr XMLCALL 853 xmlRecoverDoc (xmlChar *cur); 854XMLPUBFUN xmlDocPtr XMLCALL 855 xmlRecoverMemory (const char *buffer, 856 int size); 857XMLPUBFUN xmlDocPtr XMLCALL 858 xmlRecoverFile (const char *filename); 859#endif /* LIBXML_SAX1_ENABLED */ 860 861/* 862 * Less common routines and SAX interfaces 863 */ 864XMLPUBFUN int XMLCALL 865 xmlParseDocument (xmlParserCtxtPtr ctxt); 866XMLPUBFUN int XMLCALL 867 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 868#ifdef LIBXML_SAX1_ENABLED 869XMLPUBFUN int XMLCALL 870 xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 871 void *user_data, 872 const char *filename); 873XMLPUBFUN int XMLCALL 874 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 875 void *user_data, 876 const char *buffer, 877 int size); 878XMLPUBFUN xmlDocPtr XMLCALL 879 xmlSAXParseDoc (xmlSAXHandlerPtr sax, 880 const xmlChar *cur, 881 int recovery); 882XMLPUBFUN xmlDocPtr XMLCALL 883 xmlSAXParseMemory (xmlSAXHandlerPtr sax, 884 const char *buffer, 885 int size, 886 int recovery); 887XMLPUBFUN xmlDocPtr XMLCALL 888 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, 889 const char *buffer, 890 int size, 891 int recovery, 892 void *data); 893XMLPUBFUN xmlDocPtr XMLCALL 894 xmlSAXParseFile (xmlSAXHandlerPtr sax, 895 const char *filename, 896 int recovery); 897XMLPUBFUN xmlDocPtr XMLCALL 898 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, 899 const char *filename, 900 int recovery, 901 void *data); 902XMLPUBFUN xmlDocPtr XMLCALL 903 xmlSAXParseEntity (xmlSAXHandlerPtr sax, 904 const char *filename); 905XMLPUBFUN xmlDocPtr XMLCALL 906 xmlParseEntity (const char *filename); 907#endif /* LIBXML_SAX1_ENABLED */ 908 909#ifdef LIBXML_VALID_ENABLED 910XMLPUBFUN xmlDtdPtr XMLCALL 911 xmlSAXParseDTD (xmlSAXHandlerPtr sax, 912 const xmlChar *ExternalID, 913 const xmlChar *SystemID); 914XMLPUBFUN xmlDtdPtr XMLCALL 915 xmlParseDTD (const xmlChar *ExternalID, 916 const xmlChar *SystemID); 917XMLPUBFUN xmlDtdPtr XMLCALL 918 xmlIOParseDTD (xmlSAXHandlerPtr sax, 919 xmlParserInputBufferPtr input, 920 xmlCharEncoding enc); 921#endif /* LIBXML_VALID_ENABLE */ 922#ifdef LIBXML_SAX1_ENABLED 923XMLPUBFUN int XMLCALL 924 xmlParseBalancedChunkMemory(xmlDocPtr doc, 925 xmlSAXHandlerPtr sax, 926 void *user_data, 927 int depth, 928 const xmlChar *string, 929 xmlNodePtr *lst); 930#endif /* LIBXML_SAX1_ENABLED */ 931XMLPUBFUN xmlParserErrors XMLCALL 932 xmlParseInNodeContext (xmlNodePtr node, 933 const char *data, 934 int datalen, 935 int options, 936 xmlNodePtr *lst); 937#ifdef LIBXML_SAX1_ENABLED 938XMLPUBFUN int XMLCALL 939 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, 940 xmlSAXHandlerPtr sax, 941 void *user_data, 942 int depth, 943 const xmlChar *string, 944 xmlNodePtr *lst, 945 int recover); 946XMLPUBFUN int XMLCALL 947 xmlParseExternalEntity (xmlDocPtr doc, 948 xmlSAXHandlerPtr sax, 949 void *user_data, 950 int depth, 951 const xmlChar *URL, 952 const xmlChar *ID, 953 xmlNodePtr *lst); 954#endif /* LIBXML_SAX1_ENABLED */ 955XMLPUBFUN int XMLCALL 956 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 957 const xmlChar *URL, 958 const xmlChar *ID, 959 xmlNodePtr *lst); 960 961/* 962 * Parser contexts handling. 963 */ 964XMLPUBFUN xmlParserCtxtPtr XMLCALL 965 xmlNewParserCtxt (void); 966XMLPUBFUN int XMLCALL 967 xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 968XMLPUBFUN void XMLCALL 969 xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 970XMLPUBFUN void XMLCALL 971 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 972#ifdef LIBXML_SAX1_ENABLED 973XMLPUBFUN void XMLCALL 974 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 975 const xmlChar* buffer, 976 const char *filename); 977#endif /* LIBXML_SAX1_ENABLED */ 978XMLPUBFUN xmlParserCtxtPtr XMLCALL 979 xmlCreateDocParserCtxt (const xmlChar *cur); 980 981#ifdef LIBXML_LEGACY_ENABLED 982/* 983 * Reading/setting optional parsing features. 984 */ 985XMLPUBFUN int XMLCALL 986 xmlGetFeaturesList (int *len, 987 const char **result); 988XMLPUBFUN int XMLCALL 989 xmlGetFeature (xmlParserCtxtPtr ctxt, 990 const char *name, 991 void *result); 992XMLPUBFUN int XMLCALL 993 xmlSetFeature (xmlParserCtxtPtr ctxt, 994 const char *name, 995 void *value); 996#endif /* LIBXML_LEGACY_ENABLED */ 997 998#ifdef LIBXML_PUSH_ENABLED 999/* 1000 * Interfaces for the Push mode. 1001 */ 1002XMLPUBFUN xmlParserCtxtPtr XMLCALL 1003 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 1004 void *user_data, 1005 const char *chunk, 1006 int size, 1007 const char *filename); 1008XMLPUBFUN int XMLCALL 1009 xmlParseChunk (xmlParserCtxtPtr ctxt, 1010 const char *chunk, 1011 int size, 1012 int terminate); 1013#endif /* LIBXML_PUSH_ENABLED */ 1014 1015/* 1016 * Special I/O mode. 1017 */ 1018 1019XMLPUBFUN xmlParserCtxtPtr XMLCALL 1020 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 1021 void *user_data, 1022 xmlInputReadCallback ioread, 1023 xmlInputCloseCallback ioclose, 1024 void *ioctx, 1025 xmlCharEncoding enc); 1026 1027XMLPUBFUN xmlParserInputPtr XMLCALL 1028 xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 1029 xmlParserInputBufferPtr input, 1030 xmlCharEncoding enc); 1031 1032/* 1033 * Node infos. 1034 */ 1035XMLPUBFUN const xmlParserNodeInfo* XMLCALL 1036 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, 1037 const xmlNodePtr node); 1038XMLPUBFUN void XMLCALL 1039 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1040XMLPUBFUN void XMLCALL 1041 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1042XMLPUBFUN unsigned long XMLCALL 1043 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1044 const xmlNodePtr node); 1045XMLPUBFUN void XMLCALL 1046 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 1047 const xmlParserNodeInfoPtr info); 1048 1049/* 1050 * External entities handling actually implemented in xmlIO. 1051 */ 1052 1053XMLPUBFUN void XMLCALL 1054 xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 1055XMLPUBFUN xmlExternalEntityLoader XMLCALL 1056 xmlGetExternalEntityLoader(void); 1057XMLPUBFUN xmlParserInputPtr XMLCALL 1058 xmlLoadExternalEntity (const char *URL, 1059 const char *ID, 1060 xmlParserCtxtPtr ctxt); 1061 1062/* 1063 * Index lookup, actually implemented in the encoding module 1064 */ 1065XMLPUBFUN long XMLCALL 1066 xmlByteConsumed (xmlParserCtxtPtr ctxt); 1067 1068/* 1069 * New set of simpler/more flexible APIs 1070 */ 1071/** 1072 * xmlParserOption: 1073 * 1074 * This is the set of XML parser options that can be passed down 1075 * to the xmlReadDoc() and similar calls. 1076 */ 1077typedef enum { 1078 XML_PARSE_RECOVER = 1<<0, /* recover on errors */ 1079 XML_PARSE_NOENT = 1<<1, /* substitute entities */ 1080 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ 1081 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ 1082 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ 1083 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1084 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ 1085 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1086 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1087 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ 1088 XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */ 1089 XML_PARSE_NONET = 1<<11,/* Forbid network access */ 1090 XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */ 1091 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ 1092 XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ 1093 XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ 1094 XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of 1095 the tree allowed afterwards (will possibly 1096 crash if you try to modify the tree) */ 1097 XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */ 1098 XML_PARSE_NOBASEFIX = 1<<18,/* do not fixup XINCLUDE xml:base uris */ 1099 XML_PARSE_HUGE = 1<<19 /* relax any hardcoded limit from the parser */ 1100} xmlParserOption; 1101 1102XMLPUBFUN void XMLCALL 1103 xmlCtxtReset (xmlParserCtxtPtr ctxt); 1104XMLPUBFUN int XMLCALL 1105 xmlCtxtResetPush (xmlParserCtxtPtr ctxt, 1106 const char *chunk, 1107 int size, 1108 const char *filename, 1109 const char *encoding); 1110XMLPUBFUN int XMLCALL 1111 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, 1112 int options); 1113XMLPUBFUN xmlDocPtr XMLCALL 1114 xmlReadDoc (const xmlChar *cur, 1115 const char *URL, 1116 const char *encoding, 1117 int options); 1118XMLPUBFUN xmlDocPtr XMLCALL 1119 xmlReadFile (const char *URL, 1120 const char *encoding, 1121 int options); 1122XMLPUBFUN xmlDocPtr XMLCALL 1123 xmlReadMemory (const char *buffer, 1124 int size, 1125 const char *URL, 1126 const char *encoding, 1127 int options); 1128XMLPUBFUN xmlDocPtr XMLCALL 1129 xmlReadFd (int fd, 1130 const char *URL, 1131 const char *encoding, 1132 int options); 1133XMLPUBFUN xmlDocPtr XMLCALL 1134 xmlReadIO (xmlInputReadCallback ioread, 1135 xmlInputCloseCallback ioclose, 1136 void *ioctx, 1137 const char *URL, 1138 const char *encoding, 1139 int options); 1140XMLPUBFUN xmlDocPtr XMLCALL 1141 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 1142 const xmlChar *cur, 1143 const char *URL, 1144 const char *encoding, 1145 int options); 1146XMLPUBFUN xmlDocPtr XMLCALL 1147 xmlCtxtReadFile (xmlParserCtxtPtr ctxt, 1148 const char *filename, 1149 const char *encoding, 1150 int options); 1151XMLPUBFUN xmlDocPtr XMLCALL 1152 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 1153 const char *buffer, 1154 int size, 1155 const char *URL, 1156 const char *encoding, 1157 int options); 1158XMLPUBFUN xmlDocPtr XMLCALL 1159 xmlCtxtReadFd (xmlParserCtxtPtr ctxt, 1160 int fd, 1161 const char *URL, 1162 const char *encoding, 1163 int options); 1164XMLPUBFUN xmlDocPtr XMLCALL 1165 xmlCtxtReadIO (xmlParserCtxtPtr ctxt, 1166 xmlInputReadCallback ioread, 1167 xmlInputCloseCallback ioclose, 1168 void *ioctx, 1169 const char *URL, 1170 const char *encoding, 1171 int options); 1172 1173/* 1174 * Library wide options 1175 */ 1176/** 1177 * xmlFeature: 1178 * 1179 * Used to examine the existance of features that can be enabled 1180 * or disabled at compile-time. 1181 * They used to be called XML_FEATURE_xxx but this clashed with Expat 1182 */ 1183typedef enum { 1184 XML_WITH_THREAD = 1, 1185 XML_WITH_TREE = 2, 1186 XML_WITH_OUTPUT = 3, 1187 XML_WITH_PUSH = 4, 1188 XML_WITH_READER = 5, 1189 XML_WITH_PATTERN = 6, 1190 XML_WITH_WRITER = 7, 1191 XML_WITH_SAX1 = 8, 1192 XML_WITH_FTP = 9, 1193 XML_WITH_HTTP = 10, 1194 XML_WITH_VALID = 11, 1195 XML_WITH_HTML = 12, 1196 XML_WITH_LEGACY = 13, 1197 XML_WITH_C14N = 14, 1198 XML_WITH_CATALOG = 15, 1199 XML_WITH_XPATH = 16, 1200 XML_WITH_XPTR = 17, 1201 XML_WITH_XINCLUDE = 18, 1202 XML_WITH_ICONV = 19, 1203 XML_WITH_ISO8859X = 20, 1204 XML_WITH_UNICODE = 21, 1205 XML_WITH_REGEXP = 22, 1206 XML_WITH_AUTOMATA = 23, 1207 XML_WITH_EXPR = 24, 1208 XML_WITH_SCHEMAS = 25, 1209 XML_WITH_SCHEMATRON = 26, 1210 XML_WITH_MODULES = 27, 1211 XML_WITH_DEBUG = 28, 1212 XML_WITH_DEBUG_MEM = 29, 1213 XML_WITH_DEBUG_RUN = 30, 1214 XML_WITH_ZLIB = 31, 1215 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ 1216} xmlFeature; 1217 1218XMLPUBFUN int XMLCALL 1219 xmlHasFeature (xmlFeature feature); 1220 1221#ifdef __cplusplus 1222} 1223#endif 1224#endif /* __XML_PARSER_H__ */ 1225 1226