/*
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS"
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific language governing rights and limitations
under the License.

The Original Code is expat.

The Initial Developer of the Original Code is James Clark.
Portions created by James Clark are Copyright (C) 1998, 1999
James Clark. All Rights Reserved.

Contributor(s):

Alternatively, the contents of this file may be used under the terms
of the GNU General Public License (the "GPL"), in which case the
provisions of the GPL are applicable instead of those above. If you
wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under
the MPL, indicate your decision by deleting the provisions above and
replace them with the notice and other provisions required by the
GPL. If you do not delete the provisions above, a recipient may use
your version of this file under either the MPL or the GPL.
*/

#ifndef XmlParse_INCLUDED
#define XmlParse_INCLUDED 1

#ifdef __cplusplus
extern "C" {
#endif

/* Decoration applied to every public declaration in this header.
A build may predefine it (presumably for export or calling-convention
attributes -- confirm against the build files); otherwise it expands
to nothing. */
#ifndef XMLPARSEAPI
#define XMLPARSEAPI /* as nothing */
#endif

/* Opaque handle to a parser. Handles are returned by XML_ParserCreate,
XML_ParserCreateNS and XML_ExternalEntityParserCreate and are released
with XML_ParserFree. */
typedef void *XML_Parser;

#ifdef XML_UNICODE_WCHAR_T

/* XML_UNICODE_WCHAR_T will work only if sizeof(wchar_t) == 2 and wchar_t
uses Unicode. */
/* Information is UTF-16 encoded as wchar_ts */

#ifndef XML_UNICODE
#define XML_UNICODE
#endif

#include <stddef.h>
typedef wchar_t XML_Char;
typedef wchar_t XML_LChar;

#else /* not XML_UNICODE_WCHAR_T */

#ifdef XML_UNICODE

/* Information is UTF-16 encoded as unsigned shorts */
typedef unsigned short XML_Char;
typedef char XML_LChar;

#else /* not XML_UNICODE */

/* Information is UTF-8 encoded. */
typedef char XML_Char;
typedef char XML_LChar;

#endif /* not XML_UNICODE */

#endif /* not XML_UNICODE_WCHAR_T */


/* Constructs a new parser; encoding is the encoding specified by the external
protocol or null if there is none specified. */

XML_Parser XMLPARSEAPI
XML_ParserCreate(const XML_Char *encoding);

/* Constructs a new parser and namespace processor.  Element type names
and attribute names that belong to a namespace will be expanded;
unprefixed attribute names are never expanded; unprefixed element type
names are expanded only if there is a default namespace. The expanded
name is the concatenation of the namespace URI, the namespace separator character,
and the local part of the name.  If the namespace separator is '\0' then
the namespace URI and the local part will be concatenated without any
separator.  When a namespace is not declared, the name and prefix will be
passed through without expansion. */

XML_Parser XMLPARSEAPI
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);


/* atts is array of name/value pairs, terminated by 0;
   names and values are 0 terminated. */

typedef void (*XML_StartElementHandler)(void *userData,
                                        const XML_Char *name,
                                        const XML_Char **atts);

typedef void (*XML_EndElementHandler)(void *userData,
                                      const XML_Char *name);

/* s is not 0 terminated. */
typedef void (*XML_CharacterDataHandler)(void *userData,
                                         const XML_Char *s,
                                         int len);

/* target and data are 0 terminated */
typedef void (*XML_ProcessingInstructionHandler)(void *userData,
                                                 const XML_Char *target,
                                                 const XML_Char *data);

/* data is 0 terminated */
typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data);

typedef void (*XML_StartCdataSectionHandler)(void *userData);
typedef void (*XML_EndCdataSectionHandler)(void *userData);

/* This is called for any characters in the XML document for
which there is no applicable handler.  This includes both
characters that are part of markup which is of a kind that is
not reported (comments, markup declarations), or characters
that are part of a construct which could be reported but
for which no handler has been supplied. The characters are passed
exactly as they were in the XML document except that
they will be encoded in UTF-8.  Line boundaries are not normalized.
Note that a byte order mark character is not passed to the default handler.
There are no guarantees about how characters are divided between calls
to the default handler: for example, a comment might be split between
multiple calls. */

typedef void (*XML_DefaultHandler)(void *userData,
                                   const XML_Char *s,
                                   int len);

/* This is called for the start of the DOCTYPE declaration when the
name of the DOCTYPE is encountered. */
typedef void (*XML_StartDoctypeDeclHandler)(void *userData,
                                            const XML_Char *doctypeName);

/* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external subset. */
typedef void (*XML_EndDoctypeDeclHandler)(void *userData);

/* This is called for a declaration of an unparsed (NDATA)
entity.  The base argument is whatever was set by XML_SetBase.
The entityName, systemId and notationName arguments will never be null.
The other arguments may be. */

typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
                                              const XML_Char *entityName,
                                              const XML_Char *base,
                                              const XML_Char *systemId,
                                              const XML_Char *publicId,
                                              const XML_Char *notationName);

/* This is called for a declaration of notation.
The base argument is whatever was set by XML_SetBase.
The notationName will never be null.  The other arguments can be. */

typedef void (*XML_NotationDeclHandler)(void *userData,
                                        const XML_Char *notationName,
                                        const XML_Char *base,
                                        const XML_Char *systemId,
                                        const XML_Char *publicId);

/* When namespace processing is enabled, these are called once for
each namespace declaration. The calls to the start and end element
handlers occur between the calls to the start and end namespace
declaration handlers. For an xmlns attribute, prefix will be null.
For an xmlns="" attribute, uri will be null. */

typedef void (*XML_StartNamespaceDeclHandler)(void *userData,
                                              const XML_Char *prefix,
                                              const XML_Char *uri);

typedef void (*XML_EndNamespaceDeclHandler)(void *userData,
                                            const XML_Char *prefix);

/* This is called if the document is not standalone (it has an
external subset or a reference to a parameter entity, but does not
have standalone="yes"). If this handler returns 0, then processing
will not continue, and the parser will return an
XML_ERROR_NOT_STANDALONE error. */

typedef int (*XML_NotStandaloneHandler)(void *userData);

/* This is called for a reference to an external parsed general entity.
The referenced entity is not automatically parsed.
The application can parse it immediately or later using
XML_ExternalEntityParserCreate.
The parser argument is the parser parsing the entity containing the reference;
it can be passed as the parser argument to XML_ExternalEntityParserCreate.
The systemId argument is the system identifier as specified in the entity declaration;
it will not be null.
The base argument is the system identifier that should be used as the base for
resolving systemId if systemId was relative; this is set by XML_SetBase;
it may be null.
The publicId argument is the public identifier as specified in the entity declaration,
or null if none was specified; the whitespace in the public identifier
will have been normalized as required by the XML spec.
The context argument specifies the parsing context in the format
expected by the context argument to
XML_ExternalEntityParserCreate; context is valid only until the handler
returns, so if the referenced entity is to be parsed later, it must be copied.
The handler should return 0 if processing should not continue because of
a fatal error in the handling of the external entity.
In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
error.
Note that unlike other handlers the first argument is the parser, not userData. */

typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser,
                                            const XML_Char *context,
                                            const XML_Char *base,
                                            const XML_Char *systemId,
                                            const XML_Char *publicId);

/* ericm@scriptics.com */
/* Handlers installed with XML_SetElementDeclHandler and
XML_SetAttlistDeclHandler.
NOTE(review): presumably invoked for <!ELEMENT ...> and <!ATTLIST ...>
declarations respectively; the layout and ownership of the
contentspec/attributes arrays are not described in this header --
confirm against the implementation before relying on them. */
typedef void (*XML_ElementDeclHandler)(void *userData,
                                       const XML_Char *name,
                                       XML_Char ***contentspec);
typedef void (*XML_AttlistDeclHandler)(void *userData,
                                       const XML_Char *name,
                                       XML_Char ***attributes);
/* ericm@scriptics.com */

/* This structure is filled in by the XML_UnknownEncodingHandler
to provide information to the parser about encodings that are unknown
to the parser.
The map[b] member gives information about byte sequences
whose first byte is b.
If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar value c.
If map[b] is -1, then the byte sequence is malformed.
If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
sequence that encodes a single Unicode scalar value.
The data member will be passed as the first argument to the convert function.
The convert function is used to convert multibyte sequences;
s will point to a n-byte sequence where map[(unsigned char)*s] == -n.
The convert function must return the Unicode scalar value
represented by this byte sequence or -1 if the byte sequence is malformed.
The convert function may be null if the encoding is a single-byte encoding,
that is if map[b] >= -1 for all bytes b.
When the parser is finished with the encoding, then if release is not null,
it will call release passing it the data member;
once release has been called, the convert function will not be called again.

Expat places certain restrictions on the encodings that are supported
using this mechanism.

1. Every ASCII character that can appear in a well-formed XML document,
other than the characters

  $@\^`{}~

must be represented by a single byte, and that byte must be the
same byte that represents that character in ASCII.

2. No character may require more than 4 bytes to encode.

3. All characters encoded must have Unicode scalar values <= 0xFFFF,
(ie characters that would be encoded by surrogates in UTF-16
are not allowed).  Note that this restriction doesn't apply to
the built-in support for UTF-8 and UTF-16.

4. No Unicode character may be encoded by more than one distinct sequence
of bytes. */

typedef struct {
  int map[256];
  void *data;
  int (*convert)(void *data, const char *s);
  void (*release)(void *data);
} XML_Encoding;

/* This is called for an encoding that is unknown to the parser.
The encodingHandlerData argument is that which was passed as the
second argument to XML_SetUnknownEncodingHandler.
The name argument gives the name of the encoding as specified in
the encoding declaration.
If the callback can provide information about the encoding,
it must fill in the XML_Encoding structure, and return 1.
Otherwise it must return 0.
If info does not describe a suitable encoding,
then the parser will return an XML_ERROR_UNKNOWN_ENCODING error. */

typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData,
                                          const XML_Char *name,
                                          XML_Encoding *info);

void XMLPARSEAPI
XML_SetElementHandler(XML_Parser parser,
                      XML_StartElementHandler start,
                      XML_EndElementHandler end);

void XMLPARSEAPI
XML_SetCharacterDataHandler(XML_Parser parser,
                            XML_CharacterDataHandler handler);

void XMLPARSEAPI
XML_SetProcessingInstructionHandler(XML_Parser parser,
                                    XML_ProcessingInstructionHandler handler);
void XMLPARSEAPI
XML_SetCommentHandler(XML_Parser parser,
                      XML_CommentHandler handler);

void XMLPARSEAPI
XML_SetCdataSectionHandler(XML_Parser parser,
                           XML_StartCdataSectionHandler start,
                           XML_EndCdataSectionHandler end);

/* This sets the default handler and also inhibits expansion of internal entities.
The entity reference will be passed to the default handler. */

void XMLPARSEAPI
XML_SetDefaultHandler(XML_Parser parser,
                      XML_DefaultHandler handler);

/* This sets the default handler but does not inhibit expansion of internal entities.
The entity reference will not be passed to the default handler. */

void XMLPARSEAPI
XML_SetDefaultHandlerExpand(XML_Parser parser,
                            XML_DefaultHandler handler);

/* Turns default expansion of internal entities on/off, depending on the value
 * of expandEntities. ericm@scriptics.com, 1999.6.28
 */
void XMLPARSEAPI
XML_SetDefaultExpandInternalEntities(XML_Parser parser, int expandEntities);

void XMLPARSEAPI
XML_SetDoctypeDeclHandler(XML_Parser parser,
                          XML_StartDoctypeDeclHandler start,
                          XML_EndDoctypeDeclHandler end);

void XMLPARSEAPI
XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
                                 XML_UnparsedEntityDeclHandler handler);

void XMLPARSEAPI
XML_SetNotationDeclHandler(XML_Parser parser,
                           XML_NotationDeclHandler handler);

void XMLPARSEAPI
XML_SetNamespaceDeclHandler(XML_Parser parser,
                            XML_StartNamespaceDeclHandler start,
                            XML_EndNamespaceDeclHandler end);

void XMLPARSEAPI
XML_SetNotStandaloneHandler(XML_Parser parser,
                            XML_NotStandaloneHandler handler);

void XMLPARSEAPI
XML_SetExternalEntityRefHandler(XML_Parser parser,
                                XML_ExternalEntityRefHandler handler);

/* If a non-null value for arg is specified here, then it will be passed
as the first argument to the external entity ref handler instead
of the parser object. */
void XMLPARSEAPI
XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg);

void XMLPARSEAPI
XML_SetUnknownEncodingHandler(XML_Parser parser,
                              XML_UnknownEncodingHandler handler,
                              void *encodingHandlerData);

/* ericm@scriptics.com */
void XMLPARSEAPI
XML_SetElementDeclHandler(XML_Parser parser,
                          XML_ElementDeclHandler handler);
void XMLPARSEAPI
XML_SetAttlistDeclHandler(XML_Parser parser,
                          XML_AttlistDeclHandler handler);
/* ericm@scriptics.com */

/* This can be called within a handler for a start element, end element,
processing instruction or character data.  It causes the corresponding
markup to be passed to the default handler. */
void XMLPARSEAPI XML_DefaultCurrent(XML_Parser parser);

/* This value is passed as the userData argument to callbacks. */
void XMLPARSEAPI
XML_SetUserData(XML_Parser parser, void *userData);

/* Returns the last value set by XML_SetUserData or null.
This is a macro that reads a void * stored at the address the parser
handle points to, so parser must be a valid, non-null handle; the
argument is evaluated exactly once. */
#define XML_GetUserData(parser) (*(void **)(parser))

/* This is equivalent to supplying an encoding argument
to XML_ParserCreate. It must not be called after XML_Parse
or XML_ParseBuffer. */

int XMLPARSEAPI
XML_SetEncoding(XML_Parser parser, const XML_Char *encoding);

/* If this function is called, then the parser will be passed
as the first argument to callbacks instead of userData.
The userData will still be accessible using XML_GetUserData. */

void XMLPARSEAPI
XML_UseParserAsHandlerArg(XML_Parser parser);

/* Sets the base to be used for resolving relative URIs in system identifiers in
declarations.  Resolving relative identifiers is left to the application:
this value will be passed through as the base argument to the
XML_ExternalEntityRefHandler, XML_NotationDeclHandler
and XML_UnparsedEntityDeclHandler. The base argument will be copied.
Returns zero if out of memory, non-zero otherwise. */

int XMLPARSEAPI
XML_SetBase(XML_Parser parser, const XML_Char *base);

/* NOTE(review): presumably returns the base most recently set with
XML_SetBase -- confirm against the implementation. */
const XML_Char XMLPARSEAPI *
XML_GetBase(XML_Parser parser);

/* Returns the number of the attributes passed in last call to the
XML_StartElementHandler that were specified in the start-tag rather
than defaulted. */

int XMLPARSEAPI XML_GetSpecifiedAttributeCount(XML_Parser parser);

/* Parses some input. Returns 0 if a fatal error is detected.
The last call to XML_Parse must have isFinal true;
len may be zero for this call (or any other). */
int XMLPARSEAPI
XML_Parse(XML_Parser parser, const char *s, int len, int isFinal);

/* NOTE(review): XML_GetBuffer and XML_ParseBuffer are not described in
this header. Presumably XML_GetBuffer hands out a parser-owned buffer of
at least len bytes for the caller to fill, and XML_ParseBuffer then parses
the first len bytes of it with the same return convention as XML_Parse --
confirm against the implementation. */
void XMLPARSEAPI *
XML_GetBuffer(XML_Parser parser, int len);

int XMLPARSEAPI
XML_ParseBuffer(XML_Parser parser, int len, int isFinal);

/* Creates an XML_Parser object that can parse an external general entity;
context is a '\0'-terminated string specifying the parse context;
encoding is a '\0'-terminated string giving the name of the externally specified encoding,
or null if there is no externally specified encoding.
The context string consists of a sequence of tokens separated by formfeeds (\f);
a token consisting of a name specifies that the general entity of the name
is open; a token of the form prefix=uri specifies the namespace for a particular
prefix; a token of the form =uri specifies the default namespace.
This can be called at any point after the first call to an ExternalEntityRefHandler
so long as the parser has not yet been freed.
The new parser is completely independent and may safely be used in a separate thread.
The handlers and userData are initialized from the parser argument.
Returns 0 if out of memory.  Otherwise returns a new XML_Parser object. */
XML_Parser XMLPARSEAPI
XML_ExternalEntityParserCreate(XML_Parser parser,
                               const XML_Char *context,
                               const XML_Char *encoding);

enum XML_ParamEntityParsing {
  XML_PARAM_ENTITY_PARSING_NEVER,
  XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE,
  XML_PARAM_ENTITY_PARSING_ALWAYS
};

/* Controls parsing of parameter entities (including the external DTD
subset). If parsing of parameter entities is enabled, then references
to external parameter entities (including the external DTD subset)
will be passed to the handler set with
XML_SetExternalEntityRefHandler.  The context passed will be 0.
Unlike external general entities, external parameter entities can only
be parsed synchronously.  If the external parameter entity is to be
parsed, it must be parsed during the call to the external entity ref
handler: the complete sequence of XML_ExternalEntityParserCreate,
XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during
this call.  After XML_ExternalEntityParserCreate has been called to
create the parser for the external parameter entity (context must be 0
for this call), it is illegal to make any calls on the old parser
until XML_ParserFree has been called on the newly created parser.  If
the library has been compiled without support for parameter entity
parsing (ie without XML_DTD being defined), then
XML_SetParamEntityParsing will return 0 if parsing of parameter
entities is requested; otherwise it will return non-zero. */

int XMLPARSEAPI
XML_SetParamEntityParsing(XML_Parser parser,
                          enum XML_ParamEntityParsing parsing);

/* Error codes reported by XML_GetErrorCode.  The enumerators are numbered
implicitly from 0 in declaration order, so inserting or reordering
entries changes the numeric values seen by existing callers. */
enum XML_Error {
  XML_ERROR_NONE,
  XML_ERROR_NO_MEMORY,
  XML_ERROR_SYNTAX,
  XML_ERROR_NO_ELEMENTS,
  XML_ERROR_INVALID_TOKEN,
  XML_ERROR_UNCLOSED_TOKEN,
  XML_ERROR_PARTIAL_CHAR,
  XML_ERROR_TAG_MISMATCH,
  XML_ERROR_DUPLICATE_ATTRIBUTE,
  XML_ERROR_JUNK_AFTER_DOC_ELEMENT,
  XML_ERROR_PARAM_ENTITY_REF,
  XML_ERROR_UNDEFINED_ENTITY,
  XML_ERROR_RECURSIVE_ENTITY_REF,
  XML_ERROR_ASYNC_ENTITY,
  XML_ERROR_BAD_CHAR_REF,
  XML_ERROR_BINARY_ENTITY_REF,
  XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
  XML_ERROR_MISPLACED_XML_PI,
  XML_ERROR_UNKNOWN_ENCODING,
  XML_ERROR_INCORRECT_ENCODING,
  XML_ERROR_UNCLOSED_CDATA_SECTION,
  XML_ERROR_EXTERNAL_ENTITY_HANDLING,
  XML_ERROR_NOT_STANDALONE,
  XML_ERROR_DUPLICATE_ELEMENT
};

/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode
returns information about the error. */

enum XML_Error XMLPARSEAPI XML_GetErrorCode(XML_Parser parser);

/* These functions return information about the current parse location.
They may be called when XML_Parse or XML_ParseBuffer return 0;
in this case the location is the location of the character at which
the error was detected.
They may also be called from any other callback called to report
some parse event; in this case the location is the location of the first
of the sequence of characters that generated the event. */

int XMLPARSEAPI XML_GetCurrentLineNumber(XML_Parser parser);
int XMLPARSEAPI XML_GetCurrentColumnNumber(XML_Parser parser);
long XMLPARSEAPI XML_GetCurrentByteIndex(XML_Parser parser);

/* Return the number of bytes in the current event.
Returns 0 if the event is in an internal entity. */

int XMLPARSEAPI XML_GetCurrentByteCount(XML_Parser parser);

/* For backwards compatibility with previous versions. */
#define XML_GetErrorLineNumber XML_GetCurrentLineNumber
#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
#define XML_GetErrorByteIndex XML_GetCurrentByteIndex

/* Frees memory used by the parser. */
void XMLPARSEAPI
XML_ParserFree(XML_Parser parser);

/* Returns a string describing the error.
NOTE(review): code is presumably an enum XML_Error value as returned by
XML_GetErrorCode -- confirm against the implementation. */
const XML_LChar XMLPARSEAPI *XML_ErrorString(int code);

#ifdef __cplusplus
}
#endif

#endif /* not XmlParse_INCLUDED */