1/* 2The contents of this file are subject to the Mozilla Public License 3Version 1.0 (the "License"); you may not use this file except in 4compliance with the License. You may obtain a copy of the License at 5http://www.mozilla.org/MPL/ 6 7Software distributed under the License is distributed on an "AS IS" 8basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 9License for the specific language governing rights and limitations 10under the License. 11 12The Original Code is expat. 13 14The Initial Developer of the Original Code is James Clark. 15Portions created by James Clark are Copyright (C) 1998 16James Clark. All Rights Reserved. 17 18Contributor(s): 19*/ 20 21#include <stdlib.h> 22#include <string.h> 23#include <stddef.h> 24 25#include "xmldef.h" 26 27#ifdef XML_UNICODE 28#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX 29#define XmlConvert XmlUtf16Convert 30#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding 31#define XmlEncode XmlUtf16Encode 32#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) 33typedef unsigned short ICHAR; 34#else 35#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX 36#define XmlConvert XmlUtf8Convert 37#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding 38#define XmlEncode XmlUtf8Encode 39#define MUST_CONVERT(enc, s) (!(enc)->isUtf8) 40typedef char ICHAR; 41#endif 42 43#ifdef XML_UNICODE_WCHAR_T 44#define XML_T(x) L ## x 45#else 46#define XML_T(x) x 47#endif 48 49/* Round up n to be a multiple of sz, where sz is a power of 2. */ 50#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) 51 52#include "xmlparse.h" 53#include "xmltok.h" 54#include "xmlrole.h" 55#include "hashtable.h" 56 57#define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ 58#define INIT_DATA_BUF_SIZE 1024 59#define INIT_ATTS_SIZE 16 60#define INIT_BLOCK_SIZE 1024 61#define INIT_BUFFER_SIZE 1024 62 63typedef struct tag { 64 struct tag *parent; 65 const char *rawName; 66 size_t rawNameLength; 67 const XML_Char *name; 68 char *buf; 69 char *bufEnd; 70} TAG; 71 72typedef struct { 73 const XML_Char *name; 74 const XML_Char *textPtr; 75 int textLen; 76 const XML_Char *systemId; 77 const XML_Char *base; 78 const XML_Char *publicId; 79 const XML_Char *notation; 80 char open; 81} ENTITY; 82 83typedef struct block { 84 struct block *next; 85 int size; 86 XML_Char s[1]; 87} BLOCK; 88 89typedef struct { 90 BLOCK *blocks; 91 BLOCK *freeBlocks; 92 const XML_Char *end; 93 XML_Char *ptr; 94 XML_Char *start; 95} STRING_POOL; 96 97/* The XML_Char before the name is used to determine whether 98an attribute has been specified. */ 99typedef struct { 100 XML_Char *name; 101 char maybeTokenized; 102} ATTRIBUTE_ID; 103 104typedef struct { 105 const ATTRIBUTE_ID *id; 106 char isCdata; 107 const XML_Char *value; 108} DEFAULT_ATTRIBUTE; 109 110typedef struct { 111 const XML_Char *name; 112 int nDefaultAtts; 113 int allocDefaultAtts; 114 DEFAULT_ATTRIBUTE *defaultAtts; 115} ELEMENT_TYPE; 116 117typedef struct { 118 HASH_TABLE generalEntities; 119 HASH_TABLE elementTypes; 120 HASH_TABLE attributeIds; 121 STRING_POOL pool; 122 int complete; 123 int standalone; 124 const XML_Char *base; 125} DTD; 126 127typedef enum XML_Error Processor(XML_Parser parser, 128 const char *start, 129 const char *end, 130 const char **endPtr); 131 132static Processor prologProcessor; 133static Processor prologInitProcessor; 134static Processor contentProcessor; 135static Processor cdataSectionProcessor; 136static Processor epilogProcessor; 137static Processor errorProcessor; 138static Processor externalEntityInitProcessor; 139static Processor externalEntityInitProcessor2; 140static Processor externalEntityInitProcessor3; 141static Processor externalEntityContentProcessor; 142 143static enum XML_Error 144handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); 145static enum XML_Error 146processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *); 147static enum XML_Error 148initializeEncoding(XML_Parser parser); 149static enum XML_Error 150doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, 151 const char *start, const char *end, const char **endPtr); 152static enum XML_Error 153doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); 154static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const XML_Char *tagName, const char *s); 155static int 156defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue); 157static enum XML_Error 158storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, 159 STRING_POOL *); 160static enum XML_Error 161appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, 162 STRING_POOL *); 163static ATTRIBUTE_ID * 164getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); 165static enum XML_Error 166storeEntityValue(XML_Parser parser, const char *start, const char *end); 167static int 168reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); 169static void 170reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); 171 172static const XML_Char *getOpenEntityNames(XML_Parser parser); 173static int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames); 174static void normalizePublicId(XML_Char *s); 175static int dtdInit(DTD *); 176static void dtdDestroy(DTD *); 177static int dtdCopy(DTD *newDtd, const DTD *oldDtd); 178static void poolInit(STRING_POOL *); 179static void poolClear(STRING_POOL *); 180static void poolDestroy(STRING_POOL *); 181static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, 182 const char *ptr, const char *end); 183static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, 184 const char *ptr, const char *end); 185static int poolGrow(STRING_POOL *pool); 186static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s); 187static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); 188 189#define poolStart(pool) ((pool)->start) 190#define poolEnd(pool) ((pool)->ptr) 191#define poolLength(pool) ((pool)->ptr - (pool)->start) 192#define poolChop(pool) ((void)--(pool->ptr)) 193#define poolLastChar(pool) (((pool)->ptr)[-1]) 194#define poolDiscard(pool) ((pool)->ptr = (pool)->start) 195#define poolFinish(pool) ((pool)->start = (pool)->ptr) 196#define poolAppendChar(pool, c) \ 197 (((pool)->ptr == (pool)->end && !poolGrow(pool)) \ 198 ? 0 \ 199 : ((*((pool)->ptr)++ = c), 1)) 200 201typedef struct { 202 /* The first member must be userData so that the XML_GetUserData macro works. */ 203 void *userData; 204 void *handlerArg; 205 char *buffer; 206 /* first character to be parsed */ 207 const char *bufferPtr; 208 /* past last character to be parsed */ 209 char *bufferEnd; 210 /* allocated end of buffer */ 211 const char *bufferLim; 212 long parseEndByteIndex; 213 const char *parseEndPtr; 214 XML_Char *dataBuf; 215 XML_Char *dataBufEnd; 216 XML_StartElementHandler startElementHandler; 217 XML_EndElementHandler endElementHandler; 218 XML_CharacterDataHandler characterDataHandler; 219 XML_ProcessingInstructionHandler processingInstructionHandler; 220 XML_DefaultHandler defaultHandler; 221 XML_UnparsedEntityDeclHandler unparsedEntityDeclHandler; 222 XML_NotationDeclHandler notationDeclHandler; 223 XML_ExternalEntityRefHandler externalEntityRefHandler; 224 XML_UnknownEncodingHandler unknownEncodingHandler; 225 const ENCODING *encoding; 226 INIT_ENCODING initEncoding; 227 const XML_Char *protocolEncodingName; 228 void *unknownEncodingMem; 229 void *unknownEncodingData; 230 void *unknownEncodingHandlerData; 231 void (*unknownEncodingRelease)(void *); 232 PROLOG_STATE prologState; 233 Processor *processor; 234 enum XML_Error errorCode; 235 const char *eventPtr; 236 const char *eventEndPtr; 237 const char *positionPtr; 238 int tagLevel; 239 ENTITY *declEntity; 240 const XML_Char *declNotationName; 241 const XML_Char *declNotationPublicId; 242 ELEMENT_TYPE *declElementType; 243 ATTRIBUTE_ID *declAttributeId; 244 char declAttributeIsCdata; 245 DTD dtd; 246 TAG *tagStack; 247 TAG *freeTagList; 248 int attsSize; 249 ATTRIBUTE *atts; 250 POSITION position; 251 STRING_POOL tempPool; 252 STRING_POOL temp2Pool; 253 char *groupConnector; 254 unsigned groupSize; 255 int hadExternalDoctype; 256} Parser; 257 258#define userData (((Parser *)parser)->userData) 259#define handlerArg (((Parser *)parser)->handlerArg) 260#define startElementHandler (((Parser *)parser)->startElementHandler) 261#define endElementHandler (((Parser *)parser)->endElementHandler) 262#define characterDataHandler (((Parser *)parser)->characterDataHandler) 263#define processingInstructionHandler (((Parser *)parser)->processingInstructionHandler) 264#define defaultHandler (((Parser *)parser)->defaultHandler) 265#define unparsedEntityDeclHandler (((Parser *)parser)->unparsedEntityDeclHandler) 266#define notationDeclHandler (((Parser *)parser)->notationDeclHandler) 267#define externalEntityRefHandler (((Parser *)parser)->externalEntityRefHandler) 268#define unknownEncodingHandler (((Parser *)parser)->unknownEncodingHandler) 269#define encoding (((Parser *)parser)->encoding) 270#define initEncoding (((Parser *)parser)->initEncoding) 271#define unknownEncodingMem (((Parser *)parser)->unknownEncodingMem) 272#define unknownEncodingData (((Parser *)parser)->unknownEncodingData) 273#define unknownEncodingHandlerData \ 274 (((Parser *)parser)->unknownEncodingHandlerData) 275#define unknownEncodingRelease (((Parser *)parser)->unknownEncodingRelease) 276#define protocolEncodingName (((Parser *)parser)->protocolEncodingName) 277#define prologState (((Parser *)parser)->prologState) 278#define processor (((Parser *)parser)->processor) 279#define errorCode (((Parser *)parser)->errorCode) 280#define eventPtr (((Parser *)parser)->eventPtr) 281#define eventEndPtr (((Parser *)parser)->eventEndPtr) 282#define positionPtr (((Parser *)parser)->positionPtr) 283#define position (((Parser *)parser)->position) 284#define tagLevel (((Parser *)parser)->tagLevel) 285#define buffer (((Parser *)parser)->buffer) 286#define bufferPtr (((Parser *)parser)->bufferPtr) 287#define bufferEnd (((Parser *)parser)->bufferEnd) 288#define parseEndByteIndex (((Parser *)parser)->parseEndByteIndex) 289#define parseEndPtr (((Parser *)parser)->parseEndPtr) 290#define bufferLim (((Parser *)parser)->bufferLim) 291#define dataBuf (((Parser *)parser)->dataBuf) 292#define dataBufEnd (((Parser *)parser)->dataBufEnd) 293#define dtd (((Parser *)parser)->dtd) 294#define declEntity (((Parser *)parser)->declEntity) 295#define declNotationName (((Parser *)parser)->declNotationName) 296#define declNotationPublicId (((Parser *)parser)->declNotationPublicId) 297#define declElementType (((Parser *)parser)->declElementType) 298#define declAttributeId (((Parser *)parser)->declAttributeId) 299#define declAttributeIsCdata (((Parser *)parser)->declAttributeIsCdata) 300#define freeTagList (((Parser *)parser)->freeTagList) 301#define tagStack (((Parser *)parser)->tagStack) 302#define atts (((Parser *)parser)->atts) 303#define attsSize (((Parser *)parser)->attsSize) 304#define tempPool (((Parser *)parser)->tempPool) 305#define temp2Pool (((Parser *)parser)->temp2Pool) 306#define groupConnector (((Parser *)parser)->groupConnector) 307#define groupSize (((Parser *)parser)->groupSize) 308#define hadExternalDoctype (((Parser *)parser)->hadExternalDoctype) 309 310XML_Parser XML_ParserCreate(const XML_Char *encodingName) 311{ 312 XML_Parser parser = malloc(sizeof(Parser)); 313 if (!parser) 314 return parser; 315 processor = prologInitProcessor; 316 XmlPrologStateInit(&prologState); 317 userData = 0; 318 handlerArg = 0; 319 startElementHandler = 0; 320 endElementHandler = 0; 321 characterDataHandler = 0; 322 processingInstructionHandler = 0; 323 defaultHandler = 0; 324 unparsedEntityDeclHandler = 0; 325 notationDeclHandler = 0; 326 externalEntityRefHandler = 0; 327 unknownEncodingHandler = 0; 328 buffer = 0; 329 bufferPtr = 0; 330 bufferEnd = 0; 331 parseEndByteIndex = 0; 332 parseEndPtr = 0; 333 bufferLim = 0; 334 declElementType = 0; 335 declAttributeId = 0; 336 declEntity = 0; 337 declNotationName = 0; 338 declNotationPublicId = 0; 339 memset(&position, 0, sizeof(POSITION)); 340 errorCode = XML_ERROR_NONE; 341 eventPtr = 0; 342 eventEndPtr = 0; 343 positionPtr = 0; 344 tagLevel = 0; 345 tagStack = 0; 346 freeTagList = 0; 347 attsSize = INIT_ATTS_SIZE; 348 atts = malloc(attsSize * sizeof(ATTRIBUTE)); 349 dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); 350 groupSize = 0; 351 groupConnector = 0; 352 hadExternalDoctype = 0; 353 unknownEncodingMem = 0; 354 unknownEncodingRelease = 0; 355 unknownEncodingData = 0; 356 unknownEncodingHandlerData = 0; 357 poolInit(&tempPool); 358 poolInit(&temp2Pool); 359 protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0; 360 if (!dtdInit(&dtd) || !atts || !dataBuf 361 || (encodingName && !protocolEncodingName)) { 362 XML_ParserFree(parser); 363 return 0; 364 } 365 dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; 366 XmlInitEncoding(&initEncoding, &encoding, 0); 367 return parser; 368} 369 370XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, 371 const XML_Char *openEntityNames, 372 const XML_Char *encodingName) 373{ 374 XML_Parser parser = oldParser; 375 DTD *oldDtd = &dtd; 376 XML_StartElementHandler oldStartElementHandler = startElementHandler; 377 XML_EndElementHandler oldEndElementHandler = endElementHandler; 378 XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; 379 XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler; 380 XML_DefaultHandler oldDefaultHandler = defaultHandler; 381 XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler; 382 XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler; 383 void *oldUserData = userData; 384 void *oldHandlerArg = handlerArg; 385 386 parser = XML_ParserCreate(encodingName); 387 if (!parser) 388 return 0; 389 startElementHandler = oldStartElementHandler; 390 endElementHandler = oldEndElementHandler; 391 characterDataHandler = oldCharacterDataHandler; 392 processingInstructionHandler = oldProcessingInstructionHandler; 393 defaultHandler = oldDefaultHandler; 394 externalEntityRefHandler = oldExternalEntityRefHandler; 395 unknownEncodingHandler = oldUnknownEncodingHandler; 396 userData = oldUserData; 397 if (oldUserData == oldHandlerArg) 398 handlerArg = userData; 399 else 400 handlerArg = parser; 401 if (!dtdCopy(&dtd, oldDtd) || !setOpenEntityNames(parser, openEntityNames)) { 402 XML_ParserFree(parser); 403 return 0; 404 } 405 processor = externalEntityInitProcessor; 406 return parser; 407} 408 409void XML_ParserFree(XML_Parser parser) 410{ 411 for (;;) { 412 TAG *p; 413 if (tagStack == 0) { 414 if (freeTagList == 0) 415 break; 416 tagStack = freeTagList; 417 freeTagList = 0; 418 } 419 p = tagStack; 420 tagStack = tagStack->parent; 421 free(p->buf); 422 free(p); 423 } 424 poolDestroy(&tempPool); 425 poolDestroy(&temp2Pool); 426 dtdDestroy(&dtd); 427 free((void *)atts); 428 free(groupConnector); 429 free(buffer); 430 free(dataBuf); 431 free(unknownEncodingMem); 432 if (unknownEncodingRelease) 433 unknownEncodingRelease(unknownEncodingData); 434 free(parser); 435} 436 437void XML_UseParserAsHandlerArg(XML_Parser parser) 438{ 439 handlerArg = parser; 440} 441 442void XML_SetUserData(XML_Parser parser, void *p) 443{ 444 if (handlerArg == userData) 445 handlerArg = userData = p; 446 else 447 userData = p; 448} 449 450int XML_SetBase(XML_Parser parser, const XML_Char *p) 451{ 452 if (p) { 453 p = poolCopyString(&dtd.pool, p); 454 if (!p) 455 return 0; 456 dtd.base = p; 457 } 458 else 459 dtd.base = 0; 460 return 1; 461} 462 463const XML_Char *XML_GetBase(XML_Parser parser) 464{ 465 return dtd.base; 466} 467 468void XML_SetElementHandler(XML_Parser parser, 469 XML_StartElementHandler start, 470 XML_EndElementHandler end) 471{ 472 startElementHandler = start; 473 endElementHandler = end; 474} 475 476void XML_SetCharacterDataHandler(XML_Parser parser, 477 XML_CharacterDataHandler handler) 478{ 479 characterDataHandler = handler; 480} 481 482void XML_SetProcessingInstructionHandler(XML_Parser parser, 483 XML_ProcessingInstructionHandler handler) 484{ 485 processingInstructionHandler = handler; 486} 487 488void XML_SetDefaultHandler(XML_Parser parser, 489 XML_DefaultHandler handler) 490{ 491 defaultHandler = handler; 492} 493 494void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, 495 XML_UnparsedEntityDeclHandler handler) 496{ 497 unparsedEntityDeclHandler = handler; 498} 499 500void XML_SetNotationDeclHandler(XML_Parser parser, 501 XML_NotationDeclHandler handler) 502{ 503 notationDeclHandler = handler; 504} 505 506void XML_SetExternalEntityRefHandler(XML_Parser parser, 507 XML_ExternalEntityRefHandler handler) 508{ 509 externalEntityRefHandler = handler; 510} 511 512void XML_SetUnknownEncodingHandler(XML_Parser parser, 513 XML_UnknownEncodingHandler handler, 514 void *data) 515{ 516 unknownEncodingHandler = handler; 517 unknownEncodingHandlerData = data; 518} 519 520int XML_Parse(XML_Parser parser, const char *s, size_t len, int isFinal) 521{ 522 if (len == 0) { 523 if (!isFinal) 524 return 1; 525 errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0); 526 if (errorCode == XML_ERROR_NONE) 527 return 1; 528 eventEndPtr = eventPtr; 529 return 0; 530 } 531 else if (bufferPtr == bufferEnd) { 532 const char *end; 533 size_t nLeftOver; 534 parseEndByteIndex += len; 535 positionPtr = s; 536 if (isFinal) { 537 errorCode = processor(parser, s, parseEndPtr = s + len, 0); 538 if (errorCode == XML_ERROR_NONE) 539 return 1; 540 eventEndPtr = eventPtr; 541 return 0; 542 } 543 errorCode = processor(parser, s, parseEndPtr = s + len, &end); 544 if (errorCode != XML_ERROR_NONE) { 545 eventEndPtr = eventPtr; 546 return 0; 547 } 548 XmlUpdatePosition(encoding, positionPtr, end, &position); 549 nLeftOver = s + len - end; 550 if (nLeftOver) { 551 if (buffer == 0 || nLeftOver > bufferLim - buffer) { 552 /* FIXME avoid integer overflow */ 553 buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2); 554 if (!buffer) { 555 errorCode = XML_ERROR_NO_MEMORY; 556 eventPtr = eventEndPtr = 0; 557 return 0; 558 } 559 bufferLim = buffer + len * 2; 560 } 561 memcpy(buffer, end, nLeftOver); 562 bufferPtr = buffer; 563 bufferEnd = buffer + nLeftOver; 564 } 565 return 1; 566 } 567 else { 568 memcpy(XML_GetBuffer(parser, len), s, len); 569 return XML_ParseBuffer(parser, len, isFinal); 570 } 571} 572 573int XML_ParseBuffer(XML_Parser parser, size_t len, int isFinal) 574{ 575 const char *start = bufferPtr; 576 positionPtr = start; 577 bufferEnd += len; 578 parseEndByteIndex += len; 579 errorCode = processor(parser, start, parseEndPtr = bufferEnd, 580 isFinal ? (const char **)0 : &bufferPtr); 581 if (errorCode == XML_ERROR_NONE) { 582 if (!isFinal) 583 XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); 584 return 1; 585 } 586 else { 587 eventEndPtr = eventPtr; 588 return 0; 589 } 590} 591 592void *XML_GetBuffer(XML_Parser parser, size_t len) 593{ 594 if (len > bufferLim - bufferEnd) { 595 /* FIXME avoid integer overflow */ 596 size_t neededSize = len + (bufferEnd - bufferPtr); 597 if (neededSize <= bufferLim - buffer) { 598 memmove(buffer, bufferPtr, (size_t)(bufferEnd - bufferPtr)); 599 bufferEnd = buffer + (bufferEnd - bufferPtr); 600 bufferPtr = buffer; 601 } 602 else { 603 char *newBuf; 604 size_t bufferSize = bufferLim - bufferPtr; 605 if (bufferSize == 0) 606 bufferSize = INIT_BUFFER_SIZE; 607 do { 608 bufferSize *= 2; 609 } while (bufferSize < neededSize); 610 newBuf = malloc(bufferSize); 611 if (newBuf == 0) { 612 errorCode = XML_ERROR_NO_MEMORY; 613 return 0; 614 } 615 bufferLim = newBuf + bufferSize; 616 if (bufferPtr) { 617 memcpy(newBuf, bufferPtr, (size_t)(bufferEnd - bufferPtr)); 618 free(buffer); 619 } 620 bufferEnd = newBuf + (bufferEnd - bufferPtr); 621 bufferPtr = buffer = newBuf; 622 } 623 } 624 return bufferEnd; 625} 626 627enum XML_Error XML_GetErrorCode(XML_Parser parser) 628{ 629 return errorCode; 630} 631 632long XML_GetCurrentByteIndex(XML_Parser parser) 633{ 634 if (eventPtr) 635 return parseEndByteIndex - (parseEndPtr - eventPtr); 636 return -1; 637} 638 639int XML_GetCurrentLineNumber(XML_Parser parser) 640{ 641 if (eventPtr) { 642 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); 643 positionPtr = eventPtr; 644 } 645 return position.lineNumber + 1; 646} 647 648int XML_GetCurrentColumnNumber(XML_Parser parser) 649{ 650 if (eventPtr) { 651 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); 652 positionPtr = eventPtr; 653 } 654 return position.columnNumber; 655} 656 657void XML_DefaultCurrent(XML_Parser parser) 658{ 659 if (defaultHandler) 660 reportDefault(parser, encoding, eventPtr, eventEndPtr); 661} 662 663const XML_LChar *XML_ErrorString(enum XML_Error code) 664{ 665 static const XML_LChar *message[] = { 666 0, 667 XML_T("out of memory"), 668 XML_T("syntax error"), 669 XML_T("no element found"), 670 XML_T("not well-formed"), 671 XML_T("unclosed token"), 672 XML_T("unclosed token"), 673 XML_T("mismatched tag"), 674 XML_T("duplicate attribute"), 675 XML_T("junk after document element"), 676 XML_T("illegal parameter entity reference"), 677 XML_T("undefined entity"), 678 XML_T("recursive entity reference"), 679 XML_T("asynchronous entity"), 680 XML_T("reference to invalid character number"), 681 XML_T("reference to binary entity"), 682 XML_T("reference to external entity in attribute"), 683 XML_T("xml processing instruction not at start of external entity"), 684 XML_T("unknown encoding"), 685 XML_T("encoding specified in XML declaration is incorrect"), 686 XML_T("unclosed CDATA section"), 687 XML_T("error in processing external entity reference") 688 }; 689 if (code > 0 && code < sizeof(message)/sizeof(message[0])) 690 return message[code]; 691 return 0; 692} 693 694static 695enum XML_Error contentProcessor(XML_Parser parser, 696 const char *start, 697 const char *end, 698 const char **endPtr) 699{ 700 return doContent(parser, 0, encoding, start, end, endPtr); 701} 702 703static 704enum XML_Error externalEntityInitProcessor(XML_Parser parser, 705 const char *start, 706 const char *end, 707 const char **endPtr) 708{ 709 enum XML_Error result = initializeEncoding(parser); 710 if (result != XML_ERROR_NONE) 711 return result; 712 processor = externalEntityInitProcessor2; 713 return externalEntityInitProcessor2(parser, start, end, endPtr); 714} 715 716static 717enum XML_Error externalEntityInitProcessor2(XML_Parser parser, 718 const char *start, 719 const char *end, 720 const char **endPtr) 721{ 722 const char *next; 723 int tok = XmlContentTok(encoding, start, end, &next); 724 switch (tok) { 725 case XML_TOK_BOM: 726 start = next; 727 break; 728 case XML_TOK_PARTIAL: 729 if (endPtr) { 730 *endPtr = start; 731 return XML_ERROR_NONE; 732 } 733 eventPtr = start; 734 return XML_ERROR_UNCLOSED_TOKEN; 735 case XML_TOK_PARTIAL_CHAR: 736 if (endPtr) { 737 *endPtr = start; 738 return XML_ERROR_NONE; 739 } 740 eventPtr = start; 741 return XML_ERROR_PARTIAL_CHAR; 742 } 743 processor = externalEntityInitProcessor3; 744 return externalEntityInitProcessor3(parser, start, end, endPtr); 745} 746 747static 748enum XML_Error externalEntityInitProcessor3(XML_Parser parser, 749 const char *start, 750 const char *end, 751 const char **endPtr) 752{ 753 const char *next; 754 int tok = XmlContentTok(encoding, start, end, &next); 755 switch (tok) { 756 case XML_TOK_XML_DECL: 757 { 758 enum XML_Error result = processXmlDecl(parser, 1, start, next); 759 if (result != XML_ERROR_NONE) 760 return result; 761 start = next; 762 } 763 break; 764 case XML_TOK_PARTIAL: 765 if (endPtr) { 766 *endPtr = start; 767 return XML_ERROR_NONE; 768 } 769 eventPtr = start; 770 return XML_ERROR_UNCLOSED_TOKEN; 771 case XML_TOK_PARTIAL_CHAR: 772 if (endPtr) { 773 *endPtr = start; 774 return XML_ERROR_NONE; 775 } 776 eventPtr = start; 777 return XML_ERROR_PARTIAL_CHAR; 778 } 779 processor = externalEntityContentProcessor; 780 tagLevel = 1; 781 return doContent(parser, 1, encoding, start, end, endPtr); 782} 783 784static 785enum XML_Error externalEntityContentProcessor(XML_Parser parser, 786 const char *start, 787 const char *end, 788 const char **endPtr) 789{ 790 return doContent(parser, 1, encoding, start, end, endPtr); 791} 792 793static enum XML_Error 794doContent(XML_Parser parser, 795 int startTagLevel, 796 const ENCODING *enc, 797 const char *s, 798 const char *end, 799 const char **nextPtr) 800{ 801 const ENCODING *internalEnc = XmlGetInternalEncoding(); 802 const char *dummy; 803 const char **eventPP; 804 const char **eventEndPP; 805 if (enc == encoding) { 806 eventPP = &eventPtr; 807 *eventPP = s; 808 eventEndPP = &eventEndPtr; 809 } 810 else 811 eventPP = eventEndPP = &dummy; 812 for (;;) { 813 const char *next; 814 int tok = XmlContentTok(enc, s, end, &next); 815 *eventEndPP = next; 816 switch (tok) { 817 case XML_TOK_TRAILING_CR: 818 if (nextPtr) { 819 *nextPtr = s; 820 return XML_ERROR_NONE; 821 } 822 *eventEndPP = end; 823 if (characterDataHandler) { 824 XML_Char c = XML_T('\n'); 825 characterDataHandler(handlerArg, &c, 1); 826 } 827 else if (defaultHandler) 828 reportDefault(parser, enc, s, end); 829 if (startTagLevel == 0) 830 return XML_ERROR_NO_ELEMENTS; 831 if (tagLevel != startTagLevel) 832 return XML_ERROR_ASYNC_ENTITY; 833 return XML_ERROR_NONE; 834 case XML_TOK_NONE: 835 if (nextPtr) { 836 *nextPtr = s; 837 return XML_ERROR_NONE; 838 } 839 if (startTagLevel > 0) { 840 if (tagLevel != startTagLevel) 841 return XML_ERROR_ASYNC_ENTITY; 842 return XML_ERROR_NONE; 843 } 844 return XML_ERROR_NO_ELEMENTS; 845 case XML_TOK_INVALID: 846 *eventPP = next; 847 return XML_ERROR_INVALID_TOKEN; 848 case XML_TOK_PARTIAL: 849 if (nextPtr) { 850 *nextPtr = s; 851 return XML_ERROR_NONE; 852 } 853 return XML_ERROR_UNCLOSED_TOKEN; 854 case XML_TOK_PARTIAL_CHAR: 855 if (nextPtr) { 856 *nextPtr = s; 857 return XML_ERROR_NONE; 858 } 859 return XML_ERROR_PARTIAL_CHAR; 860 case XML_TOK_ENTITY_REF: 861 { 862 const XML_Char *name; 863 ENTITY *entity; 864 XML_Char ch = XmlPredefinedEntityName(enc, 865 s + enc->minBytesPerChar, 866 next - enc->minBytesPerChar); 867 if (ch) { 868 if (characterDataHandler) 869 characterDataHandler(handlerArg, &ch, 1); 870 else if (defaultHandler) 871 reportDefault(parser, enc, s, next); 872 break; 873 } 874 name = poolStoreString(&dtd.pool, enc, 875 s + enc->minBytesPerChar, 876 next - enc->minBytesPerChar); 877 if (!name) 878 return XML_ERROR_NO_MEMORY; 879 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); 880 poolDiscard(&dtd.pool); 881 if (!entity) { 882 if (dtd.complete || dtd.standalone) 883 return XML_ERROR_UNDEFINED_ENTITY; 884 if (defaultHandler) 885 reportDefault(parser, enc, s, next); 886 break; 887 } 888 if (entity->open) 889 return XML_ERROR_RECURSIVE_ENTITY_REF; 890 if (entity->notation) 891 return XML_ERROR_BINARY_ENTITY_REF; 892 if (entity) { 893 if (entity->textPtr) { 894 enum XML_Error result; 895 if (defaultHandler) { 896 reportDefault(parser, enc, s, next); 897 break; 898 } 899 /* Protect against the possibility that somebody sets 900 the defaultHandler from inside another handler. */ 901 *eventEndPP = *eventPP; 902 entity->open = 1; 903 result = doContent(parser, 904 tagLevel, 905 internalEnc, 906 (char *)entity->textPtr, 907 (char *)(entity->textPtr + entity->textLen), 908 0); 909 entity->open = 0; 910 if (result) 911 return result; 912 } 913 else if (externalEntityRefHandler) { 914 const XML_Char *openEntityNames; 915 entity->open = 1; 916 openEntityNames = getOpenEntityNames(parser); 917 entity->open = 0; 918 if (!openEntityNames) 919 return XML_ERROR_NO_MEMORY; 920 if (!externalEntityRefHandler(parser, openEntityNames, dtd.base, entity->systemId, entity->publicId)) 921 return XML_ERROR_EXTERNAL_ENTITY_HANDLING; 922 } 923 else if (defaultHandler) 924 reportDefault(parser, enc, s, next); 925 } 926 break; 927 } 928 case XML_TOK_START_TAG_WITH_ATTS: 929 if (!startElementHandler) { 930 enum XML_Error result = storeAtts(parser, enc, 0, s); 931 if (result) 932 return result; 933 } 934 /* fall through */ 935 case XML_TOK_START_TAG_NO_ATTS: 936 { 937 TAG *tag; 938 if (freeTagList) { 939 tag = freeTagList; 940 freeTagList = freeTagList->parent; 941 } 942 else { 943 tag = malloc(sizeof(TAG)); 944 if (!tag) 945 return XML_ERROR_NO_MEMORY; 946 tag->buf = malloc(INIT_TAG_BUF_SIZE); 947 if (!tag->buf) 948 return XML_ERROR_NO_MEMORY; 949 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; 950 } 951 tag->parent = tagStack; 952 tagStack = tag; 953 tag->rawName = s + enc->minBytesPerChar; 954 tag->rawNameLength = XmlNameLength(enc, tag->rawName); 955 if (nextPtr) { 956 if (tag->rawNameLength > tag->bufEnd - tag->buf) { 957 size_t bufSize = tag->rawNameLength * 4; 958 bufSize = ROUND_UP(bufSize, sizeof(XML_Char)); 959 tag->buf = realloc(tag->buf, bufSize); 960 if (!tag->buf) 961 return XML_ERROR_NO_MEMORY; 962 tag->bufEnd = tag->buf + bufSize; 963 } 964 memcpy(tag->buf, tag->rawName, tag->rawNameLength); 965 tag->rawName = tag->buf; 966 } 967 ++tagLevel; 968 if (startElementHandler) { 969 enum XML_Error result; 970 XML_Char *toPtr; 971 for (;;) { 972 const char *rawNameEnd = tag->rawName + tag->rawNameLength; 973 const char *fromPtr = tag->rawName; 974 size_t bufSize; 975 if (nextPtr) 976 toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char))); 977 else 978 toPtr = (XML_Char *)tag->buf; 979 tag->name = toPtr; 980 XmlConvert(enc, 981 &fromPtr, rawNameEnd, 982 (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); 983 if (fromPtr == rawNameEnd) 984 break; 985 bufSize = (tag->bufEnd - tag->buf) << 1; 986 tag->buf = realloc(tag->buf, bufSize); 987 if (!tag->buf) 988 return XML_ERROR_NO_MEMORY; 989 tag->bufEnd = tag->buf + bufSize; 990 if (nextPtr) 991 tag->rawName = tag->buf; 992 } 993 *toPtr = XML_T('\0'); 994 result = storeAtts(parser, enc, tag->name, s); 995 if (result) 996 return result; 997 startElementHandler(handlerArg, tag->name, (const XML_Char **)atts); 998 poolClear(&tempPool); 999 } 1000 else { 1001 tag->name = 0; 1002 if (defaultHandler) 1003 reportDefault(parser, enc, s, next); 1004 } 1005 break; 1006 } 1007 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: 1008 if (!startElementHandler) { 1009 enum XML_Error result = storeAtts(parser, enc, 0, s); 1010 if (result) 1011 return result; 1012 } 1013 /* fall through */ 1014 case XML_TOK_EMPTY_ELEMENT_NO_ATTS: 1015 if (startElementHandler || endElementHandler) { 1016 const char *rawName = s + enc->minBytesPerChar; 1017 const XML_Char *name = poolStoreString(&tempPool, enc, rawName, 1018 rawName 1019 + XmlNameLength(enc, rawName)); 1020 if (!name) 1021 return XML_ERROR_NO_MEMORY; 1022 poolFinish(&tempPool); 1023 if (startElementHandler) { 1024 enum XML_Error result = storeAtts(parser, enc, name, s); 1025 if (result) 1026 return result; 1027 startElementHandler(handlerArg, name, (const XML_Char **)atts); 1028 } 1029 if (endElementHandler) { 1030 if (startElementHandler) 1031 *eventEndPP = *eventPP; 1032 endElementHandler(handlerArg, name); 1033 } 1034 poolClear(&tempPool); 1035 } 1036 else if (defaultHandler) 1037 reportDefault(parser, enc, s, next); 1038 if (tagLevel == 0) 1039 return epilogProcessor(parser, next, end, nextPtr); 1040 break; 1041 case XML_TOK_END_TAG: 1042 if (tagLevel == startTagLevel) 1043 return XML_ERROR_ASYNC_ENTITY; 1044 else { 1045 size_t len; 1046 const char *rawName; 1047 TAG *tag = tagStack; 1048 tagStack = tag->parent; 1049 tag->parent = freeTagList; 1050 freeTagList = tag; 1051 rawName = s + enc->minBytesPerChar*2; 1052 len = XmlNameLength(enc, rawName); 1053 if (len != tag->rawNameLength 1054 || memcmp(tag->rawName, rawName, len) != 0) { 1055 *eventPP = rawName; 1056 return XML_ERROR_TAG_MISMATCH; 1057 } 1058 --tagLevel; 1059 if (endElementHandler) { 1060 if (tag->name) 1061 endElementHandler(handlerArg, tag->name); 1062 else { 1063 const XML_Char *name = poolStoreString(&tempPool, enc, rawName, 1064 rawName + len); 1065 if (!name) 1066 return XML_ERROR_NO_MEMORY; 1067 endElementHandler(handlerArg, name); 1068 poolClear(&tempPool); 1069 } 1070 } 1071 else if (defaultHandler) 1072 reportDefault(parser, enc, s, next); 1073 if (tagLevel == 0) 1074 return epilogProcessor(parser, next, end, nextPtr); 1075 } 1076 break; 1077 case XML_TOK_CHAR_REF: 1078 { 1079 int n = XmlCharRefNumber(enc, s); 1080 if (n < 0) 1081 return XML_ERROR_BAD_CHAR_REF; 1082 if (characterDataHandler) { 1083 XML_Char buf[XML_ENCODE_MAX]; 1084 characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); 1085 } 1086 else if (defaultHandler) 1087 reportDefault(parser, enc, s, next); 1088 } 1089 break; 1090 case XML_TOK_XML_DECL: 1091 return XML_ERROR_MISPLACED_XML_PI; 1092 case XML_TOK_DATA_NEWLINE: 1093 if (characterDataHandler) { 1094 XML_Char c = XML_T('\n'); 1095 characterDataHandler(handlerArg, &c, 1); 1096 } 1097 else if (defaultHandler) 1098 reportDefault(parser, enc, s, next); 1099 break; 1100 case XML_TOK_CDATA_SECT_OPEN: 1101 { 1102 enum XML_Error result; 1103 if (characterDataHandler) 1104 characterDataHandler(handlerArg, dataBuf, 0); 1105 else if (defaultHandler) 1106 reportDefault(parser, enc, s, next); 1107 result = doCdataSection(parser, enc, &next, end, nextPtr); 1108 if (!next) { 1109 processor = cdataSectionProcessor; 1110 return result; 1111 } 1112 } 1113 break; 1114 case XML_TOK_TRAILING_RSQB: 1115 if (nextPtr) { 1116 *nextPtr = s; 1117 return XML_ERROR_NONE; 1118 } 1119 if (characterDataHandler) { 1120 if (MUST_CONVERT(enc, s)) { 1121 ICHAR *dataPtr = (ICHAR *)dataBuf; 1122 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); 1123 characterDataHandler(handlerArg, dataBuf, 1124 (size_t)(dataPtr - (ICHAR *)dataBuf)); 1125 } 1126 else 1127 characterDataHandler(handlerArg, 1128 (XML_Char *)s, 1129 (size_t)((XML_Char *)end - (XML_Char *)s)); 1130 } 1131 else if (defaultHandler) 1132 reportDefault(parser, enc, s, end); 1133 if (startTagLevel == 0) { 1134 *eventPP = end; 1135 return XML_ERROR_NO_ELEMENTS; 1136 } 1137 if (tagLevel != startTagLevel) { 1138 *eventPP = end; 1139 return XML_ERROR_ASYNC_ENTITY; 1140 } 1141 return XML_ERROR_NONE; 1142 case XML_TOK_DATA_CHARS: 1143 if (characterDataHandler) { 1144 if (MUST_CONVERT(enc, s)) { 1145 for (;;) { 1146 ICHAR *dataPtr = (ICHAR *)dataBuf; 1147 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); 1148 *eventEndPP = s; 1149 characterDataHandler(handlerArg, dataBuf, (size_t)(dataPtr - (ICHAR *)dataBuf)); 1150 if (s == next) 1151 break; 1152 *eventPP = s; 1153 } 1154 } 1155 else 1156 characterDataHandler(handlerArg, 1157 (XML_Char *)s, 1158 (size_t)((XML_Char *)next - (XML_Char *)s)); 1159 } 1160 else if (defaultHandler) 1161 reportDefault(parser, enc, s, next); 1162 break; 1163 case XML_TOK_PI: 1164 if (!reportProcessingInstruction(parser, enc, s, next)) 1165 return XML_ERROR_NO_MEMORY; 1166 break; 1167 default: 1168 if (defaultHandler) 1169 reportDefault(parser, enc, s, next); 1170 break; 1171 } 1172 *eventPP = s = next; 1173 } 1174 /* not reached */ 1175} 1176 1177/* If tagName is non-null, build a real list of attributes, 1178otherwise just check the attributes for well-formedness. */ 1179 1180static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, 1181 const XML_Char *tagName, const char *s) 1182{ 1183 ELEMENT_TYPE *elementType = 0; 1184 int nDefaultAtts = 0; 1185 const XML_Char **appAtts; 1186 int i; 1187 int n; 1188 1189 if (tagName) { 1190 elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagName, 0); 1191 if (elementType) 1192 nDefaultAtts = elementType->nDefaultAtts; 1193 } 1194 1195 n = XmlGetAttributes(enc, s, attsSize, atts); 1196 if (n + nDefaultAtts > attsSize) { 1197 int oldAttsSize = attsSize; 1198 attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; 1199 atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE)); 1200 if (!atts) 1201 return XML_ERROR_NO_MEMORY; 1202 if (n > oldAttsSize) 1203 XmlGetAttributes(enc, s, n, atts); 1204 } 1205 appAtts = (const XML_Char **)atts; 1206 for (i = 0; i < n; i++) { 1207 ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, 1208 atts[i].name 1209 + XmlNameLength(enc, atts[i].name)); 1210 if (!attId) 1211 return XML_ERROR_NO_MEMORY; 1212 if ((attId->name)[-1]) { 1213 if (enc == encoding) 1214 eventPtr = atts[i].name; 1215 return XML_ERROR_DUPLICATE_ATTRIBUTE; 1216 } 1217 (attId->name)[-1] = 1; 1218 appAtts[i << 1] = attId->name; 1219 if (!atts[i].normalized) { 1220 enum XML_Error result; 1221 int isCdata = 1; 1222 1223 if (attId->maybeTokenized) { 1224 int j; 1225 for (j = 0; j < nDefaultAtts; j++) { 1226 if (attId == elementType->defaultAtts[j].id) { 1227 isCdata = elementType->defaultAtts[j].isCdata; 1228 break; 1229 } 1230 } 1231 } 1232 1233 result = storeAttributeValue(parser, enc, isCdata, 1234 atts[i].valuePtr, atts[i].valueEnd, 1235 &tempPool); 1236 if (result) 1237 return result; 1238 if (tagName) { 1239 appAtts[(i << 1) + 1] = poolStart(&tempPool); 1240 poolFinish(&tempPool); 1241 } 1242 else 1243 poolDiscard(&tempPool); 1244 } 1245 else if (tagName) { 1246 appAtts[(i << 1) + 1] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd); 1247 if (appAtts[(i << 1) + 1] == 0) 1248 return XML_ERROR_NO_MEMORY; 1249 poolFinish(&tempPool); 1250 } 1251 } 1252 if (tagName) { 1253 int j; 1254 for (j = 0; j < nDefaultAtts; j++) { 1255 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j; 1256 if (!(da->id->name)[-1] && da->value) { 1257 (da->id->name)[-1] = 1; 1258 appAtts[i << 1] = da->id->name; 1259 appAtts[(i << 1) + 1] = da->value; 1260 i++; 1261 } 1262 } 1263 appAtts[i << 1] = 0; 1264 } 1265 while (i-- > 0) 1266 ((XML_Char *)appAtts[i << 1])[-1] = 0; 1267 return XML_ERROR_NONE; 1268} 1269 1270/* The idea here is to avoid using stack for each CDATA section when 1271the whole file is parsed with one call. */ 1272 1273static 1274enum XML_Error cdataSectionProcessor(XML_Parser parser, 1275 const char *start, 1276 const char *end, 1277 const char **endPtr) 1278{ 1279 enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr); 1280 if (start) { 1281 processor = contentProcessor; 1282 return contentProcessor(parser, start, end, endPtr); 1283 } 1284 return result; 1285} 1286 1287/* startPtr gets set to non-null is the section is closed, and to null if 1288the section is not yet closed. */ 1289 1290static 1291enum XML_Error doCdataSection(XML_Parser parser, 1292 const ENCODING *enc, 1293 const char **startPtr, 1294 const char *end, 1295 const char **nextPtr) 1296{ 1297 const char *s = *startPtr; 1298 const char *dummy; 1299 const char **eventPP; 1300 const char **eventEndPP; 1301 if (enc == encoding) { 1302 eventPP = &eventPtr; 1303 *eventPP = s; 1304 eventEndPP = &eventEndPtr; 1305 } 1306 else 1307 eventPP = eventEndPP = &dummy; 1308 *startPtr = 0; 1309 for (;;) { 1310 const char *next; 1311 int tok = XmlCdataSectionTok(enc, s, end, &next); 1312 *eventEndPP = next; 1313 switch (tok) { 1314 case XML_TOK_CDATA_SECT_CLOSE: 1315 if (characterDataHandler) 1316 characterDataHandler(handlerArg, dataBuf, 0); 1317 else if (defaultHandler) 1318 reportDefault(parser, enc, s, next); 1319 *startPtr = next; 1320 return XML_ERROR_NONE; 1321 case XML_TOK_DATA_NEWLINE: 1322 if (characterDataHandler) { 1323 XML_Char c = XML_T('\n'); 1324 characterDataHandler(handlerArg, &c, 1); 1325 } 1326 else if (defaultHandler) 1327 reportDefault(parser, enc, s, next); 1328 break; 1329 case XML_TOK_DATA_CHARS: 1330 if (characterDataHandler) { 1331 if (MUST_CONVERT(enc, s)) { 1332 for (;;) { 1333 ICHAR *dataPtr = (ICHAR *)dataBuf; 1334 XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); 1335 *eventEndPP = next; 1336 characterDataHandler(handlerArg, dataBuf, (size_t)(dataPtr - (ICHAR *)dataBuf)); 1337 if (s == next) 1338 break; 1339 *eventPP = s; 1340 } 1341 } 1342 else 1343 characterDataHandler(handlerArg, 1344 (XML_Char *)s, 1345 (size_t)((XML_Char *)next - (XML_Char *)s)); 1346 } 1347 else if (defaultHandler) 1348 reportDefault(parser, enc, s, next); 1349 break; 1350 case XML_TOK_INVALID: 1351 *eventPP = next; 1352 return XML_ERROR_INVALID_TOKEN; 1353 case XML_TOK_PARTIAL_CHAR: 1354 if (nextPtr) { 1355 *nextPtr = s; 1356 return XML_ERROR_NONE; 1357 } 1358 return XML_ERROR_PARTIAL_CHAR; 1359 case XML_TOK_PARTIAL: 1360 case XML_TOK_NONE: 1361 if (nextPtr) { 1362 *nextPtr = s; 1363 return XML_ERROR_NONE; 1364 } 1365 return XML_ERROR_UNCLOSED_CDATA_SECTION; 1366 default: 1367 abort(); 1368 } 1369 *eventPP = s = next; 1370 } 1371 /* not reached */ 1372} 1373 1374static enum XML_Error 1375initializeEncoding(XML_Parser parser) 1376{ 1377 const char *s; 1378#ifdef XML_UNICODE 1379 char encodingBuf[128]; 1380 if (!protocolEncodingName) 1381 s = 0; 1382 else { 1383 int i; 1384 for (i = 0; protocolEncodingName[i]; i++) { 1385 if (i == sizeof(encodingBuf) - 1 1386 || protocolEncodingName[i] >= 0x80 1387 || protocolEncodingName[i] < 0) { 1388 encodingBuf[0] = '\0'; 1389 break; 1390 } 1391 encodingBuf[i] = (char)protocolEncodingName[i]; 1392 } 1393 encodingBuf[i] = '\0'; 1394 s = encodingBuf; 1395 } 1396#else 1397 s = protocolEncodingName; 1398#endif 1399 if (XmlInitEncoding(&initEncoding, &encoding, s)) 1400 return XML_ERROR_NONE; 1401 return handleUnknownEncoding(parser, protocolEncodingName); 1402} 1403 1404static enum XML_Error 1405processXmlDecl(XML_Parser parser, int isGeneralTextEntity, 1406 const char *s, const char *next) 1407{ 1408 const char *encodingName = 0; 1409 const ENCODING *newEncoding = 0; 1410 const char *version; 1411 int standalone = -1; 1412 if (!XmlParseXmlDecl(isGeneralTextEntity, 1413 encoding, 1414 s, 1415 next, 1416 &eventPtr, 1417 &version, 1418 &encodingName, 1419 &newEncoding, 1420 &standalone)) 1421 return XML_ERROR_SYNTAX; 1422 if (defaultHandler) 1423 reportDefault(parser, encoding, s, next); 1424 if (!protocolEncodingName) { 1425 if (newEncoding) { 1426 if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { 1427 eventPtr = encodingName; 1428 return XML_ERROR_INCORRECT_ENCODING; 1429 } 1430 encoding = newEncoding; 1431 } 1432 else if (encodingName) { 1433 enum XML_Error result; 1434 const XML_Char *s = poolStoreString(&tempPool, 1435 encoding, 1436 encodingName, 1437 encodingName 1438 + XmlNameLength(encoding, encodingName)); 1439 if (!s) 1440 return XML_ERROR_NO_MEMORY; 1441 result = handleUnknownEncoding(parser, s); 1442 poolDiscard(&tempPool); 1443 if (result == XML_ERROR_UNKNOWN_ENCODING) 1444 eventPtr = encodingName; 1445 return result; 1446 } 1447 } 1448 if (!isGeneralTextEntity && standalone == 1) 1449 dtd.standalone = 1; 1450 return XML_ERROR_NONE; 1451} 1452 1453static enum XML_Error 1454handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) 1455{ 1456 if (unknownEncodingHandler) { 1457 XML_Encoding info; 1458 int i; 1459 for (i = 0; i < 256; i++) 1460 info.map[i] = -1; 1461 info.convert = 0; 1462 info.data = 0; 1463 info.release = 0; 1464 if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) { 1465 ENCODING *enc; 1466 unknownEncodingMem = malloc((size_t)XmlSizeOfUnknownEncoding()); 1467 if (!unknownEncodingMem) { 1468 if (info.release) 1469 info.release(info.data); 1470 return XML_ERROR_NO_MEMORY; 1471 } 1472 enc = XmlInitUnknownEncoding(unknownEncodingMem, 1473 info.map, 1474 info.convert, 1475 info.data); 1476 if (enc) { 1477 unknownEncodingData = info.data; 1478 unknownEncodingRelease = info.release; 1479 encoding = enc; 1480 return XML_ERROR_NONE; 1481 } 1482 } 1483 if (info.release) 1484 info.release(info.data); 1485 } 1486 return XML_ERROR_UNKNOWN_ENCODING; 1487} 1488 1489static enum XML_Error 1490prologInitProcessor(XML_Parser parser, 1491 const char *s, 1492 const char *end, 1493 const char **nextPtr) 1494{ 1495 enum XML_Error result = initializeEncoding(parser); 1496 if (result != XML_ERROR_NONE) 1497 return result; 1498 processor = prologProcessor; 1499 return prologProcessor(parser, s, end, nextPtr); 1500} 1501 1502static enum XML_Error 1503prologProcessor(XML_Parser parser, 1504 const char *s, 1505 const char *end, 1506 const char **nextPtr) 1507{ 1508 for (;;) { 1509 const char *next; 1510 int tok = XmlPrologTok(encoding, s, end, &next); 1511 if (tok <= 0) { 1512 if (nextPtr != 0 && tok != XML_TOK_INVALID) { 1513 *nextPtr = s; 1514 return XML_ERROR_NONE; 1515 } 1516 switch (tok) { 1517 case XML_TOK_INVALID: 1518 eventPtr = next; 1519 return XML_ERROR_INVALID_TOKEN; 1520 case XML_TOK_NONE: 1521 return XML_ERROR_NO_ELEMENTS; 1522 case XML_TOK_PARTIAL: 1523 return XML_ERROR_UNCLOSED_TOKEN; 1524 case XML_TOK_PARTIAL_CHAR: 1525 return XML_ERROR_PARTIAL_CHAR; 1526 case XML_TOK_TRAILING_CR: 1527 eventPtr = s + encoding->minBytesPerChar; 1528 return XML_ERROR_NO_ELEMENTS; 1529 default: 1530 abort(); 1531 } 1532 } 1533 switch (XmlTokenRole(&prologState, tok, s, next, encoding)) { 1534 case XML_ROLE_XML_DECL: 1535 { 1536 enum XML_Error result = processXmlDecl(parser, 0, s, next); 1537 if (result != XML_ERROR_NONE) 1538 return result; 1539 } 1540 break; 1541 case XML_ROLE_DOCTYPE_SYSTEM_ID: 1542 hadExternalDoctype = 1; 1543 break; 1544 case XML_ROLE_DOCTYPE_PUBLIC_ID: 1545 case XML_ROLE_ENTITY_PUBLIC_ID: 1546 if (!XmlIsPublicId(encoding, s, next, &eventPtr)) 1547 return XML_ERROR_SYNTAX; 1548 if (declEntity) { 1549 XML_Char *tem = poolStoreString(&dtd.pool, 1550 encoding, 1551 s + encoding->minBytesPerChar, 1552 next - encoding->minBytesPerChar); 1553 if (!tem) 1554 return XML_ERROR_NO_MEMORY; 1555 normalizePublicId(tem); 1556 declEntity->publicId = tem; 1557 poolFinish(&dtd.pool); 1558 } 1559 break; 1560 case XML_ROLE_INSTANCE_START: 1561 processor = contentProcessor; 1562 if (hadExternalDoctype) 1563 dtd.complete = 0; 1564 return contentProcessor(parser, s, end, nextPtr); 1565 case XML_ROLE_ATTLIST_ELEMENT_NAME: 1566 { 1567 const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next); 1568 if (!name) 1569 return XML_ERROR_NO_MEMORY; 1570 declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE)); 1571 if (!declElementType) 1572 return XML_ERROR_NO_MEMORY; 1573 if (declElementType->name != name) 1574 poolDiscard(&dtd.pool); 1575 else 1576 poolFinish(&dtd.pool); 1577 break; 1578 } 1579 case XML_ROLE_ATTRIBUTE_NAME: 1580 declAttributeId = getAttributeId(parser, encoding, s, next); 1581 if (!declAttributeId) 1582 return XML_ERROR_NO_MEMORY; 1583 declAttributeIsCdata = 0; 1584 break; 1585 case XML_ROLE_ATTRIBUTE_TYPE_CDATA: 1586 declAttributeIsCdata = 1; 1587 break; 1588 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: 1589 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: 1590 if (dtd.complete 1591 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0)) 1592 return XML_ERROR_NO_MEMORY; 1593 break; 1594 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: 1595 case XML_ROLE_FIXED_ATTRIBUTE_VALUE: 1596 { 1597 const XML_Char *attVal; 1598 enum XML_Error result 1599 = storeAttributeValue(parser, encoding, declAttributeIsCdata, 1600 s + encoding->minBytesPerChar, 1601 next - encoding->minBytesPerChar, 1602 &dtd.pool); 1603 if (result) 1604 return result; 1605 attVal = poolStart(&dtd.pool); 1606 poolFinish(&dtd.pool); 1607 if (dtd.complete 1608 && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal)) 1609 return XML_ERROR_NO_MEMORY; 1610 break; 1611 } 1612 case XML_ROLE_ENTITY_VALUE: 1613 { 1614 enum XML_Error result = storeEntityValue(parser, s, next); 1615 if (result != XML_ERROR_NONE) 1616 return result; 1617 } 1618 break; 1619 case XML_ROLE_ENTITY_SYSTEM_ID: 1620 if (declEntity) { 1621 declEntity->systemId = poolStoreString(&dtd.pool, encoding, 1622 s + encoding->minBytesPerChar, 1623 next - encoding->minBytesPerChar); 1624 if (!declEntity->systemId) 1625 return XML_ERROR_NO_MEMORY; 1626 declEntity->base = dtd.base; 1627 poolFinish(&dtd.pool); 1628 } 1629 break; 1630 case XML_ROLE_ENTITY_NOTATION_NAME: 1631 if (declEntity) { 1632 declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next); 1633 if (!declEntity->notation) 1634 return XML_ERROR_NO_MEMORY; 1635 poolFinish(&dtd.pool); 1636 if (unparsedEntityDeclHandler) { 1637 eventPtr = eventEndPtr = s; 1638 unparsedEntityDeclHandler(handlerArg, 1639 declEntity->name, 1640 declEntity->base, 1641 declEntity->systemId, 1642 declEntity->publicId, 1643 declEntity->notation); 1644 } 1645 1646 } 1647 break; 1648 case XML_ROLE_GENERAL_ENTITY_NAME: 1649 { 1650 const XML_Char *name; 1651 if (XmlPredefinedEntityName(encoding, s, next)) { 1652 declEntity = 0; 1653 break; 1654 } 1655 name = poolStoreString(&dtd.pool, encoding, s, next); 1656 if (!name) 1657 return XML_ERROR_NO_MEMORY; 1658 if (dtd.complete) { 1659 declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY)); 1660 if (!declEntity) 1661 return XML_ERROR_NO_MEMORY; 1662 if (declEntity->name != name) { 1663 poolDiscard(&dtd.pool); 1664 declEntity = 0; 1665 } 1666 else 1667 poolFinish(&dtd.pool); 1668 } 1669 else { 1670 poolDiscard(&dtd.pool); 1671 declEntity = 0; 1672 } 1673 } 1674 break; 1675 case XML_ROLE_PARAM_ENTITY_NAME: 1676 declEntity = 0; 1677 break; 1678 case XML_ROLE_NOTATION_NAME: 1679 declNotationPublicId = 0; 1680 declNotationName = 0; 1681 if (notationDeclHandler) { 1682 declNotationName = poolStoreString(&tempPool, encoding, s, next); 1683 if (!declNotationName) 1684 return XML_ERROR_NO_MEMORY; 1685 poolFinish(&tempPool); 1686 } 1687 break; 1688 case XML_ROLE_NOTATION_PUBLIC_ID: 1689 if (!XmlIsPublicId(encoding, s, next, &eventPtr)) 1690 return XML_ERROR_SYNTAX; 1691 if (declNotationName) { 1692 XML_Char *tem = poolStoreString(&tempPool, 1693 encoding, 1694 s + encoding->minBytesPerChar, 1695 next - encoding->minBytesPerChar); 1696 if (!tem) 1697 return XML_ERROR_NO_MEMORY; 1698 normalizePublicId(tem); 1699 declNotationPublicId = tem; 1700 poolFinish(&tempPool); 1701 } 1702 break; 1703 case XML_ROLE_NOTATION_SYSTEM_ID: 1704 if (declNotationName && notationDeclHandler) { 1705 const XML_Char *systemId 1706 = poolStoreString(&tempPool, encoding, 1707 s + encoding->minBytesPerChar, 1708 next - encoding->minBytesPerChar); 1709 if (!systemId) 1710 return XML_ERROR_NO_MEMORY; 1711 eventPtr = eventEndPtr = s; 1712 notationDeclHandler(handlerArg, 1713 declNotationName, 1714 dtd.base, 1715 systemId, 1716 declNotationPublicId); 1717 } 1718 poolClear(&tempPool); 1719 break; 1720 case XML_ROLE_NOTATION_NO_SYSTEM_ID: 1721 if (declNotationPublicId && notationDeclHandler) { 1722 eventPtr = eventEndPtr = s; 1723 notationDeclHandler(handlerArg, 1724 declNotationName, 1725 dtd.base, 1726 0, 1727 declNotationPublicId); 1728 } 1729 poolClear(&tempPool); 1730 break; 1731 case XML_ROLE_ERROR: 1732 eventPtr = s; 1733 switch (tok) { 1734 case XML_TOK_PARAM_ENTITY_REF: 1735 return XML_ERROR_PARAM_ENTITY_REF; 1736 case XML_TOK_XML_DECL: 1737 return XML_ERROR_MISPLACED_XML_PI; 1738 default: 1739 return XML_ERROR_SYNTAX; 1740 } 1741 case XML_ROLE_GROUP_OPEN: 1742 if (prologState.level >= groupSize) { 1743 if (groupSize) 1744 groupConnector = realloc(groupConnector, groupSize *= 2); 1745 else 1746 groupConnector = malloc(groupSize = 32); 1747 if (!groupConnector) 1748 return XML_ERROR_NO_MEMORY; 1749 } 1750 groupConnector[prologState.level] = 0; 1751 break; 1752 case XML_ROLE_GROUP_SEQUENCE: 1753 if (groupConnector[prologState.level] == '|') { 1754 eventPtr = s; 1755 return XML_ERROR_SYNTAX; 1756 } 1757 groupConnector[prologState.level] = ','; 1758 break; 1759 case XML_ROLE_GROUP_CHOICE: 1760 if (groupConnector[prologState.level] == ',') { 1761 eventPtr = s; 1762 return XML_ERROR_SYNTAX; 1763 } 1764 groupConnector[prologState.level] = '|'; 1765 break; 1766 case XML_ROLE_PARAM_ENTITY_REF: 1767 dtd.complete = 0; 1768 break; 1769 case XML_ROLE_NONE: 1770 switch (tok) { 1771 case XML_TOK_PI: 1772 eventPtr = s; 1773 eventEndPtr = next; 1774 if (!reportProcessingInstruction(parser, encoding, s, next)) 1775 return XML_ERROR_NO_MEMORY; 1776 break; 1777 } 1778 break; 1779 } 1780 if (defaultHandler) { 1781 switch (tok) { 1782 case XML_TOK_PI: 1783 case XML_TOK_BOM: 1784 case XML_TOK_XML_DECL: 1785 break; 1786 default: 1787 eventPtr = s; 1788 eventEndPtr = next; 1789 reportDefault(parser, encoding, s, next); 1790 } 1791 } 1792 s = next; 1793 } 1794 /* not reached */ 1795} 1796 1797static 1798enum XML_Error epilogProcessor(XML_Parser parser, 1799 const char *s, 1800 const char *end, 1801 const char **nextPtr) 1802{ 1803 processor = epilogProcessor; 1804 eventPtr = s; 1805 for (;;) { 1806 const char *next; 1807 int tok = XmlPrologTok(encoding, s, end, &next); 1808 eventEndPtr = next; 1809 switch (tok) { 1810 case XML_TOK_TRAILING_CR: 1811 if (defaultHandler) { 1812 eventEndPtr = end; 1813 reportDefault(parser, encoding, s, end); 1814 } 1815 /* fall through */ 1816 case XML_TOK_NONE: 1817 if (nextPtr) 1818 *nextPtr = end; 1819 return XML_ERROR_NONE; 1820 case XML_TOK_PROLOG_S: 1821 case XML_TOK_COMMENT: 1822 if (defaultHandler) 1823 reportDefault(parser, encoding, s, next); 1824 break; 1825 case XML_TOK_PI: 1826 if (!reportProcessingInstruction(parser, encoding, s, next)) 1827 return XML_ERROR_NO_MEMORY; 1828 break; 1829 case XML_TOK_INVALID: 1830 eventPtr = next; 1831 return XML_ERROR_INVALID_TOKEN; 1832 case XML_TOK_PARTIAL: 1833 if (nextPtr) { 1834 *nextPtr = s; 1835 return XML_ERROR_NONE; 1836 } 1837 return XML_ERROR_UNCLOSED_TOKEN; 1838 case XML_TOK_PARTIAL_CHAR: 1839 if (nextPtr) { 1840 *nextPtr = s; 1841 return XML_ERROR_NONE; 1842 } 1843 return XML_ERROR_PARTIAL_CHAR; 1844 default: 1845 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; 1846 } 1847 eventPtr = s = next; 1848 } 1849} 1850 1851static 1852enum XML_Error errorProcessor(XML_Parser parser, 1853 const char *s, 1854 const char *end, 1855 const char **nextPtr) 1856{ 1857 return errorCode; 1858} 1859 1860static enum XML_Error 1861storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, 1862 const char *ptr, const char *end, 1863 STRING_POOL *pool) 1864{ 1865 enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool); 1866 if (result) 1867 return result; 1868 if (!isCdata && poolLength(pool) && poolLastChar(pool) == XML_T(' ')) 1869 poolChop(pool); 1870 if (!poolAppendChar(pool, XML_T('\0'))) 1871 return XML_ERROR_NO_MEMORY; 1872 return XML_ERROR_NONE; 1873} 1874 1875static enum XML_Error 1876appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, 1877 const char *ptr, const char *end, 1878 STRING_POOL *pool) 1879{ 1880 const ENCODING *internalEnc = XmlGetInternalEncoding(); 1881 for (;;) { 1882 const char *next; 1883 int tok = XmlAttributeValueTok(enc, ptr, end, &next); 1884 switch (tok) { 1885 case XML_TOK_NONE: 1886 return XML_ERROR_NONE; 1887 case XML_TOK_INVALID: 1888 if (enc == encoding) 1889 eventPtr = next; 1890 return XML_ERROR_INVALID_TOKEN; 1891 case XML_TOK_PARTIAL: 1892 if (enc == encoding) 1893 eventPtr = ptr; 1894 return XML_ERROR_INVALID_TOKEN; 1895 case XML_TOK_CHAR_REF: 1896 { 1897 XML_Char buf[XML_ENCODE_MAX]; 1898 int i; 1899 int n = XmlCharRefNumber(enc, ptr); 1900 if (n < 0) { 1901 if (enc == encoding) 1902 eventPtr = ptr; 1903 return XML_ERROR_BAD_CHAR_REF; 1904 } 1905 if (!isCdata 1906 && n == 0x20 /* space */ 1907 && (poolLength(pool) == 0 || poolLastChar(pool) == XML_T(' '))) 1908 break; 1909 n = XmlEncode(n, (ICHAR *)buf); 1910 if (!n) { 1911 if (enc == encoding) 1912 eventPtr = ptr; 1913 return XML_ERROR_BAD_CHAR_REF; 1914 } 1915 for (i = 0; i < n; i++) { 1916 if (!poolAppendChar(pool, buf[i])) 1917 return XML_ERROR_NO_MEMORY; 1918 } 1919 } 1920 break; 1921 case XML_TOK_DATA_CHARS: 1922 if (!poolAppend(pool, enc, ptr, next)) 1923 return XML_ERROR_NO_MEMORY; 1924 break; 1925 break; 1926 case XML_TOK_TRAILING_CR: 1927 next = ptr + enc->minBytesPerChar; 1928 /* fall through */ 1929 case XML_TOK_ATTRIBUTE_VALUE_S: 1930 case XML_TOK_DATA_NEWLINE: 1931 if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == XML_T(' '))) 1932 break; 1933 if (!poolAppendChar(pool, XML_T(' '))) 1934 return XML_ERROR_NO_MEMORY; 1935 break; 1936 case XML_TOK_ENTITY_REF: 1937 { 1938 const XML_Char *name; 1939 ENTITY *entity; 1940 XML_Char ch = XmlPredefinedEntityName(enc, 1941 ptr + enc->minBytesPerChar, 1942 next - enc->minBytesPerChar); 1943 if (ch) { 1944 if (!poolAppendChar(pool, ch)) 1945 return XML_ERROR_NO_MEMORY; 1946 break; 1947 } 1948 name = poolStoreString(&temp2Pool, enc, 1949 ptr + enc->minBytesPerChar, 1950 next - enc->minBytesPerChar); 1951 if (!name) 1952 return XML_ERROR_NO_MEMORY; 1953 entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); 1954 poolDiscard(&temp2Pool); 1955 if (!entity) { 1956 if (dtd.complete) { 1957 if (enc == encoding) 1958 eventPtr = ptr; 1959 return XML_ERROR_UNDEFINED_ENTITY; 1960 } 1961 } 1962 else if (entity->open) { 1963 if (enc == encoding) 1964 eventPtr = ptr; 1965 return XML_ERROR_RECURSIVE_ENTITY_REF; 1966 } 1967 else if (entity->notation) { 1968 if (enc == encoding) 1969 eventPtr = ptr; 1970 return XML_ERROR_BINARY_ENTITY_REF; 1971 } 1972 else if (!entity->textPtr) { 1973 if (enc == encoding) 1974 eventPtr = ptr; 1975 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; 1976 } 1977 else { 1978 enum XML_Error result; 1979 const XML_Char *textEnd = entity->textPtr + entity->textLen; 1980 entity->open = 1; 1981 result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool); 1982 entity->open = 0; 1983 if (result) 1984 return result; 1985 } 1986 } 1987 break; 1988 default: 1989 abort(); 1990 } 1991 ptr = next; 1992 } 1993 /* not reached */ 1994} 1995 1996static 1997enum XML_Error storeEntityValue(XML_Parser parser, 1998 const char *entityTextPtr, 1999 const char *entityTextEnd) 2000{ 2001 /*const ENCODING *internalEnc = XmlGetInternalEncoding();*/ 2002 STRING_POOL *pool = &(dtd.pool); 2003 entityTextPtr += encoding->minBytesPerChar; 2004 entityTextEnd -= encoding->minBytesPerChar; 2005 for (;;) { 2006 const char *next; 2007 int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next); 2008 switch (tok) { 2009 case XML_TOK_PARAM_ENTITY_REF: 2010 eventPtr = entityTextPtr; 2011 return XML_ERROR_SYNTAX; 2012 case XML_TOK_NONE: 2013 if (declEntity) { 2014 declEntity->textPtr = pool->start; 2015 declEntity->textLen = pool->ptr - pool->start; 2016 poolFinish(pool); 2017 } 2018 else 2019 poolDiscard(pool); 2020 return XML_ERROR_NONE; 2021 case XML_TOK_ENTITY_REF: 2022 case XML_TOK_DATA_CHARS: 2023 if (!poolAppend(pool, encoding, entityTextPtr, next)) 2024 return XML_ERROR_NO_MEMORY; 2025 break; 2026 case XML_TOK_TRAILING_CR: 2027 next = entityTextPtr + encoding->minBytesPerChar; 2028 /* fall through */ 2029 case XML_TOK_DATA_NEWLINE: 2030 if (pool->end == pool->ptr && !poolGrow(pool)) 2031 return XML_ERROR_NO_MEMORY; 2032 *(pool->ptr)++ = XML_T('\n'); 2033 break; 2034 case XML_TOK_CHAR_REF: 2035 { 2036 XML_Char buf[XML_ENCODE_MAX]; 2037 int i; 2038 int n = XmlCharRefNumber(encoding, entityTextPtr); 2039 if (n < 0) { 2040 eventPtr = entityTextPtr; 2041 return XML_ERROR_BAD_CHAR_REF; 2042 } 2043 n = XmlEncode(n, (ICHAR *)buf); 2044 if (!n) { 2045 eventPtr = entityTextPtr; 2046 return XML_ERROR_BAD_CHAR_REF; 2047 } 2048 for (i = 0; i < n; i++) { 2049 if (pool->end == pool->ptr && !poolGrow(pool)) 2050 return XML_ERROR_NO_MEMORY; 2051 *(pool->ptr)++ = buf[i]; 2052 } 2053 } 2054 break; 2055 case XML_TOK_PARTIAL: 2056 eventPtr = entityTextPtr; 2057 return XML_ERROR_INVALID_TOKEN; 2058 case XML_TOK_INVALID: 2059 eventPtr = next; 2060 return XML_ERROR_INVALID_TOKEN; 2061 default: 2062 abort(); 2063 } 2064 entityTextPtr = next; 2065 } 2066 /* not reached */ 2067} 2068 2069static void 2070normalizeLines(XML_Char *s) 2071{ 2072 XML_Char *p; 2073 for (;; s++) { 2074 if (*s == XML_T('\0')) 2075 return; 2076 if (*s == XML_T('\r')) 2077 break; 2078 } 2079 p = s; 2080 do { 2081 if (*s == XML_T('\r')) { 2082 *p++ = XML_T('\n'); 2083 if (*++s == XML_T('\n')) 2084 s++; 2085 } 2086 else 2087 *p++ = *s++; 2088 } while (*s); 2089 *p = XML_T('\0'); 2090} 2091 2092static int 2093reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) 2094{ 2095 const XML_Char *target; 2096 XML_Char *data; 2097 const char *tem; 2098 if (!processingInstructionHandler) { 2099 if (defaultHandler) 2100 reportDefault(parser, enc, start, end); 2101 return 1; 2102 } 2103 start += enc->minBytesPerChar * 2; 2104 tem = start + XmlNameLength(enc, start); 2105 target = poolStoreString(&tempPool, enc, start, tem); 2106 if (!target) 2107 return 0; 2108 poolFinish(&tempPool); 2109 data = poolStoreString(&tempPool, enc, 2110 XmlSkipS(enc, tem), 2111 end - enc->minBytesPerChar*2); 2112 if (!data) 2113 return 0; 2114 normalizeLines(data); 2115 processingInstructionHandler(handlerArg, target, data); 2116 poolClear(&tempPool); 2117 return 1; 2118} 2119 2120static void 2121reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) 2122{ 2123 if (MUST_CONVERT(enc, s)) { 2124 for (;;) { 2125 ICHAR *dataPtr = (ICHAR *)dataBuf; 2126 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); 2127 if (s == end) { 2128 defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); 2129 break; 2130 } 2131 if (enc == encoding) { 2132 eventEndPtr = s; 2133 defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); 2134 eventPtr = s; 2135 } 2136 else 2137 defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); 2138 } 2139 } 2140 else 2141 defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); 2142} 2143 2144 2145static int 2146defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value) 2147{ 2148 DEFAULT_ATTRIBUTE *att; 2149 if (type->nDefaultAtts == type->allocDefaultAtts) { 2150 if (type->allocDefaultAtts == 0) { 2151 type->allocDefaultAtts = 8; 2152 type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); 2153 } 2154 else { 2155 type->allocDefaultAtts *= 2; 2156 type->defaultAtts = realloc(type->defaultAtts, 2157 type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); 2158 } 2159 if (!type->defaultAtts) 2160 return 0; 2161 } 2162 att = type->defaultAtts + type->nDefaultAtts; 2163 att->id = attId; 2164 att->value = value; 2165 att->isCdata = isCdata; 2166 if (!isCdata) 2167 attId->maybeTokenized = 1; 2168 type->nDefaultAtts += 1; 2169 return 1; 2170} 2171 2172static ATTRIBUTE_ID * 2173getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) 2174{ 2175 ATTRIBUTE_ID *id; 2176 const XML_Char *name; 2177 if (!poolAppendChar(&dtd.pool, XML_T('\0'))) 2178 return 0; 2179 name = poolStoreString(&dtd.pool, enc, start, end); 2180 if (!name) 2181 return 0; 2182 ++name; 2183 id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID)); 2184 if (!id) 2185 return 0; 2186 if (id->name != name) 2187 poolDiscard(&dtd.pool); 2188 else 2189 poolFinish(&dtd.pool); 2190 return id; 2191} 2192 2193static 2194const XML_Char *getOpenEntityNames(XML_Parser parser) 2195{ 2196 HASH_TABLE_ITER iter; 2197 2198 hashTableIterInit(&iter, &(dtd.generalEntities)); 2199 for (;;) { 2200 const XML_Char *s; 2201 ENTITY *e = (ENTITY *)hashTableIterNext(&iter); 2202 if (!e) 2203 break; 2204 if (!e->open) 2205 continue; 2206 if (poolLength(&tempPool) > 0 && !poolAppendChar(&tempPool, XML_T(' '))) 2207 return 0; 2208 for (s = e->name; *s; s++) 2209 if (!poolAppendChar(&tempPool, *s)) 2210 return 0; 2211 } 2212 2213 if (!poolAppendChar(&tempPool, XML_T('\0'))) 2214 return 0; 2215 return tempPool.start; 2216} 2217 2218static 2219int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames) 2220{ 2221 const XML_Char *s = openEntityNames; 2222 while (*openEntityNames != XML_T('\0')) { 2223 if (*s == XML_T(' ') || *s == XML_T('\0')) { 2224 ENTITY *e; 2225 if (!poolAppendChar(&tempPool, XML_T('\0'))) 2226 return 0; 2227 e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0); 2228 if (e) 2229 e->open = 1; 2230 if (*s == XML_T(' ')) 2231 s++; 2232 openEntityNames = s; 2233 poolDiscard(&tempPool); 2234 } 2235 else { 2236 if (!poolAppendChar(&tempPool, *s)) 2237 return 0; 2238 s++; 2239 } 2240 } 2241 return 1; 2242} 2243 2244 2245static 2246void normalizePublicId(XML_Char *publicId) 2247{ 2248 XML_Char *p = publicId; 2249 XML_Char *s; 2250 for (s = publicId; *s; s++) { 2251 switch (*s) { 2252 case XML_T(' '): 2253 case XML_T('\r'): 2254 case XML_T('\n'): 2255 if (p != publicId && p[-1] != XML_T(' ')) 2256 *p++ = XML_T(' '); 2257 break; 2258 default: 2259 *p++ = *s; 2260 } 2261 } 2262 if (p != publicId && p[-1] == XML_T(' ')) 2263 --p; 2264 *p = XML_T('\0'); 2265} 2266 2267static int dtdInit(DTD *p) 2268{ 2269 poolInit(&(p->pool)); 2270 hashTableInit(&(p->generalEntities)); 2271 hashTableInit(&(p->elementTypes)); 2272 hashTableInit(&(p->attributeIds)); 2273 p->complete = 1; 2274 p->base = 0; 2275 return 1; 2276} 2277 2278static void dtdDestroy(DTD *p) 2279{ 2280 HASH_TABLE_ITER iter; 2281 hashTableIterInit(&iter, &(p->elementTypes)); 2282 for (;;) { 2283 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); 2284 if (!e) 2285 break; 2286 if (e->allocDefaultAtts != 0) 2287 free(e->defaultAtts); 2288 } 2289 hashTableDestroy(&(p->generalEntities)); 2290 hashTableDestroy(&(p->elementTypes)); 2291 hashTableDestroy(&(p->attributeIds)); 2292 poolDestroy(&(p->pool)); 2293} 2294 2295/* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise. 2296The new DTD has already been initialized. */ 2297 2298static int dtdCopy(DTD *newDtd, const DTD *oldDtd) 2299{ 2300 HASH_TABLE_ITER iter; 2301 2302 if (oldDtd->base) { 2303 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base); 2304 if (!tem) 2305 return 0; 2306 newDtd->base = tem; 2307 } 2308 2309 hashTableIterInit(&iter, &(oldDtd->attributeIds)); 2310 2311 /* Copy the attribute id table. */ 2312 2313 for (;;) { 2314 ATTRIBUTE_ID *newA; 2315 const XML_Char *name; 2316 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); 2317 2318 if (!oldA) 2319 break; 2320 /* Remember to allocate the scratch byte before the name. */ 2321 if (!poolAppendChar(&(newDtd->pool), XML_T('\0'))) 2322 return 0; 2323 name = poolCopyString(&(newDtd->pool), oldA->name); 2324 if (!name) 2325 return 0; 2326 ++name; 2327 newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); 2328 if (!newA) 2329 return 0; 2330 newA->maybeTokenized = oldA->maybeTokenized; 2331 } 2332 2333 /* Copy the element type table. */ 2334 2335 hashTableIterInit(&iter, &(oldDtd->elementTypes)); 2336 2337 for (;;) { 2338 int i; 2339 ELEMENT_TYPE *newE; 2340 const XML_Char *name; 2341 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); 2342 if (!oldE) 2343 break; 2344 name = poolCopyString(&(newDtd->pool), oldE->name); 2345 if (!name) 2346 return 0; 2347 newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); 2348 if (!newE) 2349 return 0; 2350 newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); 2351 if (!newE->defaultAtts) 2352 return 0; 2353 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; 2354 for (i = 0; i < newE->nDefaultAtts; i++) { 2355 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); 2356 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; 2357 newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); 2358 if (!newE->defaultAtts[i].value) 2359 return 0; 2360 } 2361 } 2362 2363 /* Copy the entity table. */ 2364 2365 hashTableIterInit(&iter, &(oldDtd->generalEntities)); 2366 2367 for (;;) { 2368 ENTITY *newE; 2369 const XML_Char *name; 2370 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); 2371 if (!oldE) 2372 break; 2373 name = poolCopyString(&(newDtd->pool), oldE->name); 2374 if (!name) 2375 return 0; 2376 newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY)); 2377 if (!newE) 2378 return 0; 2379 if (oldE->systemId) { 2380 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId); 2381 if (!tem) 2382 return 0; 2383 newE->systemId = tem; 2384 if (oldE->base) { 2385 if (oldE->base == oldDtd->base) 2386 newE->base = newDtd->base; 2387 tem = poolCopyString(&(newDtd->pool), oldE->base); 2388 if (!tem) 2389 return 0; 2390 newE->base = tem; 2391 } 2392 } 2393 else { 2394 const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen); 2395 if (!tem) 2396 return 0; 2397 newE->textPtr = tem; 2398 newE->textLen = oldE->textLen; 2399 } 2400 if (oldE->notation) { 2401 const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation); 2402 if (!tem) 2403 return 0; 2404 newE->notation = tem; 2405 } 2406 } 2407 2408 newDtd->complete = oldDtd->complete; 2409 newDtd->standalone = oldDtd->standalone; 2410 return 1; 2411} 2412 2413static 2414void poolInit(STRING_POOL *pool) 2415{ 2416 pool->blocks = 0; 2417 pool->freeBlocks = 0; 2418 pool->start = 0; 2419 pool->ptr = 0; 2420 pool->end = 0; 2421} 2422 2423static 2424void poolClear(STRING_POOL *pool) 2425{ 2426 if (!pool->freeBlocks) 2427 pool->freeBlocks = pool->blocks; 2428 else { 2429 BLOCK *p = pool->blocks; 2430 while (p) { 2431 BLOCK *tem = p->next; 2432 p->next = pool->freeBlocks; 2433 pool->freeBlocks = p; 2434 p = tem; 2435 } 2436 } 2437 pool->blocks = 0; 2438 pool->start = 0; 2439 pool->ptr = 0; 2440 pool->end = 0; 2441} 2442 2443static 2444void poolDestroy(STRING_POOL *pool) 2445{ 2446 BLOCK *p = pool->blocks; 2447 while (p) { 2448 BLOCK *tem = p->next; 2449 free(p); 2450 p = tem; 2451 } 2452 pool->blocks = 0; 2453 p = pool->freeBlocks; 2454 while (p) { 2455 BLOCK *tem = p->next; 2456 free(p); 2457 p = tem; 2458 } 2459 pool->freeBlocks = 0; 2460 pool->ptr = 0; 2461 pool->start = 0; 2462 pool->end = 0; 2463} 2464 2465static 2466XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, 2467 const char *ptr, const char *end) 2468{ 2469 if (!pool->ptr && !poolGrow(pool)) 2470 return 0; 2471 for (;;) { 2472 XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); 2473 if (ptr == end) 2474 break; 2475 if (!poolGrow(pool)) 2476 return 0; 2477 } 2478 return pool->start; 2479} 2480 2481static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s) 2482{ 2483 do { 2484 if (!poolAppendChar(pool, *s)) 2485 return 0; 2486 } while (*s++); 2487 s = pool->start; 2488 poolFinish(pool); 2489 return s; 2490} 2491 2492static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) 2493{ 2494 if (!pool->ptr && !poolGrow(pool)) 2495 return 0; 2496 for (; n > 0; --n, s++) { 2497 if (!poolAppendChar(pool, *s)) 2498 return 0; 2499 2500 } 2501 s = pool->start; 2502 poolFinish(pool); 2503 return s; 2504} 2505 2506static 2507XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, 2508 const char *ptr, const char *end) 2509{ 2510 if (!poolAppend(pool, enc, ptr, end)) 2511 return 0; 2512 if (pool->ptr == pool->end && !poolGrow(pool)) 2513 return 0; 2514 *(pool->ptr)++ = 0; 2515 return pool->start; 2516} 2517 2518static 2519int poolGrow(STRING_POOL *pool) 2520{ 2521 if (pool->freeBlocks) { 2522 if (pool->start == 0) { 2523 pool->blocks = pool->freeBlocks; 2524 pool->freeBlocks = pool->freeBlocks->next; 2525 pool->blocks->next = 0; 2526 pool->start = pool->blocks->s; 2527 pool->end = pool->start + pool->blocks->size; 2528 pool->ptr = pool->start; 2529 return 1; 2530 } 2531 if (pool->end - pool->start < pool->freeBlocks->size) { 2532 BLOCK *tem = pool->freeBlocks->next; 2533 pool->freeBlocks->next = pool->blocks; 2534 pool->blocks = pool->freeBlocks; 2535 pool->freeBlocks = tem; 2536 memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char)); 2537 pool->ptr = pool->blocks->s + (pool->ptr - pool->start); 2538 pool->start = pool->blocks->s; 2539 pool->end = pool->start + pool->blocks->size; 2540 return 1; 2541 } 2542 } 2543 if (pool->blocks && pool->start == pool->blocks->s) { 2544 int blockSize = (pool->end - pool->start)*2; 2545 pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); 2546 if (!pool->blocks) 2547 return 0; 2548 pool->blocks->size = blockSize; 2549 pool->ptr = pool->blocks->s + (pool->ptr - pool->start); 2550 pool->start = pool->blocks->s; 2551 pool->end = pool->start + blockSize; 2552 } 2553 else { 2554 BLOCK *tem; 2555 int blockSize = pool->end - pool->start; 2556 if (blockSize < INIT_BLOCK_SIZE) 2557 blockSize = INIT_BLOCK_SIZE; 2558 else 2559 blockSize *= 2; 2560 tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); 2561 if (!tem) 2562 return 0; 2563 tem->size = blockSize; 2564 tem->next = pool->blocks; 2565 pool->blocks = tem; 2566 memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); 2567 pool->ptr = tem->s + (pool->ptr - pool->start); 2568 pool->start = tem->s; 2569 pool->end = tem->s + blockSize; 2570 } 2571 return 1; 2572} 2573