1/*---------------------------------------------------------------------------- 2| Copyright (c) 1999 Jochen Loewer (loewerj@hotmail.com) 3|----------------------------------------------------------------------------- 4| 5| 6| A simple (hopefully fast) parser to build up a DOM structure in memory. 7| Initially based on Richard Hipp's XML parser for TMML. 8| 9| 10| The contents of this file are subject to the Mozilla Public License 11| Version 1.1 (the "License"); you may not use this file except in 12| compliance with the License. You may obtain a copy of the License at 13| http://www.mozilla.org/MPL/ 14| 15| Software distributed under the License is distributed on an "AS IS" 16| basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 17| License for the specific language governing rights and limitations 18| under the License. 19| 20| The Original Code is tDOM. 21| 22| The Initial Developer of the Original Code is Jochen Loewer 23| Portions created by Jochen Loewer are Copyright (C) 1998, 1999 24| Jochen Loewer. All Rights Reserved. 25| 26| Contributor(s): 27| 28| June00 Zoran Vasiljevic Made thread-safe. 29| 30| 31| adopted/written by Jochen Loewer 32| July 1999 33| 34| ------------------------------------------------------------------------ 35| 36| A parser for XML. 37| 38| Copyright (C) 1998 D. Richard Hipp 39| 40| This library is free software; you can redistribute it and/or 41| modify it under the terms of the GNU Library General Public 42| License as published by the Free Software Foundation; either 43| version 2 of the License, or (at your option) any later version. 44| 45| This library is distributed in the hope that it will be useful, 46| but WITHOUT ANY WARRANTY; without even the implied warranty of 47| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 48| Library General Public License for more details. 49| 50| You should have received a copy of the GNU Library General Public 51| License along with this library; if not, write to the 52| Free Software Foundation, Inc., 59 Temple Place - Suite 330, 53| Boston, MA 02111-1307, USA. 54| 55| Author contact information: 56| drh@acm.org 57| http://www.hwaci.com/drh/ 58| 59\---------------------------------------------------------------------------*/ 60 61 62/*---------------------------------------------------------------------------- 63| Includes 64| 65\---------------------------------------------------------------------------*/ 66#include <tcl.h> 67#include <string.h> 68#include <ctype.h> 69#include <dom.h> 70 71/*---------------------------------------------------------------------------- 72| Defines 73| 74\---------------------------------------------------------------------------*/ 75#define DBG(x) 76#define TDOM_NS 77#ifdef TDOM_NS 78# define RetError(m,p) *errStr=m; *pos=p; FREE((char*)activeNS); return TCL_ERROR; 79#else 80# define RetError(m,p) *errStr=m; *pos=p; return TCL_ERROR; 81#endif 82#define SPACE(c) ((c)==' ' || (c)=='\n' || (c)=='\t' || (c)=='\r') 83 84/*--------------------------------------------------------------------------- 85| type domActiveNS 86| 87\--------------------------------------------------------------------------*/ 88typedef struct _domActiveNS { 89 90 int depth; 91 domNS *namespace; 92 93} domActiveNS; 94 95/*---------------------------------------------------------------------------- 96| Begin Character Entity Translator 97| 98| 99| The next section of code implements routines used to translate 100| character entity references into their corresponding strings. 101| 102| Examples: 103| 104| & "&" 105| < "<" 106| > ">" 107| " " 108| 109\---------------------------------------------------------------------------*/ 110 111 112/*---------------------------------------------------------------------------- 113| Each entity reference is recorded as an instance of the following 114| structure 115\---------------------------------------------------------------------------*/ 116typedef struct Er Er; 117struct Er { 118 char *zName; /* The name of this entity reference. ex: "amp" */ 119 char *zValue; /* The value for this entity. ex: "&" */ 120 Er *pNext; /* Next entity with the same hash on zName */ 121}; 122 123 124/*---------------------------------------------------------------------------- 125| The size of the hash table. For best results this should 126| be a prime number which is about the same size as the number of 127| character entity references known to the system. 128| 129\---------------------------------------------------------------------------*/ 130#define ER_HASH_SIZE 7 131 132 133/*---------------------------------------------------------------------------- 134| The following flag is TRUE if entity reference hash table needs 135| to be initialized. 136| 137| Hash table is used read-only, therefore just one copy, protected with 138| mutex when used in threading environments. The mutex is used only for 139| initial setup of the table. 140| 141\---------------------------------------------------------------------------*/ 142static int bErNeedsInit = 1; 143TDomThreaded(static Tcl_Mutex initMutex;) 144 145 146/*---------------------------------------------------------------------------- 147| The hash table 148| 149| If the name of an entity reference hashes to the value H, then 150| apErHash[H] will point to a linked list of Er structures, one of 151| which will be the Er structure for that entity reference 152| 153\---------------------------------------------------------------------------*/ 154static Er *apErHash[ER_HASH_SIZE]; 155 156 157/*---------------------------------------------------------------------------- 158| ErHash -- 159| 160| Hash an entity reference name. The value returned is an 161| integer between 0 and Er_HASH_SIZE-1, inclusive. 162| 163\---------------------------------------------------------------------------*/ 164static int ErHash( 165 const char *zName 166) 167{ 168 int h = 0; /* The hash value to be returned */ 169 char c; /* The next character in the name being hashed */ 170 171 while( (c=*zName)!=0 ){ 172 h = h<<5 ^ h ^ c; 173 zName++; 174 } 175 if( h<0 ) h = -h; 176 return h % ER_HASH_SIZE; 177 178} /* ErHash */ 179 180 181/*---------------------------------------------------------------------------- 182| The following is a table of all entity references. To create 183| new character entities, add entries to this table. 184| 185| Note: For the decoder to work, the name of the entity reference 186| must not be shorter than the value. 187| 188\---------------------------------------------------------------------------*/ 189static Er er_sequences[] = { 190 { "amp", "&", 0 }, 191 { "lt", "<", 0 }, 192 { "gt", ">", 0 }, 193 { "apos", "'", 0 }, 194 { "quot", "\"", 0 }, 195#if TclOnly8Bits 196 { "nbsp", "\240", 0 }, 197#else 198 { "nbsp", "\xC2\xA0", 0 }, 199#endif 200}; 201 202 203/*---------------------------------------------------------------------------- 204| ErInit -- 205| 206| Initialize the entity reference hash table 207| 208\---------------------------------------------------------------------------*/ 209static void ErInit (void) 210{ 211 size_t i; /* For looping thru the list of entity references */ 212 int h; /* The hash on a entity */ 213 214 for(i=0; i<sizeof(er_sequences)/sizeof(er_sequences[0]); i++){ 215 h = ErHash(er_sequences[i].zName); 216 er_sequences[i].pNext = apErHash[h]; 217 apErHash[h] = &er_sequences[i]; 218 } 219 220} /* ErInit */ 221 222 223/*---------------------------------------------------------------------------- 224| TranslateEntityRefs -- 225| 226| Translate entity references and character references in the string 227| "z". "z" is overwritten with the translated sequence. 228| 229| Unrecognized entity references are unaltered. 230| 231| Example: 232| 233| input = "AT&T > MCI" 234| output = "AT&T > MCI" 235| 236\---------------------------------------------------------------------------*/ 237static int TranslateEntityRefs ( 238 char *z, 239 int *newLen 240) 241{ 242 int from; /* Read characters from this position in z[] */ 243 int to; /* Write characters into this position in z[] */ 244 int h; /* A hash on the entity reference */ 245 char *zVal; /* The substituted value */ 246 Er *p; /* For looping down the entity reference collision chain */ 247 int value; 248 249 from = to = 0; 250 251 /*--------------------------------------------- 252 | This is done only once per process 253 \--------------------------------------------*/ 254 255 if (bErNeedsInit) { 256 TDomThreaded(Tcl_MutexLock(&initMutex);) 257 if (bErNeedsInit) { 258 ErInit(); 259 bErNeedsInit = 0; 260 } 261 TDomThreaded(Tcl_MutexUnlock(&initMutex);) 262 } 263 264 while (z[from]) { 265 if (z[from]=='&') { 266 int i = from+1; 267 int c; 268 269 if (z[i] == '#') { 270 /*--------------------------------------------- 271 | convert character reference 272 \--------------------------------------------*/ 273 value = 0; 274 if (z[++i] == 'x') { 275 i++; 276 while (z[i] && (c=z[i]) && (c!=';')) { 277 value = value * 16; 278 if ((c>='0') && (c<='9')) { 279 value += c-'0'; 280 } else 281 if ((c>='A') && (c<='F')) { 282 value += c-'A' + 10; 283 } else 284 if ((c>='a') && (c<='f')) { 285 value += c-'a' + 10; 286 } else { 287 /* error */ 288 return 0; 289 } 290 i++; 291 } 292 } else { 293 while (z[i] && (c=z[i]) && (c!=';')) { 294 value = value * 10; 295 if ((c>='0') && (c<='9')) { 296 value += c-'0'; 297 } else { 298 /* error */ 299 return 0; 300 } 301 i++; 302 } 303 } 304 if (!z[i] || (z[i]!=';')) { 305 return 0; 306 /* error */ 307 } 308 from = i+1; 309#if TclOnly8Bits 310 z[to++] = value; 311#else 312 if (value < 0x80) { 313 z[to++] = value; 314 } else if (value <= 0x7FF) { 315 z[to++] = (char) ((value >> 6) | 0xC0); 316 z[to++] = (char) ((value | 0x80) & 0xBF); 317 } else if (value <= 0xFFFF) { 318 z[to++] = (char) ((value >> 12) | 0xE0); 319 z[to++] = (char) (((value >> 6) | 0x80) & 0xBF); 320 z[to++] = (char) ((value | 0x80) & 0xBF); 321 } else { 322 /* error */ 323 return 0; 324 } 325#endif 326 } else { 327 while (z[i] && isalpha((unsigned char)z[i])) { 328 i++; 329 } 330 if (!z[i] || (z[i]!=';')) { 331 return 0; 332 } 333 c = z[i]; 334 z[i] = 0; 335 h = ErHash(&z[from+1]); 336 p = apErHash[h]; 337 while (p && strcmp(p->zName,&z[from+1])!=0 ) { 338 p = p->pNext; 339 } 340 z[i] = c; 341 if (p) { 342 zVal = p->zValue; 343 while (*zVal) { 344 z[to++] = *(zVal++); 345 } 346 from = i; 347 if (c==';') from++; 348 } else { 349 z[to++] = z[from++]; 350 } 351 } 352 } else { 353 z[to++] = z[from++]; 354 } 355 } 356 z[to] = 0; 357 *newLen = to; 358 return 1; 359} 360/*---------------------------------------------------------------------------- 361| End Of Character Entity Translator 362\---------------------------------------------------------------------------*/ 363 364 365/*--------------------------------------------------------------------------- 366| domIsNamespaceInScope 367| 368\--------------------------------------------------------------------------*/ 369static int 370domIsNamespaceInScope ( 371 domActiveNS *NSstack, 372 int NSstackPos, 373 const char *prefix, 374 const char *namespaceURI 375) 376{ 377 int i; 378 379 for (i = NSstackPos; i >= 0; i--) { 380 if (NSstack[i].namespace->prefix[0] && 381 (strcmp(NSstack[i].namespace->prefix, prefix)==0)) { 382 if (strcmp(NSstack[i].namespace->uri, namespaceURI)==0) { 383 /* OK, exactly the same namespace declaration is in scope */ 384 return 1; 385 } else { 386 /* This prefix is currently assigned to another uri, 387 we need a new NS declaration, to override this one */ 388 return 0; 389 } 390 } 391 } 392 return 0; 393} 394 395/*---------------------------------------------------------------------------- 396| XML_SimpleParse (non recursive) 397| 398| Parses the XML string starting at 'pos' and continuing to the 399| first encountered error. 400| 401\---------------------------------------------------------------------------*/ 402static int 403XML_SimpleParse ( 404 char *xml, /* XML string */ 405 int *pos, /* Index of next unparsed character in xml */ 406 domDocument *doc, 407 domNode *parent_nodeOld, 408 int ignoreWhiteSpaces, 409 char **errStr 410) { 411 register int c; /* Next character of the input file */ 412 register char *pn; 413 register char *x, *start, *piSep; 414 int saved; 415 int hasContent; 416 domNode *node; 417 domNode *parent_node = NULL; 418 domTextNode *tnode; 419 domAttrNode *attrnode, *lastAttr, *attrList; 420 int ampersandSeen = 0; 421 int only_whites = 0; 422 domProcessingInstructionNode *pinode; 423 int hnew; 424 Tcl_HashEntry *h; 425 426#ifdef TDOM_NS 427 int nspos, newNS; 428 int depth = 0; 429 int activeNSpos = -1; 430 int activeNSsize = 8; 431 domActiveNS *activeNS = (domActiveNS*) MALLOC (sizeof(domActiveNS) * activeNSsize); 432 const char *xmlns, *localname; 433 domNS *ns; 434 char tagPrefix[MAX_PREFIX_LEN]; 435 char prefix[MAX_PREFIX_LEN]; 436 domAttrNode *lastNSAttr, *NSattrList; 437#endif 438 439 x = &(xml[*pos]); 440 441 while ( (c=*x)!=0 ) { 442 443 start = x; 444 445 if (c!='<') { 446 /*---------------------------------------------------------------- 447 | read text between tags 448 | 449 \---------------------------------------------------------------*/ 450 ampersandSeen = 0; 451 only_whites = 1; 452 while ( (c=*x)!=0 && c!='<' ) { 453 if (c=='&') ampersandSeen = 1; 454 if ( (c != ' ') && 455 (c != '\t') && 456 (c != '\n') && 457 (c != '\r') ) { 458 only_whites = 0; 459 } 460 x++; 461 } 462 if (!(only_whites && ignoreWhiteSpaces) && parent_node) { 463 /*-------------------------------------------------------- 464 | allocate new TEXT node 465 \-------------------------------------------------------*/ 466 tnode = (domTextNode*) domAlloc(sizeof(domTextNode)); 467 memset(tnode, 0, sizeof(domTextNode)); 468 tnode->nodeType = TEXT_NODE; 469 tnode->nodeFlags = 0; 470 tnode->namespace = 0; 471 tnode->ownerDocument = doc; 472 tnode->nodeNumber = NODE_NO(doc); 473 tnode->valueLength = (x - start); 474 tnode->nodeValue = (char*)MALLOC((x - start)+1); 475 memmove(tnode->nodeValue, start, (x - start)); 476 *(tnode->nodeValue + (x - start)) = 0; 477 if (ampersandSeen) { 478 if (!TranslateEntityRefs(tnode->nodeValue, 479 &(tnode->valueLength) )) { 480 RetError("Entity parsing error", (x - xml)); 481 } 482 } 483 tnode->parentNode = parent_node; 484 if (parent_node->firstChild) { 485 parent_node->lastChild->nextSibling = (domNode*)tnode; 486 tnode->previousSibling = parent_node->lastChild; 487 parent_node->lastChild = (domNode*)tnode; 488 } else { 489 parent_node->firstChild = parent_node->lastChild = 490 (domNode*)tnode; 491 } 492 } 493 494 } else if (x[1]=='/') { 495 /*------------------------------------------------------------ 496 | read and check closing tag 497 \-----------------------------------------------------------*/ 498 node = parent_node; 499 if (!parent_node) { 500 RetError("Syntax error",(x - xml)); 501 } 502 parent_node = node->parentNode; 503 pn = (char*)node->nodeName; 504 505 x += 2; 506 while (*x == *pn) { x++; pn++; } 507 if ( *pn || (*x!='>' && !SPACE(*x) ) ) { 508 RetError("Unterminated element",(x - xml)); 509 } 510 while (SPACE(*x)) { 511 x++; 512 } 513 if (*x=='>') { 514 x++; 515 } else { 516 RetError("Missing \">\"",(x - xml)-1); 517 } 518#ifdef TDOM_NS 519 depth--; 520 /* pop active namespaces */ 521 while ( (activeNSpos >= 0) && 522 (activeNS[activeNSpos].depth == depth) ) 523 { 524 activeNSpos--; 525 } 526#endif 527 if (parent_node == NULL) { 528 /* we return to main node and so finished parsing */ 529#ifdef TDOM_NS 530 FREE ((char *) activeNS); 531#endif 532 return TCL_OK; 533 } 534 continue; 535 536 } else { 537 538 x++; 539 if (*x=='!') { 540 if (x[1]=='-' && x[2]=='-') { 541 /*-------------------------------------------------------- 542 | read over a comment 543 \-------------------------------------------------------*/ 544 x += 3; 545 while ( (c=*x)!=0 && 546 (c!='-' || x[1]!='-' || x[2]!='>')) { 547 x++; 548 } 549 if (*x) { 550 /*---------------------------------------------------- 551 | allocate new COMMENT node for comments 552 \---------------------------------------------------*/ 553 tnode = (domTextNode*) domAlloc(sizeof(domTextNode)); 554 memset(tnode, 0, sizeof(domTextNode)); 555 tnode->nodeType = COMMENT_NODE; 556 tnode->nodeFlags = 0; 557 tnode->namespace = 0; 558 tnode->ownerDocument = doc; 559 tnode->nodeNumber = NODE_NO(doc); 560 tnode->parentNode = parent_node; 561 tnode->valueLength = x - start - 4; 562 tnode->nodeValue = (char*)MALLOC(tnode->valueLength+1); 563 memmove(tnode->nodeValue, start+4, tnode->valueLength); 564 *(tnode->nodeValue + tnode->valueLength) = 0; 565 if (parent_node == NULL) { 566 if (doc->rootNode->lastChild) { 567 tnode->previousSibling = 568 doc->rootNode->lastChild; 569 doc->rootNode->lastChild->nextSibling 570 = (domNode*)tnode; 571 } else { 572 doc->rootNode->firstChild = (domNode*) tnode; 573 } 574 doc->rootNode->lastChild = (domNode*) tnode; 575 } else { 576 if (parent_node->firstChild) { 577 parent_node->lastChild->nextSibling = (domNode*)tnode; 578 tnode->previousSibling = parent_node->lastChild; 579 parent_node->lastChild = (domNode*)tnode; 580 } else { 581 parent_node->firstChild = parent_node->lastChild = (domNode*)tnode; 582 } 583 } 584 x += 3; 585 } else { 586 RetError("Unterminated comment",(start-xml)); 587 } 588 continue; 589 590 } else if (x[1]=='D' && x[2]=='O' && 591 x[3]=='C' && x[4]=='T' && 592 x[5]=='Y' && x[6]=='P' && x[7]=='E' ) { 593 /*-------------------------------------------------------- 594 | read over a DOCTYPE definition 595 \-------------------------------------------------------*/ 596 x += 8; 597 start = x; 598 while (*x!=0) { 599 if (*x=='[') { 600 x++; 601 while ((*x!=0) && (*x!=']')) x++; 602 } else 603 if (*x=='>') { 604 break; 605 } else { 606 x++; 607 } 608 } 609 if (*x) { 610 x++; 611 } else { 612 RetError("Unterminated DOCTYPE definition",(start-xml)); 613 } 614 continue; 615 616 } else if (x[1]=='[' && x[2]=='C' && 617 x[3]=='D' && x[4]=='A' && 618 x[5]=='T' && x[6]=='A' && x[7]=='[' ) { 619 /*-------------------------------------------------------- 620 | read over a <![CDATA[ section 621 \-------------------------------------------------------*/ 622 x += 8; 623 start = x; 624 while ( (*x!=0) && 625 ((*x!=']') || (x[1]!=']') || (x[2]!='>'))) { 626 x++; 627 } 628 if (*x) { 629 if (parent_node && (x - start)) { 630 /*---------------------------------------------------- 631 | allocate new TEXT node for CDATA section data 632 \---------------------------------------------------*/ 633 tnode = (domTextNode*) domAlloc(sizeof(domTextNode)); 634 memset(tnode, 0, sizeof(domTextNode)); 635 tnode->nodeType = TEXT_NODE; 636 tnode->nodeFlags = 0; 637 tnode->namespace = 0; 638 tnode->ownerDocument = doc; 639 tnode->nodeNumber = NODE_NO(doc); 640 tnode->parentNode = parent_node; 641 tnode->valueLength = (x - start); 642 tnode->nodeValue = (char*)MALLOC((x - start)+1); 643 memmove(tnode->nodeValue, start, (x - start)); 644 *(tnode->nodeValue + (x - start)) = 0; 645 if (parent_node->firstChild) { 646 parent_node->lastChild->nextSibling = (domNode*)tnode; 647 tnode->previousSibling = parent_node->lastChild; 648 parent_node->lastChild = (domNode*)tnode; 649 } else { 650 parent_node->firstChild = parent_node->lastChild = (domNode*)tnode; 651 } 652 } 653 x += 3; 654 } else { 655 RetError("Unterminated CDATA definition",(start-xml) ); 656 } 657 continue; 658 } else { 659 RetError("Incorrect <!... tag",(start-xml) ); 660 } 661 662 } else if (*x=='?') { 663 /*-------------------------------------------------------- 664 | read over a processing instructions(PI) / XMLDecl 665 \-------------------------------------------------------*/ 666 x++; 667 start = x; 668 while ( (c=*x)!=0 && 669 (c!='?' || x[1]!='>')) { 670 x++; 671 } 672 if (*x) { 673 /*------------------------------------------------------------ 674 | allocate new PI node for processing instruction section 675 \-----------------------------------------------------------*/ 676 pinode = (domProcessingInstructionNode*) 677 domAlloc(sizeof(domProcessingInstructionNode)); 678 memset(pinode, 0, sizeof(domProcessingInstructionNode)); 679 pinode->nodeType = PROCESSING_INSTRUCTION_NODE; 680 pinode->nodeFlags = 0; 681 pinode->namespace = 0; 682 pinode->ownerDocument = doc; 683 pinode->nodeNumber = NODE_NO(doc); 684 pinode->parentNode = parent_node; 685 686 /*------------------------------------------------- 687 | extract PI target 688 \------------------------------------------------*/ 689 piSep = start; 690 while ( (c=*piSep)!=0 && !SPACE(c) && 691 (c!='?' || piSep[1]!='>')) { 692 piSep++; 693 } 694 *piSep = '\0'; /* temporarily terminate the string */ 695 696 pinode->targetLength = strlen(start); 697 pinode->targetValue = (char*)MALLOC(pinode->targetLength); 698 memmove(pinode->targetValue, start, pinode->targetLength); 699 700 *piSep = c; /* remove temporarily termination */ 701 702 /*------------------------------------------------- 703 | extract PI data 704 \------------------------------------------------*/ 705 while (SPACE(*piSep)) { 706 piSep++; 707 } 708 pinode->dataLength = x - piSep; 709 pinode->dataValue = (char*)MALLOC(pinode->dataLength); 710 memmove(pinode->dataValue, piSep, pinode->dataLength); 711 712 if (parent_node == NULL) { 713 if (doc->rootNode->lastChild) { 714 pinode->previousSibling = doc->rootNode->lastChild; 715 doc->rootNode->lastChild->nextSibling 716 = (domNode*) pinode; 717 } else { 718 doc->rootNode->firstChild = (domNode*) pinode; 719 } 720 doc->rootNode->lastChild = (domNode*) pinode; 721 } else { 722 if (parent_node->firstChild) { 723 parent_node->lastChild->nextSibling = (domNode*)pinode; 724 pinode->previousSibling = parent_node->lastChild; 725 parent_node->lastChild = (domNode*)pinode; 726 } else { 727 parent_node->firstChild = parent_node->lastChild = (domNode*)pinode; 728 } 729 } 730 x += 2; 731 } else { 732 RetError("Unterminated processing instruction(PI)",(start-xml) ); 733 } 734 continue; 735 } 736 737 /*---------------------------------------------------------------- 738 | new tag/element 739 | 740 \---------------------------------------------------------------*/ 741 hasContent = 1; 742 while ((c=*x)!=0 && c!='/' && c!='>' && !SPACE(c) ) { 743 x++; 744 } 745 if (c==0) { 746 RetError("Missing \">\"",(start-xml) ); 747 } 748 if ( (x-start)==1) { 749 RetError("Null markup name",(start-xml) ); 750 } 751 *x = '\0'; /* temporarily terminate the string */ 752 753 /*------------------------------------------------------ 754 | create new DOM element node 755 \-----------------------------------------------------*/ 756 h = Tcl_CreateHashEntry(&HASHTAB(doc,tdom_tagNames), start+1, 757 &hnew); 758 node = (domNode*) domAlloc(sizeof(domNode)); 759 memset(node, 0, sizeof(domNode)); 760 node->nodeType = ELEMENT_NODE; 761 node->nodeFlags = 0; 762 node->namespace = 0; 763 node->nodeName = (char *)&(h->key); 764 node->ownerDocument = doc; 765 node->nodeNumber = NODE_NO(doc); 766 node->ownerDocument = doc; 767 768 if (parent_node == NULL) { 769 if (doc->rootNode->lastChild) { 770 node->previousSibling = doc->rootNode->lastChild; 771 doc->rootNode->lastChild->nextSibling = node; 772 } else { 773 doc->rootNode->firstChild = node; 774 } 775 doc->rootNode->lastChild = node; 776 } else { 777 node->parentNode = parent_node; 778 if (parent_node->firstChild) { 779 parent_node->lastChild->nextSibling = node; 780 node->previousSibling = parent_node->lastChild; 781 parent_node->lastChild = node; 782 } else { 783 parent_node->firstChild = parent_node->lastChild = node; 784 } 785 } 786 787 *x = c; /* remove temporarily termination */ 788 789 while (SPACE(*x) ) { 790 x++; 791 } 792 /*----------------------------------------------------------- 793 | read attribute name-value pairs 794 \----------------------------------------------------------*/ 795 lastAttr = NULL; 796 attrList = NULL; 797#ifdef TDOM_NS 798 lastNSAttr = NULL; 799 NSattrList = NULL; 800#endif 801 while ( (c=*x) && (c!='/') && (c!='>') ) { 802 char *ArgName = x; 803 int nArgName; 804 char *ArgVal = NULL; 805 int nArgVal = 0; 806 807 while ((c=*x)!=0 && c!='=' && c!='>' && !SPACE(c) ) { 808 x++; 809 } 810 nArgName = x - ArgName; 811 while (SPACE(*x)) { 812 x++; 813 } 814 if (*x=='=') { 815 x++; 816 } 817 saved = *(ArgName + nArgName); 818 *(ArgName + nArgName) = '\0'; /* terminate arg name */ 819 820 while (SPACE(*x)) { 821 x++; 822 } 823 if (*x=='>' || *x==0) { 824 ArgVal = ArgName; 825 nArgVal = nArgName; 826 } else if ((c=*x)=='\"' || c=='\'') { 827 register int cDelim = c; 828 x++; 829 ArgVal = x; 830 ampersandSeen = 0; 831 while ((c=*x)!=0 && c!=cDelim) { 832 if (c=='&') { 833 ampersandSeen = 1; 834 } 835 x++; 836 } 837 nArgVal = x - ArgVal; 838 if (c==0) { 839 RetError("Unterminated string",(ArgVal - xml - 1) ); 840 } else { 841 x++; 842 } 843 } else if (c!=0 && c!='>') { 844 ArgVal = x; 845 while ((c=*x)!=0 && c!='>' && !SPACE(c)) { 846 if (c=='&') { 847 ampersandSeen = 1; 848 } 849 x++; 850 } 851 if (c==0) { 852 RetError("Missing \">\"",(start-xml)); 853 } 854 nArgVal = x - ArgVal; 855 } 856 857 858#ifdef TDOM_NS 859 /*------------------------------------------------------------ 860 | handle namespace attributes or normal ones 861 \------------------------------------------------------------*/ 862 if (strncmp((char *)ArgName, "xmlns", 5) == 0) { 863 xmlns = ArgName; 864 newNS = 1; 865 866 h = Tcl_CreateHashEntry(&HASHTAB(doc, tdom_attrNames), 867 ArgName, &hnew); 868 attrnode = (domAttrNode*) domAlloc(sizeof(domAttrNode)); 869 memset(attrnode, 0, sizeof(domAttrNode)); 870 attrnode->parentNode = node; 871 attrnode->nodeName = (char *)&(h->key); 872 attrnode->nodeType = ATTRIBUTE_NODE; 873 attrnode->nodeFlags = IS_NS_NODE; 874 attrnode->nodeValue = (char*)MALLOC(nArgVal+1); 875 attrnode->valueLength = nArgVal; 876 memmove(attrnode->nodeValue, ArgVal, nArgVal); 877 *(attrnode->nodeValue + nArgVal) = 0; 878 if (ampersandSeen) { 879 if (!TranslateEntityRefs(attrnode->nodeValue, 880 &(attrnode->valueLength) )) { 881 RetError("Entity parsing error",(start-xml)); 882 } 883 } 884 885 if (xmlns[5] == ':') { 886 if (domIsNamespaceInScope (activeNS, activeNSpos, 887 &(xmlns[6]), 888 (char*)attrnode->nodeValue)) 889 { 890 ns = domLookupPrefix (node, &(xmlns[6])); 891 newNS = 0; 892 } else { 893 ns = domNewNamespace(doc, &(xmlns[6]), 894 (char*)attrnode->nodeValue); 895 } 896 } else { 897 ns = domNewNamespace(doc, "", 898 (char*)attrnode->nodeValue); 899 } 900 attrnode->namespace = ns->index; 901 if (newNS) { 902 /* push active namespace */ 903 activeNSpos++; 904 if (activeNSpos >= activeNSsize) { 905 activeNS = (domActiveNS*) REALLOC( 906 (char*)activeNS, 907 sizeof(domActiveNS) * 2 * activeNSsize); 908 activeNSsize = 2 * activeNSsize; 909 } 910 activeNS[activeNSpos].depth = depth; 911 activeNS[activeNSpos].namespace = ns; 912 } 913 914 if (NSattrList) { 915 lastNSAttr->nextSibling = attrnode; 916 } else { 917 NSattrList = attrnode; 918 } 919 lastNSAttr = attrnode; 920 921 922 } else { 923#endif 924 925 /*------------------------------------------------------------ 926 | allocate new attribute node 927 \------------------------------------------------------------*/ 928 h = Tcl_CreateHashEntry(&HASHTAB(doc,tdom_attrNames), 929 ArgName, &hnew); 930 attrnode = (domAttrNode*) domAlloc(sizeof(domAttrNode)); 931 memset(attrnode, 0, sizeof(domAttrNode)); 932 attrnode->parentNode = node; 933 attrnode->nodeName = (char *)&(h->key); 934 attrnode->nodeType = ATTRIBUTE_NODE; 935 attrnode->nodeFlags = 0; 936 attrnode->nodeValue = (char*)MALLOC(nArgVal+1); 937 attrnode->valueLength = nArgVal; 938 memmove(attrnode->nodeValue, ArgVal, nArgVal); 939 *(attrnode->nodeValue + nArgVal) = 0; 940 if (ampersandSeen) { 941 if (!TranslateEntityRefs(attrnode->nodeValue, 942 &(attrnode->valueLength) )) { 943 RetError("Entity parsing error", (start - xml)); 944 } 945 } 946 if (attrList) { 947 lastAttr->nextSibling = attrnode; 948 } else { 949 attrList = attrnode; 950 } 951 lastAttr = attrnode; 952#ifdef TDOM_NS 953 } 954#endif 955 *(ArgName + nArgName) = saved; 956 while (SPACE(*x)) { 957 x++; 958 } 959 } 960 961#ifdef TDOM_NS 962 /*---------------------------------------------------------- 963 | look for namespace of element 964 \---------------------------------------------------------*/ 965 domSplitQName (node->nodeName, tagPrefix, 966 &localname); 967 for (nspos = activeNSpos; nspos >= 0; nspos--) { 968 if ( ((tagPrefix[0] == '\0') && (activeNS[nspos].namespace->prefix[0] == '\0')) 969 || ((tagPrefix[0] != '\0') && (activeNS[nspos].namespace->prefix[0] != '\0') 970 && (strcmp(tagPrefix, activeNS[nspos].namespace->prefix) == 0)) 971 ) { 972 if (activeNS[nspos].namespace->prefix[0] == '\0' 973 && activeNS[nspos].namespace->uri[0] == '\0' 974 && tagPrefix[0] == '\0') 975 { 976 /* xml-names rec. 5.2: "The default namespace can be 977 set to the empty string. This has the same effect, 978 within the scope of the declaration, of there being 979 no default namespace." */ 980 break; 981 } 982 node->namespace = activeNS[nspos].namespace->index; 983 DBG(fprintf(stderr, "tag='%s' uri='%s' \n",node->nodeName, 984 activeNS[nspos].namespace->uri); 985 ) 986 break; 987 } 988 } 989 990 /*---------------------------------------------------------- 991 | look for attribute namespace 992 \---------------------------------------------------------*/ 993 attrnode = attrList; 994 while (attrnode) { 995 domSplitQName ((char*)attrnode->nodeName, prefix, &localname); 996 if (prefix[0] != '\0') { 997 for (nspos = activeNSpos; nspos >= 0; nspos--) { 998 if ( ((prefix[0] == '\0') && (activeNS[nspos].namespace->prefix[0] == '\0')) 999 || ((prefix[0] != '\0') && (activeNS[nspos].namespace->prefix[0] != '\0') 1000 && (strcmp(prefix, activeNS[nspos].namespace->prefix) == 0)) 1001 ) { 1002 attrnode->namespace = activeNS[nspos].namespace->index; 1003 DBG(fprintf(stderr, "attr='%s' uri='%s' \n", 1004 attrnode->nodeName, 1005 activeNS[nspos].namespace->uri); 1006 ) 1007 break; 1008 } 1009 } 1010 } 1011 attrnode = attrnode->nextSibling; 1012 } 1013 if (lastNSAttr) { 1014 node->firstAttr = NSattrList; 1015 lastNSAttr->nextSibling = attrList; 1016 } else { 1017 node->firstAttr = attrList; 1018 } 1019#else 1020 node->firstAttr = attrList; 1021 1022#endif 1023 if (*x=='/') { 1024 hasContent = 0; 1025 x++; 1026 if (*x!='>') { 1027 RetError("Syntax Error",(x - xml - 1) ); 1028 } 1029 } 1030 if (x[1] == 0) { 1031#ifdef TDOM_NS 1032 FREE ((char *) activeNS); 1033#endif 1034 return TCL_OK; 1035 } 1036 if (*x=='>') { 1037 x++; 1038 } 1039 if (hasContent) { 1040#ifdef TDOM_NS 1041 depth++; 1042#endif 1043 /*------------------------------------------------------------ 1044 | recurs to read child tags/texts 1045 \-----------------------------------------------------------*/ 1046 parent_node = node; 1047 } 1048 } 1049 } 1050 RetError("Unexpected end",(x - xml) ); 1051 1052} /* XML_SimpleParse */ 1053 1054 1055 1056/*---------------------------------------------------------------------------- 1057| XML_SimpleParseDocument 1058| 1059| Create a document, parses the XML string starting at 'pos' and 1060| continuing to the first encountered error. 1061| 1062\---------------------------------------------------------------------------*/ 1063domDocument * 1064XML_SimpleParseDocument ( 1065 char *xml, /* Complete text of the file being parsed */ 1066 int ignoreWhiteSpaces, 1067 char *baseURI, 1068 char *extResolver, 1069 int *pos, 1070 char **errStr 1071) { 1072 domDocument *doc = domCreateDoc(baseURI, 0); 1073 1074 if (extResolver) { 1075 doc->extResolver = extResolver; 1076 } 1077 1078 *pos = 0; 1079 XML_SimpleParse (xml, pos, doc, NULL, ignoreWhiteSpaces, errStr); 1080 domSetDocumentElement (doc); 1081 1082 return doc; 1083 1084} /* XML_SimpleParseDocument */ 1085 1086