1/*----------------------------------------------------------------------------
2|   Copyright (c) 1999  Jochen Loewer (loewerj@hotmail.com)
3|-----------------------------------------------------------------------------
4|
5|
6|   A simple (hopefully fast) parser to build up a DOM structure in memory.
7|   Initially based on Richard Hipp's XML parser for TMML.
8|
9|
10|   The contents of this file are subject to the Mozilla Public License
11|   Version 1.1 (the "License"); you may not use this file except in
12|   compliance with the License. You may obtain a copy of the License at
13|   http://www.mozilla.org/MPL/
14|
15|   Software distributed under the License is distributed on an "AS IS"
16|   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
17|   License for the specific language governing rights and limitations
18|   under the License.
19|
20|   The Original Code is tDOM.
21|
22|   The Initial Developer of the Original Code is Jochen Loewer
23|   Portions created by Jochen Loewer are Copyright (C) 1998, 1999
24|   Jochen Loewer. All Rights Reserved.
25|
26|   Contributor(s):
27|
28|       June00  Zoran Vasiljevic  Made thread-safe.
29|
30|
31|   adopted/written by Jochen Loewer
32|   July 1999
33|
34|   ------------------------------------------------------------------------
35|
36|   A parser for XML.
37|
38|   Copyright (C) 1998 D. Richard Hipp
39|
40|   This library is free software; you can redistribute it and/or
41|   modify it under the terms of the GNU Library General Public
42|   License as published by the Free Software Foundation; either
43|   version 2 of the License, or (at your option) any later version.
44|
45|   This library is distributed in the hope that it will be useful,
46|   but WITHOUT ANY WARRANTY; without even the implied warranty of
47|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
48|   Library General Public License for more details.
49|
50|   You should have received a copy of the GNU Library General Public
51|   License along with this library; if not, write to the
52|   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
53|   Boston, MA  02111-1307, USA.
54|
55|   Author contact information:
56|     drh@acm.org
57|     http://www.hwaci.com/drh/
58|
59\---------------------------------------------------------------------------*/
60
61
62/*----------------------------------------------------------------------------
63|   Includes
64|
65\---------------------------------------------------------------------------*/
66#include <tcl.h>
67#include <string.h>
68#include <ctype.h>
69#include <dom.h>
70
71/*----------------------------------------------------------------------------
72|   Defines
73|
74\---------------------------------------------------------------------------*/
/* DBG(x): debug hook. It expands to nothing, so its argument is discarded. */
#define DBG(x)

/* TDOM_NS is defined unconditionally here; it selects the variant of
   RetError that also releases the active-namespace stack. */
#define TDOM_NS

/*
 * RetError(m,p): report a parse error from inside a function that has
 * 'errStr' (char **) and 'pos' (int *) in scope: store message m in *errStr
 * and position p in *pos, then return TCL_ERROR from the calling function.
 * When TDOM_NS is defined the caller's local 'activeNS' is FREEd first.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves as
 * one statement; the bare statement list used previously would run only its
 * first assignment conditionally in an unbraced "if (cond) RetError(..);".
 * Invocations must be followed by a semicolon.
 */
#ifdef TDOM_NS
# define RetError(m,p) \
    do { \
        *errStr = (m); \
        *pos = (p); \
        FREE((char*)activeNS); \
        return TCL_ERROR; \
    } while (0)
#else
# define RetError(m,p) \
    do { \
        *errStr = (m); \
        *pos = (p); \
        return TCL_ERROR; \
    } while (0)
#endif

/* SPACE(c): true for blank, newline, tab and carriage return.  The argument
   is evaluated up to four times, so it must not have side effects. */
#define SPACE(c)       ((c)==' ' || (c)=='\n' || (c)=='\t' || (c)=='\r')
83
84/*---------------------------------------------------------------------------
85|   type domActiveNS
86|
87\--------------------------------------------------------------------------*/
88typedef struct _domActiveNS {
89
90    int    depth;
91    domNS *namespace;
92
93} domActiveNS;
94
95/*----------------------------------------------------------------------------
96|   Begin Character Entity Translator
97|
98|
99|   The next section of code implements routines used to translate
100|   character entity references into their corresponding strings.
101|
102|   Examples:
103|
104|         &amp;          "&"
105|         &lt;           "<"
106|         &gt;           ">"
107|         &nbsp;         " "
108|
109\---------------------------------------------------------------------------*/
110
111
/*----------------------------------------------------------------------------
|   Er  --
|
|       A single known entity reference.  Entries live in the static
|       er_sequences table and are chained into hash buckets through
|       pNext.
|
\---------------------------------------------------------------------------*/
typedef struct Er {
    char      *zName;   /* entity name without '&' and ';'   e.g. "amp" */
    char      *zValue;  /* replacement text for the entity   e.g. "&"   */
    struct Er *pNext;   /* next entry whose name hashes to the same slot */
} Er;


/*----------------------------------------------------------------------------
|   ER_HASH_SIZE  --
|
|       Number of buckets in the entity hash table.  A prime close to the
|       number of known entity references gives the best distribution.
|
\---------------------------------------------------------------------------*/
#define ER_HASH_SIZE 7
131
132
133/*----------------------------------------------------------------------------
134|   The following flag is TRUE if entity reference hash table needs
135|   to be initialized.
136|
137|   Hash table is used read-only, therefore just one copy, protected with
138|   mutex when used in threading environments. The mutex is used only for
139|   initial setup of the table.
140|
141\---------------------------------------------------------------------------*/
142static int bErNeedsInit = 1;
143TDomThreaded(static Tcl_Mutex initMutex;)
144
145
146/*----------------------------------------------------------------------------
147|   The hash table
148|
149|   If the name of an entity reference hashes to the value H, then
150|   apErHash[H] will point to a linked list of Er structures, one of
151|   which will be the Er structure for that entity reference
152|
153\---------------------------------------------------------------------------*/
154static Er *apErHash[ER_HASH_SIZE];
155
156
157/*----------------------------------------------------------------------------
158|   ErHash  --
159|
160|       Hash an entity reference name.  The value returned is an
161|       integer between 0 and Er_HASH_SIZE-1, inclusive.
162|
163\---------------------------------------------------------------------------*/
164static int ErHash(
165    const char *zName
166)
167{
168    int h = 0;      /* The hash value to be returned */
169    char c;         /* The next character in the name being hashed */
170
171    while( (c=*zName)!=0 ){
172        h = h<<5 ^ h ^ c;
173        zName++;
174    }
175    if( h<0 ) h = -h;
176    return h % ER_HASH_SIZE;
177
178} /* ErHash */
179
180
181/*----------------------------------------------------------------------------
182|   The following is a table of all entity references.  To create
183|   new character entities, add entries to this table.
184|
185|   Note: For the decoder to work, the name of the entity reference
186|   must not be shorter than the value.
187|
188\---------------------------------------------------------------------------*/
189static Er er_sequences[] = {
190    { "amp",       "&",        0 },
191    { "lt",        "<",        0 },
192    { "gt",        ">",        0 },
193    { "apos",      "'",        0 },
194    { "quot",      "\"",       0 },
195#if TclOnly8Bits
196    { "nbsp",      "\240",     0 },
197#else
198    { "nbsp",      "\xC2\xA0",    0 },
199#endif
200};
201
202
203/*----------------------------------------------------------------------------
204|   ErInit --
205|
206|       Initialize the entity reference hash table
207|
208\---------------------------------------------------------------------------*/
209static void ErInit (void)
210{
211    size_t i;  /* For looping thru the list of entity references */
212    int h;  /* The hash on a entity */
213
214    for(i=0; i<sizeof(er_sequences)/sizeof(er_sequences[0]); i++){
215        h = ErHash(er_sequences[i].zName);
216        er_sequences[i].pNext = apErHash[h];
217        apErHash[h] = &er_sequences[i];
218    }
219
220} /* ErInit */
221
222
223/*----------------------------------------------------------------------------
224|    TranslateEntityRefs  --
225|
226|        Translate entity references and character references in the string
227|        "z".  "z" is overwritten with the translated sequence.
228|
229|        Unrecognized entity references are unaltered.
230|
231|        Example:
232|
233|          input =    "AT&amp;T &gt MCI"
234|          output =   "AT&T > MCI"
235|
236\---------------------------------------------------------------------------*/
237static int TranslateEntityRefs (
238    char *z,
239    int  *newLen
240)
241{
242    int from;    /* Read characters from this position in z[] */
243    int to;      /* Write characters into this position in z[] */
244    int h;       /* A hash on the entity reference */
245    char *zVal;  /* The substituted value */
246    Er *p;       /* For looping down the entity reference collision chain */
247    int value;
248
249    from = to = 0;
250
251    /*---------------------------------------------
252     |   This is done only once per process
253     \--------------------------------------------*/
254
255    if (bErNeedsInit) {
256        TDomThreaded(Tcl_MutexLock(&initMutex);)
257        if (bErNeedsInit) {
258            ErInit();
259            bErNeedsInit = 0;
260        }
261        TDomThreaded(Tcl_MutexUnlock(&initMutex);)
262    }
263
264    while (z[from]) {
265        if (z[from]=='&') {
266            int i = from+1;
267            int c;
268
269            if (z[i] == '#') {
270                /*---------------------------------------------
271                |   convert character reference
272                \--------------------------------------------*/
273                value = 0;
274                if (z[++i] == 'x') {
275                    i++;
276                    while (z[i] && (c=z[i]) && (c!=';')) {
277                        value = value * 16;
278                        if ((c>='0') && (c<='9')) {
279                            value += c-'0';
280                        } else
281                        if ((c>='A') && (c<='F')) {
282                            value += c-'A' + 10;
283                        } else
284                        if ((c>='a') && (c<='f')) {
285                            value += c-'a' + 10;
286                        } else {
287                            /* error */
288                            return 0;
289                        }
290                        i++;
291                    }
292                } else {
293                    while (z[i] && (c=z[i]) && (c!=';')) {
294                        value = value * 10;
295                        if ((c>='0') && (c<='9')) {
296                            value += c-'0';
297                        } else {
298                            /* error */
299                            return 0;
300                        }
301                        i++;
302                    }
303                }
304                if (!z[i] || (z[i]!=';')) {
305                    return 0;
306                    /* error */
307                }
308                from = i+1;
309#if TclOnly8Bits
310                z[to++] = value;
311#else
312                if (value < 0x80) {
313                    z[to++] = value;
314                } else if (value <= 0x7FF) {
315                    z[to++] = (char) ((value >> 6) | 0xC0);
316                    z[to++] = (char) ((value | 0x80) & 0xBF);
317                } else if (value <= 0xFFFF) {
318                    z[to++] = (char) ((value >> 12) | 0xE0);
319                    z[to++] = (char) (((value >> 6) | 0x80) & 0xBF);
320                    z[to++] = (char) ((value | 0x80) & 0xBF);
321                } else {
322                    /* error */
323                    return 0;
324                }
325#endif
326            } else {
327                while (z[i] && isalpha((unsigned char)z[i])) {
328                   i++;
329                }
330                if (!z[i] || (z[i]!=';')) {
331                    return 0;
332                }
333                c = z[i];
334                z[i] = 0;
335                h = ErHash(&z[from+1]);
336                p = apErHash[h];
337                while (p && strcmp(p->zName,&z[from+1])!=0 ) {
338                    p = p->pNext;
339                }
340                z[i] = c;
341                if (p) {
342                    zVal = p->zValue;
343                    while (*zVal) {
344                        z[to++] = *(zVal++);
345                    }
346                    from = i;
347                    if (c==';') from++;
348                } else {
349                    z[to++] = z[from++];
350                }
351            }
352        } else {
353            z[to++] = z[from++];
354        }
355    }
356    z[to] = 0;
357    *newLen = to;
358    return 1;
359}
360/*----------------------------------------------------------------------------
361|   End Of Character Entity Translator
362\---------------------------------------------------------------------------*/
363
364
365/*---------------------------------------------------------------------------
366|   domIsNamespaceInScope
367|
368\--------------------------------------------------------------------------*/
369static int
370domIsNamespaceInScope (
371    domActiveNS *NSstack,
372    int          NSstackPos,
373    const char  *prefix,
374    const char  *namespaceURI
375)
376{
377    int    i;
378
379    for (i = NSstackPos; i >= 0; i--) {
380        if (NSstack[i].namespace->prefix[0] &&
381            (strcmp(NSstack[i].namespace->prefix, prefix)==0)) {
382            if (strcmp(NSstack[i].namespace->uri, namespaceURI)==0) {
383                /* OK, exactly the same namespace declaration is in scope */
384                return 1;
385            } else {
386                /* This prefix is currently assigned to another uri,
387                   we need a new NS declaration, to override this one */
388                return 0;
389            }
390        }
391    }
392    return 0;
393}
394
395/*----------------------------------------------------------------------------
396|   XML_SimpleParse (non recursive)
397|
398|       Parses the XML string starting at 'pos' and continuing to the
399|       first encountered error.
400|
401\---------------------------------------------------------------------------*/
402static int
403XML_SimpleParse (
404    char        *xml,   /* XML string  */
405    int         *pos,   /* Index of next unparsed character in xml */
406    domDocument *doc,
407    domNode     *parent_nodeOld,
408    int          ignoreWhiteSpaces,
409    char       **errStr
410) {
411    register int   c;          /* Next character of the input file */
412    register char *pn;
413    register char *x, *start, *piSep;
414    int            saved;
415    int            hasContent;
416    domNode       *node;
417    domNode       *parent_node = NULL;
418    domTextNode   *tnode;
419    domAttrNode   *attrnode, *lastAttr, *attrList;
420    int            ampersandSeen = 0;
421    int            only_whites   = 0;
422    domProcessingInstructionNode *pinode;
423    int            hnew;
424    Tcl_HashEntry *h;
425
426#ifdef TDOM_NS
427    int            nspos, newNS;
428    int            depth = 0;
429    int            activeNSpos  = -1;
430    int            activeNSsize = 8;
431    domActiveNS   *activeNS     = (domActiveNS*) MALLOC (sizeof(domActiveNS) * activeNSsize);
432    const char    *xmlns, *localname;
433    domNS         *ns;
434    char           tagPrefix[MAX_PREFIX_LEN];
435    char           prefix[MAX_PREFIX_LEN];
436    domAttrNode   *lastNSAttr, *NSattrList;
437#endif
438
439    x = &(xml[*pos]);
440
441    while ( (c=*x)!=0 ) {
442
443        start = x;
444
445        if (c!='<') {
446            /*----------------------------------------------------------------
447            |   read text between tags
448            |
449            \---------------------------------------------------------------*/
450            ampersandSeen = 0;
451            only_whites = 1;
452            while ( (c=*x)!=0 && c!='<' ) {
453                if (c=='&') ampersandSeen = 1;
454                if ( (c != ' ')  &&
455                     (c != '\t') &&
456                     (c != '\n') &&
457                     (c != '\r') ) {
458                    only_whites = 0;
459                }
460                x++;
461            }
462            if (!(only_whites && ignoreWhiteSpaces) && parent_node) {
463                /*--------------------------------------------------------
464                |   allocate new TEXT node
465                 \-------------------------------------------------------*/
466                tnode = (domTextNode*) domAlloc(sizeof(domTextNode));
467                memset(tnode, 0, sizeof(domTextNode));
468                tnode->nodeType    = TEXT_NODE;
469                tnode->nodeFlags   = 0;
470                tnode->namespace   = 0;
471                tnode->ownerDocument = doc;
472                tnode->nodeNumber  = NODE_NO(doc);
473                tnode->valueLength = (x - start);
474                tnode->nodeValue   = (char*)MALLOC((x - start)+1);
475                memmove(tnode->nodeValue, start, (x - start));
476                *(tnode->nodeValue + (x - start)) = 0;
477                if (ampersandSeen) {
478                    if (!TranslateEntityRefs(tnode->nodeValue,
479                                             &(tnode->valueLength) )) {
480                        RetError("Entity parsing error", (x - xml));
481                    }
482                }
483                tnode->parentNode = parent_node;
484                if (parent_node->firstChild)  {
485                    parent_node->lastChild->nextSibling = (domNode*)tnode;
486                    tnode->previousSibling = parent_node->lastChild;
487                    parent_node->lastChild = (domNode*)tnode;
488                } else {
489                    parent_node->firstChild = parent_node->lastChild =
490                        (domNode*)tnode;
491                }
492            }
493
494        } else if (x[1]=='/') {
495            /*------------------------------------------------------------
496            |   read and check closing tag
497            \-----------------------------------------------------------*/
498            node = parent_node;
499            if (!parent_node) {
500                RetError("Syntax error",(x - xml));
501            }
502            parent_node = node->parentNode;
503            pn = (char*)node->nodeName;
504
505            x += 2;
506            while (*x == *pn) { x++; pn++; }
507            if ( *pn || (*x!='>' && !SPACE(*x) ) ) {
508                RetError("Unterminated element",(x - xml));
509            }
510            while (SPACE(*x)) {
511                x++;
512            }
513            if (*x=='>') {
514                x++;
515            } else {
516                RetError("Missing \">\"",(x - xml)-1);
517            }
518#ifdef TDOM_NS
519            depth--;
520            /* pop active namespaces */
521            while ( (activeNSpos >= 0) &&
522                    (activeNS[activeNSpos].depth == depth) )
523            {
524                activeNSpos--;
525            }
526#endif
527            if (parent_node == NULL) {
528                /* we return to main node and so finished parsing */
529#ifdef TDOM_NS
530                FREE ((char *) activeNS);
531#endif
532                return TCL_OK;
533            }
534            continue;
535
536        } else {
537
538            x++;
539            if (*x=='!') {
540                if (x[1]=='-' && x[2]=='-') {
541                    /*--------------------------------------------------------
542                    |   read over a comment
543                    \-------------------------------------------------------*/
544                    x += 3;
545                    while ( (c=*x)!=0 &&
546                            (c!='-' || x[1]!='-' || x[2]!='>')) {
547                        x++;
548                    }
549                    if (*x) {
550                        /*----------------------------------------------------
551                        |   allocate new COMMENT node for comments
552                        \---------------------------------------------------*/
553                        tnode = (domTextNode*) domAlloc(sizeof(domTextNode));
554                        memset(tnode, 0, sizeof(domTextNode));
555                        tnode->nodeType      = COMMENT_NODE;
556                        tnode->nodeFlags     = 0;
557                        tnode->namespace     = 0;
558                        tnode->ownerDocument = doc;
559                        tnode->nodeNumber    = NODE_NO(doc);
560                        tnode->parentNode    = parent_node;
561                        tnode->valueLength   = x - start - 4;
562                        tnode->nodeValue     = (char*)MALLOC(tnode->valueLength+1);
563                        memmove(tnode->nodeValue, start+4, tnode->valueLength);
564                        *(tnode->nodeValue + tnode->valueLength) = 0;
565                        if (parent_node == NULL) {
566                            if (doc->rootNode->lastChild) {
567                                tnode->previousSibling =
568                                    doc->rootNode->lastChild;
569                                doc->rootNode->lastChild->nextSibling
570                                    = (domNode*)tnode;
571                            } else {
572                                doc->rootNode->firstChild = (domNode*) tnode;
573                            }
574                            doc->rootNode->lastChild = (domNode*) tnode;
575                        } else {
576                            if (parent_node->firstChild)  {
577                                parent_node->lastChild->nextSibling = (domNode*)tnode;
578                                tnode->previousSibling = parent_node->lastChild;
579                                parent_node->lastChild = (domNode*)tnode;
580                            } else {
581                                parent_node->firstChild = parent_node->lastChild = (domNode*)tnode;
582                            }
583                        }
584                        x += 3;
585                    } else {
586                        RetError("Unterminated comment",(start-xml));
587                    }
588                    continue;
589
590                } else if (x[1]=='D' && x[2]=='O' &&
591                           x[3]=='C' && x[4]=='T' &&
592                           x[5]=='Y' && x[6]=='P' && x[7]=='E' ) {
593                    /*--------------------------------------------------------
594                    |   read over a DOCTYPE definition
595                    \-------------------------------------------------------*/
596                    x += 8;
597                    start = x;
598                    while (*x!=0) {
599                        if (*x=='[') {
600                            x++;
601                            while ((*x!=0) && (*x!=']')) x++;
602                        } else
603                        if (*x=='>') {
604                            break;
605                        } else {
606                            x++;
607                        }
608                    }
609                    if (*x) {
610                        x++;
611                    } else {
612                        RetError("Unterminated DOCTYPE definition",(start-xml));
613                    }
614                    continue;
615
616                } else if (x[1]=='[' && x[2]=='C' &&
617                           x[3]=='D' && x[4]=='A' &&
618                           x[5]=='T' && x[6]=='A' && x[7]=='[' ) {
619                    /*--------------------------------------------------------
620                    |   read over a <![CDATA[ section
621                    \-------------------------------------------------------*/
622                    x += 8;
623                    start = x;
624                    while ( (*x!=0) &&
625                            ((*x!=']') || (x[1]!=']') || (x[2]!='>'))) {
626                        x++;
627                    }
628                    if (*x) {
629                        if (parent_node && (x - start)) {
630                            /*----------------------------------------------------
631                            |   allocate new TEXT node for CDATA section data
632                            \---------------------------------------------------*/
633                            tnode = (domTextNode*) domAlloc(sizeof(domTextNode));
634                            memset(tnode, 0, sizeof(domTextNode));
635                            tnode->nodeType      = TEXT_NODE;
636                            tnode->nodeFlags     = 0;
637                            tnode->namespace     = 0;
638                            tnode->ownerDocument = doc;
639                            tnode->nodeNumber    = NODE_NO(doc);
640                            tnode->parentNode    = parent_node;
641                            tnode->valueLength   = (x - start);
642                            tnode->nodeValue     = (char*)MALLOC((x - start)+1);
643                            memmove(tnode->nodeValue, start, (x - start));
644                            *(tnode->nodeValue + (x - start)) = 0;
645                            if (parent_node->firstChild)  {
646                                parent_node->lastChild->nextSibling = (domNode*)tnode;
647                                tnode->previousSibling = parent_node->lastChild;
648                                parent_node->lastChild = (domNode*)tnode;
649                            } else {
650                                parent_node->firstChild = parent_node->lastChild = (domNode*)tnode;
651                            }
652                        }
653                        x += 3;
654                    } else {
655                        RetError("Unterminated CDATA definition",(start-xml) );
656                    }
657                    continue;
658                 } else {
659                        RetError("Incorrect <!... tag",(start-xml) );
660                 }
661
662            } else if (*x=='?') {
663                /*--------------------------------------------------------
664                |   read over a processing instructions(PI) / XMLDecl
665                \-------------------------------------------------------*/
666                x++;
667                start = x;
668                while ( (c=*x)!=0 &&
669                        (c!='?' || x[1]!='>')) {
670                    x++;
671                }
672                if (*x) {
673                    /*------------------------------------------------------------
674                    |   allocate new PI node for processing instruction section
675                    \-----------------------------------------------------------*/
676                    pinode = (domProcessingInstructionNode*)
677                            domAlloc(sizeof(domProcessingInstructionNode));
678                    memset(pinode, 0, sizeof(domProcessingInstructionNode));
679                    pinode->nodeType      = PROCESSING_INSTRUCTION_NODE;
680                    pinode->nodeFlags     = 0;
681                    pinode->namespace     = 0;
682                    pinode->ownerDocument = doc;
683                    pinode->nodeNumber    = NODE_NO(doc);
684                    pinode->parentNode    = parent_node;
685
686                    /*-------------------------------------------------
687                    |   extract PI target
688                    \------------------------------------------------*/
689                    piSep = start;
690                    while ( (c=*piSep)!=0 && !SPACE(c) &&
691                            (c!='?' || piSep[1]!='>')) {
692                         piSep++;
693                    }
694                    *piSep = '\0'; /* temporarily terminate the string */
695
696                    pinode->targetLength = strlen(start);
697                    pinode->targetValue  = (char*)MALLOC(pinode->targetLength);
698                    memmove(pinode->targetValue, start, pinode->targetLength);
699
700                    *piSep = c;  /* remove temporarily termination */
701
702                    /*-------------------------------------------------
703                    |   extract PI data
704                    \------------------------------------------------*/
705                    while (SPACE(*piSep)) {
706                        piSep++;
707                    }
708                    pinode->dataLength = x - piSep;
709                    pinode->dataValue  = (char*)MALLOC(pinode->dataLength);
710                    memmove(pinode->dataValue, piSep, pinode->dataLength);
711
712                    if (parent_node == NULL) {
713                        if (doc->rootNode->lastChild) {
714                            pinode->previousSibling = doc->rootNode->lastChild;
715                            doc->rootNode->lastChild->nextSibling
716                                = (domNode*) pinode;
717                        } else {
718                            doc->rootNode->firstChild = (domNode*) pinode;
719                        }
720                        doc->rootNode->lastChild = (domNode*) pinode;
721                    } else {
722                        if (parent_node->firstChild)  {
723                            parent_node->lastChild->nextSibling = (domNode*)pinode;
724                            pinode->previousSibling = parent_node->lastChild;
725                            parent_node->lastChild = (domNode*)pinode;
726                        } else {
727                            parent_node->firstChild = parent_node->lastChild = (domNode*)pinode;
728                        }
729                    }
730                    x += 2;
731                } else {
732                    RetError("Unterminated processing instruction(PI)",(start-xml) );
733                }
734                continue;
735            }
736
737            /*----------------------------------------------------------------
738            |   new tag/element
739            |
740            \---------------------------------------------------------------*/
741            hasContent = 1;
742            while ((c=*x)!=0 && c!='/' && c!='>' && !SPACE(c) ) {
743                x++;
744            }
745            if (c==0) {
746                RetError("Missing \">\"",(start-xml) );
747            }
748            if ( (x-start)==1) {
749                RetError("Null markup name",(start-xml) );
750            }
751            *x = '\0'; /* temporarily terminate the string */
752
753            /*------------------------------------------------------
754            |   create new DOM element node
755            \-----------------------------------------------------*/
756            h = Tcl_CreateHashEntry(&HASHTAB(doc,tdom_tagNames), start+1,
757                                    &hnew);
758            node = (domNode*) domAlloc(sizeof(domNode));
759            memset(node, 0, sizeof(domNode));
760            node->nodeType      = ELEMENT_NODE;
761            node->nodeFlags     = 0;
762            node->namespace     = 0;
763            node->nodeName      = (char *)&(h->key);
764            node->ownerDocument = doc;
765            node->nodeNumber    = NODE_NO(doc);
766            node->ownerDocument = doc;
767
768            if (parent_node == NULL) {
769                if (doc->rootNode->lastChild) {
770                    node->previousSibling = doc->rootNode->lastChild;
771                    doc->rootNode->lastChild->nextSibling = node;
772                } else {
773                    doc->rootNode->firstChild = node;
774                }
775                doc->rootNode->lastChild = node;
776            } else {
777                node->parentNode = parent_node;
778                if (parent_node->firstChild)  {
779                    parent_node->lastChild->nextSibling = node;
780                    node->previousSibling = parent_node->lastChild;
781                    parent_node->lastChild = node;
782                } else {
783                    parent_node->firstChild = parent_node->lastChild = node;
784                }
785            }
786
787            *x = c;  /* remove temporarily termination */
788
789            while (SPACE(*x) ) {
790                x++;
791            }
792            /*-----------------------------------------------------------
793            |   read attribute name-value pairs
794            \----------------------------------------------------------*/
795            lastAttr = NULL;
796            attrList = NULL;
797#ifdef TDOM_NS
798            lastNSAttr = NULL;
799            NSattrList = NULL;
800#endif
801            while ( (c=*x) && (c!='/') && (c!='>') ) {
802                char *ArgName = x;
803                int nArgName;
804                char *ArgVal = NULL;
805                int nArgVal = 0;
806
807                while ((c=*x)!=0 && c!='=' && c!='>' && !SPACE(c) ) {
808                    x++;
809                }
810                nArgName = x - ArgName;
811                while (SPACE(*x)) {
812                    x++;
813                }
814                if (*x=='=') {
815                    x++;
816                }
817                saved = *(ArgName + nArgName);
818                *(ArgName + nArgName) = '\0'; /* terminate arg name */
819
820                while (SPACE(*x)) {
821                    x++;
822                }
823                if (*x=='>' || *x==0) {
824                    ArgVal = ArgName;
825                    nArgVal = nArgName;
826                } else if ((c=*x)=='\"' || c=='\'') {
827                    register int cDelim = c;
828                    x++;
829                    ArgVal = x;
830                    ampersandSeen = 0;
831                    while ((c=*x)!=0 && c!=cDelim) {
832                        if (c=='&') {
833                            ampersandSeen = 1;
834                        }
835                        x++;
836                    }
837                    nArgVal = x - ArgVal;
838                    if (c==0) {
839                        RetError("Unterminated string",(ArgVal - xml - 1) );
840                    } else {
841                        x++;
842                    }
843                } else if (c!=0 && c!='>') {
844                    ArgVal = x;
845                    while ((c=*x)!=0 && c!='>' && !SPACE(c)) {
846                        if (c=='&') {
847                            ampersandSeen = 1;
848                        }
849                        x++;
850                    }
851                    if (c==0) {
852                        RetError("Missing \">\"",(start-xml));
853                    }
854                    nArgVal = x - ArgVal;
855                }
856
857
858#ifdef TDOM_NS
859                /*------------------------------------------------------------
860                |   handle namespace attributes or normal ones
861                \------------------------------------------------------------*/
862                if (strncmp((char *)ArgName, "xmlns", 5) == 0) {
863                    xmlns = ArgName;
864                    newNS = 1;
865
866                    h = Tcl_CreateHashEntry(&HASHTAB(doc, tdom_attrNames),
867                                            ArgName, &hnew);
868                    attrnode = (domAttrNode*) domAlloc(sizeof(domAttrNode));
869                    memset(attrnode, 0, sizeof(domAttrNode));
870                    attrnode->parentNode  = node;
871                    attrnode->nodeName    = (char *)&(h->key);
872                    attrnode->nodeType    = ATTRIBUTE_NODE;
873                    attrnode->nodeFlags   = IS_NS_NODE;
874                    attrnode->nodeValue   = (char*)MALLOC(nArgVal+1);
875                    attrnode->valueLength = nArgVal;
876                    memmove(attrnode->nodeValue, ArgVal, nArgVal);
877                    *(attrnode->nodeValue + nArgVal) = 0;
878                    if (ampersandSeen) {
879                        if (!TranslateEntityRefs(attrnode->nodeValue,
880                                                 &(attrnode->valueLength) )) {
881                            RetError("Entity parsing error",(start-xml));
882                        }
883                    }
884
885                    if (xmlns[5] == ':') {
886                        if (domIsNamespaceInScope (activeNS, activeNSpos,
887                                                   &(xmlns[6]),
888                                                   (char*)attrnode->nodeValue))
889                        {
890                            ns = domLookupPrefix (node, &(xmlns[6]));
891                            newNS = 0;
892                        } else {
893                            ns = domNewNamespace(doc, &(xmlns[6]),
894                                                 (char*)attrnode->nodeValue);
895                        }
896                    } else {
897                        ns = domNewNamespace(doc, "",
898                                             (char*)attrnode->nodeValue);
899                    }
900                    attrnode->namespace   = ns->index;
901                    if (newNS) {
902                        /* push active namespace */
903                        activeNSpos++;
904                        if (activeNSpos >= activeNSsize) {
905                            activeNS = (domActiveNS*) REALLOC(
906                                           (char*)activeNS,
907                                           sizeof(domActiveNS) * 2 * activeNSsize);
908                            activeNSsize = 2 * activeNSsize;
909                        }
910                        activeNS[activeNSpos].depth     = depth;
911                        activeNS[activeNSpos].namespace = ns;
912                    }
913
914                    if (NSattrList) {
915                        lastNSAttr->nextSibling = attrnode;
916                    } else {
917                        NSattrList = attrnode;
918                    }
919                    lastNSAttr = attrnode;
920
921
922                } else {
923#endif
924
925                    /*------------------------------------------------------------
926                    |   allocate new attribute node
927                    \------------------------------------------------------------*/
928                    h = Tcl_CreateHashEntry(&HASHTAB(doc,tdom_attrNames),
929                                            ArgName, &hnew);
930                    attrnode = (domAttrNode*) domAlloc(sizeof(domAttrNode));
931                    memset(attrnode, 0, sizeof(domAttrNode));
932                    attrnode->parentNode  = node;
933                    attrnode->nodeName    = (char *)&(h->key);
934                    attrnode->nodeType    = ATTRIBUTE_NODE;
935                    attrnode->nodeFlags   = 0;
936                    attrnode->nodeValue   = (char*)MALLOC(nArgVal+1);
937                    attrnode->valueLength = nArgVal;
938                    memmove(attrnode->nodeValue, ArgVal, nArgVal);
939                    *(attrnode->nodeValue + nArgVal) = 0;
940                    if (ampersandSeen) {
941                        if (!TranslateEntityRefs(attrnode->nodeValue,
942                                                 &(attrnode->valueLength) )) {
943                            RetError("Entity parsing error", (start - xml));
944                        }
945                    }
946                    if (attrList) {
947                        lastAttr->nextSibling = attrnode;
948                    } else {
949                        attrList = attrnode;
950                    }
951                    lastAttr = attrnode;
952#ifdef TDOM_NS
953                }
954#endif
955                *(ArgName + nArgName) = saved;
956                while (SPACE(*x)) {
957                    x++;
958                }
959            }
960
961#ifdef TDOM_NS
962            /*----------------------------------------------------------
963            |   look for namespace of element
964            \---------------------------------------------------------*/
965            domSplitQName (node->nodeName, tagPrefix,
966                           &localname);
967            for (nspos = activeNSpos; nspos >= 0; nspos--) {
968                if (  ((tagPrefix[0] == '\0') && (activeNS[nspos].namespace->prefix[0] == '\0'))
969                      || ((tagPrefix[0] != '\0') && (activeNS[nspos].namespace->prefix[0] != '\0')
970                          && (strcmp(tagPrefix, activeNS[nspos].namespace->prefix) == 0))
971                    ) {
972                    if (activeNS[nspos].namespace->prefix[0] == '\0'
973                        && activeNS[nspos].namespace->uri[0] == '\0'
974                        && tagPrefix[0] == '\0')
975                    {
976                        /* xml-names rec. 5.2: "The default namespace can be
977                           set to the empty string. This has the same effect,
978                           within the scope of the declaration, of there being
979                           no default namespace." */
980                        break;
981                    }
982                    node->namespace = activeNS[nspos].namespace->index;
983                    DBG(fprintf(stderr, "tag='%s' uri='%s' \n",node->nodeName,
984                                activeNS[nspos].namespace->uri);
985                               )
986                    break;
987                }
988            }
989
990            /*----------------------------------------------------------
991            |   look for attribute namespace
992            \---------------------------------------------------------*/
993            attrnode = attrList;
994            while (attrnode) {
995                domSplitQName ((char*)attrnode->nodeName, prefix, &localname);
996                if (prefix[0] != '\0') {
997                    for (nspos = activeNSpos; nspos >= 0; nspos--) {
998                        if (  ((prefix[0] == '\0') && (activeNS[nspos].namespace->prefix[0] == '\0'))
999                              || ((prefix[0] != '\0') && (activeNS[nspos].namespace->prefix[0] != '\0')
1000                                  && (strcmp(prefix, activeNS[nspos].namespace->prefix) == 0))
1001                            ) {
1002                            attrnode->namespace = activeNS[nspos].namespace->index;
1003                            DBG(fprintf(stderr, "attr='%s' uri='%s' \n",
1004                                        attrnode->nodeName,
1005                                        activeNS[nspos].namespace->uri);
1006                                )
1007                            break;
1008                        }
1009                    }
1010                }
1011                attrnode = attrnode->nextSibling;
1012            }
1013            if (lastNSAttr) {
1014                node->firstAttr = NSattrList;
1015                lastNSAttr->nextSibling = attrList;
1016            } else {
1017                node->firstAttr = attrList;
1018            }
1019#else
1020            node->firstAttr = attrList;
1021
1022#endif
1023            if (*x=='/') {
1024                hasContent = 0;
1025                x++;
1026                if (*x!='>') {
1027                    RetError("Syntax Error",(x - xml - 1) );
1028                }
1029            }
1030            if (x[1] == 0) {
1031#ifdef TDOM_NS
1032                FREE ((char *) activeNS);
1033#endif
1034                return TCL_OK;
1035            }
1036            if (*x=='>') {
1037                x++;
1038            }
1039            if (hasContent) {
1040#ifdef TDOM_NS
1041                depth++;
1042#endif
1043                /*------------------------------------------------------------
1044                |   recurs to read child tags/texts
1045                \-----------------------------------------------------------*/
1046                parent_node = node;
1047            }
1048        }
1049    }
1050    RetError("Unexpected end",(x - xml) );
1051
1052} /* XML_SimpleParse */
1053
1054
1055
/*----------------------------------------------------------------------------
|   XML_SimpleParseDocument
|
|       Creates a new document, resets '*pos' to 0 and parses the XML
|       string 'xml' from its beginning into that document, continuing
|       up to the first encountered error. The document is returned
|       whether or not parsing succeeded; the result of XML_SimpleParse
|       is not checked here.
|
\---------------------------------------------------------------------------*/
1063domDocument *
1064XML_SimpleParseDocument (
1065    char    *xml,              /* Complete text of the file being parsed  */
1066    int      ignoreWhiteSpaces,
1067    char    *baseURI,
1068    char    *extResolver,
1069    int     *pos,
1070    char   **errStr
1071) {
1072    domDocument   *doc = domCreateDoc(baseURI, 0);
1073
1074    if (extResolver) {
1075        doc->extResolver = extResolver;
1076    }
1077
1078    *pos = 0;
1079    XML_SimpleParse (xml, pos, doc, NULL, ignoreWhiteSpaces, errStr);
1080    domSetDocumentElement (doc);
1081
1082    return doc;
1083
1084} /* XML_SimpleParseDocument */
1085
1086