1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "apr.h"
18#include "apr_strings.h"
19
20#define APR_WANT_STDIO          /* for sprintf() */
21#define APR_WANT_STRFUNC
22#include "apr_want.h"
23
24#include "apr_xml.h"
25
26#include "apu_config.h"
27
28#if defined(HAVE_XMLPARSE_XMLPARSE_H)
29#include <xmlparse/xmlparse.h>
30#elif defined(HAVE_XMLTOK_XMLPARSE_H)
31#include <xmltok/xmlparse.h>
32#elif defined(HAVE_XML_XMLPARSE_H)
33#include <xml/xmlparse.h>
34#else
35#include <expat.h>
36#endif
37
/* line terminator appended to the fragments built by apr_xml_empty_elem() */
#define DEBUG_CR "\r\n"

/*
 * Keywords matched against parsed names. Each one is spelled out as
 * explicit byte values (the ASCII codes of its characters) instead of a
 * string literal -- presumably so the comparison stays ASCII-based on
 * builds whose native charset is not ASCII.
 */
static const char APR_KW_xmlns[] = {
    0x78, 0x6D, 0x6C, 0x6E, 0x73,                       /* "xmlns" */
    '\0'
};
static const char APR_KW_xmlns_lang[] = {
    0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67,     /* "xml:lang" */
    '\0'
};
static const char APR_KW_DAV[] = {
    0x44, 0x41, 0x56, 0x3A,                             /* "DAV:" */
    '\0'
};

/*
 * Namespace-processing failures. These values are stored in the parser's
 * "error" field and reported by apr_xml_parser_geterror().
 */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)  /* prefix has no declaration */
#define APR_XML_NS_ERROR_INVALID_DECL (-1001)    /* xmlns:foo="" declaration */
47
/*
 * Test for a name that begins with the letters [Xx][Mm][Ll] (compared by
 * their ASCII codes). Evaluates to non-zero when it does.
 *
 * "name" may be any expression yielding a pointer to a null-terminated
 * string; it is fully parenthesized in the expansion so that expressions
 * such as "p + 1" index correctly. The argument is evaluated more than
 * once, so it must not have side effects. Strings shorter than three
 * characters are safe: the && chain stops at the null terminator.
 */
#define APR_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 0x58 || (name)[0] == 0x78) && \
      ((name)[1] == 0x4D || (name)[1] == 0x6D) && \
      ((name)[2] == 0x4C || (name)[2] == 0x6C) )
53
54
55/* the real (internal) definition of the parser context */
56struct apr_xml_parser {
57    apr_xml_doc *doc;		/* the doc we're parsing */
58    apr_pool_t *p;		/* the pool we allocate from */
59    apr_xml_elem *cur_elem;	/* current element */
60
61    int error;			/* an error has occurred */
62#define APR_XML_ERROR_EXPAT             1
63#define APR_XML_ERROR_PARSE_DONE        2
64/* also: public APR_XML_NS_ERROR_* values (if any) */
65
66    XML_Parser xp;              /* the actual (Expat) XML parser */
67    enum XML_Error xp_err;      /* stored Expat error code */
68};
69
/*
 * One namespace declaration attached to an element. An element's
 * declarations form a singly linked list, newest first.
 */
typedef struct apr_xml_ns_scope apr_xml_ns_scope;

struct apr_xml_ns_scope {
    /* prefix being declared; "" for a default (xmlns=) declaration */
    const char *prefix;
    /* index of the declared URI in the document's namespace table */
    int ns;
    /* non-zero when the declared URI is the empty string */
    int emptyURI;
    /* next declaration on the same element, or NULL */
    struct apr_xml_ns_scope *next;
};
77
78
79/* return namespace table index for a given prefix */
80static int find_prefix(apr_xml_parser *parser, const char *prefix)
81{
82    apr_xml_elem *elem = parser->cur_elem;
83
84    /*
85    ** Walk up the tree, looking for a namespace scope that defines this
86    ** prefix.
87    */
88    for (; elem; elem = elem->parent) {
89	apr_xml_ns_scope *ns_scope;
90
91	for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
92	    if (strcmp(prefix, ns_scope->prefix) == 0) {
93		if (ns_scope->emptyURI) {
94		    /*
95		    ** It is possible to set the default namespace to an
96		    ** empty URI string; this resets the default namespace
97		    ** to mean "no namespace." We just found the prefix
98		    ** refers to an empty URI, so return "no namespace."
99		    */
100		    return APR_XML_NS_NONE;
101		}
102
103		return ns_scope->ns;
104	    }
105	}
106    }
107
108    /*
109     * If the prefix is empty (""), this means that a prefix was not
110     * specified in the element/attribute. The search that was performed
111     * just above did not locate a default namespace URI (which is stored
112     * into ns_scope with an empty prefix). This means the element/attribute
113     * has "no namespace". We have a reserved value for this.
114     */
115    if (*prefix == '\0') {
116	return APR_XML_NS_NONE;
117    }
118
119    /* not found */
120    return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
121}
122
123/* return original prefix given ns index */
124static const char * find_prefix_name(const apr_xml_elem *elem, int ns, int parent)
125{
126    /*
127    ** Walk up the tree, looking for a namespace scope that defines this
128    ** prefix.
129    */
130    for (; elem; elem = parent ? elem->parent : NULL) {
131	apr_xml_ns_scope *ns_scope = elem->ns_scope;
132
133	for (; ns_scope; ns_scope = ns_scope->next) {
134	    if (ns_scope->ns == ns)
135		return ns_scope->prefix;
136	}
137    }
138    /* not found */
139    return "";
140}
141
142
143static void start_handler(void *userdata, const char *name, const char **attrs)
144{
145    apr_xml_parser *parser = userdata;
146    apr_xml_elem *elem;
147    apr_xml_attr *attr;
148    apr_xml_attr *prev;
149    char *colon;
150    const char *quoted;
151    char *elem_name;
152
153    /* punt once we find an error */
154    if (parser->error)
155	return;
156
157    elem = apr_pcalloc(parser->p, sizeof(*elem));
158
159    /* prep the element */
160    elem->name = elem_name = apr_pstrdup(parser->p, name);
161
162    /* fill in the attributes (note: ends up in reverse order) */
163    while (*attrs) {
164	attr = apr_palloc(parser->p, sizeof(*attr));
165	attr->name = apr_pstrdup(parser->p, *attrs++);
166	attr->value = apr_pstrdup(parser->p, *attrs++);
167	attr->next = elem->attr;
168	elem->attr = attr;
169    }
170
171    /* hook the element into the tree */
172    if (parser->cur_elem == NULL) {
173	/* no current element; this also becomes the root */
174	parser->cur_elem = parser->doc->root = elem;
175    }
176    else {
177	/* this element appeared within the current elem */
178	elem->parent = parser->cur_elem;
179
180	/* set up the child/sibling links */
181	if (elem->parent->last_child == NULL) {
182	    /* no first child either */
183	    elem->parent->first_child = elem->parent->last_child = elem;
184	}
185	else {
186	    /* hook onto the end of the parent's children */
187	    elem->parent->last_child->next = elem;
188	    elem->parent->last_child = elem;
189	}
190
191	/* this element is now the current element */
192	parser->cur_elem = elem;
193    }
194
195    /* scan the attributes for namespace declarations */
196    for (prev = NULL, attr = elem->attr;
197	 attr;
198	 attr = attr->next) {
199	if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) {
200	    const char *prefix = &attr->name[5];
201	    apr_xml_ns_scope *ns_scope;
202
203	    /* test for xmlns:foo= form and xmlns= form */
204	    if (*prefix == 0x3A) {
205                /* a namespace prefix declaration must have a
206                   non-empty value. */
207                if (attr->value[0] == '\0') {
208                    parser->error = APR_XML_NS_ERROR_INVALID_DECL;
209                    return;
210                }
211		++prefix;
212            }
213	    else if (*prefix != '\0') {
214		/* advance "prev" since "attr" is still present */
215		prev = attr;
216		continue;
217	    }
218
219	    /* quote the URI before we ever start working with it */
220	    quoted = apr_xml_quote_string(parser->p, attr->value, 1);
221
222	    /* build and insert the new scope */
223	    ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
224	    ns_scope->prefix = prefix;
225	    ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
226	    ns_scope->emptyURI = *quoted == '\0';
227	    ns_scope->next = elem->ns_scope;
228	    elem->ns_scope = ns_scope;
229
230	    /* remove this attribute from the element */
231	    if (prev == NULL)
232		elem->attr = attr->next;
233	    else
234		prev->next = attr->next;
235
236	    /* Note: prev will not be advanced since we just removed "attr" */
237	}
238	else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) {
239	    /* save away the language (in quoted form) */
240	    elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);
241
242	    /* remove this attribute from the element */
243	    if (prev == NULL)
244		elem->attr = attr->next;
245	    else
246		prev->next = attr->next;
247
248	    /* Note: prev will not be advanced since we just removed "attr" */
249	}
250	else {
251	    /* advance "prev" since "attr" is still present */
252	    prev = attr;
253	}
254    }
255
256    /*
257    ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
258    ** language from the parent element (if present).
259    **
260    ** NOTE: elem_size() *depends* upon this pointer equality.
261    */
262    if (elem->lang == NULL && elem->parent != NULL)
263	elem->lang = elem->parent->lang;
264
265    /* adjust the element's namespace */
266    colon = strchr(elem_name, 0x3A);
267    if (colon == NULL) {
268	/*
269	 * The element is using the default namespace, which will always
270	 * be found. Either it will be "no namespace", or a default
271	 * namespace URI has been specified at some point.
272	 */
273	elem->ns = find_prefix(parser, "");
274    }
275    else if (APR_XML_NS_IS_RESERVED(elem->name)) {
276	elem->ns = APR_XML_NS_NONE;
277    }
278    else {
279	*colon = '\0';
280	elem->ns = find_prefix(parser, elem->name);
281	elem->name = colon + 1;
282
283	if (APR_XML_NS_IS_ERROR(elem->ns)) {
284	    parser->error = elem->ns;
285	    return;
286	}
287    }
288
289    /* adjust all remaining attributes' namespaces */
290    for (attr = elem->attr; attr; attr = attr->next) {
291        /*
292         * apr_xml_attr defines this as "const" but we dup'd it, so we
293         * know that we can change it. a bit hacky, but the existing
294         * structure def is best.
295         */
296        char *attr_name = (char *)attr->name;
297
298	colon = strchr(attr_name, 0x3A);
299	if (colon == NULL) {
300	    /*
301	     * Attributes do NOT use the default namespace. Therefore,
302	     * we place them into the "no namespace" category.
303	     */
304	    attr->ns = APR_XML_NS_NONE;
305	}
306	else if (APR_XML_NS_IS_RESERVED(attr->name)) {
307	    attr->ns = APR_XML_NS_NONE;
308	}
309	else {
310	    *colon = '\0';
311	    attr->ns = find_prefix(parser, attr->name);
312	    attr->name = colon + 1;
313
314	    if (APR_XML_NS_IS_ERROR(attr->ns)) {
315		parser->error = attr->ns;
316		return;
317	    }
318	}
319    }
320}
321
322static void end_handler(void *userdata, const char *name)
323{
324    apr_xml_parser *parser = userdata;
325
326    /* punt once we find an error */
327    if (parser->error)
328	return;
329
330    /* pop up one level */
331    parser->cur_elem = parser->cur_elem->parent;
332}
333
334static void cdata_handler(void *userdata, const char *data, int len)
335{
336    apr_xml_parser *parser = userdata;
337    apr_xml_elem *elem;
338    apr_text_header *hdr;
339    const char *s;
340
341    /* punt once we find an error */
342    if (parser->error)
343	return;
344
345    elem = parser->cur_elem;
346    s = apr_pstrndup(parser->p, data, len);
347
348    if (elem->last_child == NULL) {
349	/* no children yet. this cdata follows the start tag */
350	hdr = &elem->first_cdata;
351    }
352    else {
353	/* child elements exist. this cdata follows the last child. */
354	hdr = &elem->last_child->following_cdata;
355    }
356
357    apr_text_append(parser->p, hdr, s);
358}
359
360static apr_status_t cleanup_parser(void *ctx)
361{
362    apr_xml_parser *parser = ctx;
363
364    XML_ParserFree(parser->xp);
365    parser->xp = NULL;
366
367    return APR_SUCCESS;
368}
369
370#if XML_MAJOR_VERSION > 1
371/* Stop the parser if an entity declaration is hit. */
372static void entity_declaration(void *userData, const XML_Char *entityName,
373                               int is_parameter_entity, const XML_Char *value,
374                               int value_length, const XML_Char *base,
375                               const XML_Char *systemId, const XML_Char *publicId,
376                               const XML_Char *notationName)
377{
378    apr_xml_parser *parser = userData;
379
380    XML_StopParser(parser->xp, XML_FALSE);
381}
382#else
383/* A noop default_handler. */
384static void default_handler(void *userData, const XML_Char *s, int len)
385{
386}
387#endif
388
389APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
390{
391    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
392
393    parser->p = pool;
394    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
395
396    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
397
398    /* ### is there a way to avoid hard-coding this? */
399    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
400
401    parser->xp = XML_ParserCreate(NULL);
402    if (parser->xp == NULL) {
403        (*apr_pool_abort_get(pool))(APR_ENOMEM);
404        return NULL;
405    }
406
407    apr_pool_cleanup_register(pool, parser, cleanup_parser,
408                              apr_pool_cleanup_null);
409
410    XML_SetUserData(parser->xp, parser);
411    XML_SetElementHandler(parser->xp, start_handler, end_handler);
412    XML_SetCharacterDataHandler(parser->xp, cdata_handler);
413
414    /* Prevent the "billion laughs" attack against expat by disabling
415     * internal entity expansion.  With 2.x, forcibly stop the parser
416     * if an entity is declared - this is safer and a more obvious
417     * failure mode.  With older versions, installing a noop
418     * DefaultHandler means that internal entities will be expanded as
419     * the empty string, which is also sufficient to prevent the
420     * attack. */
421#if XML_MAJOR_VERSION > 1
422    XML_SetEntityDeclHandler(parser->xp, entity_declaration);
423#else
424    XML_SetDefaultHandler(parser->xp, default_handler);
425#endif
426
427    return parser;
428}
429
430static apr_status_t do_parse(apr_xml_parser *parser,
431                             const char *data, apr_size_t len,
432                             int is_final)
433{
434    if (parser->xp == NULL) {
435        parser->error = APR_XML_ERROR_PARSE_DONE;
436    }
437    else {
438        int rv = XML_Parse(parser->xp, data, (int)len, is_final);
439
440        if (rv == 0) {
441            parser->error = APR_XML_ERROR_EXPAT;
442            parser->xp_err = XML_GetErrorCode(parser->xp);
443        }
444    }
445
446    /* ### better error code? */
447    return parser->error ? APR_EGENERAL : APR_SUCCESS;
448}
449
450APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
451                                              const char *data,
452                                              apr_size_t len)
453{
454    return do_parse(parser, data, len, 0 /* is_final */);
455}
456
457APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
458                                              apr_xml_doc **pdoc)
459{
460    char end;
461    apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
462
463    /* get rid of the parser */
464    (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
465
466    if (status)
467        return status;
468
469    if (pdoc != NULL)
470        *pdoc = parser->doc;
471    return APR_SUCCESS;
472}
473
474APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
475                                            char *errbuf,
476                                            apr_size_t errbufsize)
477{
478    int error = parser->error;
479    const char *msg;
480
481    /* clear our record of an error */
482    parser->error = 0;
483
484    switch (error) {
485    case 0:
486        msg = "No error.";
487        break;
488
489    case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
490        msg = "An undefined namespace prefix was used.";
491        break;
492
493    case APR_XML_NS_ERROR_INVALID_DECL:
494        msg = "A namespace prefix was defined with an empty URI.";
495        break;
496
497    case APR_XML_ERROR_EXPAT:
498        (void) apr_snprintf(errbuf, errbufsize,
499                            "XML parser error code: %s (%d)",
500                            XML_ErrorString(parser->xp_err), parser->xp_err);
501        return errbuf;
502
503    case APR_XML_ERROR_PARSE_DONE:
504        msg = "The parser is not active.";
505        break;
506
507    default:
508        msg = "There was an unknown error within the XML body.";
509        break;
510    }
511
512    (void) apr_cpystrn(errbuf, msg, errbufsize);
513    return errbuf;
514}
515
516APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
517                                             apr_xml_parser **parser,
518                                             apr_xml_doc **ppdoc,
519                                             apr_file_t *xmlfd,
520                                             apr_size_t buffer_length)
521{
522    apr_status_t rv;
523    char *buffer;
524    apr_size_t length;
525
526    *parser = apr_xml_parser_create(p);
527    if (*parser == NULL) {
528        /* FIXME: returning an error code would be nice,
529         * but we dont get one ;( */
530        return APR_EGENERAL;
531    }
532    buffer = apr_palloc(p, buffer_length);
533    length = buffer_length;
534
535    rv = apr_file_read(xmlfd, buffer, &length);
536
537    while (rv == APR_SUCCESS) {
538        rv = apr_xml_parser_feed(*parser, buffer, length);
539        if (rv != APR_SUCCESS) {
540            return rv;
541        }
542
543        length = buffer_length;
544        rv = apr_file_read(xmlfd, buffer, &length);
545    }
546    if (rv != APR_EOF) {
547        return rv;
548    }
549    rv = apr_xml_parser_done(*parser, ppdoc);
550    *parser = NULL;
551    return rv;
552}
553
554APU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr,
555                                  const char *text)
556{
557    apr_text *t = apr_palloc(p, sizeof(*t));
558
559    t->text = text;
560    t->next = NULL;
561
562    if (hdr->first == NULL) {
563	/* no text elements yet */
564	hdr->first = hdr->last = t;
565    }
566    else {
567	/* append to the last text element */
568	hdr->last->next = t;
569	hdr->last = t;
570    }
571}
572
573
574/* ---------------------------------------------------------------
575**
576** XML UTILITY FUNCTIONS
577*/
578
579/*
580** apr_xml_quote_string: quote an XML string
581**
582** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
583** If quotes is true, then replace '"' with '&quot;'.
584**
585** quotes is typically set to true for XML strings that will occur within
586** double quotes -- attribute values.
587*/
588APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
589                                               int quotes)
590{
591    const char *scan;
592    apr_size_t len = 0;
593    apr_size_t extra = 0;
594    char *qstr;
595    char *qscan;
596    char c;
597
598    for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
599	if (c == '<' || c == '>')
600	    extra += 3;		/* &lt; or &gt; */
601	else if (c == '&')
602	    extra += 4;		/* &amp; */
603	else if (quotes && c == '"')
604	    extra += 5;		/* &quot; */
605    }
606
607    /* nothing to do? */
608    if (extra == 0)
609	return s;
610
611    qstr = apr_palloc(p, len + extra + 1);
612    for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
613	if (c == '<') {
614	    *qscan++ = '&';
615	    *qscan++ = 'l';
616	    *qscan++ = 't';
617	    *qscan++ = ';';
618	}
619	else if (c == '>') {
620	    *qscan++ = '&';
621	    *qscan++ = 'g';
622	    *qscan++ = 't';
623	    *qscan++ = ';';
624	}
625	else if (c == '&') {
626	    *qscan++ = '&';
627	    *qscan++ = 'a';
628	    *qscan++ = 'm';
629	    *qscan++ = 'p';
630	    *qscan++ = ';';
631	}
632	else if (quotes && c == '"') {
633	    *qscan++ = '&';
634	    *qscan++ = 'q';
635	    *qscan++ = 'u';
636	    *qscan++ = 'o';
637	    *qscan++ = 't';
638	    *qscan++ = ';';
639	}
640	else {
641	    *qscan++ = c;
642	}
643    }
644
645    *qscan = '\0';
646    return qstr;
647}
648
/*
 * Number of characters needed to print a non-negative integer in decimal:
 * 1 for values below 10, 2 below 100, and so on up to 10 for values of
 * 1000000000 and above. The argument is evaluated several times, so it
 * must not have side effects.
 */
#define APR_XML_NS_LEN(ns) \
    ((ns) < 10         ?  1 : \
     (ns) < 100        ?  2 : \
     (ns) < 1000       ?  3 : \
     (ns) < 10000      ?  4 : \
     (ns) < 100000     ?  5 : \
     (ns) < 1000000    ?  6 : \
     (ns) < 10000000   ?  7 : \
     (ns) < 100000000  ?  8 : \
     (ns) < 1000000000 ?  9 : \
                         10)
654
655static apr_size_t text_size(const apr_text *t)
656{
657    apr_size_t size = 0;
658
659    for (; t; t = t->next)
660	size += strlen(t->text);
661    return size;
662}
663
664static apr_size_t elem_size(const apr_xml_elem *elem, int style,
665                            apr_array_header_t *namespaces, int *ns_map)
666{
667    apr_size_t size;
668
669    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG ||
670	style == APR_XML_X2T_PARSED) {
671	const apr_xml_attr *attr;
672
673	size = 0;
674
675	if (style == APR_XML_X2T_FULL_NS_LANG) {
676	    int i;
677
678	    /*
679	    ** The outer element will contain xmlns:ns%d="%s" attributes
680	    ** and an xml:lang attribute, if applicable.
681	    */
682
683	    for (i = namespaces->nelts; i--;) {
684		/* compute size of: ' xmlns:ns%d="%s"' */
685		size += (9 + APR_XML_NS_LEN(i) + 2 +
686			 strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1);
687	    }
688
689	    if (elem->lang != NULL) {
690		/* compute size of: ' xml:lang="%s"' */
691		size += 11 + strlen(elem->lang) + 1;
692	    }
693	}
694	else if (style == APR_XML_X2T_PARSED) {
695	    apr_xml_ns_scope *ns_scope = elem->ns_scope;
696
697	    /* compute size of: ' xmlns:%s="%s"' */
698	    for (; ns_scope; ns_scope = ns_scope->next) {
699		size += 10 + strlen(find_prefix_name(elem, ns_scope->ns, 0)) +
700			     strlen(APR_XML_GET_URI_ITEM(namespaces, ns_scope->ns));
701	    }
702
703	    if (elem->lang != NULL) {
704		/* compute size of: ' xml:lang="%s"' */
705		size += 11 + strlen(elem->lang) + 1;
706	    }
707	}
708
709	if (elem->ns == APR_XML_NS_NONE) {
710	    /* compute size of: <%s> */
711	    size += 1 + strlen(elem->name) + 1;
712	}
713	else if (style == APR_XML_X2T_PARSED) {
714	    /* compute size of: <%s:%s> */
715	    size += 3 + strlen(find_prefix_name(elem, elem->ns, 1)) + strlen(elem->name);
716	}
717	else {
718	    int ns = ns_map ? ns_map[elem->ns] : elem->ns;
719
720	    /* compute size of: <ns%d:%s> */
721	    size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
722	}
723
724	if (APR_XML_ELEM_IS_EMPTY(elem)) {
725	    /* insert a closing "/" */
726	    size += 1;
727	}
728	else {
729	    /*
730	     * two of above plus "/":
731	     *     <ns%d:%s> ... </ns%d:%s>
732	     * OR  <%s> ... </%s>
733	     */
734	    size = 2 * size + 1;
735	}
736
737	for (attr = elem->attr; attr; attr = attr->next) {
738	    if (attr->ns == APR_XML_NS_NONE) {
739		/* compute size of: ' %s="%s"' */
740		size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
741	    }
742	    else if (style == APR_XML_X2T_PARSED) {
743		/* compute size of: ' %s:%s="%s"' */
744		size += 5 + strlen(find_prefix_name(elem, attr->ns, 1)) + strlen(attr->name) + strlen(attr->value);
745	    }
746	    else {
747		/* compute size of: ' ns%d:%s="%s"' */
748                int ns = ns_map ? ns_map[attr->ns] : attr->ns;
749                size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
750	    }
751	}
752
753	/*
754	** If the element has an xml:lang value that is *different* from
755	** its parent, then add the thing in: ' xml:lang="%s"'.
756	**
757	** NOTE: we take advantage of the pointer equality established by
758	** the parsing for "inheriting" the xml:lang values from parents.
759	*/
760	if (elem->lang != NULL &&
761	    (elem->parent == NULL || elem->lang != elem->parent->lang)) {
762	    size += 11 + strlen(elem->lang) + 1;
763	}
764    }
765    else if (style == APR_XML_X2T_LANG_INNER) {
766	/*
767	 * This style prepends the xml:lang value plus a null terminator.
768	 * If a lang value is not present, then we insert a null term.
769	 */
770	size = elem->lang ? strlen(elem->lang) + 1 : 1;
771    }
772    else
773	size = 0;
774
775    size += text_size(elem->first_cdata.first);
776
777    for (elem = elem->first_child; elem; elem = elem->next) {
778	/* the size of the child element plus the CDATA that follows it */
779	size += (elem_size(elem, style == APR_XML_X2T_PARSED ? APR_XML_X2T_PARSED : APR_XML_X2T_FULL, NULL, ns_map) +
780		 text_size(elem->following_cdata.first));
781    }
782
783    return size;
784}
785
786static char *write_text(char *s, const apr_text *t)
787{
788    for (; t; t = t->next) {
789	apr_size_t len = strlen(t->text);
790	memcpy(s, t->text, len);
791	s += len;
792    }
793    return s;
794}
795
796static char *write_elem(char *s, const apr_xml_elem *elem, int style,
797			apr_array_header_t *namespaces, int *ns_map)
798{
799    const apr_xml_elem *child;
800    apr_size_t len;
801    int ns;
802
803    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG ||
804	style == APR_XML_X2T_PARSED) {
805	int empty = APR_XML_ELEM_IS_EMPTY(elem);
806	const apr_xml_attr *attr;
807
808	if (elem->ns == APR_XML_NS_NONE)
809	    len = sprintf(s, "<%s", elem->name);
810	else if (style == APR_XML_X2T_PARSED)
811	    len = sprintf(s, "<%s:%s", find_prefix_name(elem, elem->ns, 1), elem->name);
812	else {
813	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
814	    len = sprintf(s, "<ns%d:%s", ns, elem->name);
815	}
816	s += len;
817
818	for (attr = elem->attr; attr; attr = attr->next) {
819	    if (attr->ns == APR_XML_NS_NONE)
820		len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
821	    else if (style == APR_XML_X2T_PARSED)
822		len = sprintf(s, " %s:%s=\"%s\"",
823			      find_prefix_name(elem, attr->ns, 1), attr->name, attr->value);
824	    else {
825		ns = ns_map ? ns_map[attr->ns] : attr->ns;
826		len = sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name, attr->value);
827	    }
828	    s += len;
829	}
830
831	/* add the xml:lang value if necessary */
832	if (elem->lang != NULL &&
833	    (style == APR_XML_X2T_FULL_NS_LANG ||
834	     elem->parent == NULL ||
835	     elem->lang != elem->parent->lang)) {
836	    len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
837	    s += len;
838	}
839
840	/* add namespace definitions, if required */
841	if (style == APR_XML_X2T_FULL_NS_LANG) {
842	    int i;
843
844	    for (i = namespaces->nelts; i--;) {
845		len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
846			      APR_XML_GET_URI_ITEM(namespaces, i));
847		s += len;
848	    }
849	}
850	else if (style == APR_XML_X2T_PARSED) {
851	    apr_xml_ns_scope *ns_scope = elem->ns_scope;
852
853	    for (; ns_scope; ns_scope = ns_scope->next) {
854		const char *prefix = find_prefix_name(elem, ns_scope->ns, 0);
855
856		len = sprintf(s, " xmlns%s%s=\"%s\"",
857			      *prefix ? ":" : "", *prefix ? prefix : "",
858			      APR_XML_GET_URI_ITEM(namespaces, ns_scope->ns));
859		s += len;
860	    }
861	}
862
863	/* no more to do. close it up and go. */
864	if (empty) {
865	    *s++ = '/';
866	    *s++ = '>';
867	    return s;
868	}
869
870	/* just close it */
871	*s++ = '>';
872    }
873    else if (style == APR_XML_X2T_LANG_INNER) {
874	/* prepend the xml:lang value */
875	if (elem->lang != NULL) {
876	    len = strlen(elem->lang);
877	    memcpy(s, elem->lang, len);
878	    s += len;
879	}
880	*s++ = '\0';
881    }
882
883    s = write_text(s, elem->first_cdata.first);
884
885    for (child = elem->first_child; child; child = child->next) {
886	s = write_elem(s, child,
887		       style == APR_XML_X2T_PARSED ? APR_XML_X2T_PARSED : APR_XML_X2T_FULL,
888		       NULL, ns_map);
889	s = write_text(s, child->following_cdata.first);
890    }
891
892    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG || style == APR_XML_X2T_PARSED) {
893	if (elem->ns == APR_XML_NS_NONE)
894	    len = sprintf(s, "</%s>", elem->name);
895	else if (style == APR_XML_X2T_PARSED)
896	    len = sprintf(s, "</%s:%s>", find_prefix_name(elem, elem->ns, 1), elem->name);
897	else {
898	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
899	    len = sprintf(s, "</ns%d:%s>", ns, elem->name);
900	}
901	s += len;
902    }
903
904    return s;
905}
906
907APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem)
908{
909    apr_text *scan_txt;
910    apr_xml_attr *scan_attr;
911    apr_xml_elem *scan_elem;
912
913    /* convert the element's text */
914    for (scan_txt = elem->first_cdata.first;
915	 scan_txt != NULL;
916	 scan_txt = scan_txt->next) {
917	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
918    }
919    for (scan_txt = elem->following_cdata.first;
920	 scan_txt != NULL;
921	 scan_txt = scan_txt->next) {
922	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
923    }
924
925    /* convert the attribute values */
926    for (scan_attr = elem->attr;
927	 scan_attr != NULL;
928	 scan_attr = scan_attr->next) {
929	scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1);
930    }
931
932    /* convert the child elements */
933    for (scan_elem = elem->first_child;
934	 scan_elem != NULL;
935	 scan_elem = scan_elem->next) {
936	apr_xml_quote_elem(p, scan_elem);
937    }
938}
939
940/* convert an element to a text string */
941APU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem,
942                                  int style, apr_array_header_t *namespaces,
943                                  int *ns_map, const char **pbuf,
944                                  apr_size_t *psize)
945{
946    /* get the exact size, plus a null terminator */
947    apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
948    char *s = apr_palloc(p, size);
949
950    (void) write_elem(s, elem, style, namespaces, ns_map);
951    s[size - 1] = '\0';
952
953    *pbuf = s;
954    if (psize)
955	*psize = size;
956}
957
958APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p,
959                                             const apr_xml_elem *elem)
960{
961    if (elem->ns == APR_XML_NS_NONE) {
962	/*
963	 * The prefix (xml...) is already within the prop name, or
964	 * the element simply has no prefix.
965	 */
966	return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
967    }
968
969    return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
970}
971
972/* return the URI's (existing) index, or insert it and return a new index */
973APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
974                                    const char *uri)
975{
976    int i;
977    const char **pelt;
978
979    /* never insert an empty URI; this index is always APR_XML_NS_NONE */
980    if (*uri == '\0')
981        return APR_XML_NS_NONE;
982
983    for (i = uri_array->nelts; i--;) {
984	if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0)
985	    return i;
986    }
987
988    pelt = apr_array_push(uri_array);
989    *pelt = uri;		/* assume uri is const or in a pool */
990    return uri_array->nelts - 1;
991}
992
993/* convert the element to EBCDIC */
994#if APR_CHARSET_EBCDIC
995static apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e,
996                                                apr_xlate_t *convset)
997{
998    apr_xml_attr *a;
999    apr_xml_elem *ec;
1000    apr_text *t;
1001    apr_size_t inbytes_left, outbytes_left;
1002    apr_status_t status;
1003
1004    inbytes_left = outbytes_left = strlen(e->name);
1005    status = apr_xlate_conv_buffer(convset, e->name,  &inbytes_left, (char *) e->name, &outbytes_left);
1006    if (status) {
1007        return status;
1008    }
1009
1010    for (t = e->first_cdata.first; t != NULL; t = t->next) {
1011        inbytes_left = outbytes_left = strlen(t->text);
1012        status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
1013        if (status) {
1014            return status;
1015        }
1016    }
1017
1018    for (t = e->following_cdata.first;  t != NULL; t = t->next) {
1019        inbytes_left = outbytes_left = strlen(t->text);
1020        status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
1021        if (status) {
1022            return status;
1023        }
1024    }
1025
1026    for (a = e->attr; a != NULL; a = a->next) {
1027        inbytes_left = outbytes_left = strlen(a->name);
1028        status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left);
1029        if (status) {
1030            return status;
1031        }
1032        inbytes_left = outbytes_left = strlen(a->value);
1033        status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left);
1034        if (status) {
1035            return status;
1036        }
1037    }
1038
1039    for (ec = e->first_child; ec != NULL; ec = ec->next) {
1040        status = apr_xml_parser_convert_elem(ec, convset);
1041        if (status) {
1042            return status;
1043        }
1044    }
1045    return APR_SUCCESS;
1046}
1047
1048/* convert the whole document to EBCDIC */
1049APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool,
1050                                                     apr_xml_doc *pdoc,
1051                                                     apr_xlate_t *convset)
1052{
1053    apr_status_t status;
1054    /* Don't convert the namespaces: they are constant! */
1055    if (pdoc->namespaces != NULL) {
1056        int i;
1057        apr_array_header_t *namespaces;
1058        namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *));
1059        if (namespaces == NULL)
1060            return APR_ENOMEM;
1061        for (i = 0; i < pdoc->namespaces->nelts; i++) {
1062            apr_size_t inbytes_left, outbytes_left;
1063            char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i);
1064            ptr = apr_pstrdup(pool, ptr);
1065            if ( ptr == NULL)
1066                return APR_ENOMEM;
1067            inbytes_left = outbytes_left = strlen(ptr);
1068            status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left);
1069            if (status) {
1070                return status;
1071            }
1072            apr_xml_insert_uri(namespaces, ptr);
1073        }
1074        pdoc->namespaces = namespaces;
1075    }
1076    return apr_xml_parser_convert_elem(pdoc->root, convset);
1077}
1078#endif
1079