1/*
2 * xml.c:  xml helper code shared among the Subversion libraries.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
#include <string.h>
#include <assert.h>
#include <limits.h>
28
29#include "svn_private_config.h"         /* for SVN_HAVE_OLD_EXPAT */
30#include "svn_hash.h"
31#include "svn_pools.h"
32#include "svn_xml.h"
33#include "svn_error.h"
34#include "svn_ctype.h"
35
36#include "private/svn_utf_private.h"
37#include "private/svn_subr_private.h"
38
39#ifdef SVN_HAVE_OLD_EXPAT
40#include <xmlparse.h>
41#else
42#include <expat.h>
43#endif
44
/* Fallback used when the Expat headers do not supply
   XML_VERSION_AT_LEAST() themselves.  Evaluates to nonzero iff the Expat
   version being compiled against (XML_MAJOR_VERSION, XML_MINOR_VERSION,
   XML_MICRO_VERSION) is MAJOR.MINOR.PATCH or newer.  Built only from
   integer comparisons, so it is usable inside #if. */
#ifndef XML_VERSION_AT_LEAST
#define XML_VERSION_AT_LEAST(major,minor,patch)                       \
  ((XML_MAJOR_VERSION > (major))                                      \
   || (XML_MAJOR_VERSION == (major)                                   \
       && (XML_MINOR_VERSION > (minor)                                \
           || (XML_MINOR_VERSION == (minor)                           \
               && XML_MICRO_VERSION >= (patch)))))
#endif /* XML_VERSION_AT_LEAST */
52
/* The Expat callbacks in this file hand XML_Char strings straight to the
   SVN handlers, so refuse to build against an Expat that was compiled
   for wide characters. */
#ifdef XML_UNICODE
#error Expat is unusable -- it has been compiled for wide characters
#endif
56
57const char *
58svn_xml__compiled_version(void)
59{
60  static const char xml_version_str[] = APR_STRINGIFY(XML_MAJOR_VERSION)
61                                        "." APR_STRINGIFY(XML_MINOR_VERSION)
62                                        "." APR_STRINGIFY(XML_MICRO_VERSION);
63
64  return xml_version_str;
65}
66
/* Return the version string reported by the Expat library at run time
   (XML_ExpatVersion()), with a leading "expat_" removed if present.
   The result points into the string returned by Expat. */
const char *
svn_xml__runtime_version(void)
{
  static const char prefix[] = "expat_";
  const size_t prefix_len = sizeof(prefix) - 1;   /* exclude the NUL */
  const char *version = XML_ExpatVersion();

  return (strncmp(version, prefix, prefix_len) == 0)
           ? version + prefix_len
           : version;
}
77
78
/* The private internals for a parser object: the Expat parser handle
   plus the state the Expat callbacks in this file need in order to
   forward events to the caller's handlers. */
struct svn_xml_parser_t
{
  /** the expat parser; reset to @c NULL by parser_cleanup() once freed */
  XML_Parser parser;

  /** the SVN callbacks to call from the Expat callbacks */
  svn_xml_start_elem start_handler;
  svn_xml_end_elem end_handler;
  svn_xml_char_data data_handler;

  /** the user's baton for private data; passed as the first argument
      to each of the handlers above */
  void *baton;

  /** if non-@c NULL, an error happened while parsing; stored by
      svn_xml_signal_bailout() and returned by svn_xml_parse() */
  svn_error_t *error;

  /** where this object is allocated, so we can free it easily */
  apr_pool_t *pool;

};
100
101
102/*** XML character validation ***/
103
104svn_boolean_t
105svn_xml_is_xml_safe(const char *data, apr_size_t len)
106{
107  const char *end = data + len;
108  const char *p;
109
110  if (! svn_utf__is_valid(data, len))
111    return FALSE;
112
113  for (p = data; p < end; p++)
114    {
115      unsigned char c = *p;
116
117      if (svn_ctype_iscntrl(c))
118        {
119          if ((c != SVN_CTYPE_ASCII_TAB)
120              && (c != SVN_CTYPE_ASCII_LINEFEED)
121              && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
122              && (c != SVN_CTYPE_ASCII_DELETE))
123            return FALSE;
124        }
125    }
126  return TRUE;
127}
128
129
130
131
132
133/*** XML escaping. ***/
134
135/* ### ...?
136 *
137 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
138 * in POOL, else append to the existing stringbuf there.
139 */
140static void
141xml_escape_cdata(svn_stringbuf_t **outstr,
142                 const char *data,
143                 apr_size_t len,
144                 apr_pool_t *pool)
145{
146  const char *end = data + len;
147  const char *p = data, *q;
148
149  if (*outstr == NULL)
150    *outstr = svn_stringbuf_create_empty(pool);
151
152  while (1)
153    {
154      /* Find a character which needs to be quoted and append bytes up
155         to that point.  Strictly speaking, '>' only needs to be
156         quoted if it follows "]]", but it's easier to quote it all
157         the time.
158
159         So, why are we escaping '\r' here?  Well, according to the
160         XML spec, '\r\n' gets converted to '\n' during XML parsing.
161         Also, any '\r' not followed by '\n' is converted to '\n'.  By
162         golly, if we say we want to escape a '\r', we want to make
163         sure it remains a '\r'!  */
164      q = p;
165      while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
166        q++;
167      svn_stringbuf_appendbytes(*outstr, p, q - p);
168
169      /* We may already be a winner.  */
170      if (q == end)
171        break;
172
173      /* Append the entity reference for the character.  */
174      if (*q == '&')
175        svn_stringbuf_appendcstr(*outstr, "&amp;");
176      else if (*q == '<')
177        svn_stringbuf_appendcstr(*outstr, "&lt;");
178      else if (*q == '>')
179        svn_stringbuf_appendcstr(*outstr, "&gt;");
180      else if (*q == '\r')
181        svn_stringbuf_appendcstr(*outstr, "&#13;");
182
183      p = q + 1;
184    }
185}
186
187/* Essentially the same as xml_escape_cdata, with the addition of
188   whitespace and quote characters. */
189static void
190xml_escape_attr(svn_stringbuf_t **outstr,
191                const char *data,
192                apr_size_t len,
193                apr_pool_t *pool)
194{
195  const char *end = data + len;
196  const char *p = data, *q;
197
198  if (*outstr == NULL)
199    *outstr = svn_stringbuf_create_ensure(len, pool);
200
201  while (1)
202    {
203      /* Find a character which needs to be quoted and append bytes up
204         to that point. */
205      q = p;
206      while (q < end && *q != '&' && *q != '<' && *q != '>'
207             && *q != '"' && *q != '\'' && *q != '\r'
208             && *q != '\n' && *q != '\t')
209        q++;
210      svn_stringbuf_appendbytes(*outstr, p, q - p);
211
212      /* We may already be a winner.  */
213      if (q == end)
214        break;
215
216      /* Append the entity reference for the character.  */
217      if (*q == '&')
218        svn_stringbuf_appendcstr(*outstr, "&amp;");
219      else if (*q == '<')
220        svn_stringbuf_appendcstr(*outstr, "&lt;");
221      else if (*q == '>')
222        svn_stringbuf_appendcstr(*outstr, "&gt;");
223      else if (*q == '"')
224        svn_stringbuf_appendcstr(*outstr, "&quot;");
225      else if (*q == '\'')
226        svn_stringbuf_appendcstr(*outstr, "&apos;");
227      else if (*q == '\r')
228        svn_stringbuf_appendcstr(*outstr, "&#13;");
229      else if (*q == '\n')
230        svn_stringbuf_appendcstr(*outstr, "&#10;");
231      else if (*q == '\t')
232        svn_stringbuf_appendcstr(*outstr, "&#9;");
233
234      p = q + 1;
235    }
236}
237
238
239void
240svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
241                               const svn_stringbuf_t *string,
242                               apr_pool_t *pool)
243{
244  xml_escape_cdata(outstr, string->data, string->len, pool);
245}
246
247
248void
249svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
250                            const svn_string_t *string,
251                            apr_pool_t *pool)
252{
253  xml_escape_cdata(outstr, string->data, string->len, pool);
254}
255
256
257void
258svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
259                             const char *string,
260                             apr_pool_t *pool)
261{
262  xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
263}
264
265
266void
267svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
268                              const svn_stringbuf_t *string,
269                              apr_pool_t *pool)
270{
271  xml_escape_attr(outstr, string->data, string->len, pool);
272}
273
274
275void
276svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
277                           const svn_string_t *string,
278                           apr_pool_t *pool)
279{
280  xml_escape_attr(outstr, string->data, string->len, pool);
281}
282
283
284void
285svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
286                            const char *string,
287                            apr_pool_t *pool)
288{
289  xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
290}
291
292
293const char *
294svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
295{
296  const char *end = string + strlen(string);
297  const char *p = string, *q;
298  svn_stringbuf_t *outstr;
299  char escaped_char[6];   /* ? \ u u u \0 */
300
301  for (q = p; q < end; q++)
302    {
303      if (svn_ctype_iscntrl(*q)
304          && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
305        break;
306    }
307
308  /* Return original string if no unsafe characters found. */
309  if (q == end)
310    return string;
311
312  outstr = svn_stringbuf_create_empty(pool);
313  while (1)
314    {
315      q = p;
316
317      /* Traverse till either unsafe character or eos. */
318      while ((q < end)
319             && ((! svn_ctype_iscntrl(*q))
320                 || (*q == '\n') || (*q == '\r') || (*q == '\t')))
321        q++;
322
323      /* copy chunk before marker */
324      svn_stringbuf_appendbytes(outstr, p, q - p);
325
326      if (q == end)
327        break;
328
329      /* Append an escaped version of the unsafe character.
330
331         ### This format was chosen for consistency with
332         ### svn_utf__cstring_from_utf8_fuzzy().  The two functions
333         ### should probably share code, even though they escape
334         ### different characters.
335      */
336      apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
337                   (unsigned char) *q);
338      svn_stringbuf_appendcstr(outstr, escaped_char);
339
340      p = q + 1;
341    }
342
343  return outstr->data;
344}
345
346
347/*** Map from the Expat callback types to the SVN XML types. ***/
348
349static void expat_start_handler(void *userData,
350                                const XML_Char *name,
351                                const XML_Char **atts)
352{
353  svn_xml_parser_t *svn_parser = userData;
354
355  (*svn_parser->start_handler)(svn_parser->baton, name, atts);
356
357#if XML_VERSION_AT_LEAST(1, 95, 8)
358  /* Stop XML parsing if svn_xml_signal_bailout() was called.
359     We cannot do this in svn_xml_signal_bailout() because Expat
360     documentation states that XML_StopParser() must be called only from
361     callbacks. */
362  if (svn_parser->error)
363    (void) XML_StopParser(svn_parser->parser, 0 /* resumable */);
364#endif
365}
366
367static void expat_end_handler(void *userData, const XML_Char *name)
368{
369  svn_xml_parser_t *svn_parser = userData;
370
371  (*svn_parser->end_handler)(svn_parser->baton, name);
372
373#if XML_VERSION_AT_LEAST(1, 95, 8)
374  /* Stop XML parsing if svn_xml_signal_bailout() was called.
375     We cannot do this in svn_xml_signal_bailout() because Expat
376     documentation states that XML_StopParser() must be called only from
377     callbacks. */
378  if (svn_parser->error)
379    (void) XML_StopParser(svn_parser->parser, 0 /* resumable */);
380#endif
381}
382
383static void expat_data_handler(void *userData, const XML_Char *s, int len)
384{
385  svn_xml_parser_t *svn_parser = userData;
386
387  (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
388
389#if XML_VERSION_AT_LEAST(1, 95, 8)
390  /* Stop XML parsing if svn_xml_signal_bailout() was called.
391     We cannot do this in svn_xml_signal_bailout() because Expat
392     documentation states that XML_StopParser() must be called only from
393     callbacks. */
394  if (svn_parser->error)
395    (void) XML_StopParser(svn_parser->parser, 0 /* resumable */);
396#endif
397}
398
399#if XML_VERSION_AT_LEAST(1, 95, 8)
400static void expat_entity_declaration(void *userData,
401                                     const XML_Char *entityName,
402                                     int is_parameter_entity,
403                                     const XML_Char *value,
404                                     int value_length,
405                                     const XML_Char *base,
406                                     const XML_Char *systemId,
407                                     const XML_Char *publicId,
408                                     const XML_Char *notationName)
409{
410  svn_xml_parser_t *svn_parser = userData;
411
412  /* Stop the parser if an entity declaration is hit. */
413  XML_StopParser(svn_parser->parser, 0 /* resumable */);
414}
415#else
416/* A noop default_handler. */
417static void expat_default_handler(void *userData, const XML_Char *s, int len)
418{
419}
420#endif
421
422/*** Making a parser. ***/
423
424static apr_status_t parser_cleanup(void *data)
425{
426  svn_xml_parser_t *svn_parser = data;
427
428  /* Free Expat parser. */
429  if (svn_parser->parser)
430    {
431      XML_ParserFree(svn_parser->parser);
432      svn_parser->parser = NULL;
433    }
434  return APR_SUCCESS;
435}
436
437svn_xml_parser_t *
438svn_xml_make_parser(void *baton,
439                    svn_xml_start_elem start_handler,
440                    svn_xml_end_elem end_handler,
441                    svn_xml_char_data data_handler,
442                    apr_pool_t *pool)
443{
444  svn_xml_parser_t *svn_parser;
445  XML_Parser parser = XML_ParserCreate(NULL);
446
447  XML_SetElementHandler(parser,
448                        start_handler ? expat_start_handler : NULL,
449                        end_handler ? expat_end_handler : NULL);
450  XML_SetCharacterDataHandler(parser,
451                              data_handler ? expat_data_handler : NULL);
452
453#if XML_VERSION_AT_LEAST(1, 95, 8)
454  XML_SetEntityDeclHandler(parser, expat_entity_declaration);
455#else
456  XML_SetDefaultHandler(parser, expat_default_handler);
457#endif
458
459  svn_parser = apr_pcalloc(pool, sizeof(*svn_parser));
460
461  svn_parser->parser = parser;
462  svn_parser->start_handler = start_handler;
463  svn_parser->end_handler = end_handler;
464  svn_parser->data_handler = data_handler;
465  svn_parser->baton = baton;
466  svn_parser->pool = pool;
467
468  /* store our parser info as the UserData in the Expat parser */
469  XML_SetUserData(parser, svn_parser);
470
471  /* Register pool cleanup handler to free Expat XML parser on cleanup,
472     if svn_xml_free_parser() was not called explicitly. */
473  apr_pool_cleanup_register(svn_parser->pool, svn_parser,
474                            parser_cleanup, apr_pool_cleanup_null);
475
476  return svn_parser;
477}
478
479
480/* Free a parser */
481void
482svn_xml_free_parser(svn_xml_parser_t *svn_parser)
483{
484  apr_pool_cleanup_run(svn_parser->pool, svn_parser, parser_cleanup);
485}
486
487
488
489
490svn_error_t *
491svn_xml_parse(svn_xml_parser_t *svn_parser,
492              const char *buf,
493              apr_size_t len,
494              svn_boolean_t is_final)
495{
496  svn_error_t *err;
497  int success;
498
499  /* Parse some xml data */
500  success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
501
502  /* Did an error occur somewhere *inside* the expat callbacks? */
503  if (svn_parser->error)
504    {
505      /* Kill all parsers and return the error */
506      svn_xml_free_parser(svn_parser);
507      return svn_parser->error;
508    }
509
510  /* If expat choked internally, return its error. */
511  if (! success)
512    {
513      /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
514      long line = XML_GetCurrentLineNumber(svn_parser->parser);
515
516      err = svn_error_createf
517        (SVN_ERR_XML_MALFORMED, NULL,
518         _("Malformed XML: %s at line %ld"),
519         XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
520
521      /* Kill all parsers and return the expat error */
522      svn_xml_free_parser(svn_parser);
523      return err;
524    }
525
526  return SVN_NO_ERROR;
527}
528
529
530
531void svn_xml_signal_bailout(svn_error_t *error,
532                            svn_xml_parser_t *svn_parser)
533{
534  /* This will cause the current XML_Parse() call to finish quickly! */
535  XML_SetElementHandler(svn_parser->parser, NULL, NULL);
536  XML_SetCharacterDataHandler(svn_parser->parser, NULL);
537#if XML_VERSION_AT_LEAST(1, 95, 8)
538  XML_SetEntityDeclHandler(svn_parser->parser, NULL);
539#endif
540  /* Once outside of XML_Parse(), the existence of this field will
541     cause svn_delta_parse()'s main read-loop to return error. */
542  svn_parser->error = error;
543}
544
545
546
547
548
549
550
551
552/*** Attribute walking. ***/
553
/* Look up NAME in ATTS, a NULL-terminated array of alternating attribute
   names and values.  Return the value that follows the first name equal
   to NAME, or NULL if ATTS is NULL or holds no such name. */
const char *
svn_xml_get_attr_value(const char *name, const char *const *atts)
{
  const char *const *pair;

  if (atts == NULL)
    return NULL;

  /* Step over the array one (name, value) pair at a time. */
  for (pair = atts; pair[0] != NULL; pair += 2)
    {
      if (strcmp(pair[0], name) == 0)
        return pair[1];
    }

  return NULL;
}
568
569
570
571/*** Printing XML ***/
572
573void
574svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
575                     apr_pool_t *pool)
576{
577
578  if (*str == NULL)
579    *str = svn_stringbuf_create_empty(pool);
580  svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
581  if (encoding)
582    {
583      encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
584      svn_stringbuf_appendcstr(*str, encoding);
585    }
586  svn_stringbuf_appendcstr(*str, "?>\n");
587}
588
589
590
591/*** Creating attribute hashes. ***/
592
593/* Combine an existing attribute list ATTS with a HASH that itself
594   represents an attribute list.  Iff PRESERVE is true, then no value
595   already in HASH will be changed, else values from ATTS will
596   override previous values in HASH. */
597static void
598amalgamate(const char **atts,
599           apr_hash_t *ht,
600           svn_boolean_t preserve,
601           apr_pool_t *pool)
602{
603  const char *key;
604
605  if (atts)
606    for (key = *atts; key; key = *(++atts))
607      {
608        const char *val = *(++atts);
609        size_t keylen;
610        assert(key != NULL);
611        /* kff todo: should we also insist that val be non-null here?
612           Probably. */
613
614        keylen = strlen(key);
615        if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
616          continue;
617        else
618          apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
619                       val ? apr_pstrdup(pool, val) : NULL);
620      }
621}
622
623
624apr_hash_t *
625svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
626{
627  apr_hash_t *ht = apr_hash_make(pool);
628  const char *key;
629
630  while ((key = va_arg(ap, char *)) != NULL)
631    {
632      const char *val = va_arg(ap, const char *);
633      svn_hash_sets(ht, key, val);
634    }
635
636  return ht;
637}
638
639
640apr_hash_t *
641svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
642{
643  apr_hash_t *ht = apr_hash_make(pool);
644  amalgamate(atts, ht, 0, pool);  /* third arg irrelevant in this case */
645  return ht;
646}
647
648
649void
650svn_xml_hash_atts_overlaying(const char **atts,
651                             apr_hash_t *ht,
652                             apr_pool_t *pool)
653{
654  amalgamate(atts, ht, 0, pool);
655}
656
657
658void
659svn_xml_hash_atts_preserving(const char **atts,
660                             apr_hash_t *ht,
661                             apr_pool_t *pool)
662{
663  amalgamate(atts, ht, 1, pool);
664}
665
666
667
668/*** Making XML tags. ***/
669
670
671void
672svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
673                           apr_pool_t *pool,
674                           enum svn_xml_open_tag_style style,
675                           const char *tagname,
676                           apr_hash_t *attributes)
677{
678  apr_hash_index_t *hi;
679  apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
680
681  if (*str == NULL)
682    *str = svn_stringbuf_create_ensure(est_size, pool);
683
684  svn_stringbuf_appendcstr(*str, "<");
685  svn_stringbuf_appendcstr(*str, tagname);
686
687  for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
688    {
689      const void *key;
690      void *val;
691
692      apr_hash_this(hi, &key, NULL, &val);
693      assert(val != NULL);
694
695      svn_stringbuf_appendcstr(*str, "\n   ");
696      svn_stringbuf_appendcstr(*str, key);
697      svn_stringbuf_appendcstr(*str, "=\"");
698      svn_xml_escape_attr_cstring(str, val, pool);
699      svn_stringbuf_appendcstr(*str, "\"");
700    }
701
702  if (style == svn_xml_self_closing)
703    svn_stringbuf_appendcstr(*str, "/");
704  svn_stringbuf_appendcstr(*str, ">");
705  if (style != svn_xml_protect_pcdata)
706    svn_stringbuf_appendcstr(*str, "\n");
707}
708
709
710void
711svn_xml_make_open_tag_v(svn_stringbuf_t **str,
712                        apr_pool_t *pool,
713                        enum svn_xml_open_tag_style style,
714                        const char *tagname,
715                        va_list ap)
716{
717  apr_pool_t *subpool = svn_pool_create(pool);
718  apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
719
720  svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
721  svn_pool_destroy(subpool);
722}
723
724
725
726void
727svn_xml_make_open_tag(svn_stringbuf_t **str,
728                      apr_pool_t *pool,
729                      enum svn_xml_open_tag_style style,
730                      const char *tagname,
731                      ...)
732{
733  va_list ap;
734
735  va_start(ap, tagname);
736  svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
737  va_end(ap);
738}
739
740
741void svn_xml_make_close_tag(svn_stringbuf_t **str,
742                            apr_pool_t *pool,
743                            const char *tagname)
744{
745  if (*str == NULL)
746    *str = svn_stringbuf_create_empty(pool);
747
748  svn_stringbuf_appendcstr(*str, "</");
749  svn_stringbuf_appendcstr(*str, tagname);
750  svn_stringbuf_appendcstr(*str, ">\n");
751}
752