xml.c revision 302408
1/*
2 * xml.c:  xml helper code shared among the Subversion libraries.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <string.h>
27#include <assert.h>
28
29#include "svn_private_config.h"         /* for SVN_HAVE_OLD_EXPAT */
30#include "svn_hash.h"
31#include "svn_pools.h"
32#include "svn_xml.h"
33#include "svn_error.h"
34#include "svn_ctype.h"
35
36#include "private/svn_utf_private.h"
37#include "private/svn_subr_private.h"
38
39#ifdef SVN_HAVE_OLD_EXPAT
40#include <xmlparse.h>
41#else
42#include <expat.h>
43#endif
44
45#ifdef XML_UNICODE
46#error Expat is unusable -- it has been compiled for wide characters
47#endif
48
49const char *
50svn_xml__compiled_version(void)
51{
52  static const char xml_version_str[] = APR_STRINGIFY(XML_MAJOR_VERSION)
53                                        "." APR_STRINGIFY(XML_MINOR_VERSION)
54                                        "." APR_STRINGIFY(XML_MICRO_VERSION);
55
56  return xml_version_str;
57}
58
/* Return the version string reported by the Expat library at run time,
   with a leading "expat_" prefix removed if one is present. */
const char *
svn_xml__runtime_version(void)
{
  static const char prefix[] = "expat_";
  const size_t prefix_len = sizeof(prefix) - 1;
  const char *version = XML_ExpatVersion();

  if (strncmp(version, prefix, prefix_len) == 0)
    return version + prefix_len;

  return version;
}
69
70
71/* The private internals for a parser object. */
72struct svn_xml_parser_t
73{
74  /** the expat parser */
75  XML_Parser parser;
76
77  /** the SVN callbacks to call from the Expat callbacks */
78  svn_xml_start_elem start_handler;
79  svn_xml_end_elem end_handler;
80  svn_xml_char_data data_handler;
81
82  /** the user's baton for private data */
83  void *baton;
84
85  /** if non-@c NULL, an error happened while parsing */
86  svn_error_t *error;
87
88  /** where this object is allocated, so we can free it easily */
89  apr_pool_t *pool;
90
91};
92
93
94/*** XML character validation ***/
95
96svn_boolean_t
97svn_xml_is_xml_safe(const char *data, apr_size_t len)
98{
99  const char *end = data + len;
100  const char *p;
101
102  if (! svn_utf__is_valid(data, len))
103    return FALSE;
104
105  for (p = data; p < end; p++)
106    {
107      unsigned char c = *p;
108
109      if (svn_ctype_iscntrl(c))
110        {
111          if ((c != SVN_CTYPE_ASCII_TAB)
112              && (c != SVN_CTYPE_ASCII_LINEFEED)
113              && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
114              && (c != SVN_CTYPE_ASCII_DELETE))
115            return FALSE;
116        }
117    }
118  return TRUE;
119}
120
121
122
123
124
125/*** XML escaping. ***/
126
127/* ### ...?
128 *
129 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
130 * in POOL, else append to the existing stringbuf there.
131 */
132static void
133xml_escape_cdata(svn_stringbuf_t **outstr,
134                 const char *data,
135                 apr_size_t len,
136                 apr_pool_t *pool)
137{
138  const char *end = data + len;
139  const char *p = data, *q;
140
141  if (*outstr == NULL)
142    *outstr = svn_stringbuf_create_empty(pool);
143
144  while (1)
145    {
146      /* Find a character which needs to be quoted and append bytes up
147         to that point.  Strictly speaking, '>' only needs to be
148         quoted if it follows "]]", but it's easier to quote it all
149         the time.
150
151         So, why are we escaping '\r' here?  Well, according to the
152         XML spec, '\r\n' gets converted to '\n' during XML parsing.
153         Also, any '\r' not followed by '\n' is converted to '\n'.  By
154         golly, if we say we want to escape a '\r', we want to make
155         sure it remains a '\r'!  */
156      q = p;
157      while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
158        q++;
159      svn_stringbuf_appendbytes(*outstr, p, q - p);
160
161      /* We may already be a winner.  */
162      if (q == end)
163        break;
164
165      /* Append the entity reference for the character.  */
166      if (*q == '&')
167        svn_stringbuf_appendcstr(*outstr, "&amp;");
168      else if (*q == '<')
169        svn_stringbuf_appendcstr(*outstr, "&lt;");
170      else if (*q == '>')
171        svn_stringbuf_appendcstr(*outstr, "&gt;");
172      else if (*q == '\r')
173        svn_stringbuf_appendcstr(*outstr, "&#13;");
174
175      p = q + 1;
176    }
177}
178
179/* Essentially the same as xml_escape_cdata, with the addition of
180   whitespace and quote characters. */
181static void
182xml_escape_attr(svn_stringbuf_t **outstr,
183                const char *data,
184                apr_size_t len,
185                apr_pool_t *pool)
186{
187  const char *end = data + len;
188  const char *p = data, *q;
189
190  if (*outstr == NULL)
191    *outstr = svn_stringbuf_create_ensure(len, pool);
192
193  while (1)
194    {
195      /* Find a character which needs to be quoted and append bytes up
196         to that point. */
197      q = p;
198      while (q < end && *q != '&' && *q != '<' && *q != '>'
199             && *q != '"' && *q != '\'' && *q != '\r'
200             && *q != '\n' && *q != '\t')
201        q++;
202      svn_stringbuf_appendbytes(*outstr, p, q - p);
203
204      /* We may already be a winner.  */
205      if (q == end)
206        break;
207
208      /* Append the entity reference for the character.  */
209      if (*q == '&')
210        svn_stringbuf_appendcstr(*outstr, "&amp;");
211      else if (*q == '<')
212        svn_stringbuf_appendcstr(*outstr, "&lt;");
213      else if (*q == '>')
214        svn_stringbuf_appendcstr(*outstr, "&gt;");
215      else if (*q == '"')
216        svn_stringbuf_appendcstr(*outstr, "&quot;");
217      else if (*q == '\'')
218        svn_stringbuf_appendcstr(*outstr, "&apos;");
219      else if (*q == '\r')
220        svn_stringbuf_appendcstr(*outstr, "&#13;");
221      else if (*q == '\n')
222        svn_stringbuf_appendcstr(*outstr, "&#10;");
223      else if (*q == '\t')
224        svn_stringbuf_appendcstr(*outstr, "&#9;");
225
226      p = q + 1;
227    }
228}
229
230
231void
232svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
233                               const svn_stringbuf_t *string,
234                               apr_pool_t *pool)
235{
236  xml_escape_cdata(outstr, string->data, string->len, pool);
237}
238
239
240void
241svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
242                            const svn_string_t *string,
243                            apr_pool_t *pool)
244{
245  xml_escape_cdata(outstr, string->data, string->len, pool);
246}
247
248
249void
250svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
251                             const char *string,
252                             apr_pool_t *pool)
253{
254  xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
255}
256
257
258void
259svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
260                              const svn_stringbuf_t *string,
261                              apr_pool_t *pool)
262{
263  xml_escape_attr(outstr, string->data, string->len, pool);
264}
265
266
267void
268svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
269                           const svn_string_t *string,
270                           apr_pool_t *pool)
271{
272  xml_escape_attr(outstr, string->data, string->len, pool);
273}
274
275
276void
277svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
278                            const char *string,
279                            apr_pool_t *pool)
280{
281  xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
282}
283
284
285const char *
286svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
287{
288  const char *end = string + strlen(string);
289  const char *p = string, *q;
290  svn_stringbuf_t *outstr;
291  char escaped_char[6];   /* ? \ u u u \0 */
292
293  for (q = p; q < end; q++)
294    {
295      if (svn_ctype_iscntrl(*q)
296          && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
297        break;
298    }
299
300  /* Return original string if no unsafe characters found. */
301  if (q == end)
302    return string;
303
304  outstr = svn_stringbuf_create_empty(pool);
305  while (1)
306    {
307      q = p;
308
309      /* Traverse till either unsafe character or eos. */
310      while ((q < end)
311             && ((! svn_ctype_iscntrl(*q))
312                 || (*q == '\n') || (*q == '\r') || (*q == '\t')))
313        q++;
314
315      /* copy chunk before marker */
316      svn_stringbuf_appendbytes(outstr, p, q - p);
317
318      if (q == end)
319        break;
320
321      /* Append an escaped version of the unsafe character.
322
323         ### This format was chosen for consistency with
324         ### svn_utf__cstring_from_utf8_fuzzy().  The two functions
325         ### should probably share code, even though they escape
326         ### different characters.
327      */
328      apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
329                   (unsigned char) *q);
330      svn_stringbuf_appendcstr(outstr, escaped_char);
331
332      p = q + 1;
333    }
334
335  return outstr->data;
336}
337
338
339/*** Map from the Expat callback types to the SVN XML types. ***/
340
341static void expat_start_handler(void *userData,
342                                const XML_Char *name,
343                                const XML_Char **atts)
344{
345  svn_xml_parser_t *svn_parser = userData;
346
347  (*svn_parser->start_handler)(svn_parser->baton, name, atts);
348}
349
350static void expat_end_handler(void *userData, const XML_Char *name)
351{
352  svn_xml_parser_t *svn_parser = userData;
353
354  (*svn_parser->end_handler)(svn_parser->baton, name);
355}
356
357static void expat_data_handler(void *userData, const XML_Char *s, int len)
358{
359  svn_xml_parser_t *svn_parser = userData;
360
361  (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
362}
363
364
365/*** Making a parser. ***/
366
367svn_xml_parser_t *
368svn_xml_make_parser(void *baton,
369                    svn_xml_start_elem start_handler,
370                    svn_xml_end_elem end_handler,
371                    svn_xml_char_data data_handler,
372                    apr_pool_t *pool)
373{
374  svn_xml_parser_t *svn_parser;
375  apr_pool_t *subpool;
376
377  XML_Parser parser = XML_ParserCreate(NULL);
378
379  XML_SetElementHandler(parser,
380                        start_handler ? expat_start_handler : NULL,
381                        end_handler ? expat_end_handler : NULL);
382  XML_SetCharacterDataHandler(parser,
383                              data_handler ? expat_data_handler : NULL);
384
385  /* ### we probably don't want this pool; or at least we should pass it
386     ### to the callbacks and clear it periodically.  */
387  subpool = svn_pool_create(pool);
388
389  svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser));
390
391  svn_parser->parser = parser;
392  svn_parser->start_handler = start_handler;
393  svn_parser->end_handler = end_handler;
394  svn_parser->data_handler = data_handler;
395  svn_parser->baton = baton;
396  svn_parser->pool = subpool;
397
398  /* store our parser info as the UserData in the Expat parser */
399  XML_SetUserData(parser, svn_parser);
400
401  return svn_parser;
402}
403
404
405/* Free a parser */
406void
407svn_xml_free_parser(svn_xml_parser_t *svn_parser)
408{
409  /* Free the expat parser */
410  XML_ParserFree(svn_parser->parser);
411
412  /* Free the subversion parser */
413  svn_pool_destroy(svn_parser->pool);
414}
415
416
417
418
419svn_error_t *
420svn_xml_parse(svn_xml_parser_t *svn_parser,
421              const char *buf,
422              apr_size_t len,
423              svn_boolean_t is_final)
424{
425  svn_error_t *err;
426  int success;
427
428  /* Parse some xml data */
429  success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
430
431  /* If expat choked internally, return its error. */
432  if (! success)
433    {
434      /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
435      long line = XML_GetCurrentLineNumber(svn_parser->parser);
436
437      err = svn_error_createf
438        (SVN_ERR_XML_MALFORMED, NULL,
439         _("Malformed XML: %s at line %ld"),
440         XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
441
442      /* Kill all parsers and return the expat error */
443      svn_xml_free_parser(svn_parser);
444      return err;
445    }
446
447  /* Did an error occur somewhere *inside* the expat callbacks? */
448  if (svn_parser->error)
449    {
450      err = svn_parser->error;
451      svn_xml_free_parser(svn_parser);
452      return err;
453    }
454
455  return SVN_NO_ERROR;
456}
457
458
459
460void svn_xml_signal_bailout(svn_error_t *error,
461                            svn_xml_parser_t *svn_parser)
462{
463  /* This will cause the current XML_Parse() call to finish quickly! */
464  XML_SetElementHandler(svn_parser->parser, NULL, NULL);
465  XML_SetCharacterDataHandler(svn_parser->parser, NULL);
466
467  /* Once outside of XML_Parse(), the existence of this field will
468     cause svn_delta_parse()'s main read-loop to return error. */
469  svn_parser->error = error;
470}
471
472
473
474
475
476
477
478
479/*** Attribute walking. ***/
480
/* Search ATTS, a NULL-terminated array of alternating attribute names
   and values, for an attribute called NAME.  Return the value stored
   after the first matching name, or NULL if ATTS is NULL or holds no
   such name. */
const char *
svn_xml_get_attr_value(const char *name, const char *const *atts)
{
  const char *const *pair;

  if (atts == NULL)
    return NULL;

  /* Step over (name, value) pairs until the terminating NULL name. */
  for (pair = atts; pair[0] != NULL; pair += 2)
    {
      if (strcmp(pair[0], name) == 0)
        return pair[1];
    }

  /* No such attribute name seen. */
  return NULL;
}
495
496
497
498/*** Printing XML ***/
499
500void
501svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
502                     apr_pool_t *pool)
503{
504
505  if (*str == NULL)
506    *str = svn_stringbuf_create_empty(pool);
507  svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
508  if (encoding)
509    {
510      encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
511      svn_stringbuf_appendcstr(*str, encoding);
512    }
513  svn_stringbuf_appendcstr(*str, "?>\n");
514}
515
516
517
518/*** Creating attribute hashes. ***/
519
520/* Combine an existing attribute list ATTS with a HASH that itself
521   represents an attribute list.  Iff PRESERVE is true, then no value
522   already in HASH will be changed, else values from ATTS will
523   override previous values in HASH. */
524static void
525amalgamate(const char **atts,
526           apr_hash_t *ht,
527           svn_boolean_t preserve,
528           apr_pool_t *pool)
529{
530  const char *key;
531
532  if (atts)
533    for (key = *atts; key; key = *(++atts))
534      {
535        const char *val = *(++atts);
536        size_t keylen;
537        assert(key != NULL);
538        /* kff todo: should we also insist that val be non-null here?
539           Probably. */
540
541        keylen = strlen(key);
542        if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
543          continue;
544        else
545          apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
546                       val ? apr_pstrdup(pool, val) : NULL);
547      }
548}
549
550
551apr_hash_t *
552svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
553{
554  apr_hash_t *ht = apr_hash_make(pool);
555  const char *key;
556
557  while ((key = va_arg(ap, char *)) != NULL)
558    {
559      const char *val = va_arg(ap, const char *);
560      svn_hash_sets(ht, key, val);
561    }
562
563  return ht;
564}
565
566
567apr_hash_t *
568svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
569{
570  apr_hash_t *ht = apr_hash_make(pool);
571  amalgamate(atts, ht, 0, pool);  /* third arg irrelevant in this case */
572  return ht;
573}
574
575
576void
577svn_xml_hash_atts_overlaying(const char **atts,
578                             apr_hash_t *ht,
579                             apr_pool_t *pool)
580{
581  amalgamate(atts, ht, 0, pool);
582}
583
584
585void
586svn_xml_hash_atts_preserving(const char **atts,
587                             apr_hash_t *ht,
588                             apr_pool_t *pool)
589{
590  amalgamate(atts, ht, 1, pool);
591}
592
593
594
595/*** Making XML tags. ***/
596
597
598void
599svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
600                           apr_pool_t *pool,
601                           enum svn_xml_open_tag_style style,
602                           const char *tagname,
603                           apr_hash_t *attributes)
604{
605  apr_hash_index_t *hi;
606  apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
607
608  if (*str == NULL)
609    *str = svn_stringbuf_create_ensure(est_size, pool);
610
611  svn_stringbuf_appendcstr(*str, "<");
612  svn_stringbuf_appendcstr(*str, tagname);
613
614  for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
615    {
616      const void *key;
617      void *val;
618
619      apr_hash_this(hi, &key, NULL, &val);
620      assert(val != NULL);
621
622      svn_stringbuf_appendcstr(*str, "\n   ");
623      svn_stringbuf_appendcstr(*str, key);
624      svn_stringbuf_appendcstr(*str, "=\"");
625      svn_xml_escape_attr_cstring(str, val, pool);
626      svn_stringbuf_appendcstr(*str, "\"");
627    }
628
629  if (style == svn_xml_self_closing)
630    svn_stringbuf_appendcstr(*str, "/");
631  svn_stringbuf_appendcstr(*str, ">");
632  if (style != svn_xml_protect_pcdata)
633    svn_stringbuf_appendcstr(*str, "\n");
634}
635
636
637void
638svn_xml_make_open_tag_v(svn_stringbuf_t **str,
639                        apr_pool_t *pool,
640                        enum svn_xml_open_tag_style style,
641                        const char *tagname,
642                        va_list ap)
643{
644  apr_pool_t *subpool = svn_pool_create(pool);
645  apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
646
647  svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
648  svn_pool_destroy(subpool);
649}
650
651
652
653void
654svn_xml_make_open_tag(svn_stringbuf_t **str,
655                      apr_pool_t *pool,
656                      enum svn_xml_open_tag_style style,
657                      const char *tagname,
658                      ...)
659{
660  va_list ap;
661
662  va_start(ap, tagname);
663  svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
664  va_end(ap);
665}
666
667
668void svn_xml_make_close_tag(svn_stringbuf_t **str,
669                            apr_pool_t *pool,
670                            const char *tagname)
671{
672  if (*str == NULL)
673    *str = svn_stringbuf_create_empty(pool);
674
675  svn_stringbuf_appendcstr(*str, "</");
676  svn_stringbuf_appendcstr(*str, tagname);
677  svn_stringbuf_appendcstr(*str, ">\n");
678}
679