xml.c revision 251881
1/*
2 * xml.c:  xml helper code shared among the Subversion libraries.
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
#include <string.h>
#include <assert.h>
#include <limits.h>
28
29#include "svn_private_config.h"         /* for SVN_HAVE_OLD_EXPAT */
30#include "svn_hash.h"
31#include "svn_pools.h"
32#include "svn_xml.h"
33#include "svn_error.h"
34#include "svn_ctype.h"
35
36#include "private/svn_utf_private.h"
37
38#ifdef SVN_HAVE_OLD_EXPAT
39#include <xmlparse.h>
40#else
41#include <expat.h>
42#endif
43
44#ifdef XML_UNICODE
45#error Expat is unusable -- it has been compiled for wide characters
46#endif
47
48/* The private internals for a parser object. */
49struct svn_xml_parser_t
50{
51  /** the expat parser */
52  XML_Parser parser;
53
54  /** the SVN callbacks to call from the Expat callbacks */
55  svn_xml_start_elem start_handler;
56  svn_xml_end_elem end_handler;
57  svn_xml_char_data data_handler;
58
59  /** the user's baton for private data */
60  void *baton;
61
62  /** if non-@c NULL, an error happened while parsing */
63  svn_error_t *error;
64
65  /** where this object is allocated, so we can free it easily */
66  apr_pool_t *pool;
67
68};
69
70
71/*** XML character validation ***/
72
73svn_boolean_t
74svn_xml_is_xml_safe(const char *data, apr_size_t len)
75{
76  const char *end = data + len;
77  const char *p;
78
79  if (! svn_utf__is_valid(data, len))
80    return FALSE;
81
82  for (p = data; p < end; p++)
83    {
84      unsigned char c = *p;
85
86      if (svn_ctype_iscntrl(c))
87        {
88          if ((c != SVN_CTYPE_ASCII_TAB)
89              && (c != SVN_CTYPE_ASCII_LINEFEED)
90              && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
91              && (c != SVN_CTYPE_ASCII_DELETE))
92            return FALSE;
93        }
94    }
95  return TRUE;
96}
97
98
99
100
101
102/*** XML escaping. ***/
103
104/* ### ...?
105 *
106 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
107 * in POOL, else append to the existing stringbuf there.
108 */
109static void
110xml_escape_cdata(svn_stringbuf_t **outstr,
111                 const char *data,
112                 apr_size_t len,
113                 apr_pool_t *pool)
114{
115  const char *end = data + len;
116  const char *p = data, *q;
117
118  if (*outstr == NULL)
119    *outstr = svn_stringbuf_create_empty(pool);
120
121  while (1)
122    {
123      /* Find a character which needs to be quoted and append bytes up
124         to that point.  Strictly speaking, '>' only needs to be
125         quoted if it follows "]]", but it's easier to quote it all
126         the time.
127
128         So, why are we escaping '\r' here?  Well, according to the
129         XML spec, '\r\n' gets converted to '\n' during XML parsing.
130         Also, any '\r' not followed by '\n' is converted to '\n'.  By
131         golly, if we say we want to escape a '\r', we want to make
132         sure it remains a '\r'!  */
133      q = p;
134      while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
135        q++;
136      svn_stringbuf_appendbytes(*outstr, p, q - p);
137
138      /* We may already be a winner.  */
139      if (q == end)
140        break;
141
142      /* Append the entity reference for the character.  */
143      if (*q == '&')
144        svn_stringbuf_appendcstr(*outstr, "&amp;");
145      else if (*q == '<')
146        svn_stringbuf_appendcstr(*outstr, "&lt;");
147      else if (*q == '>')
148        svn_stringbuf_appendcstr(*outstr, "&gt;");
149      else if (*q == '\r')
150        svn_stringbuf_appendcstr(*outstr, "&#13;");
151
152      p = q + 1;
153    }
154}
155
156/* Essentially the same as xml_escape_cdata, with the addition of
157   whitespace and quote characters. */
158static void
159xml_escape_attr(svn_stringbuf_t **outstr,
160                const char *data,
161                apr_size_t len,
162                apr_pool_t *pool)
163{
164  const char *end = data + len;
165  const char *p = data, *q;
166
167  if (*outstr == NULL)
168    *outstr = svn_stringbuf_create_ensure(len, pool);
169
170  while (1)
171    {
172      /* Find a character which needs to be quoted and append bytes up
173         to that point. */
174      q = p;
175      while (q < end && *q != '&' && *q != '<' && *q != '>'
176             && *q != '"' && *q != '\'' && *q != '\r'
177             && *q != '\n' && *q != '\t')
178        q++;
179      svn_stringbuf_appendbytes(*outstr, p, q - p);
180
181      /* We may already be a winner.  */
182      if (q == end)
183        break;
184
185      /* Append the entity reference for the character.  */
186      if (*q == '&')
187        svn_stringbuf_appendcstr(*outstr, "&amp;");
188      else if (*q == '<')
189        svn_stringbuf_appendcstr(*outstr, "&lt;");
190      else if (*q == '>')
191        svn_stringbuf_appendcstr(*outstr, "&gt;");
192      else if (*q == '"')
193        svn_stringbuf_appendcstr(*outstr, "&quot;");
194      else if (*q == '\'')
195        svn_stringbuf_appendcstr(*outstr, "&apos;");
196      else if (*q == '\r')
197        svn_stringbuf_appendcstr(*outstr, "&#13;");
198      else if (*q == '\n')
199        svn_stringbuf_appendcstr(*outstr, "&#10;");
200      else if (*q == '\t')
201        svn_stringbuf_appendcstr(*outstr, "&#9;");
202
203      p = q + 1;
204    }
205}
206
207
208void
209svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
210                               const svn_stringbuf_t *string,
211                               apr_pool_t *pool)
212{
213  xml_escape_cdata(outstr, string->data, string->len, pool);
214}
215
216
217void
218svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
219                            const svn_string_t *string,
220                            apr_pool_t *pool)
221{
222  xml_escape_cdata(outstr, string->data, string->len, pool);
223}
224
225
226void
227svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
228                             const char *string,
229                             apr_pool_t *pool)
230{
231  xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
232}
233
234
235void
236svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
237                              const svn_stringbuf_t *string,
238                              apr_pool_t *pool)
239{
240  xml_escape_attr(outstr, string->data, string->len, pool);
241}
242
243
244void
245svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
246                           const svn_string_t *string,
247                           apr_pool_t *pool)
248{
249  xml_escape_attr(outstr, string->data, string->len, pool);
250}
251
252
253void
254svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
255                            const char *string,
256                            apr_pool_t *pool)
257{
258  xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
259}
260
261
262const char *
263svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
264{
265  const char *end = string + strlen(string);
266  const char *p = string, *q;
267  svn_stringbuf_t *outstr;
268  char escaped_char[6];   /* ? \ u u u \0 */
269
270  for (q = p; q < end; q++)
271    {
272      if (svn_ctype_iscntrl(*q)
273          && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
274        break;
275    }
276
277  /* Return original string if no unsafe characters found. */
278  if (q == end)
279    return string;
280
281  outstr = svn_stringbuf_create_empty(pool);
282  while (1)
283    {
284      q = p;
285
286      /* Traverse till either unsafe character or eos. */
287      while ((q < end)
288             && ((! svn_ctype_iscntrl(*q))
289                 || (*q == '\n') || (*q == '\r') || (*q == '\t')))
290        q++;
291
292      /* copy chunk before marker */
293      svn_stringbuf_appendbytes(outstr, p, q - p);
294
295      if (q == end)
296        break;
297
298      /* Append an escaped version of the unsafe character.
299
300         ### This format was chosen for consistency with
301         ### svn_utf__cstring_from_utf8_fuzzy().  The two functions
302         ### should probably share code, even though they escape
303         ### different characters.
304      */
305      apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
306                   (unsigned char) *q);
307      svn_stringbuf_appendcstr(outstr, escaped_char);
308
309      p = q + 1;
310    }
311
312  return outstr->data;
313}
314
315
316/*** Map from the Expat callback types to the SVN XML types. ***/
317
318static void expat_start_handler(void *userData,
319                                const XML_Char *name,
320                                const XML_Char **atts)
321{
322  svn_xml_parser_t *svn_parser = userData;
323
324  (*svn_parser->start_handler)(svn_parser->baton, name, atts);
325}
326
327static void expat_end_handler(void *userData, const XML_Char *name)
328{
329  svn_xml_parser_t *svn_parser = userData;
330
331  (*svn_parser->end_handler)(svn_parser->baton, name);
332}
333
334static void expat_data_handler(void *userData, const XML_Char *s, int len)
335{
336  svn_xml_parser_t *svn_parser = userData;
337
338  (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
339}
340
341
342/*** Making a parser. ***/
343
344svn_xml_parser_t *
345svn_xml_make_parser(void *baton,
346                    svn_xml_start_elem start_handler,
347                    svn_xml_end_elem end_handler,
348                    svn_xml_char_data data_handler,
349                    apr_pool_t *pool)
350{
351  svn_xml_parser_t *svn_parser;
352  apr_pool_t *subpool;
353
354  XML_Parser parser = XML_ParserCreate(NULL);
355
356  XML_SetElementHandler(parser,
357                        start_handler ? expat_start_handler : NULL,
358                        end_handler ? expat_end_handler : NULL);
359  XML_SetCharacterDataHandler(parser,
360                              data_handler ? expat_data_handler : NULL);
361
362  /* ### we probably don't want this pool; or at least we should pass it
363     ### to the callbacks and clear it periodically.  */
364  subpool = svn_pool_create(pool);
365
366  svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser));
367
368  svn_parser->parser = parser;
369  svn_parser->start_handler = start_handler;
370  svn_parser->end_handler = end_handler;
371  svn_parser->data_handler = data_handler;
372  svn_parser->baton = baton;
373  svn_parser->pool = subpool;
374
375  /* store our parser info as the UserData in the Expat parser */
376  XML_SetUserData(parser, svn_parser);
377
378  return svn_parser;
379}
380
381
382/* Free a parser */
383void
384svn_xml_free_parser(svn_xml_parser_t *svn_parser)
385{
386  /* Free the expat parser */
387  XML_ParserFree(svn_parser->parser);
388
389  /* Free the subversion parser */
390  svn_pool_destroy(svn_parser->pool);
391}
392
393
394
395
396svn_error_t *
397svn_xml_parse(svn_xml_parser_t *svn_parser,
398              const char *buf,
399              apr_size_t len,
400              svn_boolean_t is_final)
401{
402  svn_error_t *err;
403  int success;
404
405  /* Parse some xml data */
406  success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
407
408  /* If expat choked internally, return its error. */
409  if (! success)
410    {
411      /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
412      long line = XML_GetCurrentLineNumber(svn_parser->parser);
413
414      err = svn_error_createf
415        (SVN_ERR_XML_MALFORMED, NULL,
416         _("Malformed XML: %s at line %ld"),
417         XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
418
419      /* Kill all parsers and return the expat error */
420      svn_xml_free_parser(svn_parser);
421      return err;
422    }
423
424  /* Did an error occur somewhere *inside* the expat callbacks? */
425  if (svn_parser->error)
426    {
427      err = svn_parser->error;
428      svn_xml_free_parser(svn_parser);
429      return err;
430    }
431
432  return SVN_NO_ERROR;
433}
434
435
436
437void svn_xml_signal_bailout(svn_error_t *error,
438                            svn_xml_parser_t *svn_parser)
439{
440  /* This will cause the current XML_Parse() call to finish quickly! */
441  XML_SetElementHandler(svn_parser->parser, NULL, NULL);
442  XML_SetCharacterDataHandler(svn_parser->parser, NULL);
443
444  /* Once outside of XML_Parse(), the existence of this field will
445     cause svn_delta_parse()'s main read-loop to return error. */
446  svn_parser->error = error;
447}
448
449
450
451
452
453
454
455
456/*** Attribute walking. ***/
457
/* Look up NAME in ATTS, a NULL-terminated array laid out as
   name, value, name, value, ...  Return the value paired with the
   first matching name, or NULL if ATTS is NULL or no name matches. */
const char *
svn_xml_get_attr_value(const char *name, const char *const *atts)
{
  const char *const *pair;

  if (atts == NULL)
    return NULL;

  /* Step two slots at a time: pair[0] is a name, pair[1] its value. */
  for (pair = atts; pair[0] != NULL; pair += 2)
    {
      if (strcmp(pair[0], name) == 0)
        return pair[1];
    }

  /* No such attribute name seen. */
  return NULL;
}
472
473
474
475/*** Printing XML ***/
476
477void
478svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
479                     apr_pool_t *pool)
480{
481
482  if (*str == NULL)
483    *str = svn_stringbuf_create_empty(pool);
484  svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
485  if (encoding)
486    {
487      encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
488      svn_stringbuf_appendcstr(*str, encoding);
489    }
490  svn_stringbuf_appendcstr(*str, "?>\n");
491}
492
493
494
495/*** Creating attribute hashes. ***/
496
497/* Combine an existing attribute list ATTS with a HASH that itself
498   represents an attribute list.  Iff PRESERVE is true, then no value
499   already in HASH will be changed, else values from ATTS will
500   override previous values in HASH. */
501static void
502amalgamate(const char **atts,
503           apr_hash_t *ht,
504           svn_boolean_t preserve,
505           apr_pool_t *pool)
506{
507  const char *key;
508
509  if (atts)
510    for (key = *atts; key; key = *(++atts))
511      {
512        const char *val = *(++atts);
513        size_t keylen;
514        assert(key != NULL);
515        /* kff todo: should we also insist that val be non-null here?
516           Probably. */
517
518        keylen = strlen(key);
519        if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
520          continue;
521        else
522          apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
523                       val ? apr_pstrdup(pool, val) : NULL);
524      }
525}
526
527
528apr_hash_t *
529svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
530{
531  apr_hash_t *ht = apr_hash_make(pool);
532  const char *key;
533
534  while ((key = va_arg(ap, char *)) != NULL)
535    {
536      const char *val = va_arg(ap, const char *);
537      svn_hash_sets(ht, key, val);
538    }
539
540  return ht;
541}
542
543
544apr_hash_t *
545svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
546{
547  apr_hash_t *ht = apr_hash_make(pool);
548  amalgamate(atts, ht, 0, pool);  /* third arg irrelevant in this case */
549  return ht;
550}
551
552
553void
554svn_xml_hash_atts_overlaying(const char **atts,
555                             apr_hash_t *ht,
556                             apr_pool_t *pool)
557{
558  amalgamate(atts, ht, 0, pool);
559}
560
561
562void
563svn_xml_hash_atts_preserving(const char **atts,
564                             apr_hash_t *ht,
565                             apr_pool_t *pool)
566{
567  amalgamate(atts, ht, 1, pool);
568}
569
570
571
572/*** Making XML tags. ***/
573
574
575void
576svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
577                           apr_pool_t *pool,
578                           enum svn_xml_open_tag_style style,
579                           const char *tagname,
580                           apr_hash_t *attributes)
581{
582  apr_hash_index_t *hi;
583  apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
584
585  if (*str == NULL)
586    *str = svn_stringbuf_create_ensure(est_size, pool);
587
588  svn_stringbuf_appendcstr(*str, "<");
589  svn_stringbuf_appendcstr(*str, tagname);
590
591  for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
592    {
593      const void *key;
594      void *val;
595
596      apr_hash_this(hi, &key, NULL, &val);
597      assert(val != NULL);
598
599      svn_stringbuf_appendcstr(*str, "\n   ");
600      svn_stringbuf_appendcstr(*str, key);
601      svn_stringbuf_appendcstr(*str, "=\"");
602      svn_xml_escape_attr_cstring(str, val, pool);
603      svn_stringbuf_appendcstr(*str, "\"");
604    }
605
606  if (style == svn_xml_self_closing)
607    svn_stringbuf_appendcstr(*str, "/");
608  svn_stringbuf_appendcstr(*str, ">");
609  if (style != svn_xml_protect_pcdata)
610    svn_stringbuf_appendcstr(*str, "\n");
611}
612
613
614void
615svn_xml_make_open_tag_v(svn_stringbuf_t **str,
616                        apr_pool_t *pool,
617                        enum svn_xml_open_tag_style style,
618                        const char *tagname,
619                        va_list ap)
620{
621  apr_pool_t *subpool = svn_pool_create(pool);
622  apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
623
624  svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
625  svn_pool_destroy(subpool);
626}
627
628
629
630void
631svn_xml_make_open_tag(svn_stringbuf_t **str,
632                      apr_pool_t *pool,
633                      enum svn_xml_open_tag_style style,
634                      const char *tagname,
635                      ...)
636{
637  va_list ap;
638
639  va_start(ap, tagname);
640  svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
641  va_end(ap);
642}
643
644
645void svn_xml_make_close_tag(svn_stringbuf_t **str,
646                            apr_pool_t *pool,
647                            const char *tagname)
648{
649  if (*str == NULL)
650    *str = svn_stringbuf_create_empty(pool);
651
652  svn_stringbuf_appendcstr(*str, "</");
653  svn_stringbuf_appendcstr(*str, tagname);
654  svn_stringbuf_appendcstr(*str, ">\n");
655}
656