1/* 2 * xml.c: xml helper code shared among the Subversion libraries. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <string.h> 27#include <assert.h> 28 29#include "svn_private_config.h" /* for SVN_HAVE_OLD_EXPAT */ 30#include "svn_hash.h" 31#include "svn_pools.h" 32#include "svn_xml.h" 33#include "svn_error.h" 34#include "svn_ctype.h" 35 36#include "private/svn_utf_private.h" 37#include "private/svn_subr_private.h" 38 39#ifdef SVN_HAVE_OLD_EXPAT 40#include <xmlparse.h> 41#else 42#include <expat.h> 43#endif 44 45#ifndef XML_VERSION_AT_LEAST 46#define XML_VERSION_AT_LEAST(major,minor,patch) \ 47(((major) < XML_MAJOR_VERSION) \ 48 || ((major) == XML_MAJOR_VERSION && (minor) < XML_MINOR_VERSION) \ 49 || ((major) == XML_MAJOR_VERSION && (minor) == XML_MINOR_VERSION && \ 50 (patch) <= XML_MICRO_VERSION)) 51#endif /* XML_VERSION_AT_LEAST */ 52 53#ifdef XML_UNICODE 54#error Expat is unusable -- it has been compiled for wide characters 55#endif 56 57const char * 58svn_xml__compiled_version(void) 59{ 60 static const char xml_version_str[] = APR_STRINGIFY(XML_MAJOR_VERSION) 61 "." APR_STRINGIFY(XML_MINOR_VERSION) 62 "." APR_STRINGIFY(XML_MICRO_VERSION); 63 64 return xml_version_str; 65} 66 67const char * 68svn_xml__runtime_version(void) 69{ 70 const char *expat_version = XML_ExpatVersion(); 71 72 if (!strncmp(expat_version, "expat_", 6)) 73 expat_version += 6; 74 75 return expat_version; 76} 77 78 79/* The private internals for a parser object. */ 80struct svn_xml_parser_t 81{ 82 /** the expat parser */ 83 XML_Parser parser; 84 85 /** the SVN callbacks to call from the Expat callbacks */ 86 svn_xml_start_elem start_handler; 87 svn_xml_end_elem end_handler; 88 svn_xml_char_data data_handler; 89 90 /** the user's baton for private data */ 91 void *baton; 92 93 /** if non-@c NULL, an error happened while parsing */ 94 svn_error_t *error; 95 96 /** where this object is allocated, so we can free it easily */ 97 apr_pool_t *pool; 98 99}; 100 101 102/*** XML character validation ***/ 103 104svn_boolean_t 105svn_xml_is_xml_safe(const char *data, apr_size_t len) 106{ 107 const char *end = data + len; 108 const char *p; 109 110 if (! svn_utf__is_valid(data, len)) 111 return FALSE; 112 113 for (p = data; p < end; p++) 114 { 115 unsigned char c = *p; 116 117 if (svn_ctype_iscntrl(c)) 118 { 119 if ((c != SVN_CTYPE_ASCII_TAB) 120 && (c != SVN_CTYPE_ASCII_LINEFEED) 121 && (c != SVN_CTYPE_ASCII_CARRIAGERETURN) 122 && (c != SVN_CTYPE_ASCII_DELETE)) 123 return FALSE; 124 } 125 } 126 return TRUE; 127} 128 129 130 131 132 133/*** XML escaping. ***/ 134 135/* ### ...? 136 * 137 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated 138 * in POOL, else append to the existing stringbuf there. 139 */ 140static void 141xml_escape_cdata(svn_stringbuf_t **outstr, 142 const char *data, 143 apr_size_t len, 144 apr_pool_t *pool) 145{ 146 const char *end = data + len; 147 const char *p = data, *q; 148 149 if (*outstr == NULL) 150 *outstr = svn_stringbuf_create_empty(pool); 151 152 while (1) 153 { 154 /* Find a character which needs to be quoted and append bytes up 155 to that point. Strictly speaking, '>' only needs to be 156 quoted if it follows "]]", but it's easier to quote it all 157 the time. 158 159 So, why are we escaping '\r' here? Well, according to the 160 XML spec, '\r\n' gets converted to '\n' during XML parsing. 161 Also, any '\r' not followed by '\n' is converted to '\n'. By 162 golly, if we say we want to escape a '\r', we want to make 163 sure it remains a '\r'! */ 164 q = p; 165 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r') 166 q++; 167 svn_stringbuf_appendbytes(*outstr, p, q - p); 168 169 /* We may already be a winner. */ 170 if (q == end) 171 break; 172 173 /* Append the entity reference for the character. */ 174 if (*q == '&') 175 svn_stringbuf_appendcstr(*outstr, "&"); 176 else if (*q == '<') 177 svn_stringbuf_appendcstr(*outstr, "<"); 178 else if (*q == '>') 179 svn_stringbuf_appendcstr(*outstr, ">"); 180 else if (*q == '\r') 181 svn_stringbuf_appendcstr(*outstr, " "); 182 183 p = q + 1; 184 } 185} 186 187/* Essentially the same as xml_escape_cdata, with the addition of 188 whitespace and quote characters. */ 189static void 190xml_escape_attr(svn_stringbuf_t **outstr, 191 const char *data, 192 apr_size_t len, 193 apr_pool_t *pool) 194{ 195 const char *end = data + len; 196 const char *p = data, *q; 197 198 if (*outstr == NULL) 199 *outstr = svn_stringbuf_create_ensure(len, pool); 200 201 while (1) 202 { 203 /* Find a character which needs to be quoted and append bytes up 204 to that point. */ 205 q = p; 206 while (q < end && *q != '&' && *q != '<' && *q != '>' 207 && *q != '"' && *q != '\'' && *q != '\r' 208 && *q != '\n' && *q != '\t') 209 q++; 210 svn_stringbuf_appendbytes(*outstr, p, q - p); 211 212 /* We may already be a winner. */ 213 if (q == end) 214 break; 215 216 /* Append the entity reference for the character. */ 217 if (*q == '&') 218 svn_stringbuf_appendcstr(*outstr, "&"); 219 else if (*q == '<') 220 svn_stringbuf_appendcstr(*outstr, "<"); 221 else if (*q == '>') 222 svn_stringbuf_appendcstr(*outstr, ">"); 223 else if (*q == '"') 224 svn_stringbuf_appendcstr(*outstr, """); 225 else if (*q == '\'') 226 svn_stringbuf_appendcstr(*outstr, "'"); 227 else if (*q == '\r') 228 svn_stringbuf_appendcstr(*outstr, " "); 229 else if (*q == '\n') 230 svn_stringbuf_appendcstr(*outstr, " "); 231 else if (*q == '\t') 232 svn_stringbuf_appendcstr(*outstr, "	"); 233 234 p = q + 1; 235 } 236} 237 238 239void 240svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr, 241 const svn_stringbuf_t *string, 242 apr_pool_t *pool) 243{ 244 xml_escape_cdata(outstr, string->data, string->len, pool); 245} 246 247 248void 249svn_xml_escape_cdata_string(svn_stringbuf_t **outstr, 250 const svn_string_t *string, 251 apr_pool_t *pool) 252{ 253 xml_escape_cdata(outstr, string->data, string->len, pool); 254} 255 256 257void 258svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr, 259 const char *string, 260 apr_pool_t *pool) 261{ 262 xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool); 263} 264 265 266void 267svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr, 268 const svn_stringbuf_t *string, 269 apr_pool_t *pool) 270{ 271 xml_escape_attr(outstr, string->data, string->len, pool); 272} 273 274 275void 276svn_xml_escape_attr_string(svn_stringbuf_t **outstr, 277 const svn_string_t *string, 278 apr_pool_t *pool) 279{ 280 xml_escape_attr(outstr, string->data, string->len, pool); 281} 282 283 284void 285svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr, 286 const char *string, 287 apr_pool_t *pool) 288{ 289 xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool); 290} 291 292 293const char * 294svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool) 295{ 296 const char *end = string + strlen(string); 297 const char *p = string, *q; 298 svn_stringbuf_t *outstr; 299 char escaped_char[6]; /* ? \ u u u \0 */ 300 301 for (q = p; q < end; q++) 302 { 303 if (svn_ctype_iscntrl(*q) 304 && ! ((*q == '\n') || (*q == '\r') || (*q == '\t'))) 305 break; 306 } 307 308 /* Return original string if no unsafe characters found. */ 309 if (q == end) 310 return string; 311 312 outstr = svn_stringbuf_create_empty(pool); 313 while (1) 314 { 315 q = p; 316 317 /* Traverse till either unsafe character or eos. */ 318 while ((q < end) 319 && ((! svn_ctype_iscntrl(*q)) 320 || (*q == '\n') || (*q == '\r') || (*q == '\t'))) 321 q++; 322 323 /* copy chunk before marker */ 324 svn_stringbuf_appendbytes(outstr, p, q - p); 325 326 if (q == end) 327 break; 328 329 /* Append an escaped version of the unsafe character. 330 331 ### This format was chosen for consistency with 332 ### svn_utf__cstring_from_utf8_fuzzy(). The two functions 333 ### should probably share code, even though they escape 334 ### different characters. 335 */ 336 apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u", 337 (unsigned char) *q); 338 svn_stringbuf_appendcstr(outstr, escaped_char); 339 340 p = q + 1; 341 } 342 343 return outstr->data; 344} 345 346 347/*** Map from the Expat callback types to the SVN XML types. ***/ 348 349static void expat_start_handler(void *userData, 350 const XML_Char *name, 351 const XML_Char **atts) 352{ 353 svn_xml_parser_t *svn_parser = userData; 354 355 (*svn_parser->start_handler)(svn_parser->baton, name, atts); 356 357#if XML_VERSION_AT_LEAST(1, 95, 8) 358 /* Stop XML parsing if svn_xml_signal_bailout() was called. 359 We cannot do this in svn_xml_signal_bailout() because Expat 360 documentation states that XML_StopParser() must be called only from 361 callbacks. */ 362 if (svn_parser->error) 363 (void) XML_StopParser(svn_parser->parser, 0 /* resumable */); 364#endif 365} 366 367static void expat_end_handler(void *userData, const XML_Char *name) 368{ 369 svn_xml_parser_t *svn_parser = userData; 370 371 (*svn_parser->end_handler)(svn_parser->baton, name); 372 373#if XML_VERSION_AT_LEAST(1, 95, 8) 374 /* Stop XML parsing if svn_xml_signal_bailout() was called. 375 We cannot do this in svn_xml_signal_bailout() because Expat 376 documentation states that XML_StopParser() must be called only from 377 callbacks. */ 378 if (svn_parser->error) 379 (void) XML_StopParser(svn_parser->parser, 0 /* resumable */); 380#endif 381} 382 383static void expat_data_handler(void *userData, const XML_Char *s, int len) 384{ 385 svn_xml_parser_t *svn_parser = userData; 386 387 (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len); 388 389#if XML_VERSION_AT_LEAST(1, 95, 8) 390 /* Stop XML parsing if svn_xml_signal_bailout() was called. 391 We cannot do this in svn_xml_signal_bailout() because Expat 392 documentation states that XML_StopParser() must be called only from 393 callbacks. */ 394 if (svn_parser->error) 395 (void) XML_StopParser(svn_parser->parser, 0 /* resumable */); 396#endif 397} 398 399#if XML_VERSION_AT_LEAST(1, 95, 8) 400static void expat_entity_declaration(void *userData, 401 const XML_Char *entityName, 402 int is_parameter_entity, 403 const XML_Char *value, 404 int value_length, 405 const XML_Char *base, 406 const XML_Char *systemId, 407 const XML_Char *publicId, 408 const XML_Char *notationName) 409{ 410 svn_xml_parser_t *svn_parser = userData; 411 412 /* Stop the parser if an entity declaration is hit. */ 413 XML_StopParser(svn_parser->parser, 0 /* resumable */); 414} 415#else 416/* A noop default_handler. */ 417static void expat_default_handler(void *userData, const XML_Char *s, int len) 418{ 419} 420#endif 421 422/*** Making a parser. ***/ 423 424static apr_status_t parser_cleanup(void *data) 425{ 426 svn_xml_parser_t *svn_parser = data; 427 428 /* Free Expat parser. */ 429 if (svn_parser->parser) 430 { 431 XML_ParserFree(svn_parser->parser); 432 svn_parser->parser = NULL; 433 } 434 return APR_SUCCESS; 435} 436 437svn_xml_parser_t * 438svn_xml_make_parser(void *baton, 439 svn_xml_start_elem start_handler, 440 svn_xml_end_elem end_handler, 441 svn_xml_char_data data_handler, 442 apr_pool_t *pool) 443{ 444 svn_xml_parser_t *svn_parser; 445 XML_Parser parser = XML_ParserCreate(NULL); 446 447 XML_SetElementHandler(parser, 448 start_handler ? expat_start_handler : NULL, 449 end_handler ? expat_end_handler : NULL); 450 XML_SetCharacterDataHandler(parser, 451 data_handler ? expat_data_handler : NULL); 452 453#if XML_VERSION_AT_LEAST(1, 95, 8) 454 XML_SetEntityDeclHandler(parser, expat_entity_declaration); 455#else 456 XML_SetDefaultHandler(parser, expat_default_handler); 457#endif 458 459 svn_parser = apr_pcalloc(pool, sizeof(*svn_parser)); 460 461 svn_parser->parser = parser; 462 svn_parser->start_handler = start_handler; 463 svn_parser->end_handler = end_handler; 464 svn_parser->data_handler = data_handler; 465 svn_parser->baton = baton; 466 svn_parser->pool = pool; 467 468 /* store our parser info as the UserData in the Expat parser */ 469 XML_SetUserData(parser, svn_parser); 470 471 /* Register pool cleanup handler to free Expat XML parser on cleanup, 472 if svn_xml_free_parser() was not called explicitly. */ 473 apr_pool_cleanup_register(svn_parser->pool, svn_parser, 474 parser_cleanup, apr_pool_cleanup_null); 475 476 return svn_parser; 477} 478 479 480/* Free a parser */ 481void 482svn_xml_free_parser(svn_xml_parser_t *svn_parser) 483{ 484 apr_pool_cleanup_run(svn_parser->pool, svn_parser, parser_cleanup); 485} 486 487 488 489 490svn_error_t * 491svn_xml_parse(svn_xml_parser_t *svn_parser, 492 const char *buf, 493 apr_size_t len, 494 svn_boolean_t is_final) 495{ 496 svn_error_t *err; 497 int success; 498 499 /* Parse some xml data */ 500 success = XML_Parse(svn_parser->parser, buf, (int) len, is_final); 501 502 /* Did an error occur somewhere *inside* the expat callbacks? */ 503 if (svn_parser->error) 504 { 505 /* Kill all parsers and return the error */ 506 svn_xml_free_parser(svn_parser); 507 return svn_parser->error; 508 } 509 510 /* If expat choked internally, return its error. */ 511 if (! success) 512 { 513 /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */ 514 long line = XML_GetCurrentLineNumber(svn_parser->parser); 515 516 err = svn_error_createf 517 (SVN_ERR_XML_MALFORMED, NULL, 518 _("Malformed XML: %s at line %ld"), 519 XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line); 520 521 /* Kill all parsers and return the expat error */ 522 svn_xml_free_parser(svn_parser); 523 return err; 524 } 525 526 return SVN_NO_ERROR; 527} 528 529 530 531void svn_xml_signal_bailout(svn_error_t *error, 532 svn_xml_parser_t *svn_parser) 533{ 534 /* This will cause the current XML_Parse() call to finish quickly! */ 535 XML_SetElementHandler(svn_parser->parser, NULL, NULL); 536 XML_SetCharacterDataHandler(svn_parser->parser, NULL); 537#if XML_VERSION_AT_LEAST(1, 95, 8) 538 XML_SetEntityDeclHandler(svn_parser->parser, NULL); 539#endif 540 /* Once outside of XML_Parse(), the existence of this field will 541 cause svn_delta_parse()'s main read-loop to return error. */ 542 svn_parser->error = error; 543} 544 545 546 547 548 549 550 551 552/*** Attribute walking. ***/ 553 554const char * 555svn_xml_get_attr_value(const char *name, const char *const *atts) 556{ 557 while (atts && (*atts)) 558 { 559 if (strcmp(atts[0], name) == 0) 560 return atts[1]; 561 else 562 atts += 2; /* continue looping */ 563 } 564 565 /* Else no such attribute name seen. */ 566 return NULL; 567} 568 569 570 571/*** Printing XML ***/ 572 573void 574svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding, 575 apr_pool_t *pool) 576{ 577 578 if (*str == NULL) 579 *str = svn_stringbuf_create_empty(pool); 580 svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\""); 581 if (encoding) 582 { 583 encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding); 584 svn_stringbuf_appendcstr(*str, encoding); 585 } 586 svn_stringbuf_appendcstr(*str, "?>\n"); 587} 588 589 590 591/*** Creating attribute hashes. ***/ 592 593/* Combine an existing attribute list ATTS with a HASH that itself 594 represents an attribute list. Iff PRESERVE is true, then no value 595 already in HASH will be changed, else values from ATTS will 596 override previous values in HASH. */ 597static void 598amalgamate(const char **atts, 599 apr_hash_t *ht, 600 svn_boolean_t preserve, 601 apr_pool_t *pool) 602{ 603 const char *key; 604 605 if (atts) 606 for (key = *atts; key; key = *(++atts)) 607 { 608 const char *val = *(++atts); 609 size_t keylen; 610 assert(key != NULL); 611 /* kff todo: should we also insist that val be non-null here? 612 Probably. */ 613 614 keylen = strlen(key); 615 if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL)) 616 continue; 617 else 618 apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen, 619 val ? apr_pstrdup(pool, val) : NULL); 620 } 621} 622 623 624apr_hash_t * 625svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool) 626{ 627 apr_hash_t *ht = apr_hash_make(pool); 628 const char *key; 629 630 while ((key = va_arg(ap, char *)) != NULL) 631 { 632 const char *val = va_arg(ap, const char *); 633 svn_hash_sets(ht, key, val); 634 } 635 636 return ht; 637} 638 639 640apr_hash_t * 641svn_xml_make_att_hash(const char **atts, apr_pool_t *pool) 642{ 643 apr_hash_t *ht = apr_hash_make(pool); 644 amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */ 645 return ht; 646} 647 648 649void 650svn_xml_hash_atts_overlaying(const char **atts, 651 apr_hash_t *ht, 652 apr_pool_t *pool) 653{ 654 amalgamate(atts, ht, 0, pool); 655} 656 657 658void 659svn_xml_hash_atts_preserving(const char **atts, 660 apr_hash_t *ht, 661 apr_pool_t *pool) 662{ 663 amalgamate(atts, ht, 1, pool); 664} 665 666 667 668/*** Making XML tags. ***/ 669 670 671void 672svn_xml_make_open_tag_hash(svn_stringbuf_t **str, 673 apr_pool_t *pool, 674 enum svn_xml_open_tag_style style, 675 const char *tagname, 676 apr_hash_t *attributes) 677{ 678 apr_hash_index_t *hi; 679 apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30; 680 681 if (*str == NULL) 682 *str = svn_stringbuf_create_ensure(est_size, pool); 683 684 svn_stringbuf_appendcstr(*str, "<"); 685 svn_stringbuf_appendcstr(*str, tagname); 686 687 for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi)) 688 { 689 const void *key; 690 void *val; 691 692 apr_hash_this(hi, &key, NULL, &val); 693 assert(val != NULL); 694 695 svn_stringbuf_appendcstr(*str, "\n "); 696 svn_stringbuf_appendcstr(*str, key); 697 svn_stringbuf_appendcstr(*str, "=\""); 698 svn_xml_escape_attr_cstring(str, val, pool); 699 svn_stringbuf_appendcstr(*str, "\""); 700 } 701 702 if (style == svn_xml_self_closing) 703 svn_stringbuf_appendcstr(*str, "/"); 704 svn_stringbuf_appendcstr(*str, ">"); 705 if (style != svn_xml_protect_pcdata) 706 svn_stringbuf_appendcstr(*str, "\n"); 707} 708 709 710void 711svn_xml_make_open_tag_v(svn_stringbuf_t **str, 712 apr_pool_t *pool, 713 enum svn_xml_open_tag_style style, 714 const char *tagname, 715 va_list ap) 716{ 717 apr_pool_t *subpool = svn_pool_create(pool); 718 apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool); 719 720 svn_xml_make_open_tag_hash(str, pool, style, tagname, ht); 721 svn_pool_destroy(subpool); 722} 723 724 725 726void 727svn_xml_make_open_tag(svn_stringbuf_t **str, 728 apr_pool_t *pool, 729 enum svn_xml_open_tag_style style, 730 const char *tagname, 731 ...) 732{ 733 va_list ap; 734 735 va_start(ap, tagname); 736 svn_xml_make_open_tag_v(str, pool, style, tagname, ap); 737 va_end(ap); 738} 739 740 741void svn_xml_make_close_tag(svn_stringbuf_t **str, 742 apr_pool_t *pool, 743 const char *tagname) 744{ 745 if (*str == NULL) 746 *str = svn_stringbuf_create_empty(pool); 747 748 svn_stringbuf_appendcstr(*str, "</"); 749 svn_stringbuf_appendcstr(*str, tagname); 750 svn_stringbuf_appendcstr(*str, ">\n"); 751} 752