xml.c revision 302408
1/* 2 * xml.c: xml helper code shared among the Subversion libraries. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <string.h> 27#include <assert.h> 28 29#include "svn_private_config.h" /* for SVN_HAVE_OLD_EXPAT */ 30#include "svn_hash.h" 31#include "svn_pools.h" 32#include "svn_xml.h" 33#include "svn_error.h" 34#include "svn_ctype.h" 35 36#include "private/svn_utf_private.h" 37#include "private/svn_subr_private.h" 38 39#ifdef SVN_HAVE_OLD_EXPAT 40#include <xmlparse.h> 41#else 42#include <expat.h> 43#endif 44 45#ifdef XML_UNICODE 46#error Expat is unusable -- it has been compiled for wide characters 47#endif 48 49const char * 50svn_xml__compiled_version(void) 51{ 52 static const char xml_version_str[] = APR_STRINGIFY(XML_MAJOR_VERSION) 53 "." APR_STRINGIFY(XML_MINOR_VERSION) 54 "." APR_STRINGIFY(XML_MICRO_VERSION); 55 56 return xml_version_str; 57} 58 59const char * 60svn_xml__runtime_version(void) 61{ 62 const char *expat_version = XML_ExpatVersion(); 63 64 if (!strncmp(expat_version, "expat_", 6)) 65 expat_version += 6; 66 67 return expat_version; 68} 69 70 71/* The private internals for a parser object. */ 72struct svn_xml_parser_t 73{ 74 /** the expat parser */ 75 XML_Parser parser; 76 77 /** the SVN callbacks to call from the Expat callbacks */ 78 svn_xml_start_elem start_handler; 79 svn_xml_end_elem end_handler; 80 svn_xml_char_data data_handler; 81 82 /** the user's baton for private data */ 83 void *baton; 84 85 /** if non-@c NULL, an error happened while parsing */ 86 svn_error_t *error; 87 88 /** where this object is allocated, so we can free it easily */ 89 apr_pool_t *pool; 90 91}; 92 93 94/*** XML character validation ***/ 95 96svn_boolean_t 97svn_xml_is_xml_safe(const char *data, apr_size_t len) 98{ 99 const char *end = data + len; 100 const char *p; 101 102 if (! svn_utf__is_valid(data, len)) 103 return FALSE; 104 105 for (p = data; p < end; p++) 106 { 107 unsigned char c = *p; 108 109 if (svn_ctype_iscntrl(c)) 110 { 111 if ((c != SVN_CTYPE_ASCII_TAB) 112 && (c != SVN_CTYPE_ASCII_LINEFEED) 113 && (c != SVN_CTYPE_ASCII_CARRIAGERETURN) 114 && (c != SVN_CTYPE_ASCII_DELETE)) 115 return FALSE; 116 } 117 } 118 return TRUE; 119} 120 121 122 123 124 125/*** XML escaping. ***/ 126 127/* ### ...? 128 * 129 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated 130 * in POOL, else append to the existing stringbuf there. 131 */ 132static void 133xml_escape_cdata(svn_stringbuf_t **outstr, 134 const char *data, 135 apr_size_t len, 136 apr_pool_t *pool) 137{ 138 const char *end = data + len; 139 const char *p = data, *q; 140 141 if (*outstr == NULL) 142 *outstr = svn_stringbuf_create_empty(pool); 143 144 while (1) 145 { 146 /* Find a character which needs to be quoted and append bytes up 147 to that point. Strictly speaking, '>' only needs to be 148 quoted if it follows "]]", but it's easier to quote it all 149 the time. 150 151 So, why are we escaping '\r' here? Well, according to the 152 XML spec, '\r\n' gets converted to '\n' during XML parsing. 153 Also, any '\r' not followed by '\n' is converted to '\n'. By 154 golly, if we say we want to escape a '\r', we want to make 155 sure it remains a '\r'! */ 156 q = p; 157 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r') 158 q++; 159 svn_stringbuf_appendbytes(*outstr, p, q - p); 160 161 /* We may already be a winner. */ 162 if (q == end) 163 break; 164 165 /* Append the entity reference for the character. */ 166 if (*q == '&') 167 svn_stringbuf_appendcstr(*outstr, "&"); 168 else if (*q == '<') 169 svn_stringbuf_appendcstr(*outstr, "<"); 170 else if (*q == '>') 171 svn_stringbuf_appendcstr(*outstr, ">"); 172 else if (*q == '\r') 173 svn_stringbuf_appendcstr(*outstr, " "); 174 175 p = q + 1; 176 } 177} 178 179/* Essentially the same as xml_escape_cdata, with the addition of 180 whitespace and quote characters. */ 181static void 182xml_escape_attr(svn_stringbuf_t **outstr, 183 const char *data, 184 apr_size_t len, 185 apr_pool_t *pool) 186{ 187 const char *end = data + len; 188 const char *p = data, *q; 189 190 if (*outstr == NULL) 191 *outstr = svn_stringbuf_create_ensure(len, pool); 192 193 while (1) 194 { 195 /* Find a character which needs to be quoted and append bytes up 196 to that point. */ 197 q = p; 198 while (q < end && *q != '&' && *q != '<' && *q != '>' 199 && *q != '"' && *q != '\'' && *q != '\r' 200 && *q != '\n' && *q != '\t') 201 q++; 202 svn_stringbuf_appendbytes(*outstr, p, q - p); 203 204 /* We may already be a winner. */ 205 if (q == end) 206 break; 207 208 /* Append the entity reference for the character. */ 209 if (*q == '&') 210 svn_stringbuf_appendcstr(*outstr, "&"); 211 else if (*q == '<') 212 svn_stringbuf_appendcstr(*outstr, "<"); 213 else if (*q == '>') 214 svn_stringbuf_appendcstr(*outstr, ">"); 215 else if (*q == '"') 216 svn_stringbuf_appendcstr(*outstr, """); 217 else if (*q == '\'') 218 svn_stringbuf_appendcstr(*outstr, "'"); 219 else if (*q == '\r') 220 svn_stringbuf_appendcstr(*outstr, " "); 221 else if (*q == '\n') 222 svn_stringbuf_appendcstr(*outstr, " "); 223 else if (*q == '\t') 224 svn_stringbuf_appendcstr(*outstr, "	"); 225 226 p = q + 1; 227 } 228} 229 230 231void 232svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr, 233 const svn_stringbuf_t *string, 234 apr_pool_t *pool) 235{ 236 xml_escape_cdata(outstr, string->data, string->len, pool); 237} 238 239 240void 241svn_xml_escape_cdata_string(svn_stringbuf_t **outstr, 242 const svn_string_t *string, 243 apr_pool_t *pool) 244{ 245 xml_escape_cdata(outstr, string->data, string->len, pool); 246} 247 248 249void 250svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr, 251 const char *string, 252 apr_pool_t *pool) 253{ 254 xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool); 255} 256 257 258void 259svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr, 260 const svn_stringbuf_t *string, 261 apr_pool_t *pool) 262{ 263 xml_escape_attr(outstr, string->data, string->len, pool); 264} 265 266 267void 268svn_xml_escape_attr_string(svn_stringbuf_t **outstr, 269 const svn_string_t *string, 270 apr_pool_t *pool) 271{ 272 xml_escape_attr(outstr, string->data, string->len, pool); 273} 274 275 276void 277svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr, 278 const char *string, 279 apr_pool_t *pool) 280{ 281 xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool); 282} 283 284 285const char * 286svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool) 287{ 288 const char *end = string + strlen(string); 289 const char *p = string, *q; 290 svn_stringbuf_t *outstr; 291 char escaped_char[6]; /* ? \ u u u \0 */ 292 293 for (q = p; q < end; q++) 294 { 295 if (svn_ctype_iscntrl(*q) 296 && ! ((*q == '\n') || (*q == '\r') || (*q == '\t'))) 297 break; 298 } 299 300 /* Return original string if no unsafe characters found. */ 301 if (q == end) 302 return string; 303 304 outstr = svn_stringbuf_create_empty(pool); 305 while (1) 306 { 307 q = p; 308 309 /* Traverse till either unsafe character or eos. */ 310 while ((q < end) 311 && ((! svn_ctype_iscntrl(*q)) 312 || (*q == '\n') || (*q == '\r') || (*q == '\t'))) 313 q++; 314 315 /* copy chunk before marker */ 316 svn_stringbuf_appendbytes(outstr, p, q - p); 317 318 if (q == end) 319 break; 320 321 /* Append an escaped version of the unsafe character. 322 323 ### This format was chosen for consistency with 324 ### svn_utf__cstring_from_utf8_fuzzy(). The two functions 325 ### should probably share code, even though they escape 326 ### different characters. 327 */ 328 apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u", 329 (unsigned char) *q); 330 svn_stringbuf_appendcstr(outstr, escaped_char); 331 332 p = q + 1; 333 } 334 335 return outstr->data; 336} 337 338 339/*** Map from the Expat callback types to the SVN XML types. ***/ 340 341static void expat_start_handler(void *userData, 342 const XML_Char *name, 343 const XML_Char **atts) 344{ 345 svn_xml_parser_t *svn_parser = userData; 346 347 (*svn_parser->start_handler)(svn_parser->baton, name, atts); 348} 349 350static void expat_end_handler(void *userData, const XML_Char *name) 351{ 352 svn_xml_parser_t *svn_parser = userData; 353 354 (*svn_parser->end_handler)(svn_parser->baton, name); 355} 356 357static void expat_data_handler(void *userData, const XML_Char *s, int len) 358{ 359 svn_xml_parser_t *svn_parser = userData; 360 361 (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len); 362} 363 364 365/*** Making a parser. ***/ 366 367svn_xml_parser_t * 368svn_xml_make_parser(void *baton, 369 svn_xml_start_elem start_handler, 370 svn_xml_end_elem end_handler, 371 svn_xml_char_data data_handler, 372 apr_pool_t *pool) 373{ 374 svn_xml_parser_t *svn_parser; 375 apr_pool_t *subpool; 376 377 XML_Parser parser = XML_ParserCreate(NULL); 378 379 XML_SetElementHandler(parser, 380 start_handler ? expat_start_handler : NULL, 381 end_handler ? expat_end_handler : NULL); 382 XML_SetCharacterDataHandler(parser, 383 data_handler ? expat_data_handler : NULL); 384 385 /* ### we probably don't want this pool; or at least we should pass it 386 ### to the callbacks and clear it periodically. */ 387 subpool = svn_pool_create(pool); 388 389 svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser)); 390 391 svn_parser->parser = parser; 392 svn_parser->start_handler = start_handler; 393 svn_parser->end_handler = end_handler; 394 svn_parser->data_handler = data_handler; 395 svn_parser->baton = baton; 396 svn_parser->pool = subpool; 397 398 /* store our parser info as the UserData in the Expat parser */ 399 XML_SetUserData(parser, svn_parser); 400 401 return svn_parser; 402} 403 404 405/* Free a parser */ 406void 407svn_xml_free_parser(svn_xml_parser_t *svn_parser) 408{ 409 /* Free the expat parser */ 410 XML_ParserFree(svn_parser->parser); 411 412 /* Free the subversion parser */ 413 svn_pool_destroy(svn_parser->pool); 414} 415 416 417 418 419svn_error_t * 420svn_xml_parse(svn_xml_parser_t *svn_parser, 421 const char *buf, 422 apr_size_t len, 423 svn_boolean_t is_final) 424{ 425 svn_error_t *err; 426 int success; 427 428 /* Parse some xml data */ 429 success = XML_Parse(svn_parser->parser, buf, (int) len, is_final); 430 431 /* If expat choked internally, return its error. */ 432 if (! success) 433 { 434 /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */ 435 long line = XML_GetCurrentLineNumber(svn_parser->parser); 436 437 err = svn_error_createf 438 (SVN_ERR_XML_MALFORMED, NULL, 439 _("Malformed XML: %s at line %ld"), 440 XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line); 441 442 /* Kill all parsers and return the expat error */ 443 svn_xml_free_parser(svn_parser); 444 return err; 445 } 446 447 /* Did an error occur somewhere *inside* the expat callbacks? */ 448 if (svn_parser->error) 449 { 450 err = svn_parser->error; 451 svn_xml_free_parser(svn_parser); 452 return err; 453 } 454 455 return SVN_NO_ERROR; 456} 457 458 459 460void svn_xml_signal_bailout(svn_error_t *error, 461 svn_xml_parser_t *svn_parser) 462{ 463 /* This will cause the current XML_Parse() call to finish quickly! */ 464 XML_SetElementHandler(svn_parser->parser, NULL, NULL); 465 XML_SetCharacterDataHandler(svn_parser->parser, NULL); 466 467 /* Once outside of XML_Parse(), the existence of this field will 468 cause svn_delta_parse()'s main read-loop to return error. */ 469 svn_parser->error = error; 470} 471 472 473 474 475 476 477 478 479/*** Attribute walking. ***/ 480 481const char * 482svn_xml_get_attr_value(const char *name, const char *const *atts) 483{ 484 while (atts && (*atts)) 485 { 486 if (strcmp(atts[0], name) == 0) 487 return atts[1]; 488 else 489 atts += 2; /* continue looping */ 490 } 491 492 /* Else no such attribute name seen. */ 493 return NULL; 494} 495 496 497 498/*** Printing XML ***/ 499 500void 501svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding, 502 apr_pool_t *pool) 503{ 504 505 if (*str == NULL) 506 *str = svn_stringbuf_create_empty(pool); 507 svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\""); 508 if (encoding) 509 { 510 encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding); 511 svn_stringbuf_appendcstr(*str, encoding); 512 } 513 svn_stringbuf_appendcstr(*str, "?>\n"); 514} 515 516 517 518/*** Creating attribute hashes. ***/ 519 520/* Combine an existing attribute list ATTS with a HASH that itself 521 represents an attribute list. Iff PRESERVE is true, then no value 522 already in HASH will be changed, else values from ATTS will 523 override previous values in HASH. */ 524static void 525amalgamate(const char **atts, 526 apr_hash_t *ht, 527 svn_boolean_t preserve, 528 apr_pool_t *pool) 529{ 530 const char *key; 531 532 if (atts) 533 for (key = *atts; key; key = *(++atts)) 534 { 535 const char *val = *(++atts); 536 size_t keylen; 537 assert(key != NULL); 538 /* kff todo: should we also insist that val be non-null here? 539 Probably. */ 540 541 keylen = strlen(key); 542 if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL)) 543 continue; 544 else 545 apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen, 546 val ? apr_pstrdup(pool, val) : NULL); 547 } 548} 549 550 551apr_hash_t * 552svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool) 553{ 554 apr_hash_t *ht = apr_hash_make(pool); 555 const char *key; 556 557 while ((key = va_arg(ap, char *)) != NULL) 558 { 559 const char *val = va_arg(ap, const char *); 560 svn_hash_sets(ht, key, val); 561 } 562 563 return ht; 564} 565 566 567apr_hash_t * 568svn_xml_make_att_hash(const char **atts, apr_pool_t *pool) 569{ 570 apr_hash_t *ht = apr_hash_make(pool); 571 amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */ 572 return ht; 573} 574 575 576void 577svn_xml_hash_atts_overlaying(const char **atts, 578 apr_hash_t *ht, 579 apr_pool_t *pool) 580{ 581 amalgamate(atts, ht, 0, pool); 582} 583 584 585void 586svn_xml_hash_atts_preserving(const char **atts, 587 apr_hash_t *ht, 588 apr_pool_t *pool) 589{ 590 amalgamate(atts, ht, 1, pool); 591} 592 593 594 595/*** Making XML tags. ***/ 596 597 598void 599svn_xml_make_open_tag_hash(svn_stringbuf_t **str, 600 apr_pool_t *pool, 601 enum svn_xml_open_tag_style style, 602 const char *tagname, 603 apr_hash_t *attributes) 604{ 605 apr_hash_index_t *hi; 606 apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30; 607 608 if (*str == NULL) 609 *str = svn_stringbuf_create_ensure(est_size, pool); 610 611 svn_stringbuf_appendcstr(*str, "<"); 612 svn_stringbuf_appendcstr(*str, tagname); 613 614 for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi)) 615 { 616 const void *key; 617 void *val; 618 619 apr_hash_this(hi, &key, NULL, &val); 620 assert(val != NULL); 621 622 svn_stringbuf_appendcstr(*str, "\n "); 623 svn_stringbuf_appendcstr(*str, key); 624 svn_stringbuf_appendcstr(*str, "=\""); 625 svn_xml_escape_attr_cstring(str, val, pool); 626 svn_stringbuf_appendcstr(*str, "\""); 627 } 628 629 if (style == svn_xml_self_closing) 630 svn_stringbuf_appendcstr(*str, "/"); 631 svn_stringbuf_appendcstr(*str, ">"); 632 if (style != svn_xml_protect_pcdata) 633 svn_stringbuf_appendcstr(*str, "\n"); 634} 635 636 637void 638svn_xml_make_open_tag_v(svn_stringbuf_t **str, 639 apr_pool_t *pool, 640 enum svn_xml_open_tag_style style, 641 const char *tagname, 642 va_list ap) 643{ 644 apr_pool_t *subpool = svn_pool_create(pool); 645 apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool); 646 647 svn_xml_make_open_tag_hash(str, pool, style, tagname, ht); 648 svn_pool_destroy(subpool); 649} 650 651 652 653void 654svn_xml_make_open_tag(svn_stringbuf_t **str, 655 apr_pool_t *pool, 656 enum svn_xml_open_tag_style style, 657 const char *tagname, 658 ...) 659{ 660 va_list ap; 661 662 va_start(ap, tagname); 663 svn_xml_make_open_tag_v(str, pool, style, tagname, ap); 664 va_end(ap); 665} 666 667 668void svn_xml_make_close_tag(svn_stringbuf_t **str, 669 apr_pool_t *pool, 670 const char *tagname) 671{ 672 if (*str == NULL) 673 *str = svn_stringbuf_create_empty(pool); 674 675 svn_stringbuf_appendcstr(*str, "</"); 676 svn_stringbuf_appendcstr(*str, tagname); 677 svn_stringbuf_appendcstr(*str, ">\n"); 678} 679