/* xml.c revision 251881 */
1/* 2 * xml.c: xml helper code shared among the Subversion libraries. 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25 26#include <string.h> 27#include <assert.h> 28 29#include "svn_private_config.h" /* for SVN_HAVE_OLD_EXPAT */ 30#include "svn_hash.h" 31#include "svn_pools.h" 32#include "svn_xml.h" 33#include "svn_error.h" 34#include "svn_ctype.h" 35 36#include "private/svn_utf_private.h" 37 38#ifdef SVN_HAVE_OLD_EXPAT 39#include <xmlparse.h> 40#else 41#include <expat.h> 42#endif 43 44#ifdef XML_UNICODE 45#error Expat is unusable -- it has been compiled for wide characters 46#endif 47 48/* The private internals for a parser object. 
*/ 49struct svn_xml_parser_t 50{ 51 /** the expat parser */ 52 XML_Parser parser; 53 54 /** the SVN callbacks to call from the Expat callbacks */ 55 svn_xml_start_elem start_handler; 56 svn_xml_end_elem end_handler; 57 svn_xml_char_data data_handler; 58 59 /** the user's baton for private data */ 60 void *baton; 61 62 /** if non-@c NULL, an error happened while parsing */ 63 svn_error_t *error; 64 65 /** where this object is allocated, so we can free it easily */ 66 apr_pool_t *pool; 67 68}; 69 70 71/*** XML character validation ***/ 72 73svn_boolean_t 74svn_xml_is_xml_safe(const char *data, apr_size_t len) 75{ 76 const char *end = data + len; 77 const char *p; 78 79 if (! svn_utf__is_valid(data, len)) 80 return FALSE; 81 82 for (p = data; p < end; p++) 83 { 84 unsigned char c = *p; 85 86 if (svn_ctype_iscntrl(c)) 87 { 88 if ((c != SVN_CTYPE_ASCII_TAB) 89 && (c != SVN_CTYPE_ASCII_LINEFEED) 90 && (c != SVN_CTYPE_ASCII_CARRIAGERETURN) 91 && (c != SVN_CTYPE_ASCII_DELETE)) 92 return FALSE; 93 } 94 } 95 return TRUE; 96} 97 98 99 100 101 102/*** XML escaping. ***/ 103 104/* ### ...? 105 * 106 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated 107 * in POOL, else append to the existing stringbuf there. 108 */ 109static void 110xml_escape_cdata(svn_stringbuf_t **outstr, 111 const char *data, 112 apr_size_t len, 113 apr_pool_t *pool) 114{ 115 const char *end = data + len; 116 const char *p = data, *q; 117 118 if (*outstr == NULL) 119 *outstr = svn_stringbuf_create_empty(pool); 120 121 while (1) 122 { 123 /* Find a character which needs to be quoted and append bytes up 124 to that point. Strictly speaking, '>' only needs to be 125 quoted if it follows "]]", but it's easier to quote it all 126 the time. 127 128 So, why are we escaping '\r' here? Well, according to the 129 XML spec, '\r\n' gets converted to '\n' during XML parsing. 130 Also, any '\r' not followed by '\n' is converted to '\n'. 
By 131 golly, if we say we want to escape a '\r', we want to make 132 sure it remains a '\r'! */ 133 q = p; 134 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r') 135 q++; 136 svn_stringbuf_appendbytes(*outstr, p, q - p); 137 138 /* We may already be a winner. */ 139 if (q == end) 140 break; 141 142 /* Append the entity reference for the character. */ 143 if (*q == '&') 144 svn_stringbuf_appendcstr(*outstr, "&"); 145 else if (*q == '<') 146 svn_stringbuf_appendcstr(*outstr, "<"); 147 else if (*q == '>') 148 svn_stringbuf_appendcstr(*outstr, ">"); 149 else if (*q == '\r') 150 svn_stringbuf_appendcstr(*outstr, " "); 151 152 p = q + 1; 153 } 154} 155 156/* Essentially the same as xml_escape_cdata, with the addition of 157 whitespace and quote characters. */ 158static void 159xml_escape_attr(svn_stringbuf_t **outstr, 160 const char *data, 161 apr_size_t len, 162 apr_pool_t *pool) 163{ 164 const char *end = data + len; 165 const char *p = data, *q; 166 167 if (*outstr == NULL) 168 *outstr = svn_stringbuf_create_ensure(len, pool); 169 170 while (1) 171 { 172 /* Find a character which needs to be quoted and append bytes up 173 to that point. */ 174 q = p; 175 while (q < end && *q != '&' && *q != '<' && *q != '>' 176 && *q != '"' && *q != '\'' && *q != '\r' 177 && *q != '\n' && *q != '\t') 178 q++; 179 svn_stringbuf_appendbytes(*outstr, p, q - p); 180 181 /* We may already be a winner. */ 182 if (q == end) 183 break; 184 185 /* Append the entity reference for the character. 
*/ 186 if (*q == '&') 187 svn_stringbuf_appendcstr(*outstr, "&"); 188 else if (*q == '<') 189 svn_stringbuf_appendcstr(*outstr, "<"); 190 else if (*q == '>') 191 svn_stringbuf_appendcstr(*outstr, ">"); 192 else if (*q == '"') 193 svn_stringbuf_appendcstr(*outstr, """); 194 else if (*q == '\'') 195 svn_stringbuf_appendcstr(*outstr, "'"); 196 else if (*q == '\r') 197 svn_stringbuf_appendcstr(*outstr, " "); 198 else if (*q == '\n') 199 svn_stringbuf_appendcstr(*outstr, " "); 200 else if (*q == '\t') 201 svn_stringbuf_appendcstr(*outstr, "	"); 202 203 p = q + 1; 204 } 205} 206 207 208void 209svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr, 210 const svn_stringbuf_t *string, 211 apr_pool_t *pool) 212{ 213 xml_escape_cdata(outstr, string->data, string->len, pool); 214} 215 216 217void 218svn_xml_escape_cdata_string(svn_stringbuf_t **outstr, 219 const svn_string_t *string, 220 apr_pool_t *pool) 221{ 222 xml_escape_cdata(outstr, string->data, string->len, pool); 223} 224 225 226void 227svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr, 228 const char *string, 229 apr_pool_t *pool) 230{ 231 xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool); 232} 233 234 235void 236svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr, 237 const svn_stringbuf_t *string, 238 apr_pool_t *pool) 239{ 240 xml_escape_attr(outstr, string->data, string->len, pool); 241} 242 243 244void 245svn_xml_escape_attr_string(svn_stringbuf_t **outstr, 246 const svn_string_t *string, 247 apr_pool_t *pool) 248{ 249 xml_escape_attr(outstr, string->data, string->len, pool); 250} 251 252 253void 254svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr, 255 const char *string, 256 apr_pool_t *pool) 257{ 258 xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool); 259} 260 261 262const char * 263svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool) 264{ 265 const char *end = string + strlen(string); 266 const char *p = string, *q; 267 svn_stringbuf_t *outstr; 268 char 
escaped_char[6]; /* ? \ u u u \0 */ 269 270 for (q = p; q < end; q++) 271 { 272 if (svn_ctype_iscntrl(*q) 273 && ! ((*q == '\n') || (*q == '\r') || (*q == '\t'))) 274 break; 275 } 276 277 /* Return original string if no unsafe characters found. */ 278 if (q == end) 279 return string; 280 281 outstr = svn_stringbuf_create_empty(pool); 282 while (1) 283 { 284 q = p; 285 286 /* Traverse till either unsafe character or eos. */ 287 while ((q < end) 288 && ((! svn_ctype_iscntrl(*q)) 289 || (*q == '\n') || (*q == '\r') || (*q == '\t'))) 290 q++; 291 292 /* copy chunk before marker */ 293 svn_stringbuf_appendbytes(outstr, p, q - p); 294 295 if (q == end) 296 break; 297 298 /* Append an escaped version of the unsafe character. 299 300 ### This format was chosen for consistency with 301 ### svn_utf__cstring_from_utf8_fuzzy(). The two functions 302 ### should probably share code, even though they escape 303 ### different characters. 304 */ 305 apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u", 306 (unsigned char) *q); 307 svn_stringbuf_appendcstr(outstr, escaped_char); 308 309 p = q + 1; 310 } 311 312 return outstr->data; 313} 314 315 316/*** Map from the Expat callback types to the SVN XML types. ***/ 317 318static void expat_start_handler(void *userData, 319 const XML_Char *name, 320 const XML_Char **atts) 321{ 322 svn_xml_parser_t *svn_parser = userData; 323 324 (*svn_parser->start_handler)(svn_parser->baton, name, atts); 325} 326 327static void expat_end_handler(void *userData, const XML_Char *name) 328{ 329 svn_xml_parser_t *svn_parser = userData; 330 331 (*svn_parser->end_handler)(svn_parser->baton, name); 332} 333 334static void expat_data_handler(void *userData, const XML_Char *s, int len) 335{ 336 svn_xml_parser_t *svn_parser = userData; 337 338 (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len); 339} 340 341 342/*** Making a parser. 
***/ 343 344svn_xml_parser_t * 345svn_xml_make_parser(void *baton, 346 svn_xml_start_elem start_handler, 347 svn_xml_end_elem end_handler, 348 svn_xml_char_data data_handler, 349 apr_pool_t *pool) 350{ 351 svn_xml_parser_t *svn_parser; 352 apr_pool_t *subpool; 353 354 XML_Parser parser = XML_ParserCreate(NULL); 355 356 XML_SetElementHandler(parser, 357 start_handler ? expat_start_handler : NULL, 358 end_handler ? expat_end_handler : NULL); 359 XML_SetCharacterDataHandler(parser, 360 data_handler ? expat_data_handler : NULL); 361 362 /* ### we probably don't want this pool; or at least we should pass it 363 ### to the callbacks and clear it periodically. */ 364 subpool = svn_pool_create(pool); 365 366 svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser)); 367 368 svn_parser->parser = parser; 369 svn_parser->start_handler = start_handler; 370 svn_parser->end_handler = end_handler; 371 svn_parser->data_handler = data_handler; 372 svn_parser->baton = baton; 373 svn_parser->pool = subpool; 374 375 /* store our parser info as the UserData in the Expat parser */ 376 XML_SetUserData(parser, svn_parser); 377 378 return svn_parser; 379} 380 381 382/* Free a parser */ 383void 384svn_xml_free_parser(svn_xml_parser_t *svn_parser) 385{ 386 /* Free the expat parser */ 387 XML_ParserFree(svn_parser->parser); 388 389 /* Free the subversion parser */ 390 svn_pool_destroy(svn_parser->pool); 391} 392 393 394 395 396svn_error_t * 397svn_xml_parse(svn_xml_parser_t *svn_parser, 398 const char *buf, 399 apr_size_t len, 400 svn_boolean_t is_final) 401{ 402 svn_error_t *err; 403 int success; 404 405 /* Parse some xml data */ 406 success = XML_Parse(svn_parser->parser, buf, (int) len, is_final); 407 408 /* If expat choked internally, return its error. */ 409 if (! success) 410 { 411 /* Line num is "int" in Expat v1, "long" in v2; hide the difference. 
*/ 412 long line = XML_GetCurrentLineNumber(svn_parser->parser); 413 414 err = svn_error_createf 415 (SVN_ERR_XML_MALFORMED, NULL, 416 _("Malformed XML: %s at line %ld"), 417 XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line); 418 419 /* Kill all parsers and return the expat error */ 420 svn_xml_free_parser(svn_parser); 421 return err; 422 } 423 424 /* Did an error occur somewhere *inside* the expat callbacks? */ 425 if (svn_parser->error) 426 { 427 err = svn_parser->error; 428 svn_xml_free_parser(svn_parser); 429 return err; 430 } 431 432 return SVN_NO_ERROR; 433} 434 435 436 437void svn_xml_signal_bailout(svn_error_t *error, 438 svn_xml_parser_t *svn_parser) 439{ 440 /* This will cause the current XML_Parse() call to finish quickly! */ 441 XML_SetElementHandler(svn_parser->parser, NULL, NULL); 442 XML_SetCharacterDataHandler(svn_parser->parser, NULL); 443 444 /* Once outside of XML_Parse(), the existence of this field will 445 cause svn_delta_parse()'s main read-loop to return error. */ 446 svn_parser->error = error; 447} 448 449 450 451 452 453 454 455 456/*** Attribute walking. ***/ 457 458const char * 459svn_xml_get_attr_value(const char *name, const char *const *atts) 460{ 461 while (atts && (*atts)) 462 { 463 if (strcmp(atts[0], name) == 0) 464 return atts[1]; 465 else 466 atts += 2; /* continue looping */ 467 } 468 469 /* Else no such attribute name seen. */ 470 return NULL; 471} 472 473 474 475/*** Printing XML ***/ 476 477void 478svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding, 479 apr_pool_t *pool) 480{ 481 482 if (*str == NULL) 483 *str = svn_stringbuf_create_empty(pool); 484 svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\""); 485 if (encoding) 486 { 487 encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding); 488 svn_stringbuf_appendcstr(*str, encoding); 489 } 490 svn_stringbuf_appendcstr(*str, "?>\n"); 491} 492 493 494 495/*** Creating attribute hashes. 
***/ 496 497/* Combine an existing attribute list ATTS with a HASH that itself 498 represents an attribute list. Iff PRESERVE is true, then no value 499 already in HASH will be changed, else values from ATTS will 500 override previous values in HASH. */ 501static void 502amalgamate(const char **atts, 503 apr_hash_t *ht, 504 svn_boolean_t preserve, 505 apr_pool_t *pool) 506{ 507 const char *key; 508 509 if (atts) 510 for (key = *atts; key; key = *(++atts)) 511 { 512 const char *val = *(++atts); 513 size_t keylen; 514 assert(key != NULL); 515 /* kff todo: should we also insist that val be non-null here? 516 Probably. */ 517 518 keylen = strlen(key); 519 if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL)) 520 continue; 521 else 522 apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen, 523 val ? apr_pstrdup(pool, val) : NULL); 524 } 525} 526 527 528apr_hash_t * 529svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool) 530{ 531 apr_hash_t *ht = apr_hash_make(pool); 532 const char *key; 533 534 while ((key = va_arg(ap, char *)) != NULL) 535 { 536 const char *val = va_arg(ap, const char *); 537 svn_hash_sets(ht, key, val); 538 } 539 540 return ht; 541} 542 543 544apr_hash_t * 545svn_xml_make_att_hash(const char **atts, apr_pool_t *pool) 546{ 547 apr_hash_t *ht = apr_hash_make(pool); 548 amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */ 549 return ht; 550} 551 552 553void 554svn_xml_hash_atts_overlaying(const char **atts, 555 apr_hash_t *ht, 556 apr_pool_t *pool) 557{ 558 amalgamate(atts, ht, 0, pool); 559} 560 561 562void 563svn_xml_hash_atts_preserving(const char **atts, 564 apr_hash_t *ht, 565 apr_pool_t *pool) 566{ 567 amalgamate(atts, ht, 1, pool); 568} 569 570 571 572/*** Making XML tags. 
***/ 573 574 575void 576svn_xml_make_open_tag_hash(svn_stringbuf_t **str, 577 apr_pool_t *pool, 578 enum svn_xml_open_tag_style style, 579 const char *tagname, 580 apr_hash_t *attributes) 581{ 582 apr_hash_index_t *hi; 583 apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30; 584 585 if (*str == NULL) 586 *str = svn_stringbuf_create_ensure(est_size, pool); 587 588 svn_stringbuf_appendcstr(*str, "<"); 589 svn_stringbuf_appendcstr(*str, tagname); 590 591 for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi)) 592 { 593 const void *key; 594 void *val; 595 596 apr_hash_this(hi, &key, NULL, &val); 597 assert(val != NULL); 598 599 svn_stringbuf_appendcstr(*str, "\n "); 600 svn_stringbuf_appendcstr(*str, key); 601 svn_stringbuf_appendcstr(*str, "=\""); 602 svn_xml_escape_attr_cstring(str, val, pool); 603 svn_stringbuf_appendcstr(*str, "\""); 604 } 605 606 if (style == svn_xml_self_closing) 607 svn_stringbuf_appendcstr(*str, "/"); 608 svn_stringbuf_appendcstr(*str, ">"); 609 if (style != svn_xml_protect_pcdata) 610 svn_stringbuf_appendcstr(*str, "\n"); 611} 612 613 614void 615svn_xml_make_open_tag_v(svn_stringbuf_t **str, 616 apr_pool_t *pool, 617 enum svn_xml_open_tag_style style, 618 const char *tagname, 619 va_list ap) 620{ 621 apr_pool_t *subpool = svn_pool_create(pool); 622 apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool); 623 624 svn_xml_make_open_tag_hash(str, pool, style, tagname, ht); 625 svn_pool_destroy(subpool); 626} 627 628 629 630void 631svn_xml_make_open_tag(svn_stringbuf_t **str, 632 apr_pool_t *pool, 633 enum svn_xml_open_tag_style style, 634 const char *tagname, 635 ...) 
636{ 637 va_list ap; 638 639 va_start(ap, tagname); 640 svn_xml_make_open_tag_v(str, pool, style, tagname, ap); 641 va_end(ap); 642} 643 644 645void svn_xml_make_close_tag(svn_stringbuf_t **str, 646 apr_pool_t *pool, 647 const char *tagname) 648{ 649 if (*str == NULL) 650 *str = svn_stringbuf_create_empty(pool); 651 652 svn_stringbuf_appendcstr(*str, "</"); 653 svn_stringbuf_appendcstr(*str, tagname); 654 svn_stringbuf_appendcstr(*str, ">\n"); 655} 656