1/* load.c --- parsing a 'dumpfile'-formatted stream. 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23 24#include "svn_private_config.h" 25#include "svn_hash.h" 26#include "svn_pools.h" 27#include "svn_error.h" 28#include "svn_fs.h" 29#include "svn_repos.h" 30#include "svn_string.h" 31#include "svn_path.h" 32#include "svn_props.h" 33#include "repos.h" 34#include "svn_private_config.h" 35#include "svn_mergeinfo.h" 36#include "svn_checksum.h" 37#include "svn_subst.h" 38#include "svn_ctype.h" 39 40#include <apr_lib.h> 41 42#include "private/svn_dep_compat.h" 43#include "private/svn_mergeinfo_private.h" 44 45/*----------------------------------------------------------------------*/ 46 47/** The parser and related helper funcs **/ 48 49 50static svn_error_t * 51stream_ran_dry(void) 52{ 53 return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL, 54 _("Premature end of content data in dumpstream")); 55} 56 57static svn_error_t * 58stream_malformed(void) 59{ 60 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 61 _("Dumpstream data appears to be malformed")); 62} 63 64/* Allocate a new hash *HEADERS in POOL, and read a series of 65 RFC822-style headers from STREAM. Duplicate each header's name and 66 value into POOL and store in hash as a const char * ==> const char *. 67 68 The headers are assumed to be terminated by a single blank line, 69 which will be permanently sucked from the stream and tossed. 70 71 If the caller has already read in the first header line, it should 72 be passed in as FIRST_HEADER. If not, pass NULL instead. 73 */ 74static svn_error_t * 75read_header_block(svn_stream_t *stream, 76 svn_stringbuf_t *first_header, 77 apr_hash_t **headers, 78 apr_pool_t *pool) 79{ 80 *headers = apr_hash_make(pool); 81 82 while (1) 83 { 84 svn_stringbuf_t *header_str; 85 const char *name, *value; 86 svn_boolean_t eof; 87 apr_size_t i = 0; 88 89 if (first_header != NULL) 90 { 91 header_str = first_header; 92 first_header = NULL; /* so we never visit this block again. */ 93 eof = FALSE; 94 } 95 96 else 97 /* Read the next line into a stringbuf. */ 98 SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool)); 99 100 if (svn_stringbuf_isempty(header_str)) 101 break; /* end of header block */ 102 else if (eof) 103 return stream_ran_dry(); 104 105 /* Find the next colon in the stringbuf. */ 106 while (header_str->data[i] != ':') 107 { 108 if (header_str->data[i] == '\0') 109 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 110 _("Dump stream contains a malformed " 111 "header (with no ':') at '%.20s'"), 112 header_str->data); 113 i++; 114 } 115 /* Create a 'name' string and point to it. */ 116 header_str->data[i] = '\0'; 117 name = header_str->data; 118 119 /* Skip over the NULL byte and the space following it. */ 120 i += 2; 121 if (i > header_str->len) 122 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 123 _("Dump stream contains a malformed " 124 "header (with no value) at '%.20s'"), 125 header_str->data); 126 127 /* Point to the 'value' string. */ 128 value = header_str->data + i; 129 130 /* Store name/value in hash. */ 131 svn_hash_sets(*headers, name, value); 132 } 133 134 return SVN_NO_ERROR; 135} 136 137 138/* Set *PBUF to a string of length LEN, allocated in POOL, read from STREAM. 139 Also read a newline from STREAM and increase *ACTUAL_LEN by the total 140 number of bytes read from STREAM. */ 141static svn_error_t * 142read_key_or_val(char **pbuf, 143 svn_filesize_t *actual_length, 144 svn_stream_t *stream, 145 apr_size_t len, 146 apr_pool_t *pool) 147{ 148 char *buf = apr_pcalloc(pool, len + 1); 149 apr_size_t numread; 150 char c; 151 152 numread = len; 153 SVN_ERR(svn_stream_read(stream, buf, &numread)); 154 *actual_length += numread; 155 if (numread != len) 156 return svn_error_trace(stream_ran_dry()); 157 buf[len] = '\0'; 158 159 /* Suck up extra newline after key data */ 160 numread = 1; 161 SVN_ERR(svn_stream_read(stream, &c, &numread)); 162 *actual_length += numread; 163 if (numread != 1) 164 return svn_error_trace(stream_ran_dry()); 165 if (c != '\n') 166 return svn_error_trace(stream_malformed()); 167 168 *pbuf = buf; 169 return SVN_NO_ERROR; 170} 171 172 173/* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an 174 encoded Subversion properties hash, and making multiple calls to 175 PARSE_FNS->set_*_property on RECORD_BATON (depending on the value 176 of IS_NODE.) 177 178 Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM. 179 If an error is returned, the value of *ACTUAL_LENGTH is undefined. 180 181 Use POOL for all allocations. */ 182static svn_error_t * 183parse_property_block(svn_stream_t *stream, 184 svn_filesize_t content_length, 185 const svn_repos_parse_fns3_t *parse_fns, 186 void *record_baton, 187 void *parse_baton, 188 svn_boolean_t is_node, 189 svn_filesize_t *actual_length, 190 apr_pool_t *pool) 191{ 192 svn_stringbuf_t *strbuf; 193 apr_pool_t *proppool = svn_pool_create(pool); 194 195 *actual_length = 0; 196 while (content_length != *actual_length) 197 { 198 char *buf; /* a pointer into the stringbuf's data */ 199 svn_boolean_t eof; 200 201 svn_pool_clear(proppool); 202 203 /* Read a key length line. (Actually, it might be PROPS_END). */ 204 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool)); 205 206 if (eof) 207 { 208 /* We could just use stream_ran_dry() or stream_malformed(), 209 but better to give a non-generic property block error. */ 210 return svn_error_create 211 (SVN_ERR_STREAM_MALFORMED_DATA, NULL, 212 _("Incomplete or unterminated property block")); 213 } 214 215 *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */ 216 buf = strbuf->data; 217 218 if (! strcmp(buf, "PROPS-END")) 219 break; /* no more properties. */ 220 221 else if ((buf[0] == 'K') && (buf[1] == ' ')) 222 { 223 char *keybuf; 224 apr_uint64_t len; 225 226 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10)); 227 SVN_ERR(read_key_or_val(&keybuf, actual_length, 228 stream, (apr_size_t)len, proppool)); 229 230 /* Read a val length line */ 231 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool)); 232 if (eof) 233 return stream_ran_dry(); 234 235 *actual_length += (strbuf->len + 1); /* +1 because we read \n too */ 236 buf = strbuf->data; 237 238 if ((buf[0] == 'V') && (buf[1] == ' ')) 239 { 240 svn_string_t propstring; 241 char *valbuf; 242 apr_int64_t val; 243 244 SVN_ERR(svn_cstring_atoi64(&val, buf + 2)); 245 propstring.len = (apr_size_t)val; 246 SVN_ERR(read_key_or_val(&valbuf, actual_length, 247 stream, propstring.len, proppool)); 248 propstring.data = valbuf; 249 250 /* Now, send the property pair to the vtable! */ 251 if (is_node) 252 { 253 SVN_ERR(parse_fns->set_node_property(record_baton, 254 keybuf, 255 &propstring)); 256 } 257 else 258 { 259 SVN_ERR(parse_fns->set_revision_property(record_baton, 260 keybuf, 261 &propstring)); 262 } 263 } 264 else 265 return stream_malformed(); /* didn't find expected 'V' line */ 266 } 267 else if ((buf[0] == 'D') && (buf[1] == ' ')) 268 { 269 char *keybuf; 270 apr_uint64_t len; 271 272 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10)); 273 SVN_ERR(read_key_or_val(&keybuf, actual_length, 274 stream, (apr_size_t)len, proppool)); 275 276 /* We don't expect these in revision properties, and if we see 277 one when we don't have a delete_node_property callback, 278 then we're seeing a v3 feature in a v2 dump. */ 279 if (!is_node || !parse_fns->delete_node_property) 280 return stream_malformed(); 281 282 SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf)); 283 } 284 else 285 return stream_malformed(); /* didn't find expected 'K' line */ 286 287 } /* while (1) */ 288 289 svn_pool_destroy(proppool); 290 return SVN_NO_ERROR; 291} 292 293 294/* Read CONTENT_LENGTH bytes from STREAM, and use 295 PARSE_FNS->set_fulltext to push those bytes as replace fulltext for 296 a node. Use BUFFER/BUFLEN to push the fulltext in "chunks". 297 298 Use POOL for all allocations. */ 299static svn_error_t * 300parse_text_block(svn_stream_t *stream, 301 svn_filesize_t content_length, 302 svn_boolean_t is_delta, 303 const svn_repos_parse_fns3_t *parse_fns, 304 void *record_baton, 305 char *buffer, 306 apr_size_t buflen, 307 apr_pool_t *pool) 308{ 309 svn_stream_t *text_stream = NULL; 310 apr_size_t num_to_read, rlen, wlen; 311 312 if (is_delta) 313 { 314 svn_txdelta_window_handler_t wh; 315 void *whb; 316 317 SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton)); 318 if (wh) 319 text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool); 320 } 321 else 322 { 323 /* Get a stream to which we can push the data. */ 324 SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton)); 325 } 326 327 /* If there are no contents to read, just write an empty buffer 328 through our callback. */ 329 if (content_length == 0) 330 { 331 wlen = 0; 332 if (text_stream) 333 SVN_ERR(svn_stream_write(text_stream, "", &wlen)); 334 } 335 336 /* Regardless of whether or not we have a sink for our data, we 337 need to read it. */ 338 while (content_length) 339 { 340 if (content_length >= (svn_filesize_t)buflen) 341 rlen = buflen; 342 else 343 rlen = (apr_size_t) content_length; 344 345 num_to_read = rlen; 346 SVN_ERR(svn_stream_read(stream, buffer, &rlen)); 347 content_length -= rlen; 348 if (rlen != num_to_read) 349 return stream_ran_dry(); 350 351 if (text_stream) 352 { 353 /* write however many bytes you read. */ 354 wlen = rlen; 355 SVN_ERR(svn_stream_write(text_stream, buffer, &wlen)); 356 if (wlen != rlen) 357 { 358 /* Uh oh, didn't write as many bytes as we read. */ 359 return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL, 360 _("Unexpected EOF writing contents")); 361 } 362 } 363 } 364 365 /* If we opened a stream, we must close it. */ 366 if (text_stream) 367 SVN_ERR(svn_stream_close(text_stream)); 368 369 return SVN_NO_ERROR; 370} 371 372 373 374/* Parse VERSIONSTRING and verify that we support the dumpfile format 375 version number, setting *VERSION appropriately. */ 376static svn_error_t * 377parse_format_version(int *version, 378 const char *versionstring) 379{ 380 static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1; 381 const char *p = strchr(versionstring, ':'); 382 int value; 383 384 if (p == NULL 385 || p != (versionstring + magic_len) 386 || strncmp(versionstring, 387 SVN_REPOS_DUMPFILE_MAGIC_HEADER, 388 magic_len)) 389 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 390 _("Malformed dumpfile header '%s'"), 391 versionstring); 392 393 SVN_ERR(svn_cstring_atoi(&value, p + 1)); 394 395 if (value > SVN_REPOS_DUMPFILE_FORMAT_VERSION) 396 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 397 _("Unsupported dumpfile version: %d"), 398 value); 399 400 *version = value; 401 return SVN_NO_ERROR; 402} 403 404 405 406/*----------------------------------------------------------------------*/ 407 408/** The public routines **/ 409 410svn_error_t * 411svn_repos_parse_dumpstream3(svn_stream_t *stream, 412 const svn_repos_parse_fns3_t *parse_fns, 413 void *parse_baton, 414 svn_boolean_t deltas_are_text, 415 svn_cancel_func_t cancel_func, 416 void *cancel_baton, 417 apr_pool_t *pool) 418{ 419 svn_boolean_t eof; 420 svn_stringbuf_t *linebuf; 421 void *rev_baton = NULL; 422 char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE); 423 apr_size_t buflen = SVN__STREAM_CHUNK_SIZE; 424 apr_pool_t *linepool = svn_pool_create(pool); 425 apr_pool_t *revpool = svn_pool_create(pool); 426 apr_pool_t *nodepool = svn_pool_create(pool); 427 int version; 428 429 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool)); 430 if (eof) 431 return stream_ran_dry(); 432 433 /* The first two lines of the stream are the dumpfile-format version 434 number, and a blank line. To preserve backward compatibility, 435 don't assume the existence of newer parser-vtable functions. */ 436 SVN_ERR(parse_format_version(&version, linebuf->data)); 437 if (parse_fns->magic_header_record != NULL) 438 SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool)); 439 440 /* A dumpfile "record" is defined to be a header-block of 441 rfc822-style headers, possibly followed by a content-block. 442 443 - A header-block is always terminated by a single blank line (\n\n) 444 445 - We know whether the record has a content-block by looking for 446 a 'Content-length:' header. The content-block will always be 447 of a specific length, plus an extra newline. 448 449 Once a record is fully sucked from the stream, an indeterminate 450 number of blank lines (or lines that begin with whitespace) may 451 follow before the next record (or the end of the stream.) 452 */ 453 454 while (1) 455 { 456 apr_hash_t *headers; 457 void *node_baton; 458 svn_boolean_t found_node = FALSE; 459 svn_boolean_t old_v1_with_cl = FALSE; 460 const char *content_length; 461 const char *prop_cl; 462 const char *text_cl; 463 const char *value; 464 svn_filesize_t actual_prop_length; 465 466 /* Clear our per-line pool. */ 467 svn_pool_clear(linepool); 468 469 /* Check for cancellation. */ 470 if (cancel_func) 471 SVN_ERR(cancel_func(cancel_baton)); 472 473 /* Keep reading blank lines until we discover a new record, or until 474 the stream runs out. */ 475 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool)); 476 477 if (eof) 478 { 479 if (svn_stringbuf_isempty(linebuf)) 480 break; /* end of stream, go home. */ 481 else 482 return stream_ran_dry(); 483 } 484 485 if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0]))) 486 continue; /* empty line ... loop */ 487 488 /*** Found the beginning of a new record. ***/ 489 490 /* The last line we read better be a header of some sort. 491 Read the whole header-block into a hash. */ 492 SVN_ERR(read_header_block(stream, linebuf, &headers, linepool)); 493 494 /*** Handle the various header blocks. ***/ 495 496 /* Is this a revision record? */ 497 if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER)) 498 { 499 /* If we already have a rev_baton open, we need to close it 500 and clear the per-revision subpool. */ 501 if (rev_baton != NULL) 502 { 503 SVN_ERR(parse_fns->close_revision(rev_baton)); 504 svn_pool_clear(revpool); 505 } 506 507 SVN_ERR(parse_fns->new_revision_record(&rev_baton, 508 headers, parse_baton, 509 revpool)); 510 } 511 /* Or is this, perhaps, a node record? */ 512 else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH)) 513 { 514 SVN_ERR(parse_fns->new_node_record(&node_baton, 515 headers, 516 rev_baton, 517 nodepool)); 518 found_node = TRUE; 519 } 520 /* Or is this the repos UUID? */ 521 else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID))) 522 { 523 SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool)); 524 } 525 /* Or perhaps a dumpfile format? */ 526 /* ### TODO: use parse_format_version */ 527 else if ((value = svn_hash_gets(headers, 528 SVN_REPOS_DUMPFILE_MAGIC_HEADER))) 529 { 530 /* ### someday, switch modes of operation here. */ 531 SVN_ERR(svn_cstring_atoi(&version, value)); 532 } 533 /* Or is this bogosity?! */ 534 else 535 { 536 /* What the heck is this record?!? */ 537 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 538 _("Unrecognized record type in stream")); 539 } 540 541 /* Need 3 values below to determine v1 dump type 542 543 Old (pre 0.14?) v1 dumps don't have Prop-content-length 544 and Text-content-length fields, but always have a properties 545 block in a block with Content-Length > 0 */ 546 547 content_length = svn_hash_gets(headers, 548 SVN_REPOS_DUMPFILE_CONTENT_LENGTH); 549 prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH); 550 text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH); 551 old_v1_with_cl = 552 version == 1 && content_length && ! prop_cl && ! text_cl; 553 554 /* Is there a props content-block to parse? */ 555 if (prop_cl || old_v1_with_cl) 556 { 557 const char *delta = svn_hash_gets(headers, 558 SVN_REPOS_DUMPFILE_PROP_DELTA); 559 svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0); 560 561 /* First, remove all node properties, unless this is a delta 562 property block. */ 563 if (found_node && !is_delta) 564 SVN_ERR(parse_fns->remove_node_props(node_baton)); 565 566 SVN_ERR(parse_property_block 567 (stream, 568 svn__atoui64(prop_cl ? prop_cl : content_length), 569 parse_fns, 570 found_node ? node_baton : rev_baton, 571 parse_baton, 572 found_node, 573 &actual_prop_length, 574 found_node ? nodepool : revpool)); 575 } 576 577 /* Is there a text content-block to parse? */ 578 if (text_cl) 579 { 580 const char *delta = svn_hash_gets(headers, 581 SVN_REPOS_DUMPFILE_TEXT_DELTA); 582 svn_boolean_t is_delta = FALSE; 583 if (! deltas_are_text) 584 is_delta = (delta && strcmp(delta, "true") == 0); 585 586 SVN_ERR(parse_text_block(stream, 587 svn__atoui64(text_cl), 588 is_delta, 589 parse_fns, 590 found_node ? node_baton : rev_baton, 591 buffer, 592 buflen, 593 found_node ? nodepool : revpool)); 594 } 595 else if (old_v1_with_cl) 596 { 597 /* An old-v1 block with a Content-length might have a text block. 598 If the property block did not consume all the bytes of the 599 Content-length, then it clearly does have a text block. 600 If not, then we must deduce whether we have an *empty* text 601 block or an *absent* text block. The rules are: 602 - "Node-kind: file" blocks have an empty (i.e. present, but 603 zero-length) text block, since they represent a file 604 modification. Note that file-copied-text-unmodified blocks 605 have no Content-length - even if they should have contained 606 a modified property block, the pre-0.14 dumper forgets to 607 dump the modified properties. 608 - If it is not a file node, then it is a revision or directory, 609 and so has an absent text block. 610 */ 611 const char *node_kind; 612 svn_filesize_t cl_value = svn__atoui64(content_length) 613 - actual_prop_length; 614 615 if (cl_value || 616 ((node_kind = svn_hash_gets(headers, 617 SVN_REPOS_DUMPFILE_NODE_KIND)) 618 && strcmp(node_kind, "file") == 0) 619 ) 620 SVN_ERR(parse_text_block(stream, 621 cl_value, 622 FALSE, 623 parse_fns, 624 found_node ? node_baton : rev_baton, 625 buffer, 626 buflen, 627 found_node ? nodepool : revpool)); 628 } 629 630 /* if we have a content-length header, did we read all of it? 631 in case of an old v1, we *always* read all of it, because 632 text-content-length == content-length - prop-content-length 633 */ 634 if (content_length && ! old_v1_with_cl) 635 { 636 apr_size_t rlen, num_to_read; 637 svn_filesize_t remaining = 638 svn__atoui64(content_length) - 639 (prop_cl ? svn__atoui64(prop_cl) : 0) - 640 (text_cl ? svn__atoui64(text_cl) : 0); 641 642 643 if (remaining < 0) 644 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 645 _("Sum of subblock sizes larger than " 646 "total block content length")); 647 648 /* Consume remaining bytes in this content block */ 649 while (remaining > 0) 650 { 651 if (remaining >= (svn_filesize_t)buflen) 652 rlen = buflen; 653 else 654 rlen = (apr_size_t) remaining; 655 656 num_to_read = rlen; 657 SVN_ERR(svn_stream_read(stream, buffer, &rlen)); 658 remaining -= rlen; 659 if (rlen != num_to_read) 660 return stream_ran_dry(); 661 } 662 } 663 664 /* If we just finished processing a node record, we need to 665 close the node record and clear the per-node subpool. */ 666 if (found_node) 667 { 668 SVN_ERR(parse_fns->close_node(node_baton)); 669 svn_pool_clear(nodepool); 670 } 671 672 /*** End of processing for one record. ***/ 673 674 } /* end of stream */ 675 676 /* Close out whatever revision we're in. */ 677 if (rev_baton != NULL) 678 SVN_ERR(parse_fns->close_revision(rev_baton)); 679 680 svn_pool_destroy(linepool); 681 svn_pool_destroy(revpool); 682 svn_pool_destroy(nodepool); 683 return SVN_NO_ERROR; 684} 685