/* load.c revision 299742 */
1/* load.c --- parsing a 'dumpfile'-formatted stream. 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23 24#include <apr.h> 25 26#include "svn_hash.h" 27#include "svn_pools.h" 28#include "svn_error.h" 29#include "svn_repos.h" 30#include "svn_string.h" 31#include "repos.h" 32#include "svn_private_config.h" 33#include "svn_ctype.h" 34 35#include "private/svn_dep_compat.h" 36 37/*----------------------------------------------------------------------*/ 38 39/** The parser and related helper funcs **/ 40 41 42static svn_error_t * 43stream_ran_dry(void) 44{ 45 return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL, 46 _("Premature end of content data in dumpstream")); 47} 48 49static svn_error_t * 50stream_malformed(void) 51{ 52 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 53 _("Dumpstream data appears to be malformed")); 54} 55 56/* Allocate a new hash *HEADERS in POOL, and read a series of 57 RFC822-style headers from STREAM. Duplicate each header's name and 58 value into POOL and store in hash as a const char * ==> const char *. 
59 60 The headers are assumed to be terminated by a single blank line, 61 which will be permanently sucked from the stream and tossed. 62 63 If the caller has already read in the first header line, it should 64 be passed in as FIRST_HEADER. If not, pass NULL instead. 65 */ 66static svn_error_t * 67read_header_block(svn_stream_t *stream, 68 svn_stringbuf_t *first_header, 69 apr_hash_t **headers, 70 apr_pool_t *pool) 71{ 72 *headers = apr_hash_make(pool); 73 74 while (1) 75 { 76 svn_stringbuf_t *header_str; 77 const char *name, *value; 78 svn_boolean_t eof; 79 apr_size_t i = 0; 80 81 if (first_header != NULL) 82 { 83 header_str = first_header; 84 first_header = NULL; /* so we never visit this block again. */ 85 eof = FALSE; 86 } 87 88 else 89 /* Read the next line into a stringbuf. */ 90 SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool)); 91 92 if (svn_stringbuf_isempty(header_str)) 93 break; /* end of header block */ 94 else if (eof) 95 return stream_ran_dry(); 96 97 /* Find the next colon in the stringbuf. */ 98 while (header_str->data[i] != ':') 99 { 100 if (header_str->data[i] == '\0') 101 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 102 _("Dump stream contains a malformed " 103 "header (with no ':') at '%.20s'"), 104 header_str->data); 105 i++; 106 } 107 /* Create a 'name' string and point to it. */ 108 header_str->data[i] = '\0'; 109 name = header_str->data; 110 111 /* Skip over the NULL byte and the space following it. */ 112 i += 2; 113 if (i > header_str->len) 114 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 115 _("Dump stream contains a malformed " 116 "header (with no value) at '%.20s'"), 117 header_str->data); 118 119 /* Point to the 'value' string. */ 120 value = header_str->data + i; 121 122 /* Store name/value in hash. */ 123 svn_hash_sets(*headers, name, value); 124 } 125 126 return SVN_NO_ERROR; 127} 128 129 130/* Set *PBUF to a string of length LEN, allocated in POOL, read from STREAM. 
131 Also read a newline from STREAM and increase *ACTUAL_LEN by the total 132 number of bytes read from STREAM. */ 133static svn_error_t * 134read_key_or_val(char **pbuf, 135 svn_filesize_t *actual_length, 136 svn_stream_t *stream, 137 apr_size_t len, 138 apr_pool_t *pool) 139{ 140 char *buf = apr_pcalloc(pool, len + 1); 141 apr_size_t numread; 142 char c; 143 144 numread = len; 145 SVN_ERR(svn_stream_read_full(stream, buf, &numread)); 146 *actual_length += numread; 147 if (numread != len) 148 return svn_error_trace(stream_ran_dry()); 149 buf[len] = '\0'; 150 151 /* Suck up extra newline after key data */ 152 numread = 1; 153 SVN_ERR(svn_stream_read_full(stream, &c, &numread)); 154 *actual_length += numread; 155 if (numread != 1) 156 return svn_error_trace(stream_ran_dry()); 157 if (c != '\n') 158 return svn_error_trace(stream_malformed()); 159 160 *pbuf = buf; 161 return SVN_NO_ERROR; 162} 163 164 165/* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an 166 encoded Subversion properties hash, and making multiple calls to 167 PARSE_FNS->set_*_property on RECORD_BATON (depending on the value 168 of IS_NODE.) 169 170 Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM. 171 If an error is returned, the value of *ACTUAL_LENGTH is undefined. 172 173 Use POOL for all allocations. */ 174static svn_error_t * 175parse_property_block(svn_stream_t *stream, 176 svn_filesize_t content_length, 177 const svn_repos_parse_fns3_t *parse_fns, 178 void *record_baton, 179 void *parse_baton, 180 svn_boolean_t is_node, 181 svn_filesize_t *actual_length, 182 apr_pool_t *pool) 183{ 184 svn_stringbuf_t *strbuf; 185 apr_pool_t *proppool = svn_pool_create(pool); 186 187 *actual_length = 0; 188 while (content_length != *actual_length) 189 { 190 char *buf; /* a pointer into the stringbuf's data */ 191 svn_boolean_t eof; 192 193 svn_pool_clear(proppool); 194 195 /* Read a key length line. (Actually, it might be PROPS_END). 
*/ 196 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool)); 197 198 if (eof) 199 { 200 /* We could just use stream_ran_dry() or stream_malformed(), 201 but better to give a non-generic property block error. */ 202 return svn_error_create 203 (SVN_ERR_STREAM_MALFORMED_DATA, NULL, 204 _("Incomplete or unterminated property block")); 205 } 206 207 *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */ 208 buf = strbuf->data; 209 210 if (! strcmp(buf, "PROPS-END")) 211 break; /* no more properties. */ 212 213 else if ((buf[0] == 'K') && (buf[1] == ' ')) 214 { 215 char *keybuf; 216 apr_uint64_t len; 217 218 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10)); 219 SVN_ERR(read_key_or_val(&keybuf, actual_length, 220 stream, (apr_size_t)len, proppool)); 221 222 /* Read a val length line */ 223 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool)); 224 if (eof) 225 return stream_ran_dry(); 226 227 *actual_length += (strbuf->len + 1); /* +1 because we read \n too */ 228 buf = strbuf->data; 229 230 if ((buf[0] == 'V') && (buf[1] == ' ')) 231 { 232 svn_string_t propstring; 233 char *valbuf; 234 apr_int64_t val; 235 236 SVN_ERR(svn_cstring_atoi64(&val, buf + 2)); 237 propstring.len = (apr_size_t)val; 238 SVN_ERR(read_key_or_val(&valbuf, actual_length, 239 stream, propstring.len, proppool)); 240 propstring.data = valbuf; 241 242 /* Now, send the property pair to the vtable! 
*/ 243 if (is_node) 244 { 245 SVN_ERR(parse_fns->set_node_property(record_baton, 246 keybuf, 247 &propstring)); 248 } 249 else 250 { 251 SVN_ERR(parse_fns->set_revision_property(record_baton, 252 keybuf, 253 &propstring)); 254 } 255 } 256 else 257 return stream_malformed(); /* didn't find expected 'V' line */ 258 } 259 else if ((buf[0] == 'D') && (buf[1] == ' ')) 260 { 261 char *keybuf; 262 apr_uint64_t len; 263 264 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10)); 265 SVN_ERR(read_key_or_val(&keybuf, actual_length, 266 stream, (apr_size_t)len, proppool)); 267 268 /* We don't expect these in revision properties, and if we see 269 one when we don't have a delete_node_property callback, 270 then we're seeing a v3 feature in a v2 dump. */ 271 if (!is_node || !parse_fns->delete_node_property) 272 return stream_malformed(); 273 274 SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf)); 275 } 276 else 277 return stream_malformed(); /* didn't find expected 'K' line */ 278 279 } /* while (1) */ 280 281 svn_pool_destroy(proppool); 282 return SVN_NO_ERROR; 283} 284 285 286/* Read CONTENT_LENGTH bytes from STREAM. If IS_DELTA is true, use 287 PARSE_FNS->apply_textdelta to push a text delta, otherwise use 288 PARSE_FNS->set_fulltext to push those bytes as replace fulltext for 289 a node. Use BUFFER/BUFLEN to push the fulltext in "chunks". 290 291 Use POOL for all allocations. 
*/ 292static svn_error_t * 293parse_text_block(svn_stream_t *stream, 294 svn_filesize_t content_length, 295 svn_boolean_t is_delta, 296 const svn_repos_parse_fns3_t *parse_fns, 297 void *record_baton, 298 char *buffer, 299 apr_size_t buflen, 300 apr_pool_t *pool) 301{ 302 svn_stream_t *text_stream = NULL; 303 apr_size_t num_to_read, rlen, wlen; 304 305 if (is_delta) 306 { 307 svn_txdelta_window_handler_t wh; 308 void *whb; 309 310 SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton)); 311 if (wh) 312 text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool); 313 } 314 else 315 { 316 /* Get a stream to which we can push the data. */ 317 SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton)); 318 } 319 320 /* Regardless of whether or not we have a sink for our data, we 321 need to read it. */ 322 while (content_length) 323 { 324 if (content_length >= (svn_filesize_t)buflen) 325 rlen = buflen; 326 else 327 rlen = (apr_size_t) content_length; 328 329 num_to_read = rlen; 330 SVN_ERR(svn_stream_read_full(stream, buffer, &rlen)); 331 content_length -= rlen; 332 if (rlen != num_to_read) 333 return stream_ran_dry(); 334 335 if (text_stream) 336 { 337 /* write however many bytes you read. */ 338 wlen = rlen; 339 SVN_ERR(svn_stream_write(text_stream, buffer, &wlen)); 340 if (wlen != rlen) 341 { 342 /* Uh oh, didn't write as many bytes as we read. */ 343 return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL, 344 _("Unexpected EOF writing contents")); 345 } 346 } 347 } 348 349 /* If we opened a stream, we must close it. */ 350 if (text_stream) 351 SVN_ERR(svn_stream_close(text_stream)); 352 353 return SVN_NO_ERROR; 354} 355 356 357 358/* Parse VERSIONSTRING and verify that we support the dumpfile format 359 version number, setting *VERSION appropriately. 
*/ 360static svn_error_t * 361parse_format_version(int *version, 362 const char *versionstring) 363{ 364 static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1; 365 const char *p = strchr(versionstring, ':'); 366 int value; 367 368 if (p == NULL 369 || p != (versionstring + magic_len) 370 || strncmp(versionstring, 371 SVN_REPOS_DUMPFILE_MAGIC_HEADER, 372 magic_len)) 373 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 374 _("Malformed dumpfile header '%s'"), 375 versionstring); 376 377 SVN_ERR(svn_cstring_atoi(&value, p + 1)); 378 379 if (value > SVN_REPOS_DUMPFILE_FORMAT_VERSION) 380 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 381 _("Unsupported dumpfile version: %d"), 382 value); 383 384 *version = value; 385 return SVN_NO_ERROR; 386} 387 388 389 390/*----------------------------------------------------------------------*/ 391 392/** The public routines **/ 393 394svn_error_t * 395svn_repos_parse_dumpstream3(svn_stream_t *stream, 396 const svn_repos_parse_fns3_t *parse_fns, 397 void *parse_baton, 398 svn_boolean_t deltas_are_text, 399 svn_cancel_func_t cancel_func, 400 void *cancel_baton, 401 apr_pool_t *pool) 402{ 403 svn_boolean_t eof; 404 svn_stringbuf_t *linebuf; 405 void *rev_baton = NULL; 406 char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE); 407 apr_size_t buflen = SVN__STREAM_CHUNK_SIZE; 408 apr_pool_t *linepool = svn_pool_create(pool); 409 apr_pool_t *revpool = svn_pool_create(pool); 410 apr_pool_t *nodepool = svn_pool_create(pool); 411 int version; 412 413 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool)); 414 if (eof) 415 return stream_ran_dry(); 416 417 /* The first two lines of the stream are the dumpfile-format version 418 number, and a blank line. To preserve backward compatibility, 419 don't assume the existence of newer parser-vtable functions. 
*/ 420 SVN_ERR(parse_format_version(&version, linebuf->data)); 421 if (parse_fns->magic_header_record != NULL) 422 SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool)); 423 424 /* A dumpfile "record" is defined to be a header-block of 425 rfc822-style headers, possibly followed by a content-block. 426 427 - A header-block is always terminated by a single blank line (\n\n) 428 429 - We know whether the record has a content-block by looking for 430 a 'Content-length:' header. The content-block will always be 431 of a specific length, plus an extra newline. 432 433 Once a record is fully sucked from the stream, an indeterminate 434 number of blank lines (or lines that begin with whitespace) may 435 follow before the next record (or the end of the stream.) 436 */ 437 438 while (1) 439 { 440 apr_hash_t *headers; 441 void *node_baton; 442 svn_boolean_t found_node = FALSE; 443 svn_boolean_t old_v1_with_cl = FALSE; 444 const char *content_length; 445 const char *prop_cl; 446 const char *text_cl; 447 const char *value; 448 svn_filesize_t actual_prop_length; 449 450 /* Clear our per-line pool. */ 451 svn_pool_clear(linepool); 452 453 /* Check for cancellation. */ 454 if (cancel_func) 455 SVN_ERR(cancel_func(cancel_baton)); 456 457 /* Keep reading blank lines until we discover a new record, or until 458 the stream runs out. */ 459 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool)); 460 461 if (eof) 462 { 463 if (svn_stringbuf_isempty(linebuf)) 464 break; /* end of stream, go home. */ 465 else 466 return stream_ran_dry(); 467 } 468 469 if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0]))) 470 continue; /* empty line ... loop */ 471 472 /*** Found the beginning of a new record. ***/ 473 474 /* The last line we read better be a header of some sort. 475 Read the whole header-block into a hash. */ 476 SVN_ERR(read_header_block(stream, linebuf, &headers, linepool)); 477 478 /*** Handle the various header blocks. 
***/ 479 480 /* Is this a revision record? */ 481 if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER)) 482 { 483 /* If we already have a rev_baton open, we need to close it 484 and clear the per-revision subpool. */ 485 if (rev_baton != NULL) 486 { 487 SVN_ERR(parse_fns->close_revision(rev_baton)); 488 svn_pool_clear(revpool); 489 } 490 491 SVN_ERR(parse_fns->new_revision_record(&rev_baton, 492 headers, parse_baton, 493 revpool)); 494 } 495 /* Or is this, perhaps, a node record? */ 496 else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH)) 497 { 498 SVN_ERR(parse_fns->new_node_record(&node_baton, 499 headers, 500 rev_baton, 501 nodepool)); 502 found_node = TRUE; 503 } 504 /* Or is this the repos UUID? */ 505 else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID))) 506 { 507 SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool)); 508 } 509 /* Or perhaps a dumpfile format? */ 510 /* ### TODO: use parse_format_version */ 511 else if ((value = svn_hash_gets(headers, 512 SVN_REPOS_DUMPFILE_MAGIC_HEADER))) 513 { 514 /* ### someday, switch modes of operation here. */ 515 SVN_ERR(svn_cstring_atoi(&version, value)); 516 } 517 /* Or is this bogosity?! */ 518 else 519 { 520 /* What the heck is this record?!? */ 521 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 522 _("Unrecognized record type in stream")); 523 } 524 525 /* Need 3 values below to determine v1 dump type 526 527 Old (pre 0.14?) v1 dumps don't have Prop-content-length 528 and Text-content-length fields, but always have a properties 529 block in a block with Content-Length > 0 */ 530 531 content_length = svn_hash_gets(headers, 532 SVN_REPOS_DUMPFILE_CONTENT_LENGTH); 533 prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH); 534 text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH); 535 old_v1_with_cl = 536 version == 1 && content_length && ! prop_cl && ! text_cl; 537 538 /* Is there a props content-block to parse? 
*/ 539 if (prop_cl || old_v1_with_cl) 540 { 541 const char *delta = svn_hash_gets(headers, 542 SVN_REPOS_DUMPFILE_PROP_DELTA); 543 svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0); 544 545 /* First, remove all node properties, unless this is a delta 546 property block. */ 547 if (found_node && !is_delta) 548 SVN_ERR(parse_fns->remove_node_props(node_baton)); 549 550 SVN_ERR(parse_property_block 551 (stream, 552 svn__atoui64(prop_cl ? prop_cl : content_length), 553 parse_fns, 554 found_node ? node_baton : rev_baton, 555 parse_baton, 556 found_node, 557 &actual_prop_length, 558 found_node ? nodepool : revpool)); 559 } 560 561 /* Is there a text content-block to parse? */ 562 if (text_cl) 563 { 564 const char *delta = svn_hash_gets(headers, 565 SVN_REPOS_DUMPFILE_TEXT_DELTA); 566 svn_boolean_t is_delta = FALSE; 567 if (! deltas_are_text) 568 is_delta = (delta && strcmp(delta, "true") == 0); 569 570 SVN_ERR(parse_text_block(stream, 571 svn__atoui64(text_cl), 572 is_delta, 573 parse_fns, 574 found_node ? node_baton : rev_baton, 575 buffer, 576 buflen, 577 found_node ? nodepool : revpool)); 578 } 579 else if (old_v1_with_cl) 580 { 581 /* An old-v1 block with a Content-length might have a text block. 582 If the property block did not consume all the bytes of the 583 Content-length, then it clearly does have a text block. 584 If not, then we must deduce whether we have an *empty* text 585 block or an *absent* text block. The rules are: 586 - "Node-kind: file" blocks have an empty (i.e. present, but 587 zero-length) text block, since they represent a file 588 modification. Note that file-copied-text-unmodified blocks 589 have no Content-length - even if they should have contained 590 a modified property block, the pre-0.14 dumper forgets to 591 dump the modified properties. 592 - If it is not a file node, then it is a revision or directory, 593 and so has an absent text block. 
594 */ 595 const char *node_kind; 596 svn_filesize_t cl_value = svn__atoui64(content_length) 597 - actual_prop_length; 598 599 if (cl_value || 600 ((node_kind = svn_hash_gets(headers, 601 SVN_REPOS_DUMPFILE_NODE_KIND)) 602 && strcmp(node_kind, "file") == 0) 603 ) 604 SVN_ERR(parse_text_block(stream, 605 cl_value, 606 FALSE, 607 parse_fns, 608 found_node ? node_baton : rev_baton, 609 buffer, 610 buflen, 611 found_node ? nodepool : revpool)); 612 } 613 614 /* if we have a content-length header, did we read all of it? 615 in case of an old v1, we *always* read all of it, because 616 text-content-length == content-length - prop-content-length 617 */ 618 if (content_length && ! old_v1_with_cl) 619 { 620 apr_size_t rlen, num_to_read; 621 svn_filesize_t remaining = 622 svn__atoui64(content_length) - 623 (prop_cl ? svn__atoui64(prop_cl) : 0) - 624 (text_cl ? svn__atoui64(text_cl) : 0); 625 626 627 if (remaining < 0) 628 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL, 629 _("Sum of subblock sizes larger than " 630 "total block content length")); 631 632 /* Consume remaining bytes in this content block */ 633 while (remaining > 0) 634 { 635 if (remaining >= (svn_filesize_t)buflen) 636 rlen = buflen; 637 else 638 rlen = (apr_size_t) remaining; 639 640 num_to_read = rlen; 641 SVN_ERR(svn_stream_read_full(stream, buffer, &rlen)); 642 remaining -= rlen; 643 if (rlen != num_to_read) 644 return stream_ran_dry(); 645 } 646 } 647 648 /* If we just finished processing a node record, we need to 649 close the node record and clear the per-node subpool. */ 650 if (found_node) 651 { 652 SVN_ERR(parse_fns->close_node(node_baton)); 653 svn_pool_clear(nodepool); 654 } 655 656 /*** End of processing for one record. ***/ 657 658 } /* end of stream */ 659 660 /* Close out whatever revision we're in. 
*/ 661 if (rev_baton != NULL) 662 SVN_ERR(parse_fns->close_revision(rev_baton)); 663 664 svn_pool_destroy(linepool); 665 svn_pool_destroy(revpool); 666 svn_pool_destroy(nodepool); 667 return SVN_NO_ERROR; 668} 669