/* reps-strings.c revision 299742 */
1/* reps-strings.c : intepreting representations with respect to strings 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23#include <assert.h> 24 25#include "svn_fs.h" 26#include "svn_pools.h" 27 28#include "fs.h" 29#include "err.h" 30#include "trail.h" 31#include "reps-strings.h" 32 33#include "bdb/reps-table.h" 34#include "bdb/strings-table.h" 35 36#include "../libsvn_fs/fs-loader.h" 37#define SVN_WANT_BDB 38#include "svn_private_config.h" 39 40 41/*** Helper Functions ***/ 42 43 44/* Return non-zero iff REP is mutable under transaction TXN_ID. */ 45static svn_boolean_t rep_is_mutable(representation_t *rep, 46 const char *txn_id) 47{ 48 if ((! rep->txn_id) || (strcmp(rep->txn_id, txn_id) != 0)) 49 return FALSE; 50 return TRUE; 51} 52 53/* Helper macro that evaluates to an error message indicating that 54 the representation referred to by X has an unknown node kind. 
*/ 55#define UNKNOWN_NODE_KIND(x) \ 56 svn_error_createf \ 57 (SVN_ERR_FS_CORRUPT, NULL, \ 58 _("Unknown node kind for representation '%s'"), x) 59 60/* Return a `fulltext' representation, allocated in POOL, which 61 * references the string STR_KEY. 62 * 63 * If TXN_ID is non-zero and non-NULL, make the representation mutable 64 * under that TXN_ID. 65 * 66 * If STR_KEY is non-null, copy it into an allocation from POOL. 67 * 68 * If MD5_CHECKSUM is non-null, use it as the MD5 checksum for the new 69 * rep; else initialize the rep with an all-zero (i.e., always 70 * successful) MD5 checksum. 71 * 72 * If SHA1_CHECKSUM is non-null, use it as the SHA1 checksum for the new 73 * rep; else initialize the rep with an all-zero (i.e., always 74 * successful) SHA1 checksum. 75 */ 76static representation_t * 77make_fulltext_rep(const char *str_key, 78 const char *txn_id, 79 svn_checksum_t *md5_checksum, 80 svn_checksum_t *sha1_checksum, 81 apr_pool_t *pool) 82 83{ 84 representation_t *rep = apr_pcalloc(pool, sizeof(*rep)); 85 if (txn_id && *txn_id) 86 rep->txn_id = apr_pstrdup(pool, txn_id); 87 rep->kind = rep_kind_fulltext; 88 rep->md5_checksum = svn_checksum_dup(md5_checksum, pool); 89 rep->sha1_checksum = svn_checksum_dup(sha1_checksum, pool); 90 rep->contents.fulltext.string_key 91 = str_key ? apr_pstrdup(pool, str_key) : NULL; 92 return rep; 93} 94 95 96/* Set *KEYS to an array of string keys gleaned from `delta' 97 representation REP. Allocate *KEYS in POOL. */ 98static svn_error_t * 99delta_string_keys(apr_array_header_t **keys, 100 const representation_t *rep, 101 apr_pool_t *pool) 102{ 103 const char *key; 104 int i; 105 apr_array_header_t *chunks; 106 107 if (rep->kind != rep_kind_delta) 108 return svn_error_create 109 (SVN_ERR_FS_GENERAL, NULL, 110 _("Representation is not of type 'delta'")); 111 112 /* Set up a convenience variable. */ 113 chunks = rep->contents.delta.chunks; 114 115 /* Initialize *KEYS to an empty array. 
*/ 116 *keys = apr_array_make(pool, chunks->nelts, sizeof(key)); 117 if (! chunks->nelts) 118 return SVN_NO_ERROR; 119 120 /* Now, push the string keys for each window into *KEYS */ 121 for (i = 0; i < chunks->nelts; i++) 122 { 123 rep_delta_chunk_t *chunk = APR_ARRAY_IDX(chunks, i, rep_delta_chunk_t *); 124 125 key = apr_pstrdup(pool, chunk->string_key); 126 APR_ARRAY_PUSH(*keys, const char *) = key; 127 } 128 129 return SVN_NO_ERROR; 130} 131 132 133/* Delete the strings associated with array KEYS in FS as part of TRAIL. */ 134static svn_error_t * 135delete_strings(const apr_array_header_t *keys, 136 svn_fs_t *fs, 137 trail_t *trail, 138 apr_pool_t *pool) 139{ 140 int i; 141 const char *str_key; 142 apr_pool_t *subpool = svn_pool_create(pool); 143 144 for (i = 0; i < keys->nelts; i++) 145 { 146 svn_pool_clear(subpool); 147 str_key = APR_ARRAY_IDX(keys, i, const char *); 148 SVN_ERR(svn_fs_bdb__string_delete(fs, str_key, trail, subpool)); 149 } 150 svn_pool_destroy(subpool); 151 return SVN_NO_ERROR; 152} 153 154 155 156/*** Reading the contents from a representation. ***/ 157 158struct compose_handler_baton 159{ 160 /* The combined window, and the pool it's allocated from. */ 161 svn_txdelta_window_t *window; 162 apr_pool_t *window_pool; 163 164 /* If the incoming window was self-compressed, and the combined WINDOW 165 exists from previous iterations, SOURCE_BUF will point to the 166 expanded self-compressed window. */ 167 char *source_buf; 168 169 /* The trail for this operation. WINDOW_POOL will be a child of 170 TRAIL->pool. No allocations will be made from TRAIL->pool itself. */ 171 trail_t *trail; 172 173 /* TRUE when no more windows have to be read/combined. */ 174 svn_boolean_t done; 175 176 /* TRUE if we've just started reading a new window. We need this 177 because the svndiff handler will push a NULL window at the end of 178 the stream, and we have to ignore that; but we must also know 179 when it's appropriate to push a NULL window at the combiner. 
*/ 180 svn_boolean_t init; 181}; 182 183 184/* Handle one window. If BATON is emtpy, copy the WINDOW into it; 185 otherwise, combine WINDOW with the one in BATON, unless WINDOW 186 is self-compressed (i.e., does not copy from the source view), 187 in which case expand. */ 188 189static svn_error_t * 190compose_handler(svn_txdelta_window_t *window, void *baton) 191{ 192 struct compose_handler_baton *cb = baton; 193 SVN_ERR_ASSERT(!cb->done || window == NULL); 194 SVN_ERR_ASSERT(cb->trail && cb->trail->pool); 195 196 if (!cb->init && !window) 197 return SVN_NO_ERROR; 198 199 /* We should never get here if we've already expanded a 200 self-compressed window. */ 201 SVN_ERR_ASSERT(!cb->source_buf); 202 203 if (cb->window) 204 { 205 if (window && (window->sview_len == 0 || window->src_ops == 0)) 206 { 207 /* This is a self-compressed window. Don't combine it with 208 the others, because the combiner may go quadratic. Instead, 209 expand it here and signal that the combination has 210 ended. */ 211 apr_size_t source_len = window->tview_len; 212 SVN_ERR_ASSERT(cb->window->sview_len == source_len); 213 cb->source_buf = apr_palloc(cb->window_pool, source_len); 214 svn_txdelta_apply_instructions(window, NULL, 215 cb->source_buf, &source_len); 216 cb->done = TRUE; 217 } 218 else 219 { 220 /* Combine the incoming window with whatever's in the baton. */ 221 apr_pool_t *composite_pool = svn_pool_create(cb->trail->pool); 222 svn_txdelta_window_t *composite; 223 224 composite = svn_txdelta_compose_windows(window, cb->window, 225 composite_pool); 226 svn_pool_destroy(cb->window_pool); 227 cb->window = composite; 228 cb->window_pool = composite_pool; 229 cb->done = (composite->sview_len == 0 || composite->src_ops == 0); 230 } 231 } 232 else if (window) 233 { 234 /* Copy the (first) window into the baton. 
*/ 235 apr_pool_t *window_pool = svn_pool_create(cb->trail->pool); 236 SVN_ERR_ASSERT(cb->window_pool == NULL); 237 cb->window = svn_txdelta_window_dup(window, window_pool); 238 cb->window_pool = window_pool; 239 cb->done = (window->sview_len == 0 || window->src_ops == 0); 240 } 241 else 242 cb->done = TRUE; 243 244 cb->init = FALSE; 245 return SVN_NO_ERROR; 246} 247 248 249 250/* Read one delta window from REP[CUR_CHUNK] and push it at the 251 composition handler. */ 252 253static svn_error_t * 254get_one_window(struct compose_handler_baton *cb, 255 svn_fs_t *fs, 256 representation_t *rep, 257 int cur_chunk) 258{ 259 svn_stream_t *wstream; 260 char diffdata[4096]; /* hunk of svndiff data */ 261 svn_filesize_t off; /* offset into svndiff data */ 262 apr_size_t amt; /* how much svndiff data to/was read */ 263 const char *str_key; 264 265 apr_array_header_t *chunks = rep->contents.delta.chunks; 266 rep_delta_chunk_t *this_chunk, *first_chunk; 267 268 cb->init = TRUE; 269 if (chunks->nelts <= cur_chunk) 270 return compose_handler(NULL, cb); 271 272 /* Set up a window handling stream for the svndiff data. */ 273 wstream = svn_txdelta_parse_svndiff(compose_handler, cb, TRUE, 274 cb->trail->pool); 275 276 /* First things first: send the "SVN"{version} header through the 277 stream. ### For now, we will just use the version specified 278 in the first chunk, and then verify that no chunks have a 279 different version number than the one used. In the future, 280 we might simply convert chunks that use a different version 281 of the diff format -- or, heck, a different format 282 altogether -- to the format/version of the first chunk. 
*/ 283 first_chunk = APR_ARRAY_IDX(chunks, 0, rep_delta_chunk_t*); 284 diffdata[0] = 'S'; 285 diffdata[1] = 'V'; 286 diffdata[2] = 'N'; 287 diffdata[3] = (char) (first_chunk->version); 288 amt = 4; 289 SVN_ERR(svn_stream_write(wstream, diffdata, &amt)); 290 /* FIXME: The stream write handler is borked; assert (amt == 4); */ 291 292 /* Get this string key which holds this window's data. 293 ### todo: make sure this is an `svndiff' DIFF skel here. */ 294 this_chunk = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*); 295 str_key = this_chunk->string_key; 296 297 /* Run through the svndiff data, at least as far as necessary. */ 298 off = 0; 299 do 300 { 301 amt = sizeof(diffdata); 302 SVN_ERR(svn_fs_bdb__string_read(fs, str_key, diffdata, 303 off, &amt, cb->trail, 304 cb->trail->pool)); 305 off += amt; 306 SVN_ERR(svn_stream_write(wstream, diffdata, &amt)); 307 } 308 while (amt != 0); 309 SVN_ERR(svn_stream_close(wstream)); 310 311 SVN_ERR_ASSERT(!cb->init); 312 SVN_ERR_ASSERT(cb->window != NULL); 313 SVN_ERR_ASSERT(cb->window_pool != NULL); 314 return SVN_NO_ERROR; 315} 316 317 318/* Undeltify a range of data. DELTAS is the set of delta windows to 319 combine, FULLTEXT is the source text, CUR_CHUNK is the index of the 320 delta chunk we're starting from. OFFSET is the relative offset of 321 the requested data within the chunk; BUF and LEN are what we're 322 undeltifying to. 
*/ 323 324static svn_error_t * 325rep_undeltify_range(svn_fs_t *fs, 326 const apr_array_header_t *deltas, 327 representation_t *fulltext, 328 int cur_chunk, 329 char *buf, 330 apr_size_t offset, 331 apr_size_t *len, 332 trail_t *trail, 333 apr_pool_t *pool) 334{ 335 apr_size_t len_read = 0; 336 337 do 338 { 339 struct compose_handler_baton cb = { 0 }; 340 char *source_buf, *target_buf; 341 apr_size_t target_len; 342 int cur_rep; 343 344 cb.trail = trail; 345 cb.done = FALSE; 346 for (cur_rep = 0; !cb.done && cur_rep < deltas->nelts; ++cur_rep) 347 { 348 representation_t *const rep = 349 APR_ARRAY_IDX(deltas, cur_rep, representation_t*); 350 SVN_ERR(get_one_window(&cb, fs, rep, cur_chunk)); 351 } 352 353 if (!cb.window) 354 /* That's it, no more source data is available. */ 355 break; 356 357 /* The source view length should not be 0 if there are source 358 copy ops in the window. */ 359 SVN_ERR_ASSERT(cb.window->sview_len > 0 || cb.window->src_ops == 0); 360 361 /* cb.window is the combined delta window. Read the source text 362 into a buffer. */ 363 if (cb.source_buf) 364 { 365 /* The combiner already created the source text from a 366 self-compressed window. */ 367 source_buf = cb.source_buf; 368 } 369 else if (fulltext && cb.window->sview_len > 0 && cb.window->src_ops > 0) 370 { 371 apr_size_t source_len = cb.window->sview_len; 372 source_buf = apr_palloc(cb.window_pool, source_len); 373 SVN_ERR(svn_fs_bdb__string_read 374 (fs, fulltext->contents.fulltext.string_key, 375 source_buf, cb.window->sview_offset, &source_len, 376 trail, pool)); 377 if (source_len != cb.window->sview_len) 378 return svn_error_create 379 (SVN_ERR_FS_CORRUPT, NULL, 380 _("Svndiff source length inconsistency")); 381 } 382 else 383 { 384 source_buf = NULL; /* Won't read anything from here. 
*/ 385 } 386 387 if (offset > 0) 388 { 389 target_len = *len - len_read + offset; 390 target_buf = apr_palloc(cb.window_pool, target_len); 391 } 392 else 393 { 394 target_len = *len - len_read; 395 target_buf = buf; 396 } 397 398 svn_txdelta_apply_instructions(cb.window, source_buf, 399 target_buf, &target_len); 400 if (offset > 0) 401 { 402 SVN_ERR_ASSERT(target_len > offset); 403 target_len -= offset; 404 memcpy(buf, target_buf + offset, target_len); 405 offset = 0; /* Read from the beginning of the next chunk. */ 406 } 407 /* Don't need this window any more. */ 408 svn_pool_destroy(cb.window_pool); 409 410 len_read += target_len; 411 buf += target_len; 412 ++cur_chunk; 413 } 414 while (len_read < *len); 415 416 *len = len_read; 417 return SVN_NO_ERROR; 418} 419 420 421 422/* Calculate the index of the chunk in REP that contains REP_OFFSET, 423 and find the relative CHUNK_OFFSET within the chunk. 424 Return -1 if offset is beyond the end of the represented data. 425 ### The basic assumption is that all delta windows are the same size 426 and aligned at the same offset, so this number is the same in all 427 dependent deltas. Oh, and the chunks in REP must be ordered. */ 428 429static int 430get_chunk_offset(representation_t *rep, 431 svn_filesize_t rep_offset, 432 apr_size_t *chunk_offset) 433{ 434 const apr_array_header_t *chunks = rep->contents.delta.chunks; 435 int cur_chunk; 436 assert(chunks->nelts); 437 438 /* ### Yes, this is a linear search. I'll change this to bisection 439 the very second we notice it's slowing us down. 
*/ 440 for (cur_chunk = 0; cur_chunk < chunks->nelts; ++cur_chunk) 441 { 442 const rep_delta_chunk_t *const this_chunk 443 = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*); 444 445 if ((this_chunk->offset + this_chunk->size) > rep_offset) 446 { 447 assert(this_chunk->offset <= rep_offset); 448 assert(rep_offset - this_chunk->offset < SVN_MAX_OBJECT_SIZE); 449 *chunk_offset = (apr_size_t) (rep_offset - this_chunk->offset); 450 return cur_chunk; 451 } 452 } 453 454 return -1; 455} 456 457/* Copy into BUF *LEN bytes starting at OFFSET from the string 458 represented via REP_KEY in FS, as part of TRAIL. 459 The number of bytes actually copied is stored in *LEN. */ 460static svn_error_t * 461rep_read_range(svn_fs_t *fs, 462 const char *rep_key, 463 svn_filesize_t offset, 464 char *buf, 465 apr_size_t *len, 466 trail_t *trail, 467 apr_pool_t *pool) 468{ 469 representation_t *rep; 470 apr_size_t chunk_offset; 471 472 /* Read in our REP. */ 473 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); 474 if (rep->kind == rep_kind_fulltext) 475 { 476 SVN_ERR(svn_fs_bdb__string_read(fs, rep->contents.fulltext.string_key, 477 buf, offset, len, trail, pool)); 478 } 479 else if (rep->kind == rep_kind_delta) 480 { 481 const int cur_chunk = get_chunk_offset(rep, offset, &chunk_offset); 482 if (cur_chunk < 0) 483 *len = 0; 484 else 485 { 486 svn_error_t *err; 487 /* Preserve for potential use in error message. */ 488 const char *first_rep_key = rep_key; 489 /* Make a list of all the rep's we need to undeltify this range. 490 We'll have to read them within this trail anyway, so we might 491 as well do it once and up front. 
*/ 492 apr_array_header_t *reps = apr_array_make(pool, 30, sizeof(rep)); 493 do 494 { 495 const rep_delta_chunk_t *const first_chunk 496 = APR_ARRAY_IDX(rep->contents.delta.chunks, 497 0, rep_delta_chunk_t*); 498 const rep_delta_chunk_t *const chunk 499 = APR_ARRAY_IDX(rep->contents.delta.chunks, 500 cur_chunk, rep_delta_chunk_t*); 501 502 /* Verify that this chunk is of the same version as the first. */ 503 if (first_chunk->version != chunk->version) 504 return svn_error_createf 505 (SVN_ERR_FS_CORRUPT, NULL, 506 _("Diff version inconsistencies in representation '%s'"), 507 rep_key); 508 509 rep_key = chunk->rep_key; 510 APR_ARRAY_PUSH(reps, representation_t *) = rep; 511 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, 512 trail, pool)); 513 } 514 while (rep->kind == rep_kind_delta 515 && rep->contents.delta.chunks->nelts > cur_chunk); 516 517 /* Right. We've either just read the fulltext rep, or a rep that's 518 too short, in which case we'll undeltify without source data.*/ 519 if (rep->kind != rep_kind_delta && rep->kind != rep_kind_fulltext) 520 return UNKNOWN_NODE_KIND(rep_key); 521 522 if (rep->kind == rep_kind_delta) 523 rep = NULL; /* Don't use source data */ 524 525 err = rep_undeltify_range(fs, reps, rep, cur_chunk, buf, 526 chunk_offset, len, trail, pool); 527 if (err) 528 { 529 if (err->apr_err == SVN_ERR_FS_CORRUPT) 530 return svn_error_createf 531 (SVN_ERR_FS_CORRUPT, err, 532 _("Corruption detected whilst reading delta chain from " 533 "representation '%s' to '%s'"), first_rep_key, rep_key); 534 else 535 return svn_error_trace(err); 536 } 537 } 538 } 539 else /* unknown kind */ 540 return UNKNOWN_NODE_KIND(rep_key); 541 542 return SVN_NO_ERROR; 543} 544 545 546svn_error_t * 547svn_fs_base__get_mutable_rep(const char **new_rep_key, 548 const char *rep_key, 549 svn_fs_t *fs, 550 const char *txn_id, 551 trail_t *trail, 552 apr_pool_t *pool) 553{ 554 representation_t *rep = NULL; 555 const char *new_str = NULL; 556 557 /* We were passed an existing 
REP_KEY, so examine it. If it is 558 mutable already, then just return REP_KEY as the mutable result 559 key. */ 560 if (rep_key && (rep_key[0] != '\0')) 561 { 562 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); 563 if (rep_is_mutable(rep, txn_id)) 564 { 565 *new_rep_key = rep_key; 566 return SVN_NO_ERROR; 567 } 568 } 569 570 /* Either we weren't provided a base key to examine, or the base key 571 we were provided was not mutable. So, let's make a new 572 representation and return its key to the caller. */ 573 SVN_ERR(svn_fs_bdb__string_append(fs, &new_str, 0, NULL, trail, pool)); 574 rep = make_fulltext_rep(new_str, txn_id, 575 svn_checksum_empty_checksum(svn_checksum_md5, 576 pool), 577 svn_checksum_empty_checksum(svn_checksum_sha1, 578 pool), 579 pool); 580 return svn_fs_bdb__write_new_rep(new_rep_key, fs, rep, trail, pool); 581} 582 583 584svn_error_t * 585svn_fs_base__delete_rep_if_mutable(svn_fs_t *fs, 586 const char *rep_key, 587 const char *txn_id, 588 trail_t *trail, 589 apr_pool_t *pool) 590{ 591 representation_t *rep; 592 593 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); 594 if (! rep_is_mutable(rep, txn_id)) 595 return SVN_NO_ERROR; 596 597 if (rep->kind == rep_kind_fulltext) 598 { 599 SVN_ERR(svn_fs_bdb__string_delete(fs, 600 rep->contents.fulltext.string_key, 601 trail, pool)); 602 } 603 else if (rep->kind == rep_kind_delta) 604 { 605 apr_array_header_t *keys; 606 SVN_ERR(delta_string_keys(&keys, rep, pool)); 607 SVN_ERR(delete_strings(keys, fs, trail, pool)); 608 } 609 else /* unknown kind */ 610 return UNKNOWN_NODE_KIND(rep_key); 611 612 return svn_fs_bdb__delete_rep(fs, rep_key, trail, pool); 613} 614 615 616 617/*** Reading and writing data via representations. ***/ 618 619/** Reading. **/ 620 621struct rep_read_baton 622{ 623 /* The FS from which we're reading. */ 624 svn_fs_t *fs; 625 626 /* The representation skel whose contents we want to read. 
If this 627 is NULL, the rep has never had any contents, so all reads fetch 0 628 bytes. 629 630 Formerly, we cached the entire rep skel here, not just the key. 631 That way we didn't have to fetch the rep from the db every time 632 we want to read a little bit more of the file. Unfortunately, 633 this has a problem: if, say, a file's representation changes 634 while we're reading (changes from fulltext to delta, for 635 example), we'll never know it. So for correctness, we now 636 refetch the representation skel every time we want to read 637 another chunk. */ 638 const char *rep_key; 639 640 /* How many bytes have been read already. */ 641 svn_filesize_t offset; 642 643 /* If present, the read will be done as part of this trail, and the 644 trail's pool will be used. Otherwise, see `pool' below. */ 645 trail_t *trail; 646 647 /* MD5 checksum context. Initialized when the baton is created, updated as 648 we read data, and finalized when the stream is closed. */ 649 svn_checksum_ctx_t *md5_checksum_ctx; 650 651 /* Final resting place of the checksum created by md5_checksum_cxt. */ 652 svn_checksum_t *md5_checksum; 653 654 /* SHA1 checksum context. Initialized when the baton is created, updated as 655 we read data, and finalized when the stream is closed. */ 656 svn_checksum_ctx_t *sha1_checksum_ctx; 657 658 /* Final resting place of the checksum created by sha1_checksum_cxt. */ 659 svn_checksum_t *sha1_checksum; 660 661 /* The length of the rep's contents (as fulltext, that is, 662 independent of how the rep actually stores the data.) This is 663 retrieved when the baton is created, and used to determine when 664 we have read the last byte, at which point we compare checksums. 665 666 Getting this at baton creation time makes interleaved reads and 667 writes on the same rep in the same trail impossible. But we're 668 not doing that, and probably no one ever should. And anyway if 669 they do, they should see problems immediately. 
*/ 670 svn_filesize_t size; 671 672 /* Set to FALSE when the baton is created, TRUE when the checksum_ctx 673 is digestified. */ 674 svn_boolean_t checksum_finalized; 675 676 /* Used for temporary allocations. This pool is cleared at the 677 start of each invocation of the relevant stream read function -- 678 see rep_read_contents(). */ 679 apr_pool_t *scratch_pool; 680 681}; 682 683 684static svn_error_t * 685rep_read_get_baton(struct rep_read_baton **rb_p, 686 svn_fs_t *fs, 687 const char *rep_key, 688 svn_boolean_t use_trail_for_reads, 689 trail_t *trail, 690 apr_pool_t *pool) 691{ 692 struct rep_read_baton *b; 693 694 b = apr_pcalloc(pool, sizeof(*b)); 695 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool); 696 b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool); 697 698 if (rep_key) 699 SVN_ERR(svn_fs_base__rep_contents_size(&(b->size), fs, rep_key, 700 trail, pool)); 701 else 702 b->size = 0; 703 704 b->checksum_finalized = FALSE; 705 b->fs = fs; 706 b->trail = use_trail_for_reads ? trail : NULL; 707 b->scratch_pool = svn_pool_create(pool); 708 b->rep_key = rep_key; 709 b->offset = 0; 710 711 *rb_p = b; 712 713 return SVN_NO_ERROR; 714} 715 716 717 718/*** Retrieving data. ***/ 719 720svn_error_t * 721svn_fs_base__rep_contents_size(svn_filesize_t *size_p, 722 svn_fs_t *fs, 723 const char *rep_key, 724 trail_t *trail, 725 apr_pool_t *pool) 726{ 727 representation_t *rep; 728 729 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); 730 731 if (rep->kind == rep_kind_fulltext) 732 { 733 /* Get the size by asking Berkeley for the string's length. */ 734 SVN_ERR(svn_fs_bdb__string_size(size_p, fs, 735 rep->contents.fulltext.string_key, 736 trail, pool)); 737 } 738 else if (rep->kind == rep_kind_delta) 739 { 740 /* Get the size by finding the last window pkg in the delta and 741 adding its offset to its size. 
This way, we won't even be 742 messed up by overlapping windows, as long as the window pkgs 743 are still ordered. */ 744 apr_array_header_t *chunks = rep->contents.delta.chunks; 745 rep_delta_chunk_t *last_chunk; 746 747 SVN_ERR_ASSERT(chunks->nelts); 748 749 last_chunk = APR_ARRAY_IDX(chunks, chunks->nelts - 1, 750 rep_delta_chunk_t *); 751 *size_p = last_chunk->offset + last_chunk->size; 752 } 753 else /* unknown kind */ 754 return UNKNOWN_NODE_KIND(rep_key); 755 756 return SVN_NO_ERROR; 757} 758 759 760svn_error_t * 761svn_fs_base__rep_contents_checksums(svn_checksum_t **md5_checksum, 762 svn_checksum_t **sha1_checksum, 763 svn_fs_t *fs, 764 const char *rep_key, 765 trail_t *trail, 766 apr_pool_t *pool) 767{ 768 representation_t *rep; 769 770 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); 771 if (md5_checksum) 772 *md5_checksum = svn_checksum_dup(rep->md5_checksum, pool); 773 if (sha1_checksum) 774 *sha1_checksum = svn_checksum_dup(rep->sha1_checksum, pool); 775 776 return SVN_NO_ERROR; 777} 778 779 780svn_error_t * 781svn_fs_base__rep_contents(svn_string_t *str, 782 svn_fs_t *fs, 783 const char *rep_key, 784 trail_t *trail, 785 apr_pool_t *pool) 786{ 787 svn_filesize_t contents_size; 788 apr_size_t len; 789 char *data; 790 791 SVN_ERR(svn_fs_base__rep_contents_size(&contents_size, fs, rep_key, 792 trail, pool)); 793 794 /* What if the contents are larger than we can handle? */ 795 if (contents_size > SVN_MAX_OBJECT_SIZE) 796 return svn_error_createf 797 (SVN_ERR_FS_GENERAL, NULL, 798 _("Rep contents are too large: " 799 "got %s, limit is %s"), 800 apr_psprintf(pool, "%" SVN_FILESIZE_T_FMT, contents_size), 801 apr_psprintf(pool, "%" APR_SIZE_T_FMT, SVN_MAX_OBJECT_SIZE)); 802 else 803 str->len = (apr_size_t) contents_size; 804 805 data = apr_palloc(pool, str->len); 806 str->data = data; 807 len = str->len; 808 SVN_ERR(rep_read_range(fs, rep_key, 0, data, &len, trail, pool)); 809 810 /* Paranoia. 
*/ 811 if (len != str->len) 812 return svn_error_createf 813 (SVN_ERR_FS_CORRUPT, NULL, 814 _("Failure reading representation '%s'"), rep_key); 815 816 /* Just the standard paranoia. */ 817 { 818 representation_t *rep; 819 svn_checksum_t *checksum, *rep_checksum; 820 821 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); 822 rep_checksum = rep->sha1_checksum ? rep->sha1_checksum : rep->md5_checksum; 823 SVN_ERR(svn_checksum(&checksum, rep_checksum->kind, str->data, str->len, 824 pool)); 825 826 if (! svn_checksum_match(checksum, rep_checksum)) 827 return svn_error_create(SVN_ERR_FS_CORRUPT, 828 svn_checksum_mismatch_err(rep_checksum, checksum, pool, 829 _("Checksum mismatch on representation '%s'"), 830 rep_key), 831 NULL); 832 } 833 834 return SVN_NO_ERROR; 835} 836 837 838struct read_rep_args 839{ 840 struct rep_read_baton *rb; /* The data source. */ 841 char *buf; /* Where to put what we read. */ 842 apr_size_t *len; /* How much to read / was read. */ 843}; 844 845 846/* BATON is of type `read_rep_args': 847 848 Read into BATON->rb->buf the *(BATON->len) bytes starting at 849 BATON->rb->offset from the data represented at BATON->rb->rep_key 850 in BATON->rb->fs, as part of TRAIL. 851 852 Afterwards, *(BATON->len) is the number of bytes actually read, and 853 BATON->rb->offset is incremented by that amount. 854 855 If BATON->rb->rep_key is null, this is assumed to mean the file's 856 contents have no representation, i.e., the file has no contents. 857 In that case, if BATON->rb->offset > 0, return the error 858 SVN_ERR_FS_FILE_CONTENTS_CHANGED, else just set *(BATON->len) to 859 zero and return. 
*/ 860static svn_error_t * 861txn_body_read_rep(void *baton, trail_t *trail) 862{ 863 struct read_rep_args *args = baton; 864 865 if (args->rb->rep_key) 866 { 867 SVN_ERR(rep_read_range(args->rb->fs, 868 args->rb->rep_key, 869 args->rb->offset, 870 args->buf, 871 args->len, 872 trail, 873 args->rb->scratch_pool)); 874 875 args->rb->offset += *(args->len); 876 877 /* We calculate the checksum just once, the moment we see the 878 * last byte of data. But we can't assume there was a short 879 * read. The caller may have known the length of the data and 880 * requested exactly that amount, so there would never be a 881 * short read. (That's why the read baton has to know the 882 * length of the data in advance.) 883 * 884 * On the other hand, some callers invoke the stream reader in a 885 * loop whose termination condition is that the read returned 886 * zero bytes of data -- which usually results in the read 887 * function being called one more time *after* the call that got 888 * a short read (indicating end-of-stream). 889 * 890 * The conditions below ensure that we compare checksums even 891 * when there is no short read associated with the last byte of 892 * data, while also ensuring that it's harmless to repeatedly 893 * read 0 bytes from the stream. 894 */ 895 if (! args->rb->checksum_finalized) 896 { 897 SVN_ERR(svn_checksum_update(args->rb->md5_checksum_ctx, args->buf, 898 *(args->len))); 899 SVN_ERR(svn_checksum_update(args->rb->sha1_checksum_ctx, args->buf, 900 *(args->len))); 901 902 if (args->rb->offset == args->rb->size) 903 { 904 representation_t *rep; 905 906 SVN_ERR(svn_checksum_final(&args->rb->md5_checksum, 907 args->rb->md5_checksum_ctx, 908 trail->pool)); 909 SVN_ERR(svn_checksum_final(&args->rb->sha1_checksum, 910 args->rb->sha1_checksum_ctx, 911 trail->pool)); 912 args->rb->checksum_finalized = TRUE; 913 914 SVN_ERR(svn_fs_bdb__read_rep(&rep, args->rb->fs, 915 args->rb->rep_key, 916 trail, trail->pool)); 917 918 if (rep->md5_checksum 919 && (! 
svn_checksum_match(rep->md5_checksum, 920 args->rb->md5_checksum))) 921 return svn_error_create(SVN_ERR_FS_CORRUPT, 922 svn_checksum_mismatch_err(rep->md5_checksum, 923 args->rb->md5_checksum, trail->pool, 924 _("MD5 checksum mismatch on representation '%s'"), 925 args->rb->rep_key), 926 NULL); 927 928 if (rep->sha1_checksum 929 && (! svn_checksum_match(rep->sha1_checksum, 930 args->rb->sha1_checksum))) 931 return svn_error_createf(SVN_ERR_FS_CORRUPT, 932 svn_checksum_mismatch_err(rep->sha1_checksum, 933 args->rb->sha1_checksum, trail->pool, 934 _("SHA1 checksum mismatch on representation '%s'"), 935 args->rb->rep_key), 936 NULL); 937 } 938 } 939 } 940 else if (args->rb->offset > 0) 941 { 942 return 943 svn_error_create 944 (SVN_ERR_FS_REP_CHANGED, NULL, 945 _("Null rep, but offset past zero already")); 946 } 947 else 948 *(args->len) = 0; 949 950 return SVN_NO_ERROR; 951} 952 953 954static svn_error_t * 955rep_read_contents(void *baton, char *buf, apr_size_t *len) 956{ 957 struct rep_read_baton *rb = baton; 958 struct read_rep_args args; 959 960 /* Clear the scratch pool of the results of previous invocations. */ 961 svn_pool_clear(rb->scratch_pool); 962 963 args.rb = rb; 964 args.buf = buf; 965 args.len = len; 966 967 /* If we got a trail, use it; else make one. */ 968 if (rb->trail) 969 SVN_ERR(txn_body_read_rep(&args, rb->trail)); 970 else 971 { 972 /* In the case of reading from the db, any returned data should 973 live in our pre-allocated buffer, so the whole operation can 974 happen within a single malloc/free cycle. This prevents us 975 from creating millions of unnecessary trail subpools when 976 reading a big file. */ 977 SVN_ERR(svn_fs_base__retry_txn(rb->fs, 978 txn_body_read_rep, 979 &args, 980 TRUE, 981 rb->scratch_pool)); 982 } 983 return SVN_NO_ERROR; 984} 985 986 987/** Writing. **/ 988 989 990struct rep_write_baton 991{ 992 /* The FS in which we're writing. */ 993 svn_fs_t *fs; 994 995 /* The representation skel whose contents we want to write. 
*/ 996 const char *rep_key; 997 998 /* The transaction id under which this write action will take 999 place. */ 1000 const char *txn_id; 1001 1002 /* If present, do the write as part of this trail, and use trail's 1003 pool. Otherwise, see `pool' below. */ 1004 trail_t *trail; 1005 1006 /* SHA1 and MD5 checksums. Initialized when the baton is created, 1007 updated as we write data, and finalized and stored when the 1008 stream is closed. */ 1009 svn_checksum_ctx_t *md5_checksum_ctx; 1010 svn_checksum_t *md5_checksum; 1011 svn_checksum_ctx_t *sha1_checksum_ctx; 1012 svn_checksum_t *sha1_checksum; 1013 svn_boolean_t finalized; 1014 1015 /* Used for temporary allocations, iff `trail' (above) is null. */ 1016 apr_pool_t *pool; 1017 1018}; 1019 1020 1021static struct rep_write_baton * 1022rep_write_get_baton(svn_fs_t *fs, 1023 const char *rep_key, 1024 const char *txn_id, 1025 trail_t *trail, 1026 apr_pool_t *pool) 1027{ 1028 struct rep_write_baton *b; 1029 1030 b = apr_pcalloc(pool, sizeof(*b)); 1031 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool); 1032 b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool); 1033 b->fs = fs; 1034 b->trail = trail; 1035 b->pool = pool; 1036 b->rep_key = rep_key; 1037 b->txn_id = txn_id; 1038 return b; 1039} 1040 1041 1042 1043/* Write LEN bytes from BUF into the end of the string represented via 1044 REP_KEY in FS, as part of TRAIL. If the representation is not 1045 mutable, return the error SVN_FS_REP_NOT_MUTABLE. */ 1046static svn_error_t * 1047rep_write(svn_fs_t *fs, 1048 const char *rep_key, 1049 const char *buf, 1050 apr_size_t len, 1051 const char *txn_id, 1052 trail_t *trail, 1053 apr_pool_t *pool) 1054{ 1055 representation_t *rep; 1056 1057 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); 1058 1059 if (! 
rep_is_mutable(rep, txn_id)) 1060 return svn_error_createf 1061 (SVN_ERR_FS_REP_NOT_MUTABLE, NULL, 1062 _("Rep '%s' is not mutable"), rep_key); 1063 1064 if (rep->kind == rep_kind_fulltext) 1065 { 1066 SVN_ERR(svn_fs_bdb__string_append 1067 (fs, &(rep->contents.fulltext.string_key), len, buf, 1068 trail, pool)); 1069 } 1070 else if (rep->kind == rep_kind_delta) 1071 { 1072 /* There should never be a case when we have a mutable 1073 non-fulltext rep. The only code that creates mutable reps is 1074 in this file, and it creates them fulltext. */ 1075 return svn_error_createf 1076 (SVN_ERR_FS_CORRUPT, NULL, 1077 _("Rep '%s' both mutable and non-fulltext"), rep_key); 1078 } 1079 else /* unknown kind */ 1080 return UNKNOWN_NODE_KIND(rep_key); 1081 1082 return SVN_NO_ERROR; 1083} 1084 1085 1086struct write_rep_args 1087{ 1088 struct rep_write_baton *wb; /* Destination. */ 1089 const char *buf; /* Data. */ 1090 apr_size_t len; /* How much to write. */ 1091}; 1092 1093 1094/* BATON is of type `write_rep_args': 1095 Append onto BATON->wb->rep_key's contents BATON->len bytes of 1096 data from BATON->wb->buf, in BATON->rb->fs, as part of TRAIL. 1097 1098 If the representation is not mutable, return the error 1099 SVN_FS_REP_NOT_MUTABLE. 
*/ 1100static svn_error_t * 1101txn_body_write_rep(void *baton, trail_t *trail) 1102{ 1103 struct write_rep_args *args = baton; 1104 1105 SVN_ERR(rep_write(args->wb->fs, 1106 args->wb->rep_key, 1107 args->buf, 1108 args->len, 1109 args->wb->txn_id, 1110 trail, 1111 trail->pool)); 1112 SVN_ERR(svn_checksum_update(args->wb->md5_checksum_ctx, 1113 args->buf, args->len)); 1114 SVN_ERR(svn_checksum_update(args->wb->sha1_checksum_ctx, 1115 args->buf, args->len)); 1116 return SVN_NO_ERROR; 1117} 1118 1119 1120static svn_error_t * 1121rep_write_contents(void *baton, 1122 const char *buf, 1123 apr_size_t *len) 1124{ 1125 struct rep_write_baton *wb = baton; 1126 struct write_rep_args args; 1127 1128 /* We toss LEN's indirectness because if not all the bytes are 1129 written, it's an error, so we wouldn't be reporting anything back 1130 through *LEN anyway. */ 1131 args.wb = wb; 1132 args.buf = buf; 1133 args.len = *len; 1134 1135 /* If we got a trail, use it; else make one. */ 1136 if (wb->trail) 1137 SVN_ERR(txn_body_write_rep(&args, wb->trail)); 1138 else 1139 { 1140 /* In the case of simply writing the rep to the db, we're 1141 *certain* that there's no data coming back to us that needs 1142 to be preserved... so the whole operation can happen within a 1143 single malloc/free cycle. This prevents us from creating 1144 millions of unnecessary trail subpools when writing a big 1145 file. */ 1146 SVN_ERR(svn_fs_base__retry_txn(wb->fs, 1147 txn_body_write_rep, 1148 &args, 1149 TRUE, 1150 wb->pool)); 1151 } 1152 1153 return SVN_NO_ERROR; 1154} 1155 1156 1157/* Helper for rep_write_close_contents(); see that doc string for 1158 more. BATON is of type `struct rep_write_baton'. 
*/ 1159static svn_error_t * 1160txn_body_write_close_rep(void *baton, trail_t *trail) 1161{ 1162 struct rep_write_baton *wb = baton; 1163 representation_t *rep; 1164 1165 SVN_ERR(svn_fs_bdb__read_rep(&rep, wb->fs, wb->rep_key, 1166 trail, trail->pool)); 1167 rep->md5_checksum = svn_checksum_dup(wb->md5_checksum, trail->pool); 1168 rep->sha1_checksum = svn_checksum_dup(wb->sha1_checksum, trail->pool); 1169 return svn_fs_bdb__write_rep(wb->fs, wb->rep_key, rep, 1170 trail, trail->pool); 1171} 1172 1173 1174/* BATON is of type `struct rep_write_baton'. 1175 * 1176 * Finalize BATON->md5_context and store the resulting digest under 1177 * BATON->rep_key. 1178 */ 1179static svn_error_t * 1180rep_write_close_contents(void *baton) 1181{ 1182 struct rep_write_baton *wb = baton; 1183 1184 /* ### Thought: if we fixed apr-util MD5 contexts to allow repeated 1185 digestification, then we wouldn't need a stream close function at 1186 all -- instead, we could update the stored checksum each time a 1187 write occurred, which would have the added advantage of making 1188 interleaving reads and writes work. Currently, they'd fail with 1189 a checksum mismatch, it just happens that our code never tries to 1190 do that anyway. */ 1191 1192 if (! wb->finalized) 1193 { 1194 SVN_ERR(svn_checksum_final(&wb->md5_checksum, wb->md5_checksum_ctx, 1195 wb->pool)); 1196 SVN_ERR(svn_checksum_final(&wb->sha1_checksum, wb->sha1_checksum_ctx, 1197 wb->pool)); 1198 wb->finalized = TRUE; 1199 } 1200 1201 /* If we got a trail, use it; else make one. */ 1202 if (wb->trail) 1203 return txn_body_write_close_rep(wb, wb->trail); 1204 else 1205 /* We need to keep our trail pool around this time so the 1206 checksums we've calculated survive. */ 1207 return svn_fs_base__retry_txn(wb->fs, txn_body_write_close_rep, 1208 wb, FALSE, wb->pool); 1209} 1210 1211 1212/** Public read and write stream constructors. 
**/ 1213 1214svn_error_t * 1215svn_fs_base__rep_contents_read_stream(svn_stream_t **rs_p, 1216 svn_fs_t *fs, 1217 const char *rep_key, 1218 svn_boolean_t use_trail_for_reads, 1219 trail_t *trail, 1220 apr_pool_t *pool) 1221{ 1222 struct rep_read_baton *rb; 1223 1224 SVN_ERR(rep_read_get_baton(&rb, fs, rep_key, use_trail_for_reads, 1225 trail, pool)); 1226 *rs_p = svn_stream_create(rb, pool); 1227 svn_stream_set_read2(*rs_p, NULL /* only full read support */, 1228 rep_read_contents); 1229 1230 return SVN_NO_ERROR; 1231} 1232 1233 1234/* Clear the contents of REP_KEY, so that it represents the empty 1235 string, as part of TRAIL. TXN_ID is the id of the Subversion 1236 transaction under which this occurs. If REP_KEY is not mutable, 1237 return the error SVN_ERR_FS_REP_NOT_MUTABLE. */ 1238static svn_error_t * 1239rep_contents_clear(svn_fs_t *fs, 1240 const char *rep_key, 1241 const char *txn_id, 1242 trail_t *trail, 1243 apr_pool_t *pool) 1244{ 1245 representation_t *rep; 1246 const char *str_key; 1247 1248 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool)); 1249 1250 /* Make sure it's mutable. */ 1251 if (! rep_is_mutable(rep, txn_id)) 1252 return svn_error_createf 1253 (SVN_ERR_FS_REP_NOT_MUTABLE, NULL, 1254 _("Rep '%s' is not mutable"), rep_key); 1255 1256 SVN_ERR_ASSERT(rep->kind == rep_kind_fulltext); 1257 1258 /* If rep has no string, just return success. Else, clear the 1259 underlying string. 
*/ 1260 str_key = rep->contents.fulltext.string_key; 1261 if (str_key && *str_key) 1262 { 1263 SVN_ERR(svn_fs_bdb__string_clear(fs, str_key, trail, pool)); 1264 rep->md5_checksum = NULL; 1265 rep->sha1_checksum = NULL; 1266 SVN_ERR(svn_fs_bdb__write_rep(fs, rep_key, rep, trail, pool)); 1267 } 1268 return SVN_NO_ERROR; 1269} 1270 1271 1272svn_error_t * 1273svn_fs_base__rep_contents_write_stream(svn_stream_t **ws_p, 1274 svn_fs_t *fs, 1275 const char *rep_key, 1276 const char *txn_id, 1277 svn_boolean_t use_trail_for_writes, 1278 trail_t *trail, 1279 apr_pool_t *pool) 1280{ 1281 struct rep_write_baton *wb; 1282 1283 /* Clear the current rep contents (free mutability check!). */ 1284 SVN_ERR(rep_contents_clear(fs, rep_key, txn_id, trail, pool)); 1285 1286 /* Now, generate the write baton and stream. */ 1287 wb = rep_write_get_baton(fs, rep_key, txn_id, 1288 use_trail_for_writes ? trail : NULL, pool); 1289 *ws_p = svn_stream_create(wb, pool); 1290 svn_stream_set_write(*ws_p, rep_write_contents); 1291 svn_stream_set_close(*ws_p, rep_write_close_contents); 1292 1293 return SVN_NO_ERROR; 1294} 1295 1296 1297 1298/*** Deltified storage. ***/ 1299 1300/* Baton for svn_write_fn_t write_string_set(). */ 1301struct write_svndiff_strings_baton 1302{ 1303 /* The fs where lives the string we're writing. */ 1304 svn_fs_t *fs; 1305 1306 /* The key of the string we're writing to. Typically this is 1307 initialized to NULL, so svn_fs_base__string_append() can fill in a 1308 value. */ 1309 const char *key; 1310 1311 /* The amount of txdelta data written to the current 1312 string-in-progress. */ 1313 apr_size_t size; 1314 1315 /* The amount of svndiff header information we've written thus far 1316 to the strings table. */ 1317 apr_size_t header_read; 1318 1319 /* The version number of the svndiff data written. ### You'd better 1320 not count on this being populated after the first chunk is sent 1321 through the interface, since it lives at the 4th byte of the 1322 stream. 
*/ 1323 apr_byte_t version; 1324 1325 /* The trail we're writing in. */ 1326 trail_t *trail; 1327 1328}; 1329 1330 1331/* Function of type `svn_write_fn_t', for writing to a collection of 1332 strings; BATON is `struct write_svndiff_strings_baton *'. 1333 1334 On the first call, BATON->key is null. A new string key in 1335 BATON->fs is chosen and stored in BATON->key; each call appends 1336 *LEN bytes from DATA onto the string. *LEN is never changed; if 1337 the write fails to write all *LEN bytes, an error is returned. 1338 BATON->size is used to track the total amount of data written via 1339 this handler, and must be reset by the caller to 0 when appropriate. */ 1340static svn_error_t * 1341write_svndiff_strings(void *baton, const char *data, apr_size_t *len) 1342{ 1343 struct write_svndiff_strings_baton *wb = baton; 1344 const char *buf = data; 1345 apr_size_t nheader = 0; 1346 1347 /* If we haven't stripped all the header information from this 1348 stream yet, keep stripping. If someone sends a first window 1349 through here that's shorter than 4 bytes long, this will probably 1350 cause a nuclear reactor meltdown somewhere in the American 1351 midwest. */ 1352 if (wb->header_read < 4) 1353 { 1354 nheader = 4 - wb->header_read; 1355 *len -= nheader; 1356 buf += nheader; 1357 wb->header_read += nheader; 1358 1359 /* If we have *now* read the full 4-byte header, check that 1360 least byte for the version number of the svndiff format. */ 1361 if (wb->header_read == 4) 1362 wb->version = *(buf - 1); 1363 } 1364 1365 /* Append to the current string we're writing (or create a new one 1366 if WB->key is NULL). */ 1367 SVN_ERR(svn_fs_bdb__string_append(wb->fs, &(wb->key), *len, 1368 buf, wb->trail, wb->trail->pool)); 1369 1370 /* Make sure we (still) have a key. 
*/ 1371 if (wb->key == NULL) 1372 return svn_error_create(SVN_ERR_FS_GENERAL, NULL, 1373 _("Failed to get new string key")); 1374 1375 /* Restore *LEN to the value it *would* have been were it not for 1376 header stripping. */ 1377 *len += nheader; 1378 1379 /* Increment our running total of bytes written to this string. */ 1380 wb->size += *len; 1381 1382 return SVN_NO_ERROR; 1383} 1384 1385 1386typedef struct window_write_t 1387{ 1388 const char *key; /* string key for this window */ 1389 apr_size_t svndiff_len; /* amount of svndiff data written to the string */ 1390 svn_filesize_t text_off; /* offset of fulltext represented by this window */ 1391 apr_size_t text_len; /* amount of fulltext data represented by this window */ 1392 1393} window_write_t; 1394 1395 1396svn_error_t * 1397svn_fs_base__rep_deltify(svn_fs_t *fs, 1398 const char *target, 1399 const char *source, 1400 trail_t *trail, 1401 apr_pool_t *pool) 1402{ 1403 base_fs_data_t *bfd = fs->fsap_data; 1404 svn_stream_t *source_stream; /* stream to read the source */ 1405 svn_stream_t *target_stream; /* stream to read the target */ 1406 svn_txdelta_stream_t *txdelta_stream; /* stream to read delta windows */ 1407 1408 /* window-y things, and an array to track them */ 1409 window_write_t *ww; 1410 apr_array_header_t *windows; 1411 1412 /* stream to write new (deltified) target data and its baton */ 1413 svn_stream_t *new_target_stream; 1414 struct write_svndiff_strings_baton new_target_baton; 1415 1416 /* window handler/baton for writing to above stream */ 1417 svn_txdelta_window_handler_t new_target_handler; 1418 void *new_target_handler_baton; 1419 1420 /* yes, we do windows */ 1421 svn_txdelta_window_t *window; 1422 1423 /* The current offset into the fulltext that our window is about to 1424 write. This doubles, after all windows are written, as the 1425 total size of the svndiff data for the deltification process. 
*/ 1426 svn_filesize_t tview_off = 0; 1427 1428 /* The total amount of diff data written while deltifying. */ 1429 svn_filesize_t diffsize = 0; 1430 1431 /* TARGET's original string keys */ 1432 apr_array_header_t *orig_str_keys; 1433 1434 /* The checksums for the representation's fulltext contents. */ 1435 svn_checksum_t *rep_md5_checksum; 1436 svn_checksum_t *rep_sha1_checksum; 1437 1438 /* MD5 digest */ 1439 const unsigned char *digest; 1440 1441 /* pool for holding the windows */ 1442 apr_pool_t *wpool; 1443 1444 /* Paranoia: never allow a rep to be deltified against itself, 1445 because then there would be no fulltext reachable in the delta 1446 chain, and badness would ensue. */ 1447 if (strcmp(target, source) == 0) 1448 return svn_error_createf 1449 (SVN_ERR_FS_CORRUPT, NULL, 1450 _("Attempt to deltify '%s' against itself"), 1451 target); 1452 1453 /* Set up a handler for the svndiff data, which will write each 1454 window to its own string in the `strings' table. */ 1455 new_target_baton.fs = fs; 1456 new_target_baton.trail = trail; 1457 new_target_baton.header_read = FALSE; 1458 new_target_stream = svn_stream_create(&new_target_baton, pool); 1459 svn_stream_set_write(new_target_stream, write_svndiff_strings); 1460 1461 /* Get streams to our source and target text data. */ 1462 SVN_ERR(svn_fs_base__rep_contents_read_stream(&source_stream, fs, source, 1463 TRUE, trail, pool)); 1464 SVN_ERR(svn_fs_base__rep_contents_read_stream(&target_stream, fs, target, 1465 TRUE, trail, pool)); 1466 1467 /* Setup a stream to convert the textdelta data into svndiff windows. 
*/ 1468 svn_txdelta2(&txdelta_stream, source_stream, target_stream, TRUE, pool); 1469 1470 if (bfd->format >= SVN_FS_BASE__MIN_SVNDIFF1_FORMAT) 1471 svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton, 1472 new_target_stream, 1, 1473 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool); 1474 else 1475 svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton, 1476 new_target_stream, 0, 1477 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool); 1478 1479 /* subpool for the windows */ 1480 wpool = svn_pool_create(pool); 1481 1482 /* Now, loop, manufacturing and dispatching windows of svndiff data. */ 1483 windows = apr_array_make(pool, 1, sizeof(ww)); 1484 do 1485 { 1486 /* Reset some baton variables. */ 1487 new_target_baton.size = 0; 1488 new_target_baton.key = NULL; 1489 1490 /* Free the window. */ 1491 svn_pool_clear(wpool); 1492 1493 /* Fetch the next window of txdelta data. */ 1494 SVN_ERR(svn_txdelta_next_window(&window, txdelta_stream, wpool)); 1495 1496 /* Send off this package to be written as svndiff data. */ 1497 SVN_ERR(new_target_handler(window, new_target_handler_baton)); 1498 if (window) 1499 { 1500 /* Add a new window description to our array. */ 1501 ww = apr_pcalloc(pool, sizeof(*ww)); 1502 ww->key = new_target_baton.key; 1503 ww->svndiff_len = new_target_baton.size; 1504 ww->text_off = tview_off; 1505 ww->text_len = window->tview_len; 1506 APR_ARRAY_PUSH(windows, window_write_t *) = ww; 1507 1508 /* Update our recordkeeping variables. */ 1509 tview_off += window->tview_len; 1510 diffsize += ww->svndiff_len; 1511 } 1512 1513 } while (window); 1514 1515 svn_pool_destroy(wpool); 1516 1517 /* Having processed all the windows, we can query the MD5 digest 1518 from the stream. */ 1519 digest = svn_txdelta_md5_digest(txdelta_stream); 1520 if (! 
digest) 1521 return svn_error_createf 1522 (SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT, NULL, 1523 _("Failed to calculate MD5 digest for '%s'"), 1524 source); 1525 1526 /* Construct a list of the strings used by the old representation so 1527 that we can delete them later. While we are here, if the old 1528 representation was a fulltext, check to make sure the delta we're 1529 replacing it with is actually smaller. (Don't perform this check 1530 if we're replacing a delta; in that case, we're going for a time 1531 optimization, not a space optimization.) */ 1532 { 1533 representation_t *old_rep; 1534 const char *str_key; 1535 1536 SVN_ERR(svn_fs_bdb__read_rep(&old_rep, fs, target, trail, pool)); 1537 if (old_rep->kind == rep_kind_fulltext) 1538 { 1539 svn_filesize_t old_size = 0; 1540 1541 str_key = old_rep->contents.fulltext.string_key; 1542 SVN_ERR(svn_fs_bdb__string_size(&old_size, fs, str_key, 1543 trail, pool)); 1544 orig_str_keys = apr_array_make(pool, 1, sizeof(str_key)); 1545 APR_ARRAY_PUSH(orig_str_keys, const char *) = str_key; 1546 1547 /* If the new data is NOT an space optimization, destroy the 1548 string(s) we created, and get outta here. */ 1549 if (diffsize >= old_size) 1550 { 1551 int i; 1552 for (i = 0; i < windows->nelts; i++) 1553 { 1554 ww = APR_ARRAY_IDX(windows, i, window_write_t *); 1555 SVN_ERR(svn_fs_bdb__string_delete(fs, ww->key, trail, pool)); 1556 } 1557 return SVN_NO_ERROR; 1558 } 1559 } 1560 else if (old_rep->kind == rep_kind_delta) 1561 SVN_ERR(delta_string_keys(&orig_str_keys, old_rep, pool)); 1562 else /* unknown kind */ 1563 return UNKNOWN_NODE_KIND(target); 1564 1565 /* Save the checksums, since the new rep needs them. */ 1566 rep_md5_checksum = svn_checksum_dup(old_rep->md5_checksum, pool); 1567 rep_sha1_checksum = svn_checksum_dup(old_rep->sha1_checksum, pool); 1568 } 1569 1570 /* Hook the new strings we wrote into the rest of the filesystem by 1571 building a new representation to replace our old one. 
*/ 1572 { 1573 representation_t new_rep; 1574 rep_delta_chunk_t *chunk; 1575 apr_array_header_t *chunks; 1576 int i; 1577 1578 new_rep.kind = rep_kind_delta; 1579 new_rep.txn_id = NULL; 1580 1581 /* Migrate the old rep's checksums to the new rep. */ 1582 new_rep.md5_checksum = svn_checksum_dup(rep_md5_checksum, pool); 1583 new_rep.sha1_checksum = svn_checksum_dup(rep_sha1_checksum, pool); 1584 1585 chunks = apr_array_make(pool, windows->nelts, sizeof(chunk)); 1586 1587 /* Loop through the windows we wrote, creating and adding new 1588 chunks to the representation. */ 1589 for (i = 0; i < windows->nelts; i++) 1590 { 1591 ww = APR_ARRAY_IDX(windows, i, window_write_t *); 1592 1593 /* Allocate a chunk and its window */ 1594 chunk = apr_palloc(pool, sizeof(*chunk)); 1595 chunk->offset = ww->text_off; 1596 1597 /* Populate the window */ 1598 chunk->version = new_target_baton.version; 1599 chunk->string_key = ww->key; 1600 chunk->size = ww->text_len; 1601 chunk->rep_key = source; 1602 1603 /* Add this chunk to the array. */ 1604 APR_ARRAY_PUSH(chunks, rep_delta_chunk_t *) = chunk; 1605 } 1606 1607 /* Put the chunks array into the representation. */ 1608 new_rep.contents.delta.chunks = chunks; 1609 1610 /* Write out the new representation. */ 1611 SVN_ERR(svn_fs_bdb__write_rep(fs, target, &new_rep, trail, pool)); 1612 1613 /* Delete the original pre-deltified strings. */ 1614 SVN_ERR(delete_strings(orig_str_keys, fs, trail, pool)); 1615 } 1616 1617 return SVN_NO_ERROR; 1618} 1619