cached_data.c revision 298845
1/* cached_data.c --- cached (read) access to FSFS data 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23#include "cached_data.h" 24 25#include <assert.h> 26 27#include "svn_hash.h" 28#include "svn_ctype.h" 29#include "svn_sorts.h" 30#include "private/svn_delta_private.h" 31#include "private/svn_io_private.h" 32#include "private/svn_sorts_private.h" 33#include "private/svn_subr_private.h" 34#include "private/svn_temp_serializer.h" 35 36#include "fs_fs.h" 37#include "id.h" 38#include "index.h" 39#include "low_level.h" 40#include "pack.h" 41#include "util.h" 42#include "temp_serializer.h" 43 44#include "../libsvn_fs/fs-loader.h" 45#include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */ 46 47#include "svn_private_config.h" 48 49/* forward-declare. See implementation for the docstring */ 50static svn_error_t * 51block_read(void **result, 52 svn_fs_t *fs, 53 svn_revnum_t revision, 54 apr_uint64_t item_index, 55 svn_fs_fs__revision_file_t *revision_file, 56 apr_pool_t *result_pool, 57 apr_pool_t *scratch_pool); 58 59 60/* Defined this to enable access logging via dgb__log_access 61#define SVN_FS_FS__LOG_ACCESS 62 */ 63 64/* When SVN_FS_FS__LOG_ACCESS has been defined, write a line to console 65 * showing where REVISION, ITEM_INDEX is located in FS and use ITEM to 66 * show details on it's contents if not NULL. To support format 6 and 67 * earlier repos, ITEM_TYPE (SVN_FS_FS__ITEM_TYPE_*) must match ITEM. 68 * Use SCRATCH_POOL for temporary allocations. 69 * 70 * For pre-format7 repos, the display will be restricted. 71 */ 72static svn_error_t * 73dbg_log_access(svn_fs_t *fs, 74 svn_revnum_t revision, 75 apr_uint64_t item_index, 76 void *item, 77 apr_uint32_t item_type, 78 apr_pool_t *scratch_pool) 79{ 80 /* no-op if this macro is not defined */ 81#ifdef SVN_FS_FS__LOG_ACCESS 82 fs_fs_data_t *ffd = fs->fsap_data; 83 apr_off_t end_offset = 0; 84 svn_fs_fs__p2l_entry_t *entry = NULL; 85 static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop", 86 "node ", "chgs ", "rep "}; 87 const char *description = ""; 88 const char *type = types[item_type]; 89 const char *pack = ""; 90 apr_off_t offset; 91 svn_fs_fs__revision_file_t *rev_file; 92 93 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision, 94 scratch_pool)); 95 96 /* determine rev / pack file offset */ 97 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL, 98 item_index, scratch_pool)); 99 100 /* constructing the pack file description */ 101 if (revision < ffd->min_unpacked_rev) 102 pack = apr_psprintf(scratch_pool, "%4ld|", 103 revision / ffd->max_files_per_dir); 104 105 /* construct description if possible */ 106 if (item_type == SVN_FS_FS__ITEM_TYPE_NODEREV && item != NULL) 107 { 108 node_revision_t *node = item; 109 const char *data_rep 110 = node->data_rep 111 ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT, 112 node->data_rep->revision, 113 node->data_rep->item_index) 114 : ""; 115 const char *prop_rep 116 = node->prop_rep 117 ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT, 118 node->prop_rep->revision, 119 node->prop_rep->item_index) 120 : ""; 121 description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)", 122 node->created_path, 123 node->predecessor_count, 124 data_rep, 125 prop_rep); 126 } 127 else if (item_type == SVN_FS_FS__ITEM_TYPE_ANY_REP) 128 { 129 svn_fs_fs__rep_header_t *header = item; 130 if (header == NULL) 131 description = " (txdelta window)"; 132 else if (header->type == svn_fs_fs__rep_plain) 133 description = " PLAIN"; 134 else if (header->type == svn_fs_fs__rep_self_delta) 135 description = " DELTA"; 136 else 137 description = apr_psprintf(scratch_pool, 138 " DELTA against %ld/%" APR_UINT64_T_FMT, 139 header->base_revision, 140 header->base_item_index); 141 } 142 else if (item_type == SVN_FS_FS__ITEM_TYPE_CHANGES && item != NULL) 143 { 144 apr_array_header_t *changes = item; 145 switch (changes->nelts) 146 { 147 case 0: description = " no change"; 148 break; 149 case 1: description = " 1 change"; 150 break; 151 default: description = apr_psprintf(scratch_pool, " %d changes", 152 changes->nelts); 153 } 154 } 155 156 /* some info is only available in format7 repos */ 157 if (svn_fs_fs__use_log_addressing(fs)) 158 { 159 /* reverse index lookup: get item description in ENTRY */ 160 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision, 161 offset, scratch_pool)); 162 if (entry) 163 { 164 /* more details */ 165 end_offset = offset + entry->size; 166 type = types[entry->type]; 167 } 168 169 /* line output */ 170 printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n", 171 pack, (long)(offset / ffd->block_size), 172 (long)(offset % ffd->block_size), 173 (long)(end_offset / ffd->block_size), 174 (long)(end_offset % ffd->block_size), 175 type, revision, item_index, description); 176 } 177 else 178 { 179 /* reduced logging for format 6 and earlier */ 180 printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \ 181 " %s\n", 182 pack, (apr_uint64_t)(offset), type, revision, item_index, 183 description); 184 } 185 186#endif 187 188 return SVN_NO_ERROR; 189} 190 191/* Convenience wrapper around svn_io_file_aligned_seek, taking filesystem 192 FS instead of a block size. */ 193static svn_error_t * 194aligned_seek(svn_fs_t *fs, 195 apr_file_t *file, 196 apr_off_t *buffer_start, 197 apr_off_t offset, 198 apr_pool_t *pool) 199{ 200 fs_fs_data_t *ffd = fs->fsap_data; 201 return svn_error_trace(svn_io_file_aligned_seek(file, ffd->block_size, 202 buffer_start, offset, 203 pool)); 204} 205 206/* Open the revision file for revision REV in filesystem FS and store 207 the newly opened file in FILE. Seek to location OFFSET before 208 returning. Perform temporary allocations in POOL. */ 209static svn_error_t * 210open_and_seek_revision(svn_fs_fs__revision_file_t **file, 211 svn_fs_t *fs, 212 svn_revnum_t rev, 213 apr_uint64_t item, 214 apr_pool_t *pool) 215{ 216 svn_fs_fs__revision_file_t *rev_file; 217 apr_off_t offset = -1; 218 219 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, pool)); 220 221 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool, pool)); 222 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL, item, 223 pool)); 224 225 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool)); 226 227 *file = rev_file; 228 229 return SVN_NO_ERROR; 230} 231 232/* Open the representation REP for a node-revision in filesystem FS, seek 233 to its position and store the newly opened file in FILE. Perform 234 temporary allocations in POOL. */ 235static svn_error_t * 236open_and_seek_transaction(svn_fs_fs__revision_file_t **file, 237 svn_fs_t *fs, 238 representation_t *rep, 239 apr_pool_t *pool) 240{ 241 apr_off_t offset; 242 243 SVN_ERR(svn_fs_fs__open_proto_rev_file(file, fs, &rep->txn_id, pool, pool)); 244 245 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, NULL, SVN_INVALID_REVNUM, 246 &rep->txn_id, rep->item_index, pool)); 247 SVN_ERR(aligned_seek(fs, (*file)->file, NULL, offset, pool)); 248 249 return SVN_NO_ERROR; 250} 251 252/* Given a node-id ID, and a representation REP in filesystem FS, open 253 the correct file and seek to the correction location. Store this 254 file in *FILE_P. Perform any allocations in POOL. */ 255static svn_error_t * 256open_and_seek_representation(svn_fs_fs__revision_file_t **file_p, 257 svn_fs_t *fs, 258 representation_t *rep, 259 apr_pool_t *pool) 260{ 261 if (! svn_fs_fs__id_txn_used(&rep->txn_id)) 262 return open_and_seek_revision(file_p, fs, rep->revision, rep->item_index, 263 pool); 264 else 265 return open_and_seek_transaction(file_p, fs, rep, pool); 266} 267 268 269 270static svn_error_t * 271err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id) 272{ 273 svn_string_t *id_str = svn_fs_fs__id_unparse(id, fs->pool); 274 return svn_error_createf 275 (SVN_ERR_FS_ID_NOT_FOUND, 0, 276 _("Reference to non-existent node '%s' in filesystem '%s'"), 277 id_str->data, fs->path); 278} 279 280/* Return TRUE, if FS is of a format that supports block-read and the 281 feature has been enabled. */ 282static svn_boolean_t 283use_block_read(svn_fs_t *fs) 284{ 285 fs_fs_data_t *ffd = fs->fsap_data; 286 return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read; 287} 288 289/* Get the node-revision for the node ID in FS. 290 Set *NODEREV_P to the new node-revision structure, allocated in POOL. 291 See svn_fs_fs__get_node_revision, which wraps this and adds another 292 error. */ 293static svn_error_t * 294get_node_revision_body(node_revision_t **noderev_p, 295 svn_fs_t *fs, 296 const svn_fs_id_t *id, 297 apr_pool_t *result_pool, 298 apr_pool_t *scratch_pool) 299{ 300 svn_error_t *err; 301 svn_boolean_t is_cached = FALSE; 302 fs_fs_data_t *ffd = fs->fsap_data; 303 304 if (svn_fs_fs__id_is_txn(id)) 305 { 306 apr_file_t *file; 307 308 /* This is a transaction node-rev. Its storage logic is very 309 different from that of rev / pack files. */ 310 err = svn_io_file_open(&file, 311 svn_fs_fs__path_txn_node_rev(fs, id, 312 scratch_pool), 313 APR_READ | APR_BUFFERED, APR_OS_DEFAULT, 314 scratch_pool); 315 if (err) 316 { 317 if (APR_STATUS_IS_ENOENT(err->apr_err)) 318 { 319 svn_error_clear(err); 320 return svn_error_trace(err_dangling_id(fs, id)); 321 } 322 323 return svn_error_trace(err); 324 } 325 326 SVN_ERR(svn_fs_fs__read_noderev(noderev_p, 327 svn_stream_from_aprfile2(file, 328 FALSE, 329 scratch_pool), 330 result_pool, scratch_pool)); 331 } 332 else 333 { 334 svn_fs_fs__revision_file_t *revision_file; 335 336 /* noderevs in rev / pack files can be cached */ 337 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id); 338 pair_cache_key_t key = { 0 }; 339 key.revision = rev_item->revision; 340 key.second = rev_item->number; 341 342 /* Not found or not applicable. Try a noderev cache lookup. 343 * If that succeeds, we are done here. */ 344 if (ffd->node_revision_cache) 345 { 346 SVN_ERR(svn_cache__get((void **) noderev_p, 347 &is_cached, 348 ffd->node_revision_cache, 349 &key, 350 result_pool)); 351 if (is_cached) 352 return SVN_NO_ERROR; 353 } 354 355 /* read the data from disk */ 356 SVN_ERR(open_and_seek_revision(&revision_file, fs, 357 rev_item->revision, 358 rev_item->number, 359 scratch_pool)); 360 361 if (use_block_read(fs)) 362 { 363 /* block-read will parse the whole block and will also return 364 the one noderev that we need right now. */ 365 SVN_ERR(block_read((void **)noderev_p, fs, 366 rev_item->revision, 367 rev_item->number, 368 revision_file, 369 result_pool, 370 scratch_pool)); 371 } 372 else 373 { 374 /* physical addressing mode reading, parsing and caching */ 375 SVN_ERR(svn_fs_fs__read_noderev(noderev_p, 376 revision_file->stream, 377 result_pool, 378 scratch_pool)); 379 380 /* Workaround issue #4031: is-fresh-txn-root in revision files. */ 381 (*noderev_p)->is_fresh_txn_root = FALSE; 382 383 /* The noderev is not in cache, yet. Add it, if caching has been enabled. */ 384 if (ffd->node_revision_cache) 385 SVN_ERR(svn_cache__set(ffd->node_revision_cache, 386 &key, 387 *noderev_p, 388 scratch_pool)); 389 } 390 391 SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); 392 } 393 394 return SVN_NO_ERROR; 395} 396 397svn_error_t * 398svn_fs_fs__get_node_revision(node_revision_t **noderev_p, 399 svn_fs_t *fs, 400 const svn_fs_id_t *id, 401 apr_pool_t *result_pool, 402 apr_pool_t *scratch_pool) 403{ 404 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id); 405 406 svn_error_t *err = get_node_revision_body(noderev_p, fs, id, 407 result_pool, scratch_pool); 408 if (err && err->apr_err == SVN_ERR_FS_CORRUPT) 409 { 410 svn_string_t *id_string = svn_fs_fs__id_unparse(id, scratch_pool); 411 return svn_error_createf(SVN_ERR_FS_CORRUPT, err, 412 "Corrupt node-revision '%s'", 413 id_string->data); 414 } 415 416 SVN_ERR(dbg_log_access(fs, 417 rev_item->revision, 418 rev_item->number, 419 *noderev_p, 420 SVN_FS_FS__ITEM_TYPE_NODEREV, 421 scratch_pool)); 422 423 return svn_error_trace(err); 424} 425 426 427/* Given a revision file REV_FILE, opened to REV in FS, find the Node-ID 428 of the header located at OFFSET and store it in *ID_P. Allocate 429 temporary variables from POOL. */ 430static svn_error_t * 431get_fs_id_at_offset(svn_fs_id_t **id_p, 432 svn_fs_fs__revision_file_t *rev_file, 433 svn_fs_t *fs, 434 svn_revnum_t rev, 435 apr_off_t offset, 436 apr_pool_t *pool) 437{ 438 node_revision_t *noderev; 439 440 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool)); 441 SVN_ERR(svn_fs_fs__read_noderev(&noderev, 442 rev_file->stream, 443 pool, pool)); 444 445 /* noderev->id is const, get rid of that */ 446 *id_p = svn_fs_fs__id_copy(noderev->id, pool); 447 448 /* assert that the txn_id is REV 449 * (asserting on offset would be harder because we the rev_offset is not 450 * known here) */ 451 assert(svn_fs_fs__id_rev(*id_p) == rev); 452 453 return SVN_NO_ERROR; 454} 455 456 457/* Given an open revision file REV_FILE in FS for REV, locate the trailer that 458 specifies the offset to the root node-id and to the changed path 459 information. Store the root node offset in *ROOT_OFFSET and the 460 changed path offset in *CHANGES_OFFSET. If either of these 461 pointers is NULL, do nothing with it. 462 463 Allocate temporary variables from POOL. */ 464static svn_error_t * 465get_root_changes_offset(apr_off_t *root_offset, 466 apr_off_t *changes_offset, 467 svn_fs_fs__revision_file_t *rev_file, 468 svn_fs_t *fs, 469 svn_revnum_t rev, 470 apr_pool_t *pool) 471{ 472 fs_fs_data_t *ffd = fs->fsap_data; 473 apr_off_t rev_offset; 474 apr_seek_where_t seek_relative; 475 svn_stringbuf_t *trailer; 476 char buffer[64]; 477 apr_off_t start; 478 apr_off_t end; 479 apr_size_t len; 480 481 /* Determine where to seek to in the file. 482 483 If we've got a pack file, we want to seek to the end of the desired 484 revision. But we don't track that, so we seek to the beginning of the 485 next revision. 486 487 Unless the next revision is in a different file, in which case, we can 488 just seek to the end of the pack file -- just like we do in the 489 non-packed case. */ 490 if (rev_file->is_packed && ((rev + 1) % ffd->max_files_per_dir != 0)) 491 { 492 SVN_ERR(svn_fs_fs__get_packed_offset(&end, fs, rev + 1, pool)); 493 seek_relative = APR_SET; 494 } 495 else 496 { 497 seek_relative = APR_END; 498 end = 0; 499 } 500 501 /* Offset of the revision from the start of the pack file, if applicable. */ 502 if (rev_file->is_packed) 503 SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, rev, pool)); 504 else 505 rev_offset = 0; 506 507 /* We will assume that the last line containing the two offsets 508 will never be longer than 64 characters. */ 509 SVN_ERR(svn_io_file_seek(rev_file->file, seek_relative, &end, pool)); 510 511 if (end < sizeof(buffer)) 512 { 513 len = (apr_size_t)end; 514 start = 0; 515 } 516 else 517 { 518 len = sizeof(buffer); 519 start = end - sizeof(buffer); 520 } 521 522 /* Read in this last block, from which we will identify the last line. */ 523 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, start, pool)); 524 SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len, NULL, NULL, 525 pool)); 526 527 /* Parse the last line. */ 528 trailer = svn_stringbuf_ncreate(buffer, len, pool); 529 SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset, 530 changes_offset, 531 trailer, 532 rev)); 533 534 /* return absolute offsets */ 535 if (root_offset) 536 *root_offset += rev_offset; 537 if (changes_offset) 538 *changes_offset += rev_offset; 539 540 return SVN_NO_ERROR; 541} 542 543svn_error_t * 544svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p, 545 svn_fs_t *fs, 546 svn_revnum_t rev, 547 apr_pool_t *result_pool, 548 apr_pool_t *scratch_pool) 549{ 550 fs_fs_data_t *ffd = fs->fsap_data; 551 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool)); 552 553 if (svn_fs_fs__use_log_addressing(fs)) 554 { 555 *root_id_p = svn_fs_fs__id_create_root(rev, result_pool); 556 } 557 else 558 { 559 svn_fs_fs__revision_file_t *revision_file; 560 apr_off_t root_offset; 561 svn_fs_id_t *root_id = NULL; 562 svn_boolean_t is_cached; 563 564 SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached, 565 ffd->rev_root_id_cache, &rev, result_pool)); 566 if (is_cached) 567 return SVN_NO_ERROR; 568 569 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev, 570 scratch_pool, scratch_pool)); 571 SVN_ERR(get_root_changes_offset(&root_offset, NULL, 572 revision_file, fs, rev, 573 scratch_pool)); 574 575 SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev, 576 root_offset, result_pool)); 577 578 SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); 579 580 SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id, 581 scratch_pool)); 582 583 *root_id_p = root_id; 584 } 585 586 return SVN_NO_ERROR; 587} 588 589/* Describes a lazily opened rev / pack file. Instances will be shared 590 between multiple instances of rep_state_t. */ 591typedef struct shared_file_t 592{ 593 /* The opened file. NULL while file is not open, yet. */ 594 svn_fs_fs__revision_file_t *rfile; 595 596 /* file system to open the file in */ 597 svn_fs_t *fs; 598 599 /* a revision contained in the FILE. Since this file may be shared, 600 that value may be different from REP_STATE_T->REVISION. */ 601 svn_revnum_t revision; 602 603 /* pool to use when creating the FILE. This guarantees that the file 604 remains open / valid beyond the respective local context that required 605 the file to be opened eventually. */ 606 apr_pool_t *pool; 607} shared_file_t; 608 609/* Represents where in the current svndiff data block each 610 representation is. */ 611typedef struct rep_state_t 612{ 613 /* shared lazy-open rev/pack file structure */ 614 shared_file_t *sfile; 615 /* The txdelta window cache to use or NULL. */ 616 svn_cache__t *raw_window_cache; 617 /* Caches raw (unparsed) windows. May be NULL. */ 618 svn_cache__t *window_cache; 619 /* Caches un-deltified windows. May be NULL. */ 620 svn_cache__t *combined_cache; 621 /* revision containing the representation */ 622 svn_revnum_t revision; 623 /* representation's item index in REVISION */ 624 apr_uint64_t item_index; 625 /* length of the header at the start of the rep. 626 0 iff this is rep is stored in a container 627 (i.e. does not have a header) */ 628 apr_size_t header_size; 629 apr_off_t start; /* The starting offset for the raw 630 svndiff/plaintext data minus header. 631 -1 if the offset is yet unknown. */ 632 apr_off_t current;/* The current offset relative to START. */ 633 apr_off_t size; /* The on-disk size of the representation. */ 634 int ver; /* If a delta, what svndiff version? 635 -1 for unknown delta version. */ 636 int chunk_index; /* number of the window to read */ 637} rep_state_t; 638 639/* Simple wrapper around svn_fs_fs__get_file_offset to simplify callers. */ 640static svn_error_t * 641get_file_offset(apr_off_t *offset, 642 rep_state_t *rs, 643 apr_pool_t *pool) 644{ 645 return svn_error_trace(svn_fs_fs__get_file_offset(offset, 646 rs->sfile->rfile->file, 647 pool)); 648} 649 650/* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */ 651static svn_error_t * 652rs_aligned_seek(rep_state_t *rs, 653 apr_off_t *buffer_start, 654 apr_off_t offset, 655 apr_pool_t *pool) 656{ 657 fs_fs_data_t *ffd = rs->sfile->fs->fsap_data; 658 return svn_error_trace(svn_io_file_aligned_seek(rs->sfile->rfile->file, 659 ffd->block_size, 660 buffer_start, offset, 661 pool)); 662} 663 664/* Open FILE->FILE and FILE->STREAM if they haven't been opened, yet. */ 665static svn_error_t* 666auto_open_shared_file(shared_file_t *file) 667{ 668 if (file->rfile == NULL) 669 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file->rfile, file->fs, 670 file->revision, file->pool, 671 file->pool)); 672 673 return SVN_NO_ERROR; 674} 675 676/* Set RS->START to the begin of the representation raw in RS->FILE->FILE, 677 if that hasn't been done yet. Use POOL for temporary allocations. */ 678static svn_error_t* 679auto_set_start_offset(rep_state_t *rs, apr_pool_t *pool) 680{ 681 if (rs->start == -1) 682 { 683 SVN_ERR(svn_fs_fs__item_offset(&rs->start, rs->sfile->fs, 684 rs->sfile->rfile, rs->revision, NULL, 685 rs->item_index, pool)); 686 rs->start += rs->header_size; 687 } 688 689 return SVN_NO_ERROR; 690} 691 692/* Set RS->VER depending on what is found in the already open RS->FILE->FILE 693 if the diff version is still unknown. Use POOL for temporary allocations. 694 */ 695static svn_error_t* 696auto_read_diff_version(rep_state_t *rs, apr_pool_t *pool) 697{ 698 if (rs->ver == -1) 699 { 700 char buf[4]; 701 SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, pool)); 702 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf, 703 sizeof(buf), NULL, NULL, pool)); 704 705 /* ### Layering violation */ 706 if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N'))) 707 return svn_error_create 708 (SVN_ERR_FS_CORRUPT, NULL, 709 _("Malformed svndiff data in representation")); 710 rs->ver = buf[3]; 711 712 rs->chunk_index = 0; 713 rs->current = 4; 714 } 715 716 return SVN_NO_ERROR; 717} 718 719/* See create_rep_state, which wraps this and adds another error. */ 720static svn_error_t * 721create_rep_state_body(rep_state_t **rep_state, 722 svn_fs_fs__rep_header_t **rep_header, 723 shared_file_t **shared_file, 724 representation_t *rep, 725 svn_fs_t *fs, 726 apr_pool_t *result_pool, 727 apr_pool_t *scratch_pool) 728{ 729 fs_fs_data_t *ffd = fs->fsap_data; 730 rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs)); 731 svn_fs_fs__rep_header_t *rh; 732 svn_boolean_t is_cached = FALSE; 733 apr_uint64_t estimated_window_storage; 734 735 /* If the hint is 736 * - given, 737 * - refers to a valid revision, 738 * - refers to a packed revision, 739 * - as does the rep we want to read, and 740 * - refers to the same pack file as the rep 741 * we can re-use the same, already open file object 742 */ 743 svn_boolean_t reuse_shared_file 744 = shared_file && *shared_file && (*shared_file)->rfile 745 && SVN_IS_VALID_REVNUM((*shared_file)->revision) 746 && (*shared_file)->revision < ffd->min_unpacked_rev 747 && rep->revision < ffd->min_unpacked_rev 748 && ( ((*shared_file)->revision / ffd->max_files_per_dir) 749 == (rep->revision / ffd->max_files_per_dir)); 750 751 pair_cache_key_t key; 752 key.revision = rep->revision; 753 key.second = rep->item_index; 754 755 /* continue constructing RS and RA */ 756 rs->size = rep->size; 757 rs->revision = rep->revision; 758 rs->item_index = rep->item_index; 759 rs->raw_window_cache = ffd->raw_window_cache; 760 rs->ver = -1; 761 rs->start = -1; 762 763 /* Very long files stored as self-delta will produce a huge number of 764 delta windows. Don't cache them lest we don't thrash the cache. 765 Since we don't know the depth of the delta chain, let's assume, the 766 whole contents get rewritten 3 times. 767 */ 768 estimated_window_storage 769 = 4 * ( (rep->expanded_size ? rep->expanded_size : rep->size) 770 + SVN_DELTA_WINDOW_SIZE); 771 estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX); 772 773 rs->window_cache = ffd->txdelta_window_cache 774 && svn_cache__is_cachable(ffd->txdelta_window_cache, 775 (apr_size_t)estimated_window_storage) 776 ? ffd->txdelta_window_cache 777 : NULL; 778 rs->combined_cache = ffd->combined_window_cache 779 && svn_cache__is_cachable(ffd->combined_window_cache, 780 (apr_size_t)estimated_window_storage) 781 ? ffd->combined_window_cache 782 : NULL; 783 784 /* cache lookup, i.e. skip reading the rep header if possible */ 785 if (ffd->rep_header_cache && !svn_fs_fs__id_txn_used(&rep->txn_id)) 786 SVN_ERR(svn_cache__get((void **) &rh, &is_cached, 787 ffd->rep_header_cache, &key, result_pool)); 788 789 /* initialize the (shared) FILE member in RS */ 790 if (reuse_shared_file) 791 { 792 rs->sfile = *shared_file; 793 } 794 else 795 { 796 shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file)); 797 file->revision = rep->revision; 798 file->pool = result_pool; 799 file->fs = fs; 800 rs->sfile = file; 801 802 /* remember the current file, if suggested by the caller */ 803 if (shared_file) 804 *shared_file = file; 805 } 806 807 /* read rep header, if necessary */ 808 if (!is_cached) 809 { 810 /* ensure file is open and navigate to the start of rep header */ 811 if (reuse_shared_file) 812 { 813 apr_off_t offset; 814 815 /* ... we can re-use the same, already open file object. 816 * This implies that we don't read from a txn. 817 */ 818 rs->sfile = *shared_file; 819 SVN_ERR(auto_open_shared_file(rs->sfile)); 820 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rs->sfile->rfile, 821 rep->revision, NULL, rep->item_index, 822 scratch_pool)); 823 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool)); 824 } 825 else 826 { 827 /* otherwise, create a new file object. May or may not be 828 * an in-txn file. 829 */ 830 SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep, 831 result_pool)); 832 } 833 834 SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream, 835 result_pool, scratch_pool)); 836 SVN_ERR(get_file_offset(&rs->start, rs, result_pool)); 837 838 /* populate the cache if appropriate */ 839 if (! svn_fs_fs__id_txn_used(&rep->txn_id)) 840 { 841 if (use_block_read(fs)) 842 SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index, 843 rs->sfile->rfile, result_pool, scratch_pool)); 844 else 845 if (ffd->rep_header_cache) 846 SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh, 847 scratch_pool)); 848 } 849 } 850 851 /* finalize */ 852 SVN_ERR(dbg_log_access(fs, rep->revision, rep->item_index, rh, 853 SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool)); 854 855 rs->header_size = rh->header_size; 856 *rep_state = rs; 857 *rep_header = rh; 858 859 if (rh->type == svn_fs_fs__rep_plain) 860 /* This is a plaintext, so just return the current rep_state. */ 861 return SVN_NO_ERROR; 862 863 /* skip "SVNx" diff marker */ 864 rs->current = 4; 865 866 return SVN_NO_ERROR; 867} 868 869/* Read the rep args for REP in filesystem FS and create a rep_state 870 for reading the representation. Return the rep_state in *REP_STATE 871 and the rep header in *REP_HEADER, both allocated in POOL. 872 873 When reading multiple reps, i.e. a skip delta chain, you may provide 874 non-NULL SHARED_FILE. (If SHARED_FILE is not NULL, in the first 875 call it should be a pointer to NULL.) The function will use this 876 variable to store the previous call results and tries to re-use it. 877 This may result in significant savings in I/O for packed files and 878 number of open file handles. 879 */ 880static svn_error_t * 881create_rep_state(rep_state_t **rep_state, 882 svn_fs_fs__rep_header_t **rep_header, 883 shared_file_t **shared_file, 884 representation_t *rep, 885 svn_fs_t *fs, 886 apr_pool_t *result_pool, 887 apr_pool_t *scratch_pool) 888{ 889 svn_error_t *err = create_rep_state_body(rep_state, rep_header, 890 shared_file, rep, fs, 891 result_pool, scratch_pool); 892 if (err && err->apr_err == SVN_ERR_FS_CORRUPT) 893 { 894 fs_fs_data_t *ffd = fs->fsap_data; 895 const char *rep_str; 896 897 /* ### This always returns "-1" for transaction reps, because 898 ### this particular bit of code doesn't know if the rep is 899 ### stored in the protorev or in the mutable area (for props 900 ### or dir contents). It is pretty rare for FSFS to *read* 901 ### from the protorev file, though, so this is probably OK. 902 ### And anyone going to debug corruption errors is probably 903 ### going to jump straight to this comment anyway! */ 904 rep_str = rep 905 ? svn_fs_fs__unparse_representation 906 (rep, ffd->format, TRUE, scratch_pool, scratch_pool)->data 907 : "(null)"; 908 909 return svn_error_createf(SVN_ERR_FS_CORRUPT, err, 910 "Corrupt representation '%s'", 911 rep_str); 912 } 913 /* ### Call representation_string() ? */ 914 return svn_error_trace(err); 915} 916 917svn_error_t * 918svn_fs_fs__check_rep(representation_t *rep, 919 svn_fs_t *fs, 920 void **hint, 921 apr_pool_t *scratch_pool) 922{ 923 if (svn_fs_fs__use_log_addressing(fs)) 924 { 925 apr_off_t offset; 926 svn_fs_fs__p2l_entry_t *entry; 927 svn_fs_fs__revision_file_t *rev_file = NULL; 928 929 /* Reuse the revision file provided by *HINT, if it is given and 930 * actually the rev / pack file that we want. */ 931 svn_revnum_t start_rev = svn_fs_fs__packed_base_rev(fs, rep->revision); 932 if (hint) 933 rev_file = *(svn_fs_fs__revision_file_t **)hint; 934 935 if (rev_file == NULL || rev_file->start_revision != start_rev) 936 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision, 937 scratch_pool, scratch_pool)); 938 939 if (hint) 940 *hint = rev_file; 941 942 /* This will auto-retry if there was a background pack. */ 943 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision, 944 NULL, rep->item_index, scratch_pool)); 945 946 /* This may fail if there is a background pack operation (can't auto- 947 retry because the item offset lookup has to be redone as well). */ 948 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, 949 rep->revision, offset, 950 scratch_pool, scratch_pool)); 951 952 if ( entry == NULL 953 || entry->type < SVN_FS_FS__ITEM_TYPE_FILE_REP 954 || entry->type > SVN_FS_FS__ITEM_TYPE_DIR_PROPS) 955 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 956 _("No representation found at offset %s " 957 "for item %s in revision %ld"), 958 apr_off_t_toa(scratch_pool, offset), 959 apr_psprintf(scratch_pool, 960 "%" APR_UINT64_T_FMT, 961 rep->item_index), 962 rep->revision); 963 } 964 else 965 { 966 rep_state_t *rs; 967 svn_fs_fs__rep_header_t *rep_header; 968 969 /* ### Should this be using read_rep_line() directly? */ 970 SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint, 971 rep, fs, scratch_pool, scratch_pool)); 972 } 973 974 return SVN_NO_ERROR; 975} 976 977svn_error_t * 978svn_fs_fs__rep_chain_length(int *chain_length, 979 int *shard_count, 980 representation_t *rep, 981 svn_fs_t *fs, 982 apr_pool_t *scratch_pool) 983{ 984 fs_fs_data_t *ffd = fs->fsap_data; 985 svn_revnum_t shard_size = ffd->max_files_per_dir 986 ? ffd->max_files_per_dir 987 : 1; 988 apr_pool_t *subpool = svn_pool_create(scratch_pool); 989 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 990 svn_boolean_t is_delta = FALSE; 991 int count = 0; 992 int shards = 1; 993 svn_revnum_t last_shard = rep->revision / shard_size; 994 995 /* Check whether the length of the deltification chain is acceptable. 996 * Otherwise, shared reps may form a non-skipping delta chain in 997 * extreme cases. */ 998 representation_t base_rep = *rep; 999 1000 /* re-use open files between iterations */ 1001 shared_file_t *file_hint = NULL; 1002 1003 svn_fs_fs__rep_header_t *header; 1004 1005 /* follow the delta chain towards the end but for at most 1006 * MAX_CHAIN_LENGTH steps. */ 1007 do 1008 { 1009 rep_state_t *rep_state; 1010 1011 svn_pool_clear(iterpool); 1012 1013 if (base_rep.revision / shard_size != last_shard) 1014 { 1015 last_shard = base_rep.revision / shard_size; 1016 ++shards; 1017 } 1018 1019 SVN_ERR(create_rep_state_body(&rep_state, 1020 &header, 1021 &file_hint, 1022 &base_rep, 1023 fs, 1024 subpool, 1025 iterpool)); 1026 1027 base_rep.revision = header->base_revision; 1028 base_rep.item_index = header->base_item_index; 1029 base_rep.size = header->base_length; 1030 svn_fs_fs__id_txn_reset(&base_rep.txn_id); 1031 is_delta = header->type == svn_fs_fs__rep_delta; 1032 1033 /* Clear it the SUBPOOL once in a while. Doing it too frequently 1034 * renders the FILE_HINT ineffective. Doing too infrequently, may 1035 * leave us with too many open file handles. 1036 * 1037 * Note that this is mostly about efficiency, with larger values 1038 * being more efficient, and any non-zero value is legal here. When 1039 * reading deltified contents, we may keep 10s of rev files open at 1040 * the same time and the system has to cope with that. Thus, the 1041 * limit of 16 chosen below is in the same ballpark. 1042 */ 1043 ++count; 1044 if (count % 16 == 0) 1045 { 1046 file_hint = NULL; 1047 svn_pool_clear(subpool); 1048 } 1049 } 1050 while (is_delta && base_rep.revision); 1051 1052 *chain_length = count; 1053 *shard_count = shards; 1054 svn_pool_destroy(subpool); 1055 svn_pool_destroy(iterpool); 1056 1057 return SVN_NO_ERROR; 1058} 1059 1060struct rep_read_baton 1061{ 1062 /* The FS from which we're reading. */ 1063 svn_fs_t *fs; 1064 1065 /* Representation to read. */ 1066 representation_t rep; 1067 1068 /* If not NULL, this is the base for the first delta window in rs_list */ 1069 svn_stringbuf_t *base_window; 1070 1071 /* The state of all prior delta representations. */ 1072 apr_array_header_t *rs_list; 1073 1074 /* The plaintext state, if there is a plaintext. */ 1075 rep_state_t *src_state; 1076 1077 /* The index of the current delta chunk, if we are reading a delta. */ 1078 int chunk_index; 1079 1080 /* The buffer where we store undeltified data. */ 1081 char *buf; 1082 apr_size_t buf_pos; 1083 apr_size_t buf_len; 1084 1085 /* A checksum context for summing the data read in order to verify it. 1086 Note: we don't need to use the sha1 checksum because we're only doing 1087 data verification, for which md5 is perfectly safe. */ 1088 svn_checksum_ctx_t *md5_checksum_ctx; 1089 1090 svn_boolean_t checksum_finalized; 1091 1092 /* The stored checksum of the representation we are reading, its 1093 length, and the amount we've read so far. Some of this 1094 information is redundant with rs_list and src_state, but it's 1095 convenient for the checksumming code to have it here. */ 1096 unsigned char md5_digest[APR_MD5_DIGESTSIZE]; 1097 1098 svn_filesize_t len; 1099 svn_filesize_t off; 1100 1101 /* The key for the fulltext cache for this rep, if there is a 1102 fulltext cache. */ 1103 pair_cache_key_t fulltext_cache_key; 1104 /* The text we've been reading, if we're going to cache it. */ 1105 svn_stringbuf_t *current_fulltext; 1106 1107 /* If not NULL, attempt to read the data from this cache. 1108 Once that lookup fails, reset it to NULL. */ 1109 svn_cache__t *fulltext_cache; 1110 1111 /* Bytes delivered from the FULLTEXT_CACHE so far. If the next 1112 lookup fails, we need to skip that much data from the reconstructed 1113 window stream before we continue normal operation. */ 1114 svn_filesize_t fulltext_delivered; 1115 1116 /* Used for temporary allocations during the read. */ 1117 apr_pool_t *pool; 1118 1119 /* Pool used to store file handles and other data that is persistant 1120 for the entire stream read. */ 1121 apr_pool_t *filehandle_pool; 1122}; 1123 1124/* Set window key in *KEY to address the window described by RS. 1125 For convenience, return the KEY. */ 1126static window_cache_key_t * 1127get_window_key(window_cache_key_t *key, rep_state_t *rs) 1128{ 1129 assert(rs->revision <= APR_UINT32_MAX); 1130 key->revision = (apr_uint32_t)rs->revision; 1131 key->item_index = rs->item_index; 1132 key->chunk_index = rs->chunk_index; 1133 1134 return key; 1135} 1136 1137/* Implement svn_cache__partial_getter_func_t for raw txdelta windows. 1138 * Parse the raw data and return a svn_fs_fs__txdelta_cached_window_t. 1139 */ 1140static svn_error_t * 1141parse_raw_window(void **out, 1142 const void *data, 1143 apr_size_t data_len, 1144 void *baton, 1145 apr_pool_t *result_pool) 1146{ 1147 svn_string_t raw_window; 1148 svn_stream_t *stream; 1149 1150 /* unparsed and parsed window */ 1151 const svn_fs_fs__raw_cached_window_t *window 1152 = (const svn_fs_fs__raw_cached_window_t *)data; 1153 svn_fs_fs__txdelta_cached_window_t *result 1154 = apr_pcalloc(result_pool, sizeof(*result)); 1155 1156 /* create a read stream taking the raw window as input */ 1157 raw_window.data = svn_temp_deserializer__ptr(window, 1158 (const void * const *)&window->window.data); 1159 raw_window.len = window->window.len; 1160 stream = svn_stream_from_string(&raw_window, result_pool); 1161 1162 /* parse it */ 1163 SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, 1, 1164 result_pool)); 1165 1166 /* complete the window and return it */ 1167 result->end_offset = window->end_offset; 1168 *out = result; 1169 1170 return SVN_NO_ERROR; 1171} 1172 1173 1174/* Read the WINDOW_P number CHUNK_INDEX for the representation given in 1175 * rep state RS from the current FSFS session's cache. This will be a 1176 * no-op and IS_CACHED will be set to FALSE if no cache has been given. 1177 * If a cache is available IS_CACHED will inform the caller about the 1178 * success of the lookup. Allocations of the window in will be made 1179 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations. 1180 * 1181 * If the information could be found, put RS to CHUNK_INDEX. 1182 */ 1183static svn_error_t * 1184get_cached_window(svn_txdelta_window_t **window_p, 1185 rep_state_t *rs, 1186 int chunk_index, 1187 svn_boolean_t *is_cached, 1188 apr_pool_t *result_pool, 1189 apr_pool_t *scratch_pool) 1190{ 1191 if (! rs->window_cache) 1192 { 1193 /* txdelta window has not been enabled */ 1194 *is_cached = FALSE; 1195 } 1196 else 1197 { 1198 /* ask the cache for the desired txdelta window */ 1199 svn_fs_fs__txdelta_cached_window_t *cached_window; 1200 window_cache_key_t key = { 0 }; 1201 get_window_key(&key, rs); 1202 key.chunk_index = chunk_index; 1203 SVN_ERR(svn_cache__get((void **) &cached_window, 1204 is_cached, 1205 rs->window_cache, 1206 &key, 1207 result_pool)); 1208 1209 /* If we did not find a parsed txdelta window, we might have a raw 1210 version of it in our cache. If so, read, parse and re-cache it. */ 1211 if (!*is_cached && rs->raw_window_cache) 1212 { 1213 SVN_ERR(svn_cache__get_partial((void **) &cached_window, is_cached, 1214 rs->raw_window_cache, &key, 1215 parse_raw_window, NULL, result_pool)); 1216 if (*is_cached) 1217 SVN_ERR(svn_cache__set(rs->window_cache, &key, cached_window, 1218 scratch_pool)); 1219 } 1220 1221 /* Return cached information. */ 1222 if (*is_cached) 1223 { 1224 /* found it. Pass it back to the caller. */ 1225 *window_p = cached_window->window; 1226 1227 /* manipulate the RS as if we just read the data */ 1228 rs->current = cached_window->end_offset; 1229 rs->chunk_index = chunk_index; 1230 } 1231 } 1232 1233 return SVN_NO_ERROR; 1234} 1235 1236/* Store the WINDOW read for the rep state RS in the current FSFS 1237 * session's cache. This will be a no-op if no cache has been given. 1238 * Temporary allocations will be made from SCRATCH_POOL. */ 1239static svn_error_t * 1240set_cached_window(svn_txdelta_window_t *window, 1241 rep_state_t *rs, 1242 apr_pool_t *scratch_pool) 1243{ 1244 if (rs->window_cache) 1245 { 1246 /* store the window and the first offset _past_ it */ 1247 svn_fs_fs__txdelta_cached_window_t cached_window; 1248 window_cache_key_t key = {0}; 1249 1250 cached_window.window = window; 1251 cached_window.end_offset = rs->current; 1252 1253 /* but key it with the start offset because that is the known state 1254 * when we will look it up */ 1255 SVN_ERR(svn_cache__set(rs->window_cache, 1256 get_window_key(&key, rs), 1257 &cached_window, 1258 scratch_pool)); 1259 } 1260 1261 return SVN_NO_ERROR; 1262} 1263 1264/* Read the WINDOW_P for the rep state RS from the current FSFS session's 1265 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no 1266 * cache has been given. If a cache is available IS_CACHED will inform 1267 * the caller about the success of the lookup. Allocations (of the window 1268 * in particular) will be made from POOL. 1269 */ 1270static svn_error_t * 1271get_cached_combined_window(svn_stringbuf_t **window_p, 1272 rep_state_t *rs, 1273 svn_boolean_t *is_cached, 1274 apr_pool_t *pool) 1275{ 1276 if (! rs->combined_cache) 1277 { 1278 /* txdelta window has not been enabled */ 1279 *is_cached = FALSE; 1280 } 1281 else 1282 { 1283 /* ask the cache for the desired txdelta window */ 1284 window_cache_key_t key = { 0 }; 1285 return svn_cache__get((void **)window_p, 1286 is_cached, 1287 rs->combined_cache, 1288 get_window_key(&key, rs), 1289 pool); 1290 } 1291 1292 return SVN_NO_ERROR; 1293} 1294 1295/* Store the WINDOW read for the rep state RS in the current FSFS session's 1296 * cache. This will be a no-op if no cache has been given. 1297 * Temporary allocations will be made from SCRATCH_POOL. */ 1298static svn_error_t * 1299set_cached_combined_window(svn_stringbuf_t *window, 1300 rep_state_t *rs, 1301 apr_pool_t *scratch_pool) 1302{ 1303 if (rs->combined_cache) 1304 { 1305 /* but key it with the start offset because that is the known state 1306 * when we will look it up */ 1307 window_cache_key_t key = { 0 }; 1308 return svn_cache__set(rs->combined_cache, 1309 get_window_key(&key, rs), 1310 window, 1311 scratch_pool); 1312 } 1313 1314 return SVN_NO_ERROR; 1315} 1316 1317/* Build an array of rep_state structures in *LIST giving the delta 1318 reps from first_rep to a plain-text or self-compressed rep. Set 1319 *SRC_STATE to the plain-text rep we find at the end of the chain, 1320 or to NULL if the final delta representation is self-compressed. 1321 The representation to start from is designated by filesystem FS, id 1322 ID, and representation REP. 1323 Also, set *WINDOW_P to the base window content for *LIST, if it 1324 could be found in cache. Otherwise, *LIST will contain the base 1325 representation for the whole delta chain. 1326 Finally, return the expanded size of the representation in 1327 *EXPANDED_SIZE. It will take care of cases where only the on-disk 1328 size is known. */ 1329static svn_error_t * 1330build_rep_list(apr_array_header_t **list, 1331 svn_stringbuf_t **window_p, 1332 rep_state_t **src_state, 1333 svn_filesize_t *expanded_size, 1334 svn_fs_t *fs, 1335 representation_t *first_rep, 1336 apr_pool_t *pool) 1337{ 1338 representation_t rep; 1339 rep_state_t *rs = NULL; 1340 svn_fs_fs__rep_header_t *rep_header; 1341 svn_boolean_t is_cached = FALSE; 1342 shared_file_t *shared_file = NULL; 1343 apr_pool_t *iterpool = svn_pool_create(pool); 1344 1345 *list = apr_array_make(pool, 1, sizeof(rep_state_t *)); 1346 rep = *first_rep; 1347 1348 /* The value as stored in the data struct. 1349 0 is either for unknown length or actually zero length. */ 1350 *expanded_size = first_rep->expanded_size; 1351 1352 /* for the top-level rep, we need the rep_args */ 1353 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool, 1354 iterpool)); 1355 1356 /* Unknown size or empty representation? 1357 That implies the this being the first iteration. 1358 Usually size equals on-disk size, except for empty, 1359 compressed representations (delta, size = 4). 1360 Please note that for all non-empty deltas have 1361 a 4-byte header _plus_ some data. */ 1362 if (*expanded_size == 0) 1363 if (rep_header->type == svn_fs_fs__rep_plain || first_rep->size != 4) 1364 *expanded_size = first_rep->size; 1365 1366 while (1) 1367 { 1368 svn_pool_clear(iterpool); 1369 1370 /* fetch state, if that has not been done already */ 1371 if (!rs) 1372 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, 1373 &rep, fs, pool, iterpool)); 1374 1375 /* for txn reps, there won't be a cached combined window */ 1376 if (!svn_fs_fs__id_txn_used(&rep.txn_id)) 1377 SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool)); 1378 1379 if (is_cached) 1380 { 1381 /* We already have a reconstructed window in our cache. 1382 Write a pseudo rep_state with the full length. */ 1383 rs->start = 0; 1384 rs->current = 0; 1385 rs->size = (*window_p)->len; 1386 *src_state = rs; 1387 break; 1388 } 1389 1390 if (rep_header->type == svn_fs_fs__rep_plain) 1391 { 1392 /* This is a plaintext, so just return the current rep_state. */ 1393 *src_state = rs; 1394 break; 1395 } 1396 1397 /* Push this rep onto the list. If it's self-compressed, we're done. */ 1398 APR_ARRAY_PUSH(*list, rep_state_t *) = rs; 1399 if (rep_header->type == svn_fs_fs__rep_self_delta) 1400 { 1401 *src_state = NULL; 1402 break; 1403 } 1404 1405 rep.revision = rep_header->base_revision; 1406 rep.item_index = rep_header->base_item_index; 1407 rep.size = rep_header->base_length; 1408 svn_fs_fs__id_txn_reset(&rep.txn_id); 1409 1410 rs = NULL; 1411 } 1412 svn_pool_destroy(iterpool); 1413 1414 return SVN_NO_ERROR; 1415} 1416 1417 1418/* Create a rep_read_baton structure for node revision NODEREV in 1419 filesystem FS and store it in *RB_P. Perform all allocations in 1420 POOL. If rep is mutable, it must be for file contents. */ 1421static svn_error_t * 1422rep_read_get_baton(struct rep_read_baton **rb_p, 1423 svn_fs_t *fs, 1424 representation_t *rep, 1425 pair_cache_key_t fulltext_cache_key, 1426 apr_pool_t *pool) 1427{ 1428 struct rep_read_baton *b; 1429 1430 b = apr_pcalloc(pool, sizeof(*b)); 1431 b->fs = fs; 1432 b->rep = *rep; 1433 b->base_window = NULL; 1434 b->chunk_index = 0; 1435 b->buf = NULL; 1436 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool); 1437 b->checksum_finalized = FALSE; 1438 memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest)); 1439 b->len = rep->expanded_size; 1440 b->off = 0; 1441 b->fulltext_cache_key = fulltext_cache_key; 1442 b->pool = svn_pool_create(pool); 1443 b->filehandle_pool = svn_pool_create(pool); 1444 b->fulltext_cache = NULL; 1445 b->fulltext_delivered = 0; 1446 b->current_fulltext = NULL; 1447 1448 /* Save our output baton. */ 1449 *rb_p = b; 1450 1451 return SVN_NO_ERROR; 1452} 1453 1454/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta 1455 window into *NWIN. Note that RS->CHUNK_INDEX will be THIS_CHUNK rather 1456 than THIS_CHUNK + 1 when this function returns. */ 1457static svn_error_t * 1458read_delta_window(svn_txdelta_window_t **nwin, int this_chunk, 1459 rep_state_t *rs, apr_pool_t *result_pool, 1460 apr_pool_t *scratch_pool) 1461{ 1462 svn_boolean_t is_cached; 1463 apr_off_t start_offset; 1464 apr_off_t end_offset; 1465 apr_pool_t *iterpool; 1466 1467 SVN_ERR_ASSERT(rs->chunk_index <= this_chunk); 1468 1469 SVN_ERR(dbg_log_access(rs->sfile->fs, rs->revision, rs->item_index, 1470 NULL, SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool)); 1471 1472 /* Read the next window. But first, try to find it in the cache. */ 1473 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached, 1474 result_pool, scratch_pool)); 1475 if (is_cached) 1476 return SVN_NO_ERROR; 1477 1478 /* someone has to actually read the data from file. Open it */ 1479 SVN_ERR(auto_open_shared_file(rs->sfile)); 1480 1481 /* invoke the 'block-read' feature for non-txn data. 1482 However, don't do that if we are in the middle of some representation, 1483 because the block is unlikely to contain other data. */ 1484 if ( rs->chunk_index == 0 1485 && SVN_IS_VALID_REVNUM(rs->revision) 1486 && use_block_read(rs->sfile->fs) 1487 && rs->raw_window_cache) 1488 { 1489 SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index, 1490 rs->sfile->rfile, result_pool, scratch_pool)); 1491 1492 /* reading the whole block probably also provided us with the 1493 desired txdelta window */ 1494 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached, 1495 result_pool, scratch_pool)); 1496 if (is_cached) 1497 return SVN_NO_ERROR; 1498 } 1499 1500 /* data is still not cached -> we need to read it. 1501 Make sure we have all the necessary info. */ 1502 SVN_ERR(auto_set_start_offset(rs, scratch_pool)); 1503 SVN_ERR(auto_read_diff_version(rs, scratch_pool)); 1504 1505 /* RS->FILE may be shared between RS instances -> make sure we point 1506 * to the right data. */ 1507 start_offset = rs->start + rs->current; 1508 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool)); 1509 1510 /* Skip windows to reach the current chunk if we aren't there yet. */ 1511 iterpool = svn_pool_create(scratch_pool); 1512 while (rs->chunk_index < this_chunk) 1513 { 1514 svn_pool_clear(iterpool); 1515 SVN_ERR(svn_txdelta_skip_svndiff_window(rs->sfile->rfile->file, 1516 rs->ver, iterpool)); 1517 rs->chunk_index++; 1518 SVN_ERR(get_file_offset(&start_offset, rs, iterpool)); 1519 rs->current = start_offset - rs->start; 1520 if (rs->current >= rs->size) 1521 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, 1522 _("Reading one svndiff window read " 1523 "beyond the end of the " 1524 "representation")); 1525 } 1526 svn_pool_destroy(iterpool); 1527 1528 /* Actually read the next window. */ 1529 SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream, 1530 rs->ver, result_pool)); 1531 SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool)); 1532 rs->current = end_offset - rs->start; 1533 if (rs->current > rs->size) 1534 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, 1535 _("Reading one svndiff window read beyond " 1536 "the end of the representation")); 1537 1538 /* the window has not been cached before, thus cache it now 1539 * (if caching is used for them at all) */ 1540 if (SVN_IS_VALID_REVNUM(rs->revision)) 1541 SVN_ERR(set_cached_window(*nwin, rs, scratch_pool)); 1542 1543 return SVN_NO_ERROR; 1544} 1545 1546/* Read SIZE bytes from the representation RS and return it in *NWIN. */ 1547static svn_error_t * 1548read_plain_window(svn_stringbuf_t **nwin, rep_state_t *rs, 1549 apr_size_t size, apr_pool_t *result_pool, 1550 apr_pool_t *scratch_pool) 1551{ 1552 apr_off_t offset; 1553 1554 /* RS->FILE may be shared between RS instances -> make sure we point 1555 * to the right data. */ 1556 SVN_ERR(auto_open_shared_file(rs->sfile)); 1557 SVN_ERR(auto_set_start_offset(rs, scratch_pool)); 1558 1559 offset = rs->start + rs->current; 1560 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool)); 1561 1562 /* Read the plain data. */ 1563 *nwin = svn_stringbuf_create_ensure(size, result_pool); 1564 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, (*nwin)->data, size, 1565 NULL, NULL, result_pool)); 1566 (*nwin)->data[size] = 0; 1567 1568 /* Update RS. */ 1569 rs->current += (apr_off_t)size; 1570 1571 return SVN_NO_ERROR; 1572} 1573 1574/* Get the undeltified window that is a result of combining all deltas 1575 from the current desired representation identified in *RB with its 1576 base representation. Store the window in *RESULT. */ 1577static svn_error_t * 1578get_combined_window(svn_stringbuf_t **result, 1579 struct rep_read_baton *rb) 1580{ 1581 apr_pool_t *pool, *new_pool, *window_pool; 1582 int i; 1583 apr_array_header_t *windows; 1584 svn_stringbuf_t *source, *buf = rb->base_window; 1585 rep_state_t *rs; 1586 apr_pool_t *iterpool; 1587 1588 /* Read all windows that we need to combine. This is fine because 1589 the size of each window is relatively small (100kB) and skip- 1590 delta limits the number of deltas in a chain to well under 100. 1591 Stop early if one of them does not depend on its predecessors. */ 1592 window_pool = svn_pool_create(rb->pool); 1593 windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *)); 1594 iterpool = svn_pool_create(rb->pool); 1595 for (i = 0; i < rb->rs_list->nelts; ++i) 1596 { 1597 svn_txdelta_window_t *window; 1598 1599 svn_pool_clear(iterpool); 1600 1601 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *); 1602 SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool, 1603 iterpool)); 1604 1605 APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window; 1606 if (window->src_ops == 0) 1607 { 1608 ++i; 1609 break; 1610 } 1611 } 1612 1613 /* Combine in the windows from the other delta reps. */ 1614 pool = svn_pool_create(rb->pool); 1615 for (--i; i >= 0; --i) 1616 { 1617 svn_txdelta_window_t *window; 1618 1619 svn_pool_clear(iterpool); 1620 1621 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *); 1622 window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *); 1623 1624 /* Maybe, we've got a PLAIN start representation. If we do, read 1625 as much data from it as the needed for the txdelta window's source 1626 view. 1627 Note that BUF / SOURCE may only be NULL in the first iteration. 1628 Also note that we may have short-cut reading the delta chain -- 1629 in which case SRC_OPS is 0 and it might not be a PLAIN rep. */ 1630 source = buf; 1631 if (source == NULL && rb->src_state != NULL && window->src_ops) 1632 SVN_ERR(read_plain_window(&source, rb->src_state, window->sview_len, 1633 pool, iterpool)); 1634 1635 /* Combine this window with the current one. */ 1636 new_pool = svn_pool_create(rb->pool); 1637 buf = svn_stringbuf_create_ensure(window->tview_len, new_pool); 1638 buf->len = window->tview_len; 1639 1640 svn_txdelta_apply_instructions(window, source ? source->data : NULL, 1641 buf->data, &buf->len); 1642 if (buf->len != window->tview_len) 1643 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, 1644 _("svndiff window length is " 1645 "corrupt")); 1646 1647 /* Cache windows only if the whole rep content could be read as a 1648 single chunk. Only then will no other chunk need a deeper RS 1649 list than the cached chunk. */ 1650 if ( (rb->chunk_index == 0) && (rs->current == rs->size) 1651 && SVN_IS_VALID_REVNUM(rs->revision)) 1652 SVN_ERR(set_cached_combined_window(buf, rs, new_pool)); 1653 1654 rs->chunk_index++; 1655 1656 /* Cycle pools so that we only need to hold three windows at a time. */ 1657 svn_pool_destroy(pool); 1658 pool = new_pool; 1659 } 1660 svn_pool_destroy(iterpool); 1661 1662 svn_pool_destroy(window_pool); 1663 1664 *result = buf; 1665 return SVN_NO_ERROR; 1666} 1667 1668/* Returns whether or not the expanded fulltext of the file is cachable 1669 * based on its size SIZE. The decision depends on the cache used by RB. 1670 */ 1671static svn_boolean_t 1672fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size) 1673{ 1674 return (size < APR_SIZE_MAX) 1675 && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size); 1676} 1677 1678/* Close method used on streams returned by read_representation(). 1679 */ 1680static svn_error_t * 1681rep_read_contents_close(void *baton) 1682{ 1683 struct rep_read_baton *rb = baton; 1684 1685 svn_pool_destroy(rb->pool); 1686 svn_pool_destroy(rb->filehandle_pool); 1687 1688 return SVN_NO_ERROR; 1689} 1690 1691/* Return the next *LEN bytes of the rep from our plain / delta windows 1692 and store them in *BUF. */ 1693static svn_error_t * 1694get_contents_from_windows(struct rep_read_baton *rb, 1695 char *buf, 1696 apr_size_t *len) 1697{ 1698 apr_size_t copy_len, remaining = *len; 1699 char *cur = buf; 1700 rep_state_t *rs; 1701 1702 /* Special case for when there are no delta reps, only a plain 1703 text. */ 1704 if (rb->rs_list->nelts == 0) 1705 { 1706 copy_len = remaining; 1707 rs = rb->src_state; 1708 1709 if (rb->base_window != NULL) 1710 { 1711 /* We got the desired rep directly from the cache. 1712 This is where we need the pseudo rep_state created 1713 by build_rep_list(). */ 1714 apr_size_t offset = (apr_size_t)rs->current; 1715 if (copy_len + offset > rb->base_window->len) 1716 copy_len = offset < rb->base_window->len 1717 ? rb->base_window->len - offset 1718 : 0ul; 1719 1720 memcpy (cur, rb->base_window->data + offset, copy_len); 1721 } 1722 else 1723 { 1724 apr_off_t offset; 1725 if (((apr_off_t) copy_len) > rs->size - rs->current) 1726 copy_len = (apr_size_t) (rs->size - rs->current); 1727 1728 SVN_ERR(auto_open_shared_file(rs->sfile)); 1729 SVN_ERR(auto_set_start_offset(rs, rb->pool)); 1730 1731 offset = rs->start + rs->current; 1732 SVN_ERR(rs_aligned_seek(rs, NULL, offset, rb->pool)); 1733 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, cur, 1734 copy_len, NULL, NULL, rb->pool)); 1735 } 1736 1737 rs->current += copy_len; 1738 *len = copy_len; 1739 return SVN_NO_ERROR; 1740 } 1741 1742 while (remaining > 0) 1743 { 1744 /* If we have buffered data from a previous chunk, use that. */ 1745 if (rb->buf) 1746 { 1747 /* Determine how much to copy from the buffer. */ 1748 copy_len = rb->buf_len - rb->buf_pos; 1749 if (copy_len > remaining) 1750 copy_len = remaining; 1751 1752 /* Actually copy the data. */ 1753 memcpy(cur, rb->buf + rb->buf_pos, copy_len); 1754 rb->buf_pos += copy_len; 1755 cur += copy_len; 1756 remaining -= copy_len; 1757 1758 /* If the buffer is all used up, clear it and empty the 1759 local pool. */ 1760 if (rb->buf_pos == rb->buf_len) 1761 { 1762 svn_pool_clear(rb->pool); 1763 rb->buf = NULL; 1764 } 1765 } 1766 else 1767 { 1768 svn_stringbuf_t *sbuf = NULL; 1769 1770 rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *); 1771 if (rs->current == rs->size) 1772 break; 1773 1774 /* Get more buffered data by evaluating a chunk. */ 1775 SVN_ERR(get_combined_window(&sbuf, rb)); 1776 1777 rb->chunk_index++; 1778 rb->buf_len = sbuf->len; 1779 rb->buf = sbuf->data; 1780 rb->buf_pos = 0; 1781 } 1782 } 1783 1784 *len = cur - buf; 1785 1786 return SVN_NO_ERROR; 1787} 1788 1789/* Baton type for get_fulltext_partial. */ 1790typedef struct fulltext_baton_t 1791{ 1792 /* Target buffer to write to; of at least LEN bytes. */ 1793 char *buffer; 1794 1795 /* Offset within the respective fulltext at which we shall start to 1796 copy data into BUFFER. */ 1797 apr_size_t start; 1798 1799 /* Number of bytes to copy. The actual amount may be less in case 1800 the fulltext is short(er). */ 1801 apr_size_t len; 1802 1803 /* Number of bytes actually copied into BUFFER. */ 1804 apr_size_t read; 1805} fulltext_baton_t; 1806 1807/* Implement svn_cache__partial_getter_func_t for fulltext caches. 1808 * From the fulltext in DATA, we copy the range specified by the 1809 * fulltext_baton_t* BATON into the buffer provided by that baton. 1810 * OUT and RESULT_POOL are not used. 1811 */ 1812static svn_error_t * 1813get_fulltext_partial(void **out, 1814 const void *data, 1815 apr_size_t data_len, 1816 void *baton, 1817 apr_pool_t *result_pool) 1818{ 1819 fulltext_baton_t *fulltext_baton = baton; 1820 1821 /* We cached the fulltext with an NUL appended to it. */ 1822 apr_size_t fulltext_len = data_len - 1; 1823 1824 /* Clip the copy range to what the fulltext size allows. */ 1825 apr_size_t start = MIN(fulltext_baton->start, fulltext_len); 1826 fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len); 1827 1828 /* Copy the data to the output buffer and be done. */ 1829 memcpy(fulltext_baton->buffer, (const char *)data + start, 1830 fulltext_baton->read); 1831 1832 return SVN_NO_ERROR; 1833} 1834 1835/* Find the fulltext specified in BATON in the fulltext cache given 1836 * as well by BATON. If that succeeds, set *CACHED to TRUE and copy 1837 * up to the next *LEN bytes into BUFFER. Set *LEN to the actual 1838 * number of bytes copied. 1839 */ 1840static svn_error_t * 1841get_contents_from_fulltext(svn_boolean_t *cached, 1842 struct rep_read_baton *baton, 1843 char *buffer, 1844 apr_size_t *len) 1845{ 1846 void *dummy; 1847 fulltext_baton_t fulltext_baton; 1848 1849 SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered 1850 == baton->fulltext_delivered); 1851 fulltext_baton.buffer = buffer; 1852 fulltext_baton.start = (apr_size_t)baton->fulltext_delivered; 1853 fulltext_baton.len = *len; 1854 fulltext_baton.read = 0; 1855 1856 SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache, 1857 &baton->fulltext_cache_key, 1858 get_fulltext_partial, &fulltext_baton, 1859 baton->pool)); 1860 1861 if (*cached) 1862 { 1863 baton->fulltext_delivered += fulltext_baton.read; 1864 *len = fulltext_baton.read; 1865 } 1866 1867 return SVN_NO_ERROR; 1868} 1869 1870/* Determine the optimal size of a string buf that shall receive a 1871 * (full-) text of NEEDED bytes. 1872 * 1873 * The critical point is that those buffers may be very large and 1874 * can cause memory fragmentation. We apply simple heuristics to 1875 * make fragmentation less likely. 1876 */ 1877static apr_size_t 1878optimimal_allocation_size(apr_size_t needed) 1879{ 1880 /* For all allocations, assume some overhead that is shared between 1881 * OS memory managemnt, APR memory management and svn_stringbuf_t. */ 1882 const apr_size_t overhead = 0x400; 1883 apr_size_t optimal; 1884 1885 /* If an allocation size if safe for other ephemeral buffers, it should 1886 * be safe for ours. */ 1887 if (needed <= SVN__STREAM_CHUNK_SIZE) 1888 return needed; 1889 1890 /* Paranoia edge case: 1891 * Skip our heuristics if they created arithmetical overflow. 1892 * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */ 1893 if (needed >= APR_SIZE_MAX / 2 - overhead) 1894 return needed; 1895 1896 /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two. 1897 * Since we know NEEDED to be larger than that, use it as the 1898 * starting point. 1899 * 1900 * Heuristics: Allocate a power-of-two number of bytes that fit 1901 * NEEDED plus some OVERHEAD. The APR allocator 1902 * will round it up to the next full page size. 1903 */ 1904 optimal = SVN__STREAM_CHUNK_SIZE; 1905 while (optimal - overhead < needed) 1906 optimal *= 2; 1907 1908 /* This is above or equal to NEEDED. */ 1909 return optimal - overhead; 1910} 1911 1912/* After a fulltext cache lookup failure, we will continue to read from 1913 * combined delta or plain windows. However, we must first make that data 1914 * stream in BATON catch up tho the position LEN already delivered from the 1915 * fulltext cache. Also, we need to store the reconstructed fulltext if we 1916 * want to cache it at the end. 1917 */ 1918static svn_error_t * 1919skip_contents(struct rep_read_baton *baton, 1920 svn_filesize_t len) 1921{ 1922 svn_error_t *err = SVN_NO_ERROR; 1923 1924 /* Do we want to cache the reconstructed fulltext? */ 1925 if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision)) 1926 { 1927 char *buffer; 1928 svn_filesize_t to_alloc = MAX(len, baton->len); 1929 1930 /* This should only be happening if BATON->LEN and LEN are 1931 * cacheable, implying they fit into memory. */ 1932 SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc); 1933 1934 /* Allocate the fulltext buffer. */ 1935 baton->current_fulltext = svn_stringbuf_create_ensure( 1936 optimimal_allocation_size((apr_size_t)to_alloc), 1937 baton->filehandle_pool); 1938 1939 /* Read LEN bytes from the window stream and store the data 1940 * in the fulltext buffer (will be filled by further reads later). */ 1941 baton->current_fulltext->len = (apr_size_t)len; 1942 baton->current_fulltext->data[(apr_size_t)len] = 0; 1943 1944 buffer = baton->current_fulltext->data; 1945 while (len > 0 && !err) 1946 { 1947 apr_size_t to_read = (apr_size_t)len; 1948 err = get_contents_from_windows(baton, buffer, &to_read); 1949 len -= to_read; 1950 buffer += to_read; 1951 } 1952 } 1953 else if (len > 0) 1954 { 1955 /* Simply drain LEN bytes from the window stream. */ 1956 apr_pool_t *subpool = subpool = svn_pool_create(baton->pool); 1957 char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE); 1958 1959 while (len > 0 && !err) 1960 { 1961 apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE 1962 ? SVN__STREAM_CHUNK_SIZE 1963 : (apr_size_t)len; 1964 1965 err = get_contents_from_windows(baton, buffer, &to_read); 1966 len -= to_read; 1967 } 1968 1969 svn_pool_destroy(subpool); 1970 } 1971 1972 return svn_error_trace(err); 1973} 1974 1975/* BATON is of type `rep_read_baton'; read the next *LEN bytes of the 1976 representation and store them in *BUF. Sum as we read and verify 1977 the MD5 sum at the end. */ 1978static svn_error_t * 1979rep_read_contents(void *baton, 1980 char *buf, 1981 apr_size_t *len) 1982{ 1983 struct rep_read_baton *rb = baton; 1984 1985 /* Get data from the fulltext cache for as long as we can. */ 1986 if (rb->fulltext_cache) 1987 { 1988 svn_boolean_t cached; 1989 SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len)); 1990 if (cached) 1991 return SVN_NO_ERROR; 1992 1993 /* Cache miss. From now on, we will never read from the fulltext 1994 * cache for this representation anymore. */ 1995 rb->fulltext_cache = NULL; 1996 } 1997 1998 /* No fulltext cache to help us. We must read from the window stream. */ 1999 if (!rb->rs_list) 2000 { 2001 /* Window stream not initialized, yet. Do it now. */ 2002 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window, 2003 &rb->src_state, &rb->len, rb->fs, &rb->rep, 2004 rb->filehandle_pool)); 2005 2006 /* In case we did read from the fulltext cache before, make the 2007 * window stream catch up. Also, initialize the fulltext buffer 2008 * if we want to cache the fulltext at the end. */ 2009 SVN_ERR(skip_contents(rb, rb->fulltext_delivered)); 2010 } 2011 2012 /* Get the next block of data. */ 2013 SVN_ERR(get_contents_from_windows(rb, buf, len)); 2014 2015 if (rb->current_fulltext) 2016 svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len); 2017 2018 /* Perform checksumming. We want to check the checksum as soon as 2019 the last byte of data is read, in case the caller never performs 2020 a short read, but we don't want to finalize the MD5 context 2021 twice. */ 2022 if (!rb->checksum_finalized) 2023 { 2024 SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len)); 2025 rb->off += *len; 2026 if (rb->off == rb->len) 2027 { 2028 svn_checksum_t *md5_checksum; 2029 svn_checksum_t expected; 2030 expected.kind = svn_checksum_md5; 2031 expected.digest = rb->md5_digest; 2032 2033 rb->checksum_finalized = TRUE; 2034 SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx, 2035 rb->pool)); 2036 if (!svn_checksum_match(md5_checksum, &expected)) 2037 return svn_error_create(SVN_ERR_FS_CORRUPT, 2038 svn_checksum_mismatch_err(&expected, md5_checksum, 2039 rb->pool, 2040 _("Checksum mismatch while reading representation")), 2041 NULL); 2042 } 2043 } 2044 2045 if (rb->off == rb->len && rb->current_fulltext) 2046 { 2047 fs_fs_data_t *ffd = rb->fs->fsap_data; 2048 SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key, 2049 rb->current_fulltext, rb->pool)); 2050 rb->current_fulltext = NULL; 2051 } 2052 2053 return SVN_NO_ERROR; 2054} 2055 2056svn_error_t * 2057svn_fs_fs__get_contents(svn_stream_t **contents_p, 2058 svn_fs_t *fs, 2059 representation_t *rep, 2060 svn_boolean_t cache_fulltext, 2061 apr_pool_t *pool) 2062{ 2063 if (! rep) 2064 { 2065 *contents_p = svn_stream_empty(pool); 2066 } 2067 else 2068 { 2069 fs_fs_data_t *ffd = fs->fsap_data; 2070 svn_filesize_t len = rep->expanded_size ? rep->expanded_size : rep->size; 2071 struct rep_read_baton *rb; 2072 2073 pair_cache_key_t fulltext_cache_key = { 0 }; 2074 fulltext_cache_key.revision = rep->revision; 2075 fulltext_cache_key.second = rep->item_index; 2076 2077 /* Initialize the reader baton. Some members may added lazily 2078 * while reading from the stream */ 2079 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool)); 2080 2081 /* Make the stream attempt fulltext cache lookups if the fulltext 2082 * is cacheable. If it is not, then also don't try to buffer and 2083 * cache it. */ 2084 if (ffd->fulltext_cache && cache_fulltext 2085 && SVN_IS_VALID_REVNUM(rep->revision) 2086 && fulltext_size_is_cachable(ffd, len)) 2087 { 2088 rb->fulltext_cache = ffd->fulltext_cache; 2089 } 2090 else 2091 { 2092 /* This will also prevent the reconstructed fulltext from being 2093 put into the cache. */ 2094 rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM; 2095 } 2096 2097 *contents_p = svn_stream_create(rb, pool); 2098 svn_stream_set_read2(*contents_p, NULL /* only full read support */, 2099 rep_read_contents); 2100 svn_stream_set_close(*contents_p, rep_read_contents_close); 2101 } 2102 2103 return SVN_NO_ERROR; 2104} 2105 2106/* Baton for cache_access_wrapper. Wraps the original parameters of 2107 * svn_fs_fs__try_process_file_content(). 2108 */ 2109typedef struct cache_access_wrapper_baton_t 2110{ 2111 svn_fs_process_contents_func_t func; 2112 void* baton; 2113} cache_access_wrapper_baton_t; 2114 2115/* Wrapper to translate between svn_fs_process_contents_func_t and 2116 * svn_cache__partial_getter_func_t. 2117 */ 2118static svn_error_t * 2119cache_access_wrapper(void **out, 2120 const void *data, 2121 apr_size_t data_len, 2122 void *baton, 2123 apr_pool_t *pool) 2124{ 2125 cache_access_wrapper_baton_t *wrapper_baton = baton; 2126 2127 SVN_ERR(wrapper_baton->func((const unsigned char *)data, 2128 data_len - 1, /* cache adds terminating 0 */ 2129 wrapper_baton->baton, 2130 pool)); 2131 2132 /* non-NULL value to signal the calling cache that all went well */ 2133 *out = baton; 2134 2135 return SVN_NO_ERROR; 2136} 2137 2138svn_error_t * 2139svn_fs_fs__try_process_file_contents(svn_boolean_t *success, 2140 svn_fs_t *fs, 2141 node_revision_t *noderev, 2142 svn_fs_process_contents_func_t processor, 2143 void* baton, 2144 apr_pool_t *pool) 2145{ 2146 representation_t *rep = noderev->data_rep; 2147 if (rep) 2148 { 2149 fs_fs_data_t *ffd = fs->fsap_data; 2150 pair_cache_key_t fulltext_cache_key = { 0 }; 2151 2152 fulltext_cache_key.revision = rep->revision; 2153 fulltext_cache_key.second = rep->item_index; 2154 if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision) 2155 && fulltext_size_is_cachable(ffd, rep->expanded_size)) 2156 { 2157 cache_access_wrapper_baton_t wrapper_baton; 2158 void *dummy = NULL; 2159 2160 wrapper_baton.func = processor; 2161 wrapper_baton.baton = baton; 2162 return svn_cache__get_partial(&dummy, success, 2163 ffd->fulltext_cache, 2164 &fulltext_cache_key, 2165 cache_access_wrapper, 2166 &wrapper_baton, 2167 pool); 2168 } 2169 } 2170 2171 *success = FALSE; 2172 return SVN_NO_ERROR; 2173} 2174 2175 2176/* Baton used when reading delta windows. */ 2177struct delta_read_baton 2178{ 2179 rep_state_t *rs; 2180 unsigned char md5_digest[APR_MD5_DIGESTSIZE]; 2181}; 2182 2183/* This implements the svn_txdelta_next_window_fn_t interface. */ 2184static svn_error_t * 2185delta_read_next_window(svn_txdelta_window_t **window, void *baton, 2186 apr_pool_t *pool) 2187{ 2188 struct delta_read_baton *drb = baton; 2189 apr_pool_t *scratch_pool = svn_pool_create(pool); 2190 2191 *window = NULL; 2192 if (drb->rs->current < drb->rs->size) 2193 { 2194 SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool, 2195 scratch_pool)); 2196 drb->rs->chunk_index++; 2197 } 2198 2199 svn_pool_destroy(scratch_pool); 2200 2201 return SVN_NO_ERROR; 2202} 2203 2204/* This implements the svn_txdelta_md5_digest_fn_t interface. */ 2205static const unsigned char * 2206delta_read_md5_digest(void *baton) 2207{ 2208 struct delta_read_baton *drb = baton; 2209 return drb->md5_digest; 2210} 2211 2212/* Return a txdelta stream for on-disk representation REP_STATE 2213 * of TARGET. Allocate the result in POOL. 2214 */ 2215static svn_txdelta_stream_t * 2216get_storaged_delta_stream(rep_state_t *rep_state, 2217 node_revision_t *target, 2218 apr_pool_t *pool) 2219{ 2220 /* Create the delta read baton. */ 2221 struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb)); 2222 drb->rs = rep_state; 2223 memcpy(drb->md5_digest, target->data_rep->md5_digest, 2224 sizeof(drb->md5_digest)); 2225 return svn_txdelta_stream_create(drb, delta_read_next_window, 2226 delta_read_md5_digest, pool); 2227} 2228 2229svn_error_t * 2230svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p, 2231 svn_fs_t *fs, 2232 node_revision_t *source, 2233 node_revision_t *target, 2234 apr_pool_t *pool) 2235{ 2236 svn_stream_t *source_stream, *target_stream; 2237 rep_state_t *rep_state; 2238 svn_fs_fs__rep_header_t *rep_header; 2239 fs_fs_data_t *ffd = fs->fsap_data; 2240 2241 /* Try a shortcut: if the target is stored as a delta against the source, 2242 then just use that delta. However, prefer using the fulltext cache 2243 whenever that is available. */ 2244 if (target->data_rep && (source || ! ffd->fulltext_cache)) 2245 { 2246 /* Read target's base rep if any. */ 2247 SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL, 2248 target->data_rep, fs, pool, pool)); 2249 2250 if (source && source->data_rep && target->data_rep) 2251 { 2252 /* If that matches source, then use this delta as is. 2253 Note that we want an actual delta here. E.g. a self-delta would 2254 not be good enough. */ 2255 if (rep_header->type == svn_fs_fs__rep_delta 2256 && rep_header->base_revision == source->data_rep->revision 2257 && rep_header->base_item_index == source->data_rep->item_index) 2258 { 2259 *stream_p = get_storaged_delta_stream(rep_state, target, pool); 2260 return SVN_NO_ERROR; 2261 } 2262 } 2263 else if (!source) 2264 { 2265 /* We want a self-delta. There is a fair chance that TARGET got 2266 added in this revision and is already stored in the requested 2267 format. */ 2268 if (rep_header->type == svn_fs_fs__rep_self_delta) 2269 { 2270 *stream_p = get_storaged_delta_stream(rep_state, target, pool); 2271 return SVN_NO_ERROR; 2272 } 2273 } 2274 2275 /* Don't keep file handles open for longer than necessary. */ 2276 if (rep_state->sfile->rfile) 2277 { 2278 SVN_ERR(svn_fs_fs__close_revision_file(rep_state->sfile->rfile)); 2279 rep_state->sfile->rfile = NULL; 2280 } 2281 } 2282 2283 /* Read both fulltexts and construct a delta. */ 2284 if (source) 2285 SVN_ERR(svn_fs_fs__get_contents(&source_stream, fs, source->data_rep, 2286 TRUE, pool)); 2287 else 2288 source_stream = svn_stream_empty(pool); 2289 SVN_ERR(svn_fs_fs__get_contents(&target_stream, fs, target->data_rep, 2290 TRUE, pool)); 2291 2292 /* Because source and target stream will already verify their content, 2293 * there is no need to do this once more. In particular if the stream 2294 * content is being fetched from cache. */ 2295 svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool); 2296 2297 return SVN_NO_ERROR; 2298} 2299 2300/* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted 2301 by their respective name. */ 2302static svn_boolean_t 2303sorted(apr_array_header_t *entries) 2304{ 2305 int i; 2306 2307 const svn_fs_dirent_t * const *dirents = (const void *)entries->elts; 2308 for (i = 0; i < entries->nelts-1; ++i) 2309 if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0) 2310 return FALSE; 2311 2312 return TRUE; 2313} 2314 2315/* Compare the names of the two dirents given in **A and **B. */ 2316static int 2317compare_dirents(const void *a, const void *b) 2318{ 2319 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a); 2320 const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b); 2321 2322 return strcmp(lhs->name, rhs->name); 2323} 2324 2325/* Compare the name of the dirents given in **A with the C string in *B. */ 2326static int 2327compare_dirent_name(const void *a, const void *b) 2328{ 2329 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a); 2330 const char *rhs = b; 2331 2332 return strcmp(lhs->name, rhs); 2333} 2334 2335/* Into ENTRIES, read all directories entries from the key-value text in 2336 * STREAM. If INCREMENTAL is TRUE, read until the end of the STREAM and 2337 * update the data. ID is provided for nicer error messages. 2338 */ 2339static svn_error_t * 2340read_dir_entries(apr_array_header_t *entries, 2341 svn_stream_t *stream, 2342 svn_boolean_t incremental, 2343 const svn_fs_id_t *id, 2344 apr_pool_t *result_pool, 2345 apr_pool_t *scratch_pool) 2346{ 2347 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 2348 apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL; 2349 const char *terminator = SVN_HASH_TERMINATOR; 2350 2351 /* Read until the terminator (non-incremental) or the end of STREAM 2352 (incremental mode). In the latter mode, we use a temporary HASH 2353 to make updating and removing entries cheaper. */ 2354 while (1) 2355 { 2356 svn_hash__entry_t entry; 2357 svn_fs_dirent_t *dirent; 2358 char *str; 2359 2360 svn_pool_clear(iterpool); 2361 SVN_ERR(svn_hash__read_entry(&entry, stream, terminator, 2362 incremental, iterpool)); 2363 2364 /* End of directory? */ 2365 if (entry.key == NULL) 2366 { 2367 /* In incremental mode, we skip the terminator and read the 2368 increments following it until the end of the stream. */ 2369 if (incremental && terminator) 2370 terminator = NULL; 2371 else 2372 break; 2373 } 2374 2375 /* Deleted entry? */ 2376 if (entry.val == NULL) 2377 { 2378 /* We must be in incremental mode */ 2379 assert(hash); 2380 apr_hash_set(hash, entry.key, entry.keylen, NULL); 2381 continue; 2382 } 2383 2384 /* Add a new directory entry. */ 2385 dirent = apr_pcalloc(result_pool, sizeof(*dirent)); 2386 dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen); 2387 2388 str = svn_cstring_tokenize(" ", &entry.val); 2389 if (str == NULL) 2390 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 2391 _("Directory entry corrupt in '%s'"), 2392 svn_fs_fs__id_unparse(id, scratch_pool)->data); 2393 2394 if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0) 2395 { 2396 dirent->kind = svn_node_file; 2397 } 2398 else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0) 2399 { 2400 dirent->kind = svn_node_dir; 2401 } 2402 else 2403 { 2404 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 2405 _("Directory entry corrupt in '%s'"), 2406 svn_fs_fs__id_unparse(id, scratch_pool)->data); 2407 } 2408 2409 str = svn_cstring_tokenize(" ", &entry.val); 2410 if (str == NULL) 2411 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 2412 _("Directory entry corrupt in '%s'"), 2413 svn_fs_fs__id_unparse(id, scratch_pool)->data); 2414 2415 SVN_ERR(svn_fs_fs__id_parse(&dirent->id, str, result_pool)); 2416 2417 /* In incremental mode, update the hash; otherwise, write to the 2418 * final array. Be sure to use hash keys that survive this iteration. 2419 */ 2420 if (incremental) 2421 apr_hash_set(hash, dirent->name, entry.keylen, dirent); 2422 else 2423 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent; 2424 } 2425 2426 /* Convert container to a sorted array. */ 2427 if (incremental) 2428 { 2429 apr_hash_index_t *hi; 2430 for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi)) 2431 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi); 2432 } 2433 2434 if (!sorted(entries)) 2435 svn_sort__array(entries, compare_dirents); 2436 2437 svn_pool_destroy(iterpool); 2438 2439 return SVN_NO_ERROR; 2440} 2441 2442/* Fetch the contents of a directory into ENTRIES. Values are stored 2443 as filename to string mappings; further conversion is necessary to 2444 convert them into svn_fs_dirent_t values. */ 2445static svn_error_t * 2446get_dir_contents(apr_array_header_t **entries, 2447 svn_fs_t *fs, 2448 node_revision_t *noderev, 2449 apr_pool_t *result_pool, 2450 apr_pool_t *scratch_pool) 2451{ 2452 svn_stream_t *contents; 2453 2454 *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *)); 2455 if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id)) 2456 { 2457 const char *filename 2458 = svn_fs_fs__path_txn_node_children(fs, noderev->id, scratch_pool); 2459 2460 /* The representation is mutable. Read the old directory 2461 contents from the mutable children file, followed by the 2462 changes we've made in this transaction. */ 2463 SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool, 2464 scratch_pool)); 2465 SVN_ERR(read_dir_entries(*entries, contents, TRUE, noderev->id, 2466 result_pool, scratch_pool)); 2467 SVN_ERR(svn_stream_close(contents)); 2468 } 2469 else if (noderev->data_rep) 2470 { 2471 /* Undeltify content before parsing it. Otherwise, we could only 2472 * parse it byte-by-byte. 2473 */ 2474 apr_size_t len = noderev->data_rep->expanded_size 2475 ? (apr_size_t)noderev->data_rep->expanded_size 2476 : (apr_size_t)noderev->data_rep->size; 2477 svn_stringbuf_t *text; 2478 2479 /* The representation is immutable. Read it normally. */ 2480 SVN_ERR(svn_fs_fs__get_contents(&contents, fs, noderev->data_rep, 2481 FALSE, scratch_pool)); 2482 SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool)); 2483 SVN_ERR(svn_stream_close(contents)); 2484 2485 /* de-serialize hash */ 2486 contents = svn_stream_from_stringbuf(text, scratch_pool); 2487 SVN_ERR(read_dir_entries(*entries, contents, FALSE, noderev->id, 2488 result_pool, scratch_pool)); 2489 } 2490 2491 return SVN_NO_ERROR; 2492} 2493 2494 2495/* Return the cache object in FS responsible to storing the directory the 2496 * NODEREV plus the corresponding *KEY. If no cache exists, return NULL. 2497 * PAIR_KEY must point to some key struct, which does not need to be 2498 * initialized. We use it to avoid dynamic allocation. 2499 */ 2500static svn_cache__t * 2501locate_dir_cache(svn_fs_t *fs, 2502 const void **key, 2503 pair_cache_key_t *pair_key, 2504 node_revision_t *noderev, 2505 apr_pool_t *pool) 2506{ 2507 fs_fs_data_t *ffd = fs->fsap_data; 2508 if (svn_fs_fs__id_is_txn(noderev->id)) 2509 { 2510 /* data in txns requires the expensive fs_id-based addressing mode */ 2511 *key = svn_fs_fs__id_unparse(noderev->id, pool)->data; 2512 return ffd->txn_dir_cache; 2513 } 2514 else 2515 { 2516 /* committed data can use simple rev,item pairs */ 2517 if (noderev->data_rep) 2518 { 2519 pair_key->revision = noderev->data_rep->revision; 2520 pair_key->second = noderev->data_rep->item_index; 2521 *key = pair_key; 2522 } 2523 else 2524 { 2525 /* no data rep -> empty directory. 2526 A NULL key causes a cache miss. */ 2527 *key = NULL; 2528 } 2529 2530 return ffd->dir_cache; 2531 } 2532} 2533 2534svn_error_t * 2535svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p, 2536 svn_fs_t *fs, 2537 node_revision_t *noderev, 2538 apr_pool_t *result_pool, 2539 apr_pool_t *scratch_pool) 2540{ 2541 pair_cache_key_t pair_key = { 0 }; 2542 const void *key; 2543 2544 /* find the cache we may use */ 2545 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev, 2546 scratch_pool); 2547 if (cache) 2548 { 2549 svn_boolean_t found; 2550 2551 SVN_ERR(svn_cache__get((void **)entries_p, &found, cache, key, 2552 result_pool)); 2553 if (found) 2554 return SVN_NO_ERROR; 2555 } 2556 2557 /* Read in the directory contents. */ 2558 SVN_ERR(get_dir_contents(entries_p, fs, noderev, result_pool, 2559 scratch_pool)); 2560 2561 /* Update the cache, if we are to use one. 2562 * 2563 * Don't even attempt to serialize very large directories; it would cause 2564 * an unnecessary memory allocation peak. 150 bytes/entry is about right. 2565 */ 2566 if (cache && svn_cache__is_cachable(cache, 150 * (*entries_p)->nelts)) 2567 SVN_ERR(svn_cache__set(cache, key, *entries_p, scratch_pool)); 2568 2569 return SVN_NO_ERROR; 2570} 2571 2572svn_fs_dirent_t * 2573svn_fs_fs__find_dir_entry(apr_array_header_t *entries, 2574 const char *name, 2575 int *hint) 2576{ 2577 svn_fs_dirent_t **result 2578 = svn_sort__array_lookup(entries, name, hint, compare_dirent_name); 2579 return result ? *result : NULL; 2580} 2581 2582svn_error_t * 2583svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent, 2584 svn_fs_t *fs, 2585 node_revision_t *noderev, 2586 const char *name, 2587 apr_pool_t *result_pool, 2588 apr_pool_t *scratch_pool) 2589{ 2590 svn_boolean_t found = FALSE; 2591 2592 /* find the cache we may use */ 2593 pair_cache_key_t pair_key = { 0 }; 2594 const void *key; 2595 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev, 2596 scratch_pool); 2597 if (cache) 2598 { 2599 /* Cache lookup. */ 2600 SVN_ERR(svn_cache__get_partial((void **)dirent, 2601 &found, 2602 cache, 2603 key, 2604 svn_fs_fs__extract_dir_entry, 2605 (void*)name, 2606 result_pool)); 2607 } 2608 2609 /* fetch data from disk if we did not find it in the cache */ 2610 if (! found) 2611 { 2612 apr_array_header_t *entries; 2613 svn_fs_dirent_t *entry; 2614 svn_fs_dirent_t *entry_copy = NULL; 2615 2616 /* read the dir from the file system. It will probably be put it 2617 into the cache for faster lookup in future calls. */ 2618 SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev, 2619 scratch_pool, scratch_pool)); 2620 2621 /* find desired entry and return a copy in POOL, if found */ 2622 entry = svn_fs_fs__find_dir_entry(entries, name, NULL); 2623 if (entry) 2624 { 2625 entry_copy = apr_palloc(result_pool, sizeof(*entry_copy)); 2626 entry_copy->name = apr_pstrdup(result_pool, entry->name); 2627 entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool); 2628 entry_copy->kind = entry->kind; 2629 } 2630 2631 *dirent = entry_copy; 2632 } 2633 2634 return SVN_NO_ERROR; 2635} 2636 2637svn_error_t * 2638svn_fs_fs__get_proplist(apr_hash_t **proplist_p, 2639 svn_fs_t *fs, 2640 node_revision_t *noderev, 2641 apr_pool_t *pool) 2642{ 2643 apr_hash_t *proplist; 2644 svn_stream_t *stream; 2645 2646 if (noderev->prop_rep && svn_fs_fs__id_txn_used(&noderev->prop_rep->txn_id)) 2647 { 2648 svn_error_t *err; 2649 const char *filename 2650 = svn_fs_fs__path_txn_node_props(fs, noderev->id, pool); 2651 proplist = apr_hash_make(pool); 2652 2653 SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool)); 2654 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool); 2655 if (err) 2656 { 2657 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool); 2658 2659 svn_error_clear(svn_stream_close(stream)); 2660 return svn_error_quick_wrapf(err, 2661 _("malformed property list for node-revision '%s' in '%s'"), 2662 id_str->data, filename); 2663 } 2664 SVN_ERR(svn_stream_close(stream)); 2665 } 2666 else if (noderev->prop_rep) 2667 { 2668 svn_error_t *err; 2669 fs_fs_data_t *ffd = fs->fsap_data; 2670 representation_t *rep = noderev->prop_rep; 2671 pair_cache_key_t key = { 0 }; 2672 2673 key.revision = rep->revision; 2674 key.second = rep->item_index; 2675 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision)) 2676 { 2677 svn_boolean_t is_cached; 2678 SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached, 2679 ffd->properties_cache, &key, pool)); 2680 if (is_cached) 2681 return SVN_NO_ERROR; 2682 } 2683 2684 proplist = apr_hash_make(pool); 2685 SVN_ERR(svn_fs_fs__get_contents(&stream, fs, noderev->prop_rep, FALSE, 2686 pool)); 2687 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool); 2688 if (err) 2689 { 2690 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool); 2691 2692 svn_error_clear(svn_stream_close(stream)); 2693 return svn_error_quick_wrapf(err, 2694 _("malformed property list for node-revision '%s'"), 2695 id_str->data); 2696 } 2697 SVN_ERR(svn_stream_close(stream)); 2698 2699 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision)) 2700 SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool)); 2701 } 2702 else 2703 { 2704 /* return an empty prop list if the node doesn't have any props */ 2705 proplist = apr_hash_make(pool); 2706 } 2707 2708 *proplist_p = proplist; 2709 2710 return SVN_NO_ERROR; 2711} 2712 2713svn_error_t * 2714svn_fs_fs__get_changes(apr_array_header_t **changes, 2715 svn_fs_t *fs, 2716 svn_revnum_t rev, 2717 apr_pool_t *result_pool) 2718{ 2719 apr_off_t changes_offset = SVN_FS_FS__ITEM_INDEX_CHANGES; 2720 svn_fs_fs__revision_file_t *revision_file; 2721 svn_boolean_t found; 2722 fs_fs_data_t *ffd = fs->fsap_data; 2723 apr_pool_t *scratch_pool = svn_pool_create(result_pool); 2724 2725 /* try cache lookup first */ 2726 2727 if (ffd->changes_cache) 2728 { 2729 SVN_ERR(svn_cache__get((void **) changes, &found, ffd->changes_cache, 2730 &rev, result_pool)); 2731 } 2732 else 2733 { 2734 found = FALSE; 2735 } 2736 2737 if (!found) 2738 { 2739 /* read changes from revision file */ 2740 2741 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool)); 2742 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev, 2743 scratch_pool, scratch_pool)); 2744 2745 if (use_block_read(fs)) 2746 { 2747 /* 'block-read' will also provide us with the desired data */ 2748 SVN_ERR(block_read((void **)changes, fs, 2749 rev, SVN_FS_FS__ITEM_INDEX_CHANGES, 2750 revision_file, result_pool, scratch_pool)); 2751 } 2752 else 2753 { 2754 /* Addressing is very different for old formats 2755 * (needs to read the revision trailer). */ 2756 if (svn_fs_fs__use_log_addressing(fs)) 2757 SVN_ERR(svn_fs_fs__item_offset(&changes_offset, fs, 2758 revision_file, rev, NULL, 2759 SVN_FS_FS__ITEM_INDEX_CHANGES, 2760 scratch_pool)); 2761 else 2762 SVN_ERR(get_root_changes_offset(NULL, &changes_offset, 2763 revision_file, fs, rev, 2764 scratch_pool)); 2765 2766 /* Actual reading and parsing are the same, though. */ 2767 SVN_ERR(aligned_seek(fs, revision_file->file, NULL, changes_offset, 2768 scratch_pool)); 2769 SVN_ERR(svn_fs_fs__read_changes(changes, revision_file->stream, 2770 result_pool, scratch_pool)); 2771 2772 /* cache for future reference */ 2773 2774 if (ffd->changes_cache) 2775 { 2776 /* Guesstimate for the size of the in-cache representation. */ 2777 apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts; 2778 2779 /* Don't even serialize data that probably won't fit into the 2780 * cache. This often implies that either CHANGES is very 2781 * large, memory is scarce or both. Having a huge temporary 2782 * copy would not be a good thing in either case. */ 2783 if (svn_cache__is_cachable(ffd->changes_cache, estimated_size)) 2784 SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes, 2785 scratch_pool)); 2786 } 2787 } 2788 2789 SVN_ERR(svn_fs_fs__close_revision_file(revision_file)); 2790 } 2791 2792 SVN_ERR(dbg_log_access(fs, rev, changes_offset, *changes, 2793 SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool)); 2794 2795 svn_pool_destroy(scratch_pool); 2796 return SVN_NO_ERROR; 2797} 2798 2799/* Inialize the representation read state RS for the given REP_HEADER and 2800 * p2l index ENTRY. If not NULL, assign FILE and STREAM to RS. 2801 * Use RESULT_POOL for allocations. 2802 */ 2803static svn_error_t * 2804init_rep_state(rep_state_t *rs, 2805 svn_fs_fs__rep_header_t *rep_header, 2806 svn_fs_t *fs, 2807 svn_fs_fs__revision_file_t *file, 2808 svn_fs_fs__p2l_entry_t* entry, 2809 apr_pool_t *result_pool) 2810{ 2811 fs_fs_data_t *ffd = fs->fsap_data; 2812 shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file)); 2813 2814 /* this function does not apply to representation containers */ 2815 SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP 2816 && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS); 2817 2818 shared_file->rfile = file; 2819 shared_file->fs = fs; 2820 shared_file->revision = entry->item.revision; 2821 shared_file->pool = result_pool; 2822 2823 rs->sfile = shared_file; 2824 rs->revision = entry->item.revision; 2825 rs->item_index = entry->item.number; 2826 rs->header_size = rep_header->header_size; 2827 rs->start = entry->offset + rs->header_size; 2828 rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4; 2829 rs->size = entry->size - rep_header->header_size - 7; 2830 rs->ver = 1; 2831 rs->chunk_index = 0; 2832 rs->raw_window_cache = ffd->raw_window_cache; 2833 rs->window_cache = ffd->txdelta_window_cache; 2834 rs->combined_cache = ffd->combined_window_cache; 2835 2836 return SVN_NO_ERROR; 2837} 2838 2839/* Implement svn_cache__partial_getter_func_t for txdelta windows. 2840 * Instead of the whole window data, return only END_OFFSET member. 2841 */ 2842static svn_error_t * 2843get_txdelta_window_end(void **out, 2844 const void *data, 2845 apr_size_t data_len, 2846 void *baton, 2847 apr_pool_t *result_pool) 2848{ 2849 const svn_fs_fs__txdelta_cached_window_t *window 2850 = (const svn_fs_fs__txdelta_cached_window_t *)data; 2851 *(apr_off_t*)out = window->end_offset; 2852 2853 return SVN_NO_ERROR; 2854} 2855 2856/* Implement svn_cache__partial_getter_func_t for raw windows. 2857 * Instead of the whole window data, return only END_OFFSET member. 2858 */ 2859static svn_error_t * 2860get_raw_window_end(void **out, 2861 const void *data, 2862 apr_size_t data_len, 2863 void *baton, 2864 apr_pool_t *result_pool) 2865{ 2866 const svn_fs_fs__raw_cached_window_t *window 2867 = (const svn_fs_fs__raw_cached_window_t *)data; 2868 *(apr_off_t*)out = window->end_offset; 2869 2870 return SVN_NO_ERROR; 2871} 2872 2873/* Walk through all windows in the representation addressed by RS in FS 2874 * (excluding the delta bases) and put those not already cached into the 2875 * window caches. If MAX_OFFSET is not -1, don't read windows that start 2876 * at or beyond that offset. Use POOL for temporary allocations. 2877 * 2878 * This function requires RS->RAW_WINDOW_CACHE and RS->WINDOW_CACHE to 2879 * be non-NULL. 2880 */ 2881static svn_error_t * 2882cache_windows(svn_fs_t *fs, 2883 rep_state_t *rs, 2884 apr_off_t max_offset, 2885 apr_pool_t *pool) 2886{ 2887 apr_pool_t *iterpool = svn_pool_create(pool); 2888 while (rs->current < rs->size) 2889 { 2890 apr_off_t end_offset; 2891 svn_boolean_t found = FALSE; 2892 window_cache_key_t key = { 0 }; 2893 2894 svn_pool_clear(iterpool); 2895 2896 if (max_offset != -1 && rs->start + rs->current >= max_offset) 2897 { 2898 svn_pool_destroy(iterpool); 2899 return SVN_NO_ERROR; 2900 } 2901 2902 /* We don't need to read the data again if it is already in cache. 2903 * It might be cached as either raw or parsed window. 2904 */ 2905 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found, 2906 rs->raw_window_cache, 2907 get_window_key(&key, rs), 2908 get_raw_window_end, NULL, 2909 iterpool)); 2910 if (! found) 2911 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found, 2912 rs->window_cache, &key, 2913 get_txdelta_window_end, NULL, 2914 iterpool)); 2915 2916 if (found) 2917 { 2918 rs->current = end_offset; 2919 } 2920 else 2921 { 2922 /* Read, decode and cache the window. */ 2923 svn_fs_fs__raw_cached_window_t window; 2924 apr_off_t start_offset = rs->start + rs->current; 2925 apr_size_t window_len; 2926 char *buf; 2927 2928 /* navigate to the current window */ 2929 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool)); 2930 SVN_ERR(svn_txdelta__read_raw_window_len(&window_len, 2931 rs->sfile->rfile->stream, 2932 iterpool)); 2933 2934 /* Read the raw window. */ 2935 buf = apr_palloc(iterpool, window_len + 1); 2936 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool)); 2937 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf, 2938 window_len, NULL, NULL, iterpool)); 2939 buf[window_len] = 0; 2940 2941 /* update relative offset in representation */ 2942 rs->current += window_len; 2943 2944 /* Construct the cachable raw window object. */ 2945 window.end_offset = rs->current; 2946 window.window.len = window_len; 2947 window.window.data = buf; 2948 2949 /* cache the window now */ 2950 SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window, 2951 iterpool)); 2952 } 2953 2954 if (rs->current > rs->size) 2955 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, 2956 _("Reading one svndiff window read beyond " 2957 "the end of the representation")); 2958 2959 rs->chunk_index++; 2960 } 2961 2962 svn_pool_destroy(iterpool); 2963 return SVN_NO_ERROR; 2964} 2965 2966/* Read all txdelta / plain windows following REP_HEADER in FS as described 2967 * by ENTRY. Read the data from the already open FILE and the wrapping 2968 * STREAM object. If MAX_OFFSET is not -1, don't read windows that start 2969 * at or beyond that offset. Use SCRATCH_POOL for temporary allocations. 2970 * If caching is not enabled, this is a no-op. 2971 */ 2972static svn_error_t * 2973block_read_windows(svn_fs_fs__rep_header_t *rep_header, 2974 svn_fs_t *fs, 2975 svn_fs_fs__revision_file_t *rev_file, 2976 svn_fs_fs__p2l_entry_t* entry, 2977 apr_off_t max_offset, 2978 apr_pool_t *result_pool, 2979 apr_pool_t *scratch_pool) 2980{ 2981 fs_fs_data_t *ffd = fs->fsap_data; 2982 rep_state_t rs = { 0 }; 2983 apr_off_t offset; 2984 window_cache_key_t key = { 0 }; 2985 2986 if ( (rep_header->type != svn_fs_fs__rep_plain 2987 && (!ffd->txdelta_window_cache || !ffd->raw_window_cache)) 2988 || (rep_header->type == svn_fs_fs__rep_plain 2989 && !ffd->combined_window_cache)) 2990 return SVN_NO_ERROR; 2991 2992 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry, 2993 result_pool)); 2994 2995 /* RS->FILE may be shared between RS instances -> make sure we point 2996 * to the right data. */ 2997 offset = rs.start + rs.current; 2998 if (rep_header->type == svn_fs_fs__rep_plain) 2999 { 3000 svn_stringbuf_t *plaintext; 3001 svn_boolean_t is_cached; 3002 3003 /* already in cache? */ 3004 SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache, 3005 get_window_key(&key, &rs), 3006 scratch_pool)); 3007 if (is_cached) 3008 return SVN_NO_ERROR; 3009 3010 /* for larger reps, the header may have crossed a block boundary. 3011 * make sure we still read blocks properly aligned, i.e. don't use 3012 * plain seek here. */ 3013 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool)); 3014 3015 plaintext = svn_stringbuf_create_ensure(rs.size, result_pool); 3016 SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data, 3017 rs.size, &plaintext->len, NULL, 3018 result_pool)); 3019 plaintext->data[plaintext->len] = 0; 3020 rs.current += rs.size; 3021 3022 SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool)); 3023 } 3024 else 3025 { 3026 SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool)); 3027 } 3028 3029 return SVN_NO_ERROR; 3030} 3031 3032/* Try to get the representation header identified by KEY from FS's cache. 3033 * If it has not been cached, read it from the current position in STREAM 3034 * and put it into the cache (if caching has been enabled for rep headers). 3035 * Return the result in *REP_HEADER. Use POOL for allocations. 3036 */ 3037static svn_error_t * 3038read_rep_header(svn_fs_fs__rep_header_t **rep_header, 3039 svn_fs_t *fs, 3040 svn_stream_t *stream, 3041 pair_cache_key_t *key, 3042 apr_pool_t *result_pool, 3043 apr_pool_t *scratch_pool) 3044{ 3045 fs_fs_data_t *ffd = fs->fsap_data; 3046 svn_boolean_t is_cached = FALSE; 3047 3048 if (ffd->rep_header_cache) 3049 { 3050 SVN_ERR(svn_cache__get((void**)rep_header, &is_cached, 3051 ffd->rep_header_cache, key, 3052 result_pool)); 3053 if (is_cached) 3054 return SVN_NO_ERROR; 3055 } 3056 3057 SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool, 3058 scratch_pool)); 3059 3060 if (ffd->rep_header_cache) 3061 SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header, 3062 scratch_pool)); 3063 3064 return SVN_NO_ERROR; 3065} 3066 3067/* Fetch the representation data (header, txdelta / plain windows) 3068 * addressed by ENTRY->ITEM in FS and cache it if caches are enabled. 3069 * Read the data from the already open FILE and the wrapping 3070 * STREAM object. If MAX_OFFSET is not -1, don't read windows that start 3071 * at or beyond that offset. 3072 * Use SCRATCH_POOL for temporary allocations. 3073 */ 3074static svn_error_t * 3075block_read_contents(svn_fs_t *fs, 3076 svn_fs_fs__revision_file_t *rev_file, 3077 svn_fs_fs__p2l_entry_t* entry, 3078 apr_off_t max_offset, 3079 apr_pool_t *result_pool, 3080 apr_pool_t *scratch_pool) 3081{ 3082 pair_cache_key_t header_key = { 0 }; 3083 svn_fs_fs__rep_header_t *rep_header; 3084 3085 header_key.revision = (apr_int32_t)entry->item.revision; 3086 header_key.second = entry->item.number; 3087 3088 SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key, 3089 result_pool, scratch_pool)); 3090 SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset, 3091 result_pool, scratch_pool)); 3092 3093 return SVN_NO_ERROR; 3094} 3095 3096/* For the given REV_FILE in FS, in *STREAM return a stream covering the 3097 * item specified by ENTRY. Also, verify the item's content by low-level 3098 * checksum. Allocate the result in POOL. 3099 */ 3100static svn_error_t * 3101read_item(svn_stream_t **stream, 3102 svn_fs_t *fs, 3103 svn_fs_fs__revision_file_t *rev_file, 3104 svn_fs_fs__p2l_entry_t* entry, 3105 apr_pool_t *pool) 3106{ 3107 apr_uint32_t digest; 3108 svn_checksum_t *expected, *actual; 3109 apr_uint32_t plain_digest; 3110 3111 /* Read item into string buffer. */ 3112 svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool); 3113 text->len = entry->size; 3114 text->data[text->len] = 0; 3115 SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len, 3116 NULL, NULL, pool)); 3117 3118 /* Return (construct, calculate) stream and checksum. */ 3119 *stream = svn_stream_from_stringbuf(text, pool); 3120 digest = svn__fnv1a_32x4(text->data, text->len); 3121 3122 /* Checksums will match most of the time. */ 3123 if (entry->fnv1_checksum == digest) 3124 return SVN_NO_ERROR; 3125 3126 /* Construct proper checksum objects from their digests to allow for 3127 * nice error messages. */ 3128 plain_digest = htonl(entry->fnv1_checksum); 3129 expected = svn_checksum__from_digest_fnv1a_32x4( 3130 (const unsigned char *)&plain_digest, pool); 3131 plain_digest = htonl(digest); 3132 actual = svn_checksum__from_digest_fnv1a_32x4( 3133 (const unsigned char *)&plain_digest, pool); 3134 3135 /* Construct the full error message with all the info we have. */ 3136 return svn_checksum_mismatch_err(expected, actual, pool, 3137 _("Low-level checksum mismatch while reading\n" 3138 "%s bytes of meta data at offset %s " 3139 "for item %s in revision %ld"), 3140 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size), 3141 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset), 3142 apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number), 3143 entry->item.revision); 3144} 3145 3146/* If not already cached or if MUST_READ is set, read the changed paths 3147 * list addressed by ENTRY in FS and ret��rn it in *CHANGES. Cache the 3148 * result if caching is enabled. Read the data from the already open 3149 * FILE and wrapping FILE_STREAM. Use POOL for allocations. 3150 */ 3151static svn_error_t * 3152block_read_changes(apr_array_header_t **changes, 3153 svn_fs_t *fs, 3154 svn_fs_fs__revision_file_t *rev_file, 3155 svn_fs_fs__p2l_entry_t *entry, 3156 svn_boolean_t must_read, 3157 apr_pool_t *result_pool, 3158 apr_pool_t *scratch_pool) 3159{ 3160 fs_fs_data_t *ffd = fs->fsap_data; 3161 svn_stream_t *stream; 3162 if (!must_read && !ffd->changes_cache) 3163 return SVN_NO_ERROR; 3164 3165 /* already in cache? */ 3166 if (!must_read && ffd->changes_cache) 3167 { 3168 svn_boolean_t is_cached; 3169 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, 3170 &entry->item.revision, 3171 scratch_pool)); 3172 if (is_cached) 3173 return SVN_NO_ERROR; 3174 } 3175 3176 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); 3177 3178 /* read changes from revision file */ 3179 SVN_ERR(svn_fs_fs__read_changes(changes, stream, result_pool, 3180 scratch_pool)); 3181 3182 /* cache for future reference */ 3183 if (ffd->changes_cache) 3184 SVN_ERR(svn_cache__set(ffd->changes_cache, &entry->item.revision, 3185 *changes, scratch_pool)); 3186 3187 return SVN_NO_ERROR; 3188} 3189 3190/* If not already cached or if MUST_READ is set, read the nod revision 3191 * addressed by ENTRY in FS and ret��rn it in *NODEREV_P. Cache the 3192 * result if caching is enabled. Read the data from the already open 3193 * FILE and wrapping FILE_STREAM. Use SCRATCH_POOL for temporary allocations. 3194 */ 3195static svn_error_t * 3196block_read_noderev(node_revision_t **noderev_p, 3197 svn_fs_t *fs, 3198 svn_fs_fs__revision_file_t *rev_file, 3199 svn_fs_fs__p2l_entry_t *entry, 3200 svn_boolean_t must_read, 3201 apr_pool_t *result_pool, 3202 apr_pool_t *scratch_pool) 3203{ 3204 fs_fs_data_t *ffd = fs->fsap_data; 3205 svn_stream_t *stream; 3206 3207 pair_cache_key_t key = { 0 }; 3208 key.revision = entry->item.revision; 3209 key.second = entry->item.number; 3210 3211 if (!must_read && !ffd->node_revision_cache) 3212 return SVN_NO_ERROR; 3213 3214 /* already in cache? */ 3215 if (!must_read && ffd->node_revision_cache) 3216 { 3217 svn_boolean_t is_cached; 3218 SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache, 3219 &key, scratch_pool)); 3220 if (is_cached) 3221 return SVN_NO_ERROR; 3222 } 3223 3224 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); 3225 3226 /* read node rev from revision file */ 3227 SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream, 3228 result_pool, scratch_pool)); 3229 3230 /* Workaround issue #4031: is-fresh-txn-root in revision files. */ 3231 (*noderev_p)->is_fresh_txn_root = FALSE; 3232 3233 if (ffd->node_revision_cache) 3234 SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p, 3235 scratch_pool)); 3236 3237 return SVN_NO_ERROR; 3238} 3239 3240/* Read the whole (e.g. 64kB) block containing ITEM_INDEX of REVISION in FS 3241 * and put all data into cache. If necessary and depending on heuristics, 3242 * neighboring blocks may also get read. The data is being read from 3243 * already open REVISION_FILE, which must be the correct rev / pack file 3244 * w.r.t. REVISION. 3245 * 3246 * For noderevs and changed path lists, the item fetched can be allocated 3247 * RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL. 3248 */ 3249static svn_error_t * 3250block_read(void **result, 3251 svn_fs_t *fs, 3252 svn_revnum_t revision, 3253 apr_uint64_t item_index, 3254 svn_fs_fs__revision_file_t *revision_file, 3255 apr_pool_t *result_pool, 3256 apr_pool_t *scratch_pool) 3257{ 3258 fs_fs_data_t *ffd = fs->fsap_data; 3259 apr_off_t offset, wanted_offset = 0; 3260 apr_off_t block_start = 0; 3261 apr_array_header_t *entries; 3262 int run_count = 0; 3263 int i; 3264 apr_pool_t *iterpool; 3265 3266 /* Block read is an optional feature. If the caller does not want anything 3267 * specific we may not have to read anything. */ 3268 if (!result) 3269 return SVN_NO_ERROR; 3270 3271 iterpool = svn_pool_create(scratch_pool); 3272 3273 /* don't try this on transaction protorev files */ 3274 SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision)); 3275 3276 /* index lookup: find the OFFSET of the item we *must* read plus (in the 3277 * "do-while" block) the list of items in the same block. */ 3278 SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file, 3279 revision, NULL, item_index, iterpool)); 3280 3281 offset = wanted_offset; 3282 3283 /* Heuristics: 3284 * 3285 * Read this block. If the last item crosses the block boundary, read 3286 * the next block but stop there. Because cross-boundary items cause 3287 * blocks to be read twice, this heuristics will limit this effect to 3288 * approx. 50% of blocks, probably less, while providing a sensible 3289 * amount of read-ahead. 3290 */ 3291 do 3292 { 3293 /* fetch list of items in the block surrounding OFFSET */ 3294 block_start = offset - (offset % ffd->block_size); 3295 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, revision_file, 3296 revision, block_start, 3297 ffd->block_size, scratch_pool, 3298 scratch_pool)); 3299 3300 SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset, 3301 iterpool)); 3302 3303 /* read all items from the block */ 3304 for (i = 0; i < entries->nelts; ++i) 3305 { 3306 svn_boolean_t is_result, is_wanted; 3307 apr_pool_t *pool; 3308 svn_fs_fs__p2l_entry_t* entry; 3309 3310 svn_pool_clear(iterpool); 3311 3312 /* skip empty sections */ 3313 entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); 3314 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) 3315 continue; 3316 3317 /* the item / container we were looking for? */ 3318 is_wanted = entry->offset == wanted_offset 3319 && entry->item.revision == revision 3320 && entry->item.number == item_index; 3321 is_result = result && is_wanted; 3322 3323 /* select the pool that we want the item to be allocated in */ 3324 pool = is_result ? result_pool : iterpool; 3325 3326 /* handle all items that start within this block and are relatively 3327 * small (i.e. < block size). Always read the item we need to return. 3328 */ 3329 if (is_result || ( entry->offset >= block_start 3330 && entry->size < ffd->block_size)) 3331 { 3332 void *item = NULL; 3333 SVN_ERR(svn_io_file_seek(revision_file->file, APR_SET, 3334 &entry->offset, iterpool)); 3335 switch (entry->type) 3336 { 3337 case SVN_FS_FS__ITEM_TYPE_FILE_REP: 3338 case SVN_FS_FS__ITEM_TYPE_DIR_REP: 3339 case SVN_FS_FS__ITEM_TYPE_FILE_PROPS: 3340 case SVN_FS_FS__ITEM_TYPE_DIR_PROPS: 3341 SVN_ERR(block_read_contents(fs, revision_file, entry, 3342 is_wanted 3343 ? -1 3344 : block_start + ffd->block_size, 3345 pool, iterpool)); 3346 break; 3347 3348 case SVN_FS_FS__ITEM_TYPE_NODEREV: 3349 if (ffd->node_revision_cache || is_result) 3350 SVN_ERR(block_read_noderev((node_revision_t **)&item, 3351 fs, revision_file, 3352 entry, is_result, pool, 3353 iterpool)); 3354 break; 3355 3356 case SVN_FS_FS__ITEM_TYPE_CHANGES: 3357 SVN_ERR(block_read_changes((apr_array_header_t **)&item, 3358 fs, revision_file, 3359 entry, is_result, 3360 pool, iterpool)); 3361 break; 3362 3363 default: 3364 break; 3365 } 3366 3367 if (is_result) 3368 *result = item; 3369 3370 /* if we crossed a block boundary, read the remainder of 3371 * the last block as well */ 3372 offset = entry->offset + entry->size; 3373 if (offset > block_start + ffd->block_size) 3374 ++run_count; 3375 } 3376 } 3377 3378 } 3379 while(run_count++ == 1); /* can only be true once and only if a block 3380 * boundary got crossed */ 3381 3382 /* if the caller requested a result, we must have provided one by now */ 3383 assert(!result || *result); 3384 svn_pool_destroy(iterpool); 3385 3386 return SVN_NO_ERROR; 3387} 3388