stats.c revision 362181
1/* stats.c -- implements the svn_fs_fs__get_stats private API. 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23#include "svn_dirent_uri.h" 24#include "svn_fs.h" 25#include "svn_pools.h" 26#include "svn_sorts.h" 27 28#include "private/svn_cache.h" 29#include "private/svn_sorts_private.h" 30#include "private/svn_string_private.h" 31 32#include "index.h" 33#include "pack.h" 34#include "rev_file.h" 35#include "util.h" 36#include "fs_fs.h" 37#include "cached_data.h" 38#include "low_level.h" 39#include "revprops.h" 40 41#include "../libsvn_fs/fs-loader.h" 42 43#include "svn_private_config.h" 44 45/* We group representations into 2x2 different kinds plus one default: 46 * [dir / file] x [text / prop]. The assignment is done by the first node 47 * that references the respective representation. 48 */ 49typedef enum rep_kind_t 50{ 51 /* The representation is not used _directly_, i.e. not referenced by any 52 * noderev. However, some other representation may use it as delta base. 53 * Null value. Should not occur in real-word repositories. */ 54 unused_rep, 55 56 /* a properties on directory rep */ 57 dir_property_rep, 58 59 /* a properties on file rep */ 60 file_property_rep, 61 62 /* a directory rep */ 63 dir_rep, 64 65 /* a file rep */ 66 file_rep 67} rep_kind_t; 68 69/* A representation fragment. 70 */ 71typedef struct rep_stats_t 72{ 73 /* offset in the revision file (phys. addressing) / 74 * item index within REVISION (log. addressing) */ 75 apr_uint64_t item_index; 76 77 /* item length in bytes */ 78 apr_uint64_t size; 79 80 /* item length after de-deltification */ 81 apr_uint64_t expanded_size; 82 83 /* revision that contains this representation 84 * (may be referenced by other revisions, though) */ 85 svn_revnum_t revision; 86 87 /* number of nodes that reference this representation */ 88 apr_uint32_t ref_count; 89 90 /* length of the PLAIN / DELTA line in the source file in bytes */ 91 apr_uint16_t header_size; 92 93 /* classification of the representation. values of rep_kind_t */ 94 char kind; 95 96 /* length of the delta chain, including this representation, 97 * saturated to 255 - if need be */ 98 apr_byte_t chain_length; 99} rep_stats_t; 100 101/* Represents a link in the rep delta chain. REVISION + ITEM_INDEX points 102 * to BASE_REVISION + BASE_ITEM_INDEX. We collect this info while scanning 103 * a f7 repo in a single pass and resolve it afterwards. */ 104typedef struct rep_ref_t 105{ 106 /* Revision that contains this representation. */ 107 svn_revnum_t revision; 108 109 /* Item index of this rep within REVISION. */ 110 apr_uint64_t item_index; 111 112 /* Revision of the representation we deltified against. 113 * -1 if this representation is either PLAIN or a self-delta. */ 114 svn_revnum_t base_revision; 115 116 /* Item index of that rep within BASE_REVISION. */ 117 apr_uint64_t base_item_index; 118 119 /* Length of the PLAIN / DELTA line in the source file in bytes. 120 * We use this to update the info in the rep stats after scanning the 121 * whole file. */ 122 apr_uint16_t header_size; 123 124} rep_ref_t; 125 126/* Represents a single revision. 127 * There will be only one instance per revision. */ 128typedef struct revision_info_t 129{ 130 /* number of this revision */ 131 svn_revnum_t revision; 132 133 /* pack file offset (manifest value), 0 for non-packed files */ 134 apr_off_t offset; 135 136 /* length of the changes list on bytes */ 137 apr_uint64_t changes_len; 138 139 /* offset of the changes list relative to OFFSET */ 140 apr_uint64_t change_count; 141 142 /* first offset behind the revision data in the pack file (file length 143 * for non-packed revs) */ 144 apr_off_t end; 145 146 /* number of directory noderevs in this revision */ 147 apr_uint64_t dir_noderev_count; 148 149 /* number of file noderevs in this revision */ 150 apr_uint64_t file_noderev_count; 151 152 /* total size of directory noderevs (i.e. the structs - not the rep) */ 153 apr_uint64_t dir_noderev_size; 154 155 /* total size of file noderevs (i.e. the structs - not the rep) */ 156 apr_uint64_t file_noderev_size; 157 158 /* all rep_stats_t of this revision (in no particular order), 159 * i.e. those that point back to this struct */ 160 apr_array_header_t *representations; 161 162 /* Temporary rev / pack file access object, used in phys. addressing 163 * mode only. NULL when done reading this revision. */ 164 svn_fs_fs__revision_file_t *rev_file; 165} revision_info_t; 166 167/* Root data structure containing all information about a given repository. 168 * We use it as a wrapper around svn_fs_t and pass it around where we would 169 * otherwise just use a svn_fs_t. 170 */ 171typedef struct query_t 172{ 173 /* FS API object*/ 174 svn_fs_t *fs; 175 176 /* The HEAD revision. */ 177 svn_revnum_t head; 178 179 /* Number of revs per shard; 0 for non-sharded repos. */ 180 int shard_size; 181 182 /* First non-packed revision. */ 183 svn_revnum_t min_unpacked_rev; 184 185 /* all revisions */ 186 apr_array_header_t *revisions; 187 188 /* empty representation. 189 * Used as a dummy base for DELTA reps without base. */ 190 rep_stats_t *null_base; 191 192 /* collected statistics */ 193 svn_fs_fs__stats_t *stats; 194 195 /* Progress notification callback to call after each shard. May be NULL. */ 196 svn_fs_progress_notify_func_t progress_func; 197 198 /* Baton for PROGRESS_FUNC. */ 199 void *progress_baton; 200 201 /* Cancellation support callback to call once in a while. May be NULL. */ 202 svn_cancel_func_t cancel_func; 203 204 /* Baton for CANCEL_FUNC. */ 205 void *cancel_baton; 206} query_t; 207 208/* Initialize the LARGEST_CHANGES member in STATS with a capacity of COUNT 209 * entries. Allocate the result in RESULT_POOL. 210 */ 211static void 212initialize_largest_changes(svn_fs_fs__stats_t *stats, 213 apr_size_t count, 214 apr_pool_t *result_pool) 215{ 216 apr_size_t i; 217 218 stats->largest_changes = apr_pcalloc(result_pool, 219 sizeof(*stats->largest_changes)); 220 stats->largest_changes->count = count; 221 stats->largest_changes->min_size = 1; 222 stats->largest_changes->changes 223 = apr_palloc(result_pool, count * sizeof(*stats->largest_changes->changes)); 224 225 /* allocate *all* entries before the path stringbufs. This increases 226 * cache locality and enhances performance significantly. */ 227 for (i = 0; i < count; ++i) 228 stats->largest_changes->changes[i] 229 = apr_palloc(result_pool, sizeof(**stats->largest_changes->changes)); 230 231 /* now initialize them and allocate the stringbufs */ 232 for (i = 0; i < count; ++i) 233 { 234 stats->largest_changes->changes[i]->size = 0; 235 stats->largest_changes->changes[i]->revision = SVN_INVALID_REVNUM; 236 stats->largest_changes->changes[i]->path 237 = svn_stringbuf_create_ensure(1024, result_pool); 238 } 239} 240 241/* Add entry for SIZE to HISTOGRAM. 242 */ 243static void 244add_to_histogram(svn_fs_fs__histogram_t *histogram, 245 apr_int64_t size) 246{ 247 apr_int64_t shift = 0; 248 249 while (((apr_int64_t)(1) << shift) <= size) 250 shift++; 251 252 histogram->total.count++; 253 histogram->total.sum += size; 254 histogram->lines[(apr_size_t)shift].count++; 255 histogram->lines[(apr_size_t)shift].sum += size; 256} 257 258/* Update data aggregators in STATS with this representation of type KIND, 259 * on-disk REP_SIZE and expanded node size EXPANDED_SIZE for PATH in REVSION. 260 * PLAIN_ADDED indicates whether the node has a deltification predecessor. 261 */ 262static void 263add_change(svn_fs_fs__stats_t *stats, 264 apr_uint64_t rep_size, 265 apr_uint64_t expanded_size, 266 svn_revnum_t revision, 267 const char *path, 268 rep_kind_t kind, 269 svn_boolean_t plain_added) 270{ 271 /* identify largest reps */ 272 if (rep_size >= stats->largest_changes->min_size) 273 { 274 apr_size_t i; 275 svn_fs_fs__largest_changes_t *largest_changes = stats->largest_changes; 276 svn_fs_fs__large_change_info_t *info 277 = largest_changes->changes[largest_changes->count - 1]; 278 info->size = rep_size; 279 info->revision = revision; 280 svn_stringbuf_set(info->path, path); 281 282 /* linear insertion but not too bad since count is low and insertions 283 * near the end are more likely than close to front */ 284 for (i = largest_changes->count - 1; i > 0; --i) 285 if (largest_changes->changes[i-1]->size >= rep_size) 286 break; 287 else 288 largest_changes->changes[i] = largest_changes->changes[i-1]; 289 290 largest_changes->changes[i] = info; 291 largest_changes->min_size 292 = largest_changes->changes[largest_changes->count-1]->size; 293 } 294 295 /* global histograms */ 296 add_to_histogram(&stats->rep_size_histogram, rep_size); 297 add_to_histogram(&stats->node_size_histogram, expanded_size); 298 299 if (plain_added) 300 { 301 add_to_histogram(&stats->added_rep_size_histogram, rep_size); 302 add_to_histogram(&stats->added_node_size_histogram, expanded_size); 303 } 304 305 /* specific histograms by type */ 306 switch (kind) 307 { 308 case unused_rep: 309 add_to_histogram(&stats->unused_rep_histogram, rep_size); 310 break; 311 case dir_property_rep: 312 add_to_histogram(&stats->dir_prop_rep_histogram, rep_size); 313 add_to_histogram(&stats->dir_prop_histogram, expanded_size); 314 break; 315 case file_property_rep: 316 add_to_histogram(&stats->file_prop_rep_histogram, rep_size); 317 add_to_histogram(&stats->file_prop_histogram, expanded_size); 318 break; 319 case dir_rep: 320 add_to_histogram(&stats->dir_rep_histogram, rep_size); 321 add_to_histogram(&stats->dir_histogram, expanded_size); 322 break; 323 case file_rep: 324 add_to_histogram(&stats->file_rep_histogram, rep_size); 325 add_to_histogram(&stats->file_histogram, expanded_size); 326 break; 327 } 328 329 /* by extension */ 330 if (kind == file_rep) 331 { 332 /* determine extension */ 333 svn_fs_fs__extension_info_t *info; 334 const char * file_name = strrchr(path, '/'); 335 const char * extension = file_name ? strrchr(file_name, '.') : NULL; 336 337 if (extension == NULL || extension == file_name + 1) 338 extension = "(none)"; 339 340 /* get / auto-insert entry for this extension */ 341 info = apr_hash_get(stats->by_extension, extension, APR_HASH_KEY_STRING); 342 if (info == NULL) 343 { 344 apr_pool_t *pool = apr_hash_pool_get(stats->by_extension); 345 info = apr_pcalloc(pool, sizeof(*info)); 346 info->extension = apr_pstrdup(pool, extension); 347 348 apr_hash_set(stats->by_extension, info->extension, 349 APR_HASH_KEY_STRING, info); 350 } 351 352 /* update per-extension histogram */ 353 add_to_histogram(&info->node_histogram, expanded_size); 354 add_to_histogram(&info->rep_histogram, rep_size); 355 } 356} 357 358/* Comparator used for binary search comparing the absolute file offset 359 * of a representation to some other offset. DATA is a *rep_stats_t, 360 * KEY is a pointer to an apr_uint64_t. 361 */ 362static int 363compare_representation_item_index(const void *data, const void *key) 364{ 365 apr_uint64_t lhs = (*(const rep_stats_t *const *)data)->item_index; 366 apr_uint64_t rhs = *(const apr_uint64_t *)key; 367 368 if (lhs < rhs) 369 return -1; 370 return (lhs > rhs ? 1 : 0); 371} 372 373/* Find the revision_info_t object to the given REVISION in QUERY and 374 * return it in *REVISION_INFO. For performance reasons, we skip the 375 * lookup if the info is already provided. 376 * 377 * In that revision, look for the rep_stats_t object for item ITEM_INDEX. 378 * If it already exists, set *IDX to its index in *REVISION_INFO's 379 * representations list and return the representation object. Otherwise, 380 * set the index to where it must be inserted and return NULL. 381 */ 382static rep_stats_t * 383find_representation(int *idx, 384 query_t *query, 385 revision_info_t **revision_info, 386 svn_revnum_t revision, 387 apr_uint64_t item_index) 388{ 389 revision_info_t *info; 390 *idx = -1; 391 392 /* first let's find the revision */ 393 info = revision_info ? *revision_info : NULL; 394 if (info == NULL || info->revision != revision) 395 { 396 info = APR_ARRAY_IDX(query->revisions, revision, revision_info_t*); 397 if (revision_info) 398 *revision_info = info; 399 } 400 401 /* not found -> no result */ 402 if (info == NULL) 403 return NULL; 404 405 /* look for the representation */ 406 *idx = svn_sort__bsearch_lower_bound(info->representations, 407 &item_index, 408 compare_representation_item_index); 409 if (*idx < info->representations->nelts) 410 { 411 /* return the representation, if this is the one we were looking for */ 412 rep_stats_t *result 413 = APR_ARRAY_IDX(info->representations, *idx, rep_stats_t *); 414 if (result->item_index == item_index) 415 return result; 416 } 417 418 /* not parsed, yet */ 419 return NULL; 420} 421 422/* Find / auto-construct the representation stats for REP in QUERY and 423 * return it in *REPRESENTATION. 424 * 425 * If necessary, allocate the result in RESULT_POOL; use SCRATCH_POOL for 426 * temporary allocations. 427 */ 428static svn_error_t * 429parse_representation(rep_stats_t **representation, 430 query_t *query, 431 representation_t *rep, 432 revision_info_t *revision_info, 433 apr_pool_t *result_pool, 434 apr_pool_t *scratch_pool) 435{ 436 rep_stats_t *result; 437 int idx; 438 439 /* read location (revision, offset) and size */ 440 441 /* look it up */ 442 result = find_representation(&idx, query, &revision_info, rep->revision, 443 rep->item_index); 444 if (!result) 445 { 446 /* not parsed, yet (probably a rep in the same revision). 447 * Create a new rep object and determine its base rep as well. 448 */ 449 result = apr_pcalloc(result_pool, sizeof(*result)); 450 result->revision = rep->revision; 451 result->expanded_size = rep->expanded_size; 452 result->item_index = rep->item_index; 453 result->size = rep->size; 454 455 /* In phys. addressing mode, follow link to the actual representation. 456 * In log. addressing mode, we will find it already as part of our 457 * linear walk through the whole file. */ 458 if (!svn_fs_fs__use_log_addressing(query->fs)) 459 { 460 svn_fs_fs__rep_header_t *header; 461 apr_off_t offset = revision_info->offset 462 + (apr_off_t)rep->item_index; 463 464 SVN_ERR_ASSERT(revision_info->rev_file); 465 SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET, 466 &offset, scratch_pool)); 467 SVN_ERR(svn_fs_fs__read_rep_header(&header, 468 revision_info->rev_file->stream, 469 scratch_pool, scratch_pool)); 470 471 result->header_size = header->header_size; 472 473 /* Determine length of the delta chain. */ 474 if (header->type == svn_fs_fs__rep_delta) 475 { 476 int base_idx; 477 rep_stats_t *base_rep 478 = find_representation(&base_idx, query, NULL, 479 header->base_revision, 480 header->base_item_index); 481 482 result->chain_length = 1 + MIN(base_rep->chain_length, 483 (apr_byte_t)0xfe); 484 } 485 else 486 { 487 result->chain_length = 1; 488 } 489 } 490 491 SVN_ERR(svn_sort__array_insert2(revision_info->representations, &result, idx)); 492 } 493 494 *representation = result; 495 496 return SVN_NO_ERROR; 497} 498 499 500/* forward declaration */ 501static svn_error_t * 502read_noderev(query_t *query, 503 svn_stringbuf_t *noderev_str, 504 revision_info_t *revision_info, 505 apr_pool_t *result_pool, 506 apr_pool_t *scratch_pool); 507 508/* Read the noderev item at OFFSET in REVISION_INFO from the filesystem 509 * provided by QUERY. Return it in *NODEREV, allocated in RESULT_POOL. 510 * Use SCRATCH_POOL for temporary allocations. 511 * 512 * The textual representation of the noderev will be used to determine 513 * the on-disk size of the noderev. Only called in phys. addressing mode. 514 */ 515static svn_error_t * 516read_phsy_noderev(svn_stringbuf_t **noderev, 517 query_t *query, 518 apr_off_t offset, 519 revision_info_t *revision_info, 520 apr_pool_t *result_pool, 521 apr_pool_t *scratch_pool) 522{ 523 svn_stringbuf_t *noderev_str = svn_stringbuf_create_empty(result_pool); 524 svn_stringbuf_t *line; 525 svn_boolean_t eof; 526 527 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 528 529 /* Navigate the file stream to the start of noderev. */ 530 SVN_ERR_ASSERT(revision_info->rev_file); 531 532 offset += revision_info->offset; 533 SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET, 534 &offset, scratch_pool)); 535 536 /* Read it (terminated by an empty line) */ 537 do 538 { 539 svn_pool_clear(iterpool); 540 541 SVN_ERR(svn_stream_readline(revision_info->rev_file->stream, &line, 542 "\n", &eof, iterpool)); 543 svn_stringbuf_appendstr(noderev_str, line); 544 svn_stringbuf_appendbyte(noderev_str, '\n'); 545 } 546 while (line->len > 0 && !eof); 547 548 /* Return the result. */ 549 *noderev = noderev_str; 550 551 svn_pool_destroy(iterpool); 552 553 return SVN_NO_ERROR; 554} 555 556/* Starting at the directory in NODEREV's text, read all DAG nodes, 557 * directories and representations linked in that tree structure. 558 * Store them in QUERY and REVISION_INFO. Also, read them only once. 559 * 560 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 561 * temporaries. 562 */ 563static svn_error_t * 564parse_dir(query_t *query, 565 node_revision_t *noderev, 566 revision_info_t *revision_info, 567 apr_pool_t *result_pool, 568 apr_pool_t *scratch_pool) 569{ 570 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 571 572 int i; 573 apr_array_header_t *entries; 574 SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, query->fs, noderev, 575 scratch_pool, scratch_pool)); 576 577 for (i = 0; i < entries->nelts; ++i) 578 { 579 svn_fs_dirent_t *dirent = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *); 580 581 if (svn_fs_fs__id_rev(dirent->id) == revision_info->revision) 582 { 583 svn_stringbuf_t *noderev_str; 584 svn_pool_clear(iterpool); 585 586 SVN_ERR(read_phsy_noderev(&noderev_str, query, 587 svn_fs_fs__id_item(dirent->id), 588 revision_info, iterpool, iterpool)); 589 SVN_ERR(read_noderev(query, noderev_str, revision_info, 590 result_pool, iterpool)); 591 } 592 } 593 594 svn_pool_destroy(iterpool); 595 596 return SVN_NO_ERROR; 597} 598 599/* Parse the noderev given as NODEREV_STR and store the info in QUERY and 600 * REVISION_INFO. In phys. addressing mode, continue reading all DAG nodes, 601 * directories and representations linked in that tree structure. 602 * 603 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 604 * temporaries. 605 */ 606static svn_error_t * 607read_noderev(query_t *query, 608 svn_stringbuf_t *noderev_str, 609 revision_info_t *revision_info, 610 apr_pool_t *result_pool, 611 apr_pool_t *scratch_pool) 612{ 613 rep_stats_t *text = NULL; 614 rep_stats_t *props = NULL; 615 node_revision_t *noderev; 616 617 svn_stream_t *stream = svn_stream_from_stringbuf(noderev_str, scratch_pool); 618 SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, scratch_pool, 619 scratch_pool)); 620 SVN_ERR(svn_fs_fs__fixup_expanded_size(query->fs, noderev->data_rep, 621 scratch_pool)); 622 SVN_ERR(svn_fs_fs__fixup_expanded_size(query->fs, noderev->prop_rep, 623 scratch_pool)); 624 625 if (noderev->data_rep) 626 { 627 SVN_ERR(parse_representation(&text, query, 628 noderev->data_rep, revision_info, 629 result_pool, scratch_pool)); 630 631 /* if we are the first to use this rep, mark it as "text rep" */ 632 if (++text->ref_count == 1) 633 text->kind = noderev->kind == svn_node_dir ? dir_rep : file_rep; 634 } 635 636 if (noderev->prop_rep) 637 { 638 SVN_ERR(parse_representation(&props, query, 639 noderev->prop_rep, revision_info, 640 result_pool, scratch_pool)); 641 642 /* if we are the first to use this rep, mark it as "prop rep" */ 643 if (++props->ref_count == 1) 644 props->kind = noderev->kind == svn_node_dir ? dir_property_rep 645 : file_property_rep; 646 } 647 648 /* record largest changes */ 649 if (text && text->ref_count == 1) 650 add_change(query->stats, text->size, text->expanded_size, text->revision, 651 noderev->created_path, text->kind, !noderev->predecessor_id); 652 if (props && props->ref_count == 1) 653 add_change(query->stats, props->size, props->expanded_size, 654 props->revision, noderev->created_path, props->kind, 655 !noderev->predecessor_id); 656 657 /* if this is a directory and has not been processed, yet, read and 658 * process it recursively */ 659 if ( noderev->kind == svn_node_dir && text && text->ref_count == 1 660 && !svn_fs_fs__use_log_addressing(query->fs)) 661 SVN_ERR(parse_dir(query, noderev, revision_info, result_pool, 662 scratch_pool)); 663 664 /* update stats */ 665 if (noderev->kind == svn_node_dir) 666 { 667 revision_info->dir_noderev_size += noderev_str->len; 668 revision_info->dir_noderev_count++; 669 } 670 else 671 { 672 revision_info->file_noderev_size += noderev_str->len; 673 revision_info->file_noderev_count++; 674 } 675 676 return SVN_NO_ERROR; 677} 678 679/* For the revision given as REVISION_INFO within QUERY, determine the number 680 * of entries in its changed paths list and store that info in REVISION_INFO. 681 * Use SCRATCH_POOL for temporary allocations. 682 */ 683static svn_error_t * 684get_phys_change_count(query_t *query, 685 revision_info_t *revision_info, 686 apr_pool_t *scratch_pool) 687{ 688 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 689 svn_fs_fs__changes_context_t *context; 690 691 /* Fetch the first block of data. */ 692 SVN_ERR(svn_fs_fs__create_changes_context(&context, query->fs, 693 revision_info->revision, 694 scratch_pool)); 695 696 revision_info->change_count = 0; 697 while (!context->eol) 698 { 699 apr_array_header_t *changes; 700 701 svn_pool_clear(iterpool); 702 SVN_ERR(svn_fs_fs__get_changes(&changes, context, iterpool, iterpool)); 703 revision_info->change_count = changes->nelts; 704 } 705 706 svn_pool_destroy(iterpool); 707 708 return SVN_NO_ERROR; 709} 710 711/* Read header information for the revision stored in FILE_CONTENT (one 712 * whole revision). Return the offsets within FILE_CONTENT for the 713 * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN. 714 * Use POOL for temporary allocations. */ 715static svn_error_t * 716read_phys_revision(query_t *query, 717 revision_info_t *info, 718 apr_pool_t *result_pool, 719 apr_pool_t *scratch_pool) 720{ 721 char buf[64]; 722 apr_off_t root_node_offset; 723 apr_off_t changes_offset; 724 svn_stringbuf_t *trailer; 725 svn_stringbuf_t *noderev_str; 726 727 /* Read the last 64 bytes of the revision (if long enough). */ 728 apr_off_t start = MAX(info->offset, info->end - sizeof(buf)); 729 apr_size_t len = (apr_size_t)(info->end - start); 730 SVN_ERR(svn_io_file_seek(info->rev_file->file, APR_SET, &start, 731 scratch_pool)); 732 SVN_ERR(svn_io_file_read_full2(info->rev_file->file, buf, len, NULL, NULL, 733 scratch_pool)); 734 trailer = svn_stringbuf_ncreate(buf, len, scratch_pool); 735 736 /* Parse that trailer. */ 737 SVN_ERR(svn_fs_fs__parse_revision_trailer(&root_node_offset, 738 &changes_offset, trailer, 739 info->revision)); 740 SVN_ERR(get_phys_change_count(query, info, scratch_pool)); 741 742 /* Calculate the length of the changes list. */ 743 trailer = svn_fs_fs__unparse_revision_trailer(root_node_offset, 744 changes_offset, 745 scratch_pool); 746 info->changes_len = info->end - info->offset - changes_offset 747 - trailer->len; 748 749 /* Recursively read nodes added in this rev. */ 750 SVN_ERR(read_phsy_noderev(&noderev_str, query, root_node_offset, info, 751 scratch_pool, scratch_pool)); 752 SVN_ERR(read_noderev(query, noderev_str, info, result_pool, scratch_pool)); 753 754 return SVN_NO_ERROR; 755} 756 757/* Read the content of the pack file staring at revision BASE physical 758 * addressing mode and store it in QUERY. 759 * 760 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 761 * temporaries. 762 */ 763static svn_error_t * 764read_phys_pack_file(query_t *query, 765 svn_revnum_t base, 766 apr_pool_t *result_pool, 767 apr_pool_t *scratch_pool) 768{ 769 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 770 int i; 771 svn_filesize_t file_size = 0; 772 svn_fs_fs__revision_file_t *rev_file; 773 774 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base, 775 scratch_pool, scratch_pool)); 776 SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); 777 778 /* process each revision in the pack file */ 779 for (i = 0; i < query->shard_size; ++i) 780 { 781 revision_info_t *info; 782 783 /* cancellation support */ 784 if (query->cancel_func) 785 SVN_ERR(query->cancel_func(query->cancel_baton)); 786 787 /* create the revision info for the current rev */ 788 info = apr_pcalloc(result_pool, sizeof(*info)); 789 info->representations = apr_array_make(result_pool, 4, 790 sizeof(rep_stats_t*)); 791 info->rev_file = rev_file; 792 793 info->revision = base + i; 794 SVN_ERR(svn_fs_fs__get_packed_offset(&info->offset, query->fs, base + i, 795 iterpool)); 796 if (i + 1 == query->shard_size) 797 info->end = file_size; 798 else 799 SVN_ERR(svn_fs_fs__get_packed_offset(&info->end, query->fs, 800 base + i + 1, iterpool)); 801 802 SVN_ERR(read_phys_revision(query, info, result_pool, iterpool)); 803 804 info->representations = apr_array_copy(result_pool, 805 info->representations); 806 807 /* Done with this revision. */ 808 info->rev_file = NULL; 809 810 /* put it into our container */ 811 APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; 812 813 /* destroy temps */ 814 svn_pool_clear(iterpool); 815 } 816 817 /* Done with this pack file. */ 818 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 819 820 /* one more pack file processed */ 821 if (query->progress_func) 822 query->progress_func(base, query->progress_baton, scratch_pool); 823 824 return SVN_NO_ERROR; 825} 826 827/* Read the content of the file for REVISION in physical addressing mode 828 * and store its contents in QUERY. 829 * 830 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 831 * temporaries. 832 */ 833static svn_error_t * 834read_phys_revision_file(query_t *query, 835 svn_revnum_t revision, 836 apr_pool_t *result_pool, 837 apr_pool_t *scratch_pool) 838{ 839 revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info)); 840 svn_filesize_t file_size = 0; 841 svn_fs_fs__revision_file_t *rev_file; 842 843 /* cancellation support */ 844 if (query->cancel_func) 845 SVN_ERR(query->cancel_func(query->cancel_baton)); 846 847 /* read the whole pack file into memory */ 848 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision, 849 scratch_pool, scratch_pool)); 850 SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); 851 852 /* create the revision info for the current rev */ 853 info->representations = apr_array_make(result_pool, 4, sizeof(rep_stats_t*)); 854 855 info->rev_file = rev_file; 856 info->revision = revision; 857 info->offset = 0; 858 info->end = file_size; 859 860 SVN_ERR(read_phys_revision(query, info, result_pool, scratch_pool)); 861 862 /* Done with this revision. */ 863 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 864 info->rev_file = NULL; 865 866 /* put it into our container */ 867 APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; 868 869 /* show progress every 1000 revs or so */ 870 if (query->progress_func) 871 { 872 if (query->shard_size && (revision % query->shard_size == 0)) 873 query->progress_func(revision, query->progress_baton, scratch_pool); 874 if (!query->shard_size && (revision % 1000 == 0)) 875 query->progress_func(revision, query->progress_baton, scratch_pool); 876 } 877 878 return SVN_NO_ERROR; 879} 880 881/* Given the unparsed changes list in CHANGES with LEN chars, return the 882 * number of changed paths encoded in it. Only used in log. addressing 883 * mode. 884 */ 885static apr_uint64_t 886get_log_change_count(const char *changes, 887 apr_size_t len) 888{ 889 apr_size_t lines = 0; 890 const char *end = changes + len; 891 892 /* line count */ 893 for (; changes < end; ++changes) 894 if (*changes == '\n') 895 ++lines; 896 897 /* two lines per change */ 898 return lines / 2; 899} 900 901/* Read the item described by ENTRY from the REV_FILE and return the 902 * respective byte sequence in *CONTENTS, allocated in RESULT_POOL. 903 * Use SCRATCH_POOL for temporary allocations 904 */ 905static svn_error_t * 906read_item(svn_stringbuf_t **contents, 907 svn_fs_fs__revision_file_t *rev_file, 908 svn_fs_fs__p2l_entry_t *entry, 909 apr_pool_t *result_pool, 910 apr_pool_t *scratch_pool) 911{ 912 svn_stringbuf_t *item = svn_stringbuf_create_ensure(entry->size, 913 result_pool); 914 item->len = entry->size; 915 item->data[item->len] = 0; 916 917 SVN_ERR(svn_io_file_aligned_seek(rev_file->file, rev_file->block_size, 918 NULL, entry->offset, scratch_pool)); 919 SVN_ERR(svn_io_file_read_full2(rev_file->file, item->data, item->len, 920 NULL, NULL, scratch_pool)); 921 922 *contents = item; 923 924 return SVN_NO_ERROR; 925} 926 927/* Predicate comparing the two rep_ref_t** LHS and RHS by the respective 928 * representation's revision. 929 */ 930static int 931compare_representation_refs(const void *lhs, const void *rhs) 932{ 933 svn_revnum_t lhs_rev = (*(const rep_ref_t *const *)lhs)->revision; 934 svn_revnum_t rhs_rev = (*(const rep_ref_t *const *)rhs)->revision; 935 936 if (lhs_rev < rhs_rev) 937 return -1; 938 return (lhs_rev > rhs_rev ? 1 : 0); 939} 940 941/* Given all the presentations found in a single rev / pack file as 942 * rep_ref_t * in REP_REFS, update the delta chain lengths in QUERY. 943 * REP_REFS and its contents can then be discarded. 944 */ 945static svn_error_t * 946resolve_representation_refs(query_t *query, 947 apr_array_header_t *rep_refs) 948{ 949 int i; 950 951 /* Because delta chains can only point to previous revs, after sorting 952 * REP_REFS, all base refs have already been updated. */ 953 svn_sort__array(rep_refs, compare_representation_refs); 954 955 /* Build up the CHAIN_LENGTH values. */ 956 for (i = 0; i < rep_refs->nelts; ++i) 957 { 958 int idx; 959 rep_ref_t *ref = APR_ARRAY_IDX(rep_refs, i, rep_ref_t *); 960 rep_stats_t *rep = find_representation(&idx, query, NULL, 961 ref->revision, ref->item_index); 962 963 /* No dangling pointers and all base reps have been processed. */ 964 SVN_ERR_ASSERT(rep); 965 SVN_ERR_ASSERT(!rep->chain_length); 966 967 /* Set the HEADER_SIZE as we found it during the scan. */ 968 rep->header_size = ref->header_size; 969 970 /* The delta chain got 1 element longer. */ 971 if (ref->base_revision == SVN_INVALID_REVNUM) 972 { 973 rep->chain_length = 1; 974 } 975 else 976 { 977 rep_stats_t *base; 978 979 base = find_representation(&idx, query, NULL, ref->base_revision, 980 ref->base_item_index); 981 SVN_ERR_ASSERT(base); 982 SVN_ERR_ASSERT(base->chain_length); 983 984 rep->chain_length = 1 + MIN(base->chain_length, (apr_byte_t)0xfe); 985 } 986 } 987 988 return SVN_NO_ERROR; 989} 990 991/* Process the logically addressed revision contents of revisions BASE to 992 * BASE + COUNT - 1 in QUERY. 993 * 994 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 995 * temporaries. 996 */ 997static svn_error_t * 998read_log_rev_or_packfile(query_t *query, 999 svn_revnum_t base, 1000 int count, 1001 apr_pool_t *result_pool, 1002 apr_pool_t *scratch_pool) 1003{ 1004 fs_fs_data_t *ffd = query->fs->fsap_data; 1005 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1006 apr_off_t max_offset; 1007 apr_off_t offset = 0; 1008 int i; 1009 svn_fs_fs__revision_file_t *rev_file; 1010 1011 /* We collect the delta chain links as we scan the file. Afterwards, 1012 * we determine the lengths of those delta chains and throw this 1013 * temporary container away. */ 1014 apr_array_header_t *rep_refs = apr_array_make(scratch_pool, 64, 1015 sizeof(rep_ref_t *)); 1016 1017 /* we will process every revision in the rev / pack file */ 1018 for (i = 0; i < count; ++i) 1019 { 1020 /* create the revision info for the current rev */ 1021 revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info)); 1022 info->representations = apr_array_make(result_pool, 4, 1023 sizeof(rep_stats_t*)); 1024 info->revision = base + i; 1025 1026 APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; 1027 } 1028 1029 /* open the pack / rev file that is covered by the p2l index */ 1030 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base, 1031 scratch_pool, iterpool)); 1032 SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, query->fs, rev_file, 1033 base, scratch_pool)); 1034 1035 /* record the whole pack size in the first rev so the total sum will 1036 still be correct */ 1037 APR_ARRAY_IDX(query->revisions, base, revision_info_t*)->end = max_offset; 1038 1039 /* for all offsets in the file, get the P2L index entries and process 1040 the interesting items (change lists, noderevs) */ 1041 for (offset = 0; offset < max_offset; ) 1042 { 1043 apr_array_header_t *entries; 1044 1045 svn_pool_clear(iterpool); 1046 1047 /* cancellation support */ 1048 if (query->cancel_func) 1049 SVN_ERR(query->cancel_func(query->cancel_baton)); 1050 1051 /* get all entries for the current block */ 1052 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, query->fs, rev_file, base, 1053 offset, ffd->p2l_page_size, 1054 iterpool, iterpool)); 1055 1056 /* process all entries (and later continue with the next block) */ 1057 for (i = 0; i < entries->nelts; ++i) 1058 { 1059 svn_stringbuf_t *item; 1060 revision_info_t *info; 1061 svn_fs_fs__p2l_entry_t *entry 1062 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); 1063 1064 /* skip bits we previously processed */ 1065 if (i == 0 && entry->offset < offset) 1066 continue; 1067 1068 /* skip zero-sized entries */ 1069 if (entry->size == 0) 1070 continue; 1071 1072 /* read and process interesting items */ 1073 info = APR_ARRAY_IDX(query->revisions, entry->item.revision, 1074 revision_info_t*); 1075 1076 if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV) 1077 { 1078 SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool)); 1079 SVN_ERR(read_noderev(query, item, info, result_pool, iterpool)); 1080 } 1081 else if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES) 1082 { 1083 SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool)); 1084 info->change_count 1085 = get_log_change_count(item->data + 0, item->len); 1086 info->changes_len += entry->size; 1087 } 1088 else if ( (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP) 1089 || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP) 1090 || (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS) 1091 || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)) 1092 { 1093 /* Collect the delta chain link. */ 1094 svn_fs_fs__rep_header_t *header; 1095 rep_ref_t *ref = apr_pcalloc(scratch_pool, sizeof(*ref)); 1096 1097 SVN_ERR(svn_io_file_aligned_seek(rev_file->file, 1098 rev_file->block_size, 1099 NULL, entry->offset, 1100 iterpool)); 1101 SVN_ERR(svn_fs_fs__read_rep_header(&header, 1102 rev_file->stream, 1103 iterpool, iterpool)); 1104 1105 ref->header_size = header->header_size; 1106 ref->revision = entry->item.revision; 1107 ref->item_index = entry->item.number; 1108 1109 if (header->type == svn_fs_fs__rep_delta) 1110 { 1111 ref->base_item_index = header->base_item_index; 1112 ref->base_revision = header->base_revision; 1113 } 1114 else 1115 { 1116 ref->base_item_index = SVN_FS_FS__ITEM_INDEX_UNUSED; 1117 ref->base_revision = SVN_INVALID_REVNUM; 1118 } 1119 1120 APR_ARRAY_PUSH(rep_refs, rep_ref_t *) = ref; 1121 } 1122 1123 /* advance offset */ 1124 offset += entry->size; 1125 } 1126 } 1127 1128 /* Resolve the delta chain links. */ 1129 SVN_ERR(resolve_representation_refs(query, rep_refs)); 1130 1131 /* clean up and close file handles */ 1132 svn_pool_destroy(iterpool); 1133 1134 return SVN_NO_ERROR; 1135} 1136 1137/* Read the content of the pack file staring at revision BASE logical 1138 * addressing mode and store it in QUERY. 1139 * 1140 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 1141 * temporaries. 1142 */ 1143static svn_error_t * 1144read_log_pack_file(query_t *query, 1145 svn_revnum_t base, 1146 apr_pool_t *result_pool, 1147 apr_pool_t *scratch_pool) 1148{ 1149 SVN_ERR(read_log_rev_or_packfile(query, base, query->shard_size, 1150 result_pool, scratch_pool)); 1151 1152 /* one more pack file processed */ 1153 if (query->progress_func) 1154 query->progress_func(base, query->progress_baton, scratch_pool); 1155 1156 return SVN_NO_ERROR; 1157} 1158 1159/* Read the content of the file for REVISION in logical addressing mode 1160 * and store its contents in QUERY. 1161 * 1162 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 1163 * temporaries. 1164 */ 1165static svn_error_t * 1166read_log_revision_file(query_t *query, 1167 svn_revnum_t revision, 1168 apr_pool_t *result_pool, 1169 apr_pool_t *scratch_pool) 1170{ 1171 SVN_ERR(read_log_rev_or_packfile(query, revision, 1, 1172 result_pool, scratch_pool)); 1173 1174 /* show progress every 1000 revs or so */ 1175 if (query->progress_func) 1176 { 1177 if (query->shard_size && (revision % query->shard_size == 0)) 1178 query->progress_func(revision, query->progress_baton, scratch_pool); 1179 if (!query->shard_size && (revision % 1000 == 0)) 1180 query->progress_func(revision, query->progress_baton, scratch_pool); 1181 } 1182 1183 return SVN_NO_ERROR; 1184} 1185 1186/* Read the repository and collect the stats info in QUERY. 1187 * 1188 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 1189 * temporaries. 1190 */ 1191static svn_error_t * 1192read_revisions(query_t *query, 1193 apr_pool_t *result_pool, 1194 apr_pool_t *scratch_pool) 1195{ 1196 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1197 svn_revnum_t revision; 1198 1199 /* read all packed revs */ 1200 for ( revision = 0 1201 ; revision < query->min_unpacked_rev 1202 ; revision += query->shard_size) 1203 { 1204 svn_pool_clear(iterpool); 1205 1206 if (svn_fs_fs__use_log_addressing(query->fs)) 1207 SVN_ERR(read_log_pack_file(query, revision, result_pool, iterpool)); 1208 else 1209 SVN_ERR(read_phys_pack_file(query, revision, result_pool, iterpool)); 1210 } 1211 1212 /* read non-packed revs */ 1213 for ( ; revision <= query->head; ++revision) 1214 { 1215 svn_pool_clear(iterpool); 1216 1217 if (svn_fs_fs__use_log_addressing(query->fs)) 1218 SVN_ERR(read_log_revision_file(query, revision, result_pool, 1219 iterpool)); 1220 else 1221 SVN_ERR(read_phys_revision_file(query, revision, result_pool, 1222 iterpool)); 1223 } 1224 1225 svn_pool_destroy(iterpool); 1226 1227 return SVN_NO_ERROR; 1228} 1229 1230/* Accumulate stats of REP in STATS. 1231 */ 1232static void 1233add_rep_pack_stats(svn_fs_fs__rep_pack_stats_t *stats, 1234 rep_stats_t *rep) 1235{ 1236 stats->count++; 1237 1238 stats->packed_size += rep->size; 1239 stats->expanded_size += rep->expanded_size; 1240 stats->overhead_size += rep->header_size + 7 /* ENDREP\n */; 1241} 1242 1243/* Accumulate stats of REP in STATS. 1244 */ 1245static void 1246add_rep_stats(svn_fs_fs__representation_stats_t *stats, 1247 rep_stats_t *rep) 1248{ 1249 add_rep_pack_stats(&stats->total, rep); 1250 if (rep->ref_count == 1) 1251 add_rep_pack_stats(&stats->uniques, rep); 1252 else 1253 add_rep_pack_stats(&stats->shared, rep); 1254 1255 stats->references += rep->ref_count; 1256 stats->expanded_size += rep->ref_count * rep->expanded_size; 1257 stats->chain_len += rep->chain_length; 1258} 1259 1260/* Aggregate the info the in revision_info_t * array REVISIONS into the 1261 * respectve fields of STATS. 1262 */ 1263static void 1264aggregate_stats(const apr_array_header_t *revisions, 1265 svn_fs_fs__stats_t *stats) 1266{ 1267 int i, k; 1268 1269 /* aggregate info from all revisions */ 1270 stats->revision_count = revisions->nelts; 1271 for (i = 0; i < revisions->nelts; ++i) 1272 { 1273 revision_info_t *revision = APR_ARRAY_IDX(revisions, i, 1274 revision_info_t *); 1275 1276 /* data gathered on a revision level */ 1277 stats->change_count += revision->change_count; 1278 stats->change_len += revision->changes_len; 1279 stats->total_size += revision->end - revision->offset; 1280 1281 stats->dir_node_stats.count += revision->dir_noderev_count; 1282 stats->dir_node_stats.size += revision->dir_noderev_size; 1283 stats->file_node_stats.count += revision->file_noderev_count; 1284 stats->file_node_stats.size += revision->file_noderev_size; 1285 stats->total_node_stats.count += revision->dir_noderev_count 1286 + revision->file_noderev_count; 1287 stats->total_node_stats.size += revision->dir_noderev_size 1288 + revision->file_noderev_size; 1289 1290 /* process representations */ 1291 for (k = 0; k < revision->representations->nelts; ++k) 1292 { 1293 rep_stats_t *rep = APR_ARRAY_IDX(revision->representations, k, 1294 rep_stats_t *); 1295 1296 /* accumulate in the right bucket */ 1297 switch(rep->kind) 1298 { 1299 case file_rep: 1300 add_rep_stats(&stats->file_rep_stats, rep); 1301 break; 1302 case dir_rep: 1303 add_rep_stats(&stats->dir_rep_stats, rep); 1304 break; 1305 case file_property_rep: 1306 add_rep_stats(&stats->file_prop_rep_stats, rep); 1307 break; 1308 case dir_property_rep: 1309 add_rep_stats(&stats->dir_prop_rep_stats, rep); 1310 break; 1311 default: 1312 break; 1313 } 1314 1315 add_rep_stats(&stats->total_rep_stats, rep); 1316 } 1317 } 1318} 1319 1320/* Return a new svn_fs_fs__stats_t instance, allocated in RESULT_POOL. 1321 */ 1322static svn_fs_fs__stats_t * 1323create_stats(apr_pool_t *result_pool) 1324{ 1325 svn_fs_fs__stats_t *stats = apr_pcalloc(result_pool, sizeof(*stats)); 1326 1327 initialize_largest_changes(stats, 64, result_pool); 1328 stats->by_extension = apr_hash_make(result_pool); 1329 1330 return stats; 1331} 1332 1333/* Create a *QUERY, allocated in RESULT_POOL, reading filesystem FS and 1334 * collecting results in STATS. Store the optional PROCESS_FUNC and 1335 * PROGRESS_BATON as well as CANCEL_FUNC and CANCEL_BATON in *QUERY, too. 1336 * Use SCRATCH_POOL for temporary allocations. 1337 */ 1338static svn_error_t * 1339create_query(query_t **query, 1340 svn_fs_t *fs, 1341 svn_fs_fs__stats_t *stats, 1342 svn_fs_progress_notify_func_t progress_func, 1343 void *progress_baton, 1344 svn_cancel_func_t cancel_func, 1345 void *cancel_baton, 1346 apr_pool_t *result_pool, 1347 apr_pool_t *scratch_pool) 1348{ 1349 *query = apr_pcalloc(result_pool, sizeof(**query)); 1350 1351 /* Read repository dimensions. */ 1352 (*query)->shard_size = svn_fs_fs__shard_size(fs); 1353 SVN_ERR(svn_fs_fs__youngest_rev(&(*query)->head, fs, scratch_pool)); 1354 SVN_ERR(svn_fs_fs__min_unpacked_rev(&(*query)->min_unpacked_rev, fs, 1355 scratch_pool)); 1356 1357 /* create data containers and caches 1358 * Note: this assumes that int is at least 32-bits and that we only support 1359 * 32-bit wide revision numbers (actually 31-bits due to the signedness 1360 * of both the nelts field of the array and our revision numbers). This 1361 * means this code will fail on platforms where int is less than 32-bits 1362 * and the repository has more revisions than int can hold. */ 1363 (*query)->revisions = apr_array_make(result_pool, (int) (*query)->head + 1, 1364 sizeof(revision_info_t *)); 1365 (*query)->null_base = apr_pcalloc(result_pool, 1366 sizeof(*(*query)->null_base)); 1367 1368 /* Store other parameters */ 1369 (*query)->fs = fs; 1370 (*query)->stats = stats; 1371 (*query)->progress_func = progress_func; 1372 (*query)->progress_baton = progress_baton; 1373 (*query)->cancel_func = cancel_func; 1374 (*query)->cancel_baton = cancel_baton; 1375 1376 return SVN_NO_ERROR; 1377} 1378 1379svn_error_t * 1380svn_fs_fs__get_stats(svn_fs_fs__stats_t **stats, 1381 svn_fs_t *fs, 1382 svn_fs_progress_notify_func_t progress_func, 1383 void *progress_baton, 1384 svn_cancel_func_t cancel_func, 1385 void *cancel_baton, 1386 apr_pool_t *result_pool, 1387 apr_pool_t *scratch_pool) 1388{ 1389 query_t *query; 1390 1391 *stats = create_stats(result_pool); 1392 SVN_ERR(create_query(&query, fs, *stats, progress_func, progress_baton, 1393 cancel_func, cancel_baton, scratch_pool, 1394 scratch_pool)); 1395 SVN_ERR(read_revisions(query, scratch_pool, scratch_pool)); 1396 aggregate_stats(query->revisions, *stats); 1397 1398 return SVN_NO_ERROR; 1399} 1400 1401/* Baton for rev_size_index_entry_cb. */ 1402struct rev_size_baton_t { 1403 svn_revnum_t revision; 1404 apr_off_t rev_size; 1405}; 1406 1407/* Implements svn_fs_fs__dump_index_func_t, summing object sizes for 1408 * revision BATON->revision into BATON->rev_size. 1409 */ 1410static svn_error_t * 1411rev_size_index_entry_cb(const svn_fs_fs__p2l_entry_t *entry, 1412 void *baton, 1413 apr_pool_t *scratch_pool) 1414{ 1415 struct rev_size_baton_t *b = baton; 1416 1417 if (entry->item.revision == b->revision) 1418 b->rev_size += entry->size; 1419 return SVN_NO_ERROR; 1420} 1421 1422svn_error_t * 1423svn_fs_fs__revision_size(apr_off_t *rev_size, 1424 svn_fs_t *fs, 1425 svn_revnum_t revision, 1426 apr_pool_t *scratch_pool) 1427{ 1428 /* Get the size of the revision (excluding rev-props) */ 1429 if (svn_fs_fs__use_log_addressing(fs)) 1430 { 1431 /* This works for a packed or a non-packed revision. 1432 We could provide an optimized case for a non-packed revision 1433 using svn_fs_fs__p2l_get_max_offset(). */ 1434 struct rev_size_baton_t b = { 0, 0 }; 1435 1436 b.revision = revision; 1437 SVN_ERR(svn_fs_fs__dump_index(fs, revision, 1438 rev_size_index_entry_cb, &b, 1439 NULL, NULL, scratch_pool)); 1440 *rev_size = b.rev_size; 1441 } 1442 else 1443 { 1444 svn_fs_fs__revision_file_t *rev_file; 1445 svn_revnum_t min_unpacked_rev; 1446 1447 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision, 1448 scratch_pool, scratch_pool)); 1449 SVN_ERR(svn_fs_fs__min_unpacked_rev(&min_unpacked_rev, fs, 1450 scratch_pool)); 1451 if (revision < min_unpacked_rev) 1452 { 1453 int shard_size = svn_fs_fs__shard_size(fs); 1454 apr_off_t start_offset, end_offset; 1455 1456 SVN_ERR(svn_fs_fs__get_packed_offset(&start_offset, fs, revision, 1457 scratch_pool)); 1458 if (((revision + 1) % shard_size) == 0) 1459 { 1460 svn_filesize_t file_size; 1461 1462 SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); 1463 end_offset = (apr_off_t)file_size; 1464 } 1465 else 1466 { 1467 SVN_ERR(svn_fs_fs__get_packed_offset(&end_offset, fs, 1468 revision + 1, scratch_pool)); 1469 } 1470 *rev_size = (end_offset - start_offset); 1471 } 1472 else 1473 { 1474 svn_filesize_t file_size; 1475 1476 SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool)); 1477 *rev_size = (apr_off_t)file_size; 1478 } 1479 1480 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 1481 } 1482 1483 /* Add the size of the rev-props */ 1484 { 1485 apr_off_t size; 1486 1487 SVN_ERR(svn_fs_fs__get_revision_props_size(&size, fs, revision, scratch_pool)); 1488 *rev_size += size; 1489 } 1490 1491 return SVN_NO_ERROR; 1492} 1493