1289177Speter/* stats.c -- implements the svn_fs_fs__get_stats private API. 2289177Speter * 3289177Speter * ==================================================================== 4289177Speter * Licensed to the Apache Software Foundation (ASF) under one 5289177Speter * or more contributor license agreements. See the NOTICE file 6289177Speter * distributed with this work for additional information 7289177Speter * regarding copyright ownership. The ASF licenses this file 8289177Speter * to you under the Apache License, Version 2.0 (the 9289177Speter * "License"); you may not use this file except in compliance 10289177Speter * with the License. You may obtain a copy of the License at 11289177Speter * 12289177Speter * http://www.apache.org/licenses/LICENSE-2.0 13289177Speter * 14289177Speter * Unless required by applicable law or agreed to in writing, 15289177Speter * software distributed under the License is distributed on an 16289177Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17289177Speter * KIND, either express or implied. See the License for the 18289177Speter * specific language governing permissions and limitations 19289177Speter * under the License. 20289177Speter * ==================================================================== 21289177Speter */ 22289177Speter 23289177Speter#include "svn_dirent_uri.h" 24289177Speter#include "svn_fs.h" 25289177Speter#include "svn_pools.h" 26289177Speter#include "svn_sorts.h" 27289177Speter 28289177Speter#include "private/svn_cache.h" 29289177Speter#include "private/svn_sorts_private.h" 30289177Speter#include "private/svn_string_private.h" 31289177Speter#include "private/svn_fs_fs_private.h" 32289177Speter 33289177Speter#include "index.h" 34289177Speter#include "pack.h" 35289177Speter#include "rev_file.h" 36289177Speter#include "util.h" 37289177Speter#include "fs_fs.h" 38289177Speter#include "cached_data.h" 39289177Speter#include "low_level.h" 40289177Speter 41289177Speter#include "../libsvn_fs/fs-loader.h" 42289177Speter 43289177Speter#include "svn_private_config.h" 44289177Speter 45289177Speter/* We group representations into 2x2 different kinds plus one default: 46289177Speter * [dir / file] x [text / prop]. The assignment is done by the first node 47289177Speter * that references the respective representation. 48289177Speter */ 49289177Spetertypedef enum rep_kind_t 50289177Speter{ 51289177Speter /* The representation is not used _directly_, i.e. not referenced by any 52289177Speter * noderev. However, some other representation may use it as delta base. 53289177Speter * Null value. Should not occur in real-word repositories. */ 54289177Speter unused_rep, 55289177Speter 56289177Speter /* a properties on directory rep */ 57289177Speter dir_property_rep, 58289177Speter 59289177Speter /* a properties on file rep */ 60289177Speter file_property_rep, 61289177Speter 62289177Speter /* a directory rep */ 63289177Speter dir_rep, 64289177Speter 65289177Speter /* a file rep */ 66289177Speter file_rep 67289177Speter} rep_kind_t; 68289177Speter 69289177Speter/* A representation fragment. 70289177Speter */ 71289177Spetertypedef struct rep_stats_t 72289177Speter{ 73289177Speter /* absolute offset in the file */ 74289177Speter apr_off_t offset; 75289177Speter 76289177Speter /* item length in bytes */ 77289177Speter apr_uint64_t size; 78289177Speter 79289177Speter /* item length after de-deltification */ 80289177Speter apr_uint64_t expanded_size; 81289177Speter 82289177Speter /* revision that contains this representation 83289177Speter * (may be referenced by other revisions, though) */ 84289177Speter svn_revnum_t revision; 85289177Speter 86289177Speter /* number of nodes that reference this representation */ 87289177Speter apr_uint32_t ref_count; 88289177Speter 89289177Speter /* length of the PLAIN / DELTA line in the source file in bytes */ 90289177Speter apr_uint16_t header_size; 91289177Speter 92289177Speter /* classification of the representation. values of rep_kind_t */ 93289177Speter char kind; 94289177Speter 95289177Speter} rep_stats_t; 96289177Speter 97289177Speter/* Represents a single revision. 98289177Speter * There will be only one instance per revision. */ 99289177Spetertypedef struct revision_info_t 100289177Speter{ 101289177Speter /* number of this revision */ 102289177Speter svn_revnum_t revision; 103289177Speter 104289177Speter /* pack file offset (manifest value), 0 for non-packed files */ 105289177Speter apr_off_t offset; 106289177Speter 107289177Speter /* length of the changes list on bytes */ 108289177Speter apr_uint64_t changes_len; 109289177Speter 110289177Speter /* offset of the changes list relative to OFFSET */ 111289177Speter apr_uint64_t change_count; 112289177Speter 113289177Speter /* first offset behind the revision data in the pack file (file length 114289177Speter * for non-packed revs) */ 115289177Speter apr_off_t end; 116289177Speter 117289177Speter /* number of directory noderevs in this revision */ 118289177Speter apr_uint64_t dir_noderev_count; 119289177Speter 120289177Speter /* number of file noderevs in this revision */ 121289177Speter apr_uint64_t file_noderev_count; 122289177Speter 123289177Speter /* total size of directory noderevs (i.e. the structs - not the rep) */ 124289177Speter apr_uint64_t dir_noderev_size; 125289177Speter 126289177Speter /* total size of file noderevs (i.e. the structs - not the rep) */ 127289177Speter apr_uint64_t file_noderev_size; 128289177Speter 129289177Speter /* all rep_stats_t of this revision (in no particular order), 130289177Speter * i.e. those that point back to this struct */ 131289177Speter apr_array_header_t *representations; 132289177Speter 133289177Speter /* Temporary rev / pack file access object, used in phys. addressing 134289177Speter * mode only. NULL when done reading this revision. */ 135289177Speter svn_fs_fs__revision_file_t *rev_file; 136289177Speter} revision_info_t; 137289177Speter 138289177Speter/* Root data structure containing all information about a given repository. 139289177Speter * We use it as a wrapper around svn_fs_t and pass it around where we would 140289177Speter * otherwise just use a svn_fs_t. 141289177Speter */ 142289177Spetertypedef struct query_t 143289177Speter{ 144289177Speter /* FS API object*/ 145289177Speter svn_fs_t *fs; 146289177Speter 147289177Speter /* The HEAD revision. */ 148289177Speter svn_revnum_t head; 149289177Speter 150289177Speter /* Number of revs per shard; 0 for non-sharded repos. */ 151289177Speter int shard_size; 152289177Speter 153289177Speter /* First non-packed revision. */ 154289177Speter svn_revnum_t min_unpacked_rev; 155289177Speter 156289177Speter /* all revisions */ 157289177Speter apr_array_header_t *revisions; 158289177Speter 159289177Speter /* empty representation. 160289177Speter * Used as a dummy base for DELTA reps without base. */ 161289177Speter rep_stats_t *null_base; 162289177Speter 163289177Speter /* collected statistics */ 164289177Speter svn_fs_fs__stats_t *stats; 165289177Speter 166289177Speter /* Progress notification callback to call after each shard. May be NULL. */ 167289177Speter svn_fs_progress_notify_func_t progress_func; 168289177Speter 169289177Speter /* Baton for PROGRESS_FUNC. */ 170289177Speter void *progress_baton; 171289177Speter 172289177Speter /* Cancellation support callback to call once in a while. May be NULL. */ 173289177Speter svn_cancel_func_t cancel_func; 174289177Speter 175289177Speter /* Baton for CANCEL_FUNC. */ 176289177Speter void *cancel_baton; 177289177Speter} query_t; 178289177Speter 179289177Speter/* Return the length of REV_FILE in *FILE_SIZE. 180289177Speter * Use SCRATCH_POOL for temporary allocations. 181289177Speter */ 182289177Speterstatic svn_error_t * 183289177Speterget_file_size(apr_off_t *file_size, 184289177Speter svn_fs_fs__revision_file_t *rev_file, 185289177Speter apr_pool_t *scratch_pool) 186289177Speter{ 187289177Speter apr_finfo_t finfo; 188289177Speter 189289177Speter SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, rev_file->file, 190289177Speter scratch_pool)); 191289177Speter 192289177Speter *file_size = finfo.size; 193289177Speter return SVN_NO_ERROR; 194289177Speter} 195289177Speter 196289177Speter/* Initialize the LARGEST_CHANGES member in STATS with a capacity of COUNT 197289177Speter * entries. Allocate the result in RESULT_POOL. 198289177Speter */ 199289177Speterstatic void 200289177Speterinitialize_largest_changes(svn_fs_fs__stats_t *stats, 201289177Speter apr_size_t count, 202289177Speter apr_pool_t *result_pool) 203289177Speter{ 204289177Speter apr_size_t i; 205289177Speter 206289177Speter stats->largest_changes = apr_pcalloc(result_pool, 207289177Speter sizeof(*stats->largest_changes)); 208289177Speter stats->largest_changes->count = count; 209289177Speter stats->largest_changes->min_size = 1; 210289177Speter stats->largest_changes->changes 211289177Speter = apr_palloc(result_pool, count * sizeof(*stats->largest_changes->changes)); 212289177Speter 213289177Speter /* allocate *all* entries before the path stringbufs. This increases 214289177Speter * cache locality and enhances performance significantly. */ 215289177Speter for (i = 0; i < count; ++i) 216289177Speter stats->largest_changes->changes[i] 217289177Speter = apr_palloc(result_pool, sizeof(**stats->largest_changes->changes)); 218289177Speter 219289177Speter /* now initialize them and allocate the stringbufs */ 220289177Speter for (i = 0; i < count; ++i) 221289177Speter { 222289177Speter stats->largest_changes->changes[i]->size = 0; 223289177Speter stats->largest_changes->changes[i]->revision = SVN_INVALID_REVNUM; 224289177Speter stats->largest_changes->changes[i]->path 225289177Speter = svn_stringbuf_create_ensure(1024, result_pool); 226289177Speter } 227289177Speter} 228289177Speter 229289177Speter/* Add entry for SIZE to HISTOGRAM. 230289177Speter */ 231289177Speterstatic void 232289177Speteradd_to_histogram(svn_fs_fs__histogram_t *histogram, 233289177Speter apr_int64_t size) 234289177Speter{ 235289177Speter apr_int64_t shift = 0; 236289177Speter 237289177Speter while (((apr_int64_t)(1) << shift) <= size) 238289177Speter shift++; 239289177Speter 240289177Speter histogram->total.count++; 241289177Speter histogram->total.sum += size; 242289177Speter histogram->lines[(apr_size_t)shift].count++; 243289177Speter histogram->lines[(apr_size_t)shift].sum += size; 244289177Speter} 245289177Speter 246289177Speter/* Update data aggregators in STATS with this representation of type KIND, 247289177Speter * on-disk REP_SIZE and expanded node size EXPANDED_SIZE for PATH in REVSION. 248289177Speter * PLAIN_ADDED indicates whether the node has a deltification predecessor. 249289177Speter */ 250289177Speterstatic void 251289177Speteradd_change(svn_fs_fs__stats_t *stats, 252289177Speter apr_uint64_t rep_size, 253289177Speter apr_uint64_t expanded_size, 254289177Speter svn_revnum_t revision, 255289177Speter const char *path, 256289177Speter rep_kind_t kind, 257289177Speter svn_boolean_t plain_added) 258289177Speter{ 259289177Speter /* identify largest reps */ 260289177Speter if (rep_size >= stats->largest_changes->min_size) 261289177Speter { 262289177Speter apr_size_t i; 263289177Speter svn_fs_fs__largest_changes_t *largest_changes = stats->largest_changes; 264289177Speter svn_fs_fs__large_change_info_t *info 265289177Speter = largest_changes->changes[largest_changes->count - 1]; 266289177Speter info->size = rep_size; 267289177Speter info->revision = revision; 268289177Speter svn_stringbuf_set(info->path, path); 269289177Speter 270289177Speter /* linear insertion but not too bad since count is low and insertions 271289177Speter * near the end are more likely than close to front */ 272289177Speter for (i = largest_changes->count - 1; i > 0; --i) 273289177Speter if (largest_changes->changes[i-1]->size >= rep_size) 274289177Speter break; 275289177Speter else 276289177Speter largest_changes->changes[i] = largest_changes->changes[i-1]; 277289177Speter 278289177Speter largest_changes->changes[i] = info; 279289177Speter largest_changes->min_size 280289177Speter = largest_changes->changes[largest_changes->count-1]->size; 281289177Speter } 282289177Speter 283289177Speter /* global histograms */ 284289177Speter add_to_histogram(&stats->rep_size_histogram, rep_size); 285289177Speter add_to_histogram(&stats->node_size_histogram, expanded_size); 286289177Speter 287289177Speter if (plain_added) 288289177Speter { 289289177Speter add_to_histogram(&stats->added_rep_size_histogram, rep_size); 290289177Speter add_to_histogram(&stats->added_node_size_histogram, expanded_size); 291289177Speter } 292289177Speter 293289177Speter /* specific histograms by type */ 294289177Speter switch (kind) 295289177Speter { 296289177Speter case unused_rep: 297289177Speter add_to_histogram(&stats->unused_rep_histogram, rep_size); 298289177Speter break; 299289177Speter case dir_property_rep: 300289177Speter add_to_histogram(&stats->dir_prop_rep_histogram, rep_size); 301289177Speter add_to_histogram(&stats->dir_prop_histogram, expanded_size); 302289177Speter break; 303289177Speter case file_property_rep: 304289177Speter add_to_histogram(&stats->file_prop_rep_histogram, rep_size); 305289177Speter add_to_histogram(&stats->file_prop_histogram, expanded_size); 306289177Speter break; 307289177Speter case dir_rep: 308289177Speter add_to_histogram(&stats->dir_rep_histogram, rep_size); 309289177Speter add_to_histogram(&stats->dir_histogram, expanded_size); 310289177Speter break; 311289177Speter case file_rep: 312289177Speter add_to_histogram(&stats->file_rep_histogram, rep_size); 313289177Speter add_to_histogram(&stats->file_histogram, expanded_size); 314289177Speter break; 315289177Speter } 316289177Speter 317289177Speter /* by extension */ 318289177Speter if (kind == file_rep) 319289177Speter { 320289177Speter /* determine extension */ 321289177Speter svn_fs_fs__extension_info_t *info; 322289177Speter const char * file_name = strrchr(path, '/'); 323289177Speter const char * extension = file_name ? strrchr(file_name, '.') : NULL; 324289177Speter 325289177Speter if (extension == NULL || extension == file_name + 1) 326289177Speter extension = "(none)"; 327289177Speter 328289177Speter /* get / auto-insert entry for this extension */ 329289177Speter info = apr_hash_get(stats->by_extension, extension, APR_HASH_KEY_STRING); 330289177Speter if (info == NULL) 331289177Speter { 332289177Speter apr_pool_t *pool = apr_hash_pool_get(stats->by_extension); 333289177Speter info = apr_pcalloc(pool, sizeof(*info)); 334289177Speter info->extension = apr_pstrdup(pool, extension); 335289177Speter 336289177Speter apr_hash_set(stats->by_extension, info->extension, 337289177Speter APR_HASH_KEY_STRING, info); 338289177Speter } 339289177Speter 340289177Speter /* update per-extension histogram */ 341289177Speter add_to_histogram(&info->node_histogram, expanded_size); 342289177Speter add_to_histogram(&info->rep_histogram, rep_size); 343289177Speter } 344289177Speter} 345289177Speter 346289177Speter/* Comparator used for binary search comparing the absolute file offset 347289177Speter * of a representation to some other offset. DATA is a *rep_stats_t, 348289177Speter * KEY is a pointer to an apr_off_t. 349289177Speter */ 350289177Speterstatic int 351289177Spetercompare_representation_offsets(const void *data, const void *key) 352289177Speter{ 353289177Speter apr_off_t lhs = (*(const rep_stats_t *const *)data)->offset; 354289177Speter apr_off_t rhs = *(const apr_off_t *)key; 355289177Speter 356289177Speter if (lhs < rhs) 357289177Speter return -1; 358289177Speter return (lhs > rhs ? 1 : 0); 359289177Speter} 360289177Speter 361289177Speter/* Find the revision_info_t object to the given REVISION in QUERY and 362289177Speter * return it in *REVISION_INFO. For performance reasons, we skip the 363289177Speter * lookup if the info is already provided. 364289177Speter * 365289177Speter * In that revision, look for the rep_stats_t object for offset OFFSET. 366289177Speter * If it already exists, set *IDX to its index in *REVISION_INFO's 367289177Speter * representations list and return the representation object. Otherwise, 368289177Speter * set the index to where it must be inserted and return NULL. 369289177Speter */ 370289177Speterstatic rep_stats_t * 371289177Speterfind_representation(int *idx, 372289177Speter query_t *query, 373289177Speter revision_info_t **revision_info, 374289177Speter svn_revnum_t revision, 375289177Speter apr_off_t offset) 376289177Speter{ 377289177Speter revision_info_t *info; 378289177Speter *idx = -1; 379289177Speter 380289177Speter /* first let's find the revision */ 381289177Speter info = revision_info ? *revision_info : NULL; 382289177Speter if (info == NULL || info->revision != revision) 383289177Speter { 384289177Speter info = APR_ARRAY_IDX(query->revisions, revision, revision_info_t*); 385289177Speter if (revision_info) 386289177Speter *revision_info = info; 387289177Speter } 388289177Speter 389289177Speter /* not found -> no result */ 390289177Speter if (info == NULL) 391289177Speter return NULL; 392289177Speter 393289177Speter /* look for the representation */ 394289177Speter *idx = svn_sort__bsearch_lower_bound(info->representations, 395289177Speter &offset, 396289177Speter compare_representation_offsets); 397289177Speter if (*idx < info->representations->nelts) 398289177Speter { 399289177Speter /* return the representation, if this is the one we were looking for */ 400289177Speter rep_stats_t *result 401289177Speter = APR_ARRAY_IDX(info->representations, *idx, rep_stats_t *); 402289177Speter if (result->offset == offset) 403289177Speter return result; 404289177Speter } 405289177Speter 406289177Speter /* not parsed, yet */ 407289177Speter return NULL; 408289177Speter} 409289177Speter 410289177Speter/* Find / auto-construct the representation stats for REP in QUERY and 411289177Speter * return it in *REPRESENTATION. 412289177Speter * 413289177Speter * If necessary, allocate the result in RESULT_POOL; use SCRATCH_POOL for 414289177Speter * temporary allocations. 415289177Speter */ 416289177Speterstatic svn_error_t * 417289177Speterparse_representation(rep_stats_t **representation, 418289177Speter query_t *query, 419289177Speter representation_t *rep, 420289177Speter revision_info_t *revision_info, 421289177Speter apr_pool_t *result_pool, 422289177Speter apr_pool_t *scratch_pool) 423289177Speter{ 424289177Speter rep_stats_t *result; 425289177Speter int idx; 426289177Speter 427289177Speter /* read location (revision, offset) and size */ 428289177Speter 429289177Speter /* look it up */ 430289177Speter result = find_representation(&idx, query, &revision_info, rep->revision, 431289177Speter (apr_off_t)rep->item_index); 432289177Speter if (!result) 433289177Speter { 434289177Speter /* not parsed, yet (probably a rep in the same revision). 435289177Speter * Create a new rep object and determine its base rep as well. 436289177Speter */ 437289177Speter result = apr_pcalloc(result_pool, sizeof(*result)); 438289177Speter result->revision = rep->revision; 439289177Speter result->expanded_size = (rep->expanded_size ? rep->expanded_size 440289177Speter : rep->size); 441289177Speter result->offset = (apr_off_t)rep->item_index; 442289177Speter result->size = rep->size; 443289177Speter 444289177Speter /* In phys. addressing mode, follow link to the actual representation. 445289177Speter * In log. addressing mode, we will find it already as part of our 446289177Speter * linear walk through the whole file. */ 447289177Speter if (!svn_fs_fs__use_log_addressing(query->fs)) 448289177Speter { 449289177Speter svn_fs_fs__rep_header_t *header; 450289177Speter apr_off_t offset = revision_info->offset + result->offset; 451289177Speter 452289177Speter SVN_ERR_ASSERT(revision_info->rev_file); 453289177Speter SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET, 454289177Speter &offset, scratch_pool)); 455289177Speter SVN_ERR(svn_fs_fs__read_rep_header(&header, 456289177Speter revision_info->rev_file->stream, 457289177Speter scratch_pool, scratch_pool)); 458289177Speter 459289177Speter result->header_size = header->header_size; 460289177Speter } 461289177Speter 462289177Speter svn_sort__array_insert(revision_info->representations, &result, idx); 463289177Speter } 464289177Speter 465289177Speter *representation = result; 466289177Speter 467289177Speter return SVN_NO_ERROR; 468289177Speter} 469289177Speter 470289177Speter 471289177Speter/* forward declaration */ 472289177Speterstatic svn_error_t * 473289177Speterread_noderev(query_t *query, 474289177Speter svn_stringbuf_t *noderev_str, 475289177Speter revision_info_t *revision_info, 476289177Speter apr_pool_t *result_pool, 477289177Speter apr_pool_t *scratch_pool); 478289177Speter 479289177Speter/* Read the noderev item at OFFSET in REVISION_INFO from the filesystem 480289177Speter * provided by QUERY. Return it in *NODEREV, allocated in RESULT_POOL. 481289177Speter * Use SCRATCH_POOL for temporary allocations. 482289177Speter * 483289177Speter * The textual representation of the noderev will be used to determine 484289177Speter * the on-disk size of the noderev. Only called in phys. addressing mode. 485289177Speter */ 486289177Speterstatic svn_error_t * 487289177Speterread_phsy_noderev(svn_stringbuf_t **noderev, 488289177Speter query_t *query, 489289177Speter apr_off_t offset, 490289177Speter revision_info_t *revision_info, 491289177Speter apr_pool_t *result_pool, 492289177Speter apr_pool_t *scratch_pool) 493289177Speter{ 494289177Speter svn_stringbuf_t *noderev_str = svn_stringbuf_create_empty(result_pool); 495289177Speter svn_stringbuf_t *line; 496289177Speter svn_boolean_t eof; 497289177Speter 498289177Speter apr_pool_t *iterpool = svn_pool_create(scratch_pool); 499289177Speter 500289177Speter /* Navigate the file stream to the start of noderev. */ 501289177Speter SVN_ERR_ASSERT(revision_info->rev_file); 502289177Speter 503289177Speter offset += revision_info->offset; 504289177Speter SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET, 505289177Speter &offset, scratch_pool)); 506289177Speter 507289177Speter /* Read it (terminated by an empty line) */ 508289177Speter do 509289177Speter { 510289177Speter svn_pool_clear(iterpool); 511289177Speter 512289177Speter SVN_ERR(svn_stream_readline(revision_info->rev_file->stream, &line, 513289177Speter "\n", &eof, iterpool)); 514289177Speter svn_stringbuf_appendstr(noderev_str, line); 515289177Speter svn_stringbuf_appendbyte(noderev_str, '\n'); 516289177Speter } 517289177Speter while (line->len > 0 && !eof); 518289177Speter 519289177Speter /* Return the result. */ 520289177Speter *noderev = noderev_str; 521289177Speter 522289177Speter svn_pool_destroy(iterpool); 523289177Speter 524289177Speter return SVN_NO_ERROR; 525289177Speter} 526289177Speter 527289177Speter/* Starting at the directory in NODEREV's text, read all DAG nodes, 528289177Speter * directories and representations linked in that tree structure. 529289177Speter * Store them in QUERY and REVISION_INFO. Also, read them only once. 530289177Speter * 531289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 532289177Speter * temporaries. 533289177Speter */ 534289177Speterstatic svn_error_t * 535289177Speterparse_dir(query_t *query, 536289177Speter node_revision_t *noderev, 537289177Speter revision_info_t *revision_info, 538289177Speter apr_pool_t *result_pool, 539289177Speter apr_pool_t *scratch_pool) 540289177Speter{ 541289177Speter apr_pool_t *iterpool = svn_pool_create(scratch_pool); 542289177Speter 543289177Speter int i; 544289177Speter apr_array_header_t *entries; 545289177Speter SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, query->fs, noderev, 546289177Speter scratch_pool, scratch_pool)); 547289177Speter 548289177Speter for (i = 0; i < entries->nelts; ++i) 549289177Speter { 550289177Speter svn_fs_dirent_t *dirent = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *); 551289177Speter 552289177Speter if (svn_fs_fs__id_rev(dirent->id) == revision_info->revision) 553289177Speter { 554289177Speter svn_stringbuf_t *noderev_str; 555289177Speter svn_pool_clear(iterpool); 556289177Speter 557289177Speter SVN_ERR(read_phsy_noderev(&noderev_str, query, 558289177Speter svn_fs_fs__id_item(dirent->id), 559289177Speter revision_info, iterpool, iterpool)); 560289177Speter SVN_ERR(read_noderev(query, noderev_str, revision_info, 561289177Speter result_pool, iterpool)); 562289177Speter } 563289177Speter } 564289177Speter 565289177Speter svn_pool_destroy(iterpool); 566289177Speter 567289177Speter return SVN_NO_ERROR; 568289177Speter} 569289177Speter 570289177Speter/* Parse the noderev given as NODEREV_STR and store the info in QUERY and 571289177Speter * REVISION_INFO. In phys. addressing mode, continue reading all DAG nodes, 572289177Speter * directories and representations linked in that tree structure. 573289177Speter * 574289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 575289177Speter * temporaries. 576289177Speter */ 577289177Speterstatic svn_error_t * 578289177Speterread_noderev(query_t *query, 579289177Speter svn_stringbuf_t *noderev_str, 580289177Speter revision_info_t *revision_info, 581289177Speter apr_pool_t *result_pool, 582289177Speter apr_pool_t *scratch_pool) 583289177Speter{ 584289177Speter rep_stats_t *text = NULL; 585289177Speter rep_stats_t *props = NULL; 586289177Speter node_revision_t *noderev; 587289177Speter 588289177Speter svn_stream_t *stream = svn_stream_from_stringbuf(noderev_str, scratch_pool); 589289177Speter SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, scratch_pool, 590289177Speter scratch_pool)); 591289177Speter 592289177Speter if (noderev->data_rep) 593289177Speter { 594289177Speter SVN_ERR(parse_representation(&text, query, 595289177Speter noderev->data_rep, revision_info, 596289177Speter result_pool, scratch_pool)); 597289177Speter 598289177Speter /* if we are the first to use this rep, mark it as "text rep" */ 599289177Speter if (++text->ref_count == 1) 600289177Speter text->kind = noderev->kind == svn_node_dir ? dir_rep : file_rep; 601289177Speter } 602289177Speter 603289177Speter if (noderev->prop_rep) 604289177Speter { 605289177Speter SVN_ERR(parse_representation(&props, query, 606289177Speter noderev->prop_rep, revision_info, 607289177Speter result_pool, scratch_pool)); 608289177Speter 609289177Speter /* if we are the first to use this rep, mark it as "prop rep" */ 610289177Speter if (++props->ref_count == 1) 611289177Speter props->kind = noderev->kind == svn_node_dir ? dir_property_rep 612289177Speter : file_property_rep; 613289177Speter } 614289177Speter 615289177Speter /* record largest changes */ 616289177Speter if (text && text->ref_count == 1) 617289177Speter add_change(query->stats, text->size, text->expanded_size, text->revision, 618289177Speter noderev->created_path, text->kind, !noderev->predecessor_id); 619289177Speter if (props && props->ref_count == 1) 620289177Speter add_change(query->stats, props->size, props->expanded_size, 621289177Speter props->revision, noderev->created_path, props->kind, 622289177Speter !noderev->predecessor_id); 623289177Speter 624289177Speter /* if this is a directory and has not been processed, yet, read and 625289177Speter * process it recursively */ 626289177Speter if ( noderev->kind == svn_node_dir && text && text->ref_count == 1 627289177Speter && !svn_fs_fs__use_log_addressing(query->fs)) 628289177Speter SVN_ERR(parse_dir(query, noderev, revision_info, result_pool, 629289177Speter scratch_pool)); 630289177Speter 631289177Speter /* update stats */ 632289177Speter if (noderev->kind == svn_node_dir) 633289177Speter { 634289177Speter revision_info->dir_noderev_size += noderev_str->len; 635289177Speter revision_info->dir_noderev_count++; 636289177Speter } 637289177Speter else 638289177Speter { 639289177Speter revision_info->file_noderev_size += noderev_str->len; 640289177Speter revision_info->file_noderev_count++; 641289177Speter } 642289177Speter 643289177Speter return SVN_NO_ERROR; 644289177Speter} 645289177Speter 646289177Speter/* For the revision given as REVISION_INFO within QUERY, determine the number 647289177Speter * of entries in its changed paths list and store that info in REVISION_INFO. 648289177Speter * Use SCRATCH_POOL for temporary allocations. 649289177Speter */ 650289177Speterstatic svn_error_t * 651289177Speterget_phys_change_count(query_t *query, 652289177Speter revision_info_t *revision_info, 653289177Speter apr_pool_t *scratch_pool) 654289177Speter{ 655289177Speter /* We are going to use our own sub-pool here because the changes object 656289177Speter * may well be >100MB and SCRATCH_POOL may not get cleared until all other 657289177Speter * info has been read by read_phys_revision(). Therefore, tidy up early. 658289177Speter */ 659289177Speter apr_pool_t *subpool = svn_pool_create(scratch_pool); 660289177Speter apr_array_header_t *changes; 661289177Speter 662289177Speter SVN_ERR(svn_fs_fs__get_changes(&changes, query->fs, 663289177Speter revision_info->revision, subpool)); 664289177Speter revision_info->change_count = changes->nelts; 665289177Speter 666289177Speter /* Release potentially tons of memory. */ 667289177Speter svn_pool_destroy(subpool); 668289177Speter 669289177Speter return SVN_NO_ERROR; 670289177Speter} 671289177Speter 672289177Speter/* Read header information for the revision stored in FILE_CONTENT (one 673289177Speter * whole revision). Return the offsets within FILE_CONTENT for the 674289177Speter * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN. 675289177Speter * Use POOL for temporary allocations. */ 676289177Speterstatic svn_error_t * 677289177Speterread_phys_revision(query_t *query, 678289177Speter revision_info_t *info, 679289177Speter apr_pool_t *result_pool, 680289177Speter apr_pool_t *scratch_pool) 681289177Speter{ 682289177Speter char buf[64]; 683289177Speter apr_off_t root_node_offset; 684289177Speter apr_off_t changes_offset; 685289177Speter svn_stringbuf_t *trailer; 686289177Speter svn_stringbuf_t *noderev_str; 687289177Speter 688289177Speter /* Read the last 64 bytes of the revision (if long enough). */ 689289177Speter apr_off_t start = MAX(info->offset, info->end - sizeof(buf)); 690289177Speter apr_size_t len = (apr_size_t)(info->end - start); 691289177Speter SVN_ERR(svn_io_file_seek(info->rev_file->file, APR_SET, &start, 692289177Speter scratch_pool)); 693289177Speter SVN_ERR(svn_io_file_read_full2(info->rev_file->file, buf, len, NULL, NULL, 694289177Speter scratch_pool)); 695289177Speter trailer = svn_stringbuf_ncreate(buf, len, scratch_pool); 696289177Speter 697289177Speter /* Parse that trailer. */ 698289177Speter SVN_ERR(svn_fs_fs__parse_revision_trailer(&root_node_offset, 699289177Speter &changes_offset, trailer, 700289177Speter info->revision)); 701289177Speter SVN_ERR(get_phys_change_count(query, info, scratch_pool)); 702289177Speter 703289177Speter /* Calculate the length of the changes list. */ 704289177Speter trailer = svn_fs_fs__unparse_revision_trailer(root_node_offset, 705289177Speter changes_offset, 706289177Speter scratch_pool); 707289177Speter info->changes_len = info->end - info->offset - changes_offset 708289177Speter - trailer->len; 709289177Speter 710289177Speter /* Recursively read nodes added in this rev. */ 711289177Speter SVN_ERR(read_phsy_noderev(&noderev_str, query, root_node_offset, info, 712289177Speter scratch_pool, scratch_pool)); 713289177Speter SVN_ERR(read_noderev(query, noderev_str, info, result_pool, scratch_pool)); 714289177Speter 715289177Speter return SVN_NO_ERROR; 716289177Speter} 717289177Speter 718289177Speter/* Read the content of the pack file staring at revision BASE physical 719289177Speter * addressing mode and store it in QUERY. 720289177Speter * 721289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 722289177Speter * temporaries. 723289177Speter */ 724289177Speterstatic svn_error_t * 725289177Speterread_phys_pack_file(query_t *query, 726289177Speter svn_revnum_t base, 727289177Speter apr_pool_t *result_pool, 728289177Speter apr_pool_t *scratch_pool) 729289177Speter{ 730289177Speter apr_pool_t *iterpool = svn_pool_create(scratch_pool); 731289177Speter int i; 732289177Speter apr_off_t file_size = 0; 733289177Speter svn_fs_fs__revision_file_t *rev_file; 734289177Speter 735289177Speter SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base, 736289177Speter scratch_pool, scratch_pool)); 737289177Speter SVN_ERR(get_file_size(&file_size, rev_file, scratch_pool)); 738289177Speter 739289177Speter /* process each revision in the pack file */ 740289177Speter for (i = 0; i < query->shard_size; ++i) 741289177Speter { 742289177Speter revision_info_t *info; 743289177Speter 744289177Speter /* cancellation support */ 745289177Speter if (query->cancel_func) 746289177Speter SVN_ERR(query->cancel_func(query->cancel_baton)); 747289177Speter 748289177Speter /* create the revision info for the current rev */ 749289177Speter info = apr_pcalloc(result_pool, sizeof(*info)); 750289177Speter info->representations = apr_array_make(result_pool, 4, 751289177Speter sizeof(rep_stats_t*)); 752289177Speter info->rev_file = rev_file; 753289177Speter 754289177Speter info->revision = base + i; 755289177Speter SVN_ERR(svn_fs_fs__get_packed_offset(&info->offset, query->fs, base + i, 756289177Speter iterpool)); 757289177Speter if (i + 1 == query->shard_size) 758289177Speter info->end = file_size; 759289177Speter else 760289177Speter SVN_ERR(svn_fs_fs__get_packed_offset(&info->end, query->fs, 761289177Speter base + i + 1, iterpool)); 762289177Speter 763289177Speter SVN_ERR(read_phys_revision(query, info, result_pool, iterpool)); 764289177Speter 765289177Speter info->representations = apr_array_copy(result_pool, 766289177Speter info->representations); 767289177Speter 768289177Speter /* Done with this revision. */ 769289177Speter info->rev_file = NULL; 770289177Speter 771289177Speter /* put it into our container */ 772289177Speter APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; 773289177Speter 774289177Speter /* destroy temps */ 775289177Speter svn_pool_clear(iterpool); 776289177Speter } 777289177Speter 778289177Speter /* Done with this pack file. */ 779289177Speter SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 780289177Speter 781289177Speter /* one more pack file processed */ 782289177Speter if (query->progress_func) 783289177Speter query->progress_func(base, query->progress_baton, scratch_pool); 784289177Speter 785289177Speter return SVN_NO_ERROR; 786289177Speter} 787289177Speter 788289177Speter/* Read the content of the file for REVISION in physical addressing mode 789289177Speter * and store its contents in QUERY. 790289177Speter * 791289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 792289177Speter * temporaries. 793289177Speter */ 794289177Speterstatic svn_error_t * 795289177Speterread_phys_revision_file(query_t *query, 796289177Speter svn_revnum_t revision, 797289177Speter apr_pool_t *result_pool, 798289177Speter apr_pool_t *scratch_pool) 799289177Speter{ 800289177Speter revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info)); 801289177Speter apr_off_t file_size = 0; 802289177Speter svn_fs_fs__revision_file_t *rev_file; 803289177Speter 804289177Speter /* cancellation support */ 805289177Speter if (query->cancel_func) 806289177Speter SVN_ERR(query->cancel_func(query->cancel_baton)); 807289177Speter 808289177Speter /* read the whole pack file into memory */ 809289177Speter SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision, 810289177Speter scratch_pool, scratch_pool)); 811289177Speter SVN_ERR(get_file_size(&file_size, rev_file, scratch_pool)); 812289177Speter 813289177Speter /* create the revision info for the current rev */ 814289177Speter info->representations = apr_array_make(result_pool, 4, sizeof(rep_stats_t*)); 815289177Speter 816289177Speter info->rev_file = rev_file; 817289177Speter info->revision = revision; 818289177Speter info->offset = 0; 819289177Speter info->end = file_size; 820289177Speter 821289177Speter SVN_ERR(read_phys_revision(query, info, result_pool, scratch_pool)); 822289177Speter 823289177Speter /* Done with this revision. */ 824289177Speter SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 825289177Speter info->rev_file = NULL; 826289177Speter 827289177Speter /* put it into our container */ 828289177Speter APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; 829289177Speter 830289177Speter /* show progress every 1000 revs or so */ 831289177Speter if (query->progress_func) 832289177Speter { 833289177Speter if (query->shard_size && (revision % query->shard_size == 0)) 834289177Speter query->progress_func(revision, query->progress_baton, scratch_pool); 835289177Speter if (!query->shard_size && (revision % 1000 == 0)) 836289177Speter query->progress_func(revision, query->progress_baton, scratch_pool); 837289177Speter } 838289177Speter 839289177Speter return SVN_NO_ERROR; 840289177Speter} 841289177Speter 842289177Speter/* Given the unparsed changes list in CHANGES with LEN chars, return the 843289177Speter * number of changed paths encoded in it. Only used in log. addressing 844289177Speter * mode. 845289177Speter */ 846289177Speterstatic apr_uint64_t 847289177Speterget_log_change_count(const char *changes, 848289177Speter apr_size_t len) 849289177Speter{ 850289177Speter apr_size_t lines = 0; 851289177Speter const char *end = changes + len; 852289177Speter 853289177Speter /* line count */ 854289177Speter for (; changes < end; ++changes) 855289177Speter if (*changes == '\n') 856289177Speter ++lines; 857289177Speter 858289177Speter /* two lines per change */ 859289177Speter return lines / 2; 860289177Speter} 861289177Speter 862289177Speter/* Read the item described by ENTRY from the REV_FILE and return the 863289177Speter * respective byte sequence in *CONTENTS, allocated in RESULT_POOL. 864289177Speter * Use SCRATCH_POOL for temporary allocations 865289177Speter */ 866289177Speterstatic svn_error_t * 867289177Speterread_item(svn_stringbuf_t **contents, 868289177Speter svn_fs_fs__revision_file_t *rev_file, 869289177Speter svn_fs_fs__p2l_entry_t *entry, 870289177Speter apr_pool_t *result_pool, 871289177Speter apr_pool_t *scratch_pool) 872289177Speter{ 873289177Speter svn_stringbuf_t *item = svn_stringbuf_create_ensure(entry->size, 874289177Speter result_pool); 875289177Speter item->len = entry->size; 876289177Speter item->data[item->len] = 0; 877289177Speter 878289177Speter SVN_ERR(svn_io_file_aligned_seek(rev_file->file, rev_file->block_size, 879289177Speter NULL, entry->offset, scratch_pool)); 880289177Speter SVN_ERR(svn_io_file_read_full2(rev_file->file, item->data, item->len, 881289177Speter NULL, NULL, scratch_pool)); 882289177Speter 883289177Speter *contents = item; 884289177Speter 885289177Speter return SVN_NO_ERROR; 886289177Speter} 887289177Speter 888289177Speter/* Process the logically addressed revision contents of revisions BASE to 889289177Speter * BASE + COUNT - 1 in QUERY. 890289177Speter * 891289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 892289177Speter * temporaries. 893289177Speter */ 894289177Speterstatic svn_error_t * 895289177Speterread_log_rev_or_packfile(query_t *query, 896289177Speter svn_revnum_t base, 897289177Speter int count, 898289177Speter apr_pool_t *result_pool, 899289177Speter apr_pool_t *scratch_pool) 900289177Speter{ 901289177Speter fs_fs_data_t *ffd = query->fs->fsap_data; 902289177Speter apr_pool_t *iterpool = svn_pool_create(scratch_pool); 903289177Speter apr_off_t max_offset; 904289177Speter apr_off_t offset = 0; 905289177Speter int i; 906289177Speter svn_fs_fs__revision_file_t *rev_file; 907289177Speter 908289177Speter /* we will process every revision in the rev / pack file */ 909289177Speter for (i = 0; i < count; ++i) 910289177Speter { 911289177Speter /* create the revision info for the current rev */ 912289177Speter revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info)); 913289177Speter info->representations = apr_array_make(result_pool, 4, 914289177Speter sizeof(rep_stats_t*)); 915289177Speter info->revision = base + i; 916289177Speter 917289177Speter APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info; 918289177Speter } 919289177Speter 920289177Speter /* open the pack / rev file that is covered by the p2l index */ 921289177Speter SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base, 922289177Speter scratch_pool, iterpool)); 923289177Speter SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, query->fs, rev_file, 924289177Speter base, scratch_pool)); 925289177Speter 926289177Speter /* record the whole pack size in the first rev so the total sum will 927289177Speter still be correct */ 928289177Speter APR_ARRAY_IDX(query->revisions, base, revision_info_t*)->end = max_offset; 929289177Speter 930289177Speter /* for all offsets in the file, get the P2L index entries and process 931289177Speter the interesting items (change lists, noderevs) */ 932289177Speter for (offset = 0; offset < max_offset; ) 933289177Speter { 934289177Speter apr_array_header_t *entries; 935289177Speter 936289177Speter svn_pool_clear(iterpool); 937289177Speter 938289177Speter /* cancellation support */ 939289177Speter if (query->cancel_func) 940289177Speter SVN_ERR(query->cancel_func(query->cancel_baton)); 941289177Speter 942289177Speter /* get all entries for the current block */ 943289177Speter SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, query->fs, rev_file, base, 944289177Speter offset, ffd->p2l_page_size, 945289177Speter iterpool, iterpool)); 946289177Speter 947289177Speter /* process all entries (and later continue with the next block) */ 948289177Speter for (i = 0; i < entries->nelts; ++i) 949289177Speter { 950289177Speter svn_fs_fs__p2l_entry_t *entry 951289177Speter = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); 952289177Speter 953289177Speter /* skip bits we previously processed */ 954289177Speter if (i == 0 && entry->offset < offset) 955289177Speter continue; 956289177Speter 957289177Speter /* skip zero-sized entries */ 958289177Speter if (entry->size == 0) 959289177Speter continue; 960289177Speter 961289177Speter /* read and process interesting items */ 962289177Speter if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV) 963289177Speter { 964289177Speter svn_stringbuf_t *item; 965289177Speter revision_info_t *info = APR_ARRAY_IDX(query->revisions, 966289177Speter entry->item.revision, 967289177Speter revision_info_t*); 968289177Speter SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool)); 969289177Speter SVN_ERR(read_noderev(query, item, info, result_pool, iterpool)); 970289177Speter } 971289177Speter else if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES) 972289177Speter { 973289177Speter svn_stringbuf_t *item; 974289177Speter revision_info_t *info = APR_ARRAY_IDX(query->revisions, 975289177Speter entry->item.revision, 976289177Speter revision_info_t*); 977289177Speter SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool)); 978289177Speter info->change_count 979289177Speter = get_log_change_count(item->data + 0, item->len); 980289177Speter info->changes_len += entry->size; 981289177Speter } 982289177Speter 983289177Speter /* advance offset */ 984289177Speter offset += entry->size; 985289177Speter } 986289177Speter } 987289177Speter 988289177Speter /* clean up and close file handles */ 989289177Speter svn_pool_destroy(iterpool); 990289177Speter 991289177Speter return SVN_NO_ERROR; 992289177Speter} 993289177Speter 994289177Speter/* Read the content of the pack file staring at revision BASE logical 995289177Speter * addressing mode and store it in QUERY. 996289177Speter * 997289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 998289177Speter * temporaries. 999289177Speter */ 1000289177Speterstatic svn_error_t * 1001289177Speterread_log_pack_file(query_t *query, 1002289177Speter svn_revnum_t base, 1003289177Speter apr_pool_t *result_pool, 1004289177Speter apr_pool_t *scratch_pool) 1005289177Speter{ 1006289177Speter SVN_ERR(read_log_rev_or_packfile(query, base, query->shard_size, 1007289177Speter result_pool, scratch_pool)); 1008289177Speter 1009289177Speter /* one more pack file processed */ 1010289177Speter if (query->progress_func) 1011289177Speter query->progress_func(base, query->progress_baton, scratch_pool); 1012289177Speter 1013289177Speter return SVN_NO_ERROR; 1014289177Speter} 1015289177Speter 1016289177Speter/* Read the content of the file for REVISION in logical addressing mode 1017289177Speter * and store its contents in QUERY. 1018289177Speter * 1019289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 1020289177Speter * temporaries. 1021289177Speter */ 1022289177Speterstatic svn_error_t * 1023289177Speterread_log_revision_file(query_t *query, 1024289177Speter svn_revnum_t revision, 1025289177Speter apr_pool_t *result_pool, 1026289177Speter apr_pool_t *scratch_pool) 1027289177Speter{ 1028289177Speter SVN_ERR(read_log_rev_or_packfile(query, revision, 1, 1029289177Speter result_pool, scratch_pool)); 1030289177Speter 1031289177Speter /* show progress every 1000 revs or so */ 1032289177Speter if (query->progress_func) 1033289177Speter { 1034289177Speter if (query->shard_size && (revision % query->shard_size == 0)) 1035289177Speter query->progress_func(revision, query->progress_baton, scratch_pool); 1036289177Speter if (!query->shard_size && (revision % 1000 == 0)) 1037289177Speter query->progress_func(revision, query->progress_baton, scratch_pool); 1038289177Speter } 1039289177Speter 1040289177Speter return SVN_NO_ERROR; 1041289177Speter} 1042289177Speter 1043289177Speter/* Read the repository and collect the stats info in QUERY. 1044289177Speter * 1045289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for 1046289177Speter * temporaries. 1047289177Speter */ 1048289177Speterstatic svn_error_t * 1049289177Speterread_revisions(query_t *query, 1050289177Speter apr_pool_t *result_pool, 1051289177Speter apr_pool_t *scratch_pool) 1052289177Speter{ 1053289177Speter apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1054289177Speter svn_revnum_t revision; 1055289177Speter 1056289177Speter /* read all packed revs */ 1057289177Speter for ( revision = 0 1058289177Speter ; revision < query->min_unpacked_rev 1059289177Speter ; revision += query->shard_size) 1060289177Speter { 1061289177Speter svn_pool_clear(iterpool); 1062289177Speter 1063289177Speter if (svn_fs_fs__use_log_addressing(query->fs)) 1064289177Speter SVN_ERR(read_log_pack_file(query, revision, result_pool, iterpool)); 1065289177Speter else 1066289177Speter SVN_ERR(read_phys_pack_file(query, revision, result_pool, iterpool)); 1067289177Speter } 1068289177Speter 1069289177Speter /* read non-packed revs */ 1070289177Speter for ( ; revision <= query->head; ++revision) 1071289177Speter { 1072289177Speter svn_pool_clear(iterpool); 1073289177Speter 1074289177Speter if (svn_fs_fs__use_log_addressing(query->fs)) 1075289177Speter SVN_ERR(read_log_revision_file(query, revision, result_pool, 1076289177Speter iterpool)); 1077289177Speter else 1078289177Speter SVN_ERR(read_phys_revision_file(query, revision, result_pool, 1079289177Speter iterpool)); 1080289177Speter } 1081289177Speter 1082289177Speter svn_pool_destroy(iterpool); 1083289177Speter 1084289177Speter return SVN_NO_ERROR; 1085289177Speter} 1086289177Speter 1087289177Speter/* Accumulate stats of REP in STATS. 1088289177Speter */ 1089289177Speterstatic void 1090289177Speteradd_rep_pack_stats(svn_fs_fs__rep_pack_stats_t *stats, 1091289177Speter rep_stats_t *rep) 1092289177Speter{ 1093289177Speter stats->count++; 1094289177Speter 1095289177Speter stats->packed_size += rep->size; 1096289177Speter stats->expanded_size += rep->expanded_size; 1097289177Speter stats->overhead_size += rep->header_size + 7 /* ENDREP\n */; 1098289177Speter} 1099289177Speter 1100289177Speter/* Accumulate stats of REP in STATS. 1101289177Speter */ 1102289177Speterstatic void 1103289177Speteradd_rep_stats(svn_fs_fs__representation_stats_t *stats, 1104289177Speter rep_stats_t *rep) 1105289177Speter{ 1106289177Speter add_rep_pack_stats(&stats->total, rep); 1107289177Speter if (rep->ref_count == 1) 1108289177Speter add_rep_pack_stats(&stats->uniques, rep); 1109289177Speter else 1110289177Speter add_rep_pack_stats(&stats->shared, rep); 1111289177Speter 1112289177Speter stats->references += rep->ref_count; 1113289177Speter stats->expanded_size += rep->ref_count * rep->expanded_size; 1114289177Speter} 1115289177Speter 1116289177Speter/* Aggregate the info the in revision_info_t * array REVISIONS into the 1117289177Speter * respectve fields of STATS. 1118289177Speter */ 1119289177Speterstatic void 1120289177Speteraggregate_stats(const apr_array_header_t *revisions, 1121289177Speter svn_fs_fs__stats_t *stats) 1122289177Speter{ 1123289177Speter int i, k; 1124289177Speter 1125289177Speter /* aggregate info from all revisions */ 1126289177Speter stats->revision_count = revisions->nelts; 1127289177Speter for (i = 0; i < revisions->nelts; ++i) 1128289177Speter { 1129289177Speter revision_info_t *revision = APR_ARRAY_IDX(revisions, i, 1130289177Speter revision_info_t *); 1131289177Speter 1132289177Speter /* data gathered on a revision level */ 1133289177Speter stats->change_count += revision->change_count; 1134289177Speter stats->change_len += revision->changes_len; 1135289177Speter stats->total_size += revision->end - revision->offset; 1136289177Speter 1137289177Speter stats->dir_node_stats.count += revision->dir_noderev_count; 1138289177Speter stats->dir_node_stats.size += revision->dir_noderev_size; 1139289177Speter stats->file_node_stats.count += revision->file_noderev_count; 1140289177Speter stats->file_node_stats.size += revision->file_noderev_size; 1141289177Speter stats->total_node_stats.count += revision->dir_noderev_count 1142289177Speter + revision->file_noderev_count; 1143289177Speter stats->total_node_stats.size += revision->dir_noderev_size 1144289177Speter + revision->file_noderev_size; 1145289177Speter 1146289177Speter /* process representations */ 1147289177Speter for (k = 0; k < revision->representations->nelts; ++k) 1148289177Speter { 1149289177Speter rep_stats_t *rep = APR_ARRAY_IDX(revision->representations, k, 1150289177Speter rep_stats_t *); 1151289177Speter 1152289177Speter /* accumulate in the right bucket */ 1153289177Speter switch(rep->kind) 1154289177Speter { 1155289177Speter case file_rep: 1156289177Speter add_rep_stats(&stats->file_rep_stats, rep); 1157289177Speter break; 1158289177Speter case dir_rep: 1159289177Speter add_rep_stats(&stats->dir_rep_stats, rep); 1160289177Speter break; 1161289177Speter case file_property_rep: 1162289177Speter add_rep_stats(&stats->file_prop_rep_stats, rep); 1163289177Speter break; 1164289177Speter case dir_property_rep: 1165289177Speter add_rep_stats(&stats->dir_prop_rep_stats, rep); 1166289177Speter break; 1167289177Speter default: 1168289177Speter break; 1169289177Speter } 1170289177Speter 1171289177Speter add_rep_stats(&stats->total_rep_stats, rep); 1172289177Speter } 1173289177Speter } 1174289177Speter} 1175289177Speter 1176289177Speter/* Return a new svn_fs_fs__stats_t instance, allocated in RESULT_POOL. 1177289177Speter */ 1178289177Speterstatic svn_fs_fs__stats_t * 1179289177Spetercreate_stats(apr_pool_t *result_pool) 1180289177Speter{ 1181289177Speter svn_fs_fs__stats_t *stats = apr_pcalloc(result_pool, sizeof(*stats)); 1182289177Speter 1183289177Speter initialize_largest_changes(stats, 64, result_pool); 1184289177Speter stats->by_extension = apr_hash_make(result_pool); 1185289177Speter 1186289177Speter return stats; 1187289177Speter} 1188289177Speter 1189289177Speter/* Create a *QUERY, allocated in RESULT_POOL, reading filesystem FS and 1190289177Speter * collecting results in STATS. Store the optional PROCESS_FUNC and 1191289177Speter * PROGRESS_BATON as well as CANCEL_FUNC and CANCEL_BATON in *QUERY, too. 1192289177Speter * Use SCRATCH_POOL for temporary allocations. 1193289177Speter */ 1194289177Speterstatic svn_error_t * 1195289177Spetercreate_query(query_t **query, 1196289177Speter svn_fs_t *fs, 1197289177Speter svn_fs_fs__stats_t *stats, 1198289177Speter svn_fs_progress_notify_func_t progress_func, 1199289177Speter void *progress_baton, 1200289177Speter svn_cancel_func_t cancel_func, 1201289177Speter void *cancel_baton, 1202289177Speter apr_pool_t *result_pool, 1203289177Speter apr_pool_t *scratch_pool) 1204289177Speter{ 1205289177Speter *query = apr_pcalloc(result_pool, sizeof(**query)); 1206289177Speter 1207289177Speter /* Read repository dimensions. */ 1208289177Speter (*query)->shard_size = svn_fs_fs__shard_size(fs); 1209289177Speter SVN_ERR(svn_fs_fs__youngest_rev(&(*query)->head, fs, scratch_pool)); 1210289177Speter SVN_ERR(svn_fs_fs__min_unpacked_rev(&(*query)->min_unpacked_rev, fs, 1211289177Speter scratch_pool)); 1212289177Speter 1213289177Speter /* create data containers and caches 1214289177Speter * Note: this assumes that int is at least 32-bits and that we only support 1215289177Speter * 32-bit wide revision numbers (actually 31-bits due to the signedness 1216289177Speter * of both the nelts field of the array and our revision numbers). This 1217289177Speter * means this code will fail on platforms where int is less than 32-bits 1218289177Speter * and the repository has more revisions than int can hold. */ 1219289177Speter (*query)->revisions = apr_array_make(result_pool, (int) (*query)->head + 1, 1220289177Speter sizeof(revision_info_t *)); 1221289177Speter (*query)->null_base = apr_pcalloc(result_pool, 1222289177Speter sizeof(*(*query)->null_base)); 1223289177Speter 1224289177Speter /* Store other parameters */ 1225289177Speter (*query)->fs = fs; 1226289177Speter (*query)->stats = stats; 1227289177Speter (*query)->progress_func = progress_func; 1228289177Speter (*query)->progress_baton = progress_baton; 1229289177Speter (*query)->cancel_func = cancel_func; 1230289177Speter (*query)->cancel_baton = cancel_baton; 1231289177Speter 1232289177Speter return SVN_NO_ERROR; 1233289177Speter} 1234289177Speter 1235289177Spetersvn_error_t * 1236289177Spetersvn_fs_fs__get_stats(svn_fs_fs__stats_t **stats, 1237289177Speter svn_fs_t *fs, 1238289177Speter svn_fs_progress_notify_func_t progress_func, 1239289177Speter void *progress_baton, 1240289177Speter svn_cancel_func_t cancel_func, 1241289177Speter void *cancel_baton, 1242289177Speter apr_pool_t *result_pool, 1243289177Speter apr_pool_t *scratch_pool) 1244289177Speter{ 1245289177Speter query_t *query; 1246289177Speter 1247289177Speter *stats = create_stats(result_pool); 1248289177Speter SVN_ERR(create_query(&query, fs, *stats, progress_func, progress_baton, 1249289177Speter cancel_func, cancel_baton, scratch_pool, 1250289177Speter scratch_pool)); 1251289177Speter SVN_ERR(read_revisions(query, scratch_pool, scratch_pool)); 1252289177Speter aggregate_stats(query->revisions, *stats); 1253289177Speter 1254289177Speter return SVN_NO_ERROR; 1255289177Speter} 1256