stats.c revision 362181
1/* stats.c -- implements the svn_fs_fs__get_stats private API.
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include "svn_dirent_uri.h"
24#include "svn_fs.h"
25#include "svn_pools.h"
26#include "svn_sorts.h"
27
28#include "private/svn_cache.h"
29#include "private/svn_sorts_private.h"
30#include "private/svn_string_private.h"
31
32#include "index.h"
33#include "pack.h"
34#include "rev_file.h"
35#include "util.h"
36#include "fs_fs.h"
37#include "cached_data.h"
38#include "low_level.h"
39#include "revprops.h"
40
41#include "../libsvn_fs/fs-loader.h"
42
43#include "svn_private_config.h"
44
/* Classification of representations.  There are 2x2 regular kinds,
 * [dir / file] x [text / prop], plus one default value.  A representation
 * gets the kind implied by the first node that references it.
 */
typedef enum rep_kind_t
{
  /* Null value: no noderev references this representation _directly_,
   * although some other representation may still use it as delta base.
   * Should not occur in real-world repositories. */
  unused_rep = 0,

  /* property list of a directory */
  dir_property_rep = 1,

  /* property list of a file */
  file_property_rep = 2,

  /* directory contents */
  dir_rep = 3,

  /* file contents */
  file_rep = 4
} rep_kind_t;
68
69/* A representation fragment.
70 */
71typedef struct rep_stats_t
72{
73  /* offset in the revision file (phys. addressing) /
74   * item index within REVISION (log. addressing) */
75  apr_uint64_t item_index;
76
77  /* item length in bytes */
78  apr_uint64_t size;
79
80  /* item length after de-deltification */
81  apr_uint64_t expanded_size;
82
83  /* revision that contains this representation
84   * (may be referenced by other revisions, though) */
85  svn_revnum_t revision;
86
87  /* number of nodes that reference this representation */
88  apr_uint32_t ref_count;
89
90  /* length of the PLAIN / DELTA line in the source file in bytes */
91  apr_uint16_t header_size;
92
93  /* classification of the representation. values of rep_kind_t */
94  char kind;
95
96  /* length of the delta chain, including this representation,
97   * saturated to 255 - if need be */
98  apr_byte_t chain_length;
99} rep_stats_t;
100
101/* Represents a link in the rep delta chain.  REVISION + ITEM_INDEX points
102 * to BASE_REVISION + BASE_ITEM_INDEX.  We collect this info while scanning
103 * a f7 repo in a single pass and resolve it afterwards. */
104typedef struct rep_ref_t
105{
106  /* Revision that contains this representation. */
107  svn_revnum_t revision;
108
109  /* Item index of this rep within REVISION. */
110  apr_uint64_t item_index;
111
112  /* Revision of the representation we deltified against.
113   * -1 if this representation is either PLAIN or a self-delta. */
114  svn_revnum_t base_revision;
115
116  /* Item index of that rep within BASE_REVISION. */
117  apr_uint64_t base_item_index;
118
119  /* Length of the PLAIN / DELTA line in the source file in bytes.
120   * We use this to update the info in the rep stats after scanning the
121   * whole file. */
122  apr_uint16_t header_size;
123
124} rep_ref_t;
125
126/* Represents a single revision.
127 * There will be only one instance per revision. */
128typedef struct revision_info_t
129{
130  /* number of this revision */
131  svn_revnum_t revision;
132
133  /* pack file offset (manifest value), 0 for non-packed files */
134  apr_off_t offset;
135
136  /* length of the changes list on bytes */
137  apr_uint64_t changes_len;
138
139  /* offset of the changes list relative to OFFSET */
140  apr_uint64_t change_count;
141
142  /* first offset behind the revision data in the pack file (file length
143   * for non-packed revs) */
144  apr_off_t end;
145
146  /* number of directory noderevs in this revision */
147  apr_uint64_t dir_noderev_count;
148
149  /* number of file noderevs in this revision */
150  apr_uint64_t file_noderev_count;
151
152  /* total size of directory noderevs (i.e. the structs - not the rep) */
153  apr_uint64_t dir_noderev_size;
154
155  /* total size of file noderevs (i.e. the structs - not the rep) */
156  apr_uint64_t file_noderev_size;
157
158  /* all rep_stats_t of this revision (in no particular order),
159   * i.e. those that point back to this struct */
160  apr_array_header_t *representations;
161
162  /* Temporary rev / pack file access object, used in phys. addressing
163   * mode only.  NULL when done reading this revision. */
164  svn_fs_fs__revision_file_t *rev_file;
165} revision_info_t;
166
167/* Root data structure containing all information about a given repository.
168 * We use it as a wrapper around svn_fs_t and pass it around where we would
169 * otherwise just use a svn_fs_t.
170 */
171typedef struct query_t
172{
173  /* FS API object*/
174  svn_fs_t *fs;
175
176  /* The HEAD revision. */
177  svn_revnum_t head;
178
179  /* Number of revs per shard; 0 for non-sharded repos. */
180  int shard_size;
181
182  /* First non-packed revision. */
183  svn_revnum_t min_unpacked_rev;
184
185  /* all revisions */
186  apr_array_header_t *revisions;
187
188  /* empty representation.
189   * Used as a dummy base for DELTA reps without base. */
190  rep_stats_t *null_base;
191
192  /* collected statistics */
193  svn_fs_fs__stats_t *stats;
194
195  /* Progress notification callback to call after each shard.  May be NULL. */
196  svn_fs_progress_notify_func_t progress_func;
197
198  /* Baton for PROGRESS_FUNC. */
199  void *progress_baton;
200
201  /* Cancellation support callback to call once in a while.  May be NULL. */
202  svn_cancel_func_t cancel_func;
203
204  /* Baton for CANCEL_FUNC. */
205  void *cancel_baton;
206} query_t;
207
208/* Initialize the LARGEST_CHANGES member in STATS with a capacity of COUNT
209 * entries.  Allocate the result in RESULT_POOL.
210 */
211static void
212initialize_largest_changes(svn_fs_fs__stats_t *stats,
213                           apr_size_t count,
214                           apr_pool_t *result_pool)
215{
216  apr_size_t i;
217
218  stats->largest_changes = apr_pcalloc(result_pool,
219                                       sizeof(*stats->largest_changes));
220  stats->largest_changes->count = count;
221  stats->largest_changes->min_size = 1;
222  stats->largest_changes->changes
223    = apr_palloc(result_pool, count * sizeof(*stats->largest_changes->changes));
224
225  /* allocate *all* entries before the path stringbufs.  This increases
226   * cache locality and enhances performance significantly. */
227  for (i = 0; i < count; ++i)
228    stats->largest_changes->changes[i]
229      = apr_palloc(result_pool, sizeof(**stats->largest_changes->changes));
230
231  /* now initialize them and allocate the stringbufs */
232  for (i = 0; i < count; ++i)
233    {
234      stats->largest_changes->changes[i]->size = 0;
235      stats->largest_changes->changes[i]->revision = SVN_INVALID_REVNUM;
236      stats->largest_changes->changes[i]->path
237        = svn_stringbuf_create_ensure(1024, result_pool);
238    }
239}
240
241/* Add entry for SIZE to HISTOGRAM.
242 */
243static void
244add_to_histogram(svn_fs_fs__histogram_t *histogram,
245                 apr_int64_t size)
246{
247  apr_int64_t shift = 0;
248
249  while (((apr_int64_t)(1) << shift) <= size)
250    shift++;
251
252  histogram->total.count++;
253  histogram->total.sum += size;
254  histogram->lines[(apr_size_t)shift].count++;
255  histogram->lines[(apr_size_t)shift].sum += size;
256}
257
258/* Update data aggregators in STATS with this representation of type KIND,
259 * on-disk REP_SIZE and expanded node size EXPANDED_SIZE for PATH in REVSION.
260 * PLAIN_ADDED indicates whether the node has a deltification predecessor.
261 */
262static void
263add_change(svn_fs_fs__stats_t *stats,
264           apr_uint64_t rep_size,
265           apr_uint64_t expanded_size,
266           svn_revnum_t revision,
267           const char *path,
268           rep_kind_t kind,
269           svn_boolean_t plain_added)
270{
271  /* identify largest reps */
272  if (rep_size >= stats->largest_changes->min_size)
273    {
274      apr_size_t i;
275      svn_fs_fs__largest_changes_t *largest_changes = stats->largest_changes;
276      svn_fs_fs__large_change_info_t *info
277        = largest_changes->changes[largest_changes->count - 1];
278      info->size = rep_size;
279      info->revision = revision;
280      svn_stringbuf_set(info->path, path);
281
282      /* linear insertion but not too bad since count is low and insertions
283       * near the end are more likely than close to front */
284      for (i = largest_changes->count - 1; i > 0; --i)
285        if (largest_changes->changes[i-1]->size >= rep_size)
286          break;
287        else
288          largest_changes->changes[i] = largest_changes->changes[i-1];
289
290      largest_changes->changes[i] = info;
291      largest_changes->min_size
292        = largest_changes->changes[largest_changes->count-1]->size;
293    }
294
295  /* global histograms */
296  add_to_histogram(&stats->rep_size_histogram, rep_size);
297  add_to_histogram(&stats->node_size_histogram, expanded_size);
298
299  if (plain_added)
300    {
301      add_to_histogram(&stats->added_rep_size_histogram, rep_size);
302      add_to_histogram(&stats->added_node_size_histogram, expanded_size);
303    }
304
305  /* specific histograms by type */
306  switch (kind)
307    {
308      case unused_rep:
309        add_to_histogram(&stats->unused_rep_histogram, rep_size);
310        break;
311      case dir_property_rep:
312        add_to_histogram(&stats->dir_prop_rep_histogram, rep_size);
313        add_to_histogram(&stats->dir_prop_histogram, expanded_size);
314        break;
315      case file_property_rep:
316        add_to_histogram(&stats->file_prop_rep_histogram, rep_size);
317        add_to_histogram(&stats->file_prop_histogram, expanded_size);
318        break;
319      case dir_rep:
320        add_to_histogram(&stats->dir_rep_histogram, rep_size);
321        add_to_histogram(&stats->dir_histogram, expanded_size);
322        break;
323      case file_rep:
324        add_to_histogram(&stats->file_rep_histogram, rep_size);
325        add_to_histogram(&stats->file_histogram, expanded_size);
326        break;
327    }
328
329  /* by extension */
330  if (kind == file_rep)
331    {
332      /* determine extension */
333      svn_fs_fs__extension_info_t *info;
334      const char * file_name = strrchr(path, '/');
335      const char * extension = file_name ? strrchr(file_name, '.') : NULL;
336
337      if (extension == NULL || extension == file_name + 1)
338        extension = "(none)";
339
340      /* get / auto-insert entry for this extension */
341      info = apr_hash_get(stats->by_extension, extension, APR_HASH_KEY_STRING);
342      if (info == NULL)
343        {
344          apr_pool_t *pool = apr_hash_pool_get(stats->by_extension);
345          info = apr_pcalloc(pool, sizeof(*info));
346          info->extension = apr_pstrdup(pool, extension);
347
348          apr_hash_set(stats->by_extension, info->extension,
349                       APR_HASH_KEY_STRING, info);
350        }
351
352      /* update per-extension histogram */
353      add_to_histogram(&info->node_histogram, expanded_size);
354      add_to_histogram(&info->rep_histogram, rep_size);
355    }
356}
357
358/* Comparator used for binary search comparing the absolute file offset
359 * of a representation to some other offset. DATA is a *rep_stats_t,
360 * KEY is a pointer to an apr_uint64_t.
361 */
362static int
363compare_representation_item_index(const void *data, const void *key)
364{
365  apr_uint64_t lhs = (*(const rep_stats_t *const *)data)->item_index;
366  apr_uint64_t rhs = *(const apr_uint64_t *)key;
367
368  if (lhs < rhs)
369    return -1;
370  return (lhs > rhs ? 1 : 0);
371}
372
373/* Find the revision_info_t object to the given REVISION in QUERY and
374 * return it in *REVISION_INFO. For performance reasons, we skip the
375 * lookup if the info is already provided.
376 *
377 * In that revision, look for the rep_stats_t object for item ITEM_INDEX.
378 * If it already exists, set *IDX to its index in *REVISION_INFO's
379 * representations list and return the representation object. Otherwise,
380 * set the index to where it must be inserted and return NULL.
381 */
382static rep_stats_t *
383find_representation(int *idx,
384                    query_t *query,
385                    revision_info_t **revision_info,
386                    svn_revnum_t revision,
387                    apr_uint64_t item_index)
388{
389  revision_info_t *info;
390  *idx = -1;
391
392  /* first let's find the revision */
393  info = revision_info ? *revision_info : NULL;
394  if (info == NULL || info->revision != revision)
395    {
396      info = APR_ARRAY_IDX(query->revisions, revision, revision_info_t*);
397      if (revision_info)
398        *revision_info = info;
399    }
400
401  /* not found -> no result */
402  if (info == NULL)
403    return NULL;
404
405  /* look for the representation */
406  *idx = svn_sort__bsearch_lower_bound(info->representations,
407                                       &item_index,
408                                       compare_representation_item_index);
409  if (*idx < info->representations->nelts)
410    {
411      /* return the representation, if this is the one we were looking for */
412      rep_stats_t *result
413        = APR_ARRAY_IDX(info->representations, *idx, rep_stats_t *);
414      if (result->item_index == item_index)
415        return result;
416    }
417
418  /* not parsed, yet */
419  return NULL;
420}
421
422/* Find / auto-construct the representation stats for REP in QUERY and
423 * return it in *REPRESENTATION.
424 *
425 * If necessary, allocate the result in RESULT_POOL; use SCRATCH_POOL for
426 * temporary allocations.
427 */
428static svn_error_t *
429parse_representation(rep_stats_t **representation,
430                     query_t *query,
431                     representation_t *rep,
432                     revision_info_t *revision_info,
433                     apr_pool_t *result_pool,
434                     apr_pool_t *scratch_pool)
435{
436  rep_stats_t *result;
437  int idx;
438
439  /* read location (revision, offset) and size */
440
441  /* look it up */
442  result = find_representation(&idx, query, &revision_info, rep->revision,
443                               rep->item_index);
444  if (!result)
445    {
446      /* not parsed, yet (probably a rep in the same revision).
447       * Create a new rep object and determine its base rep as well.
448       */
449      result = apr_pcalloc(result_pool, sizeof(*result));
450      result->revision = rep->revision;
451      result->expanded_size = rep->expanded_size;
452      result->item_index = rep->item_index;
453      result->size = rep->size;
454
455      /* In phys. addressing mode, follow link to the actual representation.
456       * In log. addressing mode, we will find it already as part of our
457       * linear walk through the whole file. */
458      if (!svn_fs_fs__use_log_addressing(query->fs))
459        {
460          svn_fs_fs__rep_header_t *header;
461          apr_off_t offset = revision_info->offset
462                           + (apr_off_t)rep->item_index;
463
464          SVN_ERR_ASSERT(revision_info->rev_file);
465          SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET,
466                                   &offset, scratch_pool));
467          SVN_ERR(svn_fs_fs__read_rep_header(&header,
468                                             revision_info->rev_file->stream,
469                                             scratch_pool, scratch_pool));
470
471          result->header_size = header->header_size;
472
473          /* Determine length of the delta chain. */
474          if (header->type == svn_fs_fs__rep_delta)
475            {
476              int base_idx;
477              rep_stats_t *base_rep
478                = find_representation(&base_idx, query, NULL,
479                                      header->base_revision,
480                                      header->base_item_index);
481
482              result->chain_length = 1 + MIN(base_rep->chain_length,
483                                             (apr_byte_t)0xfe);
484            }
485          else
486            {
487              result->chain_length = 1;
488            }
489        }
490
491      SVN_ERR(svn_sort__array_insert2(revision_info->representations, &result, idx));
492    }
493
494  *representation = result;
495
496  return SVN_NO_ERROR;
497}
498
499
500/* forward declaration */
501static svn_error_t *
502read_noderev(query_t *query,
503             svn_stringbuf_t *noderev_str,
504             revision_info_t *revision_info,
505             apr_pool_t *result_pool,
506             apr_pool_t *scratch_pool);
507
508/* Read the noderev item at OFFSET in REVISION_INFO from the filesystem
509 * provided by QUERY.  Return it in *NODEREV, allocated in RESULT_POOL.
510 * Use SCRATCH_POOL for temporary allocations.
511 *
512 * The textual representation of the noderev will be used to determine
513 * the on-disk size of the noderev.  Only called in phys. addressing mode.
514 */
515static svn_error_t *
516read_phsy_noderev(svn_stringbuf_t **noderev,
517                  query_t *query,
518                  apr_off_t offset,
519                  revision_info_t *revision_info,
520                  apr_pool_t *result_pool,
521                  apr_pool_t *scratch_pool)
522{
523  svn_stringbuf_t *noderev_str = svn_stringbuf_create_empty(result_pool);
524  svn_stringbuf_t *line;
525  svn_boolean_t eof;
526
527  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
528
529  /* Navigate the file stream to the start of noderev. */
530  SVN_ERR_ASSERT(revision_info->rev_file);
531
532  offset += revision_info->offset;
533  SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET,
534                           &offset, scratch_pool));
535
536  /* Read it (terminated by an empty line) */
537  do
538    {
539      svn_pool_clear(iterpool);
540
541      SVN_ERR(svn_stream_readline(revision_info->rev_file->stream, &line,
542                                  "\n", &eof, iterpool));
543      svn_stringbuf_appendstr(noderev_str, line);
544      svn_stringbuf_appendbyte(noderev_str, '\n');
545    }
546  while (line->len > 0 && !eof);
547
548  /* Return the result. */
549  *noderev = noderev_str;
550
551  svn_pool_destroy(iterpool);
552
553  return SVN_NO_ERROR;
554}
555
556/* Starting at the directory in NODEREV's text, read all DAG nodes,
557 * directories and representations linked in that tree structure.
558 * Store them in QUERY and REVISION_INFO.  Also, read them only once.
559 *
560 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
561 * temporaries.
562 */
563static svn_error_t *
564parse_dir(query_t *query,
565          node_revision_t *noderev,
566          revision_info_t *revision_info,
567          apr_pool_t *result_pool,
568          apr_pool_t *scratch_pool)
569{
570  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
571
572  int i;
573  apr_array_header_t *entries;
574  SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, query->fs, noderev,
575                                      scratch_pool, scratch_pool));
576
577  for (i = 0; i < entries->nelts; ++i)
578    {
579      svn_fs_dirent_t *dirent = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
580
581      if (svn_fs_fs__id_rev(dirent->id) == revision_info->revision)
582        {
583          svn_stringbuf_t *noderev_str;
584          svn_pool_clear(iterpool);
585
586          SVN_ERR(read_phsy_noderev(&noderev_str, query,
587                                    svn_fs_fs__id_item(dirent->id),
588                                    revision_info, iterpool, iterpool));
589          SVN_ERR(read_noderev(query, noderev_str, revision_info,
590                               result_pool, iterpool));
591        }
592    }
593
594  svn_pool_destroy(iterpool);
595
596  return SVN_NO_ERROR;
597}
598
599/* Parse the noderev given as NODEREV_STR and store the info in QUERY and
600 * REVISION_INFO.  In phys. addressing mode, continue reading all DAG nodes,
601 * directories and representations linked in that tree structure.
602 *
603 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
604 * temporaries.
605 */
606static svn_error_t *
607read_noderev(query_t *query,
608             svn_stringbuf_t *noderev_str,
609             revision_info_t *revision_info,
610             apr_pool_t *result_pool,
611             apr_pool_t *scratch_pool)
612{
613  rep_stats_t *text = NULL;
614  rep_stats_t *props = NULL;
615  node_revision_t *noderev;
616
617  svn_stream_t *stream = svn_stream_from_stringbuf(noderev_str, scratch_pool);
618  SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, scratch_pool,
619                                  scratch_pool));
620  SVN_ERR(svn_fs_fs__fixup_expanded_size(query->fs, noderev->data_rep,
621                                         scratch_pool));
622  SVN_ERR(svn_fs_fs__fixup_expanded_size(query->fs, noderev->prop_rep,
623                                         scratch_pool));
624
625  if (noderev->data_rep)
626    {
627      SVN_ERR(parse_representation(&text, query,
628                                   noderev->data_rep, revision_info,
629                                   result_pool, scratch_pool));
630
631      /* if we are the first to use this rep, mark it as "text rep" */
632      if (++text->ref_count == 1)
633        text->kind = noderev->kind == svn_node_dir ? dir_rep : file_rep;
634    }
635
636  if (noderev->prop_rep)
637    {
638      SVN_ERR(parse_representation(&props, query,
639                                   noderev->prop_rep, revision_info,
640                                   result_pool, scratch_pool));
641
642      /* if we are the first to use this rep, mark it as "prop rep" */
643      if (++props->ref_count == 1)
644        props->kind = noderev->kind == svn_node_dir ? dir_property_rep
645                                                    : file_property_rep;
646    }
647
648  /* record largest changes */
649  if (text && text->ref_count == 1)
650    add_change(query->stats, text->size, text->expanded_size, text->revision,
651               noderev->created_path, text->kind, !noderev->predecessor_id);
652  if (props && props->ref_count == 1)
653    add_change(query->stats, props->size, props->expanded_size,
654               props->revision, noderev->created_path, props->kind,
655               !noderev->predecessor_id);
656
657  /* if this is a directory and has not been processed, yet, read and
658   * process it recursively */
659  if (   noderev->kind == svn_node_dir && text && text->ref_count == 1
660      && !svn_fs_fs__use_log_addressing(query->fs))
661    SVN_ERR(parse_dir(query, noderev, revision_info, result_pool,
662                      scratch_pool));
663
664  /* update stats */
665  if (noderev->kind == svn_node_dir)
666    {
667      revision_info->dir_noderev_size += noderev_str->len;
668      revision_info->dir_noderev_count++;
669    }
670  else
671    {
672      revision_info->file_noderev_size += noderev_str->len;
673      revision_info->file_noderev_count++;
674    }
675
676  return SVN_NO_ERROR;
677}
678
679/* For the revision given as REVISION_INFO within QUERY, determine the number
680 * of entries in its changed paths list and store that info in REVISION_INFO.
681 * Use SCRATCH_POOL for temporary allocations.
682 */
683static svn_error_t *
684get_phys_change_count(query_t *query,
685                      revision_info_t *revision_info,
686                      apr_pool_t *scratch_pool)
687{
688  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
689  svn_fs_fs__changes_context_t *context;
690
691  /* Fetch the first block of data. */
692  SVN_ERR(svn_fs_fs__create_changes_context(&context, query->fs,
693                                            revision_info->revision,
694                                            scratch_pool));
695
696  revision_info->change_count = 0;
697  while (!context->eol)
698    {
699      apr_array_header_t *changes;
700
701      svn_pool_clear(iterpool);
702      SVN_ERR(svn_fs_fs__get_changes(&changes, context, iterpool, iterpool));
703      revision_info->change_count = changes->nelts;
704    }
705
706  svn_pool_destroy(iterpool);
707
708  return SVN_NO_ERROR;
709}
710
711/* Read header information for the revision stored in FILE_CONTENT (one
712 * whole revision).  Return the offsets within FILE_CONTENT for the
713 * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN.
714 * Use POOL for temporary allocations. */
715static svn_error_t *
716read_phys_revision(query_t *query,
717                   revision_info_t *info,
718                   apr_pool_t *result_pool,
719                   apr_pool_t *scratch_pool)
720{
721  char buf[64];
722  apr_off_t root_node_offset;
723  apr_off_t changes_offset;
724  svn_stringbuf_t *trailer;
725  svn_stringbuf_t *noderev_str;
726
727  /* Read the last 64 bytes of the revision (if long enough). */
728  apr_off_t start = MAX(info->offset, info->end - sizeof(buf));
729  apr_size_t len = (apr_size_t)(info->end - start);
730  SVN_ERR(svn_io_file_seek(info->rev_file->file, APR_SET, &start,
731                           scratch_pool));
732  SVN_ERR(svn_io_file_read_full2(info->rev_file->file, buf, len, NULL, NULL,
733                                 scratch_pool));
734  trailer = svn_stringbuf_ncreate(buf, len, scratch_pool);
735
736  /* Parse that trailer. */
737  SVN_ERR(svn_fs_fs__parse_revision_trailer(&root_node_offset,
738                                            &changes_offset, trailer,
739                                            info->revision));
740  SVN_ERR(get_phys_change_count(query, info, scratch_pool));
741
742  /* Calculate the length of the changes list. */
743  trailer = svn_fs_fs__unparse_revision_trailer(root_node_offset,
744                                                changes_offset,
745                                                scratch_pool);
746  info->changes_len = info->end - info->offset - changes_offset
747                    - trailer->len;
748
749  /* Recursively read nodes added in this rev. */
750  SVN_ERR(read_phsy_noderev(&noderev_str, query, root_node_offset, info,
751                            scratch_pool, scratch_pool));
752  SVN_ERR(read_noderev(query, noderev_str, info, result_pool, scratch_pool));
753
754  return SVN_NO_ERROR;
755}
756
757/* Read the content of the pack file staring at revision BASE physical
758 * addressing mode and store it in QUERY.
759 *
760 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
761 * temporaries.
762 */
763static svn_error_t *
764read_phys_pack_file(query_t *query,
765                    svn_revnum_t base,
766                    apr_pool_t *result_pool,
767                    apr_pool_t *scratch_pool)
768{
769  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
770  int i;
771  svn_filesize_t file_size = 0;
772  svn_fs_fs__revision_file_t *rev_file;
773
774  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base,
775                                           scratch_pool, scratch_pool));
776  SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool));
777
778  /* process each revision in the pack file */
779  for (i = 0; i < query->shard_size; ++i)
780    {
781      revision_info_t *info;
782
783      /* cancellation support */
784      if (query->cancel_func)
785        SVN_ERR(query->cancel_func(query->cancel_baton));
786
787      /* create the revision info for the current rev */
788      info = apr_pcalloc(result_pool, sizeof(*info));
789      info->representations = apr_array_make(result_pool, 4,
790                                             sizeof(rep_stats_t*));
791      info->rev_file = rev_file;
792
793      info->revision = base + i;
794      SVN_ERR(svn_fs_fs__get_packed_offset(&info->offset, query->fs, base + i,
795                                           iterpool));
796      if (i + 1 == query->shard_size)
797        info->end = file_size;
798      else
799        SVN_ERR(svn_fs_fs__get_packed_offset(&info->end, query->fs,
800                                             base + i + 1, iterpool));
801
802      SVN_ERR(read_phys_revision(query, info, result_pool, iterpool));
803
804      info->representations = apr_array_copy(result_pool,
805                                             info->representations);
806
807      /* Done with this revision. */
808      info->rev_file = NULL;
809
810      /* put it into our container */
811      APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
812
813      /* destroy temps */
814      svn_pool_clear(iterpool);
815    }
816
817  /* Done with this pack file. */
818  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
819
820  /* one more pack file processed */
821  if (query->progress_func)
822    query->progress_func(base, query->progress_baton, scratch_pool);
823
824  return SVN_NO_ERROR;
825}
826
827/* Read the content of the file for REVISION in physical addressing mode
828 * and store its contents in QUERY.
829 *
830 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
831 * temporaries.
832 */
833static svn_error_t *
834read_phys_revision_file(query_t *query,
835                        svn_revnum_t revision,
836                        apr_pool_t *result_pool,
837                        apr_pool_t *scratch_pool)
838{
839  revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info));
840  svn_filesize_t file_size = 0;
841  svn_fs_fs__revision_file_t *rev_file;
842
843  /* cancellation support */
844  if (query->cancel_func)
845    SVN_ERR(query->cancel_func(query->cancel_baton));
846
847  /* read the whole pack file into memory */
848  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision,
849                                           scratch_pool, scratch_pool));
850  SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool));
851
852  /* create the revision info for the current rev */
853  info->representations = apr_array_make(result_pool, 4, sizeof(rep_stats_t*));
854
855  info->rev_file = rev_file;
856  info->revision = revision;
857  info->offset = 0;
858  info->end = file_size;
859
860  SVN_ERR(read_phys_revision(query, info, result_pool, scratch_pool));
861
862  /* Done with this revision. */
863  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
864  info->rev_file = NULL;
865
866  /* put it into our container */
867  APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
868
869  /* show progress every 1000 revs or so */
870  if (query->progress_func)
871    {
872      if (query->shard_size && (revision % query->shard_size == 0))
873        query->progress_func(revision, query->progress_baton, scratch_pool);
874      if (!query->shard_size && (revision % 1000 == 0))
875        query->progress_func(revision, query->progress_baton, scratch_pool);
876    }
877
878  return SVN_NO_ERROR;
879}
880
/* Return the number of changed paths encoded in the unparsed changes list
 * CHANGES, which is LEN chars long.  Every change is counted as two
 * newline-terminated lines; an odd trailing line is ignored.  Only used
 * in log. addressing mode.
 */
static apr_uint64_t
get_log_change_count(const char *changes,
                     apr_size_t len)
{
  apr_size_t newlines = 0;
  apr_size_t pos;

  /* Count the line terminators within the first LEN chars. */
  for (pos = 0; pos < len; ++pos)
    {
      if (changes[pos] == '\n')
        newlines++;
    }

  /* Each change occupies two lines. */
  return newlines / 2;
}
900
901/* Read the item described by ENTRY from the REV_FILE and return the
902 * respective byte sequence in *CONTENTS, allocated in RESULT_POOL.
903 * Use SCRATCH_POOL for temporary allocations
904 */
905static svn_error_t *
906read_item(svn_stringbuf_t **contents,
907          svn_fs_fs__revision_file_t *rev_file,
908          svn_fs_fs__p2l_entry_t *entry,
909          apr_pool_t *result_pool,
910          apr_pool_t *scratch_pool)
911{
912  svn_stringbuf_t *item = svn_stringbuf_create_ensure(entry->size,
913                                                      result_pool);
914  item->len = entry->size;
915  item->data[item->len] = 0;
916
917  SVN_ERR(svn_io_file_aligned_seek(rev_file->file, rev_file->block_size,
918                                   NULL, entry->offset, scratch_pool));
919  SVN_ERR(svn_io_file_read_full2(rev_file->file, item->data, item->len,
920                                 NULL, NULL, scratch_pool));
921
922  *contents = item;
923
924  return SVN_NO_ERROR;
925}
926
927/* Predicate comparing the two rep_ref_t** LHS and RHS by the respective
928 * representation's revision.
929 */
930static int
931compare_representation_refs(const void *lhs, const void *rhs)
932{
933  svn_revnum_t lhs_rev = (*(const rep_ref_t *const *)lhs)->revision;
934  svn_revnum_t rhs_rev = (*(const rep_ref_t *const *)rhs)->revision;
935
936  if (lhs_rev < rhs_rev)
937    return -1;
938  return (lhs_rev > rhs_rev ? 1 : 0);
939}
940
941/* Given all the presentations found in a single rev / pack file as
942 * rep_ref_t * in REP_REFS, update the delta chain lengths in QUERY.
943 * REP_REFS and its contents can then be discarded.
944 */
945static svn_error_t *
946resolve_representation_refs(query_t *query,
947                            apr_array_header_t *rep_refs)
948{
949  int i;
950
951  /* Because delta chains can only point to previous revs, after sorting
952   * REP_REFS, all base refs have already been updated. */
953  svn_sort__array(rep_refs, compare_representation_refs);
954
955  /* Build up the CHAIN_LENGTH values. */
956  for (i = 0; i < rep_refs->nelts; ++i)
957    {
958      int idx;
959      rep_ref_t *ref = APR_ARRAY_IDX(rep_refs, i, rep_ref_t *);
960      rep_stats_t *rep = find_representation(&idx, query, NULL,
961                                             ref->revision, ref->item_index);
962
963      /* No dangling pointers and all base reps have been processed. */
964      SVN_ERR_ASSERT(rep);
965      SVN_ERR_ASSERT(!rep->chain_length);
966
967      /* Set the HEADER_SIZE as we found it during the scan. */
968      rep->header_size = ref->header_size;
969
970      /* The delta chain got 1 element longer. */
971      if (ref->base_revision == SVN_INVALID_REVNUM)
972        {
973          rep->chain_length = 1;
974        }
975      else
976        {
977          rep_stats_t *base;
978
979          base = find_representation(&idx, query, NULL, ref->base_revision,
980                                     ref->base_item_index);
981          SVN_ERR_ASSERT(base);
982          SVN_ERR_ASSERT(base->chain_length);
983
984          rep->chain_length = 1 + MIN(base->chain_length, (apr_byte_t)0xfe);
985        }
986    }
987
988  return SVN_NO_ERROR;
989}
990
991/* Process the logically addressed revision contents of revisions BASE to
992 * BASE + COUNT - 1 in QUERY.
993 *
994 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
995 * temporaries.
996 */
997static svn_error_t *
998read_log_rev_or_packfile(query_t *query,
999                         svn_revnum_t base,
1000                         int count,
1001                         apr_pool_t *result_pool,
1002                         apr_pool_t *scratch_pool)
1003{
1004  fs_fs_data_t *ffd = query->fs->fsap_data;
1005  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1006  apr_off_t max_offset;
1007  apr_off_t offset = 0;
1008  int i;
1009  svn_fs_fs__revision_file_t *rev_file;
1010
1011  /* We collect the delta chain links as we scan the file.  Afterwards,
1012   * we determine the lengths of those delta chains and throw this
1013   * temporary container away. */
1014  apr_array_header_t *rep_refs = apr_array_make(scratch_pool, 64,
1015                                                sizeof(rep_ref_t *));
1016
1017  /* we will process every revision in the rev / pack file */
1018  for (i = 0; i < count; ++i)
1019    {
1020      /* create the revision info for the current rev */
1021      revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info));
1022      info->representations = apr_array_make(result_pool, 4,
1023                                             sizeof(rep_stats_t*));
1024      info->revision = base + i;
1025
1026      APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
1027    }
1028
1029  /* open the pack / rev file that is covered by the p2l index */
1030  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base,
1031                                           scratch_pool, iterpool));
1032  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, query->fs, rev_file,
1033                                        base, scratch_pool));
1034
1035  /* record the whole pack size in the first rev so the total sum will
1036     still be correct */
1037  APR_ARRAY_IDX(query->revisions, base, revision_info_t*)->end = max_offset;
1038
1039  /* for all offsets in the file, get the P2L index entries and process
1040     the interesting items (change lists, noderevs) */
1041  for (offset = 0; offset < max_offset; )
1042    {
1043      apr_array_header_t *entries;
1044
1045      svn_pool_clear(iterpool);
1046
1047      /* cancellation support */
1048      if (query->cancel_func)
1049        SVN_ERR(query->cancel_func(query->cancel_baton));
1050
1051      /* get all entries for the current block */
1052      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, query->fs, rev_file, base,
1053                                          offset, ffd->p2l_page_size,
1054                                          iterpool, iterpool));
1055
1056      /* process all entries (and later continue with the next block) */
1057      for (i = 0; i < entries->nelts; ++i)
1058        {
1059          svn_stringbuf_t *item;
1060          revision_info_t *info;
1061          svn_fs_fs__p2l_entry_t *entry
1062            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1063
1064          /* skip bits we previously processed */
1065          if (i == 0 && entry->offset < offset)
1066            continue;
1067
1068          /* skip zero-sized entries */
1069          if (entry->size == 0)
1070            continue;
1071
1072          /* read and process interesting items */
1073          info = APR_ARRAY_IDX(query->revisions, entry->item.revision,
1074                               revision_info_t*);
1075
1076          if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
1077            {
1078              SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool));
1079              SVN_ERR(read_noderev(query, item, info, result_pool, iterpool));
1080            }
1081          else if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
1082            {
1083              SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool));
1084              info->change_count
1085                = get_log_change_count(item->data + 0, item->len);
1086              info->changes_len += entry->size;
1087            }
1088          else if (   (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP)
1089                   || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
1090                   || (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
1091                   || (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS))
1092            {
1093              /* Collect the delta chain link. */
1094              svn_fs_fs__rep_header_t *header;
1095              rep_ref_t *ref = apr_pcalloc(scratch_pool, sizeof(*ref));
1096
1097              SVN_ERR(svn_io_file_aligned_seek(rev_file->file,
1098                                               rev_file->block_size,
1099                                               NULL, entry->offset,
1100                                               iterpool));
1101              SVN_ERR(svn_fs_fs__read_rep_header(&header,
1102                                                 rev_file->stream,
1103                                                 iterpool, iterpool));
1104
1105              ref->header_size = header->header_size;
1106              ref->revision = entry->item.revision;
1107              ref->item_index = entry->item.number;
1108
1109              if (header->type == svn_fs_fs__rep_delta)
1110                {
1111                  ref->base_item_index = header->base_item_index;
1112                  ref->base_revision = header->base_revision;
1113                }
1114              else
1115                {
1116                  ref->base_item_index = SVN_FS_FS__ITEM_INDEX_UNUSED;
1117                  ref->base_revision = SVN_INVALID_REVNUM;
1118                }
1119
1120              APR_ARRAY_PUSH(rep_refs, rep_ref_t *) = ref;
1121            }
1122
1123          /* advance offset */
1124          offset += entry->size;
1125        }
1126    }
1127
1128  /* Resolve the delta chain links. */
1129  SVN_ERR(resolve_representation_refs(query, rep_refs));
1130
1131  /* clean up and close file handles */
1132  svn_pool_destroy(iterpool);
1133
1134  return SVN_NO_ERROR;
1135}
1136
1137/* Read the content of the pack file staring at revision BASE logical
1138 * addressing mode and store it in QUERY.
1139 *
1140 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
1141 * temporaries.
1142 */
1143static svn_error_t *
1144read_log_pack_file(query_t *query,
1145                   svn_revnum_t base,
1146                   apr_pool_t *result_pool,
1147                   apr_pool_t *scratch_pool)
1148{
1149  SVN_ERR(read_log_rev_or_packfile(query, base, query->shard_size,
1150                                   result_pool, scratch_pool));
1151
1152  /* one more pack file processed */
1153  if (query->progress_func)
1154    query->progress_func(base, query->progress_baton, scratch_pool);
1155
1156  return SVN_NO_ERROR;
1157}
1158
1159/* Read the content of the file for REVISION in logical addressing mode
1160 * and store its contents in QUERY.
1161 *
1162 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
1163 * temporaries.
1164 */
1165static svn_error_t *
1166read_log_revision_file(query_t *query,
1167                       svn_revnum_t revision,
1168                       apr_pool_t *result_pool,
1169                       apr_pool_t *scratch_pool)
1170{
1171  SVN_ERR(read_log_rev_or_packfile(query, revision, 1,
1172                                   result_pool, scratch_pool));
1173
1174  /* show progress every 1000 revs or so */
1175  if (query->progress_func)
1176    {
1177      if (query->shard_size && (revision % query->shard_size == 0))
1178        query->progress_func(revision, query->progress_baton, scratch_pool);
1179      if (!query->shard_size && (revision % 1000 == 0))
1180        query->progress_func(revision, query->progress_baton, scratch_pool);
1181    }
1182
1183  return SVN_NO_ERROR;
1184}
1185
1186/* Read the repository and collect the stats info in QUERY.
1187 *
1188 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
1189 * temporaries.
1190 */
1191static svn_error_t *
1192read_revisions(query_t *query,
1193               apr_pool_t *result_pool,
1194               apr_pool_t *scratch_pool)
1195{
1196  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1197  svn_revnum_t revision;
1198
1199  /* read all packed revs */
1200  for ( revision = 0
1201      ; revision < query->min_unpacked_rev
1202      ; revision += query->shard_size)
1203    {
1204      svn_pool_clear(iterpool);
1205
1206      if (svn_fs_fs__use_log_addressing(query->fs))
1207        SVN_ERR(read_log_pack_file(query, revision, result_pool, iterpool));
1208      else
1209        SVN_ERR(read_phys_pack_file(query, revision, result_pool, iterpool));
1210    }
1211
1212  /* read non-packed revs */
1213  for ( ; revision <= query->head; ++revision)
1214    {
1215      svn_pool_clear(iterpool);
1216
1217      if (svn_fs_fs__use_log_addressing(query->fs))
1218        SVN_ERR(read_log_revision_file(query, revision, result_pool,
1219                                       iterpool));
1220      else
1221        SVN_ERR(read_phys_revision_file(query, revision, result_pool,
1222                                        iterpool));
1223    }
1224
1225  svn_pool_destroy(iterpool);
1226
1227  return SVN_NO_ERROR;
1228}
1229
1230/* Accumulate stats of REP in STATS.
1231 */
1232static void
1233add_rep_pack_stats(svn_fs_fs__rep_pack_stats_t *stats,
1234                   rep_stats_t *rep)
1235{
1236  stats->count++;
1237
1238  stats->packed_size += rep->size;
1239  stats->expanded_size += rep->expanded_size;
1240  stats->overhead_size += rep->header_size + 7 /* ENDREP\n */;
1241}
1242
1243/* Accumulate stats of REP in STATS.
1244 */
1245static void
1246add_rep_stats(svn_fs_fs__representation_stats_t *stats,
1247              rep_stats_t *rep)
1248{
1249  add_rep_pack_stats(&stats->total, rep);
1250  if (rep->ref_count == 1)
1251    add_rep_pack_stats(&stats->uniques, rep);
1252  else
1253    add_rep_pack_stats(&stats->shared, rep);
1254
1255  stats->references += rep->ref_count;
1256  stats->expanded_size += rep->ref_count * rep->expanded_size;
1257  stats->chain_len += rep->chain_length;
1258}
1259
1260/* Aggregate the info the in revision_info_t * array REVISIONS into the
1261 * respectve fields of STATS.
1262 */
1263static void
1264aggregate_stats(const apr_array_header_t *revisions,
1265                svn_fs_fs__stats_t *stats)
1266{
1267  int i, k;
1268
1269  /* aggregate info from all revisions */
1270  stats->revision_count = revisions->nelts;
1271  for (i = 0; i < revisions->nelts; ++i)
1272    {
1273      revision_info_t *revision = APR_ARRAY_IDX(revisions, i,
1274                                                revision_info_t *);
1275
1276      /* data gathered on a revision level */
1277      stats->change_count += revision->change_count;
1278      stats->change_len += revision->changes_len;
1279      stats->total_size += revision->end - revision->offset;
1280
1281      stats->dir_node_stats.count += revision->dir_noderev_count;
1282      stats->dir_node_stats.size += revision->dir_noderev_size;
1283      stats->file_node_stats.count += revision->file_noderev_count;
1284      stats->file_node_stats.size += revision->file_noderev_size;
1285      stats->total_node_stats.count += revision->dir_noderev_count
1286                                    + revision->file_noderev_count;
1287      stats->total_node_stats.size += revision->dir_noderev_size
1288                                   + revision->file_noderev_size;
1289
1290      /* process representations */
1291      for (k = 0; k < revision->representations->nelts; ++k)
1292        {
1293          rep_stats_t *rep = APR_ARRAY_IDX(revision->representations, k,
1294                                           rep_stats_t *);
1295
1296          /* accumulate in the right bucket */
1297          switch(rep->kind)
1298            {
1299              case file_rep:
1300                add_rep_stats(&stats->file_rep_stats, rep);
1301                break;
1302              case dir_rep:
1303                add_rep_stats(&stats->dir_rep_stats, rep);
1304                break;
1305              case file_property_rep:
1306                add_rep_stats(&stats->file_prop_rep_stats, rep);
1307                break;
1308              case dir_property_rep:
1309                add_rep_stats(&stats->dir_prop_rep_stats, rep);
1310                break;
1311              default:
1312                break;
1313            }
1314
1315          add_rep_stats(&stats->total_rep_stats, rep);
1316        }
1317    }
1318}
1319
1320/* Return a new svn_fs_fs__stats_t instance, allocated in RESULT_POOL.
1321 */
1322static svn_fs_fs__stats_t *
1323create_stats(apr_pool_t *result_pool)
1324{
1325  svn_fs_fs__stats_t *stats = apr_pcalloc(result_pool, sizeof(*stats));
1326
1327  initialize_largest_changes(stats, 64, result_pool);
1328  stats->by_extension = apr_hash_make(result_pool);
1329
1330  return stats;
1331}
1332
1333/* Create a *QUERY, allocated in RESULT_POOL, reading filesystem FS and
1334 * collecting results in STATS.  Store the optional PROCESS_FUNC and
1335 * PROGRESS_BATON as well as CANCEL_FUNC and CANCEL_BATON in *QUERY, too.
1336 * Use SCRATCH_POOL for temporary allocations.
1337 */
1338static svn_error_t *
1339create_query(query_t **query,
1340             svn_fs_t *fs,
1341             svn_fs_fs__stats_t *stats,
1342             svn_fs_progress_notify_func_t progress_func,
1343             void *progress_baton,
1344             svn_cancel_func_t cancel_func,
1345             void *cancel_baton,
1346             apr_pool_t *result_pool,
1347             apr_pool_t *scratch_pool)
1348{
1349  *query = apr_pcalloc(result_pool, sizeof(**query));
1350
1351  /* Read repository dimensions. */
1352  (*query)->shard_size = svn_fs_fs__shard_size(fs);
1353  SVN_ERR(svn_fs_fs__youngest_rev(&(*query)->head, fs, scratch_pool));
1354  SVN_ERR(svn_fs_fs__min_unpacked_rev(&(*query)->min_unpacked_rev, fs,
1355                                      scratch_pool));
1356
1357  /* create data containers and caches
1358   * Note: this assumes that int is at least 32-bits and that we only support
1359   * 32-bit wide revision numbers (actually 31-bits due to the signedness
1360   * of both the nelts field of the array and our revision numbers). This
1361   * means this code will fail on platforms where int is less than 32-bits
1362   * and the repository has more revisions than int can hold. */
1363  (*query)->revisions = apr_array_make(result_pool, (int) (*query)->head + 1,
1364                                       sizeof(revision_info_t *));
1365  (*query)->null_base = apr_pcalloc(result_pool,
1366                                    sizeof(*(*query)->null_base));
1367
1368  /* Store other parameters */
1369  (*query)->fs = fs;
1370  (*query)->stats = stats;
1371  (*query)->progress_func = progress_func;
1372  (*query)->progress_baton = progress_baton;
1373  (*query)->cancel_func = cancel_func;
1374  (*query)->cancel_baton = cancel_baton;
1375
1376  return SVN_NO_ERROR;
1377}
1378
1379svn_error_t *
1380svn_fs_fs__get_stats(svn_fs_fs__stats_t **stats,
1381                     svn_fs_t *fs,
1382                     svn_fs_progress_notify_func_t progress_func,
1383                     void *progress_baton,
1384                     svn_cancel_func_t cancel_func,
1385                     void *cancel_baton,
1386                     apr_pool_t *result_pool,
1387                     apr_pool_t *scratch_pool)
1388{
1389  query_t *query;
1390
1391  *stats = create_stats(result_pool);
1392  SVN_ERR(create_query(&query, fs, *stats, progress_func, progress_baton,
1393                       cancel_func, cancel_baton, scratch_pool,
1394                       scratch_pool));
1395  SVN_ERR(read_revisions(query, scratch_pool, scratch_pool));
1396  aggregate_stats(query->revisions, *stats);
1397
1398  return SVN_NO_ERROR;
1399}
1400
1401/* Baton for rev_size_index_entry_cb. */
1402struct rev_size_baton_t {
1403  svn_revnum_t revision;
1404  apr_off_t rev_size;
1405};
1406
1407/* Implements svn_fs_fs__dump_index_func_t, summing object sizes for
1408 * revision BATON->revision into BATON->rev_size.
1409 */
1410static svn_error_t *
1411rev_size_index_entry_cb(const svn_fs_fs__p2l_entry_t *entry,
1412                        void *baton,
1413                        apr_pool_t *scratch_pool)
1414{
1415  struct rev_size_baton_t *b = baton;
1416
1417  if (entry->item.revision == b->revision)
1418    b->rev_size += entry->size;
1419  return SVN_NO_ERROR;
1420}
1421
1422svn_error_t *
1423svn_fs_fs__revision_size(apr_off_t *rev_size,
1424                         svn_fs_t *fs,
1425                         svn_revnum_t revision,
1426                         apr_pool_t *scratch_pool)
1427{
1428  /* Get the size of the revision (excluding rev-props) */
1429  if (svn_fs_fs__use_log_addressing(fs))
1430    {
1431      /* This works for a packed or a non-packed revision.
1432         We could provide an optimized case for a non-packed revision
1433         using svn_fs_fs__p2l_get_max_offset(). */
1434      struct rev_size_baton_t b = { 0, 0 };
1435
1436      b.revision = revision;
1437      SVN_ERR(svn_fs_fs__dump_index(fs, revision,
1438                                    rev_size_index_entry_cb, &b,
1439                                    NULL, NULL, scratch_pool));
1440      *rev_size = b.rev_size;
1441    }
1442  else
1443    {
1444      svn_fs_fs__revision_file_t *rev_file;
1445      svn_revnum_t min_unpacked_rev;
1446
1447      SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
1448                                               scratch_pool, scratch_pool));
1449      SVN_ERR(svn_fs_fs__min_unpacked_rev(&min_unpacked_rev, fs,
1450                                          scratch_pool));
1451      if (revision < min_unpacked_rev)
1452        {
1453          int shard_size = svn_fs_fs__shard_size(fs);
1454          apr_off_t start_offset, end_offset;
1455
1456          SVN_ERR(svn_fs_fs__get_packed_offset(&start_offset, fs, revision,
1457                                               scratch_pool));
1458          if (((revision + 1) % shard_size) == 0)
1459            {
1460              svn_filesize_t file_size;
1461
1462              SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool));
1463              end_offset = (apr_off_t)file_size;
1464            }
1465          else
1466            {
1467              SVN_ERR(svn_fs_fs__get_packed_offset(&end_offset, fs,
1468                                                   revision + 1, scratch_pool));
1469            }
1470          *rev_size = (end_offset - start_offset);
1471        }
1472      else
1473        {
1474          svn_filesize_t file_size;
1475
1476          SVN_ERR(svn_io_file_size_get(&file_size, rev_file->file, scratch_pool));
1477          *rev_size = (apr_off_t)file_size;
1478        }
1479
1480      SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1481    }
1482
1483  /* Add the size of the rev-props */
1484  {
1485    apr_off_t size;
1486
1487    SVN_ERR(svn_fs_fs__get_revision_props_size(&size, fs, revision, scratch_pool));
1488    *rev_size += size;
1489  }
1490
1491  return SVN_NO_ERROR;
1492}
1493