1289177Speter/* stats.c -- implements the svn_fs_fs__get_stats private API.
2289177Speter *
3289177Speter * ====================================================================
4289177Speter *    Licensed to the Apache Software Foundation (ASF) under one
5289177Speter *    or more contributor license agreements.  See the NOTICE file
6289177Speter *    distributed with this work for additional information
7289177Speter *    regarding copyright ownership.  The ASF licenses this file
8289177Speter *    to you under the Apache License, Version 2.0 (the
9289177Speter *    "License"); you may not use this file except in compliance
10289177Speter *    with the License.  You may obtain a copy of the License at
11289177Speter *
12289177Speter *      http://www.apache.org/licenses/LICENSE-2.0
13289177Speter *
14289177Speter *    Unless required by applicable law or agreed to in writing,
15289177Speter *    software distributed under the License is distributed on an
16289177Speter *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17289177Speter *    KIND, either express or implied.  See the License for the
18289177Speter *    specific language governing permissions and limitations
19289177Speter *    under the License.
20289177Speter * ====================================================================
21289177Speter */
22289177Speter
23289177Speter#include "svn_dirent_uri.h"
24289177Speter#include "svn_fs.h"
25289177Speter#include "svn_pools.h"
26289177Speter#include "svn_sorts.h"
27289177Speter
28289177Speter#include "private/svn_cache.h"
29289177Speter#include "private/svn_sorts_private.h"
30289177Speter#include "private/svn_string_private.h"
31289177Speter#include "private/svn_fs_fs_private.h"
32289177Speter
33289177Speter#include "index.h"
34289177Speter#include "pack.h"
35289177Speter#include "rev_file.h"
36289177Speter#include "util.h"
37289177Speter#include "fs_fs.h"
38289177Speter#include "cached_data.h"
39289177Speter#include "low_level.h"
40289177Speter
41289177Speter#include "../libsvn_fs/fs-loader.h"
42289177Speter
43289177Speter#include "svn_private_config.h"
44289177Speter
45289177Speter/* We group representations into 2x2 different kinds plus one default:
46289177Speter * [dir / file] x [text / prop]. The kind is assigned by the first noderev
47289177Speter * that references the respective representation (see read_noderev()).
48289177Speter */
49289177Spetertypedef enum rep_kind_t
50289177Speter{
51289177Speter  /* The representation is not used _directly_, i.e. not referenced by any
52289177Speter   * noderev. However, some other representation may use it as delta base.
53289177Speter   * Null value. Should not occur in real-world repositories. */
54289177Speter  unused_rep,
55289177Speter
56289177Speter  /* a property rep referenced by a directory noderev */
57289177Speter  dir_property_rep,
58289177Speter
59289177Speter  /* a property rep referenced by a file noderev */
60289177Speter  file_property_rep,
61289177Speter
62289177Speter  /* a data (text) rep referenced by a directory noderev */
63289177Speter  dir_rep,
64289177Speter
65289177Speter  /* a data (text) rep referenced by a file noderev */
66289177Speter  file_rep
67289177Speter} rep_kind_t;
68289177Speter
69289177Speter/* Statistics about a single representation, see parse_representation().
70289177Speter */
71289177Spetertypedef struct rep_stats_t
72289177Speter{
73289177Speter  /* offset of the rep (parse_representation() stores item_index here) */
74289177Speter  apr_off_t offset;
75289177Speter
76289177Speter  /* item length in bytes */
77289177Speter  apr_uint64_t size;
78289177Speter
79289177Speter  /* item length after de-deltification (SIZE if the rep records none) */
80289177Speter  apr_uint64_t expanded_size;
81289177Speter
82289177Speter  /* revision that contains this representation
83289177Speter   * (may be referenced by other revisions, though) */
84289177Speter  svn_revnum_t revision;
85289177Speter
86289177Speter  /* number of noderevs that reference this representation */
87289177Speter  apr_uint32_t ref_count;
88289177Speter
89289177Speter  /* length of the PLAIN / DELTA line in the source file in bytes */
90289177Speter  apr_uint16_t header_size;
91289177Speter
92289177Speter  /* classification of the representation; holds a rep_kind_t value */
93289177Speter  char kind;
94289177Speter
95289177Speter} rep_stats_t;
96289177Speter
97289177Speter/* Represents a single revision.
98289177Speter * There will be only one instance per revision. */
99289177Spetertypedef struct revision_info_t
100289177Speter{
101289177Speter  /* number of this revision */
102289177Speter  svn_revnum_t revision;
103289177Speter
104289177Speter  /* pack file offset (manifest value), 0 for non-packed files */
105289177Speter  apr_off_t offset;
106289177Speter
107289177Speter  /* length of the changes list in bytes */
108289177Speter  apr_uint64_t changes_len;
109289177Speter
110289177Speter  /* number of entries in the changed paths list of this revision */
111289177Speter  apr_uint64_t change_count;
112289177Speter
113289177Speter  /* first offset behind the revision data in the pack file (file length
114289177Speter   * for non-packed revs) */
115289177Speter  apr_off_t end;
116289177Speter
117289177Speter  /* number of directory noderevs in this revision */
118289177Speter  apr_uint64_t dir_noderev_count;
119289177Speter
120289177Speter  /* number of file noderevs in this revision */
121289177Speter  apr_uint64_t file_noderev_count;
122289177Speter
123289177Speter  /* total size of directory noderevs (i.e. the structs - not the rep) */
124289177Speter  apr_uint64_t dir_noderev_size;
125289177Speter
126289177Speter  /* total size of file noderevs (i.e. the structs - not the rep) */
127289177Speter  apr_uint64_t file_noderev_size;
128289177Speter
129289177Speter  /* all rep_stats_t of this revision, kept sorted by their OFFSET so that
130289177Speter   * find_representation() can use a binary search on it */
131289177Speter  apr_array_header_t *representations;
132289177Speter
133289177Speter  /* Temporary rev / pack file access object, used in phys. addressing
134289177Speter   * mode only.  NULL when done reading this revision. */
135289177Speter  svn_fs_fs__revision_file_t *rev_file;
136289177Speter} revision_info_t;
137289177Speter
138289177Speter/* Root data structure containing all information about a given repository.
139289177Speter * We use it as a wrapper around svn_fs_t and pass it around where we would
140289177Speter * otherwise just use a svn_fs_t.
141289177Speter */
142289177Spetertypedef struct query_t
143289177Speter{
144289177Speter  /* FS API object */
145289177Speter  svn_fs_t *fs;
146289177Speter
147289177Speter  /* The HEAD revision. */
148289177Speter  svn_revnum_t head;
149289177Speter
150289177Speter  /* Number of revs per shard; 0 for non-sharded repos. */
151289177Speter  int shard_size;
152289177Speter
153289177Speter  /* First non-packed revision. */
154289177Speter  svn_revnum_t min_unpacked_rev;
155289177Speter
156289177Speter  /* all revisions (revision_info_t *), indexed by revision number */
157289177Speter  apr_array_header_t *revisions;
158289177Speter
159289177Speter  /* empty representation.
160289177Speter   * Used as a dummy base for DELTA reps without base. */
161289177Speter  rep_stats_t *null_base;
162289177Speter
163289177Speter  /* collected statistics */
164289177Speter  svn_fs_fs__stats_t *stats;
165289177Speter
166289177Speter  /* Progress notification callback to call after each shard.  May be NULL. */
167289177Speter  svn_fs_progress_notify_func_t progress_func;
168289177Speter
169289177Speter  /* Baton for PROGRESS_FUNC. */
170289177Speter  void *progress_baton;
171289177Speter
172289177Speter  /* Cancellation support callback to call once in a while.  May be NULL. */
173289177Speter  svn_cancel_func_t cancel_func;
174289177Speter
175289177Speter  /* Baton for CANCEL_FUNC. */
176289177Speter  void *cancel_baton;
177289177Speter} query_t;
178289177Speter
179289177Speter/* Return the length of REV_FILE in *FILE_SIZE.
180289177Speter * Use SCRATCH_POOL for temporary allocations.
181289177Speter */
182289177Speterstatic svn_error_t *
183289177Speterget_file_size(apr_off_t *file_size,
184289177Speter              svn_fs_fs__revision_file_t *rev_file,
185289177Speter              apr_pool_t *scratch_pool)
186289177Speter{
187289177Speter  apr_finfo_t finfo;
188289177Speter
189289177Speter  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, rev_file->file,
190289177Speter                               scratch_pool));
191289177Speter
192289177Speter  *file_size = finfo.size;
193289177Speter  return SVN_NO_ERROR;
194289177Speter}
195289177Speter
196289177Speter/* Initialize the LARGEST_CHANGES member in STATS with a capacity of COUNT
197289177Speter * entries.  Allocate the result in RESULT_POOL.
198289177Speter */
199289177Speterstatic void
200289177Speterinitialize_largest_changes(svn_fs_fs__stats_t *stats,
201289177Speter                           apr_size_t count,
202289177Speter                           apr_pool_t *result_pool)
203289177Speter{
204289177Speter  apr_size_t i;
205289177Speter
206289177Speter  stats->largest_changes = apr_pcalloc(result_pool,
207289177Speter                                       sizeof(*stats->largest_changes));
208289177Speter  stats->largest_changes->count = count;
209289177Speter  stats->largest_changes->min_size = 1;
210289177Speter  stats->largest_changes->changes
211289177Speter    = apr_palloc(result_pool, count * sizeof(*stats->largest_changes->changes));
212289177Speter
213289177Speter  /* allocate *all* entries before the path stringbufs.  This increases
214289177Speter   * cache locality and enhances performance significantly. */
215289177Speter  for (i = 0; i < count; ++i)
216289177Speter    stats->largest_changes->changes[i]
217289177Speter      = apr_palloc(result_pool, sizeof(**stats->largest_changes->changes));
218289177Speter
219289177Speter  /* now initialize them and allocate the stringbufs */
220289177Speter  for (i = 0; i < count; ++i)
221289177Speter    {
222289177Speter      stats->largest_changes->changes[i]->size = 0;
223289177Speter      stats->largest_changes->changes[i]->revision = SVN_INVALID_REVNUM;
224289177Speter      stats->largest_changes->changes[i]->path
225289177Speter        = svn_stringbuf_create_ensure(1024, result_pool);
226289177Speter    }
227289177Speter}
228289177Speter
229289177Speter/* Add entry for SIZE to HISTOGRAM.
230289177Speter */
231289177Speterstatic void
232289177Speteradd_to_histogram(svn_fs_fs__histogram_t *histogram,
233289177Speter                 apr_int64_t size)
234289177Speter{
235289177Speter  apr_int64_t shift = 0;
236289177Speter
237289177Speter  while (((apr_int64_t)(1) << shift) <= size)
238289177Speter    shift++;
239289177Speter
240289177Speter  histogram->total.count++;
241289177Speter  histogram->total.sum += size;
242289177Speter  histogram->lines[(apr_size_t)shift].count++;
243289177Speter  histogram->lines[(apr_size_t)shift].sum += size;
244289177Speter}
245289177Speter
246289177Speter/* Update data aggregators in STATS with this representation of type KIND,
247289177Speter * on-disk REP_SIZE and expanded node size EXPANDED_SIZE for PATH in REVSION.
248289177Speter * PLAIN_ADDED indicates whether the node has a deltification predecessor.
249289177Speter */
250289177Speterstatic void
251289177Speteradd_change(svn_fs_fs__stats_t *stats,
252289177Speter           apr_uint64_t rep_size,
253289177Speter           apr_uint64_t expanded_size,
254289177Speter           svn_revnum_t revision,
255289177Speter           const char *path,
256289177Speter           rep_kind_t kind,
257289177Speter           svn_boolean_t plain_added)
258289177Speter{
259289177Speter  /* identify largest reps */
260289177Speter  if (rep_size >= stats->largest_changes->min_size)
261289177Speter    {
262289177Speter      apr_size_t i;
263289177Speter      svn_fs_fs__largest_changes_t *largest_changes = stats->largest_changes;
264289177Speter      svn_fs_fs__large_change_info_t *info
265289177Speter        = largest_changes->changes[largest_changes->count - 1];
266289177Speter      info->size = rep_size;
267289177Speter      info->revision = revision;
268289177Speter      svn_stringbuf_set(info->path, path);
269289177Speter
270289177Speter      /* linear insertion but not too bad since count is low and insertions
271289177Speter       * near the end are more likely than close to front */
272289177Speter      for (i = largest_changes->count - 1; i > 0; --i)
273289177Speter        if (largest_changes->changes[i-1]->size >= rep_size)
274289177Speter          break;
275289177Speter        else
276289177Speter          largest_changes->changes[i] = largest_changes->changes[i-1];
277289177Speter
278289177Speter      largest_changes->changes[i] = info;
279289177Speter      largest_changes->min_size
280289177Speter        = largest_changes->changes[largest_changes->count-1]->size;
281289177Speter    }
282289177Speter
283289177Speter  /* global histograms */
284289177Speter  add_to_histogram(&stats->rep_size_histogram, rep_size);
285289177Speter  add_to_histogram(&stats->node_size_histogram, expanded_size);
286289177Speter
287289177Speter  if (plain_added)
288289177Speter    {
289289177Speter      add_to_histogram(&stats->added_rep_size_histogram, rep_size);
290289177Speter      add_to_histogram(&stats->added_node_size_histogram, expanded_size);
291289177Speter    }
292289177Speter
293289177Speter  /* specific histograms by type */
294289177Speter  switch (kind)
295289177Speter    {
296289177Speter      case unused_rep:
297289177Speter        add_to_histogram(&stats->unused_rep_histogram, rep_size);
298289177Speter        break;
299289177Speter      case dir_property_rep:
300289177Speter        add_to_histogram(&stats->dir_prop_rep_histogram, rep_size);
301289177Speter        add_to_histogram(&stats->dir_prop_histogram, expanded_size);
302289177Speter        break;
303289177Speter      case file_property_rep:
304289177Speter        add_to_histogram(&stats->file_prop_rep_histogram, rep_size);
305289177Speter        add_to_histogram(&stats->file_prop_histogram, expanded_size);
306289177Speter        break;
307289177Speter      case dir_rep:
308289177Speter        add_to_histogram(&stats->dir_rep_histogram, rep_size);
309289177Speter        add_to_histogram(&stats->dir_histogram, expanded_size);
310289177Speter        break;
311289177Speter      case file_rep:
312289177Speter        add_to_histogram(&stats->file_rep_histogram, rep_size);
313289177Speter        add_to_histogram(&stats->file_histogram, expanded_size);
314289177Speter        break;
315289177Speter    }
316289177Speter
317289177Speter  /* by extension */
318289177Speter  if (kind == file_rep)
319289177Speter    {
320289177Speter      /* determine extension */
321289177Speter      svn_fs_fs__extension_info_t *info;
322289177Speter      const char * file_name = strrchr(path, '/');
323289177Speter      const char * extension = file_name ? strrchr(file_name, '.') : NULL;
324289177Speter
325289177Speter      if (extension == NULL || extension == file_name + 1)
326289177Speter        extension = "(none)";
327289177Speter
328289177Speter      /* get / auto-insert entry for this extension */
329289177Speter      info = apr_hash_get(stats->by_extension, extension, APR_HASH_KEY_STRING);
330289177Speter      if (info == NULL)
331289177Speter        {
332289177Speter          apr_pool_t *pool = apr_hash_pool_get(stats->by_extension);
333289177Speter          info = apr_pcalloc(pool, sizeof(*info));
334289177Speter          info->extension = apr_pstrdup(pool, extension);
335289177Speter
336289177Speter          apr_hash_set(stats->by_extension, info->extension,
337289177Speter                       APR_HASH_KEY_STRING, info);
338289177Speter        }
339289177Speter
340289177Speter      /* update per-extension histogram */
341289177Speter      add_to_histogram(&info->node_histogram, expanded_size);
342289177Speter      add_to_histogram(&info->rep_histogram, rep_size);
343289177Speter    }
344289177Speter}
345289177Speter
346289177Speter/* Comparator used for binary search comparing the absolute file offset
347289177Speter * of a representation to some other offset. DATA is a *rep_stats_t,
348289177Speter * KEY is a pointer to an apr_off_t.
349289177Speter */
350289177Speterstatic int
351289177Spetercompare_representation_offsets(const void *data, const void *key)
352289177Speter{
353289177Speter  apr_off_t lhs = (*(const rep_stats_t *const *)data)->offset;
354289177Speter  apr_off_t rhs = *(const apr_off_t *)key;
355289177Speter
356289177Speter  if (lhs < rhs)
357289177Speter    return -1;
358289177Speter  return (lhs > rhs ? 1 : 0);
359289177Speter}
360289177Speter
361289177Speter/* Find the revision_info_t object to the given REVISION in QUERY and
362289177Speter * return it in *REVISION_INFO. For performance reasons, we skip the
363289177Speter * lookup if the info is already provided.
364289177Speter *
365289177Speter * In that revision, look for the rep_stats_t object for offset OFFSET.
366289177Speter * If it already exists, set *IDX to its index in *REVISION_INFO's
367289177Speter * representations list and return the representation object. Otherwise,
368289177Speter * set the index to where it must be inserted and return NULL.
369289177Speter */
370289177Speterstatic rep_stats_t *
371289177Speterfind_representation(int *idx,
372289177Speter                    query_t *query,
373289177Speter                    revision_info_t **revision_info,
374289177Speter                    svn_revnum_t revision,
375289177Speter                    apr_off_t offset)
376289177Speter{
377289177Speter  revision_info_t *info;
378289177Speter  *idx = -1;
379289177Speter
380289177Speter  /* first let's find the revision */
381289177Speter  info = revision_info ? *revision_info : NULL;
382289177Speter  if (info == NULL || info->revision != revision)
383289177Speter    {
384289177Speter      info = APR_ARRAY_IDX(query->revisions, revision, revision_info_t*);
385289177Speter      if (revision_info)
386289177Speter        *revision_info = info;
387289177Speter    }
388289177Speter
389289177Speter  /* not found -> no result */
390289177Speter  if (info == NULL)
391289177Speter    return NULL;
392289177Speter
393289177Speter  /* look for the representation */
394289177Speter  *idx = svn_sort__bsearch_lower_bound(info->representations,
395289177Speter                                       &offset,
396289177Speter                                       compare_representation_offsets);
397289177Speter  if (*idx < info->representations->nelts)
398289177Speter    {
399289177Speter      /* return the representation, if this is the one we were looking for */
400289177Speter      rep_stats_t *result
401289177Speter        = APR_ARRAY_IDX(info->representations, *idx, rep_stats_t *);
402289177Speter      if (result->offset == offset)
403289177Speter        return result;
404289177Speter    }
405289177Speter
406289177Speter  /* not parsed, yet */
407289177Speter  return NULL;
408289177Speter}
409289177Speter
410289177Speter/* Find / auto-construct the representation stats for REP in QUERY and
411289177Speter * return it in *REPRESENTATION.
412289177Speter *
413289177Speter * If necessary, allocate the result in RESULT_POOL; use SCRATCH_POOL for
414289177Speter * temporary allocations.
415289177Speter */
416289177Speterstatic svn_error_t *
417289177Speterparse_representation(rep_stats_t **representation,
418289177Speter                     query_t *query,
419289177Speter                     representation_t *rep,
420289177Speter                     revision_info_t *revision_info,
421289177Speter                     apr_pool_t *result_pool,
422289177Speter                     apr_pool_t *scratch_pool)
423289177Speter{
424289177Speter  rep_stats_t *result;
425289177Speter  int idx;
426289177Speter
427289177Speter  /* read location (revision, offset) and size */
428289177Speter
429289177Speter  /* look it up */
430289177Speter  result = find_representation(&idx, query, &revision_info, rep->revision,
431289177Speter                               (apr_off_t)rep->item_index);
432289177Speter  if (!result)
433289177Speter    {
434289177Speter      /* not parsed, yet (probably a rep in the same revision).
435289177Speter       * Create a new rep object and determine its base rep as well.
436289177Speter       */
437289177Speter      result = apr_pcalloc(result_pool, sizeof(*result));
438289177Speter      result->revision = rep->revision;
439289177Speter      result->expanded_size = (rep->expanded_size ? rep->expanded_size
440289177Speter                                                  : rep->size);
441289177Speter      result->offset = (apr_off_t)rep->item_index;
442289177Speter      result->size = rep->size;
443289177Speter
444289177Speter      /* In phys. addressing mode, follow link to the actual representation.
445289177Speter       * In log. addressing mode, we will find it already as part of our
446289177Speter       * linear walk through the whole file. */
447289177Speter      if (!svn_fs_fs__use_log_addressing(query->fs))
448289177Speter        {
449289177Speter          svn_fs_fs__rep_header_t *header;
450289177Speter          apr_off_t offset = revision_info->offset + result->offset;
451289177Speter
452289177Speter          SVN_ERR_ASSERT(revision_info->rev_file);
453289177Speter          SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET,
454289177Speter                                   &offset, scratch_pool));
455289177Speter          SVN_ERR(svn_fs_fs__read_rep_header(&header,
456289177Speter                                             revision_info->rev_file->stream,
457289177Speter                                             scratch_pool, scratch_pool));
458289177Speter
459289177Speter          result->header_size = header->header_size;
460289177Speter        }
461289177Speter
462289177Speter      svn_sort__array_insert(revision_info->representations, &result, idx);
463289177Speter    }
464289177Speter
465289177Speter  *representation = result;
466289177Speter
467289177Speter  return SVN_NO_ERROR;
468289177Speter}
469289177Speter
470289177Speter
471289177Speter/* forward declaration: parse_dir() and read_noderev() call each other */
472289177Speterstatic svn_error_t *
473289177Speterread_noderev(query_t *query,
474289177Speter             svn_stringbuf_t *noderev_str,
475289177Speter             revision_info_t *revision_info,
476289177Speter             apr_pool_t *result_pool,
477289177Speter             apr_pool_t *scratch_pool);
478289177Speter
479289177Speter/* Read the noderev item at OFFSET in REVISION_INFO from the filesystem
480289177Speter * provided by QUERY.  Return it in *NODEREV, allocated in RESULT_POOL.
481289177Speter * Use SCRATCH_POOL for temporary allocations.
482289177Speter *
483289177Speter * The textual representation of the noderev will be used to determine
484289177Speter * the on-disk size of the noderev.  Only called in phys. addressing mode.
485289177Speter */
486289177Speterstatic svn_error_t *
487289177Speterread_phsy_noderev(svn_stringbuf_t **noderev,
488289177Speter                  query_t *query,
489289177Speter                  apr_off_t offset,
490289177Speter                  revision_info_t *revision_info,
491289177Speter                  apr_pool_t *result_pool,
492289177Speter                  apr_pool_t *scratch_pool)
493289177Speter{
494289177Speter  svn_stringbuf_t *noderev_str = svn_stringbuf_create_empty(result_pool);
495289177Speter  svn_stringbuf_t *line;
496289177Speter  svn_boolean_t eof;
497289177Speter
498289177Speter  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
499289177Speter
500289177Speter  /* Navigate the file stream to the start of noderev. */
501289177Speter  SVN_ERR_ASSERT(revision_info->rev_file);
502289177Speter
503289177Speter  offset += revision_info->offset;
504289177Speter  SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET,
505289177Speter                           &offset, scratch_pool));
506289177Speter
507289177Speter  /* Read it (terminated by an empty line) */
508289177Speter  do
509289177Speter    {
510289177Speter      svn_pool_clear(iterpool);
511289177Speter
512289177Speter      SVN_ERR(svn_stream_readline(revision_info->rev_file->stream, &line,
513289177Speter                                  "\n", &eof, iterpool));
514289177Speter      svn_stringbuf_appendstr(noderev_str, line);
515289177Speter      svn_stringbuf_appendbyte(noderev_str, '\n');
516289177Speter    }
517289177Speter  while (line->len > 0 && !eof);
518289177Speter
519289177Speter  /* Return the result. */
520289177Speter  *noderev = noderev_str;
521289177Speter
522289177Speter  svn_pool_destroy(iterpool);
523289177Speter
524289177Speter  return SVN_NO_ERROR;
525289177Speter}
526289177Speter
527289177Speter/* Starting at the directory in NODEREV's text, read all DAG nodes,
528289177Speter * directories and representations linked in that tree structure.
529289177Speter * Store them in QUERY and REVISION_INFO.  Also, read them only once.
530289177Speter *
531289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
532289177Speter * temporaries.
533289177Speter */
534289177Speterstatic svn_error_t *
535289177Speterparse_dir(query_t *query,
536289177Speter          node_revision_t *noderev,
537289177Speter          revision_info_t *revision_info,
538289177Speter          apr_pool_t *result_pool,
539289177Speter          apr_pool_t *scratch_pool)
540289177Speter{
541289177Speter  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
542289177Speter
543289177Speter  int i;
544289177Speter  apr_array_header_t *entries;
545289177Speter  SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, query->fs, noderev,
546289177Speter                                      scratch_pool, scratch_pool));
547289177Speter
548289177Speter  for (i = 0; i < entries->nelts; ++i)
549289177Speter    {
550289177Speter      svn_fs_dirent_t *dirent = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
551289177Speter
552289177Speter      if (svn_fs_fs__id_rev(dirent->id) == revision_info->revision)
553289177Speter        {
554289177Speter          svn_stringbuf_t *noderev_str;
555289177Speter          svn_pool_clear(iterpool);
556289177Speter
557289177Speter          SVN_ERR(read_phsy_noderev(&noderev_str, query,
558289177Speter                                    svn_fs_fs__id_item(dirent->id),
559289177Speter                                    revision_info, iterpool, iterpool));
560289177Speter          SVN_ERR(read_noderev(query, noderev_str, revision_info,
561289177Speter                               result_pool, iterpool));
562289177Speter        }
563289177Speter    }
564289177Speter
565289177Speter  svn_pool_destroy(iterpool);
566289177Speter
567289177Speter  return SVN_NO_ERROR;
568289177Speter}
569289177Speter
570289177Speter/* Parse the noderev given as NODEREV_STR and store the info in QUERY and
571289177Speter * REVISION_INFO.  In phys. addressing mode, continue reading all DAG nodes,
572289177Speter * directories and representations linked in that tree structure.
573289177Speter *
574289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
575289177Speter * temporaries.
576289177Speter */
577289177Speterstatic svn_error_t *
578289177Speterread_noderev(query_t *query,
579289177Speter             svn_stringbuf_t *noderev_str,
580289177Speter             revision_info_t *revision_info,
581289177Speter             apr_pool_t *result_pool,
582289177Speter             apr_pool_t *scratch_pool)
583289177Speter{
584289177Speter  rep_stats_t *text = NULL;
585289177Speter  rep_stats_t *props = NULL;
586289177Speter  node_revision_t *noderev;
587289177Speter
588289177Speter  svn_stream_t *stream = svn_stream_from_stringbuf(noderev_str, scratch_pool);
589289177Speter  SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, scratch_pool,
590289177Speter                                  scratch_pool));
591289177Speter
592289177Speter  if (noderev->data_rep)
593289177Speter    {
594289177Speter      SVN_ERR(parse_representation(&text, query,
595289177Speter                                   noderev->data_rep, revision_info,
596289177Speter                                   result_pool, scratch_pool));
597289177Speter
598289177Speter      /* if we are the first to use this rep, mark it as "text rep" */
599289177Speter      if (++text->ref_count == 1)
600289177Speter        text->kind = noderev->kind == svn_node_dir ? dir_rep : file_rep;
601289177Speter    }
602289177Speter
603289177Speter  if (noderev->prop_rep)
604289177Speter    {
605289177Speter      SVN_ERR(parse_representation(&props, query,
606289177Speter                                   noderev->prop_rep, revision_info,
607289177Speter                                   result_pool, scratch_pool));
608289177Speter
609289177Speter      /* if we are the first to use this rep, mark it as "prop rep" */
610289177Speter      if (++props->ref_count == 1)
611289177Speter        props->kind = noderev->kind == svn_node_dir ? dir_property_rep
612289177Speter                                                    : file_property_rep;
613289177Speter    }
614289177Speter
615289177Speter  /* record largest changes */
616289177Speter  if (text && text->ref_count == 1)
617289177Speter    add_change(query->stats, text->size, text->expanded_size, text->revision,
618289177Speter               noderev->created_path, text->kind, !noderev->predecessor_id);
619289177Speter  if (props && props->ref_count == 1)
620289177Speter    add_change(query->stats, props->size, props->expanded_size,
621289177Speter               props->revision, noderev->created_path, props->kind,
622289177Speter               !noderev->predecessor_id);
623289177Speter
624289177Speter  /* if this is a directory and has not been processed, yet, read and
625289177Speter   * process it recursively */
626289177Speter  if (   noderev->kind == svn_node_dir && text && text->ref_count == 1
627289177Speter      && !svn_fs_fs__use_log_addressing(query->fs))
628289177Speter    SVN_ERR(parse_dir(query, noderev, revision_info, result_pool,
629289177Speter                      scratch_pool));
630289177Speter
631289177Speter  /* update stats */
632289177Speter  if (noderev->kind == svn_node_dir)
633289177Speter    {
634289177Speter      revision_info->dir_noderev_size += noderev_str->len;
635289177Speter      revision_info->dir_noderev_count++;
636289177Speter    }
637289177Speter  else
638289177Speter    {
639289177Speter      revision_info->file_noderev_size += noderev_str->len;
640289177Speter      revision_info->file_noderev_count++;
641289177Speter    }
642289177Speter
643289177Speter  return SVN_NO_ERROR;
644289177Speter}
645289177Speter
646289177Speter/* For the revision given as REVISION_INFO within QUERY, determine the number
647289177Speter * of entries in its changed paths list and store that info in REVISION_INFO.
648289177Speter * Use SCRATCH_POOL for temporary allocations.
649289177Speter */
650289177Speterstatic svn_error_t *
651289177Speterget_phys_change_count(query_t *query,
652289177Speter                      revision_info_t *revision_info,
653289177Speter                      apr_pool_t *scratch_pool)
654289177Speter{
655289177Speter  /* We are going to use our own sub-pool here because the changes object
656289177Speter   * may well be >100MB and SCRATCH_POOL may not get cleared until all other
657289177Speter   * info has been read by read_phys_revision().  Therefore, tidy up early.
658289177Speter   */
659289177Speter  apr_pool_t *subpool = svn_pool_create(scratch_pool);
660289177Speter  apr_array_header_t *changes;
661289177Speter
662289177Speter  SVN_ERR(svn_fs_fs__get_changes(&changes, query->fs,
663289177Speter                                 revision_info->revision, subpool));
664289177Speter  revision_info->change_count = changes->nelts;
665289177Speter
666289177Speter  /* Release potentially tons of memory. */
667289177Speter  svn_pool_destroy(subpool);
668289177Speter
669289177Speter  return SVN_NO_ERROR;
670289177Speter}
671289177Speter
672289177Speter/* Read header information for the revision stored in FILE_CONTENT (one
673289177Speter * whole revision).  Return the offsets within FILE_CONTENT for the
674289177Speter * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN.
675289177Speter * Use POOL for temporary allocations. */
676289177Speterstatic svn_error_t *
677289177Speterread_phys_revision(query_t *query,
678289177Speter                   revision_info_t *info,
679289177Speter                   apr_pool_t *result_pool,
680289177Speter                   apr_pool_t *scratch_pool)
681289177Speter{
682289177Speter  char buf[64];
683289177Speter  apr_off_t root_node_offset;
684289177Speter  apr_off_t changes_offset;
685289177Speter  svn_stringbuf_t *trailer;
686289177Speter  svn_stringbuf_t *noderev_str;
687289177Speter
688289177Speter  /* Read the last 64 bytes of the revision (if long enough). */
689289177Speter  apr_off_t start = MAX(info->offset, info->end - sizeof(buf));
690289177Speter  apr_size_t len = (apr_size_t)(info->end - start);
691289177Speter  SVN_ERR(svn_io_file_seek(info->rev_file->file, APR_SET, &start,
692289177Speter                           scratch_pool));
693289177Speter  SVN_ERR(svn_io_file_read_full2(info->rev_file->file, buf, len, NULL, NULL,
694289177Speter                                 scratch_pool));
695289177Speter  trailer = svn_stringbuf_ncreate(buf, len, scratch_pool);
696289177Speter
697289177Speter  /* Parse that trailer. */
698289177Speter  SVN_ERR(svn_fs_fs__parse_revision_trailer(&root_node_offset,
699289177Speter                                            &changes_offset, trailer,
700289177Speter                                            info->revision));
701289177Speter  SVN_ERR(get_phys_change_count(query, info, scratch_pool));
702289177Speter
703289177Speter  /* Calculate the length of the changes list. */
704289177Speter  trailer = svn_fs_fs__unparse_revision_trailer(root_node_offset,
705289177Speter                                                changes_offset,
706289177Speter                                                scratch_pool);
707289177Speter  info->changes_len = info->end - info->offset - changes_offset
708289177Speter                    - trailer->len;
709289177Speter
710289177Speter  /* Recursively read nodes added in this rev. */
711289177Speter  SVN_ERR(read_phsy_noderev(&noderev_str, query, root_node_offset, info,
712289177Speter                            scratch_pool, scratch_pool));
713289177Speter  SVN_ERR(read_noderev(query, noderev_str, info, result_pool, scratch_pool));
714289177Speter
715289177Speter  return SVN_NO_ERROR;
716289177Speter}
717289177Speter
718289177Speter/* Read the content of the pack file staring at revision BASE physical
719289177Speter * addressing mode and store it in QUERY.
720289177Speter *
721289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
722289177Speter * temporaries.
723289177Speter */
724289177Speterstatic svn_error_t *
725289177Speterread_phys_pack_file(query_t *query,
726289177Speter                    svn_revnum_t base,
727289177Speter                    apr_pool_t *result_pool,
728289177Speter                    apr_pool_t *scratch_pool)
729289177Speter{
730289177Speter  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
731289177Speter  int i;
732289177Speter  apr_off_t file_size = 0;
733289177Speter  svn_fs_fs__revision_file_t *rev_file;
734289177Speter
735289177Speter  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base,
736289177Speter                                           scratch_pool, scratch_pool));
737289177Speter  SVN_ERR(get_file_size(&file_size, rev_file, scratch_pool));
738289177Speter
739289177Speter  /* process each revision in the pack file */
740289177Speter  for (i = 0; i < query->shard_size; ++i)
741289177Speter    {
742289177Speter      revision_info_t *info;
743289177Speter
744289177Speter      /* cancellation support */
745289177Speter      if (query->cancel_func)
746289177Speter        SVN_ERR(query->cancel_func(query->cancel_baton));
747289177Speter
748289177Speter      /* create the revision info for the current rev */
749289177Speter      info = apr_pcalloc(result_pool, sizeof(*info));
750289177Speter      info->representations = apr_array_make(result_pool, 4,
751289177Speter                                             sizeof(rep_stats_t*));
752289177Speter      info->rev_file = rev_file;
753289177Speter
754289177Speter      info->revision = base + i;
755289177Speter      SVN_ERR(svn_fs_fs__get_packed_offset(&info->offset, query->fs, base + i,
756289177Speter                                           iterpool));
757289177Speter      if (i + 1 == query->shard_size)
758289177Speter        info->end = file_size;
759289177Speter      else
760289177Speter        SVN_ERR(svn_fs_fs__get_packed_offset(&info->end, query->fs,
761289177Speter                                             base + i + 1, iterpool));
762289177Speter
763289177Speter      SVN_ERR(read_phys_revision(query, info, result_pool, iterpool));
764289177Speter
765289177Speter      info->representations = apr_array_copy(result_pool,
766289177Speter                                             info->representations);
767289177Speter
768289177Speter      /* Done with this revision. */
769289177Speter      info->rev_file = NULL;
770289177Speter
771289177Speter      /* put it into our container */
772289177Speter      APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
773289177Speter
774289177Speter      /* destroy temps */
775289177Speter      svn_pool_clear(iterpool);
776289177Speter    }
777289177Speter
778289177Speter  /* Done with this pack file. */
779289177Speter  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
780289177Speter
781289177Speter  /* one more pack file processed */
782289177Speter  if (query->progress_func)
783289177Speter    query->progress_func(base, query->progress_baton, scratch_pool);
784289177Speter
785289177Speter  return SVN_NO_ERROR;
786289177Speter}
787289177Speter
788289177Speter/* Read the content of the file for REVISION in physical addressing mode
789289177Speter * and store its contents in QUERY.
790289177Speter *
791289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
792289177Speter * temporaries.
793289177Speter */
794289177Speterstatic svn_error_t *
795289177Speterread_phys_revision_file(query_t *query,
796289177Speter                        svn_revnum_t revision,
797289177Speter                        apr_pool_t *result_pool,
798289177Speter                        apr_pool_t *scratch_pool)
799289177Speter{
800289177Speter  revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info));
801289177Speter  apr_off_t file_size = 0;
802289177Speter  svn_fs_fs__revision_file_t *rev_file;
803289177Speter
804289177Speter  /* cancellation support */
805289177Speter  if (query->cancel_func)
806289177Speter    SVN_ERR(query->cancel_func(query->cancel_baton));
807289177Speter
808289177Speter  /* read the whole pack file into memory */
809289177Speter  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision,
810289177Speter                                           scratch_pool, scratch_pool));
811289177Speter  SVN_ERR(get_file_size(&file_size, rev_file, scratch_pool));
812289177Speter
813289177Speter  /* create the revision info for the current rev */
814289177Speter  info->representations = apr_array_make(result_pool, 4, sizeof(rep_stats_t*));
815289177Speter
816289177Speter  info->rev_file = rev_file;
817289177Speter  info->revision = revision;
818289177Speter  info->offset = 0;
819289177Speter  info->end = file_size;
820289177Speter
821289177Speter  SVN_ERR(read_phys_revision(query, info, result_pool, scratch_pool));
822289177Speter
823289177Speter  /* Done with this revision. */
824289177Speter  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
825289177Speter  info->rev_file = NULL;
826289177Speter
827289177Speter  /* put it into our container */
828289177Speter  APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
829289177Speter
830289177Speter  /* show progress every 1000 revs or so */
831289177Speter  if (query->progress_func)
832289177Speter    {
833289177Speter      if (query->shard_size && (revision % query->shard_size == 0))
834289177Speter        query->progress_func(revision, query->progress_baton, scratch_pool);
835289177Speter      if (!query->shard_size && (revision % 1000 == 0))
836289177Speter        query->progress_func(revision, query->progress_baton, scratch_pool);
837289177Speter    }
838289177Speter
839289177Speter  return SVN_NO_ERROR;
840289177Speter}
841289177Speter
/* Return the number of changed paths encoded in the unparsed changes list
 * CHANGES, of which the first LEN chars are examined.  Every change is
 * expected to occupy two lines, so the result is half the number of
 * newline chars found.  Only used in log. addressing mode.
 */
static apr_uint64_t
get_log_change_count(const char *changes,
                     apr_size_t len)
{
  apr_size_t newline_count = 0;
  apr_size_t pos;

  /* count the line terminators */
  for (pos = 0; pos < len; ++pos)
    newline_count += (changes[pos] == '\n');

  /* a change consists of two lines */
  return newline_count / 2;
}
861289177Speter
862289177Speter/* Read the item described by ENTRY from the REV_FILE and return the
863289177Speter * respective byte sequence in *CONTENTS, allocated in RESULT_POOL.
864289177Speter * Use SCRATCH_POOL for temporary allocations
865289177Speter */
866289177Speterstatic svn_error_t *
867289177Speterread_item(svn_stringbuf_t **contents,
868289177Speter          svn_fs_fs__revision_file_t *rev_file,
869289177Speter          svn_fs_fs__p2l_entry_t *entry,
870289177Speter          apr_pool_t *result_pool,
871289177Speter          apr_pool_t *scratch_pool)
872289177Speter{
873289177Speter  svn_stringbuf_t *item = svn_stringbuf_create_ensure(entry->size,
874289177Speter                                                      result_pool);
875289177Speter  item->len = entry->size;
876289177Speter  item->data[item->len] = 0;
877289177Speter
878289177Speter  SVN_ERR(svn_io_file_aligned_seek(rev_file->file, rev_file->block_size,
879289177Speter                                   NULL, entry->offset, scratch_pool));
880289177Speter  SVN_ERR(svn_io_file_read_full2(rev_file->file, item->data, item->len,
881289177Speter                                 NULL, NULL, scratch_pool));
882289177Speter
883289177Speter  *contents = item;
884289177Speter
885289177Speter  return SVN_NO_ERROR;
886289177Speter}
887289177Speter
888289177Speter/* Process the logically addressed revision contents of revisions BASE to
889289177Speter * BASE + COUNT - 1 in QUERY.
890289177Speter *
891289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
892289177Speter * temporaries.
893289177Speter */
894289177Speterstatic svn_error_t *
895289177Speterread_log_rev_or_packfile(query_t *query,
896289177Speter                         svn_revnum_t base,
897289177Speter                         int count,
898289177Speter                         apr_pool_t *result_pool,
899289177Speter                         apr_pool_t *scratch_pool)
900289177Speter{
901289177Speter  fs_fs_data_t *ffd = query->fs->fsap_data;
902289177Speter  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
903289177Speter  apr_off_t max_offset;
904289177Speter  apr_off_t offset = 0;
905289177Speter  int i;
906289177Speter  svn_fs_fs__revision_file_t *rev_file;
907289177Speter
908289177Speter  /* we will process every revision in the rev / pack file */
909289177Speter  for (i = 0; i < count; ++i)
910289177Speter    {
911289177Speter      /* create the revision info for the current rev */
912289177Speter      revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info));
913289177Speter      info->representations = apr_array_make(result_pool, 4,
914289177Speter                                             sizeof(rep_stats_t*));
915289177Speter      info->revision = base + i;
916289177Speter
917289177Speter      APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
918289177Speter    }
919289177Speter
920289177Speter  /* open the pack / rev file that is covered by the p2l index */
921289177Speter  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base,
922289177Speter                                           scratch_pool, iterpool));
923289177Speter  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, query->fs, rev_file,
924289177Speter                                        base, scratch_pool));
925289177Speter
926289177Speter  /* record the whole pack size in the first rev so the total sum will
927289177Speter     still be correct */
928289177Speter  APR_ARRAY_IDX(query->revisions, base, revision_info_t*)->end = max_offset;
929289177Speter
930289177Speter  /* for all offsets in the file, get the P2L index entries and process
931289177Speter     the interesting items (change lists, noderevs) */
932289177Speter  for (offset = 0; offset < max_offset; )
933289177Speter    {
934289177Speter      apr_array_header_t *entries;
935289177Speter
936289177Speter      svn_pool_clear(iterpool);
937289177Speter
938289177Speter      /* cancellation support */
939289177Speter      if (query->cancel_func)
940289177Speter        SVN_ERR(query->cancel_func(query->cancel_baton));
941289177Speter
942289177Speter      /* get all entries for the current block */
943289177Speter      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, query->fs, rev_file, base,
944289177Speter                                          offset, ffd->p2l_page_size,
945289177Speter                                          iterpool, iterpool));
946289177Speter
947289177Speter      /* process all entries (and later continue with the next block) */
948289177Speter      for (i = 0; i < entries->nelts; ++i)
949289177Speter        {
950289177Speter          svn_fs_fs__p2l_entry_t *entry
951289177Speter            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
952289177Speter
953289177Speter          /* skip bits we previously processed */
954289177Speter          if (i == 0 && entry->offset < offset)
955289177Speter            continue;
956289177Speter
957289177Speter          /* skip zero-sized entries */
958289177Speter          if (entry->size == 0)
959289177Speter            continue;
960289177Speter
961289177Speter          /* read and process interesting items */
962289177Speter          if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
963289177Speter            {
964289177Speter              svn_stringbuf_t *item;
965289177Speter              revision_info_t *info = APR_ARRAY_IDX(query->revisions,
966289177Speter                                                    entry->item.revision,
967289177Speter                                                    revision_info_t*);
968289177Speter              SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool));
969289177Speter              SVN_ERR(read_noderev(query, item, info, result_pool, iterpool));
970289177Speter            }
971289177Speter          else if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
972289177Speter            {
973289177Speter              svn_stringbuf_t *item;
974289177Speter              revision_info_t *info = APR_ARRAY_IDX(query->revisions,
975289177Speter                                                    entry->item.revision,
976289177Speter                                                    revision_info_t*);
977289177Speter              SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool));
978289177Speter              info->change_count
979289177Speter                = get_log_change_count(item->data + 0, item->len);
980289177Speter              info->changes_len += entry->size;
981289177Speter            }
982289177Speter
983289177Speter          /* advance offset */
984289177Speter          offset += entry->size;
985289177Speter        }
986289177Speter    }
987289177Speter
988289177Speter  /* clean up and close file handles */
989289177Speter  svn_pool_destroy(iterpool);
990289177Speter
991289177Speter  return SVN_NO_ERROR;
992289177Speter}
993289177Speter
994289177Speter/* Read the content of the pack file staring at revision BASE logical
995289177Speter * addressing mode and store it in QUERY.
996289177Speter *
997289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
998289177Speter * temporaries.
999289177Speter */
1000289177Speterstatic svn_error_t *
1001289177Speterread_log_pack_file(query_t *query,
1002289177Speter                   svn_revnum_t base,
1003289177Speter                   apr_pool_t *result_pool,
1004289177Speter                   apr_pool_t *scratch_pool)
1005289177Speter{
1006289177Speter  SVN_ERR(read_log_rev_or_packfile(query, base, query->shard_size,
1007289177Speter                                   result_pool, scratch_pool));
1008289177Speter
1009289177Speter  /* one more pack file processed */
1010289177Speter  if (query->progress_func)
1011289177Speter    query->progress_func(base, query->progress_baton, scratch_pool);
1012289177Speter
1013289177Speter  return SVN_NO_ERROR;
1014289177Speter}
1015289177Speter
1016289177Speter/* Read the content of the file for REVISION in logical addressing mode
1017289177Speter * and store its contents in QUERY.
1018289177Speter *
1019289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
1020289177Speter * temporaries.
1021289177Speter */
1022289177Speterstatic svn_error_t *
1023289177Speterread_log_revision_file(query_t *query,
1024289177Speter                       svn_revnum_t revision,
1025289177Speter                       apr_pool_t *result_pool,
1026289177Speter                       apr_pool_t *scratch_pool)
1027289177Speter{
1028289177Speter  SVN_ERR(read_log_rev_or_packfile(query, revision, 1,
1029289177Speter                                   result_pool, scratch_pool));
1030289177Speter
1031289177Speter  /* show progress every 1000 revs or so */
1032289177Speter  if (query->progress_func)
1033289177Speter    {
1034289177Speter      if (query->shard_size && (revision % query->shard_size == 0))
1035289177Speter        query->progress_func(revision, query->progress_baton, scratch_pool);
1036289177Speter      if (!query->shard_size && (revision % 1000 == 0))
1037289177Speter        query->progress_func(revision, query->progress_baton, scratch_pool);
1038289177Speter    }
1039289177Speter
1040289177Speter  return SVN_NO_ERROR;
1041289177Speter}
1042289177Speter
1043289177Speter/* Read the repository and collect the stats info in QUERY.
1044289177Speter *
1045289177Speter * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
1046289177Speter * temporaries.
1047289177Speter */
1048289177Speterstatic svn_error_t *
1049289177Speterread_revisions(query_t *query,
1050289177Speter               apr_pool_t *result_pool,
1051289177Speter               apr_pool_t *scratch_pool)
1052289177Speter{
1053289177Speter  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1054289177Speter  svn_revnum_t revision;
1055289177Speter
1056289177Speter  /* read all packed revs */
1057289177Speter  for ( revision = 0
1058289177Speter      ; revision < query->min_unpacked_rev
1059289177Speter      ; revision += query->shard_size)
1060289177Speter    {
1061289177Speter      svn_pool_clear(iterpool);
1062289177Speter
1063289177Speter      if (svn_fs_fs__use_log_addressing(query->fs))
1064289177Speter        SVN_ERR(read_log_pack_file(query, revision, result_pool, iterpool));
1065289177Speter      else
1066289177Speter        SVN_ERR(read_phys_pack_file(query, revision, result_pool, iterpool));
1067289177Speter    }
1068289177Speter
1069289177Speter  /* read non-packed revs */
1070289177Speter  for ( ; revision <= query->head; ++revision)
1071289177Speter    {
1072289177Speter      svn_pool_clear(iterpool);
1073289177Speter
1074289177Speter      if (svn_fs_fs__use_log_addressing(query->fs))
1075289177Speter        SVN_ERR(read_log_revision_file(query, revision, result_pool,
1076289177Speter                                       iterpool));
1077289177Speter      else
1078289177Speter        SVN_ERR(read_phys_revision_file(query, revision, result_pool,
1079289177Speter                                        iterpool));
1080289177Speter    }
1081289177Speter
1082289177Speter  svn_pool_destroy(iterpool);
1083289177Speter
1084289177Speter  return SVN_NO_ERROR;
1085289177Speter}
1086289177Speter
1087289177Speter/* Accumulate stats of REP in STATS.
1088289177Speter */
1089289177Speterstatic void
1090289177Speteradd_rep_pack_stats(svn_fs_fs__rep_pack_stats_t *stats,
1091289177Speter                   rep_stats_t *rep)
1092289177Speter{
1093289177Speter  stats->count++;
1094289177Speter
1095289177Speter  stats->packed_size += rep->size;
1096289177Speter  stats->expanded_size += rep->expanded_size;
1097289177Speter  stats->overhead_size += rep->header_size + 7 /* ENDREP\n */;
1098289177Speter}
1099289177Speter
1100289177Speter/* Accumulate stats of REP in STATS.
1101289177Speter */
1102289177Speterstatic void
1103289177Speteradd_rep_stats(svn_fs_fs__representation_stats_t *stats,
1104289177Speter              rep_stats_t *rep)
1105289177Speter{
1106289177Speter  add_rep_pack_stats(&stats->total, rep);
1107289177Speter  if (rep->ref_count == 1)
1108289177Speter    add_rep_pack_stats(&stats->uniques, rep);
1109289177Speter  else
1110289177Speter    add_rep_pack_stats(&stats->shared, rep);
1111289177Speter
1112289177Speter  stats->references += rep->ref_count;
1113289177Speter  stats->expanded_size += rep->ref_count * rep->expanded_size;
1114289177Speter}
1115289177Speter
1116289177Speter/* Aggregate the info the in revision_info_t * array REVISIONS into the
1117289177Speter * respectve fields of STATS.
1118289177Speter */
1119289177Speterstatic void
1120289177Speteraggregate_stats(const apr_array_header_t *revisions,
1121289177Speter                svn_fs_fs__stats_t *stats)
1122289177Speter{
1123289177Speter  int i, k;
1124289177Speter
1125289177Speter  /* aggregate info from all revisions */
1126289177Speter  stats->revision_count = revisions->nelts;
1127289177Speter  for (i = 0; i < revisions->nelts; ++i)
1128289177Speter    {
1129289177Speter      revision_info_t *revision = APR_ARRAY_IDX(revisions, i,
1130289177Speter                                                revision_info_t *);
1131289177Speter
1132289177Speter      /* data gathered on a revision level */
1133289177Speter      stats->change_count += revision->change_count;
1134289177Speter      stats->change_len += revision->changes_len;
1135289177Speter      stats->total_size += revision->end - revision->offset;
1136289177Speter
1137289177Speter      stats->dir_node_stats.count += revision->dir_noderev_count;
1138289177Speter      stats->dir_node_stats.size += revision->dir_noderev_size;
1139289177Speter      stats->file_node_stats.count += revision->file_noderev_count;
1140289177Speter      stats->file_node_stats.size += revision->file_noderev_size;
1141289177Speter      stats->total_node_stats.count += revision->dir_noderev_count
1142289177Speter                                    + revision->file_noderev_count;
1143289177Speter      stats->total_node_stats.size += revision->dir_noderev_size
1144289177Speter                                   + revision->file_noderev_size;
1145289177Speter
1146289177Speter      /* process representations */
1147289177Speter      for (k = 0; k < revision->representations->nelts; ++k)
1148289177Speter        {
1149289177Speter          rep_stats_t *rep = APR_ARRAY_IDX(revision->representations, k,
1150289177Speter                                           rep_stats_t *);
1151289177Speter
1152289177Speter          /* accumulate in the right bucket */
1153289177Speter          switch(rep->kind)
1154289177Speter            {
1155289177Speter              case file_rep:
1156289177Speter                add_rep_stats(&stats->file_rep_stats, rep);
1157289177Speter                break;
1158289177Speter              case dir_rep:
1159289177Speter                add_rep_stats(&stats->dir_rep_stats, rep);
1160289177Speter                break;
1161289177Speter              case file_property_rep:
1162289177Speter                add_rep_stats(&stats->file_prop_rep_stats, rep);
1163289177Speter                break;
1164289177Speter              case dir_property_rep:
1165289177Speter                add_rep_stats(&stats->dir_prop_rep_stats, rep);
1166289177Speter                break;
1167289177Speter              default:
1168289177Speter                break;
1169289177Speter            }
1170289177Speter
1171289177Speter          add_rep_stats(&stats->total_rep_stats, rep);
1172289177Speter        }
1173289177Speter    }
1174289177Speter}
1175289177Speter
1176289177Speter/* Return a new svn_fs_fs__stats_t instance, allocated in RESULT_POOL.
1177289177Speter */
1178289177Speterstatic svn_fs_fs__stats_t *
1179289177Spetercreate_stats(apr_pool_t *result_pool)
1180289177Speter{
1181289177Speter  svn_fs_fs__stats_t *stats = apr_pcalloc(result_pool, sizeof(*stats));
1182289177Speter
1183289177Speter  initialize_largest_changes(stats, 64, result_pool);
1184289177Speter  stats->by_extension = apr_hash_make(result_pool);
1185289177Speter
1186289177Speter  return stats;
1187289177Speter}
1188289177Speter
1189289177Speter/* Create a *QUERY, allocated in RESULT_POOL, reading filesystem FS and
1190289177Speter * collecting results in STATS.  Store the optional PROCESS_FUNC and
1191289177Speter * PROGRESS_BATON as well as CANCEL_FUNC and CANCEL_BATON in *QUERY, too.
1192289177Speter * Use SCRATCH_POOL for temporary allocations.
1193289177Speter */
1194289177Speterstatic svn_error_t *
1195289177Spetercreate_query(query_t **query,
1196289177Speter             svn_fs_t *fs,
1197289177Speter             svn_fs_fs__stats_t *stats,
1198289177Speter             svn_fs_progress_notify_func_t progress_func,
1199289177Speter             void *progress_baton,
1200289177Speter             svn_cancel_func_t cancel_func,
1201289177Speter             void *cancel_baton,
1202289177Speter             apr_pool_t *result_pool,
1203289177Speter             apr_pool_t *scratch_pool)
1204289177Speter{
1205289177Speter  *query = apr_pcalloc(result_pool, sizeof(**query));
1206289177Speter
1207289177Speter  /* Read repository dimensions. */
1208289177Speter  (*query)->shard_size = svn_fs_fs__shard_size(fs);
1209289177Speter  SVN_ERR(svn_fs_fs__youngest_rev(&(*query)->head, fs, scratch_pool));
1210289177Speter  SVN_ERR(svn_fs_fs__min_unpacked_rev(&(*query)->min_unpacked_rev, fs,
1211289177Speter                                      scratch_pool));
1212289177Speter
1213289177Speter  /* create data containers and caches
1214289177Speter   * Note: this assumes that int is at least 32-bits and that we only support
1215289177Speter   * 32-bit wide revision numbers (actually 31-bits due to the signedness
1216289177Speter   * of both the nelts field of the array and our revision numbers). This
1217289177Speter   * means this code will fail on platforms where int is less than 32-bits
1218289177Speter   * and the repository has more revisions than int can hold. */
1219289177Speter  (*query)->revisions = apr_array_make(result_pool, (int) (*query)->head + 1,
1220289177Speter                                       sizeof(revision_info_t *));
1221289177Speter  (*query)->null_base = apr_pcalloc(result_pool,
1222289177Speter                                    sizeof(*(*query)->null_base));
1223289177Speter
1224289177Speter  /* Store other parameters */
1225289177Speter  (*query)->fs = fs;
1226289177Speter  (*query)->stats = stats;
1227289177Speter  (*query)->progress_func = progress_func;
1228289177Speter  (*query)->progress_baton = progress_baton;
1229289177Speter  (*query)->cancel_func = cancel_func;
1230289177Speter  (*query)->cancel_baton = cancel_baton;
1231289177Speter
1232289177Speter  return SVN_NO_ERROR;
1233289177Speter}
1234289177Speter
1235289177Spetersvn_error_t *
1236289177Spetersvn_fs_fs__get_stats(svn_fs_fs__stats_t **stats,
1237289177Speter                     svn_fs_t *fs,
1238289177Speter                     svn_fs_progress_notify_func_t progress_func,
1239289177Speter                     void *progress_baton,
1240289177Speter                     svn_cancel_func_t cancel_func,
1241289177Speter                     void *cancel_baton,
1242289177Speter                     apr_pool_t *result_pool,
1243289177Speter                     apr_pool_t *scratch_pool)
1244289177Speter{
1245289177Speter  query_t *query;
1246289177Speter
1247289177Speter  *stats = create_stats(result_pool);
1248289177Speter  SVN_ERR(create_query(&query, fs, *stats, progress_func, progress_baton,
1249289177Speter                       cancel_func, cancel_baton, scratch_pool,
1250289177Speter                       scratch_pool));
1251289177Speter  SVN_ERR(read_revisions(query, scratch_pool, scratch_pool));
1252289177Speter  aggregate_stats(query->revisions, *stats);
1253289177Speter
1254289177Speter  return SVN_NO_ERROR;
1255289177Speter}
1256