1/* stats-cmd.c -- implements the size stats sub-command.
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include <assert.h>
24
25#include "svn_fs.h"
26#include "svn_pools.h"
27#include "svn_sorts.h"
28
29#include "private/svn_sorts_private.h"
30#include "private/svn_string_private.h"
31#include "private/svn_fs_fs_private.h"
32
33#include "svn_private_config.h"
34#include "svnfsfs.h"
35
36/* Return the string, allocated in RESULT_POOL, describing the value 2**I.
37 */
38static const char *
39print_two_power(int i,
40                apr_pool_t *result_pool)
41{
42  /* These are the SI prefixes for base-1000, the binary ones with base-1024
43     are too clumsy and require appending B for "byte" to be intelligible,
44     e.g. "MiB".
45
46     Therefore, we ignore the official standard and revert to the traditional
47     contextual use were the base-1000 prefixes are understood as base-1024
48     when it came to data sizes.
49   */
50  const char *si_prefixes = " kMGTPEZY";
51
52  int number = (i >= 0) ? (1 << (i % 10)) : 0;
53  int thousands = (i >= 0) ? (i / 10) : 0;
54
55  char si_prefix = (thousands < strlen(si_prefixes))
56                 ? si_prefixes[thousands]
57                 : '?';
58
59  if (si_prefix == ' ')
60    return apr_psprintf(result_pool, "%d", number);
61
62  return apr_psprintf(result_pool, "%d%c", number, si_prefix);
63}
64
65/* Print statistics for the given group of representations to console.
66 * Use POOL for allocations.
67 */
68static void
69print_rep_stats(svn_fs_fs__representation_stats_t *stats,
70                apr_pool_t *pool)
71{
72  printf(_("%20s bytes in %12s reps\n"
73           "%20s bytes in %12s shared reps\n"
74           "%20s bytes expanded size\n"
75           "%20s bytes expanded shared size\n"
76           "%20s bytes with rep-sharing off\n"
77           "%20s shared references\n"
78           "%20.3f average delta chain length\n"),
79         svn__ui64toa_sep(stats->total.packed_size, ',', pool),
80         svn__ui64toa_sep(stats->total.count, ',', pool),
81         svn__ui64toa_sep(stats->shared.packed_size, ',', pool),
82         svn__ui64toa_sep(stats->shared.count, ',', pool),
83         svn__ui64toa_sep(stats->total.expanded_size, ',', pool),
84         svn__ui64toa_sep(stats->shared.expanded_size, ',', pool),
85         svn__ui64toa_sep(stats->expanded_size, ',', pool),
86         svn__ui64toa_sep(stats->references - stats->total.count, ',', pool),
87         stats->chain_len / MAX(1.0, (double)stats->total.count));
88}
89
90/* Print the (used) contents of CHANGES.  Use POOL for allocations.
91 */
92static void
93print_largest_reps(svn_fs_fs__largest_changes_t *changes,
94                   apr_pool_t *pool)
95{
96  apr_size_t i;
97  for (i = 0; i < changes->count && changes->changes[i]->size; ++i)
98    printf(_("%12s r%-8ld %s\n"),
99           svn__ui64toa_sep(changes->changes[i]->size, ',', pool),
100           changes->changes[i]->revision,
101           changes->changes[i]->path->data);
102}
103
104/* Print the non-zero section of HISTOGRAM to console.
105 * Use POOL for allocations.
106 */
107static void
108print_histogram(svn_fs_fs__histogram_t *histogram,
109                apr_pool_t *pool)
110{
111  int first = 0;
112  int last = 63;
113  int i;
114
115  /* identify non-zero range */
116  while (last > 0 && histogram->lines[last].count == 0)
117    --last;
118
119  while (first <= last && histogram->lines[first].count == 0)
120    ++first;
121
122  /* display histogram lines */
123  for (i = last; i >= first; --i)
124    printf(_("  %4s .. < %-4s %19s (%2d%%) bytes in %12s (%2d%%) items\n"),
125           print_two_power(i-1, pool), print_two_power(i, pool),
126           svn__ui64toa_sep(histogram->lines[i].sum, ',', pool),
127           (int)(histogram->lines[i].sum * 100 / histogram->total.sum),
128           svn__ui64toa_sep(histogram->lines[i].count, ',', pool),
129           (int)(histogram->lines[i].count * 100 / histogram->total.count));
130}
131
132/* COMPARISON_FUNC for svn_sort__hash.
133 * Sort extension_info_t values by total count in descending order.
134 */
135static int
136compare_count(const svn_sort__item_t *a,
137              const svn_sort__item_t *b)
138{
139  const svn_fs_fs__extension_info_t *lhs = a->value;
140  const svn_fs_fs__extension_info_t *rhs = b->value;
141  apr_int64_t diff = lhs->node_histogram.total.count
142                   - rhs->node_histogram.total.count;
143
144  return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
145}
146
147/* COMPARISON_FUNC for svn_sort__hash.
148 * Sort extension_info_t values by total uncompressed size in descending order.
149 */
150static int
151compare_node_size(const svn_sort__item_t *a,
152                  const svn_sort__item_t *b)
153{
154  const svn_fs_fs__extension_info_t *lhs = a->value;
155  const svn_fs_fs__extension_info_t *rhs = b->value;
156  apr_int64_t diff = lhs->node_histogram.total.sum
157                   - rhs->node_histogram.total.sum;
158
159  return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
160}
161
162/* COMPARISON_FUNC for svn_sort__hash.
163 * Sort extension_info_t values by total prep count in descending order.
164 */
165static int
166compare_rep_size(const svn_sort__item_t *a,
167                 const svn_sort__item_t *b)
168{
169  const svn_fs_fs__extension_info_t *lhs = a->value;
170  const svn_fs_fs__extension_info_t *rhs = b->value;
171  apr_int64_t diff = lhs->rep_histogram.total.sum
172                   - rhs->rep_histogram.total.sum;
173
174  return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
175}
176
177/* Return an array of extension_info_t* for the (up to) 16 most prominent
178 * extensions in STATS according to the sort criterion COMPARISON_FUNC.
179 * Allocate results in POOL.
180 */
181static apr_array_header_t *
182get_by_extensions(svn_fs_fs__stats_t *stats,
183                  int (*comparison_func)(const svn_sort__item_t *,
184                                         const svn_sort__item_t *),
185                  apr_pool_t *pool)
186{
187  /* sort all data by extension */
188  apr_array_header_t *sorted
189    = svn_sort__hash(stats->by_extension, comparison_func, pool);
190
191  /* select the top (first) 16 entries */
192  int count = MIN(sorted->nelts, 16);
193  apr_array_header_t *result
194    = apr_array_make(pool, count, sizeof(svn_fs_fs__extension_info_t*));
195  int i;
196
197  for (i = 0; i < count; ++i)
198    APR_ARRAY_PUSH(result, svn_fs_fs__extension_info_t*)
199     = APR_ARRAY_IDX(sorted, i, svn_sort__item_t).value;
200
201  return result;
202}
203
204/* Add all extension_info_t* entries of TO_ADD not already in TARGET to
205 * TARGET.
206 */
207static void
208merge_by_extension(apr_array_header_t *target,
209                   apr_array_header_t *to_add)
210{
211  int i, k, count;
212
213  count = target->nelts;
214  for (i = 0; i < to_add->nelts; ++i)
215    {
216      svn_fs_fs__extension_info_t *info
217        = APR_ARRAY_IDX(to_add, i, svn_fs_fs__extension_info_t *);
218      for (k = 0; k < count; ++k)
219        if (info == APR_ARRAY_IDX(target, k, svn_fs_fs__extension_info_t *))
220          break;
221
222      if (k == count)
223        APR_ARRAY_PUSH(target, svn_fs_fs__extension_info_t*) = info;
224    }
225}
226
227/* Print the (up to) 16 extensions in STATS with the most changes.
228 * Use POOL for allocations.
229 */
230static void
231print_extensions_by_changes(svn_fs_fs__stats_t *stats,
232                            apr_pool_t *pool)
233{
234  apr_array_header_t *data = get_by_extensions(stats, compare_count, pool);
235  apr_int64_t sum = 0;
236  int i;
237
238  for (i = 0; i < data->nelts; ++i)
239    {
240      svn_fs_fs__extension_info_t *info
241        = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *);
242
243      /* If there are elements, then their count cannot be 0. */
244      assert(stats->file_histogram.total.count);
245
246      sum += info->node_histogram.total.count;
247      printf(_("%11s %20s (%2d%%) representations\n"),
248             info->extension,
249             svn__ui64toa_sep(info->node_histogram.total.count, ',', pool),
250             (int)(info->node_histogram.total.count * 100 /
251                   stats->file_histogram.total.count));
252    }
253
254  if (stats->file_histogram.total.count)
255    {
256      printf(_("%11s %20s (%2d%%) representations\n"),
257             "(others)",
258             svn__ui64toa_sep(stats->file_histogram.total.count - sum, ',',
259                              pool),
260             (int)((stats->file_histogram.total.count - sum) * 100 /
261                   stats->file_histogram.total.count));
262    }
263}
264
265/* Calculate a percentage, handling edge cases. */
266static int
267get_percentage(apr_uint64_t part,
268               apr_uint64_t total)
269{
270  /* This include total == 0. */
271  if (part >= total)
272    return 100;
273
274  /* Standard case. */
275  return (int)(part * 100.0 / total);
276}
277
278/* Print the (up to) 16 extensions in STATS with the largest total size of
279 * changed file content.  Use POOL for allocations.
280 */
281static void
282print_extensions_by_nodes(svn_fs_fs__stats_t *stats,
283                          apr_pool_t *pool)
284{
285  apr_array_header_t *data = get_by_extensions(stats, compare_node_size, pool);
286  apr_int64_t sum = 0;
287  int i;
288
289  for (i = 0; i < data->nelts; ++i)
290    {
291      svn_fs_fs__extension_info_t *info
292        = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *);
293      sum += info->node_histogram.total.sum;
294      printf(_("%11s %20s (%2d%%) bytes\n"),
295             info->extension,
296             svn__ui64toa_sep(info->node_histogram.total.sum, ',', pool),
297             get_percentage(info->node_histogram.total.sum,
298                            stats->file_histogram.total.sum));
299    }
300
301  if (stats->file_histogram.total.sum > sum)
302    {
303      /* Total sum can't be zero here. */
304      printf(_("%11s %20s (%2d%%) bytes\n"),
305             "(others)",
306             svn__ui64toa_sep(stats->file_histogram.total.sum - sum, ',',
307                              pool),
308             get_percentage(stats->file_histogram.total.sum - sum,
309                            stats->file_histogram.total.sum));
310    }
311}
312
313/* Print the (up to) 16 extensions in STATS with the largest total size of
314 * changed file content.  Use POOL for allocations.
315 */
316static void
317print_extensions_by_reps(svn_fs_fs__stats_t *stats,
318                         apr_pool_t *pool)
319{
320  apr_array_header_t *data = get_by_extensions(stats, compare_rep_size, pool);
321  apr_int64_t sum = 0;
322  int i;
323
324  for (i = 0; i < data->nelts; ++i)
325    {
326      svn_fs_fs__extension_info_t *info
327        = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *);
328      sum += info->rep_histogram.total.sum;
329      printf(_("%11s %20s (%2d%%) bytes\n"),
330             info->extension,
331             svn__ui64toa_sep(info->rep_histogram.total.sum, ',', pool),
332             get_percentage(info->rep_histogram.total.sum,
333                            stats->rep_size_histogram.total.sum));
334    }
335
336  if (stats->rep_size_histogram.total.sum > sum)
337    {
338      /* Total sum can't be zero here. */
339      printf(_("%11s %20s (%2d%%) bytes\n"),
340             "(others)",
341             svn__ui64toa_sep(stats->rep_size_histogram.total.sum - sum, ',',
342                              pool),
343             get_percentage(stats->rep_size_histogram.total.sum - sum,
344                            stats->rep_size_histogram.total.sum));
345    }
346}
347
348/* Print per-extension histograms for the most frequent extensions in STATS.
349 * Use POOL for allocations. */
350static void
351print_histograms_by_extension(svn_fs_fs__stats_t *stats,
352                              apr_pool_t *pool)
353{
354  apr_array_header_t *data = get_by_extensions(stats, compare_count, pool);
355  int i;
356
357  merge_by_extension(data, get_by_extensions(stats, compare_node_size, pool));
358  merge_by_extension(data, get_by_extensions(stats, compare_rep_size, pool));
359
360  for (i = 0; i < data->nelts; ++i)
361    {
362      svn_fs_fs__extension_info_t *info
363        = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *);
364      printf("\nHistogram of '%s' file sizes:\n", info->extension);
365      print_histogram(&info->node_histogram, pool);
366      printf("\nHistogram of '%s' file representation sizes:\n",
367             info->extension);
368      print_histogram(&info->rep_histogram, pool);
369    }
370}
371
372/* Print the contents of STATS to the console.
373 * Use POOL for allocations.
374 */
375static void
376print_stats(svn_fs_fs__stats_t *stats,
377            apr_pool_t *pool)
378{
379  /* print results */
380  printf("\n\nGlobal statistics:\n");
381  printf(_("%20s bytes in %12s revisions\n"
382           "%20s bytes in %12s changes\n"
383           "%20s bytes in %12s node revision records\n"
384           "%20s bytes in %12s representations\n"
385           "%20s bytes expanded representation size\n"
386           "%20s bytes with rep-sharing off\n"),
387         svn__ui64toa_sep(stats->total_size, ',', pool),
388         svn__ui64toa_sep(stats->revision_count, ',', pool),
389         svn__ui64toa_sep(stats->change_len, ',', pool),
390         svn__ui64toa_sep(stats->change_count, ',', pool),
391         svn__ui64toa_sep(stats->total_node_stats.size, ',', pool),
392         svn__ui64toa_sep(stats->total_node_stats.count, ',', pool),
393         svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',',
394                         pool),
395         svn__ui64toa_sep(stats->total_rep_stats.total.count, ',', pool),
396         svn__ui64toa_sep(stats->total_rep_stats.total.expanded_size, ',',
397                         pool),
398         svn__ui64toa_sep(stats->total_rep_stats.expanded_size, ',', pool));
399
400  printf("\nNoderev statistics:\n");
401  printf(_("%20s bytes in %12s nodes total\n"
402           "%20s bytes in %12s directory noderevs\n"
403           "%20s bytes in %12s file noderevs\n"),
404         svn__ui64toa_sep(stats->total_node_stats.size, ',', pool),
405         svn__ui64toa_sep(stats->total_node_stats.count, ',', pool),
406         svn__ui64toa_sep(stats->dir_node_stats.size, ',', pool),
407         svn__ui64toa_sep(stats->dir_node_stats.count, ',', pool),
408         svn__ui64toa_sep(stats->file_node_stats.size, ',', pool),
409         svn__ui64toa_sep(stats->file_node_stats.count, ',', pool));
410
411  printf("\nRepresentation statistics:\n");
412  printf(_("%20s bytes in %12s representations total\n"
413           "%20s bytes in %12s directory representations\n"
414           "%20s bytes in %12s file representations\n"
415           "%20s bytes in %12s representations of added file nodes\n"
416           "%20s bytes in %12s directory property representations\n"
417           "%20s bytes in %12s file property representations\n"
418           "                         with %12.3f average delta chain length\n"
419           "%20s bytes in header & footer overhead\n"),
420         svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',',
421                         pool),
422         svn__ui64toa_sep(stats->total_rep_stats.total.count, ',', pool),
423         svn__ui64toa_sep(stats->dir_rep_stats.total.packed_size, ',',
424                         pool),
425         svn__ui64toa_sep(stats->dir_rep_stats.total.count, ',', pool),
426         svn__ui64toa_sep(stats->file_rep_stats.total.packed_size, ',',
427                         pool),
428         svn__ui64toa_sep(stats->file_rep_stats.total.count, ',', pool),
429         svn__ui64toa_sep(stats->added_rep_size_histogram.total.sum, ',',
430                         pool),
431         svn__ui64toa_sep(stats->added_rep_size_histogram.total.count, ',',
432                         pool),
433         svn__ui64toa_sep(stats->dir_prop_rep_stats.total.packed_size, ',',
434                         pool),
435         svn__ui64toa_sep(stats->dir_prop_rep_stats.total.count, ',', pool),
436         svn__ui64toa_sep(stats->file_prop_rep_stats.total.packed_size, ',',
437                         pool),
438         svn__ui64toa_sep(stats->file_prop_rep_stats.total.count, ',', pool),
439         stats->total_rep_stats.chain_len
440            / (double)stats->total_rep_stats.total.count,
441         svn__ui64toa_sep(stats->total_rep_stats.total.overhead_size, ',',
442                         pool));
443
444  printf("\nDirectory representation statistics:\n");
445  print_rep_stats(&stats->dir_rep_stats, pool);
446  printf("\nFile representation statistics:\n");
447  print_rep_stats(&stats->file_rep_stats, pool);
448  printf("\nDirectory property representation statistics:\n");
449  print_rep_stats(&stats->dir_prop_rep_stats, pool);
450  printf("\nFile property representation statistics:\n");
451  print_rep_stats(&stats->file_prop_rep_stats, pool);
452
453  printf("\nLargest representations:\n");
454  print_largest_reps(stats->largest_changes, pool);
455  printf("\nExtensions by number of representations:\n");
456  print_extensions_by_changes(stats, pool);
457  printf("\nExtensions by size of changed files:\n");
458  print_extensions_by_nodes(stats, pool);
459  printf("\nExtensions by size of representations:\n");
460  print_extensions_by_reps(stats, pool);
461
462  printf("\nHistogram of expanded node sizes:\n");
463  print_histogram(&stats->node_size_histogram, pool);
464  printf("\nHistogram of representation sizes:\n");
465  print_histogram(&stats->rep_size_histogram, pool);
466  printf("\nHistogram of file sizes:\n");
467  print_histogram(&stats->file_histogram, pool);
468  printf("\nHistogram of file representation sizes:\n");
469  print_histogram(&stats->file_rep_histogram, pool);
470  printf("\nHistogram of file property sizes:\n");
471  print_histogram(&stats->file_prop_histogram, pool);
472  printf("\nHistogram of file property representation sizes:\n");
473  print_histogram(&stats->file_prop_rep_histogram, pool);
474  printf("\nHistogram of directory sizes:\n");
475  print_histogram(&stats->dir_histogram, pool);
476  printf("\nHistogram of directory representation sizes:\n");
477  print_histogram(&stats->dir_rep_histogram, pool);
478  printf("\nHistogram of directory property sizes:\n");
479  print_histogram(&stats->dir_prop_histogram, pool);
480  printf("\nHistogram of directory property representation sizes:\n");
481  print_histogram(&stats->dir_prop_rep_histogram, pool);
482
483  print_histograms_by_extension(stats, pool);
484}
485
486/* Our progress function simply prints the REVISION number and makes it
487 * appear immediately.
488 */
489static void
490print_progress(svn_revnum_t revision,
491               void *baton,
492               apr_pool_t *pool)
493{
494  printf("%8ld", revision);
495  fflush(stdout);
496}
497
498/* This implements `svn_opt_subcommand_t'. */
499svn_error_t *
500subcommand__stats(apr_getopt_t *os, void *baton, apr_pool_t *pool)
501{
502  svnfsfs__opt_state *opt_state = baton;
503  svn_fs_t *fs;
504  svn_fs_fs__ioctl_get_stats_input_t input = {0};
505  svn_fs_fs__ioctl_get_stats_output_t *output;
506
507  printf("Reading revisions\n");
508  SVN_ERR(open_fs(&fs, opt_state->repository_path, pool));
509
510  input.progress_func = print_progress;
511  SVN_ERR(svn_fs_ioctl(fs, SVN_FS_FS__IOCTL_GET_STATS, &input, (void **)&output,
512                       check_cancel, NULL, pool, pool));
513  print_stats(output->stats, pool);
514
515  return SVN_NO_ERROR;
516}
517