1/* stats-cmd.c -- implements the size stats sub-command. 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23#include <assert.h> 24 25#include "svn_fs.h" 26#include "svn_pools.h" 27#include "svn_sorts.h" 28 29#include "private/svn_sorts_private.h" 30#include "private/svn_string_private.h" 31#include "private/svn_fs_fs_private.h" 32 33#include "svn_private_config.h" 34#include "svnfsfs.h" 35 36/* Return the string, allocated in RESULT_POOL, describing the value 2**I. 37 */ 38static const char * 39print_two_power(int i, 40 apr_pool_t *result_pool) 41{ 42 /* These are the SI prefixes for base-1000, the binary ones with base-1024 43 are too clumsy and require appending B for "byte" to be intelligible, 44 e.g. "MiB". 45 46 Therefore, we ignore the official standard and revert to the traditional 47 contextual use were the base-1000 prefixes are understood as base-1024 48 when it came to data sizes. 49 */ 50 const char *si_prefixes = " kMGTPEZY"; 51 52 int number = (i >= 0) ? (1 << (i % 10)) : 0; 53 int thousands = (i >= 0) ? (i / 10) : 0; 54 55 char si_prefix = (thousands < strlen(si_prefixes)) 56 ? si_prefixes[thousands] 57 : '?'; 58 59 if (si_prefix == ' ') 60 return apr_psprintf(result_pool, "%d", number); 61 62 return apr_psprintf(result_pool, "%d%c", number, si_prefix); 63} 64 65/* Print statistics for the given group of representations to console. 66 * Use POOL for allocations. 67 */ 68static void 69print_rep_stats(svn_fs_fs__representation_stats_t *stats, 70 apr_pool_t *pool) 71{ 72 printf(_("%20s bytes in %12s reps\n" 73 "%20s bytes in %12s shared reps\n" 74 "%20s bytes expanded size\n" 75 "%20s bytes expanded shared size\n" 76 "%20s bytes with rep-sharing off\n" 77 "%20s shared references\n" 78 "%20.3f average delta chain length\n"), 79 svn__ui64toa_sep(stats->total.packed_size, ',', pool), 80 svn__ui64toa_sep(stats->total.count, ',', pool), 81 svn__ui64toa_sep(stats->shared.packed_size, ',', pool), 82 svn__ui64toa_sep(stats->shared.count, ',', pool), 83 svn__ui64toa_sep(stats->total.expanded_size, ',', pool), 84 svn__ui64toa_sep(stats->shared.expanded_size, ',', pool), 85 svn__ui64toa_sep(stats->expanded_size, ',', pool), 86 svn__ui64toa_sep(stats->references - stats->total.count, ',', pool), 87 stats->chain_len / MAX(1.0, (double)stats->total.count)); 88} 89 90/* Print the (used) contents of CHANGES. Use POOL for allocations. 91 */ 92static void 93print_largest_reps(svn_fs_fs__largest_changes_t *changes, 94 apr_pool_t *pool) 95{ 96 apr_size_t i; 97 for (i = 0; i < changes->count && changes->changes[i]->size; ++i) 98 printf(_("%12s r%-8ld %s\n"), 99 svn__ui64toa_sep(changes->changes[i]->size, ',', pool), 100 changes->changes[i]->revision, 101 changes->changes[i]->path->data); 102} 103 104/* Print the non-zero section of HISTOGRAM to console. 105 * Use POOL for allocations. 106 */ 107static void 108print_histogram(svn_fs_fs__histogram_t *histogram, 109 apr_pool_t *pool) 110{ 111 int first = 0; 112 int last = 63; 113 int i; 114 115 /* identify non-zero range */ 116 while (last > 0 && histogram->lines[last].count == 0) 117 --last; 118 119 while (first <= last && histogram->lines[first].count == 0) 120 ++first; 121 122 /* display histogram lines */ 123 for (i = last; i >= first; --i) 124 printf(_(" %4s .. < %-4s %19s (%2d%%) bytes in %12s (%2d%%) items\n"), 125 print_two_power(i-1, pool), print_two_power(i, pool), 126 svn__ui64toa_sep(histogram->lines[i].sum, ',', pool), 127 (int)(histogram->lines[i].sum * 100 / histogram->total.sum), 128 svn__ui64toa_sep(histogram->lines[i].count, ',', pool), 129 (int)(histogram->lines[i].count * 100 / histogram->total.count)); 130} 131 132/* COMPARISON_FUNC for svn_sort__hash. 133 * Sort extension_info_t values by total count in descending order. 134 */ 135static int 136compare_count(const svn_sort__item_t *a, 137 const svn_sort__item_t *b) 138{ 139 const svn_fs_fs__extension_info_t *lhs = a->value; 140 const svn_fs_fs__extension_info_t *rhs = b->value; 141 apr_int64_t diff = lhs->node_histogram.total.count 142 - rhs->node_histogram.total.count; 143 144 return diff > 0 ? -1 : (diff < 0 ? 1 : 0); 145} 146 147/* COMPARISON_FUNC for svn_sort__hash. 148 * Sort extension_info_t values by total uncompressed size in descending order. 149 */ 150static int 151compare_node_size(const svn_sort__item_t *a, 152 const svn_sort__item_t *b) 153{ 154 const svn_fs_fs__extension_info_t *lhs = a->value; 155 const svn_fs_fs__extension_info_t *rhs = b->value; 156 apr_int64_t diff = lhs->node_histogram.total.sum 157 - rhs->node_histogram.total.sum; 158 159 return diff > 0 ? -1 : (diff < 0 ? 1 : 0); 160} 161 162/* COMPARISON_FUNC for svn_sort__hash. 163 * Sort extension_info_t values by total prep count in descending order. 164 */ 165static int 166compare_rep_size(const svn_sort__item_t *a, 167 const svn_sort__item_t *b) 168{ 169 const svn_fs_fs__extension_info_t *lhs = a->value; 170 const svn_fs_fs__extension_info_t *rhs = b->value; 171 apr_int64_t diff = lhs->rep_histogram.total.sum 172 - rhs->rep_histogram.total.sum; 173 174 return diff > 0 ? -1 : (diff < 0 ? 1 : 0); 175} 176 177/* Return an array of extension_info_t* for the (up to) 16 most prominent 178 * extensions in STATS according to the sort criterion COMPARISON_FUNC. 179 * Allocate results in POOL. 180 */ 181static apr_array_header_t * 182get_by_extensions(svn_fs_fs__stats_t *stats, 183 int (*comparison_func)(const svn_sort__item_t *, 184 const svn_sort__item_t *), 185 apr_pool_t *pool) 186{ 187 /* sort all data by extension */ 188 apr_array_header_t *sorted 189 = svn_sort__hash(stats->by_extension, comparison_func, pool); 190 191 /* select the top (first) 16 entries */ 192 int count = MIN(sorted->nelts, 16); 193 apr_array_header_t *result 194 = apr_array_make(pool, count, sizeof(svn_fs_fs__extension_info_t*)); 195 int i; 196 197 for (i = 0; i < count; ++i) 198 APR_ARRAY_PUSH(result, svn_fs_fs__extension_info_t*) 199 = APR_ARRAY_IDX(sorted, i, svn_sort__item_t).value; 200 201 return result; 202} 203 204/* Add all extension_info_t* entries of TO_ADD not already in TARGET to 205 * TARGET. 206 */ 207static void 208merge_by_extension(apr_array_header_t *target, 209 apr_array_header_t *to_add) 210{ 211 int i, k, count; 212 213 count = target->nelts; 214 for (i = 0; i < to_add->nelts; ++i) 215 { 216 svn_fs_fs__extension_info_t *info 217 = APR_ARRAY_IDX(to_add, i, svn_fs_fs__extension_info_t *); 218 for (k = 0; k < count; ++k) 219 if (info == APR_ARRAY_IDX(target, k, svn_fs_fs__extension_info_t *)) 220 break; 221 222 if (k == count) 223 APR_ARRAY_PUSH(target, svn_fs_fs__extension_info_t*) = info; 224 } 225} 226 227/* Print the (up to) 16 extensions in STATS with the most changes. 228 * Use POOL for allocations. 229 */ 230static void 231print_extensions_by_changes(svn_fs_fs__stats_t *stats, 232 apr_pool_t *pool) 233{ 234 apr_array_header_t *data = get_by_extensions(stats, compare_count, pool); 235 apr_int64_t sum = 0; 236 int i; 237 238 for (i = 0; i < data->nelts; ++i) 239 { 240 svn_fs_fs__extension_info_t *info 241 = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *); 242 243 /* If there are elements, then their count cannot be 0. */ 244 assert(stats->file_histogram.total.count); 245 246 sum += info->node_histogram.total.count; 247 printf(_("%11s %20s (%2d%%) representations\n"), 248 info->extension, 249 svn__ui64toa_sep(info->node_histogram.total.count, ',', pool), 250 (int)(info->node_histogram.total.count * 100 / 251 stats->file_histogram.total.count)); 252 } 253 254 if (stats->file_histogram.total.count) 255 { 256 printf(_("%11s %20s (%2d%%) representations\n"), 257 "(others)", 258 svn__ui64toa_sep(stats->file_histogram.total.count - sum, ',', 259 pool), 260 (int)((stats->file_histogram.total.count - sum) * 100 / 261 stats->file_histogram.total.count)); 262 } 263} 264 265/* Calculate a percentage, handling edge cases. */ 266static int 267get_percentage(apr_uint64_t part, 268 apr_uint64_t total) 269{ 270 /* This include total == 0. */ 271 if (part >= total) 272 return 100; 273 274 /* Standard case. */ 275 return (int)(part * 100.0 / total); 276} 277 278/* Print the (up to) 16 extensions in STATS with the largest total size of 279 * changed file content. Use POOL for allocations. 280 */ 281static void 282print_extensions_by_nodes(svn_fs_fs__stats_t *stats, 283 apr_pool_t *pool) 284{ 285 apr_array_header_t *data = get_by_extensions(stats, compare_node_size, pool); 286 apr_int64_t sum = 0; 287 int i; 288 289 for (i = 0; i < data->nelts; ++i) 290 { 291 svn_fs_fs__extension_info_t *info 292 = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *); 293 sum += info->node_histogram.total.sum; 294 printf(_("%11s %20s (%2d%%) bytes\n"), 295 info->extension, 296 svn__ui64toa_sep(info->node_histogram.total.sum, ',', pool), 297 get_percentage(info->node_histogram.total.sum, 298 stats->file_histogram.total.sum)); 299 } 300 301 if (stats->file_histogram.total.sum > sum) 302 { 303 /* Total sum can't be zero here. */ 304 printf(_("%11s %20s (%2d%%) bytes\n"), 305 "(others)", 306 svn__ui64toa_sep(stats->file_histogram.total.sum - sum, ',', 307 pool), 308 get_percentage(stats->file_histogram.total.sum - sum, 309 stats->file_histogram.total.sum)); 310 } 311} 312 313/* Print the (up to) 16 extensions in STATS with the largest total size of 314 * changed file content. Use POOL for allocations. 315 */ 316static void 317print_extensions_by_reps(svn_fs_fs__stats_t *stats, 318 apr_pool_t *pool) 319{ 320 apr_array_header_t *data = get_by_extensions(stats, compare_rep_size, pool); 321 apr_int64_t sum = 0; 322 int i; 323 324 for (i = 0; i < data->nelts; ++i) 325 { 326 svn_fs_fs__extension_info_t *info 327 = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *); 328 sum += info->rep_histogram.total.sum; 329 printf(_("%11s %20s (%2d%%) bytes\n"), 330 info->extension, 331 svn__ui64toa_sep(info->rep_histogram.total.sum, ',', pool), 332 get_percentage(info->rep_histogram.total.sum, 333 stats->rep_size_histogram.total.sum)); 334 } 335 336 if (stats->rep_size_histogram.total.sum > sum) 337 { 338 /* Total sum can't be zero here. */ 339 printf(_("%11s %20s (%2d%%) bytes\n"), 340 "(others)", 341 svn__ui64toa_sep(stats->rep_size_histogram.total.sum - sum, ',', 342 pool), 343 get_percentage(stats->rep_size_histogram.total.sum - sum, 344 stats->rep_size_histogram.total.sum)); 345 } 346} 347 348/* Print per-extension histograms for the most frequent extensions in STATS. 349 * Use POOL for allocations. */ 350static void 351print_histograms_by_extension(svn_fs_fs__stats_t *stats, 352 apr_pool_t *pool) 353{ 354 apr_array_header_t *data = get_by_extensions(stats, compare_count, pool); 355 int i; 356 357 merge_by_extension(data, get_by_extensions(stats, compare_node_size, pool)); 358 merge_by_extension(data, get_by_extensions(stats, compare_rep_size, pool)); 359 360 for (i = 0; i < data->nelts; ++i) 361 { 362 svn_fs_fs__extension_info_t *info 363 = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *); 364 printf("\nHistogram of '%s' file sizes:\n", info->extension); 365 print_histogram(&info->node_histogram, pool); 366 printf("\nHistogram of '%s' file representation sizes:\n", 367 info->extension); 368 print_histogram(&info->rep_histogram, pool); 369 } 370} 371 372/* Print the contents of STATS to the console. 373 * Use POOL for allocations. 374 */ 375static void 376print_stats(svn_fs_fs__stats_t *stats, 377 apr_pool_t *pool) 378{ 379 /* print results */ 380 printf("\n\nGlobal statistics:\n"); 381 printf(_("%20s bytes in %12s revisions\n" 382 "%20s bytes in %12s changes\n" 383 "%20s bytes in %12s node revision records\n" 384 "%20s bytes in %12s representations\n" 385 "%20s bytes expanded representation size\n" 386 "%20s bytes with rep-sharing off\n"), 387 svn__ui64toa_sep(stats->total_size, ',', pool), 388 svn__ui64toa_sep(stats->revision_count, ',', pool), 389 svn__ui64toa_sep(stats->change_len, ',', pool), 390 svn__ui64toa_sep(stats->change_count, ',', pool), 391 svn__ui64toa_sep(stats->total_node_stats.size, ',', pool), 392 svn__ui64toa_sep(stats->total_node_stats.count, ',', pool), 393 svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',', 394 pool), 395 svn__ui64toa_sep(stats->total_rep_stats.total.count, ',', pool), 396 svn__ui64toa_sep(stats->total_rep_stats.total.expanded_size, ',', 397 pool), 398 svn__ui64toa_sep(stats->total_rep_stats.expanded_size, ',', pool)); 399 400 printf("\nNoderev statistics:\n"); 401 printf(_("%20s bytes in %12s nodes total\n" 402 "%20s bytes in %12s directory noderevs\n" 403 "%20s bytes in %12s file noderevs\n"), 404 svn__ui64toa_sep(stats->total_node_stats.size, ',', pool), 405 svn__ui64toa_sep(stats->total_node_stats.count, ',', pool), 406 svn__ui64toa_sep(stats->dir_node_stats.size, ',', pool), 407 svn__ui64toa_sep(stats->dir_node_stats.count, ',', pool), 408 svn__ui64toa_sep(stats->file_node_stats.size, ',', pool), 409 svn__ui64toa_sep(stats->file_node_stats.count, ',', pool)); 410 411 printf("\nRepresentation statistics:\n"); 412 printf(_("%20s bytes in %12s representations total\n" 413 "%20s bytes in %12s directory representations\n" 414 "%20s bytes in %12s file representations\n" 415 "%20s bytes in %12s representations of added file nodes\n" 416 "%20s bytes in %12s directory property representations\n" 417 "%20s bytes in %12s file property representations\n" 418 " with %12.3f average delta chain length\n" 419 "%20s bytes in header & footer overhead\n"), 420 svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',', 421 pool), 422 svn__ui64toa_sep(stats->total_rep_stats.total.count, ',', pool), 423 svn__ui64toa_sep(stats->dir_rep_stats.total.packed_size, ',', 424 pool), 425 svn__ui64toa_sep(stats->dir_rep_stats.total.count, ',', pool), 426 svn__ui64toa_sep(stats->file_rep_stats.total.packed_size, ',', 427 pool), 428 svn__ui64toa_sep(stats->file_rep_stats.total.count, ',', pool), 429 svn__ui64toa_sep(stats->added_rep_size_histogram.total.sum, ',', 430 pool), 431 svn__ui64toa_sep(stats->added_rep_size_histogram.total.count, ',', 432 pool), 433 svn__ui64toa_sep(stats->dir_prop_rep_stats.total.packed_size, ',', 434 pool), 435 svn__ui64toa_sep(stats->dir_prop_rep_stats.total.count, ',', pool), 436 svn__ui64toa_sep(stats->file_prop_rep_stats.total.packed_size, ',', 437 pool), 438 svn__ui64toa_sep(stats->file_prop_rep_stats.total.count, ',', pool), 439 stats->total_rep_stats.chain_len 440 / (double)stats->total_rep_stats.total.count, 441 svn__ui64toa_sep(stats->total_rep_stats.total.overhead_size, ',', 442 pool)); 443 444 printf("\nDirectory representation statistics:\n"); 445 print_rep_stats(&stats->dir_rep_stats, pool); 446 printf("\nFile representation statistics:\n"); 447 print_rep_stats(&stats->file_rep_stats, pool); 448 printf("\nDirectory property representation statistics:\n"); 449 print_rep_stats(&stats->dir_prop_rep_stats, pool); 450 printf("\nFile property representation statistics:\n"); 451 print_rep_stats(&stats->file_prop_rep_stats, pool); 452 453 printf("\nLargest representations:\n"); 454 print_largest_reps(stats->largest_changes, pool); 455 printf("\nExtensions by number of representations:\n"); 456 print_extensions_by_changes(stats, pool); 457 printf("\nExtensions by size of changed files:\n"); 458 print_extensions_by_nodes(stats, pool); 459 printf("\nExtensions by size of representations:\n"); 460 print_extensions_by_reps(stats, pool); 461 462 printf("\nHistogram of expanded node sizes:\n"); 463 print_histogram(&stats->node_size_histogram, pool); 464 printf("\nHistogram of representation sizes:\n"); 465 print_histogram(&stats->rep_size_histogram, pool); 466 printf("\nHistogram of file sizes:\n"); 467 print_histogram(&stats->file_histogram, pool); 468 printf("\nHistogram of file representation sizes:\n"); 469 print_histogram(&stats->file_rep_histogram, pool); 470 printf("\nHistogram of file property sizes:\n"); 471 print_histogram(&stats->file_prop_histogram, pool); 472 printf("\nHistogram of file property representation sizes:\n"); 473 print_histogram(&stats->file_prop_rep_histogram, pool); 474 printf("\nHistogram of directory sizes:\n"); 475 print_histogram(&stats->dir_histogram, pool); 476 printf("\nHistogram of directory representation sizes:\n"); 477 print_histogram(&stats->dir_rep_histogram, pool); 478 printf("\nHistogram of directory property sizes:\n"); 479 print_histogram(&stats->dir_prop_histogram, pool); 480 printf("\nHistogram of directory property representation sizes:\n"); 481 print_histogram(&stats->dir_prop_rep_histogram, pool); 482 483 print_histograms_by_extension(stats, pool); 484} 485 486/* Our progress function simply prints the REVISION number and makes it 487 * appear immediately. 488 */ 489static void 490print_progress(svn_revnum_t revision, 491 void *baton, 492 apr_pool_t *pool) 493{ 494 printf("%8ld", revision); 495 fflush(stdout); 496} 497 498/* This implements `svn_opt_subcommand_t'. */ 499svn_error_t * 500subcommand__stats(apr_getopt_t *os, void *baton, apr_pool_t *pool) 501{ 502 svnfsfs__opt_state *opt_state = baton; 503 svn_fs_t *fs; 504 svn_fs_fs__ioctl_get_stats_input_t input = {0}; 505 svn_fs_fs__ioctl_get_stats_output_t *output; 506 507 printf("Reading revisions\n"); 508 SVN_ERR(open_fs(&fs, opt_state->repository_path, pool)); 509 510 input.progress_func = print_progress; 511 SVN_ERR(svn_fs_ioctl(fs, SVN_FS_FS__IOCTL_GET_STATS, &input, (void **)&output, 512 check_cancel, NULL, pool, pool)); 513 print_stats(output->stats, pool); 514 515 return SVN_NO_ERROR; 516} 517