1/* pack.c --- FSFS shard packing functionality 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22#include <assert.h> 23#include <string.h> 24 25#include "svn_pools.h" 26#include "svn_dirent_uri.h" 27#include "svn_sorts.h" 28#include "private/svn_temp_serializer.h" 29#include "private/svn_sorts_private.h" 30#include "private/svn_subr_private.h" 31#include "private/svn_string_private.h" 32#include "private/svn_io_private.h" 33 34#include "fs_fs.h" 35#include "pack.h" 36#include "util.h" 37#include "id.h" 38#include "index.h" 39#include "low_level.h" 40#include "revprops.h" 41#include "transaction.h" 42 43#include "../libsvn_fs/fs-loader.h" 44 45#include "svn_private_config.h" 46#include "temp_serializer.h" 47 48/* Logical addressing packing logic: 49 * 50 * We pack files on a pack file basis (e.g. 1000 revs) without changing 51 * existing pack files nor the revision files outside the range to pack. 52 * 53 * First, we will scan the revision file indexes to determine the number 54 * of items to "place" (i.e. 
determine their optimal position within the
 * future pack file).  For each item, we will need a constant amount of
 * memory to track it.  A MAX_MEM parameter sets a limit to the number of
 * items we may place in one go.  That means, we may not be able to add
 * all revisions at once.  Instead, we will run the placement for a subset
 * of revisions at a time.  The very unlikely worst case will simply append
 * all revision data with just a little reshuffling inside each revision.
 *
 * In a second step, we read all revisions in the selected range, build
 * the item tracking information and copy the items themselves from the
 * revision files to temporary files.  The latter serve as buckets for a
 * very coarse bucket presort:  Separate change lists, file properties,
 * directory properties and noderevs + representations from one another.
 *
 * The third step will determine an optimized placement for the items in
 * each of the 4 buckets separately.  The first three will simply order
 * their items by revision, starting with the newest ones.  Placing rep
 * and noderev items is a more elaborate process documented in the code.
 *
 * In short, we store items in the following order:
 * - changed paths lists
 * - node properties
 * - directory properties
 * - directory representations corresponding noderevs, lexical path order
 *   with special treatment of "trunk" and "branches"
 * - same for file representations
 *
 * Step 4 copies the items from the temporary buckets into the final
 * pack file and writes the temporary index files.
 *
 * Finally, after the last range of revisions, create the final indexes.
 */

/* Maximum amount of memory we allocate for placement information during
 * the pack process.
 */
#define DEFAULT_MAX_MEM (64 * 1024 * 1024)

/* Data structure describing a node change at PATH, REVISION.
 * We will sort these instances by PATH and NODE_ID such that we can combine
 * similar nodes in the same reps container and store containers in path
 * major order.
 */
typedef struct path_order_t
{
  /* changed path */
  svn_prefix_string__t *path;

  /* node ID for this PATH in REVISION */
  svn_fs_fs__id_part_t node_id;

  /* when this change happened */
  svn_revnum_t revision;

  /* noderev predecessor count */
  int predecessor_count;

  /* this is a directory node */
  svn_boolean_t is_dir;

  /* length of the expanded representation content */
  apr_int64_t expanded_size;

  /* item ID of the noderev linked to the change.  May be (0, 0). */
  svn_fs_fs__id_part_t noderev_id;

  /* item ID of the representation containing the new data.  May be (0, 0). */
  svn_fs_fs__id_part_t rep_id;
} path_order_t;

/* Represents a reference from item FROM to item TO.  FROM may be a noderev
 * or rep_id while TO is (currently) always a representation.  We will sort
 * them by TO which allows us to collect all dependent items.
 */
typedef struct reference_t
{
  svn_fs_fs__id_part_t to;
  svn_fs_fs__id_part_t from;
} reference_t;

/* This structure keeps track of all the temporary data and status that
 * needs to be kept around during the creation of one pack file.  After
 * each revision range (in case we can't process all revs at once due to
 * memory restrictions), parts of the data will get re-initialized.
 */
typedef struct pack_context_t
{
  /* file system that we operate on */
  svn_fs_t *fs;

  /* cancel function to invoke at regular intervals.  May be NULL */
  svn_cancel_func_t cancel_func;

  /* baton to pass to CANCEL_FUNC */
  void *cancel_baton;

  /* first revision in the shard (and future pack file) */
  svn_revnum_t shard_rev;

  /* first revision in the range to process (>= SHARD_REV) */
  svn_revnum_t start_rev;

  /* first revision after the range to process (<= SHARD_END_REV) */
  svn_revnum_t end_rev;

  /* first revision after the current shard */
  svn_revnum_t shard_end_rev;

  /* log-to-phys proto index for the whole pack file */
  apr_file_t *proto_l2p_index;

  /* phys-to-log proto index for the whole pack file */
  apr_file_t *proto_p2l_index;

  /* full shard directory path (containing the unpacked revisions) */
  const char *shard_dir;

  /* full packed shard directory path (containing the pack file + indexes) */
  const char *pack_file_dir;

  /* full pack file path (including PACK_FILE_DIR) */
  const char *pack_file_path;

  /* current write position (i.e. file length) in the pack file */
  apr_off_t pack_offset;

  /* the pack file to ultimately write all data to */
  apr_file_t *pack_file;

  /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists.
   * Will be filled in phase 2 and be cleared after each revision range. */
  apr_array_header_t *changes;

  /* temp file receiving all change list items (referenced by CHANGES).
   * Will be filled in phase 2 and be cleared after each revision range. */
  apr_file_t *changes_file;

  /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties.
   * Will be filled in phase 2 and be cleared after each revision range. */
  apr_array_header_t *file_props;

  /* temp file receiving all file prop items (referenced by FILE_PROPS).
   * Will be filled in phase 2 and be cleared after each revision range. */
  apr_file_t *file_props_file;

  /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties.
   * Will be filled in phase 2 and be cleared after each revision range. */
  apr_array_header_t *dir_props;

  /* temp file receiving all directory prop items (referenced by DIR_PROPS).
   * Will be filled in phase 2 and be cleared after each revision range. */
  apr_file_t *dir_props_file;

  /* container for all PATH members in PATH_ORDER. */
  svn_prefix_tree__t *paths;

  /* array of path_order_t *.  Will be filled in phase 2 and be cleared
   * after each revision range.  Sorted by PATH, NODE_ID. */
  apr_array_header_t *path_order;

  /* array of reference_t* linking representations to their delta bases.
   * Will be filled in phase 2 and be cleared after each revision range.
   * It will be sorted by the FROM members (for rep->base rep lookup). */
  apr_array_header_t *references;

  /* array of svn_fs_fs__p2l_entry_t*.  Will be filled in phase 2 and be
   * cleared after each revision range.  During phase 3, we will set items
   * to NULL that we already processed. */
  apr_array_header_t *reps;

  /* array of int, marking for each revision, at which offset their items
   * begin in REPS.  Will be filled in phase 2 and be cleared after
   * each revision range. */
  apr_array_header_t *rev_offsets;

  /* temp file receiving all items referenced by REPS.
   * Will be filled in phase 2 and be cleared after each revision range. */
  apr_file_t *reps_file;

  /* pool used for temporary data structures that will be cleaned up when
   * the next range of revisions is being processed */
  apr_pool_t *info_pool;
} pack_context_t;

/* Create and initialize a new pack context for packing shard SHARD_REV in
 *
Allocate it in POOL 240 * and return the structure in *CONTEXT. 241 * 242 * Limit the number of items being copied per iteration to MAX_ITEMS. 243 * Set CANCEL_FUNC and CANCEL_BATON as well. 244 */ 245static svn_error_t * 246initialize_pack_context(pack_context_t *context, 247 svn_fs_t *fs, 248 const char *pack_file_dir, 249 const char *shard_dir, 250 svn_revnum_t shard_rev, 251 int max_items, 252 svn_cancel_func_t cancel_func, 253 void *cancel_baton, 254 apr_pool_t *pool) 255{ 256 fs_fs_data_t *ffd = fs->fsap_data; 257 const char *temp_dir; 258 int max_revs = MIN(ffd->max_files_per_dir, max_items); 259 260 SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT); 261 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0); 262 263 /* where we will place our various temp files */ 264 SVN_ERR(svn_io_temp_dir(&temp_dir, pool)); 265 266 /* store parameters */ 267 context->fs = fs; 268 context->cancel_func = cancel_func; 269 context->cancel_baton = cancel_baton; 270 271 context->shard_rev = shard_rev; 272 context->start_rev = shard_rev; 273 context->end_rev = shard_rev; 274 context->shard_end_rev = shard_rev + ffd->max_files_per_dir; 275 276 /* Create the new directory and pack file. 
*/ 277 context->shard_dir = shard_dir; 278 context->pack_file_dir = pack_file_dir; 279 context->pack_file_path 280 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool); 281 SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path, 282 APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL 283 | APR_CREATE, APR_OS_DEFAULT, pool)); 284 285 /* Proto index files */ 286 SVN_ERR(svn_fs_fs__l2p_proto_index_open( 287 &context->proto_l2p_index, 288 svn_dirent_join(pack_file_dir, 289 PATH_INDEX PATH_EXT_L2P_INDEX, 290 pool), 291 pool)); 292 SVN_ERR(svn_fs_fs__p2l_proto_index_open( 293 &context->proto_p2l_index, 294 svn_dirent_join(pack_file_dir, 295 PATH_INDEX PATH_EXT_P2L_INDEX, 296 pool), 297 pool)); 298 299 /* item buckets: one item info array and one temp file per bucket */ 300 context->changes = apr_array_make(pool, max_items, 301 sizeof(svn_fs_fs__p2l_entry_t *)); 302 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir, 303 svn_io_file_del_on_close, pool, pool)); 304 context->file_props = apr_array_make(pool, max_items, 305 sizeof(svn_fs_fs__p2l_entry_t *)); 306 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir, 307 svn_io_file_del_on_close, pool, pool)); 308 context->dir_props = apr_array_make(pool, max_items, 309 sizeof(svn_fs_fs__p2l_entry_t *)); 310 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir, 311 svn_io_file_del_on_close, pool, pool)); 312 313 /* noderev and representation item bucket */ 314 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int)); 315 context->path_order = apr_array_make(pool, max_items, 316 sizeof(path_order_t *)); 317 context->references = apr_array_make(pool, max_items, 318 sizeof(reference_t *)); 319 context->reps = apr_array_make(pool, max_items, 320 sizeof(svn_fs_fs__p2l_entry_t *)); 321 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir, 322 svn_io_file_del_on_close, pool, pool)); 323 324 /* the pool used for temp structures */ 325 
context->info_pool = svn_pool_create(pool); 326 context->paths = svn_prefix_tree__create(context->info_pool); 327 328 return SVN_NO_ERROR; 329} 330 331/* Clean up / free all revision range specific data and files in CONTEXT. 332 * Use POOL for temporary allocations. 333 */ 334static svn_error_t * 335reset_pack_context(pack_context_t *context, 336 apr_pool_t *pool) 337{ 338 const char *temp_dir; 339 340 apr_array_clear(context->changes); 341 SVN_ERR(svn_io_file_close(context->changes_file, pool)); 342 apr_array_clear(context->file_props); 343 SVN_ERR(svn_io_file_close(context->file_props_file, pool)); 344 apr_array_clear(context->dir_props); 345 SVN_ERR(svn_io_file_close(context->dir_props_file, pool)); 346 347 apr_array_clear(context->rev_offsets); 348 apr_array_clear(context->path_order); 349 apr_array_clear(context->references); 350 apr_array_clear(context->reps); 351 SVN_ERR(svn_io_file_close(context->reps_file, pool)); 352 353 svn_pool_clear(context->info_pool); 354 355 /* The new temporary files must live at least as long as any other info 356 * object in CONTEXT. */ 357 SVN_ERR(svn_io_temp_dir(&temp_dir, pool)); 358 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir, 359 svn_io_file_del_on_close, 360 context->info_pool, pool)); 361 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir, 362 svn_io_file_del_on_close, 363 context->info_pool, pool)); 364 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir, 365 svn_io_file_del_on_close, 366 context->info_pool, pool)); 367 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir, 368 svn_io_file_del_on_close, 369 context->info_pool, pool)); 370 context->paths = svn_prefix_tree__create(context->info_pool); 371 372 return SVN_NO_ERROR; 373} 374 375/* Call this after the last revision range. It will finalize all index files 376 * for CONTEXT and close any open files. Use POOL for temporary allocations. 
 */
static svn_error_t *
close_pack_context(pack_context_t *context,
                   apr_pool_t *pool)
{
  const char *proto_l2p_index_path;
  const char *proto_p2l_index_path;

  /* need the file names for the actual index creation call further down */
  SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
                               context->proto_l2p_index, pool));
  SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
                               context->proto_p2l_index, pool));

  /* finalize proto index files */
  SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool));
  SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool));

  /* Append the actual index data to the pack file. */
  SVN_ERR(svn_fs_fs__add_index_data(context->fs, context->pack_file,
                                    proto_l2p_index_path,
                                    proto_p2l_index_path,
                                    context->shard_rev,
                                    pool));

  /* remove proto index files */
  SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool));
  SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool));

  /* Ensure that the packed file is written to disk. */
  SVN_ERR(svn_io_file_flush_to_disk(context->pack_file, pool));
  SVN_ERR(svn_io_file_close(context->pack_file, pool));

  return SVN_NO_ERROR;
}

/* Efficiently copy SIZE bytes from SOURCE to DEST.  Invoke the CANCEL_FUNC
 * from CONTEXT at regular intervals.  Use POOL for allocations.
 */
static svn_error_t *
copy_file_data(pack_context_t *context,
               apr_file_t *dest,
               apr_file_t *source,
               apr_off_t size,
               apr_pool_t *pool)
{
  /* most non-representation items will be small.  Minimize the buffer
   * and infrastructure overhead in that case. */
  enum { STACK_BUFFER_SIZE = 1024 };

  if (size < STACK_BUFFER_SIZE)
    {
      /* copy small data using a fixed-size buffer on stack */
      char buffer[STACK_BUFFER_SIZE];
      SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
                                     NULL, NULL, pool));
      SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
                                     NULL, pool));
    }
  else
    {
      /* use streaming copies for larger data blocks.  That may require
       * the allocation of larger buffers and we should make sure that
       * this extra memory is released asap. */
      fs_fs_data_t *ffd = context->fs->fsap_data;
      apr_pool_t *copypool = svn_pool_create(pool);
      char *buffer = apr_palloc(copypool, ffd->block_size);

      while (size)
        {
          /* copy one block (or the remainder) per iteration */
          apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
          if (context->cancel_func)
            SVN_ERR(context->cancel_func(context->cancel_baton));

          SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
                                         NULL, NULL, pool));
          SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
                                         NULL, pool));

          size -= to_copy;
        }

      svn_pool_destroy(copypool);
    }

  return SVN_NO_ERROR;
}

/* Writes SIZE bytes, all 0, to DEST.  Uses POOL for allocations.
 */
static svn_error_t *
write_null_bytes(apr_file_t *dest,
                 apr_off_t size,
                 apr_pool_t *pool)
{
  /* Have a collection of high-quality, easy to access NUL bytes handy. */
  enum { BUFFER_SIZE = 1024 };
  static const char buffer[BUFFER_SIZE] = { 0 };

  /* copy SIZE of them into the file's buffer */
  while (size)
    {
      apr_size_t to_write = MIN(size, BUFFER_SIZE);
      SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, pool));
      size -= to_write;
    }

  return SVN_NO_ERROR;
}

/* Copy the "simple" item (changed paths list or property representation)
 * from the current position in REV_FILE to TEMP_FILE using CONTEXT.
Add
 * a copy of ENTRY to ENTRIES but with an updated offset value that points
 * to the copy destination in TEMP_FILE.  Use POOL for allocations.
 */
static svn_error_t *
copy_item_to_temp(pack_context_t *context,
                  apr_array_header_t *entries,
                  apr_file_t *temp_file,
                  apr_file_t *rev_file,
                  svn_fs_fs__p2l_entry_t *entry,
                  apr_pool_t *pool)
{
  /* duplicate ENTRY into INFO_POOL so it survives until the range has
   * been fully processed */
  svn_fs_fs__p2l_entry_t *new_entry
    = apr_pmemdup(context->info_pool, entry, sizeof(*entry));

  /* the copy will be appended at the current end of TEMP_FILE */
  SVN_ERR(svn_fs_fs__get_file_offset(&new_entry->offset, temp_file, pool));
  APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = new_entry;

  SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool));

  return SVN_NO_ERROR;
}

/* Return the offset within CONTEXT->REPS that corresponds to item
 * ITEM_INDEX in REVISION.
 */
static int
get_item_array_index(pack_context_t *context,
                     svn_revnum_t revision,
                     apr_int64_t item_index)
{
  /* only items within the currently processed range are tracked */
  assert(revision >= context->start_rev);
  return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
                                         revision - context->start_rev,
                                         int);
}

/* Write ENTRY to the correct position in CONTEXT->REPS.  The latter
 * may need auto-expanding.  Overwriting an array element is not allowed.
 */
static void
add_item_rep_mapping(pack_context_t *context,
                     svn_fs_fs__p2l_entry_t *entry)
{
  int idx;

  /* index of ENTRY */
  idx = get_item_array_index(context,
                             entry->item.revision,
                             entry->item.number);

  /* make sure the index exists in the array */
  while (context->reps->nelts <= idx)
    APR_ARRAY_PUSH(context->reps, void *) = NULL;

  /* set the element.  If there is already an entry, there are probably
   * two items claiming to be the same -> bail out */
  assert(!APR_ARRAY_IDX(context->reps, idx, void *));
  APR_ARRAY_IDX(context->reps, idx, void *) = entry;
}

/* Return the P2L entry from CONTEXT->REPS for the given ID.
If there is
 * none (or not anymore), return NULL.  If RESET has been specified, set
 * the array entry to NULL after returning the entry.
 */
static svn_fs_fs__p2l_entry_t *
get_item(pack_context_t *context,
         const svn_fs_fs__id_part_t *id,
         svn_boolean_t reset)
{
  svn_fs_fs__p2l_entry_t *result = NULL;

  /* IDs with number 0 or from before the current range are not tracked */
  if (id->number && id->revision >= context->start_rev)
    {
      int idx = get_item_array_index(context, id->revision, id->number);
      if (context->reps->nelts > idx)
        {
          result = APR_ARRAY_IDX(context->reps, idx, void *);
          if (result && reset)
            APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
        }
    }

  return result;
}

/* Copy representation item identified by ENTRY from the current position
 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking info needed by
 * our placement algorithm to CONTEXT.  Use POOL for temporary allocations.
 */
static svn_error_t *
copy_rep_to_temp(pack_context_t *context,
                 apr_file_t *rev_file,
                 svn_fs_fs__p2l_entry_t *entry,
                 apr_pool_t *pool)
{
  svn_fs_fs__rep_header_t *rep_header;
  svn_stream_t *stream;
  apr_off_t source_offset = entry->offset;

  /* create a copy of ENTRY, make it point to the copy destination and
   * store it in CONTEXT */
  entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
  SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file, pool));
  add_item_rep_mapping(context, entry);

  /* read & parse the representation header */
  stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
  SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool, pool));
  /* NOTE(review): svn_stream_close() returns an svn_error_t * which is
   * discarded here; consider wrapping in SVN_ERR() so close errors are
   * neither lost nor leaked -- confirm against project conventions. */
  svn_stream_close(stream);

  /* if the representation is a delta against some other rep, link the two */
  if (   rep_header->type == svn_fs_fs__rep_delta
      && rep_header->base_revision >= context->start_rev)
    {
      reference_t *reference = apr_pcalloc(context->info_pool,
                                           sizeof(*reference));
      reference->from = entry->item;
      reference->to.revision = rep_header->base_revision;
      reference->to.number = rep_header->base_item_index;
      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
    }

  /* copy the whole rep (including header!) to our temp file */
  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
  SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
                         pool));

  return SVN_NO_ERROR;
}

/* Directories first, dirs / files sorted by name in reverse lexical order.
 * This maximizes the chance of two items being located close to one another
 * in *all* pack files independent of their change order.  It also groups
 * multi-project repos nicely according to their sub-projects.  The reverse
 * order aspect gives "trunk" preference over "tags" and "branches", so
 * trunk-related items are more likely to be contiguous.
 */
static int
compare_dir_entries_format7(const svn_sort__item_t *a,
                            const svn_sort__item_t *b)
{
  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;

  if (lhs->kind != rhs->kind)
    return lhs->kind == svn_node_dir ? -1 : 1;

  return strcmp(lhs->name, rhs->name);
}

/* Directory entries sorted by revision (decreasing - to max cache hits)
 * and offset (increasing - to max benefit from APR file buffering).
 */
static int
compare_dir_entries_format6(const svn_sort__item_t *a,
                            const svn_sort__item_t *b)
{
  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;

  const svn_fs_fs__id_part_t *lhs_rev_item
    = svn_fs_fs__id_rev_item(lhs->id);
  const svn_fs_fs__id_part_t *rhs_rev_item
    = svn_fs_fs__id_rev_item(rhs->id);

  /* decreasing ("reverse") order on revs */
  if (lhs_rev_item->revision != rhs_rev_item->revision)
    return lhs_rev_item->revision > rhs_rev_item->revision ? -1 : 1;

  /* increasing order on offsets */
  if (lhs_rev_item->number != rhs_rev_item->number)
    return lhs_rev_item->number > rhs_rev_item->number ? 1 : -1;

  return 0;
}

apr_array_header_t *
svn_fs_fs__order_dir_entries(svn_fs_t *fs,
                             apr_hash_t *directory,
                             apr_pool_t *result_pool,
                             apr_pool_t *scratch_pool)
{
  /* pick the comparison function matching the addressing mode */
  apr_array_header_t *ordered
    = svn_sort__hash(directory,
                     svn_fs_fs__use_log_addressing(fs)
                       ? compare_dir_entries_format7
                       : compare_dir_entries_format6,
                     scratch_pool);

  apr_array_header_t *result
    = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));

  int i;
  for (i = 0; i < ordered->nelts; ++i)
    APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
      = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;

  return result;
}

/* Return a duplicate of the ORIGINAL path and with special sub-strings
 * (e.g. "trunk") modified in such a way that have a lower lexicographic
 * value than any other "normal" file name.
 */
static const char *
tweak_path_for_ordering(const char *original,
                        apr_pool_t *pool)
{
  /* We may add further special cases as needed. */
  enum {SPECIAL_COUNT = 2};
  static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
  char *pos;
  char *path = apr_pstrdup(pool, original);
  int i;

  /* Replace the first char of any "special" sub-string we find by
   * a control char, i.e. '\1' .. '\31'.  In the rare event that this
   * would clash with existing paths, no data will be lost but merely
   * the node ordering will be sub-optimal.
   */
  for (i = 0; i < SPECIAL_COUNT; ++i)
    for (pos = strstr(path, special[i]);
         pos;
         pos = strstr(pos + 1, special[i]))
      {
        *pos = (char)(i + '\1');
      }

  return path;
}

/* Copy node revision item identified by ENTRY from the current position
 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking info needed by
 * our placement algorithm to CONTEXT.  Use POOL for temporary allocations.
 */
static svn_error_t *
copy_node_to_temp(pack_context_t *context,
                  apr_file_t *rev_file,
                  svn_fs_fs__p2l_entry_t *entry,
                  apr_pool_t *pool)
{
  path_order_t *path_order = apr_pcalloc(context->info_pool,
                                         sizeof(*path_order));
  node_revision_t *noderev;
  const char *sort_path;
  svn_stream_t *stream;
  apr_off_t source_offset = entry->offset;

  /* read & parse noderev */
  stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
  SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool, pool));
  /* NOTE(review): svn_stream_close() returns an svn_error_t * which is
   * discarded here; consider wrapping in SVN_ERR() -- confirm against
   * project conventions. */
  svn_stream_close(stream);

  /* create a copy of ENTRY, make it point to the copy destination and
   * store it in CONTEXT */
  entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
  SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file,
                                     pool));
  add_item_rep_mapping(context, entry);

  /* copy the noderev to our temp file */
  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
  SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
                         pool));

  /* if the node has a data representation, make that the node's "base".
   * This will (often) cause the noderev to be placed right in front of
   * its data representation. */

  if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev)
    {
      path_order->rep_id.revision = noderev->data_rep->revision;
      path_order->rep_id.number = noderev->data_rep->item_index;
      path_order->expanded_size = noderev->data_rep->expanded_size
                                ? noderev->data_rep->expanded_size
                                : noderev->data_rep->size;
    }

  /* Sort path is the key used for ordering noderevs and associated reps.
   * It will not be stored in the final pack file. */
  sort_path = tweak_path_for_ordering(noderev->created_path, pool);
  path_order->path = svn_prefix_string__create(context->paths, sort_path);
  path_order->node_id = *svn_fs_fs__id_node_id(noderev->id);
  path_order->revision = svn_fs_fs__id_rev(noderev->id);
  path_order->predecessor_count = noderev->predecessor_count;
  path_order->is_dir = noderev->kind == svn_node_dir;
  path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id);
  APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;

  return SVN_NO_ERROR;
}

/* implements compare_fn_t.  Bring all directories in front of the files
   and sort descendingly by PATH, NODE_ID and REVISION.
 */
static int
compare_path_order(const path_order_t * const * lhs_p,
                   const path_order_t * const * rhs_p)
{
  const path_order_t * lhs = *lhs_p;
  const path_order_t * rhs = *rhs_p;

  /* cluster all directories */
  int diff = rhs->is_dir - lhs->is_dir;
  if (diff)
    return diff;

  /* lexicographic order on path */
  diff = svn_prefix_string__compare(lhs->path, rhs->path);
  if (diff)
    return diff;

  /* reverse order on node (i.e.
latest first) */
  diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id);
  if (diff)
    return diff;

  /* reverse order on revision (i.e. latest first) */
  if (lhs->revision != rhs->revision)
    return lhs->revision < rhs->revision ? 1 : -1;

  return 0;
}

/* implements compare_fn_t.  Sort ascendingly by FROM, TO.
 */
static int
compare_references(const reference_t * const * lhs_p,
                   const reference_t * const * rhs_p)
{
  const reference_t * lhs = *lhs_p;
  const reference_t * rhs = *rhs_p;

  int diff = svn_fs_fs__id_part_compare(&lhs->from, &rhs->from);
  return diff ? diff : svn_fs_fs__id_part_compare(&lhs->to, &rhs->to);
}

/* implements compare_fn_t.  Assume ascending order by FROM.
 */
static int
compare_ref_to_item(const reference_t * const * lhs_p,
                    const svn_fs_fs__id_part_t * rhs_p)
{
  return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p);
}

/* implements compare_fn_t.  Finds the DIR / FILE boundary.
 */
static int
compare_is_dir(const path_order_t * const * lhs_p,
               const void *unused)
{
  return (*lhs_p)->is_dir ? -1 : 0;
}

/* Look for the least significant bit set in VALUE and return the smallest
 * number with the same property, i.e. the largest power of 2 that is a
 * factor in VALUE.  Returns INT_MAX for VALUE == 0.
 * NOTE(review): INT_MAX needs <limits.h>, which is not among the includes
 * visible in this file -- presumably provided transitively; confirm. */
static int
roundness(int value)
{
  return value ? value - (value & (value - 1)) : INT_MAX;
}

/* Order a range of data collected in CONTEXT such that we can place them
 * in the desired order.  The input is taken from *PATH_ORDER, offsets FIRST
 * to LAST and then written in the final order to the same range in *TEMP.
 */
static void
sort_reps_range(pack_context_t *context,
                const path_order_t **path_order,
                const path_order_t **temp,
                int first,
                int last)
{
  const svn_prefix_string__t *path;
  int i, dest, best;
  svn_fs_fs__id_part_t rep_id;
  fs_fs_data_t *ffd = context->fs->fsap_data;

  /* The logic below would fail for empty ranges. */
  if (first == last)
    return;

  /* Re-order noderevs like this:
   *
   * (1) Most likely to be referenced by future pack files, in path order.
   * (2) highest revision rep per path + dependency chain
   * (3) Remaining reps in path, rev order
   *
   * We simply pick & chose from the existing path, rev order.
   */
  dest = first;
  path = path_order[first]->path;
  best = first;

  /* (1) For each path, pick the "roundest" representation and put it in
   * front of all other nodes in the pack file.  The "roundest" rep is
   * the one most likely to be referenced from future pack files, i.e. we
   * concentrate those potential "foreign link targets" in one section of
   * the pack file.
   *
   * And we only apply this to reps outside the linear deltification
   * sections because references *into* linear deltification ranges are
   * much less likely.
   */
  for (i = first; i < last; ++i)
    {
      /* Investigated all nodes for the current path? */
      if (svn_prefix_string__compare(path, path_order[i]->path))
        {
          /* next path */
          path = path_order[i]->path;

          /* Pick roundest non-linear deltified node.  Entries moved to
           * TEMP are marked as consumed by NULLing them in PATH_ORDER. */
          if (roundness(path_order[best]->predecessor_count)
               >= ffd->max_linear_deltification)
            {
              temp[dest++] = path_order[best];
              path_order[best] = NULL;
              best = i;
            }
        }

      /* next entry: track the roundest entry seen for the current path */
      if (  roundness(path_order[best]->predecessor_count)
          < roundness(path_order[i]->predecessor_count))
        best = i;
    }

  /* Treat the last path the same as all others.
 */
  if (roundness(path_order[best]->predecessor_count)
       >= ffd->max_linear_deltification)
    {
      temp[dest++] = path_order[best];
      path_order[best] = NULL;
    }

  /* (2) For each (remaining) path, pick the nodes along the delta chain
   * for the highest revision.  Due to our ordering, this is the first
   * node we encounter for any path.
   *
   * Most references that don't hit a delta base picked in (1), will
   * access HEAD of the respective path.  Keeping all its dependency chain
   * in one place turns reconstruction into a linear scan of minimal length.
   */
  for (i = first; i < last; ++i)
    if (path_order[i])
      {
        /* This is the first path we still have to handle. */
        path = path_order[i]->path;
        rep_id = path_order[i]->rep_id;
        break;
      }

  for (i = first; i < last; ++i)
    if (path_order[i])
      {
        /* New path? */
        if (svn_prefix_string__compare(path, path_order[i]->path))
          {
            path = path_order[i]->path;
            rep_id = path_order[i]->rep_id;
          }

        /* Pick nodes along the deltification chain.  Skip side-branches. */
        if (svn_fs_fs__id_part_eq(&path_order[i]->rep_id, &rep_id))
          {
            reference_t **reference;

            temp[dest++] = path_order[i];
            path_order[i] = NULL;

            /* Advance REP_ID to the delta base of the node just placed,
             * so the next hit continues the same chain. */
            reference = svn_sort__array_lookup(context->references,
                                               &rep_id, NULL,
                      (int (*)(const void *, const void *))compare_ref_to_item);
            if (reference)
              rep_id = (*reference)->to;
          }
      }

  /* (3) All remaining nodes in path, rev order.  Linear deltification
   * makes HEAD delta chains from (2) cover all or most of their deltas
   * in a given pack file.  So, this is just a few remnants that we put
   * at the end of the pack file.
   */
  for (i = first; i < last; ++i)
    if (path_order[i])
      temp[dest++] = path_order[i];

  /* We now know the final ordering.
 */
  assert(dest == last);
}

/* Order the data collected in CONTEXT such that we can place them in the
 * desired order.
 */
static void
sort_reps(pack_context_t *context)
{
  apr_pool_t *temp_pool;
  const path_order_t **temp, **path_order;
  int i, count, dir_count;

  /* We will later assume that there is at least one node / path.
   */
  if (context->path_order->nelts == 0)
    {
      assert(context->references->nelts == 0);
      return;
    }

  /* Sort containers by path and IDs, respectively.
   */
  svn_sort__array(context->path_order,
                  (int (*)(const void *, const void *))compare_path_order);
  svn_sort__array(context->references,
                  (int (*)(const void *, const void *))compare_references);

  /* Directories are already in front; sort directories section and files
   * section separately but use the same heuristics (see sub-function).
   */
  temp_pool = svn_pool_create(context->info_pool);
  count = context->path_order->nelts;
  temp = apr_pcalloc(temp_pool, count * sizeof(*temp));
  path_order = (void *)context->path_order->elts;

  /* Find the boundary between DIR and FILE section. */
  dir_count = svn_sort__bsearch_lower_bound(context->path_order, NULL,
                      (int (*)(const void *, const void *))compare_is_dir);

  /* Sort those sub-sections separately. */
  sort_reps_range(context, path_order, temp, 0, dir_count);
  sort_reps_range(context, path_order, temp, dir_count, count);

  /* We now know the final ordering. */
  for (i = 0; i < count; ++i)
    path_order[i] = temp[i];

  svn_pool_destroy(temp_pool);
}

/* implements compare_fn_t.  Place LHS before RHS, if the latter is older.
 */
static int
compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs,
                 const svn_fs_fs__p2l_entry_t * const * rhs)
{
  assert(*lhs != *rhs);

  /* Same revision: the higher item number is the younger entry. */
  if ((*lhs)->item.revision == (*rhs)->item.revision)
    return (*lhs)->item.number > (*rhs)->item.number ? -1 : 1;

  /* Higher (younger) revisions sort first. */
  return (*lhs)->item.revision > (*rhs)->item.revision ? -1 : 1;
}

/* Sort svn_fs_fs__p2l_entry_t * array ENTRIES by age.  Place the latest
 * items first.
 */
static void
sort_items(apr_array_header_t *entries)
{
  svn_sort__array(entries,
                  (int (*)(const void *, const void *))compare_p2l_info);
}

/* Return the remaining unused bytes in the current block in CONTEXT's
 * pack file.
 */
static apr_ssize_t
get_block_left(pack_context_t *context)
{
  fs_fs_data_t *ffd = context->fs->fsap_data;
  return ffd->block_size - (context->pack_offset % ffd->block_size);
}

/* To prevent items from overlapping a block boundary, we will usually
 * put them into the next block and top up the old one with NUL bytes.
 * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does
 * not fit into the current block and the padding is short enough.
 * Use POOL for allocations.
 */
static svn_error_t *
auto_pad_block(pack_context_t *context,
               apr_off_t to_add,
               apr_pool_t *pool)
{
  fs_fs_data_t *ffd = context->fs->fsap_data;

  /* This is the maximum number of bytes "wasted" that way per block.
   * Larger items will cross the block boundaries. */
  const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);

  /* Is wasted space small enough to align the current item to the next
   * block? */
  apr_off_t padding = get_block_left(context);

  if (padding < to_add && padding < max_padding)
    {
      /* Yes.  Top up with NUL bytes and don't forget to create
       * a P2L index entry marking this section as unused. */
      svn_fs_fs__p2l_entry_t null_entry;

      null_entry.offset = context->pack_offset;
      null_entry.size = padding;
      null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
      null_entry.item.revision = SVN_INVALID_REVNUM;
      null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
      null_entry.fnv1_checksum = 0;

      SVN_ERR(write_null_bytes(context->pack_file, padding, pool));
      SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
                   context->proto_p2l_index, &null_entry, pool));
      context->pack_offset += padding;
    }

  return SVN_NO_ERROR;
}

/* Read the contents of ITEM, if not empty, from TEMP_FILE and write it
 * to CONTEXT->PACK_FILE.  Use POOL for allocations.
 */
static svn_error_t *
store_item(pack_context_t *context,
           apr_file_t *temp_file,
           svn_fs_fs__p2l_entry_t *item,
           apr_pool_t *pool)
{
  apr_off_t safety_margin;

  /* skip empty entries */
  if (item->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
    return SVN_NO_ERROR;

  /* If the next item does not fit into the current block, auto-pad it.
     Take special care of textual noderevs since their parsers may
     prefetch up to 80 bytes and we don't want them to cross block
     boundaries. */
  safety_margin = item->type == SVN_FS_FS__ITEM_TYPE_NODEREV
                ? SVN__LINE_CHUNK_SIZE
                : 0;
  SVN_ERR(auto_pad_block(context, item->size + safety_margin, pool));

  /* select the item in the source file and copy it into the target
   * pack file */
  SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &item->offset, pool));
  SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
                         item->size, pool));

  /* write index entry and update current position */
  item->offset = context->pack_offset;
  context->pack_offset += item->size;

  SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(context->proto_p2l_index,
                                               item, pool));

  /* Remember the entry for the later L2P index run. */
  APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = item;

  return SVN_NO_ERROR;
}

/* Read the contents of the non-empty items in ITEMS from TEMP_FILE and
 * write them to CONTEXT->PACK_FILE.  Use POOL for allocations.
 */
static svn_error_t *
store_items(pack_context_t *context,
            apr_file_t *temp_file,
            apr_array_header_t *items,
            apr_pool_t *pool)
{
  int i;
  apr_pool_t *iterpool = svn_pool_create(pool);

  /* copy all items in strict order */
  for (i = 0; i < items->nelts; ++i)
    {
      svn_pool_clear(iterpool);
      SVN_ERR(store_item(context, temp_file,
                         APR_ARRAY_IDX(items, i, svn_fs_fs__p2l_entry_t *),
                         iterpool));
    }

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}

/* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements
 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
 * Use POOL for temporary allocations.
 */
static svn_error_t *
copy_reps_from_temp(pack_context_t *context,
                    apr_file_t *temp_file,
                    apr_pool_t *pool)
{
  apr_pool_t *iterpool = svn_pool_create(pool);
  apr_array_header_t *path_order = context->path_order;
  int i;

  /* copy items in path order; for each path, the noderev precedes its
   * representation */
  for (i = 0; i < path_order->nelts; ++i)
    {
      path_order_t *current_path;
      svn_fs_fs__p2l_entry_t *node_part;
      svn_fs_fs__p2l_entry_t *rep_part;

      svn_pool_clear(iterpool);

      current_path = APR_ARRAY_IDX(path_order, i, path_order_t *);
      node_part = get_item(context, &current_path->noderev_id, TRUE);
      rep_part = get_item(context, &current_path->rep_id, TRUE);

      if (node_part)
        SVN_ERR(store_item(context, temp_file, node_part, iterpool));
      if (rep_part)
        SVN_ERR(store_item(context, temp_file, rep_part, iterpool));
    }

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}

/* implements compare_fn_t.  Place LHS before RHS, if the latter belongs to
 * a newer revision.
 */
static int
compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p,
                     const svn_fs_fs__p2l_entry_t * const * rhs_p)
{
  const svn_fs_fs__p2l_entry_t * lhs = *lhs_p;
  const svn_fs_fs__p2l_entry_t * rhs = *rhs_p;

  if (lhs->item.revision == rhs->item.revision)
    return 0;

  return lhs->item.revision < rhs->item.revision ? -1 : 1;
}

/* Write the log-to-phys proto index file for CONTEXT and use POOL for
 * temporary allocations.  All items in all buckets must have been placed
 * by now.
1231 */ 1232static svn_error_t * 1233write_l2p_index(pack_context_t *context, 1234 apr_pool_t *pool) 1235{ 1236 apr_pool_t *iterpool = svn_pool_create(pool); 1237 svn_revnum_t prev_rev = SVN_INVALID_REVNUM; 1238 int i, dest; 1239 1240 /* eliminate empty entries from CONTEXT->REPS */ 1241 for (i = 0, dest = 0; i < context->reps->nelts; ++i) 1242 { 1243 svn_fs_fs__p2l_entry_t *entry 1244 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *); 1245 if (entry) 1246 APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *) 1247 = entry; 1248 } 1249 context->reps->nelts = dest; 1250 1251 /* we need to write the l2p index revision by revision */ 1252 svn_sort__array(context->reps, 1253 (int (*)(const void *, const void *))compare_p2l_info_rev); 1254 1255 /* write index entries */ 1256 for (i = 0; i < context->reps->nelts; ++i) 1257 { 1258 svn_fs_fs__p2l_entry_t *p2l_entry 1259 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *); 1260 if (p2l_entry == NULL) 1261 continue; 1262 1263 /* next revision? */ 1264 if (prev_rev != p2l_entry->item.revision) 1265 { 1266 prev_rev = p2l_entry->item.revision; 1267 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision( 1268 context->proto_l2p_index, iterpool)); 1269 } 1270 1271 /* add entry */ 1272 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index, 1273 p2l_entry->offset, 1274 p2l_entry->item.number, 1275 iterpool)); 1276 1277 /* keep memory usage in check */ 1278 if (i % 256 == 0) 1279 svn_pool_clear(iterpool); 1280 } 1281 1282 svn_pool_destroy(iterpool); 1283 1284 return SVN_NO_ERROR; 1285} 1286 1287/* Pack the current revision range of CONTEXT, i.e. this covers phases 2 1288 * to 4. Use POOL for allocations. 
 */
static svn_error_t *
pack_range(pack_context_t *context,
           apr_pool_t *pool)
{
  fs_fs_data_t *ffd = context->fs->fsap_data;
  apr_pool_t *revpool = svn_pool_create(pool);
  apr_pool_t *iterpool = svn_pool_create(pool);
  apr_pool_t *iterpool2 = svn_pool_create(pool);

  /* Phase 2: Copy items into various buckets and build tracking info */
  svn_revnum_t revision;
  for (revision = context->start_rev; revision < context->end_rev; ++revision)
    {
      apr_off_t offset = 0;
      svn_fs_fs__revision_file_t *rev_file;

      svn_pool_clear(revpool);

      /* Get the rev file dimensions (mainly index locations). */
      SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
                                               revision, revpool, iterpool));
      SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));

      /* store the indirect array index */
      APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;

      /* read the phys-to-log index file until we covered the whole rev file.
       * That index contains enough info to build both target indexes from it.
       */
      while (offset < rev_file->l2p_offset)
        {
          /* read one cluster */
          int i;
          apr_array_header_t *entries;

          svn_pool_clear(iterpool);

          SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs,
                                              rev_file, revision, offset,
                                              ffd->p2l_page_size, iterpool,
                                              iterpool));

          for (i = 0; i < entries->nelts; ++i)
            {
              svn_fs_fs__p2l_entry_t *entry
                = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);

              /* skip first entry if that was duplicated due to crossing a
                 cluster boundary */
              if (offset > entry->offset)
                continue;

              svn_pool_clear(iterpool2);

              /* process entry while inside the rev file */
              offset = entry->offset;
              if (offset < rev_file->l2p_offset)
                {
                  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset,
                                           iterpool2));

                  /* Dispatch the item into its coarse presort bucket. */
                  if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
                    SVN_ERR(copy_item_to_temp(context,
                                              context->changes,
                                              context->changes_file,
                                              rev_file->file, entry,
                                              iterpool2));
                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
                    SVN_ERR(copy_item_to_temp(context,
                                              context->file_props,
                                              context->file_props_file,
                                              rev_file->file, entry,
                                              iterpool2));
                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
                    SVN_ERR(copy_item_to_temp(context,
                                              context->dir_props,
                                              context->dir_props_file,
                                              rev_file->file, entry,
                                              iterpool2));
                  else if (   entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP
                           || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
                    SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry,
                                             iterpool2));
                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
                    SVN_ERR(copy_node_to_temp(context, rev_file->file, entry,
                                              iterpool2));
                  else
                    SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED);

                  offset += entry->size;
                }
            }

          if (context->cancel_func)
            SVN_ERR(context->cancel_func(context->cancel_baton));
        }
    }

  svn_pool_destroy(iterpool2);
  svn_pool_destroy(iterpool);

  /* phase 3: placement.
   * Use "newest first" placement for simple items. */
  sort_items(context->changes);
  sort_items(context->file_props);
  sort_items(context->dir_props);

  /* follow dependencies recursively for noderevs and data representations */
  sort_reps(context);

  /* phase 4: copy bucket data to pack file.  Write P2L index. */
  SVN_ERR(store_items(context, context->changes_file, context->changes,
                      revpool));
  svn_pool_clear(revpool);
  SVN_ERR(store_items(context, context->file_props_file, context->file_props,
                      revpool));
  svn_pool_clear(revpool);
  SVN_ERR(store_items(context, context->dir_props_file, context->dir_props,
                      revpool));
  svn_pool_clear(revpool);
  SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
  svn_pool_clear(revpool);

  /* write L2P index as well (now that we know all target offsets) */
  SVN_ERR(write_l2p_index(context, revpool));

  svn_pool_destroy(revpool);

  return SVN_NO_ERROR;
}

/* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
 * This function will only be used for very large revisions (>>100k changes).
 * Use POOL for temporary allocations.
 */
static svn_error_t *
append_revision(pack_context_t *context,
                apr_pool_t *pool)
{
  fs_fs_data_t *ffd = context->fs->fsap_data;
  apr_off_t offset = 0;
  apr_pool_t *iterpool = svn_pool_create(pool);
  svn_fs_fs__revision_file_t *rev_file;
  svn_filesize_t revdata_size;

  /* Copy all non-index contents of the rev file to the end of the pack
   * file.
   */
  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
                                           context->start_rev, pool,
                                           iterpool));

  SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
  revdata_size = rev_file->l2p_offset;

  SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, NULL, 0,
                                   iterpool));
  SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
                         revdata_size, iterpool));

  /* mark the start of a new revision */
  SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index,
                                                  pool));

  /* read the phys-to-log index file until we covered the whole rev file.
   * That index contains enough info to build both target indexes from it. */
  while (offset < revdata_size)
    {
      /* read one cluster */
      int i;
      apr_array_header_t *entries;

      svn_pool_clear(iterpool);
      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file,
                                          context->start_rev, offset,
                                          ffd->p2l_page_size, iterpool,
                                          iterpool));

      for (i = 0; i < entries->nelts; ++i)
        {
          svn_fs_fs__p2l_entry_t *entry
            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);

          /* skip first entry if that was duplicated due to crossing a
             cluster boundary */
          if (offset > entry->offset)
            continue;

          /* process entry while inside the rev file */
          offset = entry->offset;
          if (offset < revdata_size)
            {
              /* Shift the entry to its new physical location and add it
               * to both proto indexes. */
              entry->offset += context->pack_offset;
              offset += entry->size;
              SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(
                         context->proto_l2p_index, entry->offset,
                         entry->item.number, iterpool));
              SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
                         context->proto_p2l_index, entry, iterpool));
            }
        }
    }

  svn_pool_destroy(iterpool);
  context->pack_offset += revdata_size;

  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));

  return SVN_NO_ERROR;
}

/* Logical addressing mode
 packing logic.
 *
 * Pack the revision shard starting at SHARD_REV in filesystem FS from
 * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations.  Limit
 * the extra memory consumption to MAX_MEM bytes.  CANCEL_FUNC and
 * CANCEL_BATON are what you think they are.
 */
static svn_error_t *
pack_log_addressed(svn_fs_t *fs,
                   const char *pack_file_dir,
                   const char *shard_dir,
                   svn_revnum_t shard_rev,
                   apr_size_t max_mem,
                   svn_cancel_func_t cancel_func,
                   void *cancel_baton,
                   apr_pool_t *pool)
{
  enum
    {
      /* estimated amount of memory used to represent one item in memory
       * during rev file packing */
      PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
                   + APR_ALIGN_DEFAULT(2 *sizeof(void*))
                   + APR_ALIGN_DEFAULT(sizeof(reference_t))
                   + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t))
                   + 6 * sizeof(void*)
    };

  int max_items;
  apr_array_header_t *max_ids;
  pack_context_t context = { 0 };
  int i;
  apr_size_t item_count = 0;
  apr_pool_t *iterpool = svn_pool_create(pool);

  /* Prevent integer overflow.  We use apr arrays to process the items so
   * the maximum number of items is INT_MAX.
   */
  {
    apr_size_t temp = max_mem / PER_ITEM_MEM;
    SVN_ERR_ASSERT(temp <= INT_MAX);
    max_items = (int)temp;
  }

  /* set up a pack context */
  SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
                                  shard_rev, max_items, cancel_func,
                                  cancel_baton, pool));

  /* phase 1: determine the size of the revisions to pack */
  SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev,
                                     context.shard_end_rev - shard_rev,
                                     pool, pool));

  /* pack revisions in ranges that don't exceed MAX_MEM */
  for (i = 0; i < max_ids->nelts; ++i)
    if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items)
      {
        /* The current revision still fits into the current batch. */
        item_count += APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
        context.end_rev++;
      }
    else
      {
        svn_pool_clear(iterpool);

        /* some unpacked revisions before this one? */
        if (context.start_rev < context.end_rev)
          {
            /* pack them intelligently (might be just 1 rev but still ...) */
            SVN_ERR(pack_range(&context, iterpool));
            SVN_ERR(reset_pack_context(&context, iterpool));
            item_count = 0;
          }

        /* next revision range is to start with the current revision */
        context.start_rev = i + context.shard_rev;
        context.end_rev = context.start_rev + 1;

        /* if this is a very large revision, we must place it as is */
        if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
          {
            SVN_ERR(append_revision(&context, iterpool));
            context.start_rev++;
          }
        else
          item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
      }

  /* non-empty revision range at the end?
   */
  if (context.start_rev < context.end_rev)
    SVN_ERR(pack_range(&context, iterpool));

  /* last phase: finalize indexes and clean up */
  SVN_ERR(reset_pack_context(&context, iterpool));
  SVN_ERR(close_pack_context(&context, iterpool));
  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}

/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
   Use POOL for temporary allocations. */
svn_error_t *
svn_fs_fs__get_packed_offset(apr_off_t *rev_offset,
                             svn_fs_t *fs,
                             svn_revnum_t rev,
                             apr_pool_t *pool)
{
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_stream_t *manifest_stream;
  svn_boolean_t is_cached;
  svn_revnum_t shard;
  apr_int64_t shard_pos;
  apr_array_header_t *manifest;
  apr_pool_t *iterpool;

  shard = rev / ffd->max_files_per_dir;

  /* position of the shard within the manifest */
  shard_pos = rev % ffd->max_files_per_dir;

  /* fetch exactly that element into *rev_offset, if the manifest is found
     in the cache */
  SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
                                 ffd->packed_offset_cache, &shard,
                                 svn_fs_fs__get_sharded_offset, &shard_pos,
                                 pool));

  if (is_cached)
    return SVN_NO_ERROR;

  /* Open the manifest file. */
  SVN_ERR(svn_stream_open_readonly(&manifest_stream,
                                   svn_fs_fs__path_rev_packed(fs, rev,
                                                              PATH_MANIFEST,
                                                              pool),
                                   pool, pool));

  /* While we're here, let's just read the entire manifest file into an array,
     so we can cache the entire thing.
   */
  iterpool = svn_pool_create(pool);
  manifest = apr_array_make(pool, ffd->max_files_per_dir, sizeof(apr_off_t));
  while (1)
    {
      svn_boolean_t eof;
      apr_int64_t val;

      svn_pool_clear(iterpool);
      SVN_ERR(svn_fs_fs__read_number_from_stream(&val, &eof, manifest_stream,
                                                 iterpool));
      if (eof)
        break;

      APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val;
    }
  svn_pool_destroy(iterpool);

  *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
                              apr_off_t);

  /* Close up shop and cache the array. */
  SVN_ERR(svn_stream_close(manifest_stream));
  return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool);
}

/* Packing logic for physical addressing mode:
 * Simply concatenate all revision contents.
 *
 * Pack the revision shard starting at SHARD_REV containing exactly
 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
 * using POOL for allocations.  CANCEL_FUNC and CANCEL_BATON are what you
 * think they are.
 */
static svn_error_t *
pack_phys_addressed(const char *pack_file_dir,
                    const char *shard_path,
                    svn_revnum_t start_rev,
                    int max_files_per_dir,
                    svn_cancel_func_t cancel_func,
                    void *cancel_baton,
                    apr_pool_t *pool)
{
  const char *pack_file_path, *manifest_file_path;
  apr_file_t *pack_file;
  apr_file_t *manifest_file;
  svn_stream_t *manifest_stream;
  svn_revnum_t end_rev, rev;
  apr_off_t next_offset;
  apr_pool_t *iterpool;

  /* Some useful paths. */
  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
  manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, pool);

  /* Create the new directory and pack file.
   * Use unbuffered apr_file_t since we're going to write using 16kb
   * chunks.
 */
  SVN_ERR(svn_io_file_open(&pack_file, pack_file_path,
                           APR_WRITE | APR_CREATE | APR_EXCL,
                           APR_OS_DEFAULT, pool));

  /* Create the manifest file. */
  SVN_ERR(svn_io_file_open(&manifest_file, manifest_file_path,
                           APR_WRITE | APR_BUFFERED | APR_CREATE | APR_EXCL,
                           APR_OS_DEFAULT, pool));
  manifest_stream = svn_stream_from_aprfile2(manifest_file, TRUE, pool);

  end_rev = start_rev + max_files_per_dir - 1;
  next_offset = 0;
  iterpool = svn_pool_create(pool);

  /* Iterate over the revisions in this shard, squashing them together. */
  for (rev = start_rev; rev <= end_rev; rev++)
    {
      svn_stream_t *rev_stream;
      apr_finfo_t finfo;
      const char *path;

      svn_pool_clear(iterpool);

      /* Get the size of the file. */
      path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
                             iterpool);
      SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));

      /* build manifest: record where this revision starts in the pack */
      SVN_ERR(svn_stream_printf(manifest_stream, iterpool,
                                "%" APR_OFF_T_FMT "\n", next_offset));
      next_offset += finfo.size;

      /* Copy all the bits from the rev file to the end of the pack file. */
      SVN_ERR(svn_stream_open_readonly(&rev_stream, path, iterpool, iterpool));
      SVN_ERR(svn_stream_copy3(rev_stream,
                               svn_stream_from_aprfile2(pack_file, TRUE, pool),
                               cancel_func, cancel_baton, iterpool));
    }

  /* Close stream over APR file. */
  SVN_ERR(svn_stream_close(manifest_stream));

  /* Ensure that the manifest file is written to disk. */
  SVN_ERR(svn_io_file_flush_to_disk(manifest_file, pool));
  SVN_ERR(svn_io_file_close(manifest_file, pool));

  /* disallow write access to the manifest file */
  SVN_ERR(svn_io_set_file_read_only(manifest_file_path, FALSE, iterpool));

  /* Ensure that pack file is written to disk.
   */
  SVN_ERR(svn_io_file_flush_to_disk(pack_file, pool));
  SVN_ERR(svn_io_file_close(pack_file, pool));

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}

/* In filesystem FS, pack the revision SHARD containing exactly
 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
 * using POOL for allocations.  Try to limit the amount of temporary
 * memory needed to MAX_MEM bytes.  CANCEL_FUNC and CANCEL_BATON are what
 * you think they are.
 *
 * If for some reason we detect a partial packing already performed, we
 * remove the pack file and start again.
 *
 * The actual packing will be done in a format-specific sub-function.
 */
static svn_error_t *
pack_rev_shard(svn_fs_t *fs,
               const char *pack_file_dir,
               const char *shard_path,
               apr_int64_t shard,
               int max_files_per_dir,
               apr_size_t max_mem,
               svn_cancel_func_t cancel_func,
               void *cancel_baton,
               apr_pool_t *pool)
{
  const char *pack_file_path;
  svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);

  /* Some useful paths. */
  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);

  /* Remove any existing pack file for this shard, since it is incomplete. */
  SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
                             pool));

  /* Create the new directory and pack file.
   */
  SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool));

  /* Index information files */
  if (svn_fs_fs__use_log_addressing(fs))
    SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
                               max_mem, cancel_func, cancel_baton, pool));
  else
    SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev,
                                max_files_per_dir, cancel_func,
                                cancel_baton, pool));

  SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool));
  SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool));

  return SVN_NO_ERROR;
}

/* Baton struct used by pack_body(), pack_shard() and synced_pack_shard().
   These calls are nested and for every level additional fields will be
   available. */
struct pack_baton
{
  /* Valid when entering pack_body(). */
  svn_fs_t *fs;
  svn_fs_pack_notify_t notify_func;
  void *notify_baton;
  svn_cancel_func_t cancel_func;
  void *cancel_baton;
  size_t max_mem;

  /* Additional entries valid when entering pack_shard(). */
  const char *revs_dir;
  const char *revsprops_dir;
  apr_int64_t shard;

  /* Additional entries valid when entering synced_pack_shard(). */
  const char *rev_shard_path;
};


/* Part of the pack process that requires global (write) synchronization.
 * Pack the revision properties of the shard described by BATON (if
 * enabled) and replace the non-packed revprop & rev shard folder(s)
 * with the packed ones.
 * The packed rev folder has been created prior to calling this function.
 */
static svn_error_t *
synced_pack_shard(void *baton,
                  apr_pool_t *pool)
{
  struct pack_baton *pb = baton;
  fs_fs_data_t *ffd = pb->fs->fsap_data;
  const char *revprops_shard_path, *revprops_pack_file_dir;

  /* if enabled, pack the revprops in an equivalent way */
  if (pb->revsprops_dir)
    {
      revprops_pack_file_dir = svn_dirent_join(pb->revsprops_dir,
                   apr_psprintf(pool,
                                "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
                                pb->shard),
                   pool);
      revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
                   apr_psprintf(pool, "%" APR_INT64_T_FMT, pb->shard),
                   pool);

      /* NOTE(review): the 0.9 factor apparently keeps some headroom below
         the configured revprop pack size — confirm against
         svn_fs_fs__pack_revprops_shard()'s contract. */
      SVN_ERR(svn_fs_fs__pack_revprops_shard(revprops_pack_file_dir,
                                             revprops_shard_path,
                                             pb->shard,
                                             ffd->max_files_per_dir,
                                             (int)(0.9*ffd->revprop_pack_size),
                                             ffd->compress_packed_revprops
                                               ? SVN__COMPRESSION_ZLIB_DEFAULT
                                               : SVN__COMPRESSION_NONE,
                                             pb->cancel_func,
                                             pb->cancel_baton,
                                             pool));
    }

  /* Update the min-unpacked-rev file to reflect our newly packed shard.
     Write the on-disk file first, then mirror the value into the cached
     copy in FFD. */
  SVN_ERR(svn_fs_fs__write_min_unpacked_rev(pb->fs,
                  (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir),
                  pool));
  ffd->min_unpacked_rev
    = (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir);

  /* Finally, remove the existing shard directories.
   * For revprops, clean up older obsolete shards as well as they might
   * have been left over from an interrupted FS upgrade. */
  SVN_ERR(svn_io_remove_dir2(pb->rev_shard_path, TRUE,
                             pb->cancel_func, pb->cancel_baton, pool));
  if (pb->revsprops_dir)
    {
      svn_node_kind_t kind = svn_node_dir;
      apr_int64_t to_cleanup = pb->shard;
      do
        {
          SVN_ERR(svn_fs_fs__delete_revprops_shard(revprops_shard_path,
                                                   to_cleanup,
                                                   ffd->max_files_per_dir,
                                                   pb->cancel_func,
                                                   pb->cancel_baton,
                                                   pool));

          /* If the previous shard exists, clean it up as well.
             Don't try to clean up shard 0 as we can't tell quickly
             whether it actually needs cleaning up. */
          revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
                   apr_psprintf(pool, "%" APR_INT64_T_FMT, --to_cleanup),
                   pool);
          SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, pool));
        }
      while (kind == svn_node_dir && to_cleanup > 0);
    }

  return SVN_NO_ERROR;
}

/* Pack the shard described by BATON.
 *
 * If for some reason we detect a partial packing already performed,
 * we remove the pack file and start again.
 */
static svn_error_t *
pack_shard(struct pack_baton *baton,
           apr_pool_t *pool)
{
  fs_fs_data_t *ffd = baton->fs->fsap_data;
  const char *rev_pack_file_dir;

  /* Notify caller we're starting to pack this shard. */
  if (baton->notify_func)
    SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
                               svn_fs_pack_notify_start, pool));

  /* Some useful paths: the future "NNN.pack" dir and the current
     non-packed shard dir "NNN". */
  rev_pack_file_dir = svn_dirent_join(baton->revs_dir,
                  apr_psprintf(pool,
                               "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
                               baton->shard),
                  pool);
  baton->rev_shard_path = svn_dirent_join(baton->revs_dir,
                                          apr_psprintf(pool,
                                                       "%" APR_INT64_T_FMT,
                                                       baton->shard),
                                          pool);

  /* pack the revision content */
  SVN_ERR(pack_rev_shard(baton->fs, rev_pack_file_dir, baton->rev_shard_path,
                         baton->shard, ffd->max_files_per_dir,
                         baton->max_mem, baton->cancel_func,
                         baton->cancel_baton, pool));

  /* For newer repo formats, we only acquired the pack lock so far.
     Before modifying the repo state by switching over to the packed
     data, we need to acquire the global (write) lock.
 */
  if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
    SVN_ERR(svn_fs_fs__with_write_lock(baton->fs, synced_pack_shard, baton,
                                       pool));
  else
    SVN_ERR(synced_pack_shard(baton, pool));

  /* Notify caller we're finished packing this shard. */
  if (baton->notify_func)
    SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
                               svn_fs_pack_notify_end, pool));

  return SVN_NO_ERROR;
}

/* The work-horse for svn_fs_fs__pack, called with the FS write lock.
   (NOTE(review): for formats >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT the
   caller holds the pack lock instead; see svn_fs_fs__pack().)
   This implements the svn_fs_fs__with_write_lock() 'body' callback
   type.  BATON is a 'struct pack_baton *'.

   WARNING: if you add a call to this function, please note:
   The code currently assumes that any piece of code running with
   the write-lock set can rely on the ffd->min_unpacked_rev and
   ffd->min_unpacked_revprop caches to be up-to-date (and, by
   extension, on not having to use a retry when calling
   svn_fs_fs__path_rev_absolute() and friends).  If you add a call
   to this function, consider whether you have to call
   svn_fs_fs__update_min_unpacked_rev().
   See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
 */
static svn_error_t *
pack_body(void *baton,
          apr_pool_t *pool)
{
  struct pack_baton *pb = baton;
  fs_fs_data_t *ffd = pb->fs->fsap_data;
  apr_int64_t completed_shards;
  svn_revnum_t youngest;
  apr_pool_t *iterpool;

  /* If the repository isn't a new enough format, we don't support packing.
     Return a friendly error to that effect. */
  if (ffd->format < SVN_FS_FS__MIN_PACKED_FORMAT)
    return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
      _("FSFS format (%d) too old to pack; please upgrade the filesystem."),
      ffd->format);

  /* If we aren't using sharding, we can't do any packing, so quit.
 */
  if (!ffd->max_files_per_dir)
    return SVN_NO_ERROR;

  /* Refresh the cached min-unpacked-rev from disk before relying on it. */
  SVN_ERR(svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs,
                                           pool));

  /* Only shards that are completely covered by committed revisions can
     be packed. */
  SVN_ERR(svn_fs_fs__youngest_rev(&youngest, pb->fs, pool));
  completed_shards = (youngest + 1) / ffd->max_files_per_dir;

  /* See if we've already completed all possible shards thus far. */
  if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
    return SVN_NO_ERROR;

  pb->revs_dir = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, pool);
  if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT)
    pb->revsprops_dir = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
                                        pool);

  /* Pack each complete, not-yet-packed shard in turn, recycling the
     iteration pool between shards. */
  iterpool = svn_pool_create(pool);
  for (pb->shard = ffd->min_unpacked_rev / ffd->max_files_per_dir;
       pb->shard < completed_shards;
       pb->shard++)
    {
      svn_pool_clear(iterpool);

      if (pb->cancel_func)
        SVN_ERR(pb->cancel_func(pb->cancel_baton));

      SVN_ERR(pack_shard(pb, iterpool));
    }

  svn_pool_destroy(iterpool);
  return SVN_NO_ERROR;
}

/* Pack all completed, not-yet-packed shards of FS, using up to MAX_MEM
   bytes for the item placement step (0 selects DEFAULT_MAX_MEM).  Report
   progress through NOTIFY_FUNC/NOTIFY_BATON and support cancellation via
   CANCEL_FUNC/CANCEL_BATON.  The actual work is done by pack_body() under
   the appropriate repository lock. */
svn_error_t *
svn_fs_fs__pack(svn_fs_t *fs,
                apr_size_t max_mem,
                svn_fs_pack_notify_t notify_func,
                void *notify_baton,
                svn_cancel_func_t cancel_func,
                void *cancel_baton,
                apr_pool_t *pool)
{
  struct pack_baton pb = { 0 };
  fs_fs_data_t *ffd = fs->fsap_data;
  svn_error_t *err;

  pb.fs = fs;
  pb.notify_func = notify_func;
  pb.notify_baton = notify_baton;
  pb.cancel_func = cancel_func;
  pb.cancel_baton = cancel_baton;
  pb.max_mem = max_mem ? max_mem : DEFAULT_MAX_MEM;

  if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
    {
      /* Newer repositories provide a pack operation specific lock.
         Acquire it to prevent concurrent packs.

         Since the file lock's lifetime is bound to a pool, we create a
         separate subpool here to release the lock immediately after the
         operation finished.
       */
      err = svn_fs_fs__with_pack_lock(fs, pack_body, &pb, pool);
    }
  else
    {
      /* Use the global write lock for older repos. */
      err = svn_fs_fs__with_write_lock(fs, pack_body, &pb, pool);
    }

  return svn_error_trace(err);
}