1/* pack.c --- FSX shard packing functionality 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22#include <assert.h> 23 24#include "svn_pools.h" 25#include "svn_dirent_uri.h" 26#include "svn_sorts.h" 27#include "private/svn_sorts_private.h" 28#include "private/svn_subr_private.h" 29#include "private/svn_string_private.h" 30#include "private/svn_temp_serializer.h" 31 32#include "fs_x.h" 33#include "pack.h" 34#include "util.h" 35#include "revprops.h" 36#include "transaction.h" 37#include "index.h" 38#include "low_level.h" 39#include "cached_data.h" 40#include "changes.h" 41#include "noderevs.h" 42#include "reps.h" 43 44#include "../libsvn_fs/fs-loader.h" 45 46#include "svn_private_config.h" 47#include "temp_serializer.h" 48 49/* Packing logic: 50 * 51 * We pack files on a pack file basis (e.g. 1000 revs) without changing 52 * existing pack files nor the revision files outside the range to pack. 53 * 54 * First, we will scan the revision file indexes to determine the number 55 * of items to "place" (i.e. determine their optimal position within the 56 * future pack file). For each item, we will need a constant amount of 57 * memory to track it. A MAX_MEM parameter sets a limit to the number of 58 * items we may place in one go. That means, we may not be able to add 59 * all revisions at once. Instead, we will run the placement for a subset 60 * of revisions at a time. The very unlikely worst case will simply append 61 * all revision data with just a little reshuffling inside each revision. 62 * 63 * In a second step, we read all revisions in the selected range, build 64 * the item tracking information and copy the items themselves from the 65 * revision files to temporary files. The latter serve as buckets for a 66 * very coarse bucket presort: Separate change lists, file properties, 67 * directory properties and noderevs + representations from one another. 68 * 69 * The third step will determine an optimized placement for the items in 70 * each of the 4 buckets separately. The first three will simply order 71 * their items by revision, starting with the newest once. Placing rep 72 * and noderev items is a more elaborate process documented in the code. 73 * 74 * In short, we store items in the following order: 75 * - changed paths lists 76 * - node property 77 * - directory properties 78 * - directory representations corresponding noderevs, lexical path order 79 * with special treatment of "trunk" and "branches" 80 * - same for file representations 81 * 82 * Step 4 copies the items from the temporary buckets into the final 83 * pack file and writes the temporary index files. 84 * 85 * Finally, after the last range of revisions, create the final indexes. 86 */ 87 88/* Maximum amount of memory we allocate for placement information during 89 * the pack process. 90 */ 91#define DEFAULT_MAX_MEM (64 * 1024 * 1024) 92 93/* Data structure describing a node change at PATH, REVISION. 94 * We will sort these instances by PATH and NODE_ID such that we can combine 95 * similar nodes in the same reps container and store containers in path 96 * major order. 97 */ 98typedef struct path_order_t 99{ 100 /* changed path */ 101 svn_prefix_string__t *path; 102 103 /* node ID for this PATH in REVISION */ 104 svn_fs_x__id_t node_id; 105 106 /* when this change happened */ 107 svn_revnum_t revision; 108 109 /* length of the expanded representation content */ 110 apr_int64_t expanded_size; 111 112 /* item ID of the noderev linked to the change. May be (0, 0). */ 113 svn_fs_x__id_t noderev_id; 114 115 /* item ID of the representation containing the new data. May be (0, 0). */ 116 svn_fs_x__id_t rep_id; 117} path_order_t; 118 119/* Represents a reference from item FROM to item TO. FROM may be a noderev 120 * or rep_id while TO is (currently) always a representation. We will sort 121 * them by TO which allows us to collect all dependent items. 122 */ 123typedef struct reference_t 124{ 125 svn_fs_x__id_t to; 126 svn_fs_x__id_t from; 127} reference_t; 128 129/* This structure keeps track of all the temporary data and status that 130 * needs to be kept around during the creation of one pack file. After 131 * each revision range (in case we can't process all revs at once due to 132 * memory restrictions), parts of the data will get re-initialized. 133 */ 134typedef struct pack_context_t 135{ 136 /* file system that we operate on */ 137 svn_fs_t *fs; 138 139 /* cancel function to invoke at regular intervals. May be NULL */ 140 svn_cancel_func_t cancel_func; 141 142 /* baton to pass to CANCEL_FUNC */ 143 void *cancel_baton; 144 145 /* first revision in the shard (and future pack file) */ 146 svn_revnum_t shard_rev; 147 148 /* first revision in the range to process (>= SHARD_REV) */ 149 svn_revnum_t start_rev; 150 151 /* first revision after the range to process (<= SHARD_END_REV) */ 152 svn_revnum_t end_rev; 153 154 /* first revision after the current shard */ 155 svn_revnum_t shard_end_rev; 156 157 /* log-to-phys proto index for the whole pack file */ 158 apr_file_t *proto_l2p_index; 159 160 /* phys-to-log proto index for the whole pack file */ 161 apr_file_t *proto_p2l_index; 162 163 /* full shard directory path (containing the unpacked revisions) */ 164 const char *shard_dir; 165 166 /* full packed shard directory path (containing the pack file + indexes) */ 167 const char *pack_file_dir; 168 169 /* full pack file path (including PACK_FILE_DIR) */ 170 const char *pack_file_path; 171 172 /* current write position (i.e. file length) in the pack file */ 173 apr_off_t pack_offset; 174 175 /* the pack file to ultimately write all data to */ 176 apr_file_t *pack_file; 177 178 /* array of svn_fs_x__p2l_entry_t *, all referring to change lists. 179 * Will be filled in phase 2 and be cleared after each revision range. */ 180 apr_array_header_t *changes; 181 182 /* temp file receiving all change list items (referenced by CHANGES). 183 * Will be filled in phase 2 and be cleared after each revision range. */ 184 apr_file_t *changes_file; 185 186 /* array of svn_fs_x__p2l_entry_t *, all referring to file properties. 187 * Will be filled in phase 2 and be cleared after each revision range. */ 188 apr_array_header_t *file_props; 189 190 /* temp file receiving all file prop items (referenced by FILE_PROPS). 191 * Will be filled in phase 2 and be cleared after each revision range.*/ 192 apr_file_t *file_props_file; 193 194 /* array of svn_fs_x__p2l_entry_t *, all referring to directory properties. 195 * Will be filled in phase 2 and be cleared after each revision range. */ 196 apr_array_header_t *dir_props; 197 198 /* temp file receiving all directory prop items (referenced by DIR_PROPS). 199 * Will be filled in phase 2 and be cleared after each revision range.*/ 200 apr_file_t *dir_props_file; 201 202 /* container for all PATH members in PATH_ORDER. */ 203 svn_prefix_tree__t *paths; 204 205 /* array of path_order_t *. Will be filled in phase 2 and be cleared 206 * after each revision range. Sorted by PATH, NODE_ID. */ 207 apr_array_header_t *path_order; 208 209 /* array of reference_t* linking representations to their delta bases. 210 * Will be filled in phase 2 and be cleared after each revision range. 211 * It will be sorted by the FROM members (for rep->base rep lookup). */ 212 apr_array_header_t *references; 213 214 /* array of svn_fs_x__p2l_entry_t*. Will be filled in phase 2 and be 215 * cleared after each revision range. During phase 3, we will set items 216 * to NULL that we already processed. */ 217 apr_array_header_t *reps; 218 219 /* array of int, marking for each revision, at which offset their items 220 * begin in REPS. Will be filled in phase 2 and be cleared after 221 * each revision range. */ 222 apr_array_header_t *rev_offsets; 223 224 /* temp file receiving all items referenced by REPS. 225 * Will be filled in phase 2 and be cleared after each revision range.*/ 226 apr_file_t *reps_file; 227 228 /* pool used for temporary data structures that will be cleaned up when 229 * the next range of revisions is being processed */ 230 apr_pool_t *info_pool; 231} pack_context_t; 232 233/* Create and initialize a new pack context for packing shard SHARD_REV in 234 * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL 235 * and return the structure in *CONTEXT. 236 * 237 * Limit the number of items being copied per iteration to MAX_ITEMS. 238 * Set CANCEL_FUNC and CANCEL_BATON as well. 239 */ 240static svn_error_t * 241initialize_pack_context(pack_context_t *context, 242 svn_fs_t *fs, 243 const char *pack_file_dir, 244 const char *shard_dir, 245 svn_revnum_t shard_rev, 246 int max_items, 247 svn_fs_x__batch_fsync_t *batch, 248 svn_cancel_func_t cancel_func, 249 void *cancel_baton, 250 apr_pool_t *pool) 251{ 252 svn_fs_x__data_t *ffd = fs->fsap_data; 253 const char *temp_dir; 254 int max_revs = MIN(ffd->max_files_per_dir, max_items); 255 256 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0); 257 258 /* where we will place our various temp files */ 259 SVN_ERR(svn_io_temp_dir(&temp_dir, pool)); 260 261 /* store parameters */ 262 context->fs = fs; 263 context->cancel_func = cancel_func; 264 context->cancel_baton = cancel_baton; 265 266 context->shard_rev = shard_rev; 267 context->start_rev = shard_rev; 268 context->end_rev = shard_rev; 269 context->shard_end_rev = shard_rev + ffd->max_files_per_dir; 270 271 /* Create the new directory and pack file. */ 272 context->shard_dir = shard_dir; 273 context->pack_file_dir = pack_file_dir; 274 context->pack_file_path 275 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool); 276 277 SVN_ERR(svn_fs_x__batch_fsync_open_file(&context->pack_file, batch, 278 context->pack_file_path, pool)); 279 280 /* Proto index files */ 281 SVN_ERR(svn_fs_x__l2p_proto_index_open( 282 &context->proto_l2p_index, 283 svn_dirent_join(pack_file_dir, 284 PATH_INDEX PATH_EXT_L2P_INDEX, 285 pool), 286 pool)); 287 SVN_ERR(svn_fs_x__p2l_proto_index_open( 288 &context->proto_p2l_index, 289 svn_dirent_join(pack_file_dir, 290 PATH_INDEX PATH_EXT_P2L_INDEX, 291 pool), 292 pool)); 293 294 /* item buckets: one item info array and one temp file per bucket */ 295 context->changes = apr_array_make(pool, max_items, 296 sizeof(svn_fs_x__p2l_entry_t *)); 297 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir, 298 svn_io_file_del_on_close, pool, pool)); 299 context->file_props = apr_array_make(pool, max_items, 300 sizeof(svn_fs_x__p2l_entry_t *)); 301 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir, 302 svn_io_file_del_on_close, pool, pool)); 303 context->dir_props = apr_array_make(pool, max_items, 304 sizeof(svn_fs_x__p2l_entry_t *)); 305 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir, 306 svn_io_file_del_on_close, pool, pool)); 307 308 /* noderev and representation item bucket */ 309 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int)); 310 context->path_order = apr_array_make(pool, max_items, 311 sizeof(path_order_t *)); 312 context->references = apr_array_make(pool, max_items, 313 sizeof(reference_t *)); 314 context->reps = apr_array_make(pool, max_items, 315 sizeof(svn_fs_x__p2l_entry_t *)); 316 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir, 317 svn_io_file_del_on_close, pool, pool)); 318 319 /* the pool used for temp structures */ 320 context->info_pool = svn_pool_create(pool); 321 context->paths = svn_prefix_tree__create(context->info_pool); 322 323 return SVN_NO_ERROR; 324} 325 326/* Clean up / free all revision range specific data and files in CONTEXT. 327 * Use SCRATCH_POOL for temporary allocations. 328 */ 329static svn_error_t * 330reset_pack_context(pack_context_t *context, 331 apr_pool_t *scratch_pool) 332{ 333 apr_array_clear(context->changes); 334 SVN_ERR(svn_io_file_trunc(context->changes_file, 0, scratch_pool)); 335 apr_array_clear(context->file_props); 336 SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, scratch_pool)); 337 apr_array_clear(context->dir_props); 338 SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, scratch_pool)); 339 340 apr_array_clear(context->rev_offsets); 341 apr_array_clear(context->path_order); 342 apr_array_clear(context->references); 343 apr_array_clear(context->reps); 344 SVN_ERR(svn_io_file_trunc(context->reps_file, 0, scratch_pool)); 345 346 svn_pool_clear(context->info_pool); 347 context->paths = svn_prefix_tree__create(context->info_pool); 348 349 return SVN_NO_ERROR; 350} 351 352/* Call this after the last revision range. It will finalize all index files 353 * for CONTEXT and close any open files. 354 * Use SCRATCH_POOL for temporary allocations. 355 */ 356static svn_error_t * 357close_pack_context(pack_context_t *context, 358 apr_pool_t *scratch_pool) 359{ 360 const char *proto_l2p_index_path; 361 const char *proto_p2l_index_path; 362 363 /* need the file names for the actual index creation call further down */ 364 SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path, 365 context->proto_l2p_index, scratch_pool)); 366 SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path, 367 context->proto_p2l_index, scratch_pool)); 368 369 /* finalize proto index files */ 370 SVN_ERR(svn_io_file_close(context->proto_l2p_index, scratch_pool)); 371 SVN_ERR(svn_io_file_close(context->proto_p2l_index, scratch_pool)); 372 373 /* Append the actual index data to the pack file. */ 374 SVN_ERR(svn_fs_x__add_index_data(context->fs, context->pack_file, 375 proto_l2p_index_path, 376 proto_p2l_index_path, 377 context->shard_rev, 378 scratch_pool)); 379 380 /* remove proto index files */ 381 SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, scratch_pool)); 382 SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, scratch_pool)); 383 384 return SVN_NO_ERROR; 385} 386 387/* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC 388 * from CONTEXT at regular intervals. 389 * Use SCRATCH_POOL for temporary allocations. 390 */ 391static svn_error_t * 392copy_file_data(pack_context_t *context, 393 apr_file_t *dest, 394 apr_file_t *source, 395 svn_filesize_t size, 396 apr_pool_t *scratch_pool) 397{ 398 /* most non-representation items will be small. Minimize the buffer 399 * and infrastructure overhead in that case. */ 400 enum { STACK_BUFFER_SIZE = 1024 }; 401 402 if (size < STACK_BUFFER_SIZE) 403 { 404 /* copy small data using a fixed-size buffer on stack */ 405 char buffer[STACK_BUFFER_SIZE]; 406 SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size, 407 NULL, NULL, scratch_pool)); 408 SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size, 409 NULL, scratch_pool)); 410 } 411 else 412 { 413 /* use streaming copies for larger data blocks. That may require 414 * the allocation of larger buffers and we should make sure that 415 * this extra memory is released asap. */ 416 svn_fs_x__data_t *ffd = context->fs->fsap_data; 417 apr_pool_t *copypool = svn_pool_create(scratch_pool); 418 char *buffer = apr_palloc(copypool, ffd->block_size); 419 420 while (size) 421 { 422 apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size)); 423 if (context->cancel_func) 424 SVN_ERR(context->cancel_func(context->cancel_baton)); 425 426 SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy, 427 NULL, NULL, scratch_pool)); 428 SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy, 429 NULL, scratch_pool)); 430 431 size -= to_copy; 432 } 433 434 svn_pool_destroy(copypool); 435 } 436 437 return SVN_NO_ERROR; 438} 439 440/* Writes SIZE bytes, all 0, to DEST. 441 * Use SCRATCH_POOL for temporary allocations. 442 */ 443static svn_error_t * 444write_null_bytes(apr_file_t *dest, 445 apr_off_t size, 446 apr_pool_t *scratch_pool) 447{ 448 /* Have a collection of high-quality, easy to access NUL bytes handy. */ 449 enum { BUFFER_SIZE = 1024 }; 450 static const char buffer[BUFFER_SIZE] = { 0 }; 451 452 /* copy SIZE of them into the file's buffer */ 453 while (size) 454 { 455 apr_size_t to_write = MIN(size, BUFFER_SIZE); 456 SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, 457 scratch_pool)); 458 size -= to_write; 459 } 460 461 return SVN_NO_ERROR; 462} 463 464/* Copy the "simple" item (changed paths list or property representation) 465 * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add 466 * a copy of ENTRY to ENTRIES but with an updated offset value that points 467 * to the copy destination in TEMP_FILE. 468 * Use SCRATCH_POOL for temporary allocations. 469 */ 470static svn_error_t * 471copy_item_to_temp(pack_context_t *context, 472 apr_array_header_t *entries, 473 apr_file_t *temp_file, 474 svn_fs_x__revision_file_t *rev_file, 475 svn_fs_x__p2l_entry_t *entry, 476 apr_pool_t *scratch_pool) 477{ 478 apr_file_t *file; 479 svn_fs_x__p2l_entry_t *new_entry 480 = svn_fs_x__p2l_entry_dup(entry, context->info_pool); 481 482 SVN_ERR(svn_io_file_get_offset(&new_entry->offset, temp_file, 483 scratch_pool)); 484 APR_ARRAY_PUSH(entries, svn_fs_x__p2l_entry_t *) = new_entry; 485 486 SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file)); 487 SVN_ERR(copy_file_data(context, temp_file, file, entry->size, 488 scratch_pool)); 489 490 return SVN_NO_ERROR; 491} 492 493/* Return the offset within CONTEXT->REPS that corresponds to item 494 * ITEM_INDEX in REVISION. 495 */ 496static int 497get_item_array_index(pack_context_t *context, 498 svn_revnum_t revision, 499 apr_int64_t item_index) 500{ 501 assert(revision >= context->start_rev); 502 return (int)item_index + APR_ARRAY_IDX(context->rev_offsets, 503 revision - context->start_rev, 504 int); 505} 506 507/* Write INFO to the correct position in CONTEXT->REPS. The latter may 508 * need auto-expanding. Overwriting an array element is not allowed. 509 */ 510static void 511add_item_rep_mapping(pack_context_t *context, 512 svn_fs_x__p2l_entry_t *entry) 513{ 514 int idx; 515 assert(entry->item_count == 1); 516 517 /* index of INFO */ 518 idx = get_item_array_index(context, 519 entry->items[0].change_set, 520 entry->items[0].number); 521 522 /* make sure the index exists in the array */ 523 while (context->reps->nelts <= idx) 524 APR_ARRAY_PUSH(context->reps, void *) = NULL; 525 526 /* set the element. If there is already an entry, there are probably 527 * two items claiming to be the same -> bail out */ 528 assert(!APR_ARRAY_IDX(context->reps, idx, void *)); 529 APR_ARRAY_IDX(context->reps, idx, void *) = entry; 530} 531 532/* Return the P2L entry from CONTEXT->REPS for the given ID. If there is 533 * none (or not anymore), return NULL. If RESET has been specified, set 534 * the array entry to NULL after returning the entry. 535 */ 536static svn_fs_x__p2l_entry_t * 537get_item(pack_context_t *context, 538 const svn_fs_x__id_t *id, 539 svn_boolean_t reset) 540{ 541 svn_fs_x__p2l_entry_t *result = NULL; 542 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set); 543 if (id->number && revision >= context->start_rev) 544 { 545 int idx = get_item_array_index(context, revision, id->number); 546 if (context->reps->nelts > idx) 547 { 548 result = APR_ARRAY_IDX(context->reps, idx, void *); 549 if (result && reset) 550 APR_ARRAY_IDX(context->reps, idx, void *) = NULL; 551 } 552 } 553 554 return result; 555} 556 557/* Copy representation item identified by ENTRY from the current position 558 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by 559 * our placement algorithm to CONTEXT. 560 * Use SCRATCH_POOL for temporary allocations. 561 */ 562static svn_error_t * 563copy_rep_to_temp(pack_context_t *context, 564 svn_fs_x__revision_file_t *rev_file, 565 svn_fs_x__p2l_entry_t *entry, 566 apr_pool_t *scratch_pool) 567{ 568 svn_fs_x__rep_header_t *rep_header; 569 svn_stream_t *stream; 570 apr_file_t *file; 571 apr_off_t source_offset = entry->offset; 572 573 /* create a copy of ENTRY, make it point to the copy destination and 574 * store it in CONTEXT */ 575 entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool); 576 SVN_ERR(svn_io_file_get_offset(&entry->offset, context->reps_file, 577 scratch_pool)); 578 add_item_rep_mapping(context, entry); 579 580 /* read & parse the representation header */ 581 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file)); 582 SVN_ERR(svn_fs_x__read_rep_header(&rep_header, stream, 583 scratch_pool, scratch_pool)); 584 585 /* if the representation is a delta against some other rep, link the two */ 586 if ( rep_header->type == svn_fs_x__rep_delta 587 && rep_header->base_revision >= context->start_rev) 588 { 589 reference_t *reference = apr_pcalloc(context->info_pool, 590 sizeof(*reference)); 591 reference->from = entry->items[0]; 592 reference->to.change_set 593 = svn_fs_x__change_set_by_rev(rep_header->base_revision); 594 reference->to.number = rep_header->base_item_index; 595 APR_ARRAY_PUSH(context->references, reference_t *) = reference; 596 } 597 598 /* copy the whole rep (including header!) to our temp file */ 599 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, source_offset)); 600 SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file)); 601 SVN_ERR(copy_file_data(context, context->reps_file, file, entry->size, 602 scratch_pool)); 603 604 return SVN_NO_ERROR; 605} 606 607/* Directories first, dirs / files sorted by name in reverse lexical order. 608 * This maximizes the chance of two items being located close to one another 609 * in *all* pack files independent of their change order. It also groups 610 * multi-project repos nicely according to their sub-projects. The reverse 611 * order aspect gives "trunk" preference over "tags" and "branches", so 612 * trunk-related items are more likely to be contiguous. 613 */ 614static int 615compare_dir_entries(const svn_sort__item_t *a, 616 const svn_sort__item_t *b) 617{ 618 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value; 619 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value; 620 621 return strcmp(lhs->name, rhs->name); 622} 623 624apr_array_header_t * 625svn_fs_x__order_dir_entries(svn_fs_t *fs, 626 apr_hash_t *directory, 627 apr_pool_t *result_pool, 628 apr_pool_t *scratch_pool) 629{ 630 apr_array_header_t *ordered 631 = svn_sort__hash(directory, compare_dir_entries, scratch_pool); 632 633 apr_array_header_t *result 634 = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *)); 635 636 int i; 637 for (i = 0; i < ordered->nelts; ++i) 638 APR_ARRAY_PUSH(result, svn_fs_dirent_t *) 639 = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value; 640 641 return result; 642} 643 644/* Return a duplicate of the ORIGINAL path and with special sub-strings 645 * (e.g. "trunk") modified in such a way that have a lower lexicographic 646 * value than any other "normal" file name. 647 */ 648static const char * 649tweak_path_for_ordering(const char *original, 650 apr_pool_t *pool) 651{ 652 /* We may add further special cases as needed. */ 653 enum {SPECIAL_COUNT = 2}; 654 static const char *special[SPECIAL_COUNT] = {"trunk", "branch"}; 655 char *pos; 656 char *path = apr_pstrdup(pool, original); 657 int i; 658 659 /* Replace the first char of any "special" sub-string we find by 660 * a control char, i.e. '\1' .. '\31'. In the rare event that this 661 * would clash with existing paths, no data will be lost but merely 662 * the node ordering will be sub-optimal. 663 */ 664 for (i = 0; i < SPECIAL_COUNT; ++i) 665 for (pos = strstr(path, special[i]); 666 pos; 667 pos = strstr(pos + 1, special[i])) 668 { 669 *pos = (char)(i + '\1'); 670 } 671 672 return path; 673} 674 675/* Copy node revision item identified by ENTRY from the current position 676 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by 677 * our placement algorithm to CONTEXT. 678 * Use SCRATCH_POOL for temporary allocations. 679 */ 680static svn_error_t * 681copy_node_to_temp(pack_context_t *context, 682 svn_fs_x__revision_file_t *rev_file, 683 svn_fs_x__p2l_entry_t *entry, 684 apr_pool_t *scratch_pool) 685{ 686 path_order_t *path_order = apr_pcalloc(context->info_pool, 687 sizeof(*path_order)); 688 svn_fs_x__noderev_t *noderev; 689 svn_stream_t *stream; 690 apr_file_t *file; 691 const char *sort_path; 692 apr_off_t source_offset = entry->offset; 693 694 /* read & parse noderev */ 695 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file)); 696 SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, scratch_pool, 697 scratch_pool)); 698 699 /* create a copy of ENTRY, make it point to the copy destination and 700 * store it in CONTEXT */ 701 entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool); 702 SVN_ERR(svn_io_file_get_offset(&entry->offset, context->reps_file, 703 scratch_pool)); 704 add_item_rep_mapping(context, entry); 705 706 /* copy the noderev to our temp file */ 707 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, source_offset)); 708 SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file)); 709 SVN_ERR(copy_file_data(context, context->reps_file, file, entry->size, 710 scratch_pool)); 711 712 /* if the node has a data representation, make that the node's "base". 713 * This will (often) cause the noderev to be placed right in front of 714 * its data representation. */ 715 716 if (noderev->data_rep 717 && svn_fs_x__get_revnum(noderev->data_rep->id.change_set) 718 >= context->start_rev) 719 { 720 reference_t *reference = apr_pcalloc(context->info_pool, 721 sizeof(*reference)); 722 reference->from = entry->items[0]; 723 reference->to.change_set = noderev->data_rep->id.change_set; 724 reference->to.number = noderev->data_rep->id.number; 725 APR_ARRAY_PUSH(context->references, reference_t *) = reference; 726 727 path_order->rep_id = reference->to; 728 path_order->expanded_size = noderev->data_rep->expanded_size; 729 } 730 731 /* Sort path is the key used for ordering noderevs and associated reps. 732 * It will not be stored in the final pack file. */ 733 sort_path = tweak_path_for_ordering(noderev->created_path, scratch_pool); 734 path_order->path = svn_prefix_string__create(context->paths, sort_path); 735 path_order->node_id = noderev->node_id; 736 path_order->revision = svn_fs_x__get_revnum(noderev->noderev_id.change_set); 737 path_order->noderev_id = noderev->noderev_id; 738 APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order; 739 740 return SVN_NO_ERROR; 741} 742 743/* implements compare_fn_t. Place LHS before RHS, if the latter is older. 744 */ 745static int 746compare_p2l_info(const svn_fs_x__p2l_entry_t * const * lhs, 747 const svn_fs_x__p2l_entry_t * const * rhs) 748{ 749 assert(*lhs != *rhs); 750 if ((*lhs)->item_count == 0) 751 return (*lhs)->item_count == 0 ? 0 : -1; 752 if ((*lhs)->item_count == 0) 753 return 1; 754 755 if ((*lhs)->items[0].change_set == (*rhs)->items[0].change_set) 756 return (*lhs)->items[0].number > (*rhs)->items[0].number ? -1 : 1; 757 758 return (*lhs)->items[0].change_set > (*rhs)->items[0].change_set ? -1 : 1; 759} 760 761/* Sort svn_fs_x__p2l_entry_t * array ENTRIES by age. Place the latest 762 * items first. 763 */ 764static void 765sort_items(apr_array_header_t *entries) 766{ 767 svn_sort__array(entries, 768 (int (*)(const void *, const void *))compare_p2l_info); 769} 770 771/* implements compare_fn_t. Sort descending by PATH, NODE_ID and REVISION. 772 */ 773static int 774compare_path_order(const path_order_t * const * lhs_p, 775 const path_order_t * const * rhs_p) 776{ 777 const path_order_t * lhs = *lhs_p; 778 const path_order_t * rhs = *rhs_p; 779 780 /* lexicographic order on path and node (i.e. latest first) */ 781 int diff = svn_prefix_string__compare(lhs->path, rhs->path); 782 if (diff) 783 return diff; 784 785 /* reverse order on node (i.e. latest first) */ 786 diff = svn_fs_x__id_compare(&rhs->node_id, &lhs->node_id); 787 if (diff) 788 return diff; 789 790 /* reverse order on revision (i.e. latest first) */ 791 if (lhs->revision != rhs->revision) 792 return lhs->revision < rhs->revision ? 1 : -1; 793 794 return 0; 795} 796 797/* implements compare_fn_t. Sort ascending by TO, FROM. 798 */ 799static int 800compare_references(const reference_t * const * lhs_p, 801 const reference_t * const * rhs_p) 802{ 803 const reference_t * lhs = *lhs_p; 804 const reference_t * rhs = *rhs_p; 805 806 int diff = svn_fs_x__id_compare(&lhs->to, &rhs->to); 807 return diff ? diff : svn_fs_x__id_compare(&lhs->from, &rhs->from); 808} 809 810/* Order the data collected in CONTEXT such that we can place them in the 811 * desired order. 812 */ 813static void 814sort_reps(pack_context_t *context) 815{ 816 svn_sort__array(context->path_order, 817 (int (*)(const void *, const void *))compare_path_order); 818 svn_sort__array(context->references, 819 (int (*)(const void *, const void *))compare_references); 820} 821 822/* Return the remaining unused bytes in the current block in CONTEXT's 823 * pack file. 824 */ 825static apr_off_t 826get_block_left(pack_context_t *context) 827{ 828 svn_fs_x__data_t *ffd = context->fs->fsap_data; 829 return ffd->block_size - (context->pack_offset % ffd->block_size); 830} 831 832/* To prevent items from overlapping a block boundary, we will usually 833 * put them into the next block and top up the old one with NUL bytes. 834 * Pad CONTEXT's pack file to the end of the current block, if that padding 835 * is short enough. Use SCRATCH_POOL for temporary allocations. 836 */ 837static svn_error_t * 838auto_pad_block(pack_context_t *context, 839 apr_pool_t *scratch_pool) 840{ 841 svn_fs_x__data_t *ffd = context->fs->fsap_data; 842 843 /* This is the maximum number of bytes "wasted" that way per block. 844 * Larger items will cross the block boundaries. */ 845 const apr_off_t max_padding = MAX(ffd->block_size / 50, 512); 846 847 /* Is wasted space small enough to align the current item to the next 848 * block? */ 849 apr_off_t padding = get_block_left(context); 850 851 if (padding < max_padding) 852 { 853 /* Yes. To up with NUL bytes and don't forget to create 854 * an P2L index entry marking this section as unused. */ 855 svn_fs_x__p2l_entry_t null_entry; 856 857 null_entry.offset = context->pack_offset; 858 null_entry.size = padding; 859 null_entry.type = SVN_FS_X__ITEM_TYPE_UNUSED; 860 null_entry.fnv1_checksum = 0; 861 null_entry.item_count = 0; 862 null_entry.items = NULL; 863 864 SVN_ERR(write_null_bytes(context->pack_file, padding, scratch_pool)); 865 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 866 (context->proto_p2l_index, &null_entry, scratch_pool)); 867 context->pack_offset += padding; 868 } 869 870 return SVN_NO_ERROR; 871} 872 873/* Return the index of the first entry in CONTEXT->REFERENCES that 874 * references ITEM->ITEMS[0] if such entries exist. All matching items 875 * will be consecutive. 876 */ 877static int 878find_first_reference(pack_context_t *context, 879 svn_fs_x__p2l_entry_t *item) 880{ 881 int lower = 0; 882 int upper = context->references->nelts - 1; 883 884 while (lower <= upper) 885 { 886 int current = lower + (upper - lower) / 2; 887 reference_t *reference 888 = APR_ARRAY_IDX(context->references, current, reference_t *); 889 890 if (svn_fs_x__id_compare(&reference->to, item->items) < 0) 891 lower = current + 1; 892 else 893 upper = current - 1; 894 } 895 896 return lower; 897} 898 899/* Check whether entry number IDX in CONTEXT->REFERENCES references ITEM. 900 */ 901static svn_boolean_t 902is_reference_match(pack_context_t *context, 903 int idx, 904 svn_fs_x__p2l_entry_t *item) 905{ 906 reference_t *reference; 907 if (context->references->nelts <= idx) 908 return FALSE; 909 910 reference = APR_ARRAY_IDX(context->references, idx, reference_t *); 911 return svn_fs_x__id_eq(&reference->to, item->items); 912} 913 914/* Starting at IDX in CONTEXT->PATH_ORDER, select all representations and 915 * noderevs that should be placed into the same container, respectively. 916 * Append the path_order_t * elements encountered in SELECTED, the 917 * svn_fs_x__p2l_entry_t * of the representations that should be placed 918 * into the same reps container will be appended to REP_PARTS and the 919 * svn_fs_x__p2l_entry_t * of the noderevs referencing those reps will 920 * be appended to NODE_PARTS. 921 * 922 * Remove all returned items from the CONTEXT->REPS container and prevent 923 * them from being placed a second time later on. That also means that the 924 * caller has to place all items returned. 925 */ 926static svn_error_t * 927select_reps(pack_context_t *context, 928 int idx, 929 apr_array_header_t *selected, 930 apr_array_header_t *node_parts, 931 apr_array_header_t *rep_parts) 932{ 933 apr_array_header_t *path_order = context->path_order; 934 path_order_t *start_path = APR_ARRAY_IDX(path_order, idx, path_order_t *); 935 936 svn_fs_x__p2l_entry_t *node_part; 937 svn_fs_x__p2l_entry_t *rep_part; 938 svn_fs_x__p2l_entry_t *depending; 939 int i, k; 940 941 /* collect all path_order records as well as rep and noderev items 942 * that occupy the same path with the same node. */ 943 for (; idx < path_order->nelts; ++idx) 944 { 945 path_order_t *current_path 946 = APR_ARRAY_IDX(path_order, idx, path_order_t *); 947 948 if (!svn_fs_x__id_eq(&start_path->node_id, ¤t_path->node_id)) 949 break; 950 951 APR_ARRAY_IDX(path_order, idx, path_order_t *) = NULL; 952 node_part = get_item(context, ¤t_path->noderev_id, TRUE); 953 rep_part = get_item(context, ¤t_path->rep_id, TRUE); 954 955 if (node_part && rep_part) 956 APR_ARRAY_PUSH(selected, path_order_t *) = current_path; 957 958 if (node_part) 959 APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = node_part; 960 if (rep_part) 961 APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = rep_part; 962 } 963 964 /* collect depending reps and noderevs that reference any of the collected 965 * reps */ 966 for (i = 0; i < rep_parts->nelts; ++i) 967 { 968 rep_part = APR_ARRAY_IDX(rep_parts, i, svn_fs_x__p2l_entry_t*); 969 for (k = find_first_reference(context, rep_part); 970 is_reference_match(context, k, rep_part); 971 ++k) 972 { 973 reference_t *reference 974 = APR_ARRAY_IDX(context->references, k, reference_t *); 975 976 depending = get_item(context, &reference->from, TRUE); 977 if (!depending) 978 continue; 979 980 if (depending->type == SVN_FS_X__ITEM_TYPE_NODEREV) 981 APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = depending; 982 else 983 APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = depending; 984 } 985 } 986 987 return SVN_NO_ERROR; 988} 989 990/* Return TRUE, if all path_order_t * in SELECTED reference contents that is 991 * not longer than LIMIT. 992 */ 993static svn_boolean_t 994reps_fit_into_containers(apr_array_header_t *selected, 995 apr_uint64_t limit) 996{ 997 int i; 998 for (i = 0; i < selected->nelts; ++i) 999 if (APR_ARRAY_IDX(selected, i, path_order_t *)->expanded_size > limit) 1000 return FALSE; 1001 1002 return TRUE; 1003} 1004 1005/* Write the *CONTAINER containing the noderevs described by the 1006 * svn_fs_x__p2l_entry_t * in ITEMS to the pack file on CONTEXT. 1007 * Append a P2L entry for the container to CONTAINER->REPS. 1008 * Afterwards, clear ITEMS and re-allocate *CONTAINER in CONTAINER_POOL 1009 * so the caller may fill them again. 1010 * Use SCRATCH_POOL for temporary allocations. 1011 */ 1012static svn_error_t * 1013write_nodes_container(pack_context_t *context, 1014 svn_fs_x__noderevs_t **container, 1015 apr_array_header_t *items, 1016 apr_pool_t *container_pool, 1017 apr_pool_t *scratch_pool) 1018{ 1019 int i; 1020 apr_off_t offset = 0; 1021 svn_fs_x__p2l_entry_t *container_entry; 1022 svn_stream_t *pack_stream; 1023 1024 if (items->nelts == 0) 1025 return SVN_NO_ERROR; 1026 1027 /* serialize container */ 1028 container_entry = apr_palloc(context->info_pool, sizeof(*container_entry)); 1029 pack_stream = svn_checksum__wrap_write_stream_fnv1a_32x4 1030 (&container_entry->fnv1_checksum, 1031 svn_stream_from_aprfile2(context->pack_file, 1032 TRUE, scratch_pool), 1033 scratch_pool); 1034 SVN_ERR(svn_fs_x__write_noderevs_container(pack_stream, *container, 1035 scratch_pool)); 1036 SVN_ERR(svn_stream_close(pack_stream)); 1037 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset, 1038 scratch_pool)); 1039 1040 /* replace first noderev item in ENTRIES with the container 1041 and set all others to NULL */ 1042 container_entry->offset = context->pack_offset; 1043 container_entry->size = offset - container_entry->offset; 1044 container_entry->type = SVN_FS_X__ITEM_TYPE_NODEREVS_CONT; 1045 container_entry->item_count = items->nelts; 1046 container_entry->items = apr_palloc(context->info_pool, 1047 sizeof(svn_fs_x__id_t) * container_entry->item_count); 1048 1049 for (i = 0; i < items->nelts; ++i) 1050 container_entry->items[i] 1051 = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *)->items[0]; 1052 1053 context->pack_offset = offset; 1054 APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *) 1055 = container_entry; 1056 1057 /* Write P2L index for copied items, i.e. the 1 container */ 1058 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1059 (context->proto_p2l_index, container_entry, scratch_pool)); 1060 1061 svn_pool_clear(container_pool); 1062 *container = svn_fs_x__noderevs_create(16, container_pool); 1063 apr_array_clear(items); 1064 1065 return SVN_NO_ERROR; 1066} 1067 1068/* Read the noderevs given by the svn_fs_x__p2l_entry_t * in NODE_PARTS 1069 * from TEMP_FILE and add them to *CONTAINER and NODES_IN_CONTAINER. 1070 * Whenever the container grows bigger than the current block in CONTEXT, 1071 * write the data to disk and continue in the next block. 1072 * 1073 * Use CONTAINER_POOL to re-allocate the *CONTAINER as necessary and 1074 * SCRATCH_POOL to temporary allocations. 1075 */ 1076static svn_error_t * 1077store_nodes(pack_context_t *context, 1078 apr_file_t *temp_file, 1079 apr_array_header_t *node_parts, 1080 svn_fs_x__noderevs_t **container, 1081 apr_array_header_t *nodes_in_container, 1082 apr_pool_t *container_pool, 1083 apr_pool_t *scratch_pool) 1084{ 1085 int i; 1086 1087 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1088 svn_stream_t *stream 1089 = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool); 1090 1091 /* number of bytes in the current block not being spent on fixed-size 1092 items (i.e. those not put into the container). */ 1093 apr_size_t capacity_left = get_block_left(context); 1094 1095 /* Estimated noderev container size */ 1096 apr_size_t last_container_size = 0, container_size = 0; 1097 1098 /* Estimate extra capacity we will gain from container compression. */ 1099 apr_size_t pack_savings = 0; 1100 for (i = 0; i < node_parts->nelts; ++i) 1101 { 1102 svn_fs_x__noderev_t *noderev; 1103 svn_fs_x__p2l_entry_t *entry 1104 = APR_ARRAY_IDX(node_parts, i, svn_fs_x__p2l_entry_t *); 1105 1106 /* if we reached the limit, check whether we saved some space 1107 through the container. */ 1108 if (capacity_left + pack_savings < container_size + entry->size) 1109 container_size = svn_fs_x__noderevs_estimate_size(*container); 1110 1111 /* If necessary and the container is large enough, try harder 1112 by actually serializing the container and determine current 1113 savings due to compression. */ 1114 if ( capacity_left + pack_savings < container_size + entry->size 1115 && container_size > last_container_size + 2000) 1116 { 1117 svn_stringbuf_t *serialized 1118 = svn_stringbuf_create_ensure(container_size, iterpool); 1119 svn_stream_t *temp_stream 1120 = svn_stream_from_stringbuf(serialized, iterpool); 1121 1122 SVN_ERR(svn_fs_x__write_noderevs_container(temp_stream, *container, 1123 iterpool)); 1124 SVN_ERR(svn_stream_close(temp_stream)); 1125 1126 last_container_size = container_size; 1127 pack_savings = container_size - serialized->len; 1128 } 1129 1130 /* still doesn't fit? -> block is full. Flush */ 1131 if ( capacity_left + pack_savings < container_size + entry->size 1132 && nodes_in_container->nelts < 2) 1133 { 1134 SVN_ERR(auto_pad_block(context, iterpool)); 1135 capacity_left = get_block_left(context); 1136 } 1137 1138 /* still doesn't fit? -> block is full. Flush */ 1139 if (capacity_left + pack_savings < container_size + entry->size) 1140 { 1141 SVN_ERR(write_nodes_container(context, container, 1142 nodes_in_container, container_pool, 1143 iterpool)); 1144 1145 capacity_left = get_block_left(context); 1146 pack_savings = 0; 1147 container_size = 0; 1148 } 1149 1150 /* item will fit into the block. */ 1151 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, iterpool)); 1152 SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, iterpool, iterpool)); 1153 svn_fs_x__noderevs_add(*container, noderev); 1154 1155 container_size += entry->size; 1156 APR_ARRAY_PUSH(nodes_in_container, svn_fs_x__p2l_entry_t *) = entry; 1157 1158 svn_pool_clear(iterpool); 1159 } 1160 1161 svn_pool_destroy(iterpool); 1162 1163 return SVN_NO_ERROR; 1164} 1165 1166 1167/* Finalize CONTAINER and write it to CONTEXT's pack file. 1168 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES. 1169 * Use SCRATCH_POOL for temporary allocations. 1170 */ 1171static svn_error_t * 1172write_reps_container(pack_context_t *context, 1173 svn_fs_x__reps_builder_t *container, 1174 apr_array_header_t *sub_items, 1175 apr_array_header_t *new_entries, 1176 apr_pool_t *scratch_pool) 1177{ 1178 apr_off_t offset = 0; 1179 svn_fs_x__p2l_entry_t container_entry; 1180 1181 svn_stream_t *pack_stream 1182 = svn_checksum__wrap_write_stream_fnv1a_32x4 1183 (&container_entry.fnv1_checksum, 1184 svn_stream_from_aprfile2(context->pack_file, 1185 TRUE, scratch_pool), 1186 scratch_pool); 1187 1188 SVN_ERR(svn_fs_x__write_reps_container(pack_stream, container, 1189 scratch_pool)); 1190 SVN_ERR(svn_stream_close(pack_stream)); 1191 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset, 1192 scratch_pool)); 1193 1194 container_entry.offset = context->pack_offset; 1195 container_entry.size = offset - container_entry.offset; 1196 container_entry.type = SVN_FS_X__ITEM_TYPE_REPS_CONT; 1197 container_entry.item_count = sub_items->nelts; 1198 container_entry.items = (svn_fs_x__id_t *)sub_items->elts; 1199 1200 context->pack_offset = offset; 1201 APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *) 1202 = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool); 1203 1204 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1205 (context->proto_p2l_index, &container_entry, scratch_pool)); 1206 1207 return SVN_NO_ERROR; 1208} 1209 1210/* Read the (property) representations identified by svn_fs_x__p2l_entry_t 1211 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into 1212 * CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations. 1213 */ 1214static svn_error_t * 1215write_reps_containers(pack_context_t *context, 1216 apr_array_header_t *entries, 1217 apr_file_t *temp_file, 1218 apr_array_header_t *new_entries, 1219 apr_pool_t *scratch_pool) 1220{ 1221 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1222 apr_pool_t *container_pool = svn_pool_create(scratch_pool); 1223 int i; 1224 1225 apr_ssize_t block_left = get_block_left(context); 1226 1227 svn_fs_x__reps_builder_t *container 1228 = svn_fs_x__reps_builder_create(context->fs, container_pool); 1229 apr_array_header_t *sub_items 1230 = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t)); 1231 svn_fs_x__revision_file_t *file; 1232 1233 SVN_ERR(svn_fs_x__rev_file_wrap_temp(&file, context->fs, temp_file, 1234 scratch_pool)); 1235 1236 /* copy all items in strict order */ 1237 for (i = entries->nelts-1; i >= 0; --i) 1238 { 1239 svn_fs_x__representation_t representation = { 0 }; 1240 svn_stringbuf_t *contents; 1241 svn_stream_t *stream; 1242 apr_size_t list_index; 1243 svn_fs_x__p2l_entry_t *entry 1244 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *); 1245 1246 if ((block_left < entry->size) && sub_items->nelts) 1247 { 1248 block_left = get_block_left(context) 1249 - svn_fs_x__reps_estimate_size(container); 1250 } 1251 1252 if ((block_left < entry->size) && sub_items->nelts) 1253 { 1254 SVN_ERR(write_reps_container(context, container, sub_items, 1255 new_entries, iterpool)); 1256 1257 apr_array_clear(sub_items); 1258 svn_pool_clear(container_pool); 1259 container = svn_fs_x__reps_builder_create(context->fs, 1260 container_pool); 1261 block_left = get_block_left(context); 1262 } 1263 1264 /* still enough space in current block? */ 1265 if (block_left < entry->size) 1266 { 1267 SVN_ERR(auto_pad_block(context, iterpool)); 1268 block_left = get_block_left(context); 1269 } 1270 1271 assert(entry->item_count == 1); 1272 representation.id = entry->items[0]; 1273 1274 /* select the change list in the source file, parse it and add it to 1275 * the container */ 1276 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, 1277 iterpool)); 1278 SVN_ERR(svn_fs_x__get_representation_length(&representation.size, 1279 &representation.expanded_size, 1280 context->fs, file, 1281 entry, iterpool)); 1282 SVN_ERR(svn_fs_x__get_contents(&stream, context->fs, &representation, 1283 FALSE, iterpool)); 1284 contents = svn_stringbuf_create_ensure(representation.expanded_size, 1285 iterpool); 1286 contents->len = representation.expanded_size; 1287 1288 /* The representation is immutable. Read it normally. */ 1289 SVN_ERR(svn_stream_read_full(stream, contents->data, &contents->len)); 1290 SVN_ERR(svn_stream_close(stream)); 1291 1292 SVN_ERR(svn_fs_x__reps_add(&list_index, container, 1293 svn_stringbuf__morph_into_string(contents))); 1294 SVN_ERR_ASSERT(list_index == sub_items->nelts); 1295 block_left -= entry->size; 1296 1297 APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0]; 1298 1299 svn_pool_clear(iterpool); 1300 } 1301 1302 if (sub_items->nelts) 1303 SVN_ERR(write_reps_container(context, container, sub_items, 1304 new_entries, iterpool)); 1305 1306 svn_pool_destroy(iterpool); 1307 svn_pool_destroy(container_pool); 1308 1309 return SVN_NO_ERROR; 1310} 1311 1312/* Return TRUE if the estimated size of the NODES_IN_CONTAINER plus the 1313 * representations given as svn_fs_x__p2l_entry_t * in ENTRIES may exceed 1314 * the space left in the current block. 1315 */ 1316static svn_boolean_t 1317should_flush_nodes_container(pack_context_t *context, 1318 svn_fs_x__noderevs_t *nodes_container, 1319 apr_array_header_t *entries) 1320{ 1321 apr_ssize_t block_left = get_block_left(context); 1322 apr_ssize_t rep_sum = 0; 1323 apr_ssize_t container_size 1324 = svn_fs_x__noderevs_estimate_size(nodes_container); 1325 1326 int i; 1327 for (i = 0; i < entries->nelts; ++i) 1328 { 1329 svn_fs_x__p2l_entry_t *entry 1330 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *); 1331 rep_sum += entry->size; 1332 } 1333 1334 return block_left < rep_sum + container_size; 1335} 1336 1337/* Read the contents of the first COUNT non-NULL, non-empty items in ITEMS 1338 * from TEMP_FILE and write them to CONTEXT->PACK_FILE. 1339 * Use SCRATCH_POOL for temporary allocations. 1340 */ 1341static svn_error_t * 1342store_items(pack_context_t *context, 1343 apr_file_t *temp_file, 1344 apr_array_header_t *items, 1345 int count, 1346 apr_pool_t *scratch_pool) 1347{ 1348 int i; 1349 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1350 1351 /* copy all items in strict order */ 1352 for (i = 0; i < count; ++i) 1353 { 1354 svn_fs_x__p2l_entry_t *entry 1355 = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *); 1356 if (!entry 1357 || entry->type == SVN_FS_X__ITEM_TYPE_UNUSED 1358 || entry->item_count == 0) 1359 continue; 1360 1361 /* select the item in the source file and copy it into the target 1362 * pack file */ 1363 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, 1364 iterpool)); 1365 SVN_ERR(copy_file_data(context, context->pack_file, temp_file, 1366 entry->size, iterpool)); 1367 1368 /* write index entry and update current position */ 1369 entry->offset = context->pack_offset; 1370 context->pack_offset += entry->size; 1371 1372 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1373 (context->proto_p2l_index, entry, iterpool)); 1374 1375 APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *) = entry; 1376 svn_pool_clear(iterpool); 1377 } 1378 1379 svn_pool_destroy(iterpool); 1380 1381 return SVN_NO_ERROR; 1382} 1383 1384/* Copy (append) the items identified by svn_fs_x__p2l_entry_t * elements 1385 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE. 1386 * Use SCRATCH_POOL for temporary allocations. 1387 */ 1388static svn_error_t * 1389copy_reps_from_temp(pack_context_t *context, 1390 apr_file_t *temp_file, 1391 apr_pool_t *scratch_pool) 1392{ 1393 svn_fs_x__data_t *ffd = context->fs->fsap_data; 1394 1395 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1396 apr_pool_t *container_pool = svn_pool_create(scratch_pool); 1397 apr_array_header_t *path_order = context->path_order; 1398 apr_array_header_t *reps = context->reps; 1399 apr_array_header_t *selected = apr_array_make(scratch_pool, 16, 1400 path_order->elt_size); 1401 apr_array_header_t *node_parts = apr_array_make(scratch_pool, 16, 1402 reps->elt_size); 1403 apr_array_header_t *rep_parts = apr_array_make(scratch_pool, 16, 1404 reps->elt_size); 1405 apr_array_header_t *nodes_in_container = apr_array_make(scratch_pool, 16, 1406 reps->elt_size); 1407 int i, k; 1408 int initial_reps_count = reps->nelts; 1409 1410 /* 1 container for all noderevs in the current block. We will try to 1411 * not write it to disk until the current block fills up, i.e. aim for 1412 * a single noderevs container per block. */ 1413 svn_fs_x__noderevs_t *nodes_container 1414 = svn_fs_x__noderevs_create(16, container_pool); 1415 1416 /* copy items in path order. Create block-sized containers. */ 1417 for (i = 0; i < path_order->nelts; ++i) 1418 { 1419 if (APR_ARRAY_IDX(path_order, i, path_order_t *) == NULL) 1420 continue; 1421 1422 /* Collect reps to combine and all noderevs referencing them */ 1423 SVN_ERR(select_reps(context, i, selected, node_parts, rep_parts)); 1424 1425 /* store the noderevs container in front of the reps */ 1426 SVN_ERR(store_nodes(context, temp_file, node_parts, &nodes_container, 1427 nodes_in_container, container_pool, iterpool)); 1428 1429 /* actually flush the noderevs to disk if the reps container is likely 1430 * to fill the block, i.e. no further noderevs will be added to the 1431 * nodes container. */ 1432 if (should_flush_nodes_container(context, nodes_container, node_parts)) 1433 SVN_ERR(write_nodes_container(context, &nodes_container, 1434 nodes_in_container, container_pool, 1435 iterpool)); 1436 1437 /* if all reps are short enough put them into one container. 1438 * Otherwise, just store all containers here. */ 1439 if (reps_fit_into_containers(selected, 2 * ffd->block_size)) 1440 SVN_ERR(write_reps_containers(context, rep_parts, temp_file, 1441 context->reps, iterpool)); 1442 else 1443 SVN_ERR(store_items(context, temp_file, rep_parts, rep_parts->nelts, 1444 iterpool)); 1445 1446 /* processed all items */ 1447 apr_array_clear(selected); 1448 apr_array_clear(node_parts); 1449 apr_array_clear(rep_parts); 1450 1451 svn_pool_clear(iterpool); 1452 } 1453 1454 /* flush noderevs container to disk */ 1455 if (nodes_in_container->nelts) 1456 SVN_ERR(write_nodes_container(context, &nodes_container, 1457 nodes_in_container, container_pool, 1458 iterpool)); 1459 1460 /* copy all items in strict order */ 1461 SVN_ERR(store_items(context, temp_file, reps, initial_reps_count, 1462 scratch_pool)); 1463 1464 /* vaccum ENTRIES array: eliminate NULL entries */ 1465 for (i = 0, k = 0; i < reps->nelts; ++i) 1466 { 1467 svn_fs_x__p2l_entry_t *entry 1468 = APR_ARRAY_IDX(reps, i, svn_fs_x__p2l_entry_t *); 1469 if (entry) 1470 { 1471 APR_ARRAY_IDX(reps, k, svn_fs_x__p2l_entry_t *) = entry; 1472 ++k; 1473 } 1474 } 1475 reps->nelts = k; 1476 1477 svn_pool_destroy(iterpool); 1478 svn_pool_destroy(container_pool); 1479 1480 return SVN_NO_ERROR; 1481} 1482 1483/* Finalize CONTAINER and write it to CONTEXT's pack file. 1484 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES. 1485 * Use SCRATCH_POOL for temporary allocations. 1486 */ 1487static svn_error_t * 1488write_changes_container(pack_context_t *context, 1489 svn_fs_x__changes_t *container, 1490 apr_array_header_t *sub_items, 1491 apr_array_header_t *new_entries, 1492 apr_pool_t *scratch_pool) 1493{ 1494 apr_off_t offset = 0; 1495 svn_fs_x__p2l_entry_t container_entry; 1496 1497 svn_stream_t *pack_stream 1498 = svn_checksum__wrap_write_stream_fnv1a_32x4 1499 (&container_entry.fnv1_checksum, 1500 svn_stream_from_aprfile2(context->pack_file, 1501 TRUE, scratch_pool), 1502 scratch_pool); 1503 1504 SVN_ERR(svn_fs_x__write_changes_container(pack_stream, 1505 container, 1506 scratch_pool)); 1507 SVN_ERR(svn_stream_close(pack_stream)); 1508 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset, 1509 scratch_pool)); 1510 1511 container_entry.offset = context->pack_offset; 1512 container_entry.size = offset - container_entry.offset; 1513 container_entry.type = SVN_FS_X__ITEM_TYPE_CHANGES_CONT; 1514 container_entry.item_count = sub_items->nelts; 1515 container_entry.items = (svn_fs_x__id_t *)sub_items->elts; 1516 1517 context->pack_offset = offset; 1518 APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *) 1519 = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool); 1520 1521 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1522 (context->proto_p2l_index, &container_entry, scratch_pool)); 1523 1524 return SVN_NO_ERROR; 1525} 1526 1527/* Read the change lists identified by svn_fs_x__p2l_entry_t * elements 1528 * in ENTRIES strictly in from TEMP_FILE, aggregate them and write them 1529 * into CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations. 1530 */ 1531static svn_error_t * 1532write_changes_containers(pack_context_t *context, 1533 apr_array_header_t *entries, 1534 apr_file_t *temp_file, 1535 apr_pool_t *scratch_pool) 1536{ 1537 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1538 apr_pool_t *container_pool = svn_pool_create(scratch_pool); 1539 int i; 1540 1541 apr_ssize_t block_left = get_block_left(context); 1542 apr_ssize_t estimated_addition = 0; 1543 1544 svn_fs_x__changes_t *container 1545 = svn_fs_x__changes_create(1000, container_pool); 1546 apr_array_header_t *sub_items 1547 = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t)); 1548 apr_array_header_t *new_entries 1549 = apr_array_make(context->info_pool, 16, entries->elt_size); 1550 svn_stream_t *temp_stream 1551 = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool); 1552 1553 /* copy all items in strict order */ 1554 for (i = entries->nelts-1; i >= 0; --i) 1555 { 1556 apr_array_header_t *changes; 1557 apr_size_t list_index; 1558 svn_fs_x__p2l_entry_t *entry 1559 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *); 1560 1561 /* zip compression alone will significantly reduce the size of large 1562 * change lists. So, we will probably need even less than this estimate. 1563 */ 1564 apr_ssize_t estimated_size = (entry->size / 5) + 250; 1565 1566 /* If necessary and enough data has been added to the container since 1567 * the last test, try harder by actually serializing the container and 1568 * determine current savings due to compression. */ 1569 if (block_left < estimated_size && estimated_addition > 2000) 1570 { 1571 svn_stringbuf_t *serialized 1572 = svn_stringbuf_create_ensure(get_block_left(context), iterpool); 1573 svn_stream_t *memory_stream 1574 = svn_stream_from_stringbuf(serialized, iterpool); 1575 1576 SVN_ERR(svn_fs_x__write_changes_container(memory_stream, 1577 container, iterpool)); 1578 SVN_ERR(svn_stream_close(temp_stream)); 1579 1580 block_left = get_block_left(context) - serialized->len; 1581 estimated_addition = 0; 1582 } 1583 1584 if ((block_left < estimated_size) && sub_items->nelts) 1585 { 1586 SVN_ERR(write_changes_container(context, container, sub_items, 1587 new_entries, iterpool)); 1588 1589 apr_array_clear(sub_items); 1590 svn_pool_clear(container_pool); 1591 container = svn_fs_x__changes_create(1000, container_pool); 1592 block_left = get_block_left(context); 1593 estimated_addition = 0; 1594 } 1595 1596 /* still enough space in current block? */ 1597 if (block_left < estimated_size) 1598 { 1599 SVN_ERR(auto_pad_block(context, iterpool)); 1600 block_left = get_block_left(context); 1601 } 1602 1603 /* select the change list in the source file, parse it and add it to 1604 * the container */ 1605 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, 1606 iterpool)); 1607 SVN_ERR(svn_fs_x__read_changes(&changes, temp_stream, INT_MAX, 1608 scratch_pool, iterpool)); 1609 SVN_ERR(svn_fs_x__changes_append_list(&list_index, container, changes)); 1610 SVN_ERR_ASSERT(list_index == sub_items->nelts); 1611 block_left -= estimated_size; 1612 estimated_addition += estimated_size; 1613 1614 APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0]; 1615 1616 svn_pool_clear(iterpool); 1617 } 1618 1619 if (sub_items->nelts) 1620 SVN_ERR(write_changes_container(context, container, sub_items, 1621 new_entries, iterpool)); 1622 1623 *entries = *new_entries; 1624 svn_pool_destroy(iterpool); 1625 svn_pool_destroy(container_pool); 1626 1627 return SVN_NO_ERROR; 1628} 1629 1630/* Read the (property) representations identified by svn_fs_x__p2l_entry_t 1631 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into 1632 * CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations. 1633 */ 1634static svn_error_t * 1635write_property_containers(pack_context_t *context, 1636 apr_array_header_t *entries, 1637 apr_file_t *temp_file, 1638 apr_pool_t *scratch_pool) 1639{ 1640 apr_array_header_t *new_entries 1641 = apr_array_make(context->info_pool, 16, entries->elt_size); 1642 1643 SVN_ERR(write_reps_containers(context, entries, temp_file, new_entries, 1644 scratch_pool)); 1645 1646 *entries = *new_entries; 1647 1648 return SVN_NO_ERROR; 1649} 1650 1651/* Append all entries of svn_fs_x__p2l_entry_t * array TO_APPEND to 1652 * svn_fs_x__p2l_entry_t * array DEST. 1653 */ 1654static void 1655append_entries(apr_array_header_t *dest, 1656 apr_array_header_t *to_append) 1657{ 1658 int i; 1659 for (i = 0; i < to_append->nelts; ++i) 1660 APR_ARRAY_PUSH(dest, svn_fs_x__p2l_entry_t *) 1661 = APR_ARRAY_IDX(to_append, i, svn_fs_x__p2l_entry_t *); 1662} 1663 1664/* Write the log-to-phys proto index file for CONTEXT and use POOL for 1665 * temporary allocations. All items in all buckets must have been placed 1666 * by now. 1667 */ 1668static svn_error_t * 1669write_l2p_index(pack_context_t *context, 1670 apr_pool_t *pool) 1671{ 1672 apr_pool_t *scratch_pool = svn_pool_create(pool); 1673 const char *temp_name; 1674 const char *proto_index; 1675 apr_off_t offset = 0; 1676 1677 /* lump all items into one bucket. As target, use the bucket that 1678 * probably has the most entries already. */ 1679 append_entries(context->reps, context->changes); 1680 append_entries(context->reps, context->file_props); 1681 append_entries(context->reps, context->dir_props); 1682 1683 /* Let the index code do the expensive L2P -> P2L transformation. */ 1684 SVN_ERR(svn_fs_x__l2p_index_from_p2l_entries(&temp_name, 1685 context->fs, 1686 context->reps, 1687 pool, scratch_pool)); 1688 1689 /* Append newly written segment to exisiting proto index file. */ 1690 SVN_ERR(svn_io_file_name_get(&proto_index, context->proto_l2p_index, 1691 scratch_pool)); 1692 1693 SVN_ERR(svn_io_file_flush(context->proto_l2p_index, scratch_pool)); 1694 SVN_ERR(svn_io_append_file(temp_name, proto_index, scratch_pool)); 1695 SVN_ERR(svn_io_remove_file2(temp_name, FALSE, scratch_pool)); 1696 SVN_ERR(svn_io_file_seek(context->proto_l2p_index, APR_END, &offset, 1697 scratch_pool)); 1698 1699 /* Done. */ 1700 svn_pool_destroy(scratch_pool); 1701 1702 return SVN_NO_ERROR; 1703} 1704 1705/* Pack the current revision range of CONTEXT, i.e. this covers phases 2 1706 * to 4. Use SCRATCH_POOL for temporary allocations. 1707 */ 1708static svn_error_t * 1709pack_range(pack_context_t *context, 1710 apr_pool_t *scratch_pool) 1711{ 1712 svn_fs_x__data_t *ffd = context->fs->fsap_data; 1713 apr_pool_t *revpool = svn_pool_create(scratch_pool); 1714 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1715 1716 /* Phase 2: Copy items into various buckets and build tracking info */ 1717 svn_revnum_t revision; 1718 for (revision = context->start_rev; revision < context->end_rev; ++revision) 1719 { 1720 apr_off_t offset = 0; 1721 svn_fs_x__revision_file_t *rev_file; 1722 svn_fs_x__index_info_t l2p_index_info; 1723 1724 /* Get the rev file dimensions (mainly index locations). */ 1725 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, context->fs, revision, 1726 revpool)); 1727 SVN_ERR(svn_fs_x__rev_file_l2p_info(&l2p_index_info, rev_file)); 1728 1729 /* store the indirect array index */ 1730 APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts; 1731 1732 /* read the phys-to-log index file until we covered the whole rev file. 1733 * That index contains enough info to build both target indexes from it. */ 1734 while (offset < l2p_index_info.start) 1735 { 1736 /* read one cluster */ 1737 int i; 1738 apr_array_header_t *entries; 1739 svn_pool_clear(iterpool); 1740 1741 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs, 1742 rev_file, revision, offset, 1743 ffd->p2l_page_size, iterpool, 1744 iterpool)); 1745 1746 for (i = 0; i < entries->nelts; ++i) 1747 { 1748 svn_fs_x__p2l_entry_t *entry 1749 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t); 1750 1751 /* skip first entry if that was duplicated due crossing a 1752 cluster boundary */ 1753 if (offset > entry->offset) 1754 continue; 1755 1756 /* process entry while inside the rev file */ 1757 offset = entry->offset; 1758 if (offset < l2p_index_info.start) 1759 { 1760 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, offset)); 1761 1762 if (entry->type == SVN_FS_X__ITEM_TYPE_CHANGES) 1763 SVN_ERR(copy_item_to_temp(context, 1764 context->changes, 1765 context->changes_file, 1766 rev_file, entry, iterpool)); 1767 else if (entry->type == SVN_FS_X__ITEM_TYPE_FILE_PROPS) 1768 SVN_ERR(copy_item_to_temp(context, 1769 context->file_props, 1770 context->file_props_file, 1771 rev_file, entry, iterpool)); 1772 else if (entry->type == SVN_FS_X__ITEM_TYPE_DIR_PROPS) 1773 SVN_ERR(copy_item_to_temp(context, 1774 context->dir_props, 1775 context->dir_props_file, 1776 rev_file, entry, iterpool)); 1777 else if ( entry->type == SVN_FS_X__ITEM_TYPE_FILE_REP 1778 || entry->type == SVN_FS_X__ITEM_TYPE_DIR_REP) 1779 SVN_ERR(copy_rep_to_temp(context, rev_file, entry, 1780 iterpool)); 1781 else if (entry->type == SVN_FS_X__ITEM_TYPE_NODEREV) 1782 SVN_ERR(copy_node_to_temp(context, rev_file, entry, 1783 iterpool)); 1784 else 1785 SVN_ERR_ASSERT(entry->type == SVN_FS_X__ITEM_TYPE_UNUSED); 1786 1787 offset += entry->size; 1788 } 1789 } 1790 1791 if (context->cancel_func) 1792 SVN_ERR(context->cancel_func(context->cancel_baton)); 1793 } 1794 1795 svn_pool_clear(revpool); 1796 } 1797 1798 svn_pool_destroy(iterpool); 1799 1800 /* phase 3: placement. 1801 * Use "newest first" placement for simple items. */ 1802 sort_items(context->changes); 1803 sort_items(context->file_props); 1804 sort_items(context->dir_props); 1805 1806 /* follow dependencies recursively for noderevs and data representations */ 1807 sort_reps(context); 1808 1809 /* phase 4: copy bucket data to pack file. Write P2L index. */ 1810 SVN_ERR(write_changes_containers(context, context->changes, 1811 context->changes_file, revpool)); 1812 svn_pool_clear(revpool); 1813 SVN_ERR(write_property_containers(context, context->file_props, 1814 context->file_props_file, revpool)); 1815 svn_pool_clear(revpool); 1816 SVN_ERR(write_property_containers(context, context->dir_props, 1817 context->dir_props_file, revpool)); 1818 svn_pool_clear(revpool); 1819 SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool)); 1820 svn_pool_clear(revpool); 1821 1822 /* write L2P index as well (now that we know all target offsets) */ 1823 SVN_ERR(write_l2p_index(context, revpool)); 1824 1825 svn_pool_destroy(revpool); 1826 1827 return SVN_NO_ERROR; 1828} 1829 1830/* Append CONTEXT->START_REV to the context's pack file with no re-ordering. 1831 * This function will only be used for very large revisions (>>100k changes). 1832 * Use SCRATCH_POOL for temporary allocations. 1833 */ 1834static svn_error_t * 1835append_revision(pack_context_t *context, 1836 apr_pool_t *scratch_pool) 1837{ 1838 svn_fs_x__data_t *ffd = context->fs->fsap_data; 1839 apr_off_t offset = 0; 1840 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1841 svn_fs_x__revision_file_t *rev_file; 1842 apr_file_t *file; 1843 svn_filesize_t revdata_size; 1844 1845 /* Copy all non-index contents the rev file to the end of the pack file. */ 1846 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, context->fs, context->start_rev, 1847 scratch_pool)); 1848 SVN_ERR(svn_fs_x__rev_file_data_size(&revdata_size, rev_file)); 1849 1850 SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file)); 1851 SVN_ERR(svn_io_file_aligned_seek(file, ffd->block_size, NULL, 0, 1852 iterpool)); 1853 SVN_ERR(copy_file_data(context, context->pack_file, file, revdata_size, 1854 iterpool)); 1855 1856 /* mark the start of a new revision */ 1857 SVN_ERR(svn_fs_x__l2p_proto_index_add_revision(context->proto_l2p_index, 1858 scratch_pool)); 1859 1860 /* read the phys-to-log index file until we covered the whole rev file. 1861 * That index contains enough info to build both target indexes from it. */ 1862 while (offset < revdata_size) 1863 { 1864 /* read one cluster */ 1865 int i; 1866 apr_array_header_t *entries; 1867 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs, rev_file, 1868 context->start_rev, offset, 1869 ffd->p2l_page_size, iterpool, 1870 iterpool)); 1871 1872 for (i = 0; i < entries->nelts; ++i) 1873 { 1874 svn_fs_x__p2l_entry_t *entry 1875 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t); 1876 1877 /* skip first entry if that was duplicated due crossing a 1878 cluster boundary */ 1879 if (offset > entry->offset) 1880 continue; 1881 1882 /* process entry while inside the rev file */ 1883 offset = entry->offset; 1884 if (offset < revdata_size) 1885 { 1886 /* there should be true containers */ 1887 SVN_ERR_ASSERT(entry->item_count == 1); 1888 1889 entry->offset += context->pack_offset; 1890 offset += entry->size; 1891 SVN_ERR(svn_fs_x__l2p_proto_index_add_entry 1892 (context->proto_l2p_index, entry->offset, 0, 1893 entry->items[0].number, iterpool)); 1894 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1895 (context->proto_p2l_index, entry, iterpool)); 1896 } 1897 } 1898 1899 svn_pool_clear(iterpool); 1900 } 1901 1902 svn_pool_destroy(iterpool); 1903 context->pack_offset += revdata_size; 1904 1905 return SVN_NO_ERROR; 1906} 1907 1908/* Format 7 packing logic. 1909 * 1910 * Pack the revision shard starting at SHARD_REV in filesystem FS from 1911 * SHARD_DIR into the PACK_FILE_DIR, using SCRATCH_POOL for temporary 1912 * allocations. Limit the extra memory consumption to MAX_MEM bytes. 1913 * CANCEL_FUNC and CANCEL_BATON are what you think they are. 1914 * Schedule necessary fsync calls in BATCH. 1915 */ 1916static svn_error_t * 1917pack_log_addressed(svn_fs_t *fs, 1918 const char *pack_file_dir, 1919 const char *shard_dir, 1920 svn_revnum_t shard_rev, 1921 apr_size_t max_mem, 1922 svn_fs_x__batch_fsync_t *batch, 1923 svn_cancel_func_t cancel_func, 1924 void *cancel_baton, 1925 apr_pool_t *scratch_pool) 1926{ 1927 enum 1928 { 1929 /* estimated amount of memory used to represent one item in memory 1930 * during rev file packing */ 1931 PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t)) 1932 + APR_ALIGN_DEFAULT(2 *sizeof(void*)) 1933 + APR_ALIGN_DEFAULT(sizeof(reference_t)) 1934 + APR_ALIGN_DEFAULT(sizeof(svn_fs_x__p2l_entry_t)) 1935 + 6 * sizeof(void*) 1936 }; 1937 1938 int max_items = max_mem / PER_ITEM_MEM > INT_MAX 1939 ? INT_MAX 1940 : (int)(max_mem / PER_ITEM_MEM); 1941 apr_array_header_t *max_ids; 1942 pack_context_t context = { 0 }; 1943 int i; 1944 apr_size_t item_count = 0; 1945 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1946 1947 /* set up a pack context */ 1948 SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir, 1949 shard_rev, max_items, batch, cancel_func, 1950 cancel_baton, scratch_pool)); 1951 1952 /* phase 1: determine the size of the revisions to pack */ 1953 SVN_ERR(svn_fs_x__l2p_get_max_ids(&max_ids, fs, shard_rev, 1954 context.shard_end_rev - shard_rev, 1955 scratch_pool, scratch_pool)); 1956 1957 /* pack revisions in ranges that don't exceed MAX_MEM */ 1958 for (i = 0; i < max_ids->nelts; ++i) 1959 if ( APR_ARRAY_IDX(max_ids, i, apr_uint64_t) 1960 <= (apr_uint64_t)max_items - item_count) 1961 { 1962 item_count += APR_ARRAY_IDX(max_ids, i, apr_uint64_t); 1963 context.end_rev++; 1964 } 1965 else 1966 { 1967 /* some unpacked revisions before this one? */ 1968 if (context.start_rev < context.end_rev) 1969 { 1970 /* pack them intelligently (might be just 1 rev but still ...) */ 1971 SVN_ERR(pack_range(&context, iterpool)); 1972 SVN_ERR(reset_pack_context(&context, iterpool)); 1973 item_count = 0; 1974 } 1975 1976 /* next revision range is to start with the current revision */ 1977 context.start_rev = i + context.shard_rev; 1978 context.end_rev = context.start_rev + 1; 1979 1980 /* if this is a very large revision, we must place it as is */ 1981 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items) 1982 { 1983 SVN_ERR(append_revision(&context, iterpool)); 1984 context.start_rev++; 1985 } 1986 else 1987 item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t); 1988 1989 svn_pool_clear(iterpool); 1990 } 1991 1992 /* non-empty revision range at the end? */ 1993 if (context.start_rev < context.end_rev) 1994 SVN_ERR(pack_range(&context, iterpool)); 1995 1996 /* last phase: finalize indexes and clean up */ 1997 SVN_ERR(reset_pack_context(&context, iterpool)); 1998 SVN_ERR(close_pack_context(&context, iterpool)); 1999 svn_pool_destroy(iterpool); 2000 2001 return SVN_NO_ERROR; 2002} 2003 2004/* In filesystem FS, pack the revision SHARD containing exactly 2005 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR, 2006 * using SCRATCH_POOL for temporary allocations. Try to limit the amount of 2007 * temporary memory needed to MAX_MEM bytes. CANCEL_FUNC and CANCEL_BATON 2008 * are what you think they are. Schedule necessary fsync calls in BATCH. 2009 * 2010 * If for some reason we detect a partial packing already performed, we 2011 * remove the pack file and start again. 2012 * 2013 * The actual packing will be done in a format-specific sub-function. 2014 */ 2015static svn_error_t * 2016pack_rev_shard(svn_fs_t *fs, 2017 const char *pack_file_dir, 2018 const char *shard_path, 2019 apr_int64_t shard, 2020 int max_files_per_dir, 2021 apr_size_t max_mem, 2022 svn_fs_x__batch_fsync_t *batch, 2023 svn_cancel_func_t cancel_func, 2024 void *cancel_baton, 2025 apr_pool_t *scratch_pool) 2026{ 2027 const char *pack_file_path; 2028 svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir); 2029 2030 /* Some useful paths. */ 2031 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, scratch_pool); 2032 2033 /* Remove any existing pack file for this shard, since it is incomplete. */ 2034 SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton, 2035 scratch_pool)); 2036 2037 /* Create the new directory and pack file. */ 2038 SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool)); 2039 SVN_ERR(svn_fs_x__batch_fsync_new_path(batch, pack_file_dir, scratch_pool)); 2040 2041 /* Index information files */ 2042 SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev, 2043 max_mem, batch, cancel_func, cancel_baton, 2044 scratch_pool)); 2045 2046 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, scratch_pool)); 2047 SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, scratch_pool)); 2048 2049 return SVN_NO_ERROR; 2050} 2051 2052/* In the file system at FS_PATH, pack the SHARD in DIR containing exactly 2053 * MAX_FILES_PER_DIR revisions, using SCRATCH_POOL temporary for allocations. 2054 * COMPRESSION_LEVEL and MAX_PACK_SIZE will be ignored in that case. 2055 * An attempt will be made to keep memory usage below MAX_MEM. 2056 * 2057 * CANCEL_FUNC and CANCEL_BATON are what you think they are; similarly 2058 * NOTIFY_FUNC and NOTIFY_BATON. 2059 * 2060 * If for some reason we detect a partial packing already performed, we 2061 * remove the pack file and start again. 2062 */ 2063static svn_error_t * 2064pack_shard(const char *dir, 2065 svn_fs_t *fs, 2066 apr_int64_t shard, 2067 int max_files_per_dir, 2068 apr_off_t max_pack_size, 2069 int compression_level, 2070 apr_size_t max_mem, 2071 svn_fs_pack_notify_t notify_func, 2072 void *notify_baton, 2073 svn_cancel_func_t cancel_func, 2074 void *cancel_baton, 2075 apr_pool_t *scratch_pool) 2076{ 2077 svn_fs_x__data_t *ffd = fs->fsap_data; 2078 const char *shard_path, *pack_file_dir; 2079 svn_fs_x__batch_fsync_t *batch; 2080 2081 /* Notify caller we're starting to pack this shard. */ 2082 if (notify_func) 2083 SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_start, 2084 scratch_pool)); 2085 2086 /* Perform all fsyncs through this instance. */ 2087 SVN_ERR(svn_fs_x__batch_fsync_create(&batch, ffd->flush_to_disk, 2088 scratch_pool)); 2089 2090 /* Some useful paths. */ 2091 pack_file_dir = svn_dirent_join(dir, 2092 apr_psprintf(scratch_pool, 2093 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD, 2094 shard), 2095 scratch_pool); 2096 shard_path = svn_dirent_join(dir, 2097 apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard), 2098 scratch_pool); 2099 2100 /* pack the revision content */ 2101 SVN_ERR(pack_rev_shard(fs, pack_file_dir, shard_path, 2102 shard, max_files_per_dir, max_mem, batch, 2103 cancel_func, cancel_baton, scratch_pool)); 2104 2105 /* pack the revprops in an equivalent way */ 2106 SVN_ERR(svn_fs_x__pack_revprops_shard(fs, 2107 pack_file_dir, 2108 shard_path, 2109 shard, max_files_per_dir, 2110 (int)(0.9 * max_pack_size), 2111 compression_level, batch, 2112 cancel_func, cancel_baton, 2113 scratch_pool)); 2114 2115 /* Update the min-unpacked-rev file to reflect our newly packed shard. */ 2116 SVN_ERR(svn_fs_x__write_min_unpacked_rev(fs, 2117 (svn_revnum_t)((shard + 1) * max_files_per_dir), 2118 scratch_pool)); 2119 ffd->min_unpacked_rev = (svn_revnum_t)((shard + 1) * max_files_per_dir); 2120 2121 /* Ensure that packed file is written to disk.*/ 2122 SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool)); 2123 2124 /* Finally, remove the existing shard directories. */ 2125 SVN_ERR(svn_io_remove_dir2(shard_path, TRUE, 2126 cancel_func, cancel_baton, scratch_pool)); 2127 2128 /* Notify caller we're starting to pack this shard. */ 2129 if (notify_func) 2130 SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_end, 2131 scratch_pool)); 2132 2133 return SVN_NO_ERROR; 2134} 2135 2136/* Read the youngest rev and the first non-packed rev info for FS from disk. 2137 Set *FULLY_PACKED when there is no completed unpacked shard. 2138 Use SCRATCH_POOL for temporary allocations. 2139 */ 2140static svn_error_t * 2141get_pack_status(svn_boolean_t *fully_packed, 2142 svn_fs_t *fs, 2143 apr_pool_t *scratch_pool) 2144{ 2145 svn_fs_x__data_t *ffd = fs->fsap_data; 2146 apr_int64_t completed_shards; 2147 svn_revnum_t youngest; 2148 2149 SVN_ERR(svn_fs_x__read_min_unpacked_rev(&ffd->min_unpacked_rev, fs, 2150 scratch_pool)); 2151 2152 SVN_ERR(svn_fs_x__youngest_rev(&youngest, fs, scratch_pool)); 2153 completed_shards = (youngest + 1) / ffd->max_files_per_dir; 2154 2155 /* See if we've already completed all possible shards thus far. */ 2156 if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir)) 2157 *fully_packed = TRUE; 2158 else 2159 *fully_packed = FALSE; 2160 2161 return SVN_NO_ERROR; 2162} 2163 2164typedef struct pack_baton_t 2165{ 2166 svn_fs_t *fs; 2167 apr_size_t max_mem; 2168 svn_fs_pack_notify_t notify_func; 2169 void *notify_baton; 2170 svn_cancel_func_t cancel_func; 2171 void *cancel_baton; 2172} pack_baton_t; 2173 2174 2175/* The work-horse for svn_fs_x__pack, called with the FS write lock. 2176 This implements the svn_fs_x__with_write_lock() 'body' callback 2177 type. BATON is a 'pack_baton_t *'. 2178 2179 WARNING: if you add a call to this function, please note: 2180 The code currently assumes that any piece of code running with 2181 the write-lock set can rely on the ffd->min_unpacked_rev and 2182 ffd->min_unpacked_revprop caches to be up-to-date (and, by 2183 extension, on not having to use a retry when calling 2184 svn_fs_x__path_rev_absolute() and friends). If you add a call 2185 to this function, consider whether you have to call 2186 update_min_unpacked_rev(). 2187 See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith 2188 */ 2189static svn_error_t * 2190pack_body(void *baton, 2191 apr_pool_t *scratch_pool) 2192{ 2193 pack_baton_t *pb = baton; 2194 svn_fs_x__data_t *ffd = pb->fs->fsap_data; 2195 apr_int64_t completed_shards; 2196 apr_int64_t i; 2197 apr_pool_t *iterpool; 2198 const char *data_path; 2199 svn_boolean_t fully_packed; 2200 2201 /* Since another process might have already packed the repo, 2202 we need to re-read the pack status. */ 2203 SVN_ERR(get_pack_status(&fully_packed, pb->fs, scratch_pool)); 2204 if (fully_packed) 2205 { 2206 if (pb->notify_func) 2207 SVN_ERR(pb->notify_func(pb->notify_baton, 2208 ffd->min_unpacked_rev / ffd->max_files_per_dir, 2209 svn_fs_pack_notify_noop, scratch_pool)); 2210 2211 return SVN_NO_ERROR; 2212 } 2213 2214 completed_shards = (ffd->youngest_rev_cache + 1) / ffd->max_files_per_dir; 2215 data_path = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, scratch_pool); 2216 2217 iterpool = svn_pool_create(scratch_pool); 2218 for (i = ffd->min_unpacked_rev / ffd->max_files_per_dir; 2219 i < completed_shards; 2220 i++) 2221 { 2222 svn_pool_clear(iterpool); 2223 2224 if (pb->cancel_func) 2225 SVN_ERR(pb->cancel_func(pb->cancel_baton)); 2226 2227 SVN_ERR(pack_shard(data_path, 2228 pb->fs, i, ffd->max_files_per_dir, 2229 ffd->revprop_pack_size, 2230 ffd->compress_packed_revprops 2231 ? SVN__COMPRESSION_ZLIB_DEFAULT 2232 : SVN__COMPRESSION_NONE, 2233 pb->max_mem, 2234 pb->notify_func, pb->notify_baton, 2235 pb->cancel_func, pb->cancel_baton, iterpool)); 2236 } 2237 2238 svn_pool_destroy(iterpool); 2239 return SVN_NO_ERROR; 2240} 2241 2242svn_error_t * 2243svn_fs_x__pack(svn_fs_t *fs, 2244 apr_size_t max_mem, 2245 svn_fs_pack_notify_t notify_func, 2246 void *notify_baton, 2247 svn_cancel_func_t cancel_func, 2248 void *cancel_baton, 2249 apr_pool_t *scratch_pool) 2250{ 2251 pack_baton_t pb = { 0 }; 2252 svn_boolean_t fully_packed; 2253 2254 /* Is there we even anything to do?. */ 2255 SVN_ERR(get_pack_status(&fully_packed, fs, scratch_pool)); 2256 if (fully_packed) 2257 { 2258 svn_fs_x__data_t *ffd = fs->fsap_data; 2259 2260 if (notify_func) 2261 SVN_ERR(notify_func(notify_baton, 2262 ffd->min_unpacked_rev / ffd->max_files_per_dir, 2263 svn_fs_pack_notify_noop, scratch_pool)); 2264 2265 return SVN_NO_ERROR; 2266 } 2267 2268 /* Lock the repo and start the pack process. */ 2269 pb.fs = fs; 2270 pb.notify_func = notify_func; 2271 pb.notify_baton = notify_baton; 2272 pb.cancel_func = cancel_func; 2273 pb.cancel_baton = cancel_baton; 2274 pb.max_mem = max_mem ? max_mem : DEFAULT_MAX_MEM; 2275 2276 return svn_fs_x__with_pack_lock(fs, pack_body, &pb, scratch_pool); 2277} 2278