1/* pack.c --- FSX shard packing functionality 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22#include <assert.h> 23 24#include "svn_pools.h" 25#include "svn_dirent_uri.h" 26#include "svn_sorts.h" 27#include "private/svn_sorts_private.h" 28#include "private/svn_subr_private.h" 29#include "private/svn_string_private.h" 30#include "private/svn_temp_serializer.h" 31 32#include "fs_x.h" 33#include "pack.h" 34#include "util.h" 35#include "revprops.h" 36#include "transaction.h" 37#include "index.h" 38#include "low_level.h" 39#include "cached_data.h" 40#include "changes.h" 41#include "noderevs.h" 42#include "reps.h" 43 44#include "../libsvn_fs/fs-loader.h" 45 46#include "svn_private_config.h" 47#include "temp_serializer.h" 48 49/* Packing logic: 50 * 51 * We pack files on a pack file basis (e.g. 1000 revs) without changing 52 * existing pack files nor the revision files outside the range to pack. 53 * 54 * First, we will scan the revision file indexes to determine the number 55 * of items to "place" (i.e. determine their optimal position within the 56 * future pack file). For each item, we will need a constant amount of 57 * memory to track it. A MAX_MEM parameter sets a limit to the number of 58 * items we may place in one go. That means, we may not be able to add 59 * all revisions at once. Instead, we will run the placement for a subset 60 * of revisions at a time. The very unlikely worst case will simply append 61 * all revision data with just a little reshuffling inside each revision. 62 * 63 * In a second step, we read all revisions in the selected range, build 64 * the item tracking information and copy the items themselves from the 65 * revision files to temporary files. The latter serve as buckets for a 66 * very coarse bucket presort: Separate change lists, file properties, 67 * directory properties and noderevs + representations from one another. 68 * 69 * The third step will determine an optimized placement for the items in 70 * each of the 4 buckets separately. The first three will simply order 71 * their items by revision, starting with the newest once. Placing rep 72 * and noderev items is a more elaborate process documented in the code. 73 * 74 * In short, we store items in the following order: 75 * - changed paths lists 76 * - node property 77 * - directory properties 78 * - directory representations corresponding noderevs, lexical path order 79 * with special treatment of "trunk" and "branches" 80 * - same for file representations 81 * 82 * Step 4 copies the items from the temporary buckets into the final 83 * pack file and writes the temporary index files. 84 * 85 * Finally, after the last range of revisions, create the final indexes. 86 */ 87 88/* Maximum amount of memory we allocate for placement information during 89 * the pack process. 90 */ 91#define DEFAULT_MAX_MEM (64 * 1024 * 1024) 92 93/* Data structure describing a node change at PATH, REVISION. 94 * We will sort these instances by PATH and NODE_ID such that we can combine 95 * similar nodes in the same reps container and store containers in path 96 * major order. 97 */ 98typedef struct path_order_t 99{ 100 /* changed path */ 101 svn_prefix_string__t *path; 102 103 /* node ID for this PATH in REVISION */ 104 svn_fs_x__id_t node_id; 105 106 /* when this change happened */ 107 svn_revnum_t revision; 108 109 /* this is a directory node */ 110 svn_boolean_t is_dir; 111 112 /* length of the expanded representation content */ 113 apr_int64_t expanded_size; 114 115 /* item ID of the noderev linked to the change. May be (0, 0). */ 116 svn_fs_x__id_t noderev_id; 117 118 /* item ID of the representation containing the new data. May be (0, 0). */ 119 svn_fs_x__id_t rep_id; 120} path_order_t; 121 122/* Represents a reference from item FROM to item TO. FROM may be a noderev 123 * or rep_id while TO is (currently) always a representation. We will sort 124 * them by TO which allows us to collect all dependent items. 125 */ 126typedef struct reference_t 127{ 128 svn_fs_x__id_t to; 129 svn_fs_x__id_t from; 130} reference_t; 131 132/* This structure keeps track of all the temporary data and status that 133 * needs to be kept around during the creation of one pack file. After 134 * each revision range (in case we can't process all revs at once due to 135 * memory restrictions), parts of the data will get re-initialized. 136 */ 137typedef struct pack_context_t 138{ 139 /* file system that we operate on */ 140 svn_fs_t *fs; 141 142 /* cancel function to invoke at regular intervals. May be NULL */ 143 svn_cancel_func_t cancel_func; 144 145 /* baton to pass to CANCEL_FUNC */ 146 void *cancel_baton; 147 148 /* first revision in the shard (and future pack file) */ 149 svn_revnum_t shard_rev; 150 151 /* first revision in the range to process (>= SHARD_REV) */ 152 svn_revnum_t start_rev; 153 154 /* first revision after the range to process (<= SHARD_END_REV) */ 155 svn_revnum_t end_rev; 156 157 /* first revision after the current shard */ 158 svn_revnum_t shard_end_rev; 159 160 /* log-to-phys proto index for the whole pack file */ 161 apr_file_t *proto_l2p_index; 162 163 /* phys-to-log proto index for the whole pack file */ 164 apr_file_t *proto_p2l_index; 165 166 /* full shard directory path (containing the unpacked revisions) */ 167 const char *shard_dir; 168 169 /* full packed shard directory path (containing the pack file + indexes) */ 170 const char *pack_file_dir; 171 172 /* full pack file path (including PACK_FILE_DIR) */ 173 const char *pack_file_path; 174 175 /* current write position (i.e. file length) in the pack file */ 176 apr_off_t pack_offset; 177 178 /* the pack file to ultimately write all data to */ 179 apr_file_t *pack_file; 180 181 /* array of svn_fs_x__p2l_entry_t *, all referring to change lists. 182 * Will be filled in phase 2 and be cleared after each revision range. */ 183 apr_array_header_t *changes; 184 185 /* temp file receiving all change list items (referenced by CHANGES). 186 * Will be filled in phase 2 and be cleared after each revision range. */ 187 apr_file_t *changes_file; 188 189 /* array of svn_fs_x__p2l_entry_t *, all referring to file properties. 190 * Will be filled in phase 2 and be cleared after each revision range. */ 191 apr_array_header_t *file_props; 192 193 /* temp file receiving all file prop items (referenced by FILE_PROPS). 194 * Will be filled in phase 2 and be cleared after each revision range.*/ 195 apr_file_t *file_props_file; 196 197 /* array of svn_fs_x__p2l_entry_t *, all referring to directory properties. 198 * Will be filled in phase 2 and be cleared after each revision range. */ 199 apr_array_header_t *dir_props; 200 201 /* temp file receiving all directory prop items (referenced by DIR_PROPS). 202 * Will be filled in phase 2 and be cleared after each revision range.*/ 203 apr_file_t *dir_props_file; 204 205 /* container for all PATH members in PATH_ORDER. */ 206 svn_prefix_tree__t *paths; 207 208 /* array of path_order_t *. Will be filled in phase 2 and be cleared 209 * after each revision range. Sorted by PATH, NODE_ID. */ 210 apr_array_header_t *path_order; 211 212 /* array of reference_t* linking representations to their delta bases. 213 * Will be filled in phase 2 and be cleared after each revision range. 214 * It will be sorted by the FROM members (for rep->base rep lookup). */ 215 apr_array_header_t *references; 216 217 /* array of svn_fs_x__p2l_entry_t*. Will be filled in phase 2 and be 218 * cleared after each revision range. During phase 3, we will set items 219 * to NULL that we already processed. */ 220 apr_array_header_t *reps; 221 222 /* array of int, marking for each revision, the which offset their items 223 * begin in REPS. Will be filled in phase 2 and be cleared after 224 * each revision range. */ 225 apr_array_header_t *rev_offsets; 226 227 /* temp file receiving all items referenced by REPS. 228 * Will be filled in phase 2 and be cleared after each revision range.*/ 229 apr_file_t *reps_file; 230 231 /* pool used for temporary data structures that will be cleaned up when 232 * the next range of revisions is being processed */ 233 apr_pool_t *info_pool; 234} pack_context_t; 235 236/* Create and initialize a new pack context for packing shard SHARD_REV in 237 * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL 238 * and return the structure in *CONTEXT. 239 * 240 * Limit the number of items being copied per iteration to MAX_ITEMS. 241 * Set CANCEL_FUNC and CANCEL_BATON as well. 242 */ 243static svn_error_t * 244initialize_pack_context(pack_context_t *context, 245 svn_fs_t *fs, 246 const char *pack_file_dir, 247 const char *shard_dir, 248 svn_revnum_t shard_rev, 249 int max_items, 250 svn_cancel_func_t cancel_func, 251 void *cancel_baton, 252 apr_pool_t *pool) 253{ 254 svn_fs_x__data_t *ffd = fs->fsap_data; 255 const char *temp_dir; 256 int max_revs = MIN(ffd->max_files_per_dir, max_items); 257 258 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0); 259 260 /* where we will place our various temp files */ 261 SVN_ERR(svn_io_temp_dir(&temp_dir, pool)); 262 263 /* store parameters */ 264 context->fs = fs; 265 context->cancel_func = cancel_func; 266 context->cancel_baton = cancel_baton; 267 268 context->shard_rev = shard_rev; 269 context->start_rev = shard_rev; 270 context->end_rev = shard_rev; 271 context->shard_end_rev = shard_rev + ffd->max_files_per_dir; 272 273 /* Create the new directory and pack file. */ 274 context->shard_dir = shard_dir; 275 context->pack_file_dir = pack_file_dir; 276 context->pack_file_path 277 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool); 278 SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path, 279 APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL 280 | APR_CREATE, APR_OS_DEFAULT, pool)); 281 282 /* Proto index files */ 283 SVN_ERR(svn_fs_x__l2p_proto_index_open( 284 &context->proto_l2p_index, 285 svn_dirent_join(pack_file_dir, 286 PATH_INDEX PATH_EXT_L2P_INDEX, 287 pool), 288 pool)); 289 SVN_ERR(svn_fs_x__p2l_proto_index_open( 290 &context->proto_p2l_index, 291 svn_dirent_join(pack_file_dir, 292 PATH_INDEX PATH_EXT_P2L_INDEX, 293 pool), 294 pool)); 295 296 /* item buckets: one item info array and one temp file per bucket */ 297 context->changes = apr_array_make(pool, max_items, 298 sizeof(svn_fs_x__p2l_entry_t *)); 299 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir, 300 svn_io_file_del_on_close, pool, pool)); 301 context->file_props = apr_array_make(pool, max_items, 302 sizeof(svn_fs_x__p2l_entry_t *)); 303 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir, 304 svn_io_file_del_on_close, pool, pool)); 305 context->dir_props = apr_array_make(pool, max_items, 306 sizeof(svn_fs_x__p2l_entry_t *)); 307 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir, 308 svn_io_file_del_on_close, pool, pool)); 309 310 /* noderev and representation item bucket */ 311 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int)); 312 context->path_order = apr_array_make(pool, max_items, 313 sizeof(path_order_t *)); 314 context->references = apr_array_make(pool, max_items, 315 sizeof(reference_t *)); 316 context->reps = apr_array_make(pool, max_items, 317 sizeof(svn_fs_x__p2l_entry_t *)); 318 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir, 319 svn_io_file_del_on_close, pool, pool)); 320 321 /* the pool used for temp structures */ 322 context->info_pool = svn_pool_create(pool); 323 context->paths = svn_prefix_tree__create(context->info_pool); 324 325 return SVN_NO_ERROR; 326} 327 328/* Clean up / free all revision range specific data and files in CONTEXT. 329 * Use SCRATCH_POOL for temporary allocations. 330 */ 331static svn_error_t * 332reset_pack_context(pack_context_t *context, 333 apr_pool_t *scratch_pool) 334{ 335 apr_array_clear(context->changes); 336 SVN_ERR(svn_io_file_trunc(context->changes_file, 0, scratch_pool)); 337 apr_array_clear(context->file_props); 338 SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, scratch_pool)); 339 apr_array_clear(context->dir_props); 340 SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, scratch_pool)); 341 342 apr_array_clear(context->rev_offsets); 343 apr_array_clear(context->path_order); 344 apr_array_clear(context->references); 345 apr_array_clear(context->reps); 346 SVN_ERR(svn_io_file_trunc(context->reps_file, 0, scratch_pool)); 347 348 svn_pool_clear(context->info_pool); 349 350 return SVN_NO_ERROR; 351} 352 353/* Call this after the last revision range. It will finalize all index files 354 * for CONTEXT and close any open files. 355 * Use SCRATCH_POOL for temporary allocations. 356 */ 357static svn_error_t * 358close_pack_context(pack_context_t *context, 359 apr_pool_t *scratch_pool) 360{ 361 const char *proto_l2p_index_path; 362 const char *proto_p2l_index_path; 363 364 /* need the file names for the actual index creation call further down */ 365 SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path, 366 context->proto_l2p_index, scratch_pool)); 367 SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path, 368 context->proto_p2l_index, scratch_pool)); 369 370 /* finalize proto index files */ 371 SVN_ERR(svn_io_file_close(context->proto_l2p_index, scratch_pool)); 372 SVN_ERR(svn_io_file_close(context->proto_p2l_index, scratch_pool)); 373 374 /* Append the actual index data to the pack file. */ 375 SVN_ERR(svn_fs_x__add_index_data(context->fs, context->pack_file, 376 proto_l2p_index_path, 377 proto_p2l_index_path, 378 context->shard_rev, 379 scratch_pool)); 380 381 /* remove proto index files */ 382 SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, scratch_pool)); 383 SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, scratch_pool)); 384 385 SVN_ERR(svn_io_file_close(context->pack_file, scratch_pool)); 386 387 return SVN_NO_ERROR; 388} 389 390/* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC 391 * from CONTEXT at regular intervals. 392 * Use SCRATCH_POOL for temporary allocations. 393 */ 394static svn_error_t * 395copy_file_data(pack_context_t *context, 396 apr_file_t *dest, 397 apr_file_t *source, 398 apr_off_t size, 399 apr_pool_t *scratch_pool) 400{ 401 /* most non-representation items will be small. Minimize the buffer 402 * and infrastructure overhead in that case. */ 403 enum { STACK_BUFFER_SIZE = 1024 }; 404 405 if (size < STACK_BUFFER_SIZE) 406 { 407 /* copy small data using a fixed-size buffer on stack */ 408 char buffer[STACK_BUFFER_SIZE]; 409 SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size, 410 NULL, NULL, scratch_pool)); 411 SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size, 412 NULL, scratch_pool)); 413 } 414 else 415 { 416 /* use streaming copies for larger data blocks. That may require 417 * the allocation of larger buffers and we should make sure that 418 * this extra memory is released asap. */ 419 svn_fs_x__data_t *ffd = context->fs->fsap_data; 420 apr_pool_t *copypool = svn_pool_create(scratch_pool); 421 char *buffer = apr_palloc(copypool, ffd->block_size); 422 423 while (size) 424 { 425 apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size)); 426 if (context->cancel_func) 427 SVN_ERR(context->cancel_func(context->cancel_baton)); 428 429 SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy, 430 NULL, NULL, scratch_pool)); 431 SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy, 432 NULL, scratch_pool)); 433 434 size -= to_copy; 435 } 436 437 svn_pool_destroy(copypool); 438 } 439 440 return SVN_NO_ERROR; 441} 442 443/* Writes SIZE bytes, all 0, to DEST. 444 * Use SCRATCH_POOL for temporary allocations. 445 */ 446static svn_error_t * 447write_null_bytes(apr_file_t *dest, 448 apr_off_t size, 449 apr_pool_t *scratch_pool) 450{ 451 /* Have a collection of high-quality, easy to access NUL bytes handy. */ 452 enum { BUFFER_SIZE = 1024 }; 453 static const char buffer[BUFFER_SIZE] = { 0 }; 454 455 /* copy SIZE of them into the file's buffer */ 456 while (size) 457 { 458 apr_size_t to_write = MIN(size, BUFFER_SIZE); 459 SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, 460 scratch_pool)); 461 size -= to_write; 462 } 463 464 return SVN_NO_ERROR; 465} 466 467/* Copy the "simple" item (changed paths list or property representation) 468 * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add 469 * a copy of ENTRY to ENTRIES but with an updated offset value that points 470 * to the copy destination in TEMP_FILE. 471 * Use SCRATCH_POOL for temporary allocations. 472 */ 473static svn_error_t * 474copy_item_to_temp(pack_context_t *context, 475 apr_array_header_t *entries, 476 apr_file_t *temp_file, 477 svn_fs_x__revision_file_t *rev_file, 478 svn_fs_x__p2l_entry_t *entry, 479 apr_pool_t *scratch_pool) 480{ 481 svn_fs_x__p2l_entry_t *new_entry 482 = svn_fs_x__p2l_entry_dup(entry, context->info_pool); 483 484 SVN_ERR(svn_fs_x__get_file_offset(&new_entry->offset, temp_file, 485 scratch_pool)); 486 APR_ARRAY_PUSH(entries, svn_fs_x__p2l_entry_t *) = new_entry; 487 488 SVN_ERR(copy_file_data(context, temp_file, rev_file->file, entry->size, 489 scratch_pool)); 490 491 return SVN_NO_ERROR; 492} 493 494/* Return the offset within CONTEXT->REPS that corresponds to item 495 * ITEM_INDEX in REVISION. 496 */ 497static int 498get_item_array_index(pack_context_t *context, 499 svn_revnum_t revision, 500 apr_int64_t item_index) 501{ 502 assert(revision >= context->start_rev); 503 return (int)item_index + APR_ARRAY_IDX(context->rev_offsets, 504 revision - context->start_rev, 505 int); 506} 507 508/* Write INFO to the correct position in CONTEXT->REPS. The latter may 509 * need auto-expanding. Overwriting an array element is not allowed. 510 */ 511static void 512add_item_rep_mapping(pack_context_t *context, 513 svn_fs_x__p2l_entry_t *entry) 514{ 515 int idx; 516 assert(entry->item_count == 1); 517 518 /* index of INFO */ 519 idx = get_item_array_index(context, 520 entry->items[0].change_set, 521 entry->items[0].number); 522 523 /* make sure the index exists in the array */ 524 while (context->reps->nelts <= idx) 525 APR_ARRAY_PUSH(context->reps, void *) = NULL; 526 527 /* set the element. If there is already an entry, there are probably 528 * two items claiming to be the same -> bail out */ 529 assert(!APR_ARRAY_IDX(context->reps, idx, void *)); 530 APR_ARRAY_IDX(context->reps, idx, void *) = entry; 531} 532 533/* Return the P2L entry from CONTEXT->REPS for the given ID. If there is 534 * none (or not anymore), return NULL. If RESET has been specified, set 535 * the array entry to NULL after returning the entry. 536 */ 537static svn_fs_x__p2l_entry_t * 538get_item(pack_context_t *context, 539 const svn_fs_x__id_t *id, 540 svn_boolean_t reset) 541{ 542 svn_fs_x__p2l_entry_t *result = NULL; 543 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set); 544 if (id->number && revision >= context->start_rev) 545 { 546 int idx = get_item_array_index(context, revision, id->number); 547 if (context->reps->nelts > idx) 548 { 549 result = APR_ARRAY_IDX(context->reps, idx, void *); 550 if (result && reset) 551 APR_ARRAY_IDX(context->reps, idx, void *) = NULL; 552 } 553 } 554 555 return result; 556} 557 558/* Copy representation item identified by ENTRY from the current position 559 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by 560 * our placement algorithm to CONTEXT. 561 * Use SCRATCH_POOL for temporary allocations. 562 */ 563static svn_error_t * 564copy_rep_to_temp(pack_context_t *context, 565 svn_fs_x__revision_file_t *rev_file, 566 svn_fs_x__p2l_entry_t *entry, 567 apr_pool_t *scratch_pool) 568{ 569 svn_fs_x__rep_header_t *rep_header; 570 apr_off_t source_offset = entry->offset; 571 572 /* create a copy of ENTRY, make it point to the copy destination and 573 * store it in CONTEXT */ 574 entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool); 575 SVN_ERR(svn_fs_x__get_file_offset(&entry->offset, context->reps_file, 576 scratch_pool)); 577 add_item_rep_mapping(context, entry); 578 579 /* read & parse the representation header */ 580 SVN_ERR(svn_fs_x__read_rep_header(&rep_header, rev_file->stream, 581 scratch_pool, scratch_pool)); 582 583 /* if the representation is a delta against some other rep, link the two */ 584 if ( rep_header->type == svn_fs_x__rep_delta 585 && rep_header->base_revision >= context->start_rev) 586 { 587 reference_t *reference = apr_pcalloc(context->info_pool, 588 sizeof(*reference)); 589 reference->from = entry->items[0]; 590 reference->to.change_set 591 = svn_fs_x__change_set_by_rev(rep_header->base_revision); 592 reference->to.number = rep_header->base_item_index; 593 APR_ARRAY_PUSH(context->references, reference_t *) = reference; 594 } 595 596 /* copy the whole rep (including header!) to our temp file */ 597 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &source_offset, 598 scratch_pool)); 599 SVN_ERR(copy_file_data(context, context->reps_file, rev_file->file, 600 entry->size, scratch_pool)); 601 602 return SVN_NO_ERROR; 603} 604 605/* Directories first, dirs / files sorted by name in reverse lexical order. 606 * This maximizes the chance of two items being located close to one another 607 * in *all* pack files independent of their change order. It also groups 608 * multi-project repos nicely according to their sub-projects. The reverse 609 * order aspect gives "trunk" preference over "tags" and "branches", so 610 * trunk-related items are more likely to be contiguous. 611 */ 612static int 613compare_dir_entries(const svn_sort__item_t *a, 614 const svn_sort__item_t *b) 615{ 616 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value; 617 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value; 618 619 if (lhs->kind != rhs->kind) 620 return lhs->kind == svn_node_dir ? -1 : 1; 621 622 return strcmp(lhs->name, rhs->name); 623} 624 625apr_array_header_t * 626svn_fs_x__order_dir_entries(svn_fs_t *fs, 627 apr_hash_t *directory, 628 apr_pool_t *result_pool, 629 apr_pool_t *scratch_pool) 630{ 631 apr_array_header_t *ordered 632 = svn_sort__hash(directory, compare_dir_entries, scratch_pool); 633 634 apr_array_header_t *result 635 = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *)); 636 637 int i; 638 for (i = 0; i < ordered->nelts; ++i) 639 APR_ARRAY_PUSH(result, svn_fs_dirent_t *) 640 = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value; 641 642 return result; 643} 644 645/* Return a duplicate of the the ORIGINAL path and with special sub-strins 646 * (e.g. "trunk") modified in such a way that have a lower lexicographic 647 * value than any other "normal" file name. 648 */ 649static const char * 650tweak_path_for_ordering(const char *original, 651 apr_pool_t *pool) 652{ 653 /* We may add further special cases as needed. */ 654 enum {SPECIAL_COUNT = 2}; 655 static const char *special[SPECIAL_COUNT] = {"trunk", "branch"}; 656 char *pos; 657 char *path = apr_pstrdup(pool, original); 658 int i; 659 660 /* Replace the first char of any "special" sub-string we find by 661 * a control char, i.e. '\1' .. '\31'. In the rare event that this 662 * would clash with existing paths, no data will be lost but merely 663 * the node ordering will be sub-optimal. 664 */ 665 for (i = 0; i < SPECIAL_COUNT; ++i) 666 for (pos = strstr(path, special[i]); 667 pos; 668 pos = strstr(pos + 1, special[i])) 669 { 670 *pos = (char)(i + '\1'); 671 } 672 673 return path; 674} 675 676/* Copy node revision item identified by ENTRY from the current position 677 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by 678 * our placement algorithm to CONTEXT. 679 * Use SCRATCH_POOL for temporary allocations. 680 */ 681static svn_error_t * 682copy_node_to_temp(pack_context_t *context, 683 svn_fs_x__revision_file_t *rev_file, 684 svn_fs_x__p2l_entry_t *entry, 685 apr_pool_t *scratch_pool) 686{ 687 path_order_t *path_order = apr_pcalloc(context->info_pool, 688 sizeof(*path_order)); 689 svn_fs_x__noderev_t *noderev; 690 const char *sort_path; 691 apr_off_t source_offset = entry->offset; 692 693 /* read & parse noderev */ 694 SVN_ERR(svn_fs_x__read_noderev(&noderev, rev_file->stream, scratch_pool, 695 scratch_pool)); 696 697 /* create a copy of ENTRY, make it point to the copy destination and 698 * store it in CONTEXT */ 699 entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool); 700 SVN_ERR(svn_fs_x__get_file_offset(&entry->offset, context->reps_file, 701 scratch_pool)); 702 add_item_rep_mapping(context, entry); 703 704 /* copy the noderev to our temp file */ 705 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &source_offset, 706 scratch_pool)); 707 SVN_ERR(copy_file_data(context, context->reps_file, rev_file->file, 708 entry->size, scratch_pool)); 709 710 /* if the node has a data representation, make that the node's "base". 711 * This will (often) cause the noderev to be placed right in front of 712 * its data representation. */ 713 714 if (noderev->data_rep 715 && svn_fs_x__get_revnum(noderev->data_rep->id.change_set) 716 >= context->start_rev) 717 { 718 reference_t *reference = apr_pcalloc(context->info_pool, 719 sizeof(*reference)); 720 reference->from = entry->items[0]; 721 reference->to.change_set = noderev->data_rep->id.change_set; 722 reference->to.number = noderev->data_rep->id.number; 723 APR_ARRAY_PUSH(context->references, reference_t *) = reference; 724 725 path_order->rep_id = reference->to; 726 path_order->expanded_size = noderev->data_rep->expanded_size; 727 } 728 729 /* Sort path is the key used for ordering noderevs and associated reps. 730 * It will not be stored in the final pack file. */ 731 sort_path = tweak_path_for_ordering(noderev->created_path, scratch_pool); 732 path_order->path = svn_prefix_string__create(context->paths, sort_path); 733 path_order->node_id = noderev->node_id; 734 path_order->revision = svn_fs_x__get_revnum(noderev->noderev_id.change_set); 735 path_order->is_dir = noderev->kind == svn_node_dir; 736 path_order->noderev_id = noderev->noderev_id; 737 APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order; 738 739 return SVN_NO_ERROR; 740} 741 742/* implements compare_fn_t. Place LHS before RHS, if the latter is older. 743 */ 744static int 745compare_p2l_info(const svn_fs_x__p2l_entry_t * const * lhs, 746 const svn_fs_x__p2l_entry_t * const * rhs) 747{ 748 assert(*lhs != *rhs); 749 if ((*lhs)->item_count == 0) 750 return (*lhs)->item_count == 0 ? 0 : -1; 751 if ((*lhs)->item_count == 0) 752 return 1; 753 754 if ((*lhs)->items[0].change_set == (*rhs)->items[0].change_set) 755 return (*lhs)->items[0].number > (*rhs)->items[0].number ? -1 : 1; 756 757 return (*lhs)->items[0].change_set > (*rhs)->items[0].change_set ? -1 : 1; 758} 759 760/* Sort svn_fs_x__p2l_entry_t * array ENTRIES by age. Place the latest 761 * items first. 762 */ 763static void 764sort_items(apr_array_header_t *entries) 765{ 766 svn_sort__array(entries, 767 (int (*)(const void *, const void *))compare_p2l_info); 768} 769 770/* implements compare_fn_t. Sort descending by PATH, NODE_ID and REVISION. 771 */ 772static int 773compare_path_order(const path_order_t * const * lhs_p, 774 const path_order_t * const * rhs_p) 775{ 776 const path_order_t * lhs = *lhs_p; 777 const path_order_t * rhs = *rhs_p; 778 779 /* cluster all directories */ 780 int diff = rhs->is_dir - lhs->is_dir; 781 if (diff) 782 return diff; 783 784 /* lexicographic order on path and node (i.e. latest first) */ 785 diff = svn_prefix_string__compare(lhs->path, rhs->path); 786 if (diff) 787 return diff; 788 789 /* reverse order on node (i.e. latest first) */ 790 diff = svn_fs_x__id_compare(&rhs->node_id, &lhs->node_id); 791 if (diff) 792 return diff; 793 794 /* reverse order on revision (i.e. latest first) */ 795 if (lhs->revision != rhs->revision) 796 return lhs->revision < rhs->revision ? 1 : -1; 797 798 return 0; 799} 800 801/* implements compare_fn_t. Sort ascending by TO, FROM. 802 */ 803static int 804compare_references(const reference_t * const * lhs_p, 805 const reference_t * const * rhs_p) 806{ 807 const reference_t * lhs = *lhs_p; 808 const reference_t * rhs = *rhs_p; 809 810 int diff = svn_fs_x__id_compare(&lhs->to, &rhs->to); 811 return diff ? diff : svn_fs_x__id_compare(&lhs->from, &rhs->from); 812} 813 814/* Order the data collected in CONTEXT such that we can place them in the 815 * desired order. 816 */ 817static void 818sort_reps(pack_context_t *context) 819{ 820 svn_sort__array(context->path_order, 821 (int (*)(const void *, const void *))compare_path_order); 822 svn_sort__array(context->references, 823 (int (*)(const void *, const void *))compare_references); 824} 825 826/* Return the remaining unused bytes in the current block in CONTEXT's 827 * pack file. 828 */ 829static apr_ssize_t 830get_block_left(pack_context_t *context) 831{ 832 svn_fs_x__data_t *ffd = context->fs->fsap_data; 833 return ffd->block_size - (context->pack_offset % ffd->block_size); 834} 835 836/* To prevent items from overlapping a block boundary, we will usually 837 * put them into the next block and top up the old one with NUL bytes. 838 * Pad CONTEXT's pack file to the end of the current block, if that padding 839 * is short enough. Use SCRATCH_POOL for temporary allocations. 840 */ 841static svn_error_t * 842auto_pad_block(pack_context_t *context, 843 apr_pool_t *scratch_pool) 844{ 845 svn_fs_x__data_t *ffd = context->fs->fsap_data; 846 847 /* This is the maximum number of bytes "wasted" that way per block. 848 * Larger items will cross the block boundaries. */ 849 const apr_off_t max_padding = MAX(ffd->block_size / 50, 512); 850 851 /* Is wasted space small enough to align the current item to the next 852 * block? */ 853 apr_off_t padding = get_block_left(context); 854 855 if (padding < max_padding) 856 { 857 /* Yes. To up with NUL bytes and don't forget to create 858 * an P2L index entry marking this section as unused. */ 859 svn_fs_x__p2l_entry_t null_entry; 860 861 null_entry.offset = context->pack_offset; 862 null_entry.size = padding; 863 null_entry.type = SVN_FS_X__ITEM_TYPE_UNUSED; 864 null_entry.fnv1_checksum = 0; 865 null_entry.item_count = 0; 866 null_entry.items = NULL; 867 868 SVN_ERR(write_null_bytes(context->pack_file, padding, scratch_pool)); 869 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 870 (context->proto_p2l_index, &null_entry, scratch_pool)); 871 context->pack_offset += padding; 872 } 873 874 return SVN_NO_ERROR; 875} 876 877/* Return the index of the first entry in CONTEXT->REFERENCES that 878 * references ITEM->ITEMS[0] if such entries exist. All matching items 879 * will be consecutive. 880 */ 881static int 882find_first_reference(pack_context_t *context, 883 svn_fs_x__p2l_entry_t *item) 884{ 885 int lower = 0; 886 int upper = context->references->nelts - 1; 887 888 while (lower <= upper) 889 { 890 int current = lower + (upper - lower) / 2; 891 reference_t *reference 892 = APR_ARRAY_IDX(context->references, current, reference_t *); 893 894 if (svn_fs_x__id_compare(&reference->to, item->items) < 0) 895 lower = current + 1; 896 else 897 upper = current - 1; 898 } 899 900 return lower; 901} 902 903/* Check whether entry number IDX in CONTEXT->REFERENCES references ITEM. 904 */ 905static svn_boolean_t 906is_reference_match(pack_context_t *context, 907 int idx, 908 svn_fs_x__p2l_entry_t *item) 909{ 910 reference_t *reference; 911 if (context->references->nelts <= idx) 912 return FALSE; 913 914 reference = APR_ARRAY_IDX(context->references, idx, reference_t *); 915 return svn_fs_x__id_eq(&reference->to, item->items); 916} 917 918/* Starting at IDX in CONTEXT->PATH_ORDER, select all representations and 919 * noderevs that should be placed into the same container, respectively. 920 * Append the path_order_t * elements encountered in SELECTED, the 921 * svn_fs_x__p2l_entry_t * of the representations that should be placed 922 * into the same reps container will be appended to REP_PARTS and the 923 * svn_fs_x__p2l_entry_t * of the noderevs referencing those reps will 924 * be appended to NODE_PARTS. 925 * 926 * Remove all returned items from the CONTEXT->REPS container and prevent 927 * them from being placed a second time later on. That also means that the 928 * caller has to place all items returned. 929 */ 930static svn_error_t * 931select_reps(pack_context_t *context, 932 int idx, 933 apr_array_header_t *selected, 934 apr_array_header_t *node_parts, 935 apr_array_header_t *rep_parts) 936{ 937 apr_array_header_t *path_order = context->path_order; 938 path_order_t *start_path = APR_ARRAY_IDX(path_order, idx, path_order_t *); 939 940 svn_fs_x__p2l_entry_t *node_part; 941 svn_fs_x__p2l_entry_t *rep_part; 942 svn_fs_x__p2l_entry_t *depending; 943 int i, k; 944 945 /* collect all path_order records as well as rep and noderev items 946 * that occupy the same path with the same node. */ 947 for (; idx < path_order->nelts; ++idx) 948 { 949 path_order_t *current_path 950 = APR_ARRAY_IDX(path_order, idx, path_order_t *); 951 952 if (!svn_fs_x__id_eq(&start_path->node_id, ¤t_path->node_id)) 953 break; 954 955 APR_ARRAY_IDX(path_order, idx, path_order_t *) = NULL; 956 node_part = get_item(context, ¤t_path->noderev_id, TRUE); 957 rep_part = get_item(context, ¤t_path->rep_id, TRUE); 958 959 if (node_part && rep_part) 960 APR_ARRAY_PUSH(selected, path_order_t *) = current_path; 961 962 if (node_part) 963 APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = node_part; 964 if (rep_part) 965 APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = rep_part; 966 } 967 968 /* collect depending reps and noderevs that reference any of the collected 969 * reps */ 970 for (i = 0; i < rep_parts->nelts; ++i) 971 { 972 rep_part = APR_ARRAY_IDX(rep_parts, i, svn_fs_x__p2l_entry_t*); 973 for (k = find_first_reference(context, rep_part); 974 is_reference_match(context, k, rep_part); 975 ++k) 976 { 977 reference_t *reference 978 = APR_ARRAY_IDX(context->references, k, reference_t *); 979 980 depending = get_item(context, &reference->from, TRUE); 981 if (!depending) 982 continue; 983 984 if (depending->type == SVN_FS_X__ITEM_TYPE_NODEREV) 985 APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = depending; 986 else 987 APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = depending; 988 } 989 } 990 991 return SVN_NO_ERROR; 992} 993 994/* Return TRUE, if all path_order_t * in SELECTED reference contents that is 995 * not longer than LIMIT. 996 */ 997static svn_boolean_t 998reps_fit_into_containers(apr_array_header_t *selected, 999 apr_uint64_t limit) 1000{ 1001 int i; 1002 for (i = 0; i < selected->nelts; ++i) 1003 if (APR_ARRAY_IDX(selected, i, path_order_t *)->expanded_size > limit) 1004 return FALSE; 1005 1006 return TRUE; 1007} 1008 1009/* Write the *CONTAINER containing the noderevs described by the 1010 * svn_fs_x__p2l_entry_t * in ITEMS to the pack file on CONTEXT. 1011 * Append a P2L entry for the container to CONTAINER->REPS. 1012 * Afterwards, clear ITEMS and re-allocate *CONTAINER in CONTAINER_POOL 1013 * so the caller may fill them again. 1014 * Use SCRATCH_POOL for temporary allocations. 1015 */ 1016static svn_error_t * 1017write_nodes_container(pack_context_t *context, 1018 svn_fs_x__noderevs_t **container, 1019 apr_array_header_t *items, 1020 apr_pool_t *container_pool, 1021 apr_pool_t *scratch_pool) 1022{ 1023 int i; 1024 apr_off_t offset = 0; 1025 svn_fs_x__p2l_entry_t *container_entry; 1026 svn_stream_t *pack_stream; 1027 1028 if (items->nelts == 0) 1029 return SVN_NO_ERROR; 1030 1031 /* serialize container */ 1032 container_entry = apr_palloc(context->info_pool, sizeof(*container_entry)); 1033 pack_stream = svn_checksum__wrap_write_stream_fnv1a_32x4 1034 (&container_entry->fnv1_checksum, 1035 svn_stream_from_aprfile2(context->pack_file, 1036 TRUE, scratch_pool), 1037 scratch_pool); 1038 SVN_ERR(svn_fs_x__write_noderevs_container(pack_stream, *container, 1039 scratch_pool)); 1040 SVN_ERR(svn_stream_close(pack_stream)); 1041 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset, 1042 scratch_pool)); 1043 1044 /* replace first noderev item in ENTRIES with the container 1045 and set all others to NULL */ 1046 container_entry->offset = context->pack_offset; 1047 container_entry->size = offset - container_entry->offset; 1048 container_entry->type = SVN_FS_X__ITEM_TYPE_NODEREVS_CONT; 1049 container_entry->item_count = items->nelts; 1050 container_entry->items = apr_palloc(context->info_pool, 1051 sizeof(svn_fs_x__id_t) * container_entry->item_count); 1052 1053 for (i = 0; i < items->nelts; ++i) 1054 container_entry->items[i] 1055 = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *)->items[0]; 1056 1057 context->pack_offset = offset; 1058 APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *) 1059 = container_entry; 1060 1061 /* Write P2L index for copied items, i.e. the 1 container */ 1062 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1063 (context->proto_p2l_index, container_entry, scratch_pool)); 1064 1065 svn_pool_clear(container_pool); 1066 *container = svn_fs_x__noderevs_create(16, container_pool); 1067 apr_array_clear(items); 1068 1069 return SVN_NO_ERROR; 1070} 1071 1072/* Read the noderevs given by the svn_fs_x__p2l_entry_t * in NODE_PARTS 1073 * from TEMP_FILE and add them to *CONTAINER and NODES_IN_CONTAINER. 1074 * Whenever the container grows bigger than the current block in CONTEXT, 1075 * write the data to disk and continue in the next block. 1076 * 1077 * Use CONTAINER_POOL to re-allocate the *CONTAINER as necessary and 1078 * SCRATCH_POOL to temporary allocations. 1079 */ 1080static svn_error_t * 1081store_nodes(pack_context_t *context, 1082 apr_file_t *temp_file, 1083 apr_array_header_t *node_parts, 1084 svn_fs_x__noderevs_t **container, 1085 apr_array_header_t *nodes_in_container, 1086 apr_pool_t *container_pool, 1087 apr_pool_t *scratch_pool) 1088{ 1089 int i; 1090 1091 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1092 svn_stream_t *stream 1093 = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool); 1094 1095 /* number of bytes in the current block not being spent on fixed-size 1096 items (i.e. those not put into the container). */ 1097 apr_size_t capacity_left = get_block_left(context); 1098 1099 /* Estimated noderev container size */ 1100 apr_size_t last_container_size = 0, container_size = 0; 1101 1102 /* Estimate extra capacity we will gain from container compression. */ 1103 apr_size_t pack_savings = 0; 1104 for (i = 0; i < node_parts->nelts; ++i) 1105 { 1106 svn_fs_x__noderev_t *noderev; 1107 svn_fs_x__p2l_entry_t *entry 1108 = APR_ARRAY_IDX(node_parts, i, svn_fs_x__p2l_entry_t *); 1109 1110 /* if we reached the limit, check whether we saved some space 1111 through the container. */ 1112 if (capacity_left + pack_savings < container_size + entry->size) 1113 container_size = svn_fs_x__noderevs_estimate_size(*container); 1114 1115 /* If necessary and the container is large enough, try harder 1116 by actually serializing the container and determine current 1117 savings due to compression. */ 1118 if ( capacity_left + pack_savings < container_size + entry->size 1119 && container_size > last_container_size + 2000) 1120 { 1121 svn_stringbuf_t *serialized 1122 = svn_stringbuf_create_ensure(container_size, iterpool); 1123 svn_stream_t *temp_stream 1124 = svn_stream_from_stringbuf(serialized, iterpool); 1125 1126 SVN_ERR(svn_fs_x__write_noderevs_container(temp_stream, *container, 1127 iterpool)); 1128 SVN_ERR(svn_stream_close(temp_stream)); 1129 1130 last_container_size = container_size; 1131 pack_savings = container_size - serialized->len; 1132 } 1133 1134 /* still doesn't fit? -> block is full. Flush */ 1135 if ( capacity_left + pack_savings < container_size + entry->size 1136 && nodes_in_container->nelts < 2) 1137 { 1138 SVN_ERR(auto_pad_block(context, iterpool)); 1139 capacity_left = get_block_left(context); 1140 } 1141 1142 /* still doesn't fit? -> block is full. Flush */ 1143 if (capacity_left + pack_savings < container_size + entry->size) 1144 { 1145 SVN_ERR(write_nodes_container(context, container, 1146 nodes_in_container, container_pool, 1147 iterpool)); 1148 1149 capacity_left = get_block_left(context); 1150 pack_savings = 0; 1151 container_size = 0; 1152 } 1153 1154 /* item will fit into the block. */ 1155 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, iterpool)); 1156 SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, iterpool, iterpool)); 1157 svn_fs_x__noderevs_add(*container, noderev); 1158 1159 container_size += entry->size; 1160 APR_ARRAY_PUSH(nodes_in_container, svn_fs_x__p2l_entry_t *) = entry; 1161 1162 svn_pool_clear(iterpool); 1163 } 1164 1165 svn_pool_destroy(iterpool); 1166 1167 return SVN_NO_ERROR; 1168} 1169 1170 1171/* Finalize CONTAINER and write it to CONTEXT's pack file. 1172 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES. 1173 * Use SCRATCH_POOL for temporary allocations. 1174 */ 1175static svn_error_t * 1176write_reps_container(pack_context_t *context, 1177 svn_fs_x__reps_builder_t *container, 1178 apr_array_header_t *sub_items, 1179 apr_array_header_t *new_entries, 1180 apr_pool_t *scratch_pool) 1181{ 1182 apr_off_t offset = 0; 1183 svn_fs_x__p2l_entry_t container_entry; 1184 1185 svn_stream_t *pack_stream 1186 = svn_checksum__wrap_write_stream_fnv1a_32x4 1187 (&container_entry.fnv1_checksum, 1188 svn_stream_from_aprfile2(context->pack_file, 1189 TRUE, scratch_pool), 1190 scratch_pool); 1191 1192 SVN_ERR(svn_fs_x__write_reps_container(pack_stream, container, 1193 scratch_pool)); 1194 SVN_ERR(svn_stream_close(pack_stream)); 1195 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset, 1196 scratch_pool)); 1197 1198 container_entry.offset = context->pack_offset; 1199 container_entry.size = offset - container_entry.offset; 1200 container_entry.type = SVN_FS_X__ITEM_TYPE_REPS_CONT; 1201 container_entry.item_count = sub_items->nelts; 1202 container_entry.items = (svn_fs_x__id_t *)sub_items->elts; 1203 1204 context->pack_offset = offset; 1205 APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *) 1206 = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool); 1207 1208 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1209 (context->proto_p2l_index, &container_entry, scratch_pool)); 1210 1211 return SVN_NO_ERROR; 1212} 1213 1214/* Read the (property) representations identified by svn_fs_x__p2l_entry_t 1215 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into 1216 * CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations. 1217 */ 1218static svn_error_t * 1219write_reps_containers(pack_context_t *context, 1220 apr_array_header_t *entries, 1221 apr_file_t *temp_file, 1222 apr_array_header_t *new_entries, 1223 apr_pool_t *scratch_pool) 1224{ 1225 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1226 apr_pool_t *container_pool = svn_pool_create(scratch_pool); 1227 int i; 1228 1229 apr_ssize_t block_left = get_block_left(context); 1230 1231 svn_fs_x__reps_builder_t *container 1232 = svn_fs_x__reps_builder_create(context->fs, container_pool); 1233 apr_array_header_t *sub_items 1234 = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t)); 1235 svn_fs_x__revision_file_t *file; 1236 1237 SVN_ERR(svn_fs_x__wrap_temp_rev_file(&file, context->fs, temp_file, 1238 scratch_pool)); 1239 1240 /* copy all items in strict order */ 1241 for (i = entries->nelts-1; i >= 0; --i) 1242 { 1243 svn_fs_x__representation_t representation = { 0 }; 1244 svn_stringbuf_t *contents; 1245 svn_stream_t *stream; 1246 apr_size_t list_index; 1247 svn_fs_x__p2l_entry_t *entry 1248 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *); 1249 1250 if ((block_left < entry->size) && sub_items->nelts) 1251 { 1252 block_left = get_block_left(context) 1253 - svn_fs_x__reps_estimate_size(container); 1254 } 1255 1256 if ((block_left < entry->size) && sub_items->nelts) 1257 { 1258 SVN_ERR(write_reps_container(context, container, sub_items, 1259 new_entries, iterpool)); 1260 1261 apr_array_clear(sub_items); 1262 svn_pool_clear(container_pool); 1263 container = svn_fs_x__reps_builder_create(context->fs, 1264 container_pool); 1265 block_left = get_block_left(context); 1266 } 1267 1268 /* still enough space in current block? */ 1269 if (block_left < entry->size) 1270 { 1271 SVN_ERR(auto_pad_block(context, iterpool)); 1272 block_left = get_block_left(context); 1273 } 1274 1275 assert(entry->item_count == 1); 1276 representation.id = entry->items[0]; 1277 1278 /* select the change list in the source file, parse it and add it to 1279 * the container */ 1280 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, 1281 iterpool)); 1282 SVN_ERR(svn_fs_x__get_representation_length(&representation.size, 1283 &representation.expanded_size, 1284 context->fs, file, 1285 entry, iterpool)); 1286 SVN_ERR(svn_fs_x__get_contents(&stream, context->fs, &representation, 1287 FALSE, iterpool)); 1288 contents = svn_stringbuf_create_ensure(representation.expanded_size, 1289 iterpool); 1290 contents->len = representation.expanded_size; 1291 1292 /* The representation is immutable. Read it normally. */ 1293 SVN_ERR(svn_stream_read_full(stream, contents->data, &contents->len)); 1294 SVN_ERR(svn_stream_close(stream)); 1295 1296 SVN_ERR(svn_fs_x__reps_add(&list_index, container, 1297 svn_stringbuf__morph_into_string(contents))); 1298 SVN_ERR_ASSERT(list_index == sub_items->nelts); 1299 block_left -= entry->size; 1300 1301 APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0]; 1302 1303 svn_pool_clear(iterpool); 1304 } 1305 1306 if (sub_items->nelts) 1307 SVN_ERR(write_reps_container(context, container, sub_items, 1308 new_entries, iterpool)); 1309 1310 svn_pool_destroy(iterpool); 1311 svn_pool_destroy(container_pool); 1312 1313 return SVN_NO_ERROR; 1314} 1315 1316/* Return TRUE if the estimated size of the NODES_IN_CONTAINER plus the 1317 * representations given as svn_fs_x__p2l_entry_t * in ENTRIES may exceed 1318 * the space left in the current block. 1319 */ 1320static svn_boolean_t 1321should_flush_nodes_container(pack_context_t *context, 1322 svn_fs_x__noderevs_t *nodes_container, 1323 apr_array_header_t *entries) 1324{ 1325 apr_ssize_t block_left = get_block_left(context); 1326 apr_ssize_t rep_sum = 0; 1327 apr_ssize_t container_size 1328 = svn_fs_x__noderevs_estimate_size(nodes_container); 1329 1330 int i; 1331 for (i = 0; i < entries->nelts; ++i) 1332 { 1333 svn_fs_x__p2l_entry_t *entry 1334 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *); 1335 rep_sum += entry->size; 1336 } 1337 1338 return block_left < rep_sum + container_size; 1339} 1340 1341/* Read the contents of the first COUNT non-NULL, non-empty items in ITEMS 1342 * from TEMP_FILE and write them to CONTEXT->PACK_FILE. 1343 * Use SCRATCH_POOL for temporary allocations. 1344 */ 1345static svn_error_t * 1346store_items(pack_context_t *context, 1347 apr_file_t *temp_file, 1348 apr_array_header_t *items, 1349 int count, 1350 apr_pool_t *scratch_pool) 1351{ 1352 int i; 1353 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1354 1355 /* copy all items in strict order */ 1356 for (i = 0; i < count; ++i) 1357 { 1358 svn_fs_x__p2l_entry_t *entry 1359 = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *); 1360 if (!entry 1361 || entry->type == SVN_FS_X__ITEM_TYPE_UNUSED 1362 || entry->item_count == 0) 1363 continue; 1364 1365 /* select the item in the source file and copy it into the target 1366 * pack file */ 1367 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, 1368 iterpool)); 1369 SVN_ERR(copy_file_data(context, context->pack_file, temp_file, 1370 entry->size, iterpool)); 1371 1372 /* write index entry and update current position */ 1373 entry->offset = context->pack_offset; 1374 context->pack_offset += entry->size; 1375 1376 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1377 (context->proto_p2l_index, entry, iterpool)); 1378 1379 APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *) = entry; 1380 svn_pool_clear(iterpool); 1381 } 1382 1383 svn_pool_destroy(iterpool); 1384 1385 return SVN_NO_ERROR; 1386} 1387 1388/* Copy (append) the items identified by svn_fs_x__p2l_entry_t * elements 1389 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE. 1390 * Use SCRATCH_POOL for temporary allocations. 1391 */ 1392static svn_error_t * 1393copy_reps_from_temp(pack_context_t *context, 1394 apr_file_t *temp_file, 1395 apr_pool_t *scratch_pool) 1396{ 1397 svn_fs_x__data_t *ffd = context->fs->fsap_data; 1398 1399 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1400 apr_pool_t *container_pool = svn_pool_create(scratch_pool); 1401 apr_array_header_t *path_order = context->path_order; 1402 apr_array_header_t *reps = context->reps; 1403 apr_array_header_t *selected = apr_array_make(scratch_pool, 16, 1404 path_order->elt_size); 1405 apr_array_header_t *node_parts = apr_array_make(scratch_pool, 16, 1406 reps->elt_size); 1407 apr_array_header_t *rep_parts = apr_array_make(scratch_pool, 16, 1408 reps->elt_size); 1409 apr_array_header_t *nodes_in_container = apr_array_make(scratch_pool, 16, 1410 reps->elt_size); 1411 int i, k; 1412 int initial_reps_count = reps->nelts; 1413 1414 /* 1 container for all noderevs in the current block. We will try to 1415 * not write it to disk until the current block fills up, i.e. aim for 1416 * a single noderevs container per block. */ 1417 svn_fs_x__noderevs_t *nodes_container 1418 = svn_fs_x__noderevs_create(16, container_pool); 1419 1420 /* copy items in path order. Create block-sized containers. */ 1421 for (i = 0; i < path_order->nelts; ++i) 1422 { 1423 if (APR_ARRAY_IDX(path_order, i, path_order_t *) == NULL) 1424 continue; 1425 1426 /* Collect reps to combine and all noderevs referencing them */ 1427 SVN_ERR(select_reps(context, i, selected, node_parts, rep_parts)); 1428 1429 /* store the noderevs container in front of the reps */ 1430 SVN_ERR(store_nodes(context, temp_file, node_parts, &nodes_container, 1431 nodes_in_container, container_pool, iterpool)); 1432 1433 /* actually flush the noderevs to disk if the reps container is likely 1434 * to fill the block, i.e. no further noderevs will be added to the 1435 * nodes container. */ 1436 if (should_flush_nodes_container(context, nodes_container, node_parts)) 1437 SVN_ERR(write_nodes_container(context, &nodes_container, 1438 nodes_in_container, container_pool, 1439 iterpool)); 1440 1441 /* if all reps are short enough put them into one container. 1442 * Otherwise, just store all containers here. */ 1443 if (reps_fit_into_containers(selected, 2 * ffd->block_size)) 1444 SVN_ERR(write_reps_containers(context, rep_parts, temp_file, 1445 context->reps, iterpool)); 1446 else 1447 SVN_ERR(store_items(context, temp_file, rep_parts, rep_parts->nelts, 1448 iterpool)); 1449 1450 /* processed all items */ 1451 apr_array_clear(selected); 1452 apr_array_clear(node_parts); 1453 apr_array_clear(rep_parts); 1454 1455 svn_pool_clear(iterpool); 1456 } 1457 1458 /* flush noderevs container to disk */ 1459 if (nodes_in_container->nelts) 1460 SVN_ERR(write_nodes_container(context, &nodes_container, 1461 nodes_in_container, container_pool, 1462 iterpool)); 1463 1464 /* copy all items in strict order */ 1465 SVN_ERR(store_items(context, temp_file, reps, initial_reps_count, 1466 scratch_pool)); 1467 1468 /* vaccum ENTRIES array: eliminate NULL entries */ 1469 for (i = 0, k = 0; i < reps->nelts; ++i) 1470 { 1471 svn_fs_x__p2l_entry_t *entry 1472 = APR_ARRAY_IDX(reps, i, svn_fs_x__p2l_entry_t *); 1473 if (entry) 1474 { 1475 APR_ARRAY_IDX(reps, k, svn_fs_x__p2l_entry_t *) = entry; 1476 ++k; 1477 } 1478 } 1479 reps->nelts = k; 1480 1481 svn_pool_destroy(iterpool); 1482 svn_pool_destroy(container_pool); 1483 1484 return SVN_NO_ERROR; 1485} 1486 1487/* Finalize CONTAINER and write it to CONTEXT's pack file. 1488 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES. 1489 * Use SCRATCH_POOL for temporary allocations. 1490 */ 1491static svn_error_t * 1492write_changes_container(pack_context_t *context, 1493 svn_fs_x__changes_t *container, 1494 apr_array_header_t *sub_items, 1495 apr_array_header_t *new_entries, 1496 apr_pool_t *scratch_pool) 1497{ 1498 apr_off_t offset = 0; 1499 svn_fs_x__p2l_entry_t container_entry; 1500 1501 svn_stream_t *pack_stream 1502 = svn_checksum__wrap_write_stream_fnv1a_32x4 1503 (&container_entry.fnv1_checksum, 1504 svn_stream_from_aprfile2(context->pack_file, 1505 TRUE, scratch_pool), 1506 scratch_pool); 1507 1508 SVN_ERR(svn_fs_x__write_changes_container(pack_stream, 1509 container, 1510 scratch_pool)); 1511 SVN_ERR(svn_stream_close(pack_stream)); 1512 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset, 1513 scratch_pool)); 1514 1515 container_entry.offset = context->pack_offset; 1516 container_entry.size = offset - container_entry.offset; 1517 container_entry.type = SVN_FS_X__ITEM_TYPE_CHANGES_CONT; 1518 container_entry.item_count = sub_items->nelts; 1519 container_entry.items = (svn_fs_x__id_t *)sub_items->elts; 1520 1521 context->pack_offset = offset; 1522 APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *) 1523 = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool); 1524 1525 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1526 (context->proto_p2l_index, &container_entry, scratch_pool)); 1527 1528 return SVN_NO_ERROR; 1529} 1530 1531/* Read the change lists identified by svn_fs_x__p2l_entry_t * elements 1532 * in ENTRIES strictly in from TEMP_FILE, aggregate them and write them 1533 * into CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations. 1534 */ 1535static svn_error_t * 1536write_changes_containers(pack_context_t *context, 1537 apr_array_header_t *entries, 1538 apr_file_t *temp_file, 1539 apr_pool_t *scratch_pool) 1540{ 1541 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1542 apr_pool_t *container_pool = svn_pool_create(scratch_pool); 1543 int i; 1544 1545 apr_ssize_t block_left = get_block_left(context); 1546 apr_ssize_t estimated_addition = 0; 1547 1548 svn_fs_x__changes_t *container 1549 = svn_fs_x__changes_create(1000, container_pool); 1550 apr_array_header_t *sub_items 1551 = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t)); 1552 apr_array_header_t *new_entries 1553 = apr_array_make(context->info_pool, 16, entries->elt_size); 1554 svn_stream_t *temp_stream 1555 = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool); 1556 1557 /* copy all items in strict order */ 1558 for (i = entries->nelts-1; i >= 0; --i) 1559 { 1560 apr_array_header_t *changes; 1561 apr_size_t list_index; 1562 svn_fs_x__p2l_entry_t *entry 1563 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *); 1564 1565 /* zip compression alone will significantly reduce the size of large 1566 * change lists. So, we will probably need even less than this estimate. 1567 */ 1568 apr_ssize_t estimated_size = (entry->size / 5) + 250; 1569 1570 /* If necessary and enough data has been added to the container since 1571 * the last test, try harder by actually serializing the container and 1572 * determine current savings due to compression. */ 1573 if (block_left < estimated_size && estimated_addition > 2000) 1574 { 1575 svn_stringbuf_t *serialized 1576 = svn_stringbuf_create_ensure(get_block_left(context), iterpool); 1577 svn_stream_t *memory_stream 1578 = svn_stream_from_stringbuf(serialized, iterpool); 1579 1580 SVN_ERR(svn_fs_x__write_changes_container(memory_stream, 1581 container, iterpool)); 1582 SVN_ERR(svn_stream_close(temp_stream)); 1583 1584 block_left = get_block_left(context) - serialized->len; 1585 estimated_addition = 0; 1586 } 1587 1588 if ((block_left < estimated_size) && sub_items->nelts) 1589 { 1590 SVN_ERR(write_changes_container(context, container, sub_items, 1591 new_entries, iterpool)); 1592 1593 apr_array_clear(sub_items); 1594 svn_pool_clear(container_pool); 1595 container = svn_fs_x__changes_create(1000, container_pool); 1596 block_left = get_block_left(context); 1597 estimated_addition = 0; 1598 } 1599 1600 /* still enough space in current block? */ 1601 if (block_left < estimated_size) 1602 { 1603 SVN_ERR(auto_pad_block(context, iterpool)); 1604 block_left = get_block_left(context); 1605 } 1606 1607 /* select the change list in the source file, parse it and add it to 1608 * the container */ 1609 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, 1610 iterpool)); 1611 SVN_ERR(svn_fs_x__read_changes(&changes, temp_stream, scratch_pool, 1612 iterpool)); 1613 SVN_ERR(svn_fs_x__changes_append_list(&list_index, container, changes)); 1614 SVN_ERR_ASSERT(list_index == sub_items->nelts); 1615 block_left -= estimated_size; 1616 estimated_addition += estimated_size; 1617 1618 APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0]; 1619 1620 svn_pool_clear(iterpool); 1621 } 1622 1623 if (sub_items->nelts) 1624 SVN_ERR(write_changes_container(context, container, sub_items, 1625 new_entries, iterpool)); 1626 1627 *entries = *new_entries; 1628 svn_pool_destroy(iterpool); 1629 svn_pool_destroy(container_pool); 1630 1631 return SVN_NO_ERROR; 1632} 1633 1634/* Read the (property) representations identified by svn_fs_x__p2l_entry_t 1635 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into 1636 * CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations. 1637 */ 1638static svn_error_t * 1639write_property_containers(pack_context_t *context, 1640 apr_array_header_t *entries, 1641 apr_file_t *temp_file, 1642 apr_pool_t *scratch_pool) 1643{ 1644 apr_array_header_t *new_entries 1645 = apr_array_make(context->info_pool, 16, entries->elt_size); 1646 1647 SVN_ERR(write_reps_containers(context, entries, temp_file, new_entries, 1648 scratch_pool)); 1649 1650 *entries = *new_entries; 1651 1652 return SVN_NO_ERROR; 1653} 1654 1655/* Append all entries of svn_fs_x__p2l_entry_t * array TO_APPEND to 1656 * svn_fs_x__p2l_entry_t * array DEST. 1657 */ 1658static void 1659append_entries(apr_array_header_t *dest, 1660 apr_array_header_t *to_append) 1661{ 1662 int i; 1663 for (i = 0; i < to_append->nelts; ++i) 1664 APR_ARRAY_PUSH(dest, svn_fs_x__p2l_entry_t *) 1665 = APR_ARRAY_IDX(to_append, i, svn_fs_x__p2l_entry_t *); 1666} 1667 1668/* Write the log-to-phys proto index file for CONTEXT and use POOL for 1669 * temporary allocations. All items in all buckets must have been placed 1670 * by now. 1671 */ 1672static svn_error_t * 1673write_l2p_index(pack_context_t *context, 1674 apr_pool_t *pool) 1675{ 1676 apr_pool_t *scratch_pool = svn_pool_create(pool); 1677 const char *temp_name; 1678 const char *proto_index; 1679 apr_off_t offset = 0; 1680 1681 /* lump all items into one bucket. As target, use the bucket that 1682 * probably has the most entries already. */ 1683 append_entries(context->reps, context->changes); 1684 append_entries(context->reps, context->file_props); 1685 append_entries(context->reps, context->dir_props); 1686 1687 /* Let the index code do the expensive L2P -> P2L transformation. */ 1688 SVN_ERR(svn_fs_x__l2p_index_from_p2l_entries(&temp_name, 1689 context->fs, 1690 context->reps, 1691 pool, scratch_pool)); 1692 1693 /* Append newly written segment to exisiting proto index file. */ 1694 SVN_ERR(svn_io_file_name_get(&proto_index, context->proto_l2p_index, 1695 scratch_pool)); 1696 1697 SVN_ERR(svn_io_file_flush(context->proto_l2p_index, scratch_pool)); 1698 SVN_ERR(svn_io_append_file(temp_name, proto_index, scratch_pool)); 1699 SVN_ERR(svn_io_remove_file2(temp_name, FALSE, scratch_pool)); 1700 SVN_ERR(svn_io_file_seek(context->proto_l2p_index, APR_END, &offset, 1701 scratch_pool)); 1702 1703 /* Done. */ 1704 svn_pool_destroy(scratch_pool); 1705 1706 return SVN_NO_ERROR; 1707} 1708 1709/* Pack the current revision range of CONTEXT, i.e. this covers phases 2 1710 * to 4. Use SCRATCH_POOL for temporary allocations. 1711 */ 1712static svn_error_t * 1713pack_range(pack_context_t *context, 1714 apr_pool_t *scratch_pool) 1715{ 1716 svn_fs_x__data_t *ffd = context->fs->fsap_data; 1717 apr_pool_t *revpool = svn_pool_create(scratch_pool); 1718 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1719 1720 /* Phase 2: Copy items into various buckets and build tracking info */ 1721 svn_revnum_t revision; 1722 for (revision = context->start_rev; revision < context->end_rev; ++revision) 1723 { 1724 apr_off_t offset = 0; 1725 svn_fs_x__revision_file_t *rev_file; 1726 1727 /* Get the rev file dimensions (mainly index locations). */ 1728 SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, context->fs, 1729 revision, revpool, iterpool)); 1730 SVN_ERR(svn_fs_x__auto_read_footer(rev_file)); 1731 1732 /* store the indirect array index */ 1733 APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts; 1734 1735 /* read the phys-to-log index file until we covered the whole rev file. 1736 * That index contains enough info to build both target indexes from it. */ 1737 while (offset < rev_file->l2p_offset) 1738 { 1739 /* read one cluster */ 1740 int i; 1741 apr_array_header_t *entries; 1742 svn_pool_clear(iterpool); 1743 1744 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs, 1745 rev_file, revision, offset, 1746 ffd->p2l_page_size, iterpool, 1747 iterpool)); 1748 1749 for (i = 0; i < entries->nelts; ++i) 1750 { 1751 svn_fs_x__p2l_entry_t *entry 1752 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t); 1753 1754 /* skip first entry if that was duplicated due crossing a 1755 cluster boundary */ 1756 if (offset > entry->offset) 1757 continue; 1758 1759 /* process entry while inside the rev file */ 1760 offset = entry->offset; 1761 if (offset < rev_file->l2p_offset) 1762 { 1763 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset, 1764 iterpool)); 1765 1766 if (entry->type == SVN_FS_X__ITEM_TYPE_CHANGES) 1767 SVN_ERR(copy_item_to_temp(context, 1768 context->changes, 1769 context->changes_file, 1770 rev_file, entry, iterpool)); 1771 else if (entry->type == SVN_FS_X__ITEM_TYPE_FILE_PROPS) 1772 SVN_ERR(copy_item_to_temp(context, 1773 context->file_props, 1774 context->file_props_file, 1775 rev_file, entry, iterpool)); 1776 else if (entry->type == SVN_FS_X__ITEM_TYPE_DIR_PROPS) 1777 SVN_ERR(copy_item_to_temp(context, 1778 context->dir_props, 1779 context->dir_props_file, 1780 rev_file, entry, iterpool)); 1781 else if ( entry->type == SVN_FS_X__ITEM_TYPE_FILE_REP 1782 || entry->type == SVN_FS_X__ITEM_TYPE_DIR_REP) 1783 SVN_ERR(copy_rep_to_temp(context, rev_file, entry, 1784 iterpool)); 1785 else if (entry->type == SVN_FS_X__ITEM_TYPE_NODEREV) 1786 SVN_ERR(copy_node_to_temp(context, rev_file, entry, 1787 iterpool)); 1788 else 1789 SVN_ERR_ASSERT(entry->type == SVN_FS_X__ITEM_TYPE_UNUSED); 1790 1791 offset += entry->size; 1792 } 1793 } 1794 1795 if (context->cancel_func) 1796 SVN_ERR(context->cancel_func(context->cancel_baton)); 1797 } 1798 1799 svn_pool_clear(revpool); 1800 } 1801 1802 svn_pool_destroy(iterpool); 1803 1804 /* phase 3: placement. 1805 * Use "newest first" placement for simple items. */ 1806 sort_items(context->changes); 1807 sort_items(context->file_props); 1808 sort_items(context->dir_props); 1809 1810 /* follow dependencies recursively for noderevs and data representations */ 1811 sort_reps(context); 1812 1813 /* phase 4: copy bucket data to pack file. Write P2L index. */ 1814 SVN_ERR(write_changes_containers(context, context->changes, 1815 context->changes_file, revpool)); 1816 svn_pool_clear(revpool); 1817 SVN_ERR(write_property_containers(context, context->file_props, 1818 context->file_props_file, revpool)); 1819 svn_pool_clear(revpool); 1820 SVN_ERR(write_property_containers(context, context->dir_props, 1821 context->dir_props_file, revpool)); 1822 svn_pool_clear(revpool); 1823 SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool)); 1824 svn_pool_clear(revpool); 1825 1826 /* write L2P index as well (now that we know all target offsets) */ 1827 SVN_ERR(write_l2p_index(context, revpool)); 1828 1829 svn_pool_destroy(revpool); 1830 1831 return SVN_NO_ERROR; 1832} 1833 1834/* Append CONTEXT->START_REV to the context's pack file with no re-ordering. 1835 * This function will only be used for very large revisions (>>100k changes). 1836 * Use SCRATCH_POOL for temporary allocations. 1837 */ 1838static svn_error_t * 1839append_revision(pack_context_t *context, 1840 apr_pool_t *scratch_pool) 1841{ 1842 svn_fs_x__data_t *ffd = context->fs->fsap_data; 1843 apr_off_t offset = 0; 1844 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1845 svn_fs_x__revision_file_t *rev_file; 1846 apr_finfo_t finfo; 1847 1848 /* Get the size of the file. */ 1849 const char *path = svn_dirent_join(context->shard_dir, 1850 apr_psprintf(iterpool, "%ld", 1851 context->start_rev), 1852 scratch_pool); 1853 SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, scratch_pool)); 1854 1855 /* Copy all the bits from the rev file to the end of the pack file. */ 1856 SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, context->fs, 1857 context->start_rev, scratch_pool, 1858 iterpool)); 1859 SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file, 1860 finfo.size, iterpool)); 1861 1862 /* mark the start of a new revision */ 1863 SVN_ERR(svn_fs_x__l2p_proto_index_add_revision(context->proto_l2p_index, 1864 scratch_pool)); 1865 1866 /* read the phys-to-log index file until we covered the whole rev file. 1867 * That index contains enough info to build both target indexes from it. */ 1868 while (offset < finfo.size) 1869 { 1870 /* read one cluster */ 1871 int i; 1872 apr_array_header_t *entries; 1873 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs, rev_file, 1874 context->start_rev, offset, 1875 ffd->p2l_page_size, iterpool, 1876 iterpool)); 1877 1878 for (i = 0; i < entries->nelts; ++i) 1879 { 1880 svn_fs_x__p2l_entry_t *entry 1881 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t); 1882 1883 /* skip first entry if that was duplicated due crossing a 1884 cluster boundary */ 1885 if (offset > entry->offset) 1886 continue; 1887 1888 /* process entry while inside the rev file */ 1889 offset = entry->offset; 1890 if (offset < finfo.size) 1891 { 1892 /* there should be true containers */ 1893 SVN_ERR_ASSERT(entry->item_count == 1); 1894 1895 entry->offset += context->pack_offset; 1896 offset += entry->size; 1897 SVN_ERR(svn_fs_x__l2p_proto_index_add_entry 1898 (context->proto_l2p_index, entry->offset, 0, 1899 entry->items[0].number, iterpool)); 1900 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry 1901 (context->proto_p2l_index, entry, iterpool)); 1902 } 1903 } 1904 1905 svn_pool_clear(iterpool); 1906 } 1907 1908 svn_pool_destroy(iterpool); 1909 context->pack_offset += finfo.size; 1910 1911 return SVN_NO_ERROR; 1912} 1913 1914/* Format 7 packing logic. 1915 * 1916 * Pack the revision shard starting at SHARD_REV in filesystem FS from 1917 * SHARD_DIR into the PACK_FILE_DIR, using SCRATCH_POOL for temporary 1918 * allocations. Limit the extra memory consumption to MAX_MEM bytes. 1919 * CANCEL_FUNC and CANCEL_BATON are what you think they are. 1920 */ 1921static svn_error_t * 1922pack_log_addressed(svn_fs_t *fs, 1923 const char *pack_file_dir, 1924 const char *shard_dir, 1925 svn_revnum_t shard_rev, 1926 apr_size_t max_mem, 1927 svn_cancel_func_t cancel_func, 1928 void *cancel_baton, 1929 apr_pool_t *scratch_pool) 1930{ 1931 enum 1932 { 1933 /* estimated amount of memory used to represent one item in memory 1934 * during rev file packing */ 1935 PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t)) 1936 + APR_ALIGN_DEFAULT(2 *sizeof(void*)) 1937 + APR_ALIGN_DEFAULT(sizeof(reference_t)) 1938 + APR_ALIGN_DEFAULT(sizeof(svn_fs_x__p2l_entry_t)) 1939 + 6 * sizeof(void*) 1940 }; 1941 1942 int max_items = max_mem / PER_ITEM_MEM > INT_MAX 1943 ? INT_MAX 1944 : (int)(max_mem / PER_ITEM_MEM); 1945 apr_array_header_t *max_ids; 1946 pack_context_t context = { 0 }; 1947 int i; 1948 apr_size_t item_count = 0; 1949 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1950 1951 /* set up a pack context */ 1952 SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir, 1953 shard_rev, max_items, cancel_func, 1954 cancel_baton, scratch_pool)); 1955 1956 /* phase 1: determine the size of the revisions to pack */ 1957 SVN_ERR(svn_fs_x__l2p_get_max_ids(&max_ids, fs, shard_rev, 1958 context.shard_end_rev - shard_rev, 1959 scratch_pool, scratch_pool)); 1960 1961 /* pack revisions in ranges that don't exceed MAX_MEM */ 1962 for (i = 0; i < max_ids->nelts; ++i) 1963 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items) 1964 { 1965 context.end_rev++; 1966 } 1967 else 1968 { 1969 /* some unpacked revisions before this one? */ 1970 if (context.start_rev < context.end_rev) 1971 { 1972 /* pack them intelligently (might be just 1 rev but still ...) */ 1973 SVN_ERR(pack_range(&context, iterpool)); 1974 SVN_ERR(reset_pack_context(&context, iterpool)); 1975 item_count = 0; 1976 } 1977 1978 /* next revision range is to start with the current revision */ 1979 context.start_rev = i + context.shard_rev; 1980 context.end_rev = context.start_rev + 1; 1981 1982 /* if this is a very large revision, we must place it as is */ 1983 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items) 1984 { 1985 SVN_ERR(append_revision(&context, iterpool)); 1986 context.start_rev++; 1987 } 1988 else 1989 item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t); 1990 1991 svn_pool_clear(iterpool); 1992 } 1993 1994 /* non-empty revision range at the end? */ 1995 if (context.start_rev < context.end_rev) 1996 SVN_ERR(pack_range(&context, iterpool)); 1997 1998 /* last phase: finalize indexes and clean up */ 1999 SVN_ERR(reset_pack_context(&context, iterpool)); 2000 SVN_ERR(close_pack_context(&context, iterpool)); 2001 svn_pool_destroy(iterpool); 2002 2003 return SVN_NO_ERROR; 2004} 2005 2006/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file. 2007 Use SCRATCH_POOL for temporary allocations. */ 2008svn_error_t * 2009svn_fs_x__get_packed_offset(apr_off_t *rev_offset, 2010 svn_fs_t *fs, 2011 svn_revnum_t rev, 2012 apr_pool_t *scratch_pool) 2013{ 2014 svn_fs_x__data_t *ffd = fs->fsap_data; 2015 svn_stream_t *manifest_stream; 2016 svn_boolean_t is_cached; 2017 svn_revnum_t shard; 2018 apr_int64_t shard_pos; 2019 apr_array_header_t *manifest; 2020 apr_pool_t *iterpool; 2021 2022 shard = rev / ffd->max_files_per_dir; 2023 2024 /* position of the shard within the manifest */ 2025 shard_pos = rev % ffd->max_files_per_dir; 2026 2027 /* fetch exactly that element into *rev_offset, if the manifest is found 2028 in the cache */ 2029 SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached, 2030 ffd->packed_offset_cache, &shard, 2031 svn_fs_x__get_sharded_offset, &shard_pos, 2032 scratch_pool)); 2033 2034 if (is_cached) 2035 return SVN_NO_ERROR; 2036 2037 /* Open the manifest file. */ 2038 SVN_ERR(svn_stream_open_readonly(&manifest_stream, 2039 svn_fs_x__path_rev_packed(fs, rev, PATH_MANIFEST, 2040 scratch_pool), 2041 scratch_pool, scratch_pool)); 2042 2043 /* While we're here, let's just read the entire manifest file into an array, 2044 so we can cache the entire thing. */ 2045 iterpool = svn_pool_create(scratch_pool); 2046 manifest = apr_array_make(scratch_pool, ffd->max_files_per_dir, 2047 sizeof(apr_off_t)); 2048 while (1) 2049 { 2050 svn_boolean_t eof; 2051 apr_int64_t val; 2052 2053 svn_pool_clear(iterpool); 2054 SVN_ERR(svn_fs_x__read_number_from_stream(&val, &eof, manifest_stream, 2055 iterpool)); 2056 if (eof) 2057 break; 2058 2059 APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val; 2060 } 2061 svn_pool_destroy(iterpool); 2062 2063 *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir, 2064 apr_off_t); 2065 2066 /* Close up shop and cache the array. */ 2067 SVN_ERR(svn_stream_close(manifest_stream)); 2068 return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, 2069 scratch_pool); 2070} 2071 2072/* In filesystem FS, pack the revision SHARD containing exactly 2073 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR, 2074 * using SCRATCH_POOL for temporary allocations. Try to limit the amount of 2075 * temporary memory needed to MAX_MEM bytes. CANCEL_FUNC and CANCEL_BATON 2076 * are what you think they are. 2077 * 2078 * If for some reason we detect a partial packing already performed, we 2079 * remove the pack file and start again. 2080 * 2081 * The actual packing will be done in a format-specific sub-function. 2082 */ 2083static svn_error_t * 2084pack_rev_shard(svn_fs_t *fs, 2085 const char *pack_file_dir, 2086 const char *shard_path, 2087 apr_int64_t shard, 2088 int max_files_per_dir, 2089 apr_size_t max_mem, 2090 svn_cancel_func_t cancel_func, 2091 void *cancel_baton, 2092 apr_pool_t *scratch_pool) 2093{ 2094 const char *pack_file_path; 2095 svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir); 2096 2097 /* Some useful paths. */ 2098 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, scratch_pool); 2099 2100 /* Remove any existing pack file for this shard, since it is incomplete. */ 2101 SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton, 2102 scratch_pool)); 2103 2104 /* Create the new directory and pack file. */ 2105 SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool)); 2106 2107 /* Index information files */ 2108 SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev, 2109 max_mem, cancel_func, cancel_baton, 2110 scratch_pool)); 2111 2112 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, scratch_pool)); 2113 SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, scratch_pool)); 2114 2115 return SVN_NO_ERROR; 2116} 2117 2118/* In the file system at FS_PATH, pack the SHARD in REVS_DIR and 2119 * REVPROPS_DIR containing exactly MAX_FILES_PER_DIR revisions, using 2120 * SCRATCH_POOL temporary for allocations. REVPROPS_DIR will be NULL if 2121 * revprop packing is not supported. COMPRESSION_LEVEL and MAX_PACK_SIZE 2122 * will be ignored in that case. 2123 * 2124 * CANCEL_FUNC and CANCEL_BATON are what you think they are; similarly 2125 * NOTIFY_FUNC and NOTIFY_BATON. 2126 * 2127 * If for some reason we detect a partial packing already performed, we 2128 * remove the pack file and start again. 2129 */ 2130static svn_error_t * 2131pack_shard(const char *revs_dir, 2132 const char *revsprops_dir, 2133 svn_fs_t *fs, 2134 apr_int64_t shard, 2135 int max_files_per_dir, 2136 apr_off_t max_pack_size, 2137 int compression_level, 2138 svn_fs_pack_notify_t notify_func, 2139 void *notify_baton, 2140 svn_cancel_func_t cancel_func, 2141 void *cancel_baton, 2142 apr_pool_t *scratch_pool) 2143{ 2144 svn_fs_x__data_t *ffd = fs->fsap_data; 2145 const char *rev_shard_path, *rev_pack_file_dir; 2146 const char *revprops_shard_path, *revprops_pack_file_dir; 2147 2148 /* Notify caller we're starting to pack this shard. */ 2149 if (notify_func) 2150 SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_start, 2151 scratch_pool)); 2152 2153 /* Some useful paths. */ 2154 rev_pack_file_dir = svn_dirent_join(revs_dir, 2155 apr_psprintf(scratch_pool, 2156 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD, 2157 shard), 2158 scratch_pool); 2159 rev_shard_path = svn_dirent_join(revs_dir, 2160 apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard), 2161 scratch_pool); 2162 2163 /* pack the revision content */ 2164 SVN_ERR(pack_rev_shard(fs, rev_pack_file_dir, rev_shard_path, 2165 shard, max_files_per_dir, DEFAULT_MAX_MEM, 2166 cancel_func, cancel_baton, scratch_pool)); 2167 2168 /* if enabled, pack the revprops in an equivalent way */ 2169 if (revsprops_dir) 2170 { 2171 revprops_pack_file_dir = svn_dirent_join(revsprops_dir, 2172 apr_psprintf(scratch_pool, 2173 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD, 2174 shard), 2175 scratch_pool); 2176 revprops_shard_path = svn_dirent_join(revsprops_dir, 2177 apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard), 2178 scratch_pool); 2179 2180 SVN_ERR(svn_fs_x__pack_revprops_shard(revprops_pack_file_dir, 2181 revprops_shard_path, 2182 shard, max_files_per_dir, 2183 (int)(0.9 * max_pack_size), 2184 compression_level, 2185 cancel_func, cancel_baton, 2186 scratch_pool)); 2187 } 2188 2189 /* Update the min-unpacked-rev file to reflect our newly packed shard. */ 2190 SVN_ERR(svn_fs_x__write_min_unpacked_rev(fs, 2191 (svn_revnum_t)((shard + 1) * max_files_per_dir), 2192 scratch_pool)); 2193 ffd->min_unpacked_rev = (svn_revnum_t)((shard + 1) * max_files_per_dir); 2194 2195 /* Finally, remove the existing shard directories. 2196 * For revprops, clean up older obsolete shards as well as they might 2197 * have been left over from an interrupted FS upgrade. */ 2198 SVN_ERR(svn_io_remove_dir2(rev_shard_path, TRUE, 2199 cancel_func, cancel_baton, scratch_pool)); 2200 if (revsprops_dir) 2201 { 2202 svn_node_kind_t kind = svn_node_dir; 2203 apr_int64_t to_cleanup = shard; 2204 do 2205 { 2206 SVN_ERR(svn_fs_x__delete_revprops_shard(revprops_shard_path, 2207 to_cleanup, 2208 max_files_per_dir, 2209 cancel_func, cancel_baton, 2210 scratch_pool)); 2211 2212 /* If the previous shard exists, clean it up as well. 2213 Don't try to clean up shard 0 as it we can't tell quickly 2214 whether it actually needs cleaning up. */ 2215 revprops_shard_path = svn_dirent_join(revsprops_dir, 2216 apr_psprintf(scratch_pool, 2217 "%" APR_INT64_T_FMT, 2218 --to_cleanup), 2219 scratch_pool); 2220 SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, scratch_pool)); 2221 } 2222 while (kind == svn_node_dir && to_cleanup > 0); 2223 } 2224 2225 /* Notify caller we're starting to pack this shard. */ 2226 if (notify_func) 2227 SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_end, 2228 scratch_pool)); 2229 2230 return SVN_NO_ERROR; 2231} 2232 2233typedef struct pack_baton_t 2234{ 2235 svn_fs_t *fs; 2236 svn_fs_pack_notify_t notify_func; 2237 void *notify_baton; 2238 svn_cancel_func_t cancel_func; 2239 void *cancel_baton; 2240} pack_baton_t; 2241 2242 2243/* The work-horse for svn_fs_x__pack, called with the FS write lock. 2244 This implements the svn_fs_x__with_write_lock() 'body' callback 2245 type. BATON is a 'pack_baton_t *'. 2246 2247 WARNING: if you add a call to this function, please note: 2248 The code currently assumes that any piece of code running with 2249 the write-lock set can rely on the ffd->min_unpacked_rev and 2250 ffd->min_unpacked_revprop caches to be up-to-date (and, by 2251 extension, on not having to use a retry when calling 2252 svn_fs_x__path_rev_absolute() and friends). If you add a call 2253 to this function, consider whether you have to call 2254 update_min_unpacked_rev(). 2255 See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith 2256 */ 2257static svn_error_t * 2258pack_body(void *baton, 2259 apr_pool_t *scratch_pool) 2260{ 2261 pack_baton_t *pb = baton; 2262 svn_fs_x__data_t *ffd = pb->fs->fsap_data; 2263 apr_int64_t completed_shards; 2264 apr_int64_t i; 2265 svn_revnum_t youngest; 2266 apr_pool_t *iterpool; 2267 const char *rev_data_path; 2268 const char *revprops_data_path = NULL; 2269 2270 /* If we aren't using sharding, we can't do any packing, so quit. */ 2271 SVN_ERR(svn_fs_x__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs, 2272 scratch_pool)); 2273 2274 SVN_ERR(svn_fs_x__youngest_rev(&youngest, pb->fs, scratch_pool)); 2275 completed_shards = (youngest + 1) / ffd->max_files_per_dir; 2276 2277 /* See if we've already completed all possible shards thus far. */ 2278 if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir)) 2279 return SVN_NO_ERROR; 2280 2281 rev_data_path = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, scratch_pool); 2282 revprops_data_path = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR, 2283 scratch_pool); 2284 2285 iterpool = svn_pool_create(scratch_pool); 2286 for (i = ffd->min_unpacked_rev / ffd->max_files_per_dir; 2287 i < completed_shards; 2288 i++) 2289 { 2290 svn_pool_clear(iterpool); 2291 2292 if (pb->cancel_func) 2293 SVN_ERR(pb->cancel_func(pb->cancel_baton)); 2294 2295 SVN_ERR(pack_shard(rev_data_path, revprops_data_path, 2296 pb->fs, i, ffd->max_files_per_dir, 2297 ffd->revprop_pack_size, 2298 ffd->compress_packed_revprops 2299 ? SVN__COMPRESSION_ZLIB_DEFAULT 2300 : SVN__COMPRESSION_NONE, 2301 pb->notify_func, pb->notify_baton, 2302 pb->cancel_func, pb->cancel_baton, iterpool)); 2303 } 2304 2305 svn_pool_destroy(iterpool); 2306 return SVN_NO_ERROR; 2307} 2308 2309svn_error_t * 2310svn_fs_x__pack(svn_fs_t *fs, 2311 svn_fs_pack_notify_t notify_func, 2312 void *notify_baton, 2313 svn_cancel_func_t cancel_func, 2314 void *cancel_baton, 2315 apr_pool_t *scratch_pool) 2316{ 2317 pack_baton_t pb = { 0 }; 2318 pb.fs = fs; 2319 pb.notify_func = notify_func; 2320 pb.notify_baton = notify_baton; 2321 pb.cancel_func = cancel_func; 2322 pb.cancel_baton = cancel_baton; 2323 return svn_fs_x__with_pack_lock(fs, pack_body, &pb, scratch_pool); 2324} 2325