1/* pack.c --- FSFS shard packing functionality
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22#include <assert.h>
23#include <string.h>
24
25#include "svn_pools.h"
26#include "svn_dirent_uri.h"
27#include "svn_sorts.h"
28#include "private/svn_temp_serializer.h"
29#include "private/svn_sorts_private.h"
30#include "private/svn_subr_private.h"
31#include "private/svn_string_private.h"
32#include "private/svn_io_private.h"
33
34#include "fs_fs.h"
35#include "pack.h"
36#include "util.h"
37#include "id.h"
38#include "index.h"
39#include "low_level.h"
40#include "revprops.h"
41#include "transaction.h"
42
43#include "../libsvn_fs/fs-loader.h"
44
45#include "svn_private_config.h"
46#include "temp_serializer.h"
47
48/* Logical addressing packing logic:
49 *
50 * We pack files on a pack file basis (e.g. 1000 revs) without changing
51 * existing pack files nor the revision files outside the range to pack.
52 *
53 * First, we will scan the revision file indexes to determine the number
54 * of items to "place" (i.e. determine their optimal position within the
55 * future pack file).  For each item, we will need a constant amount of
56 * memory to track it.  A MAX_MEM parameter sets a limit to the number of
57 * items we may place in one go.  That means, we may not be able to add
58 * all revisions at once.  Instead, we will run the placement for a subset
59 * of revisions at a time.  The very unlikely worst case will simply append
60 * all revision data with just a little reshuffling inside each revision.
61 *
62 * In a second step, we read all revisions in the selected range, build
63 * the item tracking information and copy the items themselves from the
64 * revision files to temporary files.  The latter serve as buckets for a
65 * very coarse bucket presort:  Separate change lists, file properties,
66 * directory properties and noderevs + representations from one another.
67 *
68 * The third step will determine an optimized placement for the items in
69 * each of the 4 buckets separately.  The first three will simply order
70 * their items by revision, starting with the newest once.  Placing rep
71 * and noderev items is a more elaborate process documented in the code.
72 *
73 * In short, we store items in the following order:
74 * - changed paths lists
75 * - node property
76 * - directory properties
77 * - directory representations corresponding noderevs, lexical path order
78 *   with special treatment of "trunk" and "branches"
79 * - same for file representations
80 *
81 * Step 4 copies the items from the temporary buckets into the final
82 * pack file and writes the temporary index files.
83 *
84 * Finally, after the last range of revisions, create the final indexes.
85 */
86
87/* Maximum amount of memory we allocate for placement information during
88 * the pack process.
89 */
90#define DEFAULT_MAX_MEM (64 * 1024 * 1024)
91
92/* Data structure describing a node change at PATH, REVISION.
93 * We will sort these instances by PATH and NODE_ID such that we can combine
94 * similar nodes in the same reps container and store containers in path
95 * major order.
96 */
97typedef struct path_order_t
98{
99  /* changed path */
100  svn_prefix_string__t *path;
101
102  /* node ID for this PATH in REVISION */
103  svn_fs_fs__id_part_t node_id;
104
105  /* when this change happened */
106  svn_revnum_t revision;
107
108  /* noderev predecessor count */
109  int predecessor_count;
110
111  /* this is a directory node */
112  svn_boolean_t is_dir;
113
114  /* length of the expanded representation content */
115  apr_int64_t expanded_size;
116
117  /* item ID of the noderev linked to the change. May be (0, 0). */
118  svn_fs_fs__id_part_t noderev_id;
119
120  /* item ID of the representation containing the new data. May be (0, 0). */
121  svn_fs_fs__id_part_t rep_id;
122} path_order_t;
123
124/* Represents a reference from item FROM to item TO.  FROM may be a noderev
125 * or rep_id while TO is (currently) always a representation.  We will sort
126 * them by TO which allows us to collect all dependent items.
127 */
128typedef struct reference_t
129{
130  svn_fs_fs__id_part_t to;
131  svn_fs_fs__id_part_t from;
132} reference_t;
133
134/* This structure keeps track of all the temporary data and status that
135 * needs to be kept around during the creation of one pack file.  After
136 * each revision range (in case we can't process all revs at once due to
137 * memory restrictions), parts of the data will get re-initialized.
138 */
139typedef struct pack_context_t
140{
141  /* file system that we operate on */
142  svn_fs_t *fs;
143
144  /* cancel function to invoke at regular intervals. May be NULL */
145  svn_cancel_func_t cancel_func;
146
147  /* baton to pass to CANCEL_FUNC */
148  void *cancel_baton;
149
150  /* first revision in the shard (and future pack file) */
151  svn_revnum_t shard_rev;
152
153  /* first revision in the range to process (>= SHARD_REV) */
154  svn_revnum_t start_rev;
155
156  /* first revision after the range to process (<= SHARD_END_REV) */
157  svn_revnum_t end_rev;
158
159  /* first revision after the current shard */
160  svn_revnum_t shard_end_rev;
161
162  /* log-to-phys proto index for the whole pack file */
163  apr_file_t *proto_l2p_index;
164
165  /* phys-to-log proto index for the whole pack file */
166  apr_file_t *proto_p2l_index;
167
168  /* full shard directory path (containing the unpacked revisions) */
169  const char *shard_dir;
170
171  /* full packed shard directory path (containing the pack file + indexes) */
172  const char *pack_file_dir;
173
174  /* full pack file path (including PACK_FILE_DIR) */
175  const char *pack_file_path;
176
177  /* current write position (i.e. file length) in the pack file */
178  apr_off_t pack_offset;
179
180  /* the pack file to ultimately write all data to */
181  apr_file_t *pack_file;
182
183  /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists.
184   * Will be filled in phase 2 and be cleared after each revision range. */
185  apr_array_header_t *changes;
186
187  /* temp file receiving all change list items (referenced by CHANGES).
188   * Will be filled in phase 2 and be cleared after each revision range. */
189  apr_file_t *changes_file;
190
191  /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties.
192   * Will be filled in phase 2 and be cleared after each revision range. */
193  apr_array_header_t *file_props;
194
195  /* temp file receiving all file prop items (referenced by FILE_PROPS).
196   * Will be filled in phase 2 and be cleared after each revision range.*/
197  apr_file_t *file_props_file;
198
199  /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties.
200   * Will be filled in phase 2 and be cleared after each revision range. */
201  apr_array_header_t *dir_props;
202
203  /* temp file receiving all directory prop items (referenced by DIR_PROPS).
204   * Will be filled in phase 2 and be cleared after each revision range.*/
205  apr_file_t *dir_props_file;
206
207  /* container for all PATH members in PATH_ORDER. */
208  svn_prefix_tree__t *paths;
209
210  /* array of path_order_t *.  Will be filled in phase 2 and be cleared
211   * after each revision range.  Sorted by PATH, NODE_ID. */
212  apr_array_header_t *path_order;
213
214  /* array of reference_t* linking representations to their delta bases.
215   * Will be filled in phase 2 and be cleared after each revision range.
216   * It will be sorted by the FROM members (for rep->base rep lookup). */
217  apr_array_header_t *references;
218
219  /* array of svn_fs_fs__p2l_entry_t*.  Will be filled in phase 2 and be
220   * cleared after each revision range.  During phase 3, we will set items
221   * to NULL that we already processed. */
222  apr_array_header_t *reps;
223
224  /* array of int, marking for each revision, the which offset their items
225   * begin in REPS.  Will be filled in phase 2 and be cleared after
226   * each revision range. */
227  apr_array_header_t *rev_offsets;
228
229  /* temp file receiving all items referenced by REPS.
230   * Will be filled in phase 2 and be cleared after each revision range.*/
231  apr_file_t *reps_file;
232
233  /* pool used for temporary data structures that will be cleaned up when
234   * the next range of revisions is being processed */
235  apr_pool_t *info_pool;
236} pack_context_t;
237
238/* Create and initialize a new pack context for packing shard SHARD_REV in
239 * SHARD_DIR into PACK_FILE_DIR within filesystem FS.  Allocate it in POOL
240 * and return the structure in *CONTEXT.
241 *
242 * Limit the number of items being copied per iteration to MAX_ITEMS.
243 * Set CANCEL_FUNC and CANCEL_BATON as well.
244 */
245static svn_error_t *
246initialize_pack_context(pack_context_t *context,
247                        svn_fs_t *fs,
248                        const char *pack_file_dir,
249                        const char *shard_dir,
250                        svn_revnum_t shard_rev,
251                        int max_items,
252                        svn_cancel_func_t cancel_func,
253                        void *cancel_baton,
254                        apr_pool_t *pool)
255{
256  fs_fs_data_t *ffd = fs->fsap_data;
257  const char *temp_dir;
258  int max_revs = MIN(ffd->max_files_per_dir, max_items);
259
260  SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT);
261  SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
262
263  /* where we will place our various temp files */
264  SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
265
266  /* store parameters */
267  context->fs = fs;
268  context->cancel_func = cancel_func;
269  context->cancel_baton = cancel_baton;
270
271  context->shard_rev = shard_rev;
272  context->start_rev = shard_rev;
273  context->end_rev = shard_rev;
274  context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
275
276  /* Create the new directory and pack file. */
277  context->shard_dir = shard_dir;
278  context->pack_file_dir = pack_file_dir;
279  context->pack_file_path
280    = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
281  SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path,
282                           APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL
283                             | APR_CREATE, APR_OS_DEFAULT, pool));
284
285  /* Proto index files */
286  SVN_ERR(svn_fs_fs__l2p_proto_index_open(
287             &context->proto_l2p_index,
288             svn_dirent_join(pack_file_dir,
289                             PATH_INDEX PATH_EXT_L2P_INDEX,
290                             pool),
291             pool));
292  SVN_ERR(svn_fs_fs__p2l_proto_index_open(
293             &context->proto_p2l_index,
294             svn_dirent_join(pack_file_dir,
295                             PATH_INDEX PATH_EXT_P2L_INDEX,
296                             pool),
297             pool));
298
299  /* item buckets: one item info array and one temp file per bucket */
300  context->changes = apr_array_make(pool, max_items,
301                                    sizeof(svn_fs_fs__p2l_entry_t *));
302  SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
303                                   svn_io_file_del_on_close, pool, pool));
304  context->file_props = apr_array_make(pool, max_items,
305                                       sizeof(svn_fs_fs__p2l_entry_t *));
306  SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
307                                   svn_io_file_del_on_close, pool, pool));
308  context->dir_props = apr_array_make(pool, max_items,
309                                      sizeof(svn_fs_fs__p2l_entry_t *));
310  SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
311                                   svn_io_file_del_on_close, pool, pool));
312
313  /* noderev and representation item bucket */
314  context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
315  context->path_order = apr_array_make(pool, max_items,
316                                       sizeof(path_order_t *));
317  context->references = apr_array_make(pool, max_items,
318                                       sizeof(reference_t *));
319  context->reps = apr_array_make(pool, max_items,
320                                 sizeof(svn_fs_fs__p2l_entry_t *));
321  SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
322                                   svn_io_file_del_on_close, pool, pool));
323
324  /* the pool used for temp structures */
325  context->info_pool = svn_pool_create(pool);
326  context->paths = svn_prefix_tree__create(context->info_pool);
327
328  return SVN_NO_ERROR;
329}
330
331/* Clean up / free all revision range specific data and files in CONTEXT.
332 * Use POOL for temporary allocations.
333 */
334static svn_error_t *
335reset_pack_context(pack_context_t *context,
336                   apr_pool_t *pool)
337{
338  const char *temp_dir;
339
340  apr_array_clear(context->changes);
341  SVN_ERR(svn_io_file_close(context->changes_file, pool));
342  apr_array_clear(context->file_props);
343  SVN_ERR(svn_io_file_close(context->file_props_file, pool));
344  apr_array_clear(context->dir_props);
345  SVN_ERR(svn_io_file_close(context->dir_props_file, pool));
346
347  apr_array_clear(context->rev_offsets);
348  apr_array_clear(context->path_order);
349  apr_array_clear(context->references);
350  apr_array_clear(context->reps);
351  SVN_ERR(svn_io_file_close(context->reps_file, pool));
352
353  svn_pool_clear(context->info_pool);
354
355  /* The new temporary files must live at least as long as any other info
356   * object in CONTEXT. */
357  SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
358  SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
359                                   svn_io_file_del_on_close,
360                                   context->info_pool, pool));
361  SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
362                                   svn_io_file_del_on_close,
363                                   context->info_pool, pool));
364  SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
365                                   svn_io_file_del_on_close,
366                                   context->info_pool, pool));
367  SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
368                                   svn_io_file_del_on_close,
369                                   context->info_pool, pool));
370  context->paths = svn_prefix_tree__create(context->info_pool);
371
372  return SVN_NO_ERROR;
373}
374
375/* Call this after the last revision range.  It will finalize all index files
376 * for CONTEXT and close any open files.  Use POOL for temporary allocations.
377 */
378static svn_error_t *
379close_pack_context(pack_context_t *context,
380                   apr_pool_t *pool)
381{
382  const char *proto_l2p_index_path;
383  const char *proto_p2l_index_path;
384
385  /* need the file names for the actual index creation call further down */
386  SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
387                               context->proto_l2p_index, pool));
388  SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
389                               context->proto_p2l_index, pool));
390
391  /* finalize proto index files */
392  SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool));
393  SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool));
394
395  /* Append the actual index data to the pack file. */
396  SVN_ERR(svn_fs_fs__add_index_data(context->fs, context->pack_file,
397                                    proto_l2p_index_path,
398                                    proto_p2l_index_path,
399                                    context->shard_rev,
400                                    pool));
401
402  /* remove proto index files */
403  SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool));
404  SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool));
405
406  /* Ensure that packed file is written to disk.*/
407  SVN_ERR(svn_io_file_flush_to_disk(context->pack_file, pool));
408  SVN_ERR(svn_io_file_close(context->pack_file, pool));
409
410  return SVN_NO_ERROR;
411}
412
413/* Efficiently copy SIZE bytes from SOURCE to DEST.  Invoke the CANCEL_FUNC
414 * from CONTEXT at regular intervals.  Use POOL for allocations.
415 */
416static svn_error_t *
417copy_file_data(pack_context_t *context,
418               apr_file_t *dest,
419               apr_file_t *source,
420               apr_off_t size,
421               apr_pool_t *pool)
422{
423  /* most non-representation items will be small.  Minimize the buffer
424   * and infrastructure overhead in that case. */
425  enum { STACK_BUFFER_SIZE = 1024 };
426
427  if (size < STACK_BUFFER_SIZE)
428    {
429      /* copy small data using a fixed-size buffer on stack */
430      char buffer[STACK_BUFFER_SIZE];
431      SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
432                                     NULL, NULL, pool));
433      SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
434                                     NULL, pool));
435    }
436  else
437    {
438      /* use streaming copies for larger data blocks.  That may require
439       * the allocation of larger buffers and we should make sure that
440       * this extra memory is released asap. */
441      fs_fs_data_t *ffd = context->fs->fsap_data;
442      apr_pool_t *copypool = svn_pool_create(pool);
443      char *buffer = apr_palloc(copypool, ffd->block_size);
444
445      while (size)
446        {
447          apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
448          if (context->cancel_func)
449            SVN_ERR(context->cancel_func(context->cancel_baton));
450
451          SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
452                                         NULL, NULL, pool));
453          SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
454                                         NULL, pool));
455
456          size -= to_copy;
457        }
458
459      svn_pool_destroy(copypool);
460    }
461
462  return SVN_NO_ERROR;
463}
464
465/* Writes SIZE bytes, all 0, to DEST.  Uses POOL for allocations.
466 */
467static svn_error_t *
468write_null_bytes(apr_file_t *dest,
469                 apr_off_t size,
470                 apr_pool_t *pool)
471{
472  /* Have a collection of high-quality, easy to access NUL bytes handy. */
473  enum { BUFFER_SIZE = 1024 };
474  static const char buffer[BUFFER_SIZE] = { 0 };
475
476  /* copy SIZE of them into the file's buffer */
477  while (size)
478    {
479      apr_size_t to_write = MIN(size, BUFFER_SIZE);
480      SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, pool));
481      size -= to_write;
482    }
483
484  return SVN_NO_ERROR;
485}
486
487/* Copy the "simple" item (changed paths list or property representation)
488 * from the current position in REV_FILE to TEMP_FILE using CONTEXT.  Add
489 * a copy of ENTRY to ENTRIES but with an updated offset value that points
490 * to the copy destination in TEMP_FILE.  Use POOL for allocations.
491 */
492static svn_error_t *
493copy_item_to_temp(pack_context_t *context,
494                  apr_array_header_t *entries,
495                  apr_file_t *temp_file,
496                  apr_file_t *rev_file,
497                  svn_fs_fs__p2l_entry_t *entry,
498                  apr_pool_t *pool)
499{
500  svn_fs_fs__p2l_entry_t *new_entry
501    = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
502
503  SVN_ERR(svn_fs_fs__get_file_offset(&new_entry->offset, temp_file, pool));
504  APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = new_entry;
505
506  SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool));
507
508  return SVN_NO_ERROR;
509}
510
511/* Return the offset within CONTEXT->REPS that corresponds to item
512 * ITEM_INDEX in  REVISION.
513 */
514static int
515get_item_array_index(pack_context_t *context,
516                     svn_revnum_t revision,
517                     apr_int64_t item_index)
518{
519  assert(revision >= context->start_rev);
520  return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
521                                         revision - context->start_rev,
522                                         int);
523}
524
525/* Write INFO to the correct position in CONTEXT->REP_INFOS.  The latter
526 * may need auto-expanding.  Overwriting an array element is not allowed.
527 */
528static void
529add_item_rep_mapping(pack_context_t *context,
530                     svn_fs_fs__p2l_entry_t *entry)
531{
532  int idx;
533
534  /* index of INFO */
535  idx = get_item_array_index(context,
536                             entry->item.revision,
537                             entry->item.number);
538
539  /* make sure the index exists in the array */
540  while (context->reps->nelts <= idx)
541    APR_ARRAY_PUSH(context->reps, void *) = NULL;
542
543  /* set the element.  If there is already an entry, there are probably
544   * two items claiming to be the same -> bail out */
545  assert(!APR_ARRAY_IDX(context->reps, idx, void *));
546  APR_ARRAY_IDX(context->reps, idx, void *) = entry;
547}
548
549/* Return the P2L entry from CONTEXT->REPS for the given ID.  If there is
550 * none (or not anymore), return NULL.  If RESET has been specified, set
551 * the array entry to NULL after returning the entry.
552 */
553static svn_fs_fs__p2l_entry_t *
554get_item(pack_context_t *context,
555         const svn_fs_fs__id_part_t *id,
556         svn_boolean_t reset)
557{
558  svn_fs_fs__p2l_entry_t *result = NULL;
559  if (id->number && id->revision >= context->start_rev)
560    {
561      int idx = get_item_array_index(context, id->revision, id->number);
562      if (context->reps->nelts > idx)
563        {
564          result = APR_ARRAY_IDX(context->reps, idx, void *);
565          if (result && reset)
566            APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
567        }
568    }
569
570  return result;
571}
572
573/* Copy representation item identified by ENTRY from the current position
574 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking into needed by
575 * our placement algorithm to CONTEXT.  Use POOL for temporary allocations.
576 */
577static svn_error_t *
578copy_rep_to_temp(pack_context_t *context,
579                 apr_file_t *rev_file,
580                 svn_fs_fs__p2l_entry_t *entry,
581                 apr_pool_t *pool)
582{
583  svn_fs_fs__rep_header_t *rep_header;
584  svn_stream_t *stream;
585  apr_off_t source_offset = entry->offset;
586
587  /* create a copy of ENTRY, make it point to the copy destination and
588   * store it in CONTEXT */
589  entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
590  SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file, pool));
591  add_item_rep_mapping(context, entry);
592
593  /* read & parse the representation header */
594  stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
595  SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool, pool));
596  svn_stream_close(stream);
597
598  /* if the representation is a delta against some other rep, link the two */
599  if (   rep_header->type == svn_fs_fs__rep_delta
600      && rep_header->base_revision >= context->start_rev)
601    {
602      reference_t *reference = apr_pcalloc(context->info_pool,
603                                           sizeof(*reference));
604      reference->from = entry->item;
605      reference->to.revision = rep_header->base_revision;
606      reference->to.number = rep_header->base_item_index;
607      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
608    }
609
610  /* copy the whole rep (including header!) to our temp file */
611  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
612  SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
613                         pool));
614
615  return SVN_NO_ERROR;
616}
617
618/* Directories first, dirs / files sorted by name in reverse lexical order.
619 * This maximizes the chance of two items being located close to one another
620 * in *all* pack files independent of their change order.  It also groups
621 * multi-project repos nicely according to their sub-projects.  The reverse
622 * order aspect gives "trunk" preference over "tags" and "branches", so
623 * trunk-related items are more likely to be contiguous.
624 */
625static int
626compare_dir_entries_format7(const svn_sort__item_t *a,
627                            const svn_sort__item_t *b)
628{
629  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
630  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
631
632  if (lhs->kind != rhs->kind)
633    return lhs->kind == svn_node_dir ? -1 : 1;
634
635  return strcmp(lhs->name, rhs->name);
636}
637
638/* Directories entries sorted by revision (decreasing - to max cache hits)
639 * and offset (increasing - to max benefit from APR file buffering).
640 */
641static int
642compare_dir_entries_format6(const svn_sort__item_t *a,
643                            const svn_sort__item_t *b)
644{
645  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
646  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
647
648  const svn_fs_fs__id_part_t *lhs_rev_item
649    = svn_fs_fs__id_rev_item(lhs->id);
650  const svn_fs_fs__id_part_t *rhs_rev_item
651    = svn_fs_fs__id_rev_item(rhs->id);
652
653  /* decreasing ("reverse") order on revs */
654  if (lhs_rev_item->revision != rhs_rev_item->revision)
655    return lhs_rev_item->revision > rhs_rev_item->revision ? -1 : 1;
656
657  /* increasing order on offsets */
658  if (lhs_rev_item->number != rhs_rev_item->number)
659    return lhs_rev_item->number > rhs_rev_item->number ? 1 : -1;
660
661  return 0;
662}
663
664apr_array_header_t *
665svn_fs_fs__order_dir_entries(svn_fs_t *fs,
666                             apr_hash_t *directory,
667                             apr_pool_t *result_pool,
668                             apr_pool_t *scratch_pool)
669{
670  apr_array_header_t *ordered
671    = svn_sort__hash(directory,
672                     svn_fs_fs__use_log_addressing(fs)
673                       ? compare_dir_entries_format7
674                       : compare_dir_entries_format6,
675                     scratch_pool);
676
677  apr_array_header_t *result
678    = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
679
680  int i;
681  for (i = 0; i < ordered->nelts; ++i)
682    APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
683      = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
684
685  return result;
686}
687
688/* Return a duplicate of the the ORIGINAL path and with special sub-strins
689 * (e.g. "trunk") modified in such a way that have a lower lexicographic
690 * value than any other "normal" file name.
691 */
692static const char *
693tweak_path_for_ordering(const char *original,
694                        apr_pool_t *pool)
695{
696  /* We may add further special cases as needed. */
697  enum {SPECIAL_COUNT = 2};
698  static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
699  char *pos;
700  char *path = apr_pstrdup(pool, original);
701  int i;
702
703  /* Replace the first char of any "special" sub-string we find by
704   * a control char, i.e. '\1' .. '\31'.  In the rare event that this
705   * would clash with existing paths, no data will be lost but merely
706   * the node ordering will be sub-optimal.
707   */
708  for (i = 0; i < SPECIAL_COUNT; ++i)
709    for (pos = strstr(path, special[i]);
710         pos;
711         pos = strstr(pos + 1, special[i]))
712      {
713        *pos = (char)(i + '\1');
714      }
715
716   return path;
717}
718
719/* Copy node revision item identified by ENTRY from the current position
720 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking into needed by
721 * our placement algorithm to CONTEXT.  Use POOL for temporary allocations.
722 */
723static svn_error_t *
724copy_node_to_temp(pack_context_t *context,
725                  apr_file_t *rev_file,
726                  svn_fs_fs__p2l_entry_t *entry,
727                  apr_pool_t *pool)
728{
729  path_order_t *path_order = apr_pcalloc(context->info_pool,
730                                         sizeof(*path_order));
731  node_revision_t *noderev;
732  const char *sort_path;
733  svn_stream_t *stream;
734  apr_off_t source_offset = entry->offset;
735
736  /* read & parse noderev */
737  stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
738  SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool, pool));
739  svn_stream_close(stream);
740
741  /* create a copy of ENTRY, make it point to the copy destination and
742   * store it in CONTEXT */
743  entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
744  SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file,
745                                     pool));
746  add_item_rep_mapping(context, entry);
747
748  /* copy the noderev to our temp file */
749  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
750  SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
751                         pool));
752
753  /* if the node has a data representation, make that the node's "base".
754   * This will (often) cause the noderev to be placed right in front of
755   * its data representation. */
756
757  if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev)
758    {
759      path_order->rep_id.revision = noderev->data_rep->revision;
760      path_order->rep_id.number = noderev->data_rep->item_index;
761      path_order->expanded_size = noderev->data_rep->expanded_size
762                                ? noderev->data_rep->expanded_size
763                                : noderev->data_rep->size;
764    }
765
766  /* Sort path is the key used for ordering noderevs and associated reps.
767   * It will not be stored in the final pack file. */
768  sort_path = tweak_path_for_ordering(noderev->created_path, pool);
769  path_order->path = svn_prefix_string__create(context->paths, sort_path);
770  path_order->node_id = *svn_fs_fs__id_node_id(noderev->id);
771  path_order->revision = svn_fs_fs__id_rev(noderev->id);
772  path_order->predecessor_count = noderev->predecessor_count;
773  path_order->is_dir = noderev->kind == svn_node_dir;
774  path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id);
775  APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
776
777  return SVN_NO_ERROR;
778}
779
780/* implements compare_fn_t.  Bring all directories in front of the files
781   and sort descendingly by PATH, NODE_ID and REVISION.
782 */
783static int
784compare_path_order(const path_order_t * const * lhs_p,
785                   const path_order_t * const * rhs_p)
786{
787  const path_order_t * lhs = *lhs_p;
788  const path_order_t * rhs = *rhs_p;
789
790  /* cluster all directories */
791  int diff = rhs->is_dir - lhs->is_dir;
792  if (diff)
793    return diff;
794
795  /* lexicographic order on path and node (i.e. latest first) */
796  diff = svn_prefix_string__compare(lhs->path, rhs->path);
797  if (diff)
798    return diff;
799
800  /* reverse order on node (i.e. latest first) */
801  diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id);
802  if (diff)
803    return diff;
804
805  /* reverse order on revision (i.e. latest first) */
806  if (lhs->revision != rhs->revision)
807    return lhs->revision < rhs->revision ? 1 : -1;
808
809  return 0;
810}
811
812/* implements compare_fn_t.  Sort ascendingly by FROM, TO.
813 */
814static int
815compare_references(const reference_t * const * lhs_p,
816                   const reference_t * const * rhs_p)
817{
818  const reference_t * lhs = *lhs_p;
819  const reference_t * rhs = *rhs_p;
820
821  int diff = svn_fs_fs__id_part_compare(&lhs->from, &rhs->from);
822  return diff ? diff : svn_fs_fs__id_part_compare(&lhs->to, &rhs->to);
823}
824
825/* implements compare_fn_t.  Assume ascending order by FROM.
826 */
827static int
828compare_ref_to_item(const reference_t * const * lhs_p,
829                    const svn_fs_fs__id_part_t * rhs_p)
830{
831  return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p);
832}
833
834/* implements compare_fn_t.  Finds the DIR / FILE boundary.
835 */
836static int
837compare_is_dir(const path_order_t * const * lhs_p,
838               const void *unused)
839{
840  return (*lhs_p)->is_dir ? -1 : 0;
841}
842
843/* Look for the least significant bit set in VALUE and return the smallest
844 * number with the same property, i.e. the largest power of 2 that is a
845 * factor in VALUE. */
846static int
847roundness(int value)
848{
849  return value ? value - (value & (value - 1)) : INT_MAX;
850}
851
852/* Order a range of data collected in CONTEXT such that we can place them
853 * in the desired order.  The input is taken from *PATH_ORDER, offsets FIRST
854 * to LAST and then written in the final order to the same range in *TEMP.
855 */
856static void
857sort_reps_range(pack_context_t *context,
858                const path_order_t **path_order,
859                const path_order_t **temp,
860                int first,
861                int last)
862{
863  const svn_prefix_string__t *path;
864  int i, dest, best;
865  svn_fs_fs__id_part_t rep_id;
866  fs_fs_data_t *ffd = context->fs->fsap_data;
867
868  /* The logic below would fail for empty ranges. */
869  if (first == last)
870    return;
871
872  /* Re-order noderevs like this:
873   *
874   * (1) Most likely to be referenced by future pack files, in path order.
875   * (2) highest revision rep per path + dependency chain
876   * (3) Remaining reps in path, rev order
877   *
878   * We simply pick & chose from the existing path, rev order.
879   */
880  dest = first;
881  path = path_order[first]->path;
882  best = first;
883
884  /* (1) For each path, pick the "roundest" representation and put it in
885   * front of all other nodes in the pack file.  The "roundest" rep is
886   * the one most likely to be referenced from future pack files, i.e. we
887   * concentrate those potential "foreign link targets" in one section of
888   * the pack file.
889   *
890   * And we only apply this to reps outside the linear deltification
891   * sections because references *into* linear deltification ranges are
892   * much less likely.
893   */
894  for (i = first; i < last; ++i)
895    {
896      /* Investigated all nodes for the current path? */
897      if (svn_prefix_string__compare(path, path_order[i]->path))
898        {
899          /* next path */
900          path = path_order[i]->path;
901
902          /* Pick roundest non-linear deltified node. */
903          if (roundness(path_order[best]->predecessor_count)
904              >= ffd->max_linear_deltification)
905            {
906              temp[dest++] = path_order[best];
907              path_order[best] = NULL;
908              best = i;
909            }
910        }
911
912      /* next entry */
913      if (  roundness(path_order[best]->predecessor_count)
914          < roundness(path_order[i]->predecessor_count))
915        best = i;
916    }
917
918  /* Treat the last path the same as all others. */
919  if (roundness(path_order[best]->predecessor_count)
920      >= ffd->max_linear_deltification)
921    {
922      temp[dest++] = path_order[best];
923      path_order[best] = NULL;
924    }
925
926  /* (2) For each (remaining) path, pick the nodes along the delta chain
927   * for the highest revision.  Due to our ordering, this is the first
928   * node we encounter for any path.
929   *
930   * Most references that don't hit a delta base picked in (1), will
931   * access HEAD of the respective path.  Keeping all its dependency chain
932   * in one place turns reconstruction into a linear scan of minimal length.
933   */
934  for (i = first; i < last; ++i)
935    if (path_order[i])
936      {
937        /* This is the first path we still have to handle. */
938        path = path_order[i]->path;
939        rep_id = path_order[i]->rep_id;
940        break;
941      }
942
943  for (i = first; i < last; ++i)
944    if (path_order[i])
945      {
946        /* New path? */
947        if (svn_prefix_string__compare(path, path_order[i]->path))
948          {
949            path = path_order[i]->path;
950            rep_id = path_order[i]->rep_id;
951          }
952
953        /* Pick nodes along the deltification chain.  Skip side-branches. */
954        if (svn_fs_fs__id_part_eq(&path_order[i]->rep_id, &rep_id))
955          {
956            reference_t **reference;
957
958            temp[dest++] = path_order[i];
959            path_order[i] = NULL;
960
961            reference = svn_sort__array_lookup(context->references,
962                                               &rep_id, NULL,
963              (int (*)(const void *, const void *))compare_ref_to_item);
964            if (reference)
965              rep_id = (*reference)->to;
966          }
967      }
968
969  /* (3) All remaining nodes in path, rev order.  Linear deltification
970   * makes HEAD delta chains from (2) cover all or most of their deltas
971   * in a given pack file.  So, this is just a few remnants that we put
972   * at the end of the pack file.
973   */
974  for (i = first; i < last; ++i)
975    if (path_order[i])
976      temp[dest++] = path_order[i];
977
978  /* We now know the final ordering. */
979  assert(dest == last);
980}
981
982/* Order the data collected in CONTEXT such that we can place them in the
983 * desired order.
984 */
985static void
986sort_reps(pack_context_t *context)
987{
988  apr_pool_t *temp_pool;
989  const path_order_t **temp, **path_order;
990  int i, count, dir_count;
991
992  /* We will later assume that there is at least one node / path.
993   */
994  if (context->path_order->nelts == 0)
995    {
996      assert(context->references->nelts == 0);
997      return;
998    }
999
1000  /* Sort containers by path and IDs, respectively.
1001   */
1002  svn_sort__array(context->path_order,
1003                  (int (*)(const void *, const void *))compare_path_order);
1004  svn_sort__array(context->references,
1005                  (int (*)(const void *, const void *))compare_references);
1006
1007  /* Directories are already in front; sort directories section and files
1008   * section separately but use the same heuristics (see sub-function).
1009   */
1010  temp_pool = svn_pool_create(context->info_pool);
1011  count = context->path_order->nelts;
1012  temp = apr_pcalloc(temp_pool, count * sizeof(*temp));
1013  path_order = (void *)context->path_order->elts;
1014
1015  /* Find the boundary between DIR and FILE section. */
1016  dir_count = svn_sort__bsearch_lower_bound(context->path_order, NULL,
1017                     (int (*)(const void *, const void *))compare_is_dir);
1018
1019  /* Sort those sub-sections separately. */
1020  sort_reps_range(context, path_order, temp, 0, dir_count);
1021  sort_reps_range(context, path_order, temp, dir_count, count);
1022
1023  /* We now know the final ordering. */
1024  for (i = 0; i < count; ++i)
1025    path_order[i] = temp[i];
1026
1027  svn_pool_destroy(temp_pool);
1028}
1029
1030/* implements compare_fn_t. Place LHS before RHS, if the latter is older.
1031 */
1032static int
1033compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs,
1034                 const svn_fs_fs__p2l_entry_t * const * rhs)
1035{
1036  assert(*lhs != *rhs);
1037
1038  if ((*lhs)->item.revision == (*rhs)->item.revision)
1039    return (*lhs)->item.number > (*rhs)->item.number ? -1 : 1;
1040
1041  return (*lhs)->item.revision > (*rhs)->item.revision ? -1 : 1;
1042}
1043
1044/* Sort svn_fs_fs__p2l_entry_t * array ENTRIES by age.  Place the latest
1045 * items first.
1046 */
1047static void
1048sort_items(apr_array_header_t *entries)
1049{
1050  svn_sort__array(entries,
1051                  (int (*)(const void *, const void *))compare_p2l_info);
1052}
1053
1054/* Return the remaining unused bytes in the current block in CONTEXT's
1055 * pack file.
1056 */
1057static apr_ssize_t
1058get_block_left(pack_context_t *context)
1059{
1060  fs_fs_data_t *ffd = context->fs->fsap_data;
1061  return ffd->block_size - (context->pack_offset % ffd->block_size);
1062}
1063
1064/* To prevent items from overlapping a block boundary, we will usually
1065 * put them into the next block and top up the old one with NUL bytes.
1066 * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does
1067 * not fit into the current block and the padding is short enough.
1068 * Use POOL for allocations.
1069 */
1070static svn_error_t *
1071auto_pad_block(pack_context_t *context,
1072               apr_off_t to_add,
1073               apr_pool_t *pool)
1074{
1075  fs_fs_data_t *ffd = context->fs->fsap_data;
1076
1077  /* This is the maximum number of bytes "wasted" that way per block.
1078   * Larger items will cross the block boundaries. */
1079  const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
1080
1081  /* Is wasted space small enough to align the current item to the next
1082   * block? */
1083  apr_off_t padding = get_block_left(context);
1084
1085  if (padding < to_add && padding < max_padding)
1086    {
1087      /* Yes. To up with NUL bytes and don't forget to create
1088       * an P2L index entry marking this section as unused. */
1089      svn_fs_fs__p2l_entry_t null_entry;
1090
1091      null_entry.offset = context->pack_offset;
1092      null_entry.size = padding;
1093      null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
1094      null_entry.item.revision = SVN_INVALID_REVNUM;
1095      null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
1096      null_entry.fnv1_checksum = 0;
1097
1098      SVN_ERR(write_null_bytes(context->pack_file, padding, pool));
1099      SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1100                   context->proto_p2l_index, &null_entry, pool));
1101      context->pack_offset += padding;
1102    }
1103
1104  return SVN_NO_ERROR;
1105}
1106
1107/* Read the contents of ITEM, if not empty, from TEMP_FILE and write it
1108 * to CONTEXT->PACK_FILE.  Use POOL for allocations.
1109 */
1110static svn_error_t *
1111store_item(pack_context_t *context,
1112           apr_file_t *temp_file,
1113           svn_fs_fs__p2l_entry_t *item,
1114           apr_pool_t *pool)
1115{
1116  apr_off_t safety_margin;
1117
1118  /* skip empty entries */
1119  if (item->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
1120    return SVN_NO_ERROR;
1121
1122  /* If the next item does not fit into the current block, auto-pad it.
1123      Take special care of textual noderevs since their parsers may
1124      prefetch up to 80 bytes and we don't want them to cross block
1125      boundaries. */
1126  safety_margin = item->type == SVN_FS_FS__ITEM_TYPE_NODEREV
1127                ? SVN__LINE_CHUNK_SIZE
1128                : 0;
1129  SVN_ERR(auto_pad_block(context, item->size + safety_margin, pool));
1130
1131  /* select the item in the source file and copy it into the target
1132    * pack file */
1133  SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &item->offset, pool));
1134  SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1135                         item->size, pool));
1136
1137  /* write index entry and update current position */
1138  item->offset = context->pack_offset;
1139  context->pack_offset += item->size;
1140
1141  SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(context->proto_p2l_index,
1142                                               item, pool));
1143
1144  APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = item;
1145
1146  return SVN_NO_ERROR;
1147}
1148
1149/* Read the contents of the non-empty items in ITEMS from TEMP_FILE and
1150 * write them to CONTEXT->PACK_FILE.  Use POOL for allocations.
1151 */
1152static svn_error_t *
1153store_items(pack_context_t *context,
1154            apr_file_t *temp_file,
1155            apr_array_header_t *items,
1156            apr_pool_t *pool)
1157{
1158  int i;
1159  apr_pool_t *iterpool = svn_pool_create(pool);
1160
1161  /* copy all items in strict order */
1162  for (i = 0; i < items->nelts; ++i)
1163    {
1164      svn_pool_clear(iterpool);
1165      SVN_ERR(store_item(context, temp_file,
1166                         APR_ARRAY_IDX(items, i, svn_fs_fs__p2l_entry_t *),
1167                         iterpool));
1168    }
1169
1170  svn_pool_destroy(iterpool);
1171
1172  return SVN_NO_ERROR;
1173}
1174
1175/* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements
1176 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1177 * Use POOL for temporary allocations.
1178 */
1179static svn_error_t *
1180copy_reps_from_temp(pack_context_t *context,
1181                    apr_file_t *temp_file,
1182                    apr_pool_t *pool)
1183{
1184  apr_pool_t *iterpool = svn_pool_create(pool);
1185  apr_array_header_t *path_order = context->path_order;
1186  int i;
1187
1188  /* copy items in path order. */
1189  for (i = 0; i < path_order->nelts; ++i)
1190    {
1191      path_order_t *current_path;
1192      svn_fs_fs__p2l_entry_t *node_part;
1193      svn_fs_fs__p2l_entry_t *rep_part;
1194
1195      svn_pool_clear(iterpool);
1196
1197      current_path = APR_ARRAY_IDX(path_order, i, path_order_t *);
1198      node_part = get_item(context, &current_path->noderev_id, TRUE);
1199      rep_part = get_item(context, &current_path->rep_id, TRUE);
1200
1201      if (node_part)
1202        SVN_ERR(store_item(context, temp_file, node_part, iterpool));
1203      if (rep_part)
1204        SVN_ERR(store_item(context, temp_file, rep_part, iterpool));
1205    }
1206
1207  svn_pool_destroy(iterpool);
1208
1209  return SVN_NO_ERROR;
1210}
1211
1212/* implements compare_fn_t. Place LHS before RHS, if the latter belongs to
1213 * a newer revision.
1214 */
1215static int
1216compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p,
1217                     const svn_fs_fs__p2l_entry_t * const * rhs_p)
1218{
1219  const svn_fs_fs__p2l_entry_t * lhs = *lhs_p;
1220  const svn_fs_fs__p2l_entry_t * rhs = *rhs_p;
1221
1222  if (lhs->item.revision == rhs->item.revision)
1223    return 0;
1224
1225  return lhs->item.revision < rhs->item.revision ? -1 : 1;
1226}
1227
1228/* Write the log-to-phys proto index file for CONTEXT and use POOL for
1229 * temporary allocations.  All items in all buckets must have been placed
1230 * by now.
1231 */
1232static svn_error_t *
1233write_l2p_index(pack_context_t *context,
1234                apr_pool_t *pool)
1235{
1236  apr_pool_t *iterpool = svn_pool_create(pool);
1237  svn_revnum_t prev_rev = SVN_INVALID_REVNUM;
1238  int i, dest;
1239
1240  /* eliminate empty entries from CONTEXT->REPS */
1241  for (i = 0, dest = 0; i < context->reps->nelts; ++i)
1242    {
1243      svn_fs_fs__p2l_entry_t *entry
1244        = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1245      if (entry)
1246        APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *)
1247          = entry;
1248    }
1249  context->reps->nelts = dest;
1250
1251  /* we need to write the l2p index revision by revision */
1252  svn_sort__array(context->reps,
1253                  (int (*)(const void *, const void *))compare_p2l_info_rev);
1254
1255  /* write index entries */
1256  for (i = 0; i < context->reps->nelts; ++i)
1257    {
1258      svn_fs_fs__p2l_entry_t *p2l_entry
1259        = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1260      if (p2l_entry == NULL)
1261        continue;
1262
1263      /* next revision? */
1264      if (prev_rev != p2l_entry->item.revision)
1265        {
1266          prev_rev = p2l_entry->item.revision;
1267          SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(
1268                       context->proto_l2p_index, iterpool));
1269        }
1270
1271      /* add entry */
1272      SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index,
1273                                                   p2l_entry->offset,
1274                                                   p2l_entry->item.number,
1275                                                   iterpool));
1276
1277      /* keep memory usage in check */
1278      if (i % 256 == 0)
1279        svn_pool_clear(iterpool);
1280    }
1281
1282  svn_pool_destroy(iterpool);
1283
1284  return SVN_NO_ERROR;
1285}
1286
1287/* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1288 * to 4.  Use POOL for allocations.
1289 */
1290static svn_error_t *
1291pack_range(pack_context_t *context,
1292           apr_pool_t *pool)
1293{
1294  fs_fs_data_t *ffd = context->fs->fsap_data;
1295  apr_pool_t *revpool = svn_pool_create(pool);
1296  apr_pool_t *iterpool = svn_pool_create(pool);
1297  apr_pool_t *iterpool2 = svn_pool_create(pool);
1298
1299  /* Phase 2: Copy items into various buckets and build tracking info */
1300  svn_revnum_t revision;
1301  for (revision = context->start_rev; revision < context->end_rev; ++revision)
1302    {
1303      apr_off_t offset = 0;
1304      svn_fs_fs__revision_file_t *rev_file;
1305
1306      svn_pool_clear(revpool);
1307
1308      /* Get the rev file dimensions (mainly index locations). */
1309      SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1310                                               revision, revpool, iterpool));
1311      SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
1312
1313      /* store the indirect array index */
1314      APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1315
1316      /* read the phys-to-log index file until we covered the whole rev file.
1317       * That index contains enough info to build both target indexes from it. */
1318      while (offset < rev_file->l2p_offset)
1319        {
1320          /* read one cluster */
1321          int i;
1322          apr_array_header_t *entries;
1323
1324          svn_pool_clear(iterpool);
1325
1326          SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs,
1327                                              rev_file, revision, offset,
1328                                              ffd->p2l_page_size, iterpool,
1329                                              iterpool));
1330
1331          for (i = 0; i < entries->nelts; ++i)
1332            {
1333              svn_fs_fs__p2l_entry_t *entry
1334                = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1335
1336              /* skip first entry if that was duplicated due crossing a
1337                 cluster boundary */
1338              if (offset > entry->offset)
1339                continue;
1340
1341              svn_pool_clear(iterpool2);
1342
1343              /* process entry while inside the rev file */
1344              offset = entry->offset;
1345              if (offset < rev_file->l2p_offset)
1346                {
1347                  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset,
1348                                           iterpool2));
1349
1350                  if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
1351                    SVN_ERR(copy_item_to_temp(context,
1352                                              context->changes,
1353                                              context->changes_file,
1354                                              rev_file->file, entry,
1355                                              iterpool2));
1356                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
1357                    SVN_ERR(copy_item_to_temp(context,
1358                                              context->file_props,
1359                                              context->file_props_file,
1360                                              rev_file->file, entry,
1361                                              iterpool2));
1362                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
1363                    SVN_ERR(copy_item_to_temp(context,
1364                                              context->dir_props,
1365                                              context->dir_props_file,
1366                                              rev_file->file, entry,
1367                                              iterpool2));
1368                  else if (   entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP
1369                           || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
1370                    SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry,
1371                                             iterpool2));
1372                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
1373                    SVN_ERR(copy_node_to_temp(context, rev_file->file, entry,
1374                                              iterpool2));
1375                  else
1376                    SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED);
1377
1378                  offset += entry->size;
1379                }
1380            }
1381
1382          if (context->cancel_func)
1383            SVN_ERR(context->cancel_func(context->cancel_baton));
1384        }
1385    }
1386
1387  svn_pool_destroy(iterpool2);
1388  svn_pool_destroy(iterpool);
1389
1390  /* phase 3: placement.
1391   * Use "newest first" placement for simple items. */
1392  sort_items(context->changes);
1393  sort_items(context->file_props);
1394  sort_items(context->dir_props);
1395
1396  /* follow dependencies recursively for noderevs and data representations */
1397  sort_reps(context);
1398
1399  /* phase 4: copy bucket data to pack file.  Write P2L index. */
1400  SVN_ERR(store_items(context, context->changes_file, context->changes,
1401                      revpool));
1402  svn_pool_clear(revpool);
1403  SVN_ERR(store_items(context, context->file_props_file, context->file_props,
1404                      revpool));
1405  svn_pool_clear(revpool);
1406  SVN_ERR(store_items(context, context->dir_props_file, context->dir_props,
1407                      revpool));
1408  svn_pool_clear(revpool);
1409  SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1410  svn_pool_clear(revpool);
1411
1412  /* write L2P index as well (now that we know all target offsets) */
1413  SVN_ERR(write_l2p_index(context, revpool));
1414
1415  svn_pool_destroy(revpool);
1416
1417  return SVN_NO_ERROR;
1418}
1419
1420/* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1421 * This function will only be used for very large revisions (>>100k changes).
1422 * Use POOL for temporary allocations.
1423 */
1424static svn_error_t *
1425append_revision(pack_context_t *context,
1426                apr_pool_t *pool)
1427{
1428  fs_fs_data_t *ffd = context->fs->fsap_data;
1429  apr_off_t offset = 0;
1430  apr_pool_t *iterpool = svn_pool_create(pool);
1431  svn_fs_fs__revision_file_t *rev_file;
1432  svn_filesize_t revdata_size;
1433
1434  /* Copy all non-index contents the rev file to the end of the pack file. */
1435  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1436                                           context->start_rev, pool,
1437                                           iterpool));
1438
1439  SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
1440  revdata_size = rev_file->l2p_offset;
1441
1442  SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, NULL, 0,
1443                                   iterpool));
1444  SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
1445                         revdata_size, iterpool));
1446
1447  /* mark the start of a new revision */
1448  SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index,
1449                                                  pool));
1450
1451  /* read the phys-to-log index file until we covered the whole rev file.
1452   * That index contains enough info to build both target indexes from it. */
1453  while (offset < revdata_size)
1454    {
1455      /* read one cluster */
1456      int i;
1457      apr_array_header_t *entries;
1458
1459      svn_pool_clear(iterpool);
1460      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file,
1461                                          context->start_rev, offset,
1462                                          ffd->p2l_page_size, iterpool,
1463                                          iterpool));
1464
1465      for (i = 0; i < entries->nelts; ++i)
1466        {
1467          svn_fs_fs__p2l_entry_t *entry
1468            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1469
1470          /* skip first entry if that was duplicated due crossing a
1471             cluster boundary */
1472          if (offset > entry->offset)
1473            continue;
1474
1475          /* process entry while inside the rev file */
1476          offset = entry->offset;
1477          if (offset < revdata_size)
1478            {
1479              entry->offset += context->pack_offset;
1480              offset += entry->size;
1481              SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(
1482                         context->proto_l2p_index, entry->offset,
1483                         entry->item.number, iterpool));
1484              SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1485                         context->proto_p2l_index, entry, iterpool));
1486            }
1487        }
1488    }
1489
1490  svn_pool_destroy(iterpool);
1491  context->pack_offset += revdata_size;
1492
1493  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1494
1495  return SVN_NO_ERROR;
1496}
1497
1498/* Logical addressing mode packing logic.
1499 *
1500 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1501 * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations.  Limit
1502 * the extra memory consumption to MAX_MEM bytes.  CANCEL_FUNC and
1503 * CANCEL_BATON are what you think they are.
1504 */
1505static svn_error_t *
1506pack_log_addressed(svn_fs_t *fs,
1507                   const char *pack_file_dir,
1508                   const char *shard_dir,
1509                   svn_revnum_t shard_rev,
1510                   apr_size_t max_mem,
1511                   svn_cancel_func_t cancel_func,
1512                   void *cancel_baton,
1513                   apr_pool_t *pool)
1514{
1515  enum
1516    {
1517      /* estimated amount of memory used to represent one item in memory
1518       * during rev file packing */
1519      PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1520                   + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1521                   + APR_ALIGN_DEFAULT(sizeof(reference_t))
1522                   + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t))
1523                   + 6 * sizeof(void*)
1524    };
1525
1526  int max_items;
1527  apr_array_header_t *max_ids;
1528  pack_context_t context = { 0 };
1529  int i;
1530  apr_size_t item_count = 0;
1531  apr_pool_t *iterpool = svn_pool_create(pool);
1532
1533  /* Prevent integer overflow.  We use apr arrays to process the items so
1534   * the maximum number of items is INT_MAX. */
1535    {
1536      apr_size_t temp = max_mem / PER_ITEM_MEM;
1537      SVN_ERR_ASSERT(temp <= INT_MAX);
1538      max_items = (int)temp;
1539    }
1540
1541  /* set up a pack context */
1542  SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1543                                  shard_rev, max_items, cancel_func,
1544                                  cancel_baton, pool));
1545
1546  /* phase 1: determine the size of the revisions to pack */
1547  SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev,
1548                                     context.shard_end_rev - shard_rev,
1549                                     pool, pool));
1550
1551  /* pack revisions in ranges that don't exceed MAX_MEM */
1552  for (i = 0; i < max_ids->nelts; ++i)
1553    if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items)
1554      {
1555        item_count += APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1556        context.end_rev++;
1557      }
1558    else
1559      {
1560        svn_pool_clear(iterpool);
1561
1562        /* some unpacked revisions before this one? */
1563        if (context.start_rev < context.end_rev)
1564          {
1565            /* pack them intelligently (might be just 1 rev but still ...) */
1566            SVN_ERR(pack_range(&context, iterpool));
1567            SVN_ERR(reset_pack_context(&context, iterpool));
1568            item_count = 0;
1569          }
1570
1571        /* next revision range is to start with the current revision */
1572        context.start_rev = i + context.shard_rev;
1573        context.end_rev = context.start_rev + 1;
1574
1575        /* if this is a very large revision, we must place it as is */
1576        if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1577          {
1578            SVN_ERR(append_revision(&context, iterpool));
1579            context.start_rev++;
1580          }
1581        else
1582          item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1583      }
1584
1585  /* non-empty revision range at the end? */
1586  if (context.start_rev < context.end_rev)
1587    SVN_ERR(pack_range(&context, iterpool));
1588
1589  /* last phase: finalize indexes and clean up */
1590  SVN_ERR(reset_pack_context(&context, iterpool));
1591  SVN_ERR(close_pack_context(&context, iterpool));
1592  svn_pool_destroy(iterpool);
1593
1594  return SVN_NO_ERROR;
1595}
1596
1597/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
1598   Use POOL for temporary allocations. */
1599svn_error_t *
1600svn_fs_fs__get_packed_offset(apr_off_t *rev_offset,
1601                             svn_fs_t *fs,
1602                             svn_revnum_t rev,
1603                             apr_pool_t *pool)
1604{
1605  fs_fs_data_t *ffd = fs->fsap_data;
1606  svn_stream_t *manifest_stream;
1607  svn_boolean_t is_cached;
1608  svn_revnum_t shard;
1609  apr_int64_t shard_pos;
1610  apr_array_header_t *manifest;
1611  apr_pool_t *iterpool;
1612
1613  shard = rev / ffd->max_files_per_dir;
1614
1615  /* position of the shard within the manifest */
1616  shard_pos = rev % ffd->max_files_per_dir;
1617
1618  /* fetch exactly that element into *rev_offset, if the manifest is found
1619     in the cache */
1620  SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
1621                                 ffd->packed_offset_cache, &shard,
1622                                 svn_fs_fs__get_sharded_offset, &shard_pos,
1623                                 pool));
1624
1625  if (is_cached)
1626      return SVN_NO_ERROR;
1627
1628  /* Open the manifest file. */
1629  SVN_ERR(svn_stream_open_readonly(&manifest_stream,
1630                                   svn_fs_fs__path_rev_packed(fs, rev,
1631                                                              PATH_MANIFEST,
1632                                                              pool),
1633                                   pool, pool));
1634
1635  /* While we're here, let's just read the entire manifest file into an array,
1636     so we can cache the entire thing. */
1637  iterpool = svn_pool_create(pool);
1638  manifest = apr_array_make(pool, ffd->max_files_per_dir, sizeof(apr_off_t));
1639  while (1)
1640    {
1641      svn_boolean_t eof;
1642      apr_int64_t val;
1643
1644      svn_pool_clear(iterpool);
1645      SVN_ERR(svn_fs_fs__read_number_from_stream(&val, &eof, manifest_stream,
1646                                                 iterpool));
1647      if (eof)
1648        break;
1649
1650      APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val;
1651    }
1652  svn_pool_destroy(iterpool);
1653
1654  *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
1655                              apr_off_t);
1656
1657  /* Close up shop and cache the array. */
1658  SVN_ERR(svn_stream_close(manifest_stream));
1659  return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool);
1660}
1661
1662/* Packing logic for physical addresssing mode:
1663 * Simply concatenate all revision contents.
1664 *
1665 * Pack the revision shard starting at SHARD_REV containing exactly
1666 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1667 * using POOL for allocations.  CANCEL_FUNC and CANCEL_BATON are what you
1668 * think they are.
1669 */
1670static svn_error_t *
1671pack_phys_addressed(const char *pack_file_dir,
1672                    const char *shard_path,
1673                    svn_revnum_t start_rev,
1674                    int max_files_per_dir,
1675                    svn_cancel_func_t cancel_func,
1676                    void *cancel_baton,
1677                    apr_pool_t *pool)
1678{
1679  const char *pack_file_path, *manifest_file_path;
1680  apr_file_t *pack_file;
1681  apr_file_t *manifest_file;
1682  svn_stream_t *manifest_stream;
1683  svn_revnum_t end_rev, rev;
1684  apr_off_t next_offset;
1685  apr_pool_t *iterpool;
1686
1687  /* Some useful paths. */
1688  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1689  manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, pool);
1690
1691  /* Create the new directory and pack file.
1692   * Use unbuffered apr_file_t since we're going to write using 16kb
1693   * chunks. */
1694  SVN_ERR(svn_io_file_open(&pack_file, pack_file_path,
1695                           APR_WRITE | APR_CREATE | APR_EXCL,
1696                           APR_OS_DEFAULT, pool));
1697
1698  /* Create the manifest file. */
1699  SVN_ERR(svn_io_file_open(&manifest_file, manifest_file_path,
1700                           APR_WRITE | APR_BUFFERED | APR_CREATE | APR_EXCL,
1701                           APR_OS_DEFAULT, pool));
1702  manifest_stream = svn_stream_from_aprfile2(manifest_file, TRUE, pool);
1703
1704  end_rev = start_rev + max_files_per_dir - 1;
1705  next_offset = 0;
1706  iterpool = svn_pool_create(pool);
1707
1708  /* Iterate over the revisions in this shard, squashing them together. */
1709  for (rev = start_rev; rev <= end_rev; rev++)
1710    {
1711      svn_stream_t *rev_stream;
1712      apr_finfo_t finfo;
1713      const char *path;
1714
1715      svn_pool_clear(iterpool);
1716
1717      /* Get the size of the file. */
1718      path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
1719                             iterpool);
1720      SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));
1721
1722      /* build manifest */
1723      SVN_ERR(svn_stream_printf(manifest_stream, iterpool,
1724                                "%" APR_OFF_T_FMT "\n", next_offset));
1725      next_offset += finfo.size;
1726
1727      /* Copy all the bits from the rev file to the end of the pack file. */
1728      SVN_ERR(svn_stream_open_readonly(&rev_stream, path, iterpool, iterpool));
1729      SVN_ERR(svn_stream_copy3(rev_stream,
1730                               svn_stream_from_aprfile2(pack_file, TRUE, pool),
1731                               cancel_func, cancel_baton, iterpool));
1732    }
1733
1734  /* Close stream over APR file. */
1735  SVN_ERR(svn_stream_close(manifest_stream));
1736
1737  /* Ensure that pack file is written to disk. */
1738  SVN_ERR(svn_io_file_flush_to_disk(manifest_file, pool));
1739  SVN_ERR(svn_io_file_close(manifest_file, pool));
1740
1741  /* disallow write access to the manifest file */
1742  SVN_ERR(svn_io_set_file_read_only(manifest_file_path, FALSE, iterpool));
1743
1744  /* Ensure that pack file is written to disk. */
1745  SVN_ERR(svn_io_file_flush_to_disk(pack_file, pool));
1746  SVN_ERR(svn_io_file_close(pack_file, pool));
1747
1748  svn_pool_destroy(iterpool);
1749
1750  return SVN_NO_ERROR;
1751}
1752
1753/* In filesystem FS, pack the revision SHARD containing exactly
1754 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1755 * using POOL for allocations.  Try to limit the amount of temporary
1756 * memory needed to MAX_MEM bytes.  CANCEL_FUNC and CANCEL_BATON are what
1757 * you think they are.
1758 *
1759 * If for some reason we detect a partial packing already performed, we
1760 * remove the pack file and start again.
1761 *
1762 * The actual packing will be done in a format-specific sub-function.
1763 */
1764static svn_error_t *
1765pack_rev_shard(svn_fs_t *fs,
1766               const char *pack_file_dir,
1767               const char *shard_path,
1768               apr_int64_t shard,
1769               int max_files_per_dir,
1770               apr_size_t max_mem,
1771               svn_cancel_func_t cancel_func,
1772               void *cancel_baton,
1773               apr_pool_t *pool)
1774{
1775  const char *pack_file_path;
1776  svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
1777
1778  /* Some useful paths. */
1779  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1780
1781  /* Remove any existing pack file for this shard, since it is incomplete. */
1782  SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
1783                             pool));
1784
1785  /* Create the new directory and pack file. */
1786  SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool));
1787
1788  /* Index information files */
1789  if (svn_fs_fs__use_log_addressing(fs))
1790    SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
1791                               max_mem, cancel_func, cancel_baton, pool));
1792  else
1793    SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev,
1794                                max_files_per_dir, cancel_func,
1795                                cancel_baton, pool));
1796
1797  SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool));
1798  SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool));
1799
1800  return SVN_NO_ERROR;
1801}
1802
1803/* Baton struct used by pack_body(), pack_shard() and synced_pack_shard().
1804   These calls are nested and for every level additional fields will be
1805   available. */
1806struct pack_baton
1807{
1808  /* Valid when entering pack_body(). */
1809  svn_fs_t *fs;
1810  svn_fs_pack_notify_t notify_func;
1811  void *notify_baton;
1812  svn_cancel_func_t cancel_func;
1813  void *cancel_baton;
1814  size_t max_mem;
1815
1816  /* Additional entries valid when entering pack_shard(). */
1817  const char *revs_dir;
1818  const char *revsprops_dir;
1819  apr_int64_t shard;
1820
1821  /* Additional entries valid when entering synced_pack_shard(). */
1822  const char *rev_shard_path;
1823};
1824
1825
1826/* Part of the pack process that requires global (write) synchronization.
1827 * We pack the revision properties of the shard described by BATON and
1828 * In the file system at FS_PATH, pack the SHARD in REVS_DIR and replace
1829 * the non-packed revprop & rev shard folder(s) with the packed ones.
1830 * The packed rev folder has been created prior to calling this function.
1831 */
1832static svn_error_t *
1833synced_pack_shard(void *baton,
1834                  apr_pool_t *pool)
1835{
1836  struct pack_baton *pb = baton;
1837  fs_fs_data_t *ffd = pb->fs->fsap_data;
1838  const char *revprops_shard_path, *revprops_pack_file_dir;
1839
1840  /* if enabled, pack the revprops in an equivalent way */
1841  if (pb->revsprops_dir)
1842    {
1843      revprops_pack_file_dir = svn_dirent_join(pb->revsprops_dir,
1844                   apr_psprintf(pool,
1845                                "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1846                                pb->shard),
1847                   pool);
1848      revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1849                    apr_psprintf(pool, "%" APR_INT64_T_FMT, pb->shard),
1850                    pool);
1851
1852      SVN_ERR(svn_fs_fs__pack_revprops_shard(revprops_pack_file_dir,
1853                                             revprops_shard_path,
1854                                             pb->shard,
1855                                             ffd->max_files_per_dir,
1856                                             (int)(0.9*ffd->revprop_pack_size),
1857                                             ffd->compress_packed_revprops
1858                                               ? SVN__COMPRESSION_ZLIB_DEFAULT
1859                                               : SVN__COMPRESSION_NONE,
1860                                             pb->cancel_func,
1861                                             pb->cancel_baton,
1862                                             pool));
1863    }
1864
1865  /* Update the min-unpacked-rev file to reflect our newly packed shard. */
1866  SVN_ERR(svn_fs_fs__write_min_unpacked_rev(pb->fs,
1867                    (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir),
1868                    pool));
1869  ffd->min_unpacked_rev
1870    = (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir);
1871
1872  /* Finally, remove the existing shard directories.
1873   * For revprops, clean up older obsolete shards as well as they might
1874   * have been left over from an interrupted FS upgrade. */
1875  SVN_ERR(svn_io_remove_dir2(pb->rev_shard_path, TRUE,
1876                             pb->cancel_func, pb->cancel_baton, pool));
1877  if (pb->revsprops_dir)
1878    {
1879      svn_node_kind_t kind = svn_node_dir;
1880      apr_int64_t to_cleanup = pb->shard;
1881      do
1882        {
1883          SVN_ERR(svn_fs_fs__delete_revprops_shard(revprops_shard_path,
1884                                                   to_cleanup,
1885                                                   ffd->max_files_per_dir,
1886                                                   pb->cancel_func,
1887                                                   pb->cancel_baton,
1888                                                   pool));
1889
1890          /* If the previous shard exists, clean it up as well.
1891             Don't try to clean up shard 0 as it we can't tell quickly
1892             whether it actually needs cleaning up. */
1893          revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1894                      apr_psprintf(pool, "%" APR_INT64_T_FMT, --to_cleanup),
1895                      pool);
1896          SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, pool));
1897        }
1898      while (kind == svn_node_dir && to_cleanup > 0);
1899    }
1900
1901  return SVN_NO_ERROR;
1902}
1903
1904/* Pack the shard described by BATON.
1905 *
1906 * If for some reason we detect a partial packing already performed,
1907 * we remove the pack file and start again.
1908 */
1909static svn_error_t *
1910pack_shard(struct pack_baton *baton,
1911           apr_pool_t *pool)
1912{
1913  fs_fs_data_t *ffd = baton->fs->fsap_data;
1914  const char *rev_pack_file_dir;
1915
1916  /* Notify caller we're starting to pack this shard. */
1917  if (baton->notify_func)
1918    SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1919                               svn_fs_pack_notify_start, pool));
1920
1921  /* Some useful paths. */
1922  rev_pack_file_dir = svn_dirent_join(baton->revs_dir,
1923                  apr_psprintf(pool,
1924                               "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1925                               baton->shard),
1926                  pool);
1927  baton->rev_shard_path = svn_dirent_join(baton->revs_dir,
1928                                          apr_psprintf(pool,
1929                                                       "%" APR_INT64_T_FMT,
1930                                                       baton->shard),
1931                                          pool);
1932
1933  /* pack the revision content */
1934  SVN_ERR(pack_rev_shard(baton->fs, rev_pack_file_dir, baton->rev_shard_path,
1935                         baton->shard, ffd->max_files_per_dir,
1936                         baton->max_mem, baton->cancel_func,
1937                         baton->cancel_baton, pool));
1938
1939  /* For newer repo formats, we only acquired the pack lock so far.
1940     Before modifying the repo state by switching over to the packed
1941     data, we need to acquire the global (write) lock. */
1942  if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
1943    SVN_ERR(svn_fs_fs__with_write_lock(baton->fs, synced_pack_shard, baton,
1944                                       pool));
1945  else
1946    SVN_ERR(synced_pack_shard(baton, pool));
1947
1948  /* Notify caller we're starting to pack this shard. */
1949  if (baton->notify_func)
1950    SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1951                               svn_fs_pack_notify_end, pool));
1952
1953  return SVN_NO_ERROR;
1954}
1955
1956/* The work-horse for svn_fs_fs__pack, called with the FS write lock.
1957   This implements the svn_fs_fs__with_write_lock() 'body' callback
1958   type.  BATON is a 'struct pack_baton *'.
1959
1960   WARNING: if you add a call to this function, please note:
1961     The code currently assumes that any piece of code running with
1962     the write-lock set can rely on the ffd->min_unpacked_rev and
1963     ffd->min_unpacked_revprop caches to be up-to-date (and, by
1964     extension, on not having to use a retry when calling
1965     svn_fs_fs__path_rev_absolute() and friends).  If you add a call
1966     to this function, consider whether you have to call
1967     svn_fs_fs__update_min_unpacked_rev().
1968     See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
1969 */
1970static svn_error_t *
1971pack_body(void *baton,
1972          apr_pool_t *pool)
1973{
1974  struct pack_baton *pb = baton;
1975  fs_fs_data_t *ffd = pb->fs->fsap_data;
1976  apr_int64_t completed_shards;
1977  svn_revnum_t youngest;
1978  apr_pool_t *iterpool;
1979
1980  /* If the repository isn't a new enough format, we don't support packing.
1981     Return a friendly error to that effect. */
1982  if (ffd->format < SVN_FS_FS__MIN_PACKED_FORMAT)
1983    return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
1984      _("FSFS format (%d) too old to pack; please upgrade the filesystem."),
1985      ffd->format);
1986
1987  /* If we aren't using sharding, we can't do any packing, so quit. */
1988  if (!ffd->max_files_per_dir)
1989    return SVN_NO_ERROR;
1990
1991  SVN_ERR(svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs,
1992                                           pool));
1993
1994  SVN_ERR(svn_fs_fs__youngest_rev(&youngest, pb->fs, pool));
1995  completed_shards = (youngest + 1) / ffd->max_files_per_dir;
1996
1997  /* See if we've already completed all possible shards thus far. */
1998  if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
1999    return SVN_NO_ERROR;
2000
2001  pb->revs_dir = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, pool);
2002  if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT)
2003    pb->revsprops_dir = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
2004                                        pool);
2005
2006  iterpool = svn_pool_create(pool);
2007  for (pb->shard = ffd->min_unpacked_rev / ffd->max_files_per_dir;
2008       pb->shard < completed_shards;
2009       pb->shard++)
2010    {
2011      svn_pool_clear(iterpool);
2012
2013      if (pb->cancel_func)
2014        SVN_ERR(pb->cancel_func(pb->cancel_baton));
2015
2016      SVN_ERR(pack_shard(pb, iterpool));
2017    }
2018
2019  svn_pool_destroy(iterpool);
2020  return SVN_NO_ERROR;
2021}
2022
2023svn_error_t *
2024svn_fs_fs__pack(svn_fs_t *fs,
2025                apr_size_t max_mem,
2026                svn_fs_pack_notify_t notify_func,
2027                void *notify_baton,
2028                svn_cancel_func_t cancel_func,
2029                void *cancel_baton,
2030                apr_pool_t *pool)
2031{
2032  struct pack_baton pb = { 0 };
2033  fs_fs_data_t *ffd = fs->fsap_data;
2034  svn_error_t *err;
2035
2036  pb.fs = fs;
2037  pb.notify_func = notify_func;
2038  pb.notify_baton = notify_baton;
2039  pb.cancel_func = cancel_func;
2040  pb.cancel_baton = cancel_baton;
2041  pb.max_mem = max_mem ? max_mem : DEFAULT_MAX_MEM;
2042
2043  if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
2044    {
2045      /* Newer repositories provide a pack operation specific lock.
2046         Acquire it to prevent concurrent packs.
2047
2048         Since the file lock's lifetime is bound to a pool, we create a
2049         separate subpool here to release the lock immediately after the
2050         operation finished.
2051       */
2052      err = svn_fs_fs__with_pack_lock(fs, pack_body, &pb, pool);
2053    }
2054  else
2055    {
2056      /* Use the global write lock for older repos. */
2057      err = svn_fs_fs__with_write_lock(fs, pack_body, &pb, pool);
2058    }
2059
2060  return svn_error_trace(err);
2061}
2062