1/* pack.c --- FSX shard packing functionality
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22#include <assert.h>
23
24#include "svn_pools.h"
25#include "svn_dirent_uri.h"
26#include "svn_sorts.h"
27#include "private/svn_sorts_private.h"
28#include "private/svn_subr_private.h"
29#include "private/svn_string_private.h"
30#include "private/svn_temp_serializer.h"
31
32#include "fs_x.h"
33#include "pack.h"
34#include "util.h"
35#include "revprops.h"
36#include "transaction.h"
37#include "index.h"
38#include "low_level.h"
39#include "cached_data.h"
40#include "changes.h"
41#include "noderevs.h"
42#include "reps.h"
43
44#include "../libsvn_fs/fs-loader.h"
45
46#include "svn_private_config.h"
47#include "temp_serializer.h"
48
49/* Packing logic:
50 *
51 * We pack files on a pack file basis (e.g. 1000 revs) without changing
52 * existing pack files nor the revision files outside the range to pack.
53 *
54 * First, we will scan the revision file indexes to determine the number
55 * of items to "place" (i.e. determine their optimal position within the
56 * future pack file).  For each item, we will need a constant amount of
57 * memory to track it.  A MAX_MEM parameter sets a limit to the number of
58 * items we may place in one go.  That means, we may not be able to add
59 * all revisions at once.  Instead, we will run the placement for a subset
60 * of revisions at a time.  The very unlikely worst case will simply append
61 * all revision data with just a little reshuffling inside each revision.
62 *
63 * In a second step, we read all revisions in the selected range, build
64 * the item tracking information and copy the items themselves from the
65 * revision files to temporary files.  The latter serve as buckets for a
66 * very coarse bucket presort:  Separate change lists, file properties,
67 * directory properties and noderevs + representations from one another.
68 *
69 * The third step will determine an optimized placement for the items in
70 * each of the 4 buckets separately.  The first three will simply order
71 * their items by revision, starting with the newest once.  Placing rep
72 * and noderev items is a more elaborate process documented in the code.
73 *
74 * In short, we store items in the following order:
75 * - changed paths lists
76 * - node property
77 * - directory properties
78 * - directory representations corresponding noderevs, lexical path order
79 *   with special treatment of "trunk" and "branches"
80 * - same for file representations
81 *
82 * Step 4 copies the items from the temporary buckets into the final
83 * pack file and writes the temporary index files.
84 *
85 * Finally, after the last range of revisions, create the final indexes.
86 */
87
88/* Maximum amount of memory we allocate for placement information during
89 * the pack process.
90 */
91#define DEFAULT_MAX_MEM (64 * 1024 * 1024)
92
93/* Data structure describing a node change at PATH, REVISION.
94 * We will sort these instances by PATH and NODE_ID such that we can combine
95 * similar nodes in the same reps container and store containers in path
96 * major order.
97 */
98typedef struct path_order_t
99{
100  /* changed path */
101  svn_prefix_string__t *path;
102
103  /* node ID for this PATH in REVISION */
104  svn_fs_x__id_t node_id;
105
106  /* when this change happened */
107  svn_revnum_t revision;
108
109  /* this is a directory node */
110  svn_boolean_t is_dir;
111
112  /* length of the expanded representation content */
113  apr_int64_t expanded_size;
114
115  /* item ID of the noderev linked to the change. May be (0, 0). */
116  svn_fs_x__id_t noderev_id;
117
118  /* item ID of the representation containing the new data. May be (0, 0). */
119  svn_fs_x__id_t rep_id;
120} path_order_t;
121
122/* Represents a reference from item FROM to item TO.  FROM may be a noderev
123 * or rep_id while TO is (currently) always a representation.  We will sort
124 * them by TO which allows us to collect all dependent items.
125 */
126typedef struct reference_t
127{
128  svn_fs_x__id_t to;
129  svn_fs_x__id_t from;
130} reference_t;
131
132/* This structure keeps track of all the temporary data and status that
133 * needs to be kept around during the creation of one pack file.  After
134 * each revision range (in case we can't process all revs at once due to
135 * memory restrictions), parts of the data will get re-initialized.
136 */
137typedef struct pack_context_t
138{
139  /* file system that we operate on */
140  svn_fs_t *fs;
141
142  /* cancel function to invoke at regular intervals. May be NULL */
143  svn_cancel_func_t cancel_func;
144
145  /* baton to pass to CANCEL_FUNC */
146  void *cancel_baton;
147
148  /* first revision in the shard (and future pack file) */
149  svn_revnum_t shard_rev;
150
151  /* first revision in the range to process (>= SHARD_REV) */
152  svn_revnum_t start_rev;
153
154  /* first revision after the range to process (<= SHARD_END_REV) */
155  svn_revnum_t end_rev;
156
157  /* first revision after the current shard */
158  svn_revnum_t shard_end_rev;
159
160  /* log-to-phys proto index for the whole pack file */
161  apr_file_t *proto_l2p_index;
162
163  /* phys-to-log proto index for the whole pack file */
164  apr_file_t *proto_p2l_index;
165
166  /* full shard directory path (containing the unpacked revisions) */
167  const char *shard_dir;
168
169  /* full packed shard directory path (containing the pack file + indexes) */
170  const char *pack_file_dir;
171
172  /* full pack file path (including PACK_FILE_DIR) */
173  const char *pack_file_path;
174
175  /* current write position (i.e. file length) in the pack file */
176  apr_off_t pack_offset;
177
178  /* the pack file to ultimately write all data to */
179  apr_file_t *pack_file;
180
181  /* array of svn_fs_x__p2l_entry_t *, all referring to change lists.
182   * Will be filled in phase 2 and be cleared after each revision range. */
183  apr_array_header_t *changes;
184
185  /* temp file receiving all change list items (referenced by CHANGES).
186   * Will be filled in phase 2 and be cleared after each revision range. */
187  apr_file_t *changes_file;
188
189  /* array of svn_fs_x__p2l_entry_t *, all referring to file properties.
190   * Will be filled in phase 2 and be cleared after each revision range. */
191  apr_array_header_t *file_props;
192
193  /* temp file receiving all file prop items (referenced by FILE_PROPS).
194   * Will be filled in phase 2 and be cleared after each revision range.*/
195  apr_file_t *file_props_file;
196
197  /* array of svn_fs_x__p2l_entry_t *, all referring to directory properties.
198   * Will be filled in phase 2 and be cleared after each revision range. */
199  apr_array_header_t *dir_props;
200
201  /* temp file receiving all directory prop items (referenced by DIR_PROPS).
202   * Will be filled in phase 2 and be cleared after each revision range.*/
203  apr_file_t *dir_props_file;
204
205  /* container for all PATH members in PATH_ORDER. */
206  svn_prefix_tree__t *paths;
207
208  /* array of path_order_t *.  Will be filled in phase 2 and be cleared
209   * after each revision range.  Sorted by PATH, NODE_ID. */
210  apr_array_header_t *path_order;
211
212  /* array of reference_t* linking representations to their delta bases.
213   * Will be filled in phase 2 and be cleared after each revision range.
214   * It will be sorted by the FROM members (for rep->base rep lookup). */
215  apr_array_header_t *references;
216
217  /* array of svn_fs_x__p2l_entry_t*.  Will be filled in phase 2 and be
218   * cleared after each revision range.  During phase 3, we will set items
219   * to NULL that we already processed. */
220  apr_array_header_t *reps;
221
222  /* array of int, marking for each revision, the which offset their items
223   * begin in REPS.  Will be filled in phase 2 and be cleared after
224   * each revision range. */
225  apr_array_header_t *rev_offsets;
226
227  /* temp file receiving all items referenced by REPS.
228   * Will be filled in phase 2 and be cleared after each revision range.*/
229  apr_file_t *reps_file;
230
231  /* pool used for temporary data structures that will be cleaned up when
232   * the next range of revisions is being processed */
233  apr_pool_t *info_pool;
234} pack_context_t;
235
236/* Create and initialize a new pack context for packing shard SHARD_REV in
237 * SHARD_DIR into PACK_FILE_DIR within filesystem FS.  Allocate it in POOL
238 * and return the structure in *CONTEXT.
239 *
240 * Limit the number of items being copied per iteration to MAX_ITEMS.
241 * Set CANCEL_FUNC and CANCEL_BATON as well.
242 */
243static svn_error_t *
244initialize_pack_context(pack_context_t *context,
245                        svn_fs_t *fs,
246                        const char *pack_file_dir,
247                        const char *shard_dir,
248                        svn_revnum_t shard_rev,
249                        int max_items,
250                        svn_cancel_func_t cancel_func,
251                        void *cancel_baton,
252                        apr_pool_t *pool)
253{
254  svn_fs_x__data_t *ffd = fs->fsap_data;
255  const char *temp_dir;
256  int max_revs = MIN(ffd->max_files_per_dir, max_items);
257
258  SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
259
260  /* where we will place our various temp files */
261  SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
262
263  /* store parameters */
264  context->fs = fs;
265  context->cancel_func = cancel_func;
266  context->cancel_baton = cancel_baton;
267
268  context->shard_rev = shard_rev;
269  context->start_rev = shard_rev;
270  context->end_rev = shard_rev;
271  context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
272
273  /* Create the new directory and pack file. */
274  context->shard_dir = shard_dir;
275  context->pack_file_dir = pack_file_dir;
276  context->pack_file_path
277    = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
278  SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path,
279                           APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL
280                             | APR_CREATE, APR_OS_DEFAULT, pool));
281
282  /* Proto index files */
283  SVN_ERR(svn_fs_x__l2p_proto_index_open(
284             &context->proto_l2p_index,
285             svn_dirent_join(pack_file_dir,
286                             PATH_INDEX PATH_EXT_L2P_INDEX,
287                             pool),
288             pool));
289  SVN_ERR(svn_fs_x__p2l_proto_index_open(
290             &context->proto_p2l_index,
291             svn_dirent_join(pack_file_dir,
292                             PATH_INDEX PATH_EXT_P2L_INDEX,
293                             pool),
294             pool));
295
296  /* item buckets: one item info array and one temp file per bucket */
297  context->changes = apr_array_make(pool, max_items,
298                                    sizeof(svn_fs_x__p2l_entry_t *));
299  SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
300                                   svn_io_file_del_on_close, pool, pool));
301  context->file_props = apr_array_make(pool, max_items,
302                                       sizeof(svn_fs_x__p2l_entry_t *));
303  SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
304                                   svn_io_file_del_on_close, pool, pool));
305  context->dir_props = apr_array_make(pool, max_items,
306                                      sizeof(svn_fs_x__p2l_entry_t *));
307  SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
308                                   svn_io_file_del_on_close, pool, pool));
309
310  /* noderev and representation item bucket */
311  context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
312  context->path_order = apr_array_make(pool, max_items,
313                                       sizeof(path_order_t *));
314  context->references = apr_array_make(pool, max_items,
315                                       sizeof(reference_t *));
316  context->reps = apr_array_make(pool, max_items,
317                                 sizeof(svn_fs_x__p2l_entry_t *));
318  SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
319                                   svn_io_file_del_on_close, pool, pool));
320
321  /* the pool used for temp structures */
322  context->info_pool = svn_pool_create(pool);
323  context->paths = svn_prefix_tree__create(context->info_pool);
324
325  return SVN_NO_ERROR;
326}
327
328/* Clean up / free all revision range specific data and files in CONTEXT.
329 * Use SCRATCH_POOL for temporary allocations.
330 */
331static svn_error_t *
332reset_pack_context(pack_context_t *context,
333                   apr_pool_t *scratch_pool)
334{
335  apr_array_clear(context->changes);
336  SVN_ERR(svn_io_file_trunc(context->changes_file, 0, scratch_pool));
337  apr_array_clear(context->file_props);
338  SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, scratch_pool));
339  apr_array_clear(context->dir_props);
340  SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, scratch_pool));
341
342  apr_array_clear(context->rev_offsets);
343  apr_array_clear(context->path_order);
344  apr_array_clear(context->references);
345  apr_array_clear(context->reps);
346  SVN_ERR(svn_io_file_trunc(context->reps_file, 0, scratch_pool));
347
348  svn_pool_clear(context->info_pool);
349
350  return SVN_NO_ERROR;
351}
352
353/* Call this after the last revision range.  It will finalize all index files
354 * for CONTEXT and close any open files.
355 * Use SCRATCH_POOL for temporary allocations.
356 */
357static svn_error_t *
358close_pack_context(pack_context_t *context,
359                   apr_pool_t *scratch_pool)
360{
361  const char *proto_l2p_index_path;
362  const char *proto_p2l_index_path;
363
364  /* need the file names for the actual index creation call further down */
365  SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
366                               context->proto_l2p_index, scratch_pool));
367  SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
368                               context->proto_p2l_index, scratch_pool));
369
370  /* finalize proto index files */
371  SVN_ERR(svn_io_file_close(context->proto_l2p_index, scratch_pool));
372  SVN_ERR(svn_io_file_close(context->proto_p2l_index, scratch_pool));
373
374  /* Append the actual index data to the pack file. */
375  SVN_ERR(svn_fs_x__add_index_data(context->fs, context->pack_file,
376                                    proto_l2p_index_path,
377                                    proto_p2l_index_path,
378                                    context->shard_rev,
379                                    scratch_pool));
380
381  /* remove proto index files */
382  SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, scratch_pool));
383  SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, scratch_pool));
384
385  SVN_ERR(svn_io_file_close(context->pack_file, scratch_pool));
386
387  return SVN_NO_ERROR;
388}
389
390/* Efficiently copy SIZE bytes from SOURCE to DEST.  Invoke the CANCEL_FUNC
391 * from CONTEXT at regular intervals.
392 * Use SCRATCH_POOL for temporary allocations.
393 */
394static svn_error_t *
395copy_file_data(pack_context_t *context,
396               apr_file_t *dest,
397               apr_file_t *source,
398               apr_off_t size,
399               apr_pool_t *scratch_pool)
400{
401  /* most non-representation items will be small.  Minimize the buffer
402   * and infrastructure overhead in that case. */
403  enum { STACK_BUFFER_SIZE = 1024 };
404
405  if (size < STACK_BUFFER_SIZE)
406    {
407      /* copy small data using a fixed-size buffer on stack */
408      char buffer[STACK_BUFFER_SIZE];
409      SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
410                                     NULL, NULL, scratch_pool));
411      SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
412                                     NULL, scratch_pool));
413    }
414  else
415    {
416      /* use streaming copies for larger data blocks.  That may require
417       * the allocation of larger buffers and we should make sure that
418       * this extra memory is released asap. */
419      svn_fs_x__data_t *ffd = context->fs->fsap_data;
420      apr_pool_t *copypool = svn_pool_create(scratch_pool);
421      char *buffer = apr_palloc(copypool, ffd->block_size);
422
423      while (size)
424        {
425          apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
426          if (context->cancel_func)
427            SVN_ERR(context->cancel_func(context->cancel_baton));
428
429          SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
430                                         NULL, NULL, scratch_pool));
431          SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
432                                         NULL, scratch_pool));
433
434          size -= to_copy;
435        }
436
437      svn_pool_destroy(copypool);
438    }
439
440  return SVN_NO_ERROR;
441}
442
443/* Writes SIZE bytes, all 0, to DEST.
444 * Use SCRATCH_POOL for temporary allocations.
445 */
446static svn_error_t *
447write_null_bytes(apr_file_t *dest,
448                 apr_off_t size,
449                 apr_pool_t *scratch_pool)
450{
451  /* Have a collection of high-quality, easy to access NUL bytes handy. */
452  enum { BUFFER_SIZE = 1024 };
453  static const char buffer[BUFFER_SIZE] = { 0 };
454
455  /* copy SIZE of them into the file's buffer */
456  while (size)
457    {
458      apr_size_t to_write = MIN(size, BUFFER_SIZE);
459      SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL,
460                                     scratch_pool));
461      size -= to_write;
462    }
463
464  return SVN_NO_ERROR;
465}
466
467/* Copy the "simple" item (changed paths list or property representation)
468 * from the current position in REV_FILE to TEMP_FILE using CONTEXT.  Add
469 * a copy of ENTRY to ENTRIES but with an updated offset value that points
470 * to the copy destination in TEMP_FILE.
471 * Use SCRATCH_POOL for temporary allocations.
472 */
473static svn_error_t *
474copy_item_to_temp(pack_context_t *context,
475                  apr_array_header_t *entries,
476                  apr_file_t *temp_file,
477                  svn_fs_x__revision_file_t *rev_file,
478                  svn_fs_x__p2l_entry_t *entry,
479                  apr_pool_t *scratch_pool)
480{
481  svn_fs_x__p2l_entry_t *new_entry
482    = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
483
484  SVN_ERR(svn_fs_x__get_file_offset(&new_entry->offset, temp_file,
485                                    scratch_pool));
486  APR_ARRAY_PUSH(entries, svn_fs_x__p2l_entry_t *) = new_entry;
487
488  SVN_ERR(copy_file_data(context, temp_file, rev_file->file, entry->size,
489                         scratch_pool));
490
491  return SVN_NO_ERROR;
492}
493
494/* Return the offset within CONTEXT->REPS that corresponds to item
495 * ITEM_INDEX in  REVISION.
496 */
497static int
498get_item_array_index(pack_context_t *context,
499                     svn_revnum_t revision,
500                     apr_int64_t item_index)
501{
502  assert(revision >= context->start_rev);
503  return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
504                                         revision - context->start_rev,
505                                         int);
506}
507
508/* Write INFO to the correct position in CONTEXT->REPS.  The latter may
509 * need auto-expanding.  Overwriting an array element is not allowed.
510 */
511static void
512add_item_rep_mapping(pack_context_t *context,
513                     svn_fs_x__p2l_entry_t *entry)
514{
515  int idx;
516  assert(entry->item_count == 1);
517
518  /* index of INFO */
519  idx = get_item_array_index(context,
520                             entry->items[0].change_set,
521                             entry->items[0].number);
522
523  /* make sure the index exists in the array */
524  while (context->reps->nelts <= idx)
525    APR_ARRAY_PUSH(context->reps, void *) = NULL;
526
527  /* set the element.  If there is already an entry, there are probably
528   * two items claiming to be the same -> bail out */
529  assert(!APR_ARRAY_IDX(context->reps, idx, void *));
530  APR_ARRAY_IDX(context->reps, idx, void *) = entry;
531}
532
533/* Return the P2L entry from CONTEXT->REPS for the given ID.  If there is
534 * none (or not anymore), return NULL.  If RESET has been specified, set
535 * the array entry to NULL after returning the entry.
536 */
537static svn_fs_x__p2l_entry_t *
538get_item(pack_context_t *context,
539         const svn_fs_x__id_t *id,
540         svn_boolean_t reset)
541{
542  svn_fs_x__p2l_entry_t *result = NULL;
543  svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
544  if (id->number && revision >= context->start_rev)
545    {
546      int idx = get_item_array_index(context, revision, id->number);
547      if (context->reps->nelts > idx)
548        {
549          result = APR_ARRAY_IDX(context->reps, idx, void *);
550          if (result && reset)
551            APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
552        }
553    }
554
555  return result;
556}
557
558/* Copy representation item identified by ENTRY from the current position
559 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking into needed by
560 * our placement algorithm to CONTEXT.
561 * Use SCRATCH_POOL for temporary allocations.
562 */
563static svn_error_t *
564copy_rep_to_temp(pack_context_t *context,
565                 svn_fs_x__revision_file_t *rev_file,
566                 svn_fs_x__p2l_entry_t *entry,
567                 apr_pool_t *scratch_pool)
568{
569  svn_fs_x__rep_header_t *rep_header;
570  apr_off_t source_offset = entry->offset;
571
572  /* create a copy of ENTRY, make it point to the copy destination and
573   * store it in CONTEXT */
574  entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
575  SVN_ERR(svn_fs_x__get_file_offset(&entry->offset, context->reps_file,
576                                    scratch_pool));
577  add_item_rep_mapping(context, entry);
578
579  /* read & parse the representation header */
580  SVN_ERR(svn_fs_x__read_rep_header(&rep_header, rev_file->stream,
581                                    scratch_pool, scratch_pool));
582
583  /* if the representation is a delta against some other rep, link the two */
584  if (   rep_header->type == svn_fs_x__rep_delta
585      && rep_header->base_revision >= context->start_rev)
586    {
587      reference_t *reference = apr_pcalloc(context->info_pool,
588                                           sizeof(*reference));
589      reference->from = entry->items[0];
590      reference->to.change_set
591        = svn_fs_x__change_set_by_rev(rep_header->base_revision);
592      reference->to.number = rep_header->base_item_index;
593      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
594    }
595
596  /* copy the whole rep (including header!) to our temp file */
597  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &source_offset,
598                           scratch_pool));
599  SVN_ERR(copy_file_data(context, context->reps_file, rev_file->file,
600                         entry->size, scratch_pool));
601
602  return SVN_NO_ERROR;
603}
604
605/* Directories first, dirs / files sorted by name in reverse lexical order.
606 * This maximizes the chance of two items being located close to one another
607 * in *all* pack files independent of their change order.  It also groups
608 * multi-project repos nicely according to their sub-projects.  The reverse
609 * order aspect gives "trunk" preference over "tags" and "branches", so
610 * trunk-related items are more likely to be contiguous.
611 */
612static int
613compare_dir_entries(const svn_sort__item_t *a,
614                    const svn_sort__item_t *b)
615{
616  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
617  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
618
619  if (lhs->kind != rhs->kind)
620    return lhs->kind == svn_node_dir ? -1 : 1;
621
622  return strcmp(lhs->name, rhs->name);
623}
624
625apr_array_header_t *
626svn_fs_x__order_dir_entries(svn_fs_t *fs,
627                            apr_hash_t *directory,
628                            apr_pool_t *result_pool,
629                            apr_pool_t *scratch_pool)
630{
631  apr_array_header_t *ordered
632    = svn_sort__hash(directory, compare_dir_entries, scratch_pool);
633
634  apr_array_header_t *result
635    = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
636
637  int i;
638  for (i = 0; i < ordered->nelts; ++i)
639    APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
640      = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
641
642  return result;
643}
644
645/* Return a duplicate of the the ORIGINAL path and with special sub-strins
646 * (e.g. "trunk") modified in such a way that have a lower lexicographic
647 * value than any other "normal" file name.
648 */
649static const char *
650tweak_path_for_ordering(const char *original,
651                        apr_pool_t *pool)
652{
653  /* We may add further special cases as needed. */
654  enum {SPECIAL_COUNT = 2};
655  static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
656  char *pos;
657  char *path = apr_pstrdup(pool, original);
658  int i;
659
660  /* Replace the first char of any "special" sub-string we find by
661   * a control char, i.e. '\1' .. '\31'.  In the rare event that this
662   * would clash with existing paths, no data will be lost but merely
663   * the node ordering will be sub-optimal.
664   */
665  for (i = 0; i < SPECIAL_COUNT; ++i)
666    for (pos = strstr(path, special[i]);
667         pos;
668         pos = strstr(pos + 1, special[i]))
669      {
670        *pos = (char)(i + '\1');
671      }
672
673   return path;
674}
675
676/* Copy node revision item identified by ENTRY from the current position
677 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking into needed by
678 * our placement algorithm to CONTEXT.
679 * Use SCRATCH_POOL for temporary allocations.
680 */
681static svn_error_t *
682copy_node_to_temp(pack_context_t *context,
683                  svn_fs_x__revision_file_t *rev_file,
684                  svn_fs_x__p2l_entry_t *entry,
685                  apr_pool_t *scratch_pool)
686{
687  path_order_t *path_order = apr_pcalloc(context->info_pool,
688                                         sizeof(*path_order));
689  svn_fs_x__noderev_t *noderev;
690  const char *sort_path;
691  apr_off_t source_offset = entry->offset;
692
693  /* read & parse noderev */
694  SVN_ERR(svn_fs_x__read_noderev(&noderev, rev_file->stream, scratch_pool,
695                                 scratch_pool));
696
697  /* create a copy of ENTRY, make it point to the copy destination and
698   * store it in CONTEXT */
699  entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
700  SVN_ERR(svn_fs_x__get_file_offset(&entry->offset, context->reps_file,
701                                     scratch_pool));
702  add_item_rep_mapping(context, entry);
703
704  /* copy the noderev to our temp file */
705  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &source_offset,
706                           scratch_pool));
707  SVN_ERR(copy_file_data(context, context->reps_file, rev_file->file,
708                         entry->size, scratch_pool));
709
710  /* if the node has a data representation, make that the node's "base".
711   * This will (often) cause the noderev to be placed right in front of
712   * its data representation. */
713
714  if (noderev->data_rep
715      &&    svn_fs_x__get_revnum(noderev->data_rep->id.change_set)
716         >= context->start_rev)
717    {
718      reference_t *reference = apr_pcalloc(context->info_pool,
719                                           sizeof(*reference));
720      reference->from = entry->items[0];
721      reference->to.change_set = noderev->data_rep->id.change_set;
722      reference->to.number = noderev->data_rep->id.number;
723      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
724
725      path_order->rep_id = reference->to;
726      path_order->expanded_size = noderev->data_rep->expanded_size;
727    }
728
729  /* Sort path is the key used for ordering noderevs and associated reps.
730   * It will not be stored in the final pack file. */
731  sort_path = tweak_path_for_ordering(noderev->created_path, scratch_pool);
732  path_order->path = svn_prefix_string__create(context->paths, sort_path);
733  path_order->node_id = noderev->node_id;
734  path_order->revision = svn_fs_x__get_revnum(noderev->noderev_id.change_set);
735  path_order->is_dir = noderev->kind == svn_node_dir;
736  path_order->noderev_id = noderev->noderev_id;
737  APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
738
739  return SVN_NO_ERROR;
740}
741
742/* implements compare_fn_t. Place LHS before RHS, if the latter is older.
743 */
744static int
745compare_p2l_info(const svn_fs_x__p2l_entry_t * const * lhs,
746                 const svn_fs_x__p2l_entry_t * const * rhs)
747{
748  assert(*lhs != *rhs);
749  if ((*lhs)->item_count == 0)
750    return (*lhs)->item_count == 0 ? 0 : -1;
751  if ((*lhs)->item_count == 0)
752    return 1;
753
754  if ((*lhs)->items[0].change_set == (*rhs)->items[0].change_set)
755    return (*lhs)->items[0].number > (*rhs)->items[0].number ? -1 : 1;
756
757  return (*lhs)->items[0].change_set > (*rhs)->items[0].change_set ? -1 : 1;
758}
759
760/* Sort svn_fs_x__p2l_entry_t * array ENTRIES by age.  Place the latest
761 * items first.
762 */
763static void
764sort_items(apr_array_header_t *entries)
765{
766  svn_sort__array(entries,
767                  (int (*)(const void *, const void *))compare_p2l_info);
768}
769
770/* implements compare_fn_t.  Sort descending by PATH, NODE_ID and REVISION.
771 */
772static int
773compare_path_order(const path_order_t * const * lhs_p,
774                   const path_order_t * const * rhs_p)
775{
776  const path_order_t * lhs = *lhs_p;
777  const path_order_t * rhs = *rhs_p;
778
779  /* cluster all directories */
780  int diff = rhs->is_dir - lhs->is_dir;
781  if (diff)
782    return diff;
783
784  /* lexicographic order on path and node (i.e. latest first) */
785  diff = svn_prefix_string__compare(lhs->path, rhs->path);
786  if (diff)
787    return diff;
788
789  /* reverse order on node (i.e. latest first) */
790  diff = svn_fs_x__id_compare(&rhs->node_id, &lhs->node_id);
791  if (diff)
792    return diff;
793
794  /* reverse order on revision (i.e. latest first) */
795  if (lhs->revision != rhs->revision)
796    return lhs->revision < rhs->revision ? 1 : -1;
797
798  return 0;
799}
800
801/* implements compare_fn_t.  Sort ascending by TO, FROM.
802 */
803static int
804compare_references(const reference_t * const * lhs_p,
805                   const reference_t * const * rhs_p)
806{
807  const reference_t * lhs = *lhs_p;
808  const reference_t * rhs = *rhs_p;
809
810  int diff = svn_fs_x__id_compare(&lhs->to, &rhs->to);
811  return diff ? diff : svn_fs_x__id_compare(&lhs->from, &rhs->from);
812}
813
814/* Order the data collected in CONTEXT such that we can place them in the
815 * desired order.
816 */
817static void
818sort_reps(pack_context_t *context)
819{
820  svn_sort__array(context->path_order,
821                  (int (*)(const void *, const void *))compare_path_order);
822  svn_sort__array(context->references,
823                  (int (*)(const void *, const void *))compare_references);
824}
825
826/* Return the remaining unused bytes in the current block in CONTEXT's
827 * pack file.
828 */
829static apr_ssize_t
830get_block_left(pack_context_t *context)
831{
832  svn_fs_x__data_t *ffd = context->fs->fsap_data;
833  return ffd->block_size - (context->pack_offset % ffd->block_size);
834}
835
836/* To prevent items from overlapping a block boundary, we will usually
837 * put them into the next block and top up the old one with NUL bytes.
838 * Pad CONTEXT's pack file to the end of the current block, if that padding
839 * is short enough.  Use SCRATCH_POOL for temporary allocations.
840 */
841static svn_error_t *
842auto_pad_block(pack_context_t *context,
843               apr_pool_t *scratch_pool)
844{
845  svn_fs_x__data_t *ffd = context->fs->fsap_data;
846
847  /* This is the maximum number of bytes "wasted" that way per block.
848   * Larger items will cross the block boundaries. */
849  const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
850
851  /* Is wasted space small enough to align the current item to the next
852   * block? */
853  apr_off_t padding = get_block_left(context);
854
855  if (padding < max_padding)
856    {
857      /* Yes. To up with NUL bytes and don't forget to create
858       * an P2L index entry marking this section as unused. */
859      svn_fs_x__p2l_entry_t null_entry;
860
861      null_entry.offset = context->pack_offset;
862      null_entry.size = padding;
863      null_entry.type = SVN_FS_X__ITEM_TYPE_UNUSED;
864      null_entry.fnv1_checksum = 0;
865      null_entry.item_count = 0;
866      null_entry.items = NULL;
867
868      SVN_ERR(write_null_bytes(context->pack_file, padding, scratch_pool));
869      SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
870                  (context->proto_p2l_index, &null_entry, scratch_pool));
871      context->pack_offset += padding;
872    }
873
874  return SVN_NO_ERROR;
875}
876
877/* Return the index of the first entry in CONTEXT->REFERENCES that
878 * references ITEM->ITEMS[0] if such entries exist.  All matching items
879 * will be consecutive.
880 */
881static int
882find_first_reference(pack_context_t *context,
883                     svn_fs_x__p2l_entry_t *item)
884{
885  int lower = 0;
886  int upper = context->references->nelts - 1;
887
888  while (lower <= upper)
889    {
890      int current = lower + (upper - lower) / 2;
891      reference_t *reference
892        = APR_ARRAY_IDX(context->references, current, reference_t *);
893
894      if (svn_fs_x__id_compare(&reference->to, item->items) < 0)
895        lower = current + 1;
896      else
897        upper = current - 1;
898    }
899
900  return lower;
901}
902
903/* Check whether entry number IDX in CONTEXT->REFERENCES references ITEM.
904 */
905static svn_boolean_t
906is_reference_match(pack_context_t *context,
907                   int idx,
908                   svn_fs_x__p2l_entry_t *item)
909{
910  reference_t *reference;
911  if (context->references->nelts <= idx)
912    return FALSE;
913
914  reference = APR_ARRAY_IDX(context->references, idx, reference_t *);
915  return svn_fs_x__id_eq(&reference->to, item->items);
916}
917
918/* Starting at IDX in CONTEXT->PATH_ORDER, select all representations and
919 * noderevs that should be placed into the same container, respectively.
920 * Append the path_order_t * elements encountered in SELECTED, the
921 * svn_fs_x__p2l_entry_t * of the representations that should be placed
922 * into the same reps container will be appended to REP_PARTS and the
923 * svn_fs_x__p2l_entry_t * of the noderevs referencing those reps will
924 * be appended to NODE_PARTS.
925 *
926 * Remove all returned items from the CONTEXT->REPS container and prevent
927 * them from being placed a second time later on.  That also means that the
928 * caller has to place all items returned.
929 */
930static svn_error_t *
931select_reps(pack_context_t *context,
932            int idx,
933            apr_array_header_t *selected,
934            apr_array_header_t *node_parts,
935            apr_array_header_t *rep_parts)
936{
937  apr_array_header_t *path_order = context->path_order;
938  path_order_t *start_path = APR_ARRAY_IDX(path_order, idx, path_order_t *);
939
940  svn_fs_x__p2l_entry_t *node_part;
941  svn_fs_x__p2l_entry_t *rep_part;
942  svn_fs_x__p2l_entry_t *depending;
943  int i, k;
944
945  /* collect all path_order records as well as rep and noderev items
946   * that occupy the same path with the same node. */
947  for (; idx < path_order->nelts; ++idx)
948    {
949      path_order_t *current_path
950        = APR_ARRAY_IDX(path_order, idx, path_order_t *);
951
952      if (!svn_fs_x__id_eq(&start_path->node_id, &current_path->node_id))
953        break;
954
955      APR_ARRAY_IDX(path_order, idx, path_order_t *) = NULL;
956      node_part = get_item(context, &current_path->noderev_id, TRUE);
957      rep_part = get_item(context, &current_path->rep_id, TRUE);
958
959      if (node_part && rep_part)
960        APR_ARRAY_PUSH(selected, path_order_t *) = current_path;
961
962      if (node_part)
963        APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = node_part;
964      if (rep_part)
965        APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = rep_part;
966    }
967
968  /* collect depending reps and noderevs that reference any of the collected
969   * reps */
970  for (i = 0; i < rep_parts->nelts; ++i)
971    {
972      rep_part = APR_ARRAY_IDX(rep_parts, i, svn_fs_x__p2l_entry_t*);
973      for (k = find_first_reference(context, rep_part);
974           is_reference_match(context, k, rep_part);
975           ++k)
976        {
977          reference_t *reference
978            = APR_ARRAY_IDX(context->references, k, reference_t *);
979
980          depending = get_item(context, &reference->from, TRUE);
981          if (!depending)
982            continue;
983
984          if (depending->type == SVN_FS_X__ITEM_TYPE_NODEREV)
985            APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = depending;
986          else
987            APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = depending;
988        }
989    }
990
991  return SVN_NO_ERROR;
992}
993
994/* Return TRUE, if all path_order_t * in SELECTED reference contents that is
995 * not longer than LIMIT.
996 */
997static svn_boolean_t
998reps_fit_into_containers(apr_array_header_t *selected,
999                         apr_uint64_t limit)
1000{
1001  int i;
1002  for (i = 0; i < selected->nelts; ++i)
1003    if (APR_ARRAY_IDX(selected, i, path_order_t *)->expanded_size > limit)
1004      return FALSE;
1005
1006  return TRUE;
1007}
1008
1009/* Write the *CONTAINER containing the noderevs described by the
1010 * svn_fs_x__p2l_entry_t * in ITEMS to the pack file on CONTEXT.
1011 * Append a P2L entry for the container to CONTAINER->REPS.
1012 * Afterwards, clear ITEMS and re-allocate *CONTAINER in CONTAINER_POOL
1013 * so the caller may fill them again.
1014 * Use SCRATCH_POOL for temporary allocations.
1015 */
1016static svn_error_t *
1017write_nodes_container(pack_context_t *context,
1018                      svn_fs_x__noderevs_t **container,
1019                      apr_array_header_t *items,
1020                      apr_pool_t *container_pool,
1021                      apr_pool_t *scratch_pool)
1022{
1023  int i;
1024  apr_off_t offset = 0;
1025  svn_fs_x__p2l_entry_t *container_entry;
1026  svn_stream_t *pack_stream;
1027
1028  if (items->nelts == 0)
1029    return SVN_NO_ERROR;
1030
1031  /* serialize container */
1032  container_entry = apr_palloc(context->info_pool, sizeof(*container_entry));
1033  pack_stream = svn_checksum__wrap_write_stream_fnv1a_32x4
1034                                (&container_entry->fnv1_checksum,
1035                                 svn_stream_from_aprfile2(context->pack_file,
1036                                                          TRUE, scratch_pool),
1037                                 scratch_pool);
1038  SVN_ERR(svn_fs_x__write_noderevs_container(pack_stream, *container,
1039                                             scratch_pool));
1040  SVN_ERR(svn_stream_close(pack_stream));
1041  SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1042                           scratch_pool));
1043
1044  /* replace first noderev item in ENTRIES with the container
1045     and set all others to NULL */
1046  container_entry->offset = context->pack_offset;
1047  container_entry->size = offset - container_entry->offset;
1048  container_entry->type = SVN_FS_X__ITEM_TYPE_NODEREVS_CONT;
1049  container_entry->item_count = items->nelts;
1050  container_entry->items = apr_palloc(context->info_pool,
1051      sizeof(svn_fs_x__id_t) * container_entry->item_count);
1052
1053  for (i = 0; i < items->nelts; ++i)
1054    container_entry->items[i]
1055      = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *)->items[0];
1056
1057  context->pack_offset = offset;
1058  APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *)
1059    = container_entry;
1060
1061  /* Write P2L index for copied items, i.e. the 1 container */
1062  SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1063            (context->proto_p2l_index, container_entry, scratch_pool));
1064
1065  svn_pool_clear(container_pool);
1066  *container = svn_fs_x__noderevs_create(16, container_pool);
1067  apr_array_clear(items);
1068
1069  return SVN_NO_ERROR;
1070}
1071
1072/* Read the noderevs given by the svn_fs_x__p2l_entry_t * in NODE_PARTS
1073 * from TEMP_FILE and add them to *CONTAINER and NODES_IN_CONTAINER.
1074 * Whenever the container grows bigger than the current block in CONTEXT,
1075 * write the data to disk and continue in the next block.
1076 *
1077 * Use CONTAINER_POOL to re-allocate the *CONTAINER as necessary and
1078 * SCRATCH_POOL to temporary allocations.
1079 */
1080static svn_error_t *
1081store_nodes(pack_context_t *context,
1082            apr_file_t *temp_file,
1083            apr_array_header_t *node_parts,
1084            svn_fs_x__noderevs_t **container,
1085            apr_array_header_t *nodes_in_container,
1086            apr_pool_t *container_pool,
1087            apr_pool_t *scratch_pool)
1088{
1089  int i;
1090
1091  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1092  svn_stream_t *stream
1093    = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool);
1094
1095  /* number of bytes in the current block not being spent on fixed-size
1096     items (i.e. those not put into the container). */
1097  apr_size_t capacity_left = get_block_left(context);
1098
1099  /* Estimated noderev container size */
1100  apr_size_t last_container_size = 0, container_size = 0;
1101
1102  /* Estimate extra capacity we will gain from container compression. */
1103  apr_size_t pack_savings = 0;
1104  for (i = 0; i < node_parts->nelts; ++i)
1105    {
1106      svn_fs_x__noderev_t *noderev;
1107      svn_fs_x__p2l_entry_t *entry
1108        = APR_ARRAY_IDX(node_parts, i, svn_fs_x__p2l_entry_t *);
1109
1110      /* if we reached the limit, check whether we saved some space
1111         through the container. */
1112      if (capacity_left + pack_savings < container_size + entry->size)
1113        container_size = svn_fs_x__noderevs_estimate_size(*container);
1114
1115      /* If necessary and the container is large enough, try harder
1116         by actually serializing the container and determine current
1117         savings due to compression. */
1118      if (   capacity_left + pack_savings < container_size + entry->size
1119          && container_size > last_container_size + 2000)
1120        {
1121          svn_stringbuf_t *serialized
1122            = svn_stringbuf_create_ensure(container_size, iterpool);
1123          svn_stream_t *temp_stream
1124            = svn_stream_from_stringbuf(serialized, iterpool);
1125
1126          SVN_ERR(svn_fs_x__write_noderevs_container(temp_stream, *container,
1127                                                     iterpool));
1128          SVN_ERR(svn_stream_close(temp_stream));
1129
1130          last_container_size = container_size;
1131          pack_savings = container_size - serialized->len;
1132        }
1133
1134      /* still doesn't fit? -> block is full. Flush */
1135      if (   capacity_left + pack_savings < container_size + entry->size
1136          && nodes_in_container->nelts < 2)
1137        {
1138          SVN_ERR(auto_pad_block(context, iterpool));
1139          capacity_left = get_block_left(context);
1140        }
1141
1142      /* still doesn't fit? -> block is full. Flush */
1143      if (capacity_left + pack_savings < container_size + entry->size)
1144        {
1145          SVN_ERR(write_nodes_container(context, container,
1146                                        nodes_in_container, container_pool,
1147                                        iterpool));
1148
1149          capacity_left = get_block_left(context);
1150          pack_savings = 0;
1151          container_size = 0;
1152        }
1153
1154      /* item will fit into the block. */
1155      SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, iterpool));
1156      SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, iterpool, iterpool));
1157      svn_fs_x__noderevs_add(*container, noderev);
1158
1159      container_size += entry->size;
1160      APR_ARRAY_PUSH(nodes_in_container, svn_fs_x__p2l_entry_t *) = entry;
1161
1162      svn_pool_clear(iterpool);
1163    }
1164
1165  svn_pool_destroy(iterpool);
1166
1167  return SVN_NO_ERROR;
1168}
1169
1170
1171/* Finalize CONTAINER and write it to CONTEXT's pack file.
1172 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
1173 * Use SCRATCH_POOL for temporary allocations.
1174 */
1175static svn_error_t *
1176write_reps_container(pack_context_t *context,
1177                     svn_fs_x__reps_builder_t *container,
1178                     apr_array_header_t *sub_items,
1179                     apr_array_header_t *new_entries,
1180                     apr_pool_t *scratch_pool)
1181{
1182  apr_off_t offset = 0;
1183  svn_fs_x__p2l_entry_t container_entry;
1184
1185  svn_stream_t *pack_stream
1186    = svn_checksum__wrap_write_stream_fnv1a_32x4
1187                                (&container_entry.fnv1_checksum,
1188                                 svn_stream_from_aprfile2(context->pack_file,
1189                                                          TRUE, scratch_pool),
1190                                 scratch_pool);
1191
1192  SVN_ERR(svn_fs_x__write_reps_container(pack_stream, container,
1193                                         scratch_pool));
1194  SVN_ERR(svn_stream_close(pack_stream));
1195  SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1196                           scratch_pool));
1197
1198  container_entry.offset = context->pack_offset;
1199  container_entry.size = offset - container_entry.offset;
1200  container_entry.type = SVN_FS_X__ITEM_TYPE_REPS_CONT;
1201  container_entry.item_count = sub_items->nelts;
1202  container_entry.items = (svn_fs_x__id_t *)sub_items->elts;
1203
1204  context->pack_offset = offset;
1205  APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *)
1206    = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool);
1207
1208  SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1209            (context->proto_p2l_index, &container_entry, scratch_pool));
1210
1211  return SVN_NO_ERROR;
1212}
1213
1214/* Read the (property) representations identified by svn_fs_x__p2l_entry_t
1215 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into
1216 * CONTEXT->PACK_FILE.  Use SCRATCH_POOL for temporary allocations.
1217 */
1218static svn_error_t *
1219write_reps_containers(pack_context_t *context,
1220                      apr_array_header_t *entries,
1221                      apr_file_t *temp_file,
1222                      apr_array_header_t *new_entries,
1223                      apr_pool_t *scratch_pool)
1224{
1225  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1226  apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1227  int i;
1228
1229  apr_ssize_t block_left = get_block_left(context);
1230
1231  svn_fs_x__reps_builder_t *container
1232    = svn_fs_x__reps_builder_create(context->fs, container_pool);
1233  apr_array_header_t *sub_items
1234    = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t));
1235  svn_fs_x__revision_file_t *file;
1236
1237  SVN_ERR(svn_fs_x__wrap_temp_rev_file(&file, context->fs, temp_file,
1238                                       scratch_pool));
1239
1240  /* copy all items in strict order */
1241  for (i = entries->nelts-1; i >= 0; --i)
1242    {
1243      svn_fs_x__representation_t representation = { 0 };
1244      svn_stringbuf_t *contents;
1245      svn_stream_t *stream;
1246      apr_size_t list_index;
1247      svn_fs_x__p2l_entry_t *entry
1248        = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1249
1250      if ((block_left < entry->size) && sub_items->nelts)
1251        {
1252          block_left = get_block_left(context)
1253                     - svn_fs_x__reps_estimate_size(container);
1254        }
1255
1256      if ((block_left < entry->size) && sub_items->nelts)
1257        {
1258          SVN_ERR(write_reps_container(context, container, sub_items,
1259                                       new_entries, iterpool));
1260
1261          apr_array_clear(sub_items);
1262          svn_pool_clear(container_pool);
1263          container = svn_fs_x__reps_builder_create(context->fs,
1264                                                    container_pool);
1265          block_left = get_block_left(context);
1266        }
1267
1268      /* still enough space in current block? */
1269      if (block_left < entry->size)
1270        {
1271          SVN_ERR(auto_pad_block(context, iterpool));
1272          block_left = get_block_left(context);
1273        }
1274
1275      assert(entry->item_count == 1);
1276      representation.id = entry->items[0];
1277
1278      /* select the change list in the source file, parse it and add it to
1279       * the container */
1280      SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1281                               iterpool));
1282      SVN_ERR(svn_fs_x__get_representation_length(&representation.size,
1283                                             &representation.expanded_size,
1284                                             context->fs, file,
1285                                             entry, iterpool));
1286      SVN_ERR(svn_fs_x__get_contents(&stream, context->fs, &representation,
1287                                     FALSE, iterpool));
1288      contents = svn_stringbuf_create_ensure(representation.expanded_size,
1289                                             iterpool);
1290      contents->len = representation.expanded_size;
1291
1292      /* The representation is immutable.  Read it normally. */
1293      SVN_ERR(svn_stream_read_full(stream, contents->data, &contents->len));
1294      SVN_ERR(svn_stream_close(stream));
1295
1296      SVN_ERR(svn_fs_x__reps_add(&list_index, container,
1297                                 svn_stringbuf__morph_into_string(contents)));
1298      SVN_ERR_ASSERT(list_index == sub_items->nelts);
1299      block_left -= entry->size;
1300
1301      APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0];
1302
1303      svn_pool_clear(iterpool);
1304    }
1305
1306  if (sub_items->nelts)
1307    SVN_ERR(write_reps_container(context, container, sub_items,
1308                                 new_entries, iterpool));
1309
1310  svn_pool_destroy(iterpool);
1311  svn_pool_destroy(container_pool);
1312
1313  return SVN_NO_ERROR;
1314}
1315
1316/* Return TRUE if the estimated size of the NODES_IN_CONTAINER plus the
1317 * representations given as svn_fs_x__p2l_entry_t * in ENTRIES may exceed
1318 * the space left in the current block.
1319 */
1320static svn_boolean_t
1321should_flush_nodes_container(pack_context_t *context,
1322                             svn_fs_x__noderevs_t *nodes_container,
1323                             apr_array_header_t *entries)
1324{
1325  apr_ssize_t block_left = get_block_left(context);
1326  apr_ssize_t rep_sum = 0;
1327  apr_ssize_t container_size
1328    = svn_fs_x__noderevs_estimate_size(nodes_container);
1329
1330  int i;
1331  for (i = 0; i < entries->nelts; ++i)
1332    {
1333      svn_fs_x__p2l_entry_t *entry
1334        = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1335      rep_sum += entry->size;
1336    }
1337
1338  return block_left < rep_sum + container_size;
1339}
1340
1341/* Read the contents of the first COUNT non-NULL, non-empty items in ITEMS
1342 * from TEMP_FILE and write them to CONTEXT->PACK_FILE.
1343 * Use SCRATCH_POOL for temporary allocations.
1344 */
1345static svn_error_t *
1346store_items(pack_context_t *context,
1347            apr_file_t *temp_file,
1348            apr_array_header_t *items,
1349            int count,
1350            apr_pool_t *scratch_pool)
1351{
1352  int i;
1353  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1354
1355  /* copy all items in strict order */
1356  for (i = 0; i < count; ++i)
1357    {
1358      svn_fs_x__p2l_entry_t *entry
1359        = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *);
1360      if (!entry
1361          || entry->type == SVN_FS_X__ITEM_TYPE_UNUSED
1362          || entry->item_count == 0)
1363        continue;
1364
1365      /* select the item in the source file and copy it into the target
1366       * pack file */
1367      SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1368                               iterpool));
1369      SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1370                             entry->size, iterpool));
1371
1372      /* write index entry and update current position */
1373      entry->offset = context->pack_offset;
1374      context->pack_offset += entry->size;
1375
1376      SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1377                  (context->proto_p2l_index, entry, iterpool));
1378
1379      APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *) = entry;
1380      svn_pool_clear(iterpool);
1381    }
1382
1383  svn_pool_destroy(iterpool);
1384
1385  return SVN_NO_ERROR;
1386}
1387
1388/* Copy (append) the items identified by svn_fs_x__p2l_entry_t * elements
1389 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1390 * Use SCRATCH_POOL for temporary allocations.
1391 */
1392static svn_error_t *
1393copy_reps_from_temp(pack_context_t *context,
1394                    apr_file_t *temp_file,
1395                    apr_pool_t *scratch_pool)
1396{
1397  svn_fs_x__data_t *ffd = context->fs->fsap_data;
1398
1399  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1400  apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1401  apr_array_header_t *path_order = context->path_order;
1402  apr_array_header_t *reps = context->reps;
1403  apr_array_header_t *selected = apr_array_make(scratch_pool, 16,
1404                                                path_order->elt_size);
1405  apr_array_header_t *node_parts = apr_array_make(scratch_pool, 16,
1406                                                  reps->elt_size);
1407  apr_array_header_t *rep_parts = apr_array_make(scratch_pool, 16,
1408                                                 reps->elt_size);
1409  apr_array_header_t *nodes_in_container = apr_array_make(scratch_pool, 16,
1410                                                          reps->elt_size);
1411  int i, k;
1412  int initial_reps_count = reps->nelts;
1413
1414  /* 1 container for all noderevs in the current block.  We will try to
1415   * not write it to disk until the current block fills up, i.e. aim for
1416   * a single noderevs container per block. */
1417  svn_fs_x__noderevs_t *nodes_container
1418    = svn_fs_x__noderevs_create(16, container_pool);
1419
1420  /* copy items in path order. Create block-sized containers. */
1421  for (i = 0; i < path_order->nelts; ++i)
1422    {
1423      if (APR_ARRAY_IDX(path_order, i, path_order_t *) == NULL)
1424        continue;
1425
1426      /* Collect reps to combine and all noderevs referencing them */
1427      SVN_ERR(select_reps(context, i, selected, node_parts, rep_parts));
1428
1429      /* store the noderevs container in front of the reps */
1430      SVN_ERR(store_nodes(context, temp_file, node_parts, &nodes_container,
1431                          nodes_in_container, container_pool, iterpool));
1432
1433      /* actually flush the noderevs to disk if the reps container is likely
1434       * to fill the block, i.e. no further noderevs will be added to the
1435       * nodes container. */
1436      if (should_flush_nodes_container(context, nodes_container, node_parts))
1437        SVN_ERR(write_nodes_container(context, &nodes_container,
1438                                      nodes_in_container, container_pool,
1439                                      iterpool));
1440
1441      /* if all reps are short enough put them into one container.
1442       * Otherwise, just store all containers here. */
1443      if (reps_fit_into_containers(selected, 2 * ffd->block_size))
1444        SVN_ERR(write_reps_containers(context, rep_parts, temp_file,
1445                                      context->reps, iterpool));
1446      else
1447        SVN_ERR(store_items(context, temp_file, rep_parts, rep_parts->nelts,
1448                            iterpool));
1449
1450      /* processed all items */
1451      apr_array_clear(selected);
1452      apr_array_clear(node_parts);
1453      apr_array_clear(rep_parts);
1454
1455      svn_pool_clear(iterpool);
1456    }
1457
1458  /* flush noderevs container to disk */
1459  if (nodes_in_container->nelts)
1460    SVN_ERR(write_nodes_container(context, &nodes_container,
1461                                  nodes_in_container, container_pool,
1462                                  iterpool));
1463
1464  /* copy all items in strict order */
1465  SVN_ERR(store_items(context, temp_file, reps, initial_reps_count,
1466                      scratch_pool));
1467
1468  /* vaccum ENTRIES array: eliminate NULL entries */
1469  for (i = 0, k = 0; i < reps->nelts; ++i)
1470    {
1471      svn_fs_x__p2l_entry_t *entry
1472        = APR_ARRAY_IDX(reps, i, svn_fs_x__p2l_entry_t *);
1473      if (entry)
1474        {
1475          APR_ARRAY_IDX(reps, k, svn_fs_x__p2l_entry_t *) = entry;
1476          ++k;
1477        }
1478    }
1479  reps->nelts = k;
1480
1481  svn_pool_destroy(iterpool);
1482  svn_pool_destroy(container_pool);
1483
1484  return SVN_NO_ERROR;
1485}
1486
1487/* Finalize CONTAINER and write it to CONTEXT's pack file.
1488 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
1489 * Use SCRATCH_POOL for temporary allocations.
1490 */
1491static svn_error_t *
1492write_changes_container(pack_context_t *context,
1493                        svn_fs_x__changes_t *container,
1494                        apr_array_header_t *sub_items,
1495                        apr_array_header_t *new_entries,
1496                        apr_pool_t *scratch_pool)
1497{
1498  apr_off_t offset = 0;
1499  svn_fs_x__p2l_entry_t container_entry;
1500
1501  svn_stream_t *pack_stream
1502    = svn_checksum__wrap_write_stream_fnv1a_32x4
1503                                (&container_entry.fnv1_checksum,
1504                                 svn_stream_from_aprfile2(context->pack_file,
1505                                                          TRUE, scratch_pool),
1506                                 scratch_pool);
1507
1508  SVN_ERR(svn_fs_x__write_changes_container(pack_stream,
1509                                             container,
1510                                             scratch_pool));
1511  SVN_ERR(svn_stream_close(pack_stream));
1512  SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1513                           scratch_pool));
1514
1515  container_entry.offset = context->pack_offset;
1516  container_entry.size = offset - container_entry.offset;
1517  container_entry.type = SVN_FS_X__ITEM_TYPE_CHANGES_CONT;
1518  container_entry.item_count = sub_items->nelts;
1519  container_entry.items = (svn_fs_x__id_t *)sub_items->elts;
1520
1521  context->pack_offset = offset;
1522  APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *)
1523    = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool);
1524
1525  SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1526            (context->proto_p2l_index, &container_entry, scratch_pool));
1527
1528  return SVN_NO_ERROR;
1529}
1530
1531/* Read the change lists identified by svn_fs_x__p2l_entry_t * elements
1532 * in ENTRIES strictly in from TEMP_FILE, aggregate them and write them
1533 * into CONTEXT->PACK_FILE.  Use SCRATCH_POOL for temporary allocations.
1534 */
1535static svn_error_t *
1536write_changes_containers(pack_context_t *context,
1537                         apr_array_header_t *entries,
1538                         apr_file_t *temp_file,
1539                         apr_pool_t *scratch_pool)
1540{
1541  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1542  apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1543  int i;
1544
1545  apr_ssize_t block_left = get_block_left(context);
1546  apr_ssize_t estimated_addition = 0;
1547
1548  svn_fs_x__changes_t *container
1549    = svn_fs_x__changes_create(1000, container_pool);
1550  apr_array_header_t *sub_items
1551    = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t));
1552  apr_array_header_t *new_entries
1553    = apr_array_make(context->info_pool, 16, entries->elt_size);
1554  svn_stream_t *temp_stream
1555    = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool);
1556
1557  /* copy all items in strict order */
1558  for (i = entries->nelts-1; i >= 0; --i)
1559    {
1560      apr_array_header_t *changes;
1561      apr_size_t list_index;
1562      svn_fs_x__p2l_entry_t *entry
1563        = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1564
1565      /* zip compression alone will significantly reduce the size of large
1566       * change lists. So, we will probably need even less than this estimate.
1567       */
1568      apr_ssize_t estimated_size = (entry->size / 5) + 250;
1569
1570      /* If necessary and enough data has been added to the container since
1571       * the last test, try harder by actually serializing the container and
1572       * determine current savings due to compression. */
1573      if (block_left < estimated_size && estimated_addition > 2000)
1574        {
1575          svn_stringbuf_t *serialized
1576            = svn_stringbuf_create_ensure(get_block_left(context), iterpool);
1577          svn_stream_t *memory_stream
1578            = svn_stream_from_stringbuf(serialized, iterpool);
1579
1580          SVN_ERR(svn_fs_x__write_changes_container(memory_stream,
1581                                                     container, iterpool));
1582          SVN_ERR(svn_stream_close(temp_stream));
1583
1584          block_left = get_block_left(context) - serialized->len;
1585          estimated_addition = 0;
1586        }
1587
1588      if ((block_left < estimated_size) && sub_items->nelts)
1589        {
1590          SVN_ERR(write_changes_container(context, container, sub_items,
1591                                          new_entries, iterpool));
1592
1593          apr_array_clear(sub_items);
1594          svn_pool_clear(container_pool);
1595          container = svn_fs_x__changes_create(1000, container_pool);
1596          block_left = get_block_left(context);
1597          estimated_addition = 0;
1598        }
1599
1600      /* still enough space in current block? */
1601      if (block_left < estimated_size)
1602        {
1603          SVN_ERR(auto_pad_block(context, iterpool));
1604          block_left = get_block_left(context);
1605        }
1606
1607      /* select the change list in the source file, parse it and add it to
1608       * the container */
1609      SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1610                               iterpool));
1611      SVN_ERR(svn_fs_x__read_changes(&changes, temp_stream, scratch_pool,
1612                                     iterpool));
1613      SVN_ERR(svn_fs_x__changes_append_list(&list_index, container, changes));
1614      SVN_ERR_ASSERT(list_index == sub_items->nelts);
1615      block_left -= estimated_size;
1616      estimated_addition += estimated_size;
1617
1618      APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0];
1619
1620      svn_pool_clear(iterpool);
1621    }
1622
1623  if (sub_items->nelts)
1624    SVN_ERR(write_changes_container(context, container, sub_items,
1625                                    new_entries, iterpool));
1626
1627  *entries = *new_entries;
1628  svn_pool_destroy(iterpool);
1629  svn_pool_destroy(container_pool);
1630
1631  return SVN_NO_ERROR;
1632}
1633
1634/* Read the (property) representations identified by svn_fs_x__p2l_entry_t
1635 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into
1636 * CONTEXT->PACK_FILE.  Use SCRATCH_POOL for temporary allocations.
1637 */
1638static svn_error_t *
1639write_property_containers(pack_context_t *context,
1640                          apr_array_header_t *entries,
1641                          apr_file_t *temp_file,
1642                          apr_pool_t *scratch_pool)
1643{
1644  apr_array_header_t *new_entries
1645    = apr_array_make(context->info_pool, 16, entries->elt_size);
1646
1647  SVN_ERR(write_reps_containers(context, entries, temp_file, new_entries,
1648                                scratch_pool));
1649
1650  *entries = *new_entries;
1651
1652  return SVN_NO_ERROR;
1653}
1654
1655/* Append all entries of svn_fs_x__p2l_entry_t * array TO_APPEND to
1656 * svn_fs_x__p2l_entry_t * array DEST.
1657 */
1658static void
1659append_entries(apr_array_header_t *dest,
1660               apr_array_header_t *to_append)
1661{
1662  int i;
1663  for (i = 0; i < to_append->nelts; ++i)
1664    APR_ARRAY_PUSH(dest, svn_fs_x__p2l_entry_t *)
1665      = APR_ARRAY_IDX(to_append, i, svn_fs_x__p2l_entry_t *);
1666}
1667
1668/* Write the log-to-phys proto index file for CONTEXT and use POOL for
1669 * temporary allocations.  All items in all buckets must have been placed
1670 * by now.
1671 */
1672static svn_error_t *
1673write_l2p_index(pack_context_t *context,
1674                apr_pool_t *pool)
1675{
1676  apr_pool_t *scratch_pool = svn_pool_create(pool);
1677  const char *temp_name;
1678  const char *proto_index;
1679  apr_off_t offset = 0;
1680
1681  /* lump all items into one bucket.  As target, use the bucket that
1682   * probably has the most entries already. */
1683  append_entries(context->reps, context->changes);
1684  append_entries(context->reps, context->file_props);
1685  append_entries(context->reps, context->dir_props);
1686
1687  /* Let the index code do the expensive L2P -> P2L transformation. */
1688  SVN_ERR(svn_fs_x__l2p_index_from_p2l_entries(&temp_name,
1689                                               context->fs,
1690                                               context->reps,
1691                                               pool, scratch_pool));
1692
1693  /* Append newly written segment to exisiting proto index file. */
1694  SVN_ERR(svn_io_file_name_get(&proto_index, context->proto_l2p_index,
1695                               scratch_pool));
1696
1697  SVN_ERR(svn_io_file_flush(context->proto_l2p_index, scratch_pool));
1698  SVN_ERR(svn_io_append_file(temp_name, proto_index, scratch_pool));
1699  SVN_ERR(svn_io_remove_file2(temp_name, FALSE, scratch_pool));
1700  SVN_ERR(svn_io_file_seek(context->proto_l2p_index, APR_END, &offset,
1701                           scratch_pool));
1702
1703  /* Done. */
1704  svn_pool_destroy(scratch_pool);
1705
1706  return SVN_NO_ERROR;
1707}
1708
1709/* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1710 * to 4.  Use SCRATCH_POOL for temporary allocations.
1711 */
1712static svn_error_t *
1713pack_range(pack_context_t *context,
1714           apr_pool_t *scratch_pool)
1715{
1716  svn_fs_x__data_t *ffd = context->fs->fsap_data;
1717  apr_pool_t *revpool = svn_pool_create(scratch_pool);
1718  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1719
1720  /* Phase 2: Copy items into various buckets and build tracking info */
1721  svn_revnum_t revision;
1722  for (revision = context->start_rev; revision < context->end_rev; ++revision)
1723    {
1724      apr_off_t offset = 0;
1725      svn_fs_x__revision_file_t *rev_file;
1726
1727      /* Get the rev file dimensions (mainly index locations). */
1728      SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, context->fs,
1729                                              revision, revpool, iterpool));
1730      SVN_ERR(svn_fs_x__auto_read_footer(rev_file));
1731
1732      /* store the indirect array index */
1733      APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1734
1735      /* read the phys-to-log index file until we covered the whole rev file.
1736       * That index contains enough info to build both target indexes from it. */
1737      while (offset < rev_file->l2p_offset)
1738        {
1739          /* read one cluster */
1740          int i;
1741          apr_array_header_t *entries;
1742          svn_pool_clear(iterpool);
1743
1744          SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs,
1745                                             rev_file, revision, offset,
1746                                             ffd->p2l_page_size, iterpool,
1747                                             iterpool));
1748
1749          for (i = 0; i < entries->nelts; ++i)
1750            {
1751              svn_fs_x__p2l_entry_t *entry
1752                = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
1753
1754              /* skip first entry if that was duplicated due crossing a
1755                 cluster boundary */
1756              if (offset > entry->offset)
1757                continue;
1758
1759              /* process entry while inside the rev file */
1760              offset = entry->offset;
1761              if (offset < rev_file->l2p_offset)
1762                {
1763                  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset,
1764                                           iterpool));
1765
1766                  if (entry->type == SVN_FS_X__ITEM_TYPE_CHANGES)
1767                    SVN_ERR(copy_item_to_temp(context,
1768                                              context->changes,
1769                                              context->changes_file,
1770                                              rev_file, entry, iterpool));
1771                  else if (entry->type == SVN_FS_X__ITEM_TYPE_FILE_PROPS)
1772                    SVN_ERR(copy_item_to_temp(context,
1773                                              context->file_props,
1774                                              context->file_props_file,
1775                                              rev_file, entry, iterpool));
1776                  else if (entry->type == SVN_FS_X__ITEM_TYPE_DIR_PROPS)
1777                    SVN_ERR(copy_item_to_temp(context,
1778                                              context->dir_props,
1779                                              context->dir_props_file,
1780                                              rev_file, entry, iterpool));
1781                  else if (   entry->type == SVN_FS_X__ITEM_TYPE_FILE_REP
1782                           || entry->type == SVN_FS_X__ITEM_TYPE_DIR_REP)
1783                    SVN_ERR(copy_rep_to_temp(context, rev_file, entry,
1784                                             iterpool));
1785                  else if (entry->type == SVN_FS_X__ITEM_TYPE_NODEREV)
1786                    SVN_ERR(copy_node_to_temp(context, rev_file, entry,
1787                                              iterpool));
1788                  else
1789                    SVN_ERR_ASSERT(entry->type == SVN_FS_X__ITEM_TYPE_UNUSED);
1790
1791                  offset += entry->size;
1792                }
1793            }
1794
1795          if (context->cancel_func)
1796            SVN_ERR(context->cancel_func(context->cancel_baton));
1797        }
1798
1799      svn_pool_clear(revpool);
1800    }
1801
1802  svn_pool_destroy(iterpool);
1803
1804  /* phase 3: placement.
1805   * Use "newest first" placement for simple items. */
1806  sort_items(context->changes);
1807  sort_items(context->file_props);
1808  sort_items(context->dir_props);
1809
1810  /* follow dependencies recursively for noderevs and data representations */
1811  sort_reps(context);
1812
1813  /* phase 4: copy bucket data to pack file.  Write P2L index. */
1814  SVN_ERR(write_changes_containers(context, context->changes,
1815                                   context->changes_file, revpool));
1816  svn_pool_clear(revpool);
1817  SVN_ERR(write_property_containers(context, context->file_props,
1818                                    context->file_props_file, revpool));
1819  svn_pool_clear(revpool);
1820  SVN_ERR(write_property_containers(context, context->dir_props,
1821                                    context->dir_props_file, revpool));
1822  svn_pool_clear(revpool);
1823  SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1824  svn_pool_clear(revpool);
1825
1826  /* write L2P index as well (now that we know all target offsets) */
1827  SVN_ERR(write_l2p_index(context, revpool));
1828
1829  svn_pool_destroy(revpool);
1830
1831  return SVN_NO_ERROR;
1832}
1833
1834/* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1835 * This function will only be used for very large revisions (>>100k changes).
1836 * Use SCRATCH_POOL for temporary allocations.
1837 */
1838static svn_error_t *
1839append_revision(pack_context_t *context,
1840                apr_pool_t *scratch_pool)
1841{
1842  svn_fs_x__data_t *ffd = context->fs->fsap_data;
1843  apr_off_t offset = 0;
1844  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1845  svn_fs_x__revision_file_t *rev_file;
1846  apr_finfo_t finfo;
1847
1848  /* Get the size of the file. */
1849  const char *path = svn_dirent_join(context->shard_dir,
1850                                     apr_psprintf(iterpool, "%ld",
1851                                                  context->start_rev),
1852                                     scratch_pool);
1853  SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, scratch_pool));
1854
1855  /* Copy all the bits from the rev file to the end of the pack file. */
1856  SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, context->fs,
1857                                          context->start_rev, scratch_pool,
1858                                          iterpool));
1859  SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
1860                         finfo.size, iterpool));
1861
1862  /* mark the start of a new revision */
1863  SVN_ERR(svn_fs_x__l2p_proto_index_add_revision(context->proto_l2p_index,
1864                                                 scratch_pool));
1865
1866  /* read the phys-to-log index file until we covered the whole rev file.
1867   * That index contains enough info to build both target indexes from it. */
1868  while (offset < finfo.size)
1869    {
1870      /* read one cluster */
1871      int i;
1872      apr_array_header_t *entries;
1873      SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs, rev_file,
1874                                         context->start_rev, offset,
1875                                         ffd->p2l_page_size, iterpool,
1876                                         iterpool));
1877
1878      for (i = 0; i < entries->nelts; ++i)
1879        {
1880          svn_fs_x__p2l_entry_t *entry
1881            = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
1882
1883          /* skip first entry if that was duplicated due crossing a
1884             cluster boundary */
1885          if (offset > entry->offset)
1886            continue;
1887
1888          /* process entry while inside the rev file */
1889          offset = entry->offset;
1890          if (offset < finfo.size)
1891            {
1892              /* there should be true containers */
1893              SVN_ERR_ASSERT(entry->item_count == 1);
1894
1895              entry->offset += context->pack_offset;
1896              offset += entry->size;
1897              SVN_ERR(svn_fs_x__l2p_proto_index_add_entry
1898                        (context->proto_l2p_index, entry->offset, 0,
1899                         entry->items[0].number, iterpool));
1900              SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1901                        (context->proto_p2l_index, entry, iterpool));
1902            }
1903        }
1904
1905      svn_pool_clear(iterpool);
1906    }
1907
1908  svn_pool_destroy(iterpool);
1909  context->pack_offset += finfo.size;
1910
1911  return SVN_NO_ERROR;
1912}
1913
1914/* Format 7 packing logic.
1915 *
1916 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1917 * SHARD_DIR into the PACK_FILE_DIR, using SCRATCH_POOL for temporary
1918 * allocations.  Limit the extra memory consumption to MAX_MEM bytes.
1919 * CANCEL_FUNC and CANCEL_BATON are what you think they are.
1920 */
1921static svn_error_t *
1922pack_log_addressed(svn_fs_t *fs,
1923                   const char *pack_file_dir,
1924                   const char *shard_dir,
1925                   svn_revnum_t shard_rev,
1926                   apr_size_t max_mem,
1927                   svn_cancel_func_t cancel_func,
1928                   void *cancel_baton,
1929                   apr_pool_t *scratch_pool)
1930{
1931  enum
1932    {
1933      /* estimated amount of memory used to represent one item in memory
1934       * during rev file packing */
1935      PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1936                   + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1937                   + APR_ALIGN_DEFAULT(sizeof(reference_t))
1938                   + APR_ALIGN_DEFAULT(sizeof(svn_fs_x__p2l_entry_t))
1939                   + 6 * sizeof(void*)
1940    };
1941
1942  int max_items = max_mem / PER_ITEM_MEM > INT_MAX
1943                ? INT_MAX
1944                : (int)(max_mem / PER_ITEM_MEM);
1945  apr_array_header_t *max_ids;
1946  pack_context_t context = { 0 };
1947  int i;
1948  apr_size_t item_count = 0;
1949  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1950
1951  /* set up a pack context */
1952  SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1953                                  shard_rev, max_items, cancel_func,
1954                                  cancel_baton, scratch_pool));
1955
1956  /* phase 1: determine the size of the revisions to pack */
1957  SVN_ERR(svn_fs_x__l2p_get_max_ids(&max_ids, fs, shard_rev,
1958                                    context.shard_end_rev - shard_rev,
1959                                    scratch_pool, scratch_pool));
1960
1961  /* pack revisions in ranges that don't exceed MAX_MEM */
1962  for (i = 0; i < max_ids->nelts; ++i)
1963    if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items)
1964      {
1965        context.end_rev++;
1966      }
1967    else
1968      {
1969        /* some unpacked revisions before this one? */
1970        if (context.start_rev < context.end_rev)
1971          {
1972            /* pack them intelligently (might be just 1 rev but still ...) */
1973            SVN_ERR(pack_range(&context, iterpool));
1974            SVN_ERR(reset_pack_context(&context, iterpool));
1975            item_count = 0;
1976          }
1977
1978        /* next revision range is to start with the current revision */
1979        context.start_rev = i + context.shard_rev;
1980        context.end_rev = context.start_rev + 1;
1981
1982        /* if this is a very large revision, we must place it as is */
1983        if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1984          {
1985            SVN_ERR(append_revision(&context, iterpool));
1986            context.start_rev++;
1987          }
1988        else
1989          item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1990
1991        svn_pool_clear(iterpool);
1992      }
1993
1994  /* non-empty revision range at the end? */
1995  if (context.start_rev < context.end_rev)
1996    SVN_ERR(pack_range(&context, iterpool));
1997
1998  /* last phase: finalize indexes and clean up */
1999  SVN_ERR(reset_pack_context(&context, iterpool));
2000  SVN_ERR(close_pack_context(&context, iterpool));
2001  svn_pool_destroy(iterpool);
2002
2003  return SVN_NO_ERROR;
2004}
2005
2006/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
2007   Use SCRATCH_POOL for temporary allocations. */
2008svn_error_t *
2009svn_fs_x__get_packed_offset(apr_off_t *rev_offset,
2010                            svn_fs_t *fs,
2011                            svn_revnum_t rev,
2012                            apr_pool_t *scratch_pool)
2013{
2014  svn_fs_x__data_t *ffd = fs->fsap_data;
2015  svn_stream_t *manifest_stream;
2016  svn_boolean_t is_cached;
2017  svn_revnum_t shard;
2018  apr_int64_t shard_pos;
2019  apr_array_header_t *manifest;
2020  apr_pool_t *iterpool;
2021
2022  shard = rev / ffd->max_files_per_dir;
2023
2024  /* position of the shard within the manifest */
2025  shard_pos = rev % ffd->max_files_per_dir;
2026
2027  /* fetch exactly that element into *rev_offset, if the manifest is found
2028     in the cache */
2029  SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
2030                                 ffd->packed_offset_cache, &shard,
2031                                 svn_fs_x__get_sharded_offset, &shard_pos,
2032                                 scratch_pool));
2033
2034  if (is_cached)
2035      return SVN_NO_ERROR;
2036
2037  /* Open the manifest file. */
2038  SVN_ERR(svn_stream_open_readonly(&manifest_stream,
2039                    svn_fs_x__path_rev_packed(fs, rev, PATH_MANIFEST,
2040                                              scratch_pool),
2041                    scratch_pool, scratch_pool));
2042
2043  /* While we're here, let's just read the entire manifest file into an array,
2044     so we can cache the entire thing. */
2045  iterpool = svn_pool_create(scratch_pool);
2046  manifest = apr_array_make(scratch_pool, ffd->max_files_per_dir,
2047                            sizeof(apr_off_t));
2048  while (1)
2049    {
2050      svn_boolean_t eof;
2051      apr_int64_t val;
2052
2053      svn_pool_clear(iterpool);
2054      SVN_ERR(svn_fs_x__read_number_from_stream(&val, &eof, manifest_stream,
2055                                                iterpool));
2056      if (eof)
2057        break;
2058
2059      APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val;
2060    }
2061  svn_pool_destroy(iterpool);
2062
2063  *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
2064                              apr_off_t);
2065
2066  /* Close up shop and cache the array. */
2067  SVN_ERR(svn_stream_close(manifest_stream));
2068  return svn_cache__set(ffd->packed_offset_cache, &shard, manifest,
2069                        scratch_pool);
2070}
2071
2072/* In filesystem FS, pack the revision SHARD containing exactly
2073 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
2074 * using SCRATCH_POOL for temporary allocations.  Try to limit the amount of
2075 * temporary memory needed to MAX_MEM bytes.  CANCEL_FUNC and CANCEL_BATON
2076 * are what you think they are.
2077 *
2078 * If for some reason we detect a partial packing already performed, we
2079 * remove the pack file and start again.
2080 *
2081 * The actual packing will be done in a format-specific sub-function.
2082 */
2083static svn_error_t *
2084pack_rev_shard(svn_fs_t *fs,
2085               const char *pack_file_dir,
2086               const char *shard_path,
2087               apr_int64_t shard,
2088               int max_files_per_dir,
2089               apr_size_t max_mem,
2090               svn_cancel_func_t cancel_func,
2091               void *cancel_baton,
2092               apr_pool_t *scratch_pool)
2093{
2094  const char *pack_file_path;
2095  svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
2096
2097  /* Some useful paths. */
2098  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, scratch_pool);
2099
2100  /* Remove any existing pack file for this shard, since it is incomplete. */
2101  SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
2102                             scratch_pool));
2103
2104  /* Create the new directory and pack file. */
2105  SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool));
2106
2107  /* Index information files */
2108  SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
2109                              max_mem, cancel_func, cancel_baton,
2110                             scratch_pool));
2111
2112  SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, scratch_pool));
2113  SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, scratch_pool));
2114
2115  return SVN_NO_ERROR;
2116}
2117
2118/* In the file system at FS_PATH, pack the SHARD in REVS_DIR and
2119 * REVPROPS_DIR containing exactly MAX_FILES_PER_DIR revisions, using
2120 * SCRATCH_POOL temporary for allocations.  REVPROPS_DIR will be NULL if
2121 * revprop packing is not supported.  COMPRESSION_LEVEL and MAX_PACK_SIZE
2122 * will be ignored in that case.
2123 *
2124 * CANCEL_FUNC and CANCEL_BATON are what you think they are; similarly
2125 * NOTIFY_FUNC and NOTIFY_BATON.
2126 *
2127 * If for some reason we detect a partial packing already performed, we
2128 * remove the pack file and start again.
2129 */
2130static svn_error_t *
2131pack_shard(const char *revs_dir,
2132           const char *revsprops_dir,
2133           svn_fs_t *fs,
2134           apr_int64_t shard,
2135           int max_files_per_dir,
2136           apr_off_t max_pack_size,
2137           int compression_level,
2138           svn_fs_pack_notify_t notify_func,
2139           void *notify_baton,
2140           svn_cancel_func_t cancel_func,
2141           void *cancel_baton,
2142           apr_pool_t *scratch_pool)
2143{
2144  svn_fs_x__data_t *ffd = fs->fsap_data;
2145  const char *rev_shard_path, *rev_pack_file_dir;
2146  const char *revprops_shard_path, *revprops_pack_file_dir;
2147
2148  /* Notify caller we're starting to pack this shard. */
2149  if (notify_func)
2150    SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_start,
2151                        scratch_pool));
2152
2153  /* Some useful paths. */
2154  rev_pack_file_dir = svn_dirent_join(revs_dir,
2155                  apr_psprintf(scratch_pool,
2156                               "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
2157                               shard),
2158                  scratch_pool);
2159  rev_shard_path = svn_dirent_join(revs_dir,
2160                      apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard),
2161                      scratch_pool);
2162
2163  /* pack the revision content */
2164  SVN_ERR(pack_rev_shard(fs, rev_pack_file_dir, rev_shard_path,
2165                         shard, max_files_per_dir, DEFAULT_MAX_MEM,
2166                         cancel_func, cancel_baton, scratch_pool));
2167
2168  /* if enabled, pack the revprops in an equivalent way */
2169  if (revsprops_dir)
2170    {
2171      revprops_pack_file_dir = svn_dirent_join(revsprops_dir,
2172                   apr_psprintf(scratch_pool,
2173                                "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
2174                                shard),
2175                   scratch_pool);
2176      revprops_shard_path = svn_dirent_join(revsprops_dir,
2177                   apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard),
2178                   scratch_pool);
2179
2180      SVN_ERR(svn_fs_x__pack_revprops_shard(revprops_pack_file_dir,
2181                                            revprops_shard_path,
2182                                            shard, max_files_per_dir,
2183                                            (int)(0.9 * max_pack_size),
2184                                            compression_level,
2185                                            cancel_func, cancel_baton,
2186                                            scratch_pool));
2187    }
2188
2189  /* Update the min-unpacked-rev file to reflect our newly packed shard. */
2190  SVN_ERR(svn_fs_x__write_min_unpacked_rev(fs,
2191                          (svn_revnum_t)((shard + 1) * max_files_per_dir),
2192                          scratch_pool));
2193  ffd->min_unpacked_rev = (svn_revnum_t)((shard + 1) * max_files_per_dir);
2194
2195  /* Finally, remove the existing shard directories.
2196   * For revprops, clean up older obsolete shards as well as they might
2197   * have been left over from an interrupted FS upgrade. */
2198  SVN_ERR(svn_io_remove_dir2(rev_shard_path, TRUE,
2199                             cancel_func, cancel_baton, scratch_pool));
2200  if (revsprops_dir)
2201    {
2202      svn_node_kind_t kind = svn_node_dir;
2203      apr_int64_t to_cleanup = shard;
2204      do
2205        {
2206          SVN_ERR(svn_fs_x__delete_revprops_shard(revprops_shard_path,
2207                                                  to_cleanup,
2208                                                  max_files_per_dir,
2209                                                  cancel_func, cancel_baton,
2210                                                  scratch_pool));
2211
2212          /* If the previous shard exists, clean it up as well.
2213             Don't try to clean up shard 0 as it we can't tell quickly
2214             whether it actually needs cleaning up. */
2215          revprops_shard_path = svn_dirent_join(revsprops_dir,
2216                                                apr_psprintf(scratch_pool,
2217                                                          "%" APR_INT64_T_FMT,
2218                                                          --to_cleanup),
2219                                                scratch_pool);
2220          SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, scratch_pool));
2221        }
2222      while (kind == svn_node_dir && to_cleanup > 0);
2223    }
2224
2225  /* Notify caller we're starting to pack this shard. */
2226  if (notify_func)
2227    SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_end,
2228                        scratch_pool));
2229
2230  return SVN_NO_ERROR;
2231}
2232
2233typedef struct pack_baton_t
2234{
2235  svn_fs_t *fs;
2236  svn_fs_pack_notify_t notify_func;
2237  void *notify_baton;
2238  svn_cancel_func_t cancel_func;
2239  void *cancel_baton;
2240} pack_baton_t;
2241
2242
2243/* The work-horse for svn_fs_x__pack, called with the FS write lock.
2244   This implements the svn_fs_x__with_write_lock() 'body' callback
2245   type.  BATON is a 'pack_baton_t *'.
2246
2247   WARNING: if you add a call to this function, please note:
2248     The code currently assumes that any piece of code running with
2249     the write-lock set can rely on the ffd->min_unpacked_rev and
2250     ffd->min_unpacked_revprop caches to be up-to-date (and, by
2251     extension, on not having to use a retry when calling
2252     svn_fs_x__path_rev_absolute() and friends).  If you add a call
2253     to this function, consider whether you have to call
2254     update_min_unpacked_rev().
2255     See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
2256 */
2257static svn_error_t *
2258pack_body(void *baton,
2259          apr_pool_t *scratch_pool)
2260{
2261  pack_baton_t *pb = baton;
2262  svn_fs_x__data_t *ffd = pb->fs->fsap_data;
2263  apr_int64_t completed_shards;
2264  apr_int64_t i;
2265  svn_revnum_t youngest;
2266  apr_pool_t *iterpool;
2267  const char *rev_data_path;
2268  const char *revprops_data_path = NULL;
2269
2270  /* If we aren't using sharding, we can't do any packing, so quit. */
2271  SVN_ERR(svn_fs_x__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs,
2272                                          scratch_pool));
2273
2274  SVN_ERR(svn_fs_x__youngest_rev(&youngest, pb->fs, scratch_pool));
2275  completed_shards = (youngest + 1) / ffd->max_files_per_dir;
2276
2277  /* See if we've already completed all possible shards thus far. */
2278  if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
2279    return SVN_NO_ERROR;
2280
2281  rev_data_path = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, scratch_pool);
2282  revprops_data_path = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
2283                                        scratch_pool);
2284
2285  iterpool = svn_pool_create(scratch_pool);
2286  for (i = ffd->min_unpacked_rev / ffd->max_files_per_dir;
2287       i < completed_shards;
2288       i++)
2289    {
2290      svn_pool_clear(iterpool);
2291
2292      if (pb->cancel_func)
2293        SVN_ERR(pb->cancel_func(pb->cancel_baton));
2294
2295      SVN_ERR(pack_shard(rev_data_path, revprops_data_path,
2296                         pb->fs, i, ffd->max_files_per_dir,
2297                         ffd->revprop_pack_size,
2298                         ffd->compress_packed_revprops
2299                           ? SVN__COMPRESSION_ZLIB_DEFAULT
2300                           : SVN__COMPRESSION_NONE,
2301                         pb->notify_func, pb->notify_baton,
2302                         pb->cancel_func, pb->cancel_baton, iterpool));
2303    }
2304
2305  svn_pool_destroy(iterpool);
2306  return SVN_NO_ERROR;
2307}
2308
2309svn_error_t *
2310svn_fs_x__pack(svn_fs_t *fs,
2311               svn_fs_pack_notify_t notify_func,
2312               void *notify_baton,
2313               svn_cancel_func_t cancel_func,
2314               void *cancel_baton,
2315               apr_pool_t *scratch_pool)
2316{
2317  pack_baton_t pb = { 0 };
2318  pb.fs = fs;
2319  pb.notify_func = notify_func;
2320  pb.notify_baton = notify_baton;
2321  pb.cancel_func = cancel_func;
2322  pb.cancel_baton = cancel_baton;
2323  return svn_fs_x__with_pack_lock(fs, pack_body, &pb, scratch_pool);
2324}
2325