1/* pack.c --- FSX shard packing functionality
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22#include <assert.h>
23
24#include "svn_pools.h"
25#include "svn_dirent_uri.h"
26#include "svn_sorts.h"
27#include "private/svn_sorts_private.h"
28#include "private/svn_subr_private.h"
29#include "private/svn_string_private.h"
30#include "private/svn_temp_serializer.h"
31
32#include "fs_x.h"
33#include "pack.h"
34#include "util.h"
35#include "revprops.h"
36#include "transaction.h"
37#include "index.h"
38#include "low_level.h"
39#include "cached_data.h"
40#include "changes.h"
41#include "noderevs.h"
42#include "reps.h"
43
44#include "../libsvn_fs/fs-loader.h"
45
46#include "svn_private_config.h"
47#include "temp_serializer.h"
48
49/* Packing logic:
50 *
51 * We pack files on a pack file basis (e.g. 1000 revs) without changing
52 * existing pack files nor the revision files outside the range to pack.
53 *
54 * First, we will scan the revision file indexes to determine the number
55 * of items to "place" (i.e. determine their optimal position within the
56 * future pack file).  For each item, we will need a constant amount of
57 * memory to track it.  A MAX_MEM parameter sets a limit to the number of
58 * items we may place in one go.  That means, we may not be able to add
59 * all revisions at once.  Instead, we will run the placement for a subset
60 * of revisions at a time.  The very unlikely worst case will simply append
61 * all revision data with just a little reshuffling inside each revision.
62 *
63 * In a second step, we read all revisions in the selected range, build
64 * the item tracking information and copy the items themselves from the
65 * revision files to temporary files.  The latter serve as buckets for a
66 * very coarse bucket presort:  Separate change lists, file properties,
67 * directory properties and noderevs + representations from one another.
68 *
69 * The third step will determine an optimized placement for the items in
70 * each of the 4 buckets separately.  The first three will simply order
71 * their items by revision, starting with the newest once.  Placing rep
72 * and noderev items is a more elaborate process documented in the code.
73 *
74 * In short, we store items in the following order:
75 * - changed paths lists
76 * - node property
77 * - directory properties
78 * - directory representations corresponding noderevs, lexical path order
79 *   with special treatment of "trunk" and "branches"
80 * - same for file representations
81 *
82 * Step 4 copies the items from the temporary buckets into the final
83 * pack file and writes the temporary index files.
84 *
85 * Finally, after the last range of revisions, create the final indexes.
86 */
87
88/* Maximum amount of memory we allocate for placement information during
89 * the pack process.
90 */
91#define DEFAULT_MAX_MEM (64 * 1024 * 1024)
92
93/* Data structure describing a node change at PATH, REVISION.
94 * We will sort these instances by PATH and NODE_ID such that we can combine
95 * similar nodes in the same reps container and store containers in path
96 * major order.
97 */
98typedef struct path_order_t
99{
100  /* changed path */
101  svn_prefix_string__t *path;
102
103  /* node ID for this PATH in REVISION */
104  svn_fs_x__id_t node_id;
105
106  /* when this change happened */
107  svn_revnum_t revision;
108
109  /* length of the expanded representation content */
110  apr_int64_t expanded_size;
111
112  /* item ID of the noderev linked to the change. May be (0, 0). */
113  svn_fs_x__id_t noderev_id;
114
115  /* item ID of the representation containing the new data. May be (0, 0). */
116  svn_fs_x__id_t rep_id;
117} path_order_t;
118
119/* Represents a reference from item FROM to item TO.  FROM may be a noderev
120 * or rep_id while TO is (currently) always a representation.  We will sort
121 * them by TO which allows us to collect all dependent items.
122 */
123typedef struct reference_t
124{
125  svn_fs_x__id_t to;
126  svn_fs_x__id_t from;
127} reference_t;
128
129/* This structure keeps track of all the temporary data and status that
130 * needs to be kept around during the creation of one pack file.  After
131 * each revision range (in case we can't process all revs at once due to
132 * memory restrictions), parts of the data will get re-initialized.
133 */
134typedef struct pack_context_t
135{
136  /* file system that we operate on */
137  svn_fs_t *fs;
138
139  /* cancel function to invoke at regular intervals. May be NULL */
140  svn_cancel_func_t cancel_func;
141
142  /* baton to pass to CANCEL_FUNC */
143  void *cancel_baton;
144
145  /* first revision in the shard (and future pack file) */
146  svn_revnum_t shard_rev;
147
148  /* first revision in the range to process (>= SHARD_REV) */
149  svn_revnum_t start_rev;
150
151  /* first revision after the range to process (<= SHARD_END_REV) */
152  svn_revnum_t end_rev;
153
154  /* first revision after the current shard */
155  svn_revnum_t shard_end_rev;
156
157  /* log-to-phys proto index for the whole pack file */
158  apr_file_t *proto_l2p_index;
159
160  /* phys-to-log proto index for the whole pack file */
161  apr_file_t *proto_p2l_index;
162
163  /* full shard directory path (containing the unpacked revisions) */
164  const char *shard_dir;
165
166  /* full packed shard directory path (containing the pack file + indexes) */
167  const char *pack_file_dir;
168
169  /* full pack file path (including PACK_FILE_DIR) */
170  const char *pack_file_path;
171
172  /* current write position (i.e. file length) in the pack file */
173  apr_off_t pack_offset;
174
175  /* the pack file to ultimately write all data to */
176  apr_file_t *pack_file;
177
178  /* array of svn_fs_x__p2l_entry_t *, all referring to change lists.
179   * Will be filled in phase 2 and be cleared after each revision range. */
180  apr_array_header_t *changes;
181
182  /* temp file receiving all change list items (referenced by CHANGES).
183   * Will be filled in phase 2 and be cleared after each revision range. */
184  apr_file_t *changes_file;
185
186  /* array of svn_fs_x__p2l_entry_t *, all referring to file properties.
187   * Will be filled in phase 2 and be cleared after each revision range. */
188  apr_array_header_t *file_props;
189
190  /* temp file receiving all file prop items (referenced by FILE_PROPS).
191   * Will be filled in phase 2 and be cleared after each revision range.*/
192  apr_file_t *file_props_file;
193
194  /* array of svn_fs_x__p2l_entry_t *, all referring to directory properties.
195   * Will be filled in phase 2 and be cleared after each revision range. */
196  apr_array_header_t *dir_props;
197
198  /* temp file receiving all directory prop items (referenced by DIR_PROPS).
199   * Will be filled in phase 2 and be cleared after each revision range.*/
200  apr_file_t *dir_props_file;
201
202  /* container for all PATH members in PATH_ORDER. */
203  svn_prefix_tree__t *paths;
204
205  /* array of path_order_t *.  Will be filled in phase 2 and be cleared
206   * after each revision range.  Sorted by PATH, NODE_ID. */
207  apr_array_header_t *path_order;
208
209  /* array of reference_t* linking representations to their delta bases.
210   * Will be filled in phase 2 and be cleared after each revision range.
211   * It will be sorted by the FROM members (for rep->base rep lookup). */
212  apr_array_header_t *references;
213
214  /* array of svn_fs_x__p2l_entry_t*.  Will be filled in phase 2 and be
215   * cleared after each revision range.  During phase 3, we will set items
216   * to NULL that we already processed. */
217  apr_array_header_t *reps;
218
219  /* array of int, marking for each revision, at which offset their items
220   * begin in REPS.  Will be filled in phase 2 and be cleared after
221   * each revision range. */
222  apr_array_header_t *rev_offsets;
223
224  /* temp file receiving all items referenced by REPS.
225   * Will be filled in phase 2 and be cleared after each revision range.*/
226  apr_file_t *reps_file;
227
228  /* pool used for temporary data structures that will be cleaned up when
229   * the next range of revisions is being processed */
230  apr_pool_t *info_pool;
231} pack_context_t;
232
233/* Create and initialize a new pack context for packing shard SHARD_REV in
234 * SHARD_DIR into PACK_FILE_DIR within filesystem FS.  Allocate it in POOL
235 * and return the structure in *CONTEXT.
236 *
237 * Limit the number of items being copied per iteration to MAX_ITEMS.
238 * Set CANCEL_FUNC and CANCEL_BATON as well.
239 */
240static svn_error_t *
241initialize_pack_context(pack_context_t *context,
242                        svn_fs_t *fs,
243                        const char *pack_file_dir,
244                        const char *shard_dir,
245                        svn_revnum_t shard_rev,
246                        int max_items,
247                        svn_fs_x__batch_fsync_t *batch,
248                        svn_cancel_func_t cancel_func,
249                        void *cancel_baton,
250                        apr_pool_t *pool)
251{
252  svn_fs_x__data_t *ffd = fs->fsap_data;
253  const char *temp_dir;
254  int max_revs = MIN(ffd->max_files_per_dir, max_items);
255
256  SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
257
258  /* where we will place our various temp files */
259  SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
260
261  /* store parameters */
262  context->fs = fs;
263  context->cancel_func = cancel_func;
264  context->cancel_baton = cancel_baton;
265
266  context->shard_rev = shard_rev;
267  context->start_rev = shard_rev;
268  context->end_rev = shard_rev;
269  context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
270
271  /* Create the new directory and pack file. */
272  context->shard_dir = shard_dir;
273  context->pack_file_dir = pack_file_dir;
274  context->pack_file_path
275    = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
276
277  SVN_ERR(svn_fs_x__batch_fsync_open_file(&context->pack_file, batch,
278                                          context->pack_file_path, pool));
279
280  /* Proto index files */
281  SVN_ERR(svn_fs_x__l2p_proto_index_open(
282             &context->proto_l2p_index,
283             svn_dirent_join(pack_file_dir,
284                             PATH_INDEX PATH_EXT_L2P_INDEX,
285                             pool),
286             pool));
287  SVN_ERR(svn_fs_x__p2l_proto_index_open(
288             &context->proto_p2l_index,
289             svn_dirent_join(pack_file_dir,
290                             PATH_INDEX PATH_EXT_P2L_INDEX,
291                             pool),
292             pool));
293
294  /* item buckets: one item info array and one temp file per bucket */
295  context->changes = apr_array_make(pool, max_items,
296                                    sizeof(svn_fs_x__p2l_entry_t *));
297  SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
298                                   svn_io_file_del_on_close, pool, pool));
299  context->file_props = apr_array_make(pool, max_items,
300                                       sizeof(svn_fs_x__p2l_entry_t *));
301  SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
302                                   svn_io_file_del_on_close, pool, pool));
303  context->dir_props = apr_array_make(pool, max_items,
304                                      sizeof(svn_fs_x__p2l_entry_t *));
305  SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
306                                   svn_io_file_del_on_close, pool, pool));
307
308  /* noderev and representation item bucket */
309  context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
310  context->path_order = apr_array_make(pool, max_items,
311                                       sizeof(path_order_t *));
312  context->references = apr_array_make(pool, max_items,
313                                       sizeof(reference_t *));
314  context->reps = apr_array_make(pool, max_items,
315                                 sizeof(svn_fs_x__p2l_entry_t *));
316  SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
317                                   svn_io_file_del_on_close, pool, pool));
318
319  /* the pool used for temp structures */
320  context->info_pool = svn_pool_create(pool);
321  context->paths = svn_prefix_tree__create(context->info_pool);
322
323  return SVN_NO_ERROR;
324}
325
326/* Clean up / free all revision range specific data and files in CONTEXT.
327 * Use SCRATCH_POOL for temporary allocations.
328 */
329static svn_error_t *
330reset_pack_context(pack_context_t *context,
331                   apr_pool_t *scratch_pool)
332{
333  apr_array_clear(context->changes);
334  SVN_ERR(svn_io_file_trunc(context->changes_file, 0, scratch_pool));
335  apr_array_clear(context->file_props);
336  SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, scratch_pool));
337  apr_array_clear(context->dir_props);
338  SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, scratch_pool));
339
340  apr_array_clear(context->rev_offsets);
341  apr_array_clear(context->path_order);
342  apr_array_clear(context->references);
343  apr_array_clear(context->reps);
344  SVN_ERR(svn_io_file_trunc(context->reps_file, 0, scratch_pool));
345
346  svn_pool_clear(context->info_pool);
347  context->paths = svn_prefix_tree__create(context->info_pool);
348
349  return SVN_NO_ERROR;
350}
351
352/* Call this after the last revision range.  It will finalize all index files
353 * for CONTEXT and close any open files.
354 * Use SCRATCH_POOL for temporary allocations.
355 */
356static svn_error_t *
357close_pack_context(pack_context_t *context,
358                   apr_pool_t *scratch_pool)
359{
360  const char *proto_l2p_index_path;
361  const char *proto_p2l_index_path;
362
363  /* need the file names for the actual index creation call further down */
364  SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
365                               context->proto_l2p_index, scratch_pool));
366  SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
367                               context->proto_p2l_index, scratch_pool));
368
369  /* finalize proto index files */
370  SVN_ERR(svn_io_file_close(context->proto_l2p_index, scratch_pool));
371  SVN_ERR(svn_io_file_close(context->proto_p2l_index, scratch_pool));
372
373  /* Append the actual index data to the pack file. */
374  SVN_ERR(svn_fs_x__add_index_data(context->fs, context->pack_file,
375                                    proto_l2p_index_path,
376                                    proto_p2l_index_path,
377                                    context->shard_rev,
378                                    scratch_pool));
379
380  /* remove proto index files */
381  SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, scratch_pool));
382  SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, scratch_pool));
383
384  return SVN_NO_ERROR;
385}
386
387/* Efficiently copy SIZE bytes from SOURCE to DEST.  Invoke the CANCEL_FUNC
388 * from CONTEXT at regular intervals.
389 * Use SCRATCH_POOL for temporary allocations.
390 */
391static svn_error_t *
392copy_file_data(pack_context_t *context,
393               apr_file_t *dest,
394               apr_file_t *source,
395               svn_filesize_t size,
396               apr_pool_t *scratch_pool)
397{
398  /* most non-representation items will be small.  Minimize the buffer
399   * and infrastructure overhead in that case. */
400  enum { STACK_BUFFER_SIZE = 1024 };
401
402  if (size < STACK_BUFFER_SIZE)
403    {
404      /* copy small data using a fixed-size buffer on stack */
405      char buffer[STACK_BUFFER_SIZE];
406      SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
407                                     NULL, NULL, scratch_pool));
408      SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
409                                     NULL, scratch_pool));
410    }
411  else
412    {
413      /* use streaming copies for larger data blocks.  That may require
414       * the allocation of larger buffers and we should make sure that
415       * this extra memory is released asap. */
416      svn_fs_x__data_t *ffd = context->fs->fsap_data;
417      apr_pool_t *copypool = svn_pool_create(scratch_pool);
418      char *buffer = apr_palloc(copypool, ffd->block_size);
419
420      while (size)
421        {
422          apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
423          if (context->cancel_func)
424            SVN_ERR(context->cancel_func(context->cancel_baton));
425
426          SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
427                                         NULL, NULL, scratch_pool));
428          SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
429                                         NULL, scratch_pool));
430
431          size -= to_copy;
432        }
433
434      svn_pool_destroy(copypool);
435    }
436
437  return SVN_NO_ERROR;
438}
439
440/* Writes SIZE bytes, all 0, to DEST.
441 * Use SCRATCH_POOL for temporary allocations.
442 */
443static svn_error_t *
444write_null_bytes(apr_file_t *dest,
445                 apr_off_t size,
446                 apr_pool_t *scratch_pool)
447{
448  /* Have a collection of high-quality, easy to access NUL bytes handy. */
449  enum { BUFFER_SIZE = 1024 };
450  static const char buffer[BUFFER_SIZE] = { 0 };
451
452  /* copy SIZE of them into the file's buffer */
453  while (size)
454    {
455      apr_size_t to_write = MIN(size, BUFFER_SIZE);
456      SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL,
457                                     scratch_pool));
458      size -= to_write;
459    }
460
461  return SVN_NO_ERROR;
462}
463
464/* Copy the "simple" item (changed paths list or property representation)
465 * from the current position in REV_FILE to TEMP_FILE using CONTEXT.  Add
466 * a copy of ENTRY to ENTRIES but with an updated offset value that points
467 * to the copy destination in TEMP_FILE.
468 * Use SCRATCH_POOL for temporary allocations.
469 */
470static svn_error_t *
471copy_item_to_temp(pack_context_t *context,
472                  apr_array_header_t *entries,
473                  apr_file_t *temp_file,
474                  svn_fs_x__revision_file_t *rev_file,
475                  svn_fs_x__p2l_entry_t *entry,
476                  apr_pool_t *scratch_pool)
477{
478  apr_file_t *file;
479  svn_fs_x__p2l_entry_t *new_entry
480    = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
481
482  SVN_ERR(svn_io_file_get_offset(&new_entry->offset, temp_file,
483                                 scratch_pool));
484  APR_ARRAY_PUSH(entries, svn_fs_x__p2l_entry_t *) = new_entry;
485
486  SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file));
487  SVN_ERR(copy_file_data(context, temp_file, file, entry->size,
488                         scratch_pool));
489
490  return SVN_NO_ERROR;
491}
492
493/* Return the offset within CONTEXT->REPS that corresponds to item
494 * ITEM_INDEX in  REVISION.
495 */
496static int
497get_item_array_index(pack_context_t *context,
498                     svn_revnum_t revision,
499                     apr_int64_t item_index)
500{
501  assert(revision >= context->start_rev);
502  return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
503                                         revision - context->start_rev,
504                                         int);
505}
506
507/* Write INFO to the correct position in CONTEXT->REPS.  The latter may
508 * need auto-expanding.  Overwriting an array element is not allowed.
509 */
510static void
511add_item_rep_mapping(pack_context_t *context,
512                     svn_fs_x__p2l_entry_t *entry)
513{
514  int idx;
515  assert(entry->item_count == 1);
516
517  /* index of INFO */
518  idx = get_item_array_index(context,
519                             entry->items[0].change_set,
520                             entry->items[0].number);
521
522  /* make sure the index exists in the array */
523  while (context->reps->nelts <= idx)
524    APR_ARRAY_PUSH(context->reps, void *) = NULL;
525
526  /* set the element.  If there is already an entry, there are probably
527   * two items claiming to be the same -> bail out */
528  assert(!APR_ARRAY_IDX(context->reps, idx, void *));
529  APR_ARRAY_IDX(context->reps, idx, void *) = entry;
530}
531
532/* Return the P2L entry from CONTEXT->REPS for the given ID.  If there is
533 * none (or not anymore), return NULL.  If RESET has been specified, set
534 * the array entry to NULL after returning the entry.
535 */
536static svn_fs_x__p2l_entry_t *
537get_item(pack_context_t *context,
538         const svn_fs_x__id_t *id,
539         svn_boolean_t reset)
540{
541  svn_fs_x__p2l_entry_t *result = NULL;
542  svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
543  if (id->number && revision >= context->start_rev)
544    {
545      int idx = get_item_array_index(context, revision, id->number);
546      if (context->reps->nelts > idx)
547        {
548          result = APR_ARRAY_IDX(context->reps, idx, void *);
549          if (result && reset)
550            APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
551        }
552    }
553
554  return result;
555}
556
557/* Copy representation item identified by ENTRY from the current position
558 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking into needed by
559 * our placement algorithm to CONTEXT.
560 * Use SCRATCH_POOL for temporary allocations.
561 */
562static svn_error_t *
563copy_rep_to_temp(pack_context_t *context,
564                 svn_fs_x__revision_file_t *rev_file,
565                 svn_fs_x__p2l_entry_t *entry,
566                 apr_pool_t *scratch_pool)
567{
568  svn_fs_x__rep_header_t *rep_header;
569  svn_stream_t *stream;
570  apr_file_t *file;
571  apr_off_t source_offset = entry->offset;
572
573  /* create a copy of ENTRY, make it point to the copy destination and
574   * store it in CONTEXT */
575  entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
576  SVN_ERR(svn_io_file_get_offset(&entry->offset, context->reps_file,
577                                 scratch_pool));
578  add_item_rep_mapping(context, entry);
579
580  /* read & parse the representation header */
581  SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file));
582  SVN_ERR(svn_fs_x__read_rep_header(&rep_header, stream,
583                                    scratch_pool, scratch_pool));
584
585  /* if the representation is a delta against some other rep, link the two */
586  if (   rep_header->type == svn_fs_x__rep_delta
587      && rep_header->base_revision >= context->start_rev)
588    {
589      reference_t *reference = apr_pcalloc(context->info_pool,
590                                           sizeof(*reference));
591      reference->from = entry->items[0];
592      reference->to.change_set
593        = svn_fs_x__change_set_by_rev(rep_header->base_revision);
594      reference->to.number = rep_header->base_item_index;
595      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
596    }
597
598  /* copy the whole rep (including header!) to our temp file */
599  SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, source_offset));
600  SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file));
601  SVN_ERR(copy_file_data(context, context->reps_file, file, entry->size,
602                         scratch_pool));
603
604  return SVN_NO_ERROR;
605}
606
607/* Directories first, dirs / files sorted by name in reverse lexical order.
608 * This maximizes the chance of two items being located close to one another
609 * in *all* pack files independent of their change order.  It also groups
610 * multi-project repos nicely according to their sub-projects.  The reverse
611 * order aspect gives "trunk" preference over "tags" and "branches", so
612 * trunk-related items are more likely to be contiguous.
613 */
614static int
615compare_dir_entries(const svn_sort__item_t *a,
616                    const svn_sort__item_t *b)
617{
618  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
619  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
620
621  return strcmp(lhs->name, rhs->name);
622}
623
624apr_array_header_t *
625svn_fs_x__order_dir_entries(svn_fs_t *fs,
626                            apr_hash_t *directory,
627                            apr_pool_t *result_pool,
628                            apr_pool_t *scratch_pool)
629{
630  apr_array_header_t *ordered
631    = svn_sort__hash(directory, compare_dir_entries, scratch_pool);
632
633  apr_array_header_t *result
634    = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
635
636  int i;
637  for (i = 0; i < ordered->nelts; ++i)
638    APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
639      = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
640
641  return result;
642}
643
644/* Return a duplicate of the ORIGINAL path and with special sub-strings
645 * (e.g. "trunk") modified in such a way that have a lower lexicographic
646 * value than any other "normal" file name.
647 */
648static const char *
649tweak_path_for_ordering(const char *original,
650                        apr_pool_t *pool)
651{
652  /* We may add further special cases as needed. */
653  enum {SPECIAL_COUNT = 2};
654  static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
655  char *pos;
656  char *path = apr_pstrdup(pool, original);
657  int i;
658
659  /* Replace the first char of any "special" sub-string we find by
660   * a control char, i.e. '\1' .. '\31'.  In the rare event that this
661   * would clash with existing paths, no data will be lost but merely
662   * the node ordering will be sub-optimal.
663   */
664  for (i = 0; i < SPECIAL_COUNT; ++i)
665    for (pos = strstr(path, special[i]);
666         pos;
667         pos = strstr(pos + 1, special[i]))
668      {
669        *pos = (char)(i + '\1');
670      }
671
672   return path;
673}
674
675/* Copy node revision item identified by ENTRY from the current position
676 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking into needed by
677 * our placement algorithm to CONTEXT.
678 * Use SCRATCH_POOL for temporary allocations.
679 */
680static svn_error_t *
681copy_node_to_temp(pack_context_t *context,
682                  svn_fs_x__revision_file_t *rev_file,
683                  svn_fs_x__p2l_entry_t *entry,
684                  apr_pool_t *scratch_pool)
685{
686  path_order_t *path_order = apr_pcalloc(context->info_pool,
687                                         sizeof(*path_order));
688  svn_fs_x__noderev_t *noderev;
689  svn_stream_t *stream;
690  apr_file_t *file;
691  const char *sort_path;
692  apr_off_t source_offset = entry->offset;
693
694  /* read & parse noderev */
695  SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file));
696  SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, scratch_pool,
697                                 scratch_pool));
698
699  /* create a copy of ENTRY, make it point to the copy destination and
700   * store it in CONTEXT */
701  entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
702  SVN_ERR(svn_io_file_get_offset(&entry->offset, context->reps_file,
703                                 scratch_pool));
704  add_item_rep_mapping(context, entry);
705
706  /* copy the noderev to our temp file */
707  SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, source_offset));
708  SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file));
709  SVN_ERR(copy_file_data(context, context->reps_file, file, entry->size,
710                         scratch_pool));
711
712  /* if the node has a data representation, make that the node's "base".
713   * This will (often) cause the noderev to be placed right in front of
714   * its data representation. */
715
716  if (noderev->data_rep
717      &&    svn_fs_x__get_revnum(noderev->data_rep->id.change_set)
718         >= context->start_rev)
719    {
720      reference_t *reference = apr_pcalloc(context->info_pool,
721                                           sizeof(*reference));
722      reference->from = entry->items[0];
723      reference->to.change_set = noderev->data_rep->id.change_set;
724      reference->to.number = noderev->data_rep->id.number;
725      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
726
727      path_order->rep_id = reference->to;
728      path_order->expanded_size = noderev->data_rep->expanded_size;
729    }
730
731  /* Sort path is the key used for ordering noderevs and associated reps.
732   * It will not be stored in the final pack file. */
733  sort_path = tweak_path_for_ordering(noderev->created_path, scratch_pool);
734  path_order->path = svn_prefix_string__create(context->paths, sort_path);
735  path_order->node_id = noderev->node_id;
736  path_order->revision = svn_fs_x__get_revnum(noderev->noderev_id.change_set);
737  path_order->noderev_id = noderev->noderev_id;
738  APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
739
740  return SVN_NO_ERROR;
741}
742
743/* implements compare_fn_t. Place LHS before RHS, if the latter is older.
744 */
745static int
746compare_p2l_info(const svn_fs_x__p2l_entry_t * const * lhs,
747                 const svn_fs_x__p2l_entry_t * const * rhs)
748{
749  assert(*lhs != *rhs);
750  if ((*lhs)->item_count == 0)
751    return (*lhs)->item_count == 0 ? 0 : -1;
752  if ((*lhs)->item_count == 0)
753    return 1;
754
755  if ((*lhs)->items[0].change_set == (*rhs)->items[0].change_set)
756    return (*lhs)->items[0].number > (*rhs)->items[0].number ? -1 : 1;
757
758  return (*lhs)->items[0].change_set > (*rhs)->items[0].change_set ? -1 : 1;
759}
760
761/* Sort svn_fs_x__p2l_entry_t * array ENTRIES by age.  Place the latest
762 * items first.
763 */
764static void
765sort_items(apr_array_header_t *entries)
766{
767  svn_sort__array(entries,
768                  (int (*)(const void *, const void *))compare_p2l_info);
769}
770
771/* implements compare_fn_t.  Sort descending by PATH, NODE_ID and REVISION.
772 */
773static int
774compare_path_order(const path_order_t * const * lhs_p,
775                   const path_order_t * const * rhs_p)
776{
777  const path_order_t * lhs = *lhs_p;
778  const path_order_t * rhs = *rhs_p;
779
780  /* lexicographic order on path and node (i.e. latest first) */
781  int diff = svn_prefix_string__compare(lhs->path, rhs->path);
782  if (diff)
783    return diff;
784
785  /* reverse order on node (i.e. latest first) */
786  diff = svn_fs_x__id_compare(&rhs->node_id, &lhs->node_id);
787  if (diff)
788    return diff;
789
790  /* reverse order on revision (i.e. latest first) */
791  if (lhs->revision != rhs->revision)
792    return lhs->revision < rhs->revision ? 1 : -1;
793
794  return 0;
795}
796
797/* implements compare_fn_t.  Sort ascending by TO, FROM.
798 */
799static int
800compare_references(const reference_t * const * lhs_p,
801                   const reference_t * const * rhs_p)
802{
803  const reference_t * lhs = *lhs_p;
804  const reference_t * rhs = *rhs_p;
805
806  int diff = svn_fs_x__id_compare(&lhs->to, &rhs->to);
807  return diff ? diff : svn_fs_x__id_compare(&lhs->from, &rhs->from);
808}
809
810/* Order the data collected in CONTEXT such that we can place them in the
811 * desired order.
812 */
813static void
814sort_reps(pack_context_t *context)
815{
816  svn_sort__array(context->path_order,
817                  (int (*)(const void *, const void *))compare_path_order);
818  svn_sort__array(context->references,
819                  (int (*)(const void *, const void *))compare_references);
820}
821
822/* Return the remaining unused bytes in the current block in CONTEXT's
823 * pack file.
824 */
825static apr_off_t
826get_block_left(pack_context_t *context)
827{
828  svn_fs_x__data_t *ffd = context->fs->fsap_data;
829  return ffd->block_size - (context->pack_offset % ffd->block_size);
830}
831
832/* To prevent items from overlapping a block boundary, we will usually
833 * put them into the next block and top up the old one with NUL bytes.
834 * Pad CONTEXT's pack file to the end of the current block, if that padding
835 * is short enough.  Use SCRATCH_POOL for temporary allocations.
836 */
837static svn_error_t *
838auto_pad_block(pack_context_t *context,
839               apr_pool_t *scratch_pool)
840{
841  svn_fs_x__data_t *ffd = context->fs->fsap_data;
842
843  /* This is the maximum number of bytes "wasted" that way per block.
844   * Larger items will cross the block boundaries. */
845  const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
846
847  /* Is wasted space small enough to align the current item to the next
848   * block? */
849  apr_off_t padding = get_block_left(context);
850
851  if (padding < max_padding)
852    {
853      /* Yes. To up with NUL bytes and don't forget to create
854       * an P2L index entry marking this section as unused. */
855      svn_fs_x__p2l_entry_t null_entry;
856
857      null_entry.offset = context->pack_offset;
858      null_entry.size = padding;
859      null_entry.type = SVN_FS_X__ITEM_TYPE_UNUSED;
860      null_entry.fnv1_checksum = 0;
861      null_entry.item_count = 0;
862      null_entry.items = NULL;
863
864      SVN_ERR(write_null_bytes(context->pack_file, padding, scratch_pool));
865      SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
866                  (context->proto_p2l_index, &null_entry, scratch_pool));
867      context->pack_offset += padding;
868    }
869
870  return SVN_NO_ERROR;
871}
872
873/* Return the index of the first entry in CONTEXT->REFERENCES that
874 * references ITEM->ITEMS[0] if such entries exist.  All matching items
875 * will be consecutive.
876 */
877static int
878find_first_reference(pack_context_t *context,
879                     svn_fs_x__p2l_entry_t *item)
880{
881  int lower = 0;
882  int upper = context->references->nelts - 1;
883
884  while (lower <= upper)
885    {
886      int current = lower + (upper - lower) / 2;
887      reference_t *reference
888        = APR_ARRAY_IDX(context->references, current, reference_t *);
889
890      if (svn_fs_x__id_compare(&reference->to, item->items) < 0)
891        lower = current + 1;
892      else
893        upper = current - 1;
894    }
895
896  return lower;
897}
898
899/* Check whether entry number IDX in CONTEXT->REFERENCES references ITEM.
900 */
901static svn_boolean_t
902is_reference_match(pack_context_t *context,
903                   int idx,
904                   svn_fs_x__p2l_entry_t *item)
905{
906  reference_t *reference;
907  if (context->references->nelts <= idx)
908    return FALSE;
909
910  reference = APR_ARRAY_IDX(context->references, idx, reference_t *);
911  return svn_fs_x__id_eq(&reference->to, item->items);
912}
913
914/* Starting at IDX in CONTEXT->PATH_ORDER, select all representations and
915 * noderevs that should be placed into the same container, respectively.
916 * Append the path_order_t * elements encountered in SELECTED, the
917 * svn_fs_x__p2l_entry_t * of the representations that should be placed
918 * into the same reps container will be appended to REP_PARTS and the
919 * svn_fs_x__p2l_entry_t * of the noderevs referencing those reps will
920 * be appended to NODE_PARTS.
921 *
922 * Remove all returned items from the CONTEXT->REPS container and prevent
923 * them from being placed a second time later on.  That also means that the
924 * caller has to place all items returned.
925 */
926static svn_error_t *
927select_reps(pack_context_t *context,
928            int idx,
929            apr_array_header_t *selected,
930            apr_array_header_t *node_parts,
931            apr_array_header_t *rep_parts)
932{
933  apr_array_header_t *path_order = context->path_order;
934  path_order_t *start_path = APR_ARRAY_IDX(path_order, idx, path_order_t *);
935
936  svn_fs_x__p2l_entry_t *node_part;
937  svn_fs_x__p2l_entry_t *rep_part;
938  svn_fs_x__p2l_entry_t *depending;
939  int i, k;
940
941  /* collect all path_order records as well as rep and noderev items
942   * that occupy the same path with the same node. */
943  for (; idx < path_order->nelts; ++idx)
944    {
945      path_order_t *current_path
946        = APR_ARRAY_IDX(path_order, idx, path_order_t *);
947
948      if (!svn_fs_x__id_eq(&start_path->node_id, &current_path->node_id))
949        break;
950
951      APR_ARRAY_IDX(path_order, idx, path_order_t *) = NULL;
952      node_part = get_item(context, &current_path->noderev_id, TRUE);
953      rep_part = get_item(context, &current_path->rep_id, TRUE);
954
955      if (node_part && rep_part)
956        APR_ARRAY_PUSH(selected, path_order_t *) = current_path;
957
958      if (node_part)
959        APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = node_part;
960      if (rep_part)
961        APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = rep_part;
962    }
963
964  /* collect depending reps and noderevs that reference any of the collected
965   * reps */
966  for (i = 0; i < rep_parts->nelts; ++i)
967    {
968      rep_part = APR_ARRAY_IDX(rep_parts, i, svn_fs_x__p2l_entry_t*);
969      for (k = find_first_reference(context, rep_part);
970           is_reference_match(context, k, rep_part);
971           ++k)
972        {
973          reference_t *reference
974            = APR_ARRAY_IDX(context->references, k, reference_t *);
975
976          depending = get_item(context, &reference->from, TRUE);
977          if (!depending)
978            continue;
979
980          if (depending->type == SVN_FS_X__ITEM_TYPE_NODEREV)
981            APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = depending;
982          else
983            APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = depending;
984        }
985    }
986
987  return SVN_NO_ERROR;
988}
989
990/* Return TRUE, if all path_order_t * in SELECTED reference contents that is
991 * not longer than LIMIT.
992 */
993static svn_boolean_t
994reps_fit_into_containers(apr_array_header_t *selected,
995                         apr_uint64_t limit)
996{
997  int i;
998  for (i = 0; i < selected->nelts; ++i)
999    if (APR_ARRAY_IDX(selected, i, path_order_t *)->expanded_size > limit)
1000      return FALSE;
1001
1002  return TRUE;
1003}
1004
1005/* Write the *CONTAINER containing the noderevs described by the
1006 * svn_fs_x__p2l_entry_t * in ITEMS to the pack file on CONTEXT.
1007 * Append a P2L entry for the container to CONTAINER->REPS.
1008 * Afterwards, clear ITEMS and re-allocate *CONTAINER in CONTAINER_POOL
1009 * so the caller may fill them again.
1010 * Use SCRATCH_POOL for temporary allocations.
1011 */
1012static svn_error_t *
1013write_nodes_container(pack_context_t *context,
1014                      svn_fs_x__noderevs_t **container,
1015                      apr_array_header_t *items,
1016                      apr_pool_t *container_pool,
1017                      apr_pool_t *scratch_pool)
1018{
1019  int i;
1020  apr_off_t offset = 0;
1021  svn_fs_x__p2l_entry_t *container_entry;
1022  svn_stream_t *pack_stream;
1023
1024  if (items->nelts == 0)
1025    return SVN_NO_ERROR;
1026
1027  /* serialize container */
1028  container_entry = apr_palloc(context->info_pool, sizeof(*container_entry));
1029  pack_stream = svn_checksum__wrap_write_stream_fnv1a_32x4
1030                                (&container_entry->fnv1_checksum,
1031                                 svn_stream_from_aprfile2(context->pack_file,
1032                                                          TRUE, scratch_pool),
1033                                 scratch_pool);
1034  SVN_ERR(svn_fs_x__write_noderevs_container(pack_stream, *container,
1035                                             scratch_pool));
1036  SVN_ERR(svn_stream_close(pack_stream));
1037  SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1038                           scratch_pool));
1039
1040  /* replace first noderev item in ENTRIES with the container
1041     and set all others to NULL */
1042  container_entry->offset = context->pack_offset;
1043  container_entry->size = offset - container_entry->offset;
1044  container_entry->type = SVN_FS_X__ITEM_TYPE_NODEREVS_CONT;
1045  container_entry->item_count = items->nelts;
1046  container_entry->items = apr_palloc(context->info_pool,
1047      sizeof(svn_fs_x__id_t) * container_entry->item_count);
1048
1049  for (i = 0; i < items->nelts; ++i)
1050    container_entry->items[i]
1051      = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *)->items[0];
1052
1053  context->pack_offset = offset;
1054  APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *)
1055    = container_entry;
1056
1057  /* Write P2L index for copied items, i.e. the 1 container */
1058  SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1059            (context->proto_p2l_index, container_entry, scratch_pool));
1060
1061  svn_pool_clear(container_pool);
1062  *container = svn_fs_x__noderevs_create(16, container_pool);
1063  apr_array_clear(items);
1064
1065  return SVN_NO_ERROR;
1066}
1067
1068/* Read the noderevs given by the svn_fs_x__p2l_entry_t * in NODE_PARTS
1069 * from TEMP_FILE and add them to *CONTAINER and NODES_IN_CONTAINER.
1070 * Whenever the container grows bigger than the current block in CONTEXT,
1071 * write the data to disk and continue in the next block.
1072 *
1073 * Use CONTAINER_POOL to re-allocate the *CONTAINER as necessary and
1074 * SCRATCH_POOL to temporary allocations.
1075 */
1076static svn_error_t *
1077store_nodes(pack_context_t *context,
1078            apr_file_t *temp_file,
1079            apr_array_header_t *node_parts,
1080            svn_fs_x__noderevs_t **container,
1081            apr_array_header_t *nodes_in_container,
1082            apr_pool_t *container_pool,
1083            apr_pool_t *scratch_pool)
1084{
1085  int i;
1086
1087  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1088  svn_stream_t *stream
1089    = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool);
1090
1091  /* number of bytes in the current block not being spent on fixed-size
1092     items (i.e. those not put into the container). */
1093  apr_size_t capacity_left = get_block_left(context);
1094
1095  /* Estimated noderev container size */
1096  apr_size_t last_container_size = 0, container_size = 0;
1097
1098  /* Estimate extra capacity we will gain from container compression. */
1099  apr_size_t pack_savings = 0;
1100  for (i = 0; i < node_parts->nelts; ++i)
1101    {
1102      svn_fs_x__noderev_t *noderev;
1103      svn_fs_x__p2l_entry_t *entry
1104        = APR_ARRAY_IDX(node_parts, i, svn_fs_x__p2l_entry_t *);
1105
1106      /* if we reached the limit, check whether we saved some space
1107         through the container. */
1108      if (capacity_left + pack_savings < container_size + entry->size)
1109        container_size = svn_fs_x__noderevs_estimate_size(*container);
1110
1111      /* If necessary and the container is large enough, try harder
1112         by actually serializing the container and determine current
1113         savings due to compression. */
1114      if (   capacity_left + pack_savings < container_size + entry->size
1115          && container_size > last_container_size + 2000)
1116        {
1117          svn_stringbuf_t *serialized
1118            = svn_stringbuf_create_ensure(container_size, iterpool);
1119          svn_stream_t *temp_stream
1120            = svn_stream_from_stringbuf(serialized, iterpool);
1121
1122          SVN_ERR(svn_fs_x__write_noderevs_container(temp_stream, *container,
1123                                                     iterpool));
1124          SVN_ERR(svn_stream_close(temp_stream));
1125
1126          last_container_size = container_size;
1127          pack_savings = container_size - serialized->len;
1128        }
1129
1130      /* still doesn't fit? -> block is full. Flush */
1131      if (   capacity_left + pack_savings < container_size + entry->size
1132          && nodes_in_container->nelts < 2)
1133        {
1134          SVN_ERR(auto_pad_block(context, iterpool));
1135          capacity_left = get_block_left(context);
1136        }
1137
1138      /* still doesn't fit? -> block is full. Flush */
1139      if (capacity_left + pack_savings < container_size + entry->size)
1140        {
1141          SVN_ERR(write_nodes_container(context, container,
1142                                        nodes_in_container, container_pool,
1143                                        iterpool));
1144
1145          capacity_left = get_block_left(context);
1146          pack_savings = 0;
1147          container_size = 0;
1148        }
1149
1150      /* item will fit into the block. */
1151      SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, iterpool));
1152      SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, iterpool, iterpool));
1153      svn_fs_x__noderevs_add(*container, noderev);
1154
1155      container_size += entry->size;
1156      APR_ARRAY_PUSH(nodes_in_container, svn_fs_x__p2l_entry_t *) = entry;
1157
1158      svn_pool_clear(iterpool);
1159    }
1160
1161  svn_pool_destroy(iterpool);
1162
1163  return SVN_NO_ERROR;
1164}
1165
1166
1167/* Finalize CONTAINER and write it to CONTEXT's pack file.
1168 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
1169 * Use SCRATCH_POOL for temporary allocations.
1170 */
1171static svn_error_t *
1172write_reps_container(pack_context_t *context,
1173                     svn_fs_x__reps_builder_t *container,
1174                     apr_array_header_t *sub_items,
1175                     apr_array_header_t *new_entries,
1176                     apr_pool_t *scratch_pool)
1177{
1178  apr_off_t offset = 0;
1179  svn_fs_x__p2l_entry_t container_entry;
1180
1181  svn_stream_t *pack_stream
1182    = svn_checksum__wrap_write_stream_fnv1a_32x4
1183                                (&container_entry.fnv1_checksum,
1184                                 svn_stream_from_aprfile2(context->pack_file,
1185                                                          TRUE, scratch_pool),
1186                                 scratch_pool);
1187
1188  SVN_ERR(svn_fs_x__write_reps_container(pack_stream, container,
1189                                         scratch_pool));
1190  SVN_ERR(svn_stream_close(pack_stream));
1191  SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1192                           scratch_pool));
1193
1194  container_entry.offset = context->pack_offset;
1195  container_entry.size = offset - container_entry.offset;
1196  container_entry.type = SVN_FS_X__ITEM_TYPE_REPS_CONT;
1197  container_entry.item_count = sub_items->nelts;
1198  container_entry.items = (svn_fs_x__id_t *)sub_items->elts;
1199
1200  context->pack_offset = offset;
1201  APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *)
1202    = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool);
1203
1204  SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1205            (context->proto_p2l_index, &container_entry, scratch_pool));
1206
1207  return SVN_NO_ERROR;
1208}
1209
1210/* Read the (property) representations identified by svn_fs_x__p2l_entry_t
1211 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into
1212 * CONTEXT->PACK_FILE.  Use SCRATCH_POOL for temporary allocations.
1213 */
1214static svn_error_t *
1215write_reps_containers(pack_context_t *context,
1216                      apr_array_header_t *entries,
1217                      apr_file_t *temp_file,
1218                      apr_array_header_t *new_entries,
1219                      apr_pool_t *scratch_pool)
1220{
1221  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1222  apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1223  int i;
1224
1225  apr_ssize_t block_left = get_block_left(context);
1226
1227  svn_fs_x__reps_builder_t *container
1228    = svn_fs_x__reps_builder_create(context->fs, container_pool);
1229  apr_array_header_t *sub_items
1230    = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t));
1231  svn_fs_x__revision_file_t *file;
1232
1233  SVN_ERR(svn_fs_x__rev_file_wrap_temp(&file, context->fs, temp_file,
1234                                       scratch_pool));
1235
1236  /* copy all items in strict order */
1237  for (i = entries->nelts-1; i >= 0; --i)
1238    {
1239      svn_fs_x__representation_t representation = { 0 };
1240      svn_stringbuf_t *contents;
1241      svn_stream_t *stream;
1242      apr_size_t list_index;
1243      svn_fs_x__p2l_entry_t *entry
1244        = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1245
1246      if ((block_left < entry->size) && sub_items->nelts)
1247        {
1248          block_left = get_block_left(context)
1249                     - svn_fs_x__reps_estimate_size(container);
1250        }
1251
1252      if ((block_left < entry->size) && sub_items->nelts)
1253        {
1254          SVN_ERR(write_reps_container(context, container, sub_items,
1255                                       new_entries, iterpool));
1256
1257          apr_array_clear(sub_items);
1258          svn_pool_clear(container_pool);
1259          container = svn_fs_x__reps_builder_create(context->fs,
1260                                                    container_pool);
1261          block_left = get_block_left(context);
1262        }
1263
1264      /* still enough space in current block? */
1265      if (block_left < entry->size)
1266        {
1267          SVN_ERR(auto_pad_block(context, iterpool));
1268          block_left = get_block_left(context);
1269        }
1270
1271      assert(entry->item_count == 1);
1272      representation.id = entry->items[0];
1273
1274      /* select the change list in the source file, parse it and add it to
1275       * the container */
1276      SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1277                               iterpool));
1278      SVN_ERR(svn_fs_x__get_representation_length(&representation.size,
1279                                             &representation.expanded_size,
1280                                             context->fs, file,
1281                                             entry, iterpool));
1282      SVN_ERR(svn_fs_x__get_contents(&stream, context->fs, &representation,
1283                                     FALSE, iterpool));
1284      contents = svn_stringbuf_create_ensure(representation.expanded_size,
1285                                             iterpool);
1286      contents->len = representation.expanded_size;
1287
1288      /* The representation is immutable.  Read it normally. */
1289      SVN_ERR(svn_stream_read_full(stream, contents->data, &contents->len));
1290      SVN_ERR(svn_stream_close(stream));
1291
1292      SVN_ERR(svn_fs_x__reps_add(&list_index, container,
1293                                 svn_stringbuf__morph_into_string(contents)));
1294      SVN_ERR_ASSERT(list_index == sub_items->nelts);
1295      block_left -= entry->size;
1296
1297      APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0];
1298
1299      svn_pool_clear(iterpool);
1300    }
1301
1302  if (sub_items->nelts)
1303    SVN_ERR(write_reps_container(context, container, sub_items,
1304                                 new_entries, iterpool));
1305
1306  svn_pool_destroy(iterpool);
1307  svn_pool_destroy(container_pool);
1308
1309  return SVN_NO_ERROR;
1310}
1311
1312/* Return TRUE if the estimated size of the NODES_IN_CONTAINER plus the
1313 * representations given as svn_fs_x__p2l_entry_t * in ENTRIES may exceed
1314 * the space left in the current block.
1315 */
1316static svn_boolean_t
1317should_flush_nodes_container(pack_context_t *context,
1318                             svn_fs_x__noderevs_t *nodes_container,
1319                             apr_array_header_t *entries)
1320{
1321  apr_ssize_t block_left = get_block_left(context);
1322  apr_ssize_t rep_sum = 0;
1323  apr_ssize_t container_size
1324    = svn_fs_x__noderevs_estimate_size(nodes_container);
1325
1326  int i;
1327  for (i = 0; i < entries->nelts; ++i)
1328    {
1329      svn_fs_x__p2l_entry_t *entry
1330        = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1331      rep_sum += entry->size;
1332    }
1333
1334  return block_left < rep_sum + container_size;
1335}
1336
1337/* Read the contents of the first COUNT non-NULL, non-empty items in ITEMS
1338 * from TEMP_FILE and write them to CONTEXT->PACK_FILE.
1339 * Use SCRATCH_POOL for temporary allocations.
1340 */
1341static svn_error_t *
1342store_items(pack_context_t *context,
1343            apr_file_t *temp_file,
1344            apr_array_header_t *items,
1345            int count,
1346            apr_pool_t *scratch_pool)
1347{
1348  int i;
1349  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1350
1351  /* copy all items in strict order */
1352  for (i = 0; i < count; ++i)
1353    {
1354      svn_fs_x__p2l_entry_t *entry
1355        = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *);
1356      if (!entry
1357          || entry->type == SVN_FS_X__ITEM_TYPE_UNUSED
1358          || entry->item_count == 0)
1359        continue;
1360
1361      /* select the item in the source file and copy it into the target
1362       * pack file */
1363      SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1364                               iterpool));
1365      SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1366                             entry->size, iterpool));
1367
1368      /* write index entry and update current position */
1369      entry->offset = context->pack_offset;
1370      context->pack_offset += entry->size;
1371
1372      SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1373                  (context->proto_p2l_index, entry, iterpool));
1374
1375      APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *) = entry;
1376      svn_pool_clear(iterpool);
1377    }
1378
1379  svn_pool_destroy(iterpool);
1380
1381  return SVN_NO_ERROR;
1382}
1383
1384/* Copy (append) the items identified by svn_fs_x__p2l_entry_t * elements
1385 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1386 * Use SCRATCH_POOL for temporary allocations.
1387 */
1388static svn_error_t *
1389copy_reps_from_temp(pack_context_t *context,
1390                    apr_file_t *temp_file,
1391                    apr_pool_t *scratch_pool)
1392{
1393  svn_fs_x__data_t *ffd = context->fs->fsap_data;
1394
1395  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1396  apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1397  apr_array_header_t *path_order = context->path_order;
1398  apr_array_header_t *reps = context->reps;
1399  apr_array_header_t *selected = apr_array_make(scratch_pool, 16,
1400                                                path_order->elt_size);
1401  apr_array_header_t *node_parts = apr_array_make(scratch_pool, 16,
1402                                                  reps->elt_size);
1403  apr_array_header_t *rep_parts = apr_array_make(scratch_pool, 16,
1404                                                 reps->elt_size);
1405  apr_array_header_t *nodes_in_container = apr_array_make(scratch_pool, 16,
1406                                                          reps->elt_size);
1407  int i, k;
1408  int initial_reps_count = reps->nelts;
1409
1410  /* 1 container for all noderevs in the current block.  We will try to
1411   * not write it to disk until the current block fills up, i.e. aim for
1412   * a single noderevs container per block. */
1413  svn_fs_x__noderevs_t *nodes_container
1414    = svn_fs_x__noderevs_create(16, container_pool);
1415
1416  /* copy items in path order. Create block-sized containers. */
1417  for (i = 0; i < path_order->nelts; ++i)
1418    {
1419      if (APR_ARRAY_IDX(path_order, i, path_order_t *) == NULL)
1420        continue;
1421
1422      /* Collect reps to combine and all noderevs referencing them */
1423      SVN_ERR(select_reps(context, i, selected, node_parts, rep_parts));
1424
1425      /* store the noderevs container in front of the reps */
1426      SVN_ERR(store_nodes(context, temp_file, node_parts, &nodes_container,
1427                          nodes_in_container, container_pool, iterpool));
1428
1429      /* actually flush the noderevs to disk if the reps container is likely
1430       * to fill the block, i.e. no further noderevs will be added to the
1431       * nodes container. */
1432      if (should_flush_nodes_container(context, nodes_container, node_parts))
1433        SVN_ERR(write_nodes_container(context, &nodes_container,
1434                                      nodes_in_container, container_pool,
1435                                      iterpool));
1436
1437      /* if all reps are short enough put them into one container.
1438       * Otherwise, just store all containers here. */
1439      if (reps_fit_into_containers(selected, 2 * ffd->block_size))
1440        SVN_ERR(write_reps_containers(context, rep_parts, temp_file,
1441                                      context->reps, iterpool));
1442      else
1443        SVN_ERR(store_items(context, temp_file, rep_parts, rep_parts->nelts,
1444                            iterpool));
1445
1446      /* processed all items */
1447      apr_array_clear(selected);
1448      apr_array_clear(node_parts);
1449      apr_array_clear(rep_parts);
1450
1451      svn_pool_clear(iterpool);
1452    }
1453
1454  /* flush noderevs container to disk */
1455  if (nodes_in_container->nelts)
1456    SVN_ERR(write_nodes_container(context, &nodes_container,
1457                                  nodes_in_container, container_pool,
1458                                  iterpool));
1459
1460  /* copy all items in strict order */
1461  SVN_ERR(store_items(context, temp_file, reps, initial_reps_count,
1462                      scratch_pool));
1463
1464  /* vaccum ENTRIES array: eliminate NULL entries */
1465  for (i = 0, k = 0; i < reps->nelts; ++i)
1466    {
1467      svn_fs_x__p2l_entry_t *entry
1468        = APR_ARRAY_IDX(reps, i, svn_fs_x__p2l_entry_t *);
1469      if (entry)
1470        {
1471          APR_ARRAY_IDX(reps, k, svn_fs_x__p2l_entry_t *) = entry;
1472          ++k;
1473        }
1474    }
1475  reps->nelts = k;
1476
1477  svn_pool_destroy(iterpool);
1478  svn_pool_destroy(container_pool);
1479
1480  return SVN_NO_ERROR;
1481}
1482
1483/* Finalize CONTAINER and write it to CONTEXT's pack file.
1484 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
1485 * Use SCRATCH_POOL for temporary allocations.
1486 */
1487static svn_error_t *
1488write_changes_container(pack_context_t *context,
1489                        svn_fs_x__changes_t *container,
1490                        apr_array_header_t *sub_items,
1491                        apr_array_header_t *new_entries,
1492                        apr_pool_t *scratch_pool)
1493{
1494  apr_off_t offset = 0;
1495  svn_fs_x__p2l_entry_t container_entry;
1496
1497  svn_stream_t *pack_stream
1498    = svn_checksum__wrap_write_stream_fnv1a_32x4
1499                                (&container_entry.fnv1_checksum,
1500                                 svn_stream_from_aprfile2(context->pack_file,
1501                                                          TRUE, scratch_pool),
1502                                 scratch_pool);
1503
1504  SVN_ERR(svn_fs_x__write_changes_container(pack_stream,
1505                                             container,
1506                                             scratch_pool));
1507  SVN_ERR(svn_stream_close(pack_stream));
1508  SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1509                           scratch_pool));
1510
1511  container_entry.offset = context->pack_offset;
1512  container_entry.size = offset - container_entry.offset;
1513  container_entry.type = SVN_FS_X__ITEM_TYPE_CHANGES_CONT;
1514  container_entry.item_count = sub_items->nelts;
1515  container_entry.items = (svn_fs_x__id_t *)sub_items->elts;
1516
1517  context->pack_offset = offset;
1518  APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *)
1519    = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool);
1520
1521  SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1522            (context->proto_p2l_index, &container_entry, scratch_pool));
1523
1524  return SVN_NO_ERROR;
1525}
1526
1527/* Read the change lists identified by svn_fs_x__p2l_entry_t * elements
1528 * in ENTRIES strictly in from TEMP_FILE, aggregate them and write them
1529 * into CONTEXT->PACK_FILE.  Use SCRATCH_POOL for temporary allocations.
1530 */
1531static svn_error_t *
1532write_changes_containers(pack_context_t *context,
1533                         apr_array_header_t *entries,
1534                         apr_file_t *temp_file,
1535                         apr_pool_t *scratch_pool)
1536{
1537  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1538  apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1539  int i;
1540
1541  apr_ssize_t block_left = get_block_left(context);
1542  apr_ssize_t estimated_addition = 0;
1543
1544  svn_fs_x__changes_t *container
1545    = svn_fs_x__changes_create(1000, container_pool);
1546  apr_array_header_t *sub_items
1547    = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t));
1548  apr_array_header_t *new_entries
1549    = apr_array_make(context->info_pool, 16, entries->elt_size);
1550  svn_stream_t *temp_stream
1551    = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool);
1552
1553  /* copy all items in strict order */
1554  for (i = entries->nelts-1; i >= 0; --i)
1555    {
1556      apr_array_header_t *changes;
1557      apr_size_t list_index;
1558      svn_fs_x__p2l_entry_t *entry
1559        = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1560
1561      /* zip compression alone will significantly reduce the size of large
1562       * change lists. So, we will probably need even less than this estimate.
1563       */
1564      apr_ssize_t estimated_size = (entry->size / 5) + 250;
1565
1566      /* If necessary and enough data has been added to the container since
1567       * the last test, try harder by actually serializing the container and
1568       * determine current savings due to compression. */
1569      if (block_left < estimated_size && estimated_addition > 2000)
1570        {
1571          svn_stringbuf_t *serialized
1572            = svn_stringbuf_create_ensure(get_block_left(context), iterpool);
1573          svn_stream_t *memory_stream
1574            = svn_stream_from_stringbuf(serialized, iterpool);
1575
1576          SVN_ERR(svn_fs_x__write_changes_container(memory_stream,
1577                                                     container, iterpool));
1578          SVN_ERR(svn_stream_close(temp_stream));
1579
1580          block_left = get_block_left(context) - serialized->len;
1581          estimated_addition = 0;
1582        }
1583
1584      if ((block_left < estimated_size) && sub_items->nelts)
1585        {
1586          SVN_ERR(write_changes_container(context, container, sub_items,
1587                                          new_entries, iterpool));
1588
1589          apr_array_clear(sub_items);
1590          svn_pool_clear(container_pool);
1591          container = svn_fs_x__changes_create(1000, container_pool);
1592          block_left = get_block_left(context);
1593          estimated_addition = 0;
1594        }
1595
1596      /* still enough space in current block? */
1597      if (block_left < estimated_size)
1598        {
1599          SVN_ERR(auto_pad_block(context, iterpool));
1600          block_left = get_block_left(context);
1601        }
1602
1603      /* select the change list in the source file, parse it and add it to
1604       * the container */
1605      SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1606                               iterpool));
1607      SVN_ERR(svn_fs_x__read_changes(&changes, temp_stream, INT_MAX,
1608                                     scratch_pool, iterpool));
1609      SVN_ERR(svn_fs_x__changes_append_list(&list_index, container, changes));
1610      SVN_ERR_ASSERT(list_index == sub_items->nelts);
1611      block_left -= estimated_size;
1612      estimated_addition += estimated_size;
1613
1614      APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0];
1615
1616      svn_pool_clear(iterpool);
1617    }
1618
1619  if (sub_items->nelts)
1620    SVN_ERR(write_changes_container(context, container, sub_items,
1621                                    new_entries, iterpool));
1622
1623  *entries = *new_entries;
1624  svn_pool_destroy(iterpool);
1625  svn_pool_destroy(container_pool);
1626
1627  return SVN_NO_ERROR;
1628}
1629
1630/* Read the (property) representations identified by svn_fs_x__p2l_entry_t
1631 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into
1632 * CONTEXT->PACK_FILE.  Use SCRATCH_POOL for temporary allocations.
1633 */
1634static svn_error_t *
1635write_property_containers(pack_context_t *context,
1636                          apr_array_header_t *entries,
1637                          apr_file_t *temp_file,
1638                          apr_pool_t *scratch_pool)
1639{
1640  apr_array_header_t *new_entries
1641    = apr_array_make(context->info_pool, 16, entries->elt_size);
1642
1643  SVN_ERR(write_reps_containers(context, entries, temp_file, new_entries,
1644                                scratch_pool));
1645
1646  *entries = *new_entries;
1647
1648  return SVN_NO_ERROR;
1649}
1650
1651/* Append all entries of svn_fs_x__p2l_entry_t * array TO_APPEND to
1652 * svn_fs_x__p2l_entry_t * array DEST.
1653 */
1654static void
1655append_entries(apr_array_header_t *dest,
1656               apr_array_header_t *to_append)
1657{
1658  int i;
1659  for (i = 0; i < to_append->nelts; ++i)
1660    APR_ARRAY_PUSH(dest, svn_fs_x__p2l_entry_t *)
1661      = APR_ARRAY_IDX(to_append, i, svn_fs_x__p2l_entry_t *);
1662}
1663
1664/* Write the log-to-phys proto index file for CONTEXT and use POOL for
1665 * temporary allocations.  All items in all buckets must have been placed
1666 * by now.
1667 */
1668static svn_error_t *
1669write_l2p_index(pack_context_t *context,
1670                apr_pool_t *pool)
1671{
1672  apr_pool_t *scratch_pool = svn_pool_create(pool);
1673  const char *temp_name;
1674  const char *proto_index;
1675  apr_off_t offset = 0;
1676
1677  /* lump all items into one bucket.  As target, use the bucket that
1678   * probably has the most entries already. */
1679  append_entries(context->reps, context->changes);
1680  append_entries(context->reps, context->file_props);
1681  append_entries(context->reps, context->dir_props);
1682
1683  /* Let the index code do the expensive L2P -> P2L transformation. */
1684  SVN_ERR(svn_fs_x__l2p_index_from_p2l_entries(&temp_name,
1685                                               context->fs,
1686                                               context->reps,
1687                                               pool, scratch_pool));
1688
1689  /* Append newly written segment to exisiting proto index file. */
1690  SVN_ERR(svn_io_file_name_get(&proto_index, context->proto_l2p_index,
1691                               scratch_pool));
1692
1693  SVN_ERR(svn_io_file_flush(context->proto_l2p_index, scratch_pool));
1694  SVN_ERR(svn_io_append_file(temp_name, proto_index, scratch_pool));
1695  SVN_ERR(svn_io_remove_file2(temp_name, FALSE, scratch_pool));
1696  SVN_ERR(svn_io_file_seek(context->proto_l2p_index, APR_END, &offset,
1697                           scratch_pool));
1698
1699  /* Done. */
1700  svn_pool_destroy(scratch_pool);
1701
1702  return SVN_NO_ERROR;
1703}
1704
1705/* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1706 * to 4.  Use SCRATCH_POOL for temporary allocations.
1707 */
1708static svn_error_t *
1709pack_range(pack_context_t *context,
1710           apr_pool_t *scratch_pool)
1711{
1712  svn_fs_x__data_t *ffd = context->fs->fsap_data;
1713  apr_pool_t *revpool = svn_pool_create(scratch_pool);
1714  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1715
1716  /* Phase 2: Copy items into various buckets and build tracking info */
1717  svn_revnum_t revision;
1718  for (revision = context->start_rev; revision < context->end_rev; ++revision)
1719    {
1720      apr_off_t offset = 0;
1721      svn_fs_x__revision_file_t *rev_file;
1722      svn_fs_x__index_info_t l2p_index_info;
1723
1724      /* Get the rev file dimensions (mainly index locations). */
1725      SVN_ERR(svn_fs_x__rev_file_init(&rev_file, context->fs, revision,
1726                                      revpool));
1727      SVN_ERR(svn_fs_x__rev_file_l2p_info(&l2p_index_info, rev_file));
1728
1729      /* store the indirect array index */
1730      APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1731
1732      /* read the phys-to-log index file until we covered the whole rev file.
1733       * That index contains enough info to build both target indexes from it. */
1734      while (offset < l2p_index_info.start)
1735        {
1736          /* read one cluster */
1737          int i;
1738          apr_array_header_t *entries;
1739          svn_pool_clear(iterpool);
1740
1741          SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs,
1742                                             rev_file, revision, offset,
1743                                             ffd->p2l_page_size, iterpool,
1744                                             iterpool));
1745
1746          for (i = 0; i < entries->nelts; ++i)
1747            {
1748              svn_fs_x__p2l_entry_t *entry
1749                = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
1750
1751              /* skip first entry if that was duplicated due crossing a
1752                 cluster boundary */
1753              if (offset > entry->offset)
1754                continue;
1755
1756              /* process entry while inside the rev file */
1757              offset = entry->offset;
1758              if (offset < l2p_index_info.start)
1759                {
1760                  SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, offset));
1761
1762                  if (entry->type == SVN_FS_X__ITEM_TYPE_CHANGES)
1763                    SVN_ERR(copy_item_to_temp(context,
1764                                              context->changes,
1765                                              context->changes_file,
1766                                              rev_file, entry, iterpool));
1767                  else if (entry->type == SVN_FS_X__ITEM_TYPE_FILE_PROPS)
1768                    SVN_ERR(copy_item_to_temp(context,
1769                                              context->file_props,
1770                                              context->file_props_file,
1771                                              rev_file, entry, iterpool));
1772                  else if (entry->type == SVN_FS_X__ITEM_TYPE_DIR_PROPS)
1773                    SVN_ERR(copy_item_to_temp(context,
1774                                              context->dir_props,
1775                                              context->dir_props_file,
1776                                              rev_file, entry, iterpool));
1777                  else if (   entry->type == SVN_FS_X__ITEM_TYPE_FILE_REP
1778                           || entry->type == SVN_FS_X__ITEM_TYPE_DIR_REP)
1779                    SVN_ERR(copy_rep_to_temp(context, rev_file, entry,
1780                                             iterpool));
1781                  else if (entry->type == SVN_FS_X__ITEM_TYPE_NODEREV)
1782                    SVN_ERR(copy_node_to_temp(context, rev_file, entry,
1783                                              iterpool));
1784                  else
1785                    SVN_ERR_ASSERT(entry->type == SVN_FS_X__ITEM_TYPE_UNUSED);
1786
1787                  offset += entry->size;
1788                }
1789            }
1790
1791          if (context->cancel_func)
1792            SVN_ERR(context->cancel_func(context->cancel_baton));
1793        }
1794
1795      svn_pool_clear(revpool);
1796    }
1797
1798  svn_pool_destroy(iterpool);
1799
1800  /* phase 3: placement.
1801   * Use "newest first" placement for simple items. */
1802  sort_items(context->changes);
1803  sort_items(context->file_props);
1804  sort_items(context->dir_props);
1805
1806  /* follow dependencies recursively for noderevs and data representations */
1807  sort_reps(context);
1808
1809  /* phase 4: copy bucket data to pack file.  Write P2L index. */
1810  SVN_ERR(write_changes_containers(context, context->changes,
1811                                   context->changes_file, revpool));
1812  svn_pool_clear(revpool);
1813  SVN_ERR(write_property_containers(context, context->file_props,
1814                                    context->file_props_file, revpool));
1815  svn_pool_clear(revpool);
1816  SVN_ERR(write_property_containers(context, context->dir_props,
1817                                    context->dir_props_file, revpool));
1818  svn_pool_clear(revpool);
1819  SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1820  svn_pool_clear(revpool);
1821
1822  /* write L2P index as well (now that we know all target offsets) */
1823  SVN_ERR(write_l2p_index(context, revpool));
1824
1825  svn_pool_destroy(revpool);
1826
1827  return SVN_NO_ERROR;
1828}
1829
1830/* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1831 * This function will only be used for very large revisions (>>100k changes).
1832 * Use SCRATCH_POOL for temporary allocations.
1833 */
1834static svn_error_t *
1835append_revision(pack_context_t *context,
1836                apr_pool_t *scratch_pool)
1837{
1838  svn_fs_x__data_t *ffd = context->fs->fsap_data;
1839  apr_off_t offset = 0;
1840  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1841  svn_fs_x__revision_file_t *rev_file;
1842  apr_file_t *file;
1843  svn_filesize_t revdata_size;
1844
1845  /* Copy all non-index contents the rev file to the end of the pack file. */
1846  SVN_ERR(svn_fs_x__rev_file_init(&rev_file, context->fs, context->start_rev,
1847                                  scratch_pool));
1848  SVN_ERR(svn_fs_x__rev_file_data_size(&revdata_size, rev_file));
1849
1850  SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file));
1851  SVN_ERR(svn_io_file_aligned_seek(file, ffd->block_size, NULL, 0,
1852                                   iterpool));
1853  SVN_ERR(copy_file_data(context, context->pack_file, file, revdata_size,
1854                         iterpool));
1855
1856  /* mark the start of a new revision */
1857  SVN_ERR(svn_fs_x__l2p_proto_index_add_revision(context->proto_l2p_index,
1858                                                 scratch_pool));
1859
1860  /* read the phys-to-log index file until we covered the whole rev file.
1861   * That index contains enough info to build both target indexes from it. */
1862  while (offset < revdata_size)
1863    {
1864      /* read one cluster */
1865      int i;
1866      apr_array_header_t *entries;
1867      SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs, rev_file,
1868                                         context->start_rev, offset,
1869                                         ffd->p2l_page_size, iterpool,
1870                                         iterpool));
1871
1872      for (i = 0; i < entries->nelts; ++i)
1873        {
1874          svn_fs_x__p2l_entry_t *entry
1875            = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
1876
1877          /* skip first entry if that was duplicated due crossing a
1878             cluster boundary */
1879          if (offset > entry->offset)
1880            continue;
1881
1882          /* process entry while inside the rev file */
1883          offset = entry->offset;
1884          if (offset < revdata_size)
1885            {
1886              /* there should be true containers */
1887              SVN_ERR_ASSERT(entry->item_count == 1);
1888
1889              entry->offset += context->pack_offset;
1890              offset += entry->size;
1891              SVN_ERR(svn_fs_x__l2p_proto_index_add_entry
1892                        (context->proto_l2p_index, entry->offset, 0,
1893                         entry->items[0].number, iterpool));
1894              SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1895                        (context->proto_p2l_index, entry, iterpool));
1896            }
1897        }
1898
1899      svn_pool_clear(iterpool);
1900    }
1901
1902  svn_pool_destroy(iterpool);
1903  context->pack_offset += revdata_size;
1904
1905  return SVN_NO_ERROR;
1906}
1907
1908/* Format 7 packing logic.
1909 *
1910 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1911 * SHARD_DIR into the PACK_FILE_DIR, using SCRATCH_POOL for temporary
1912 * allocations.  Limit the extra memory consumption to MAX_MEM bytes.
1913 * CANCEL_FUNC and CANCEL_BATON are what you think they are.
1914 * Schedule necessary fsync calls in BATCH.
1915 */
1916static svn_error_t *
1917pack_log_addressed(svn_fs_t *fs,
1918                   const char *pack_file_dir,
1919                   const char *shard_dir,
1920                   svn_revnum_t shard_rev,
1921                   apr_size_t max_mem,
1922                   svn_fs_x__batch_fsync_t *batch,
1923                   svn_cancel_func_t cancel_func,
1924                   void *cancel_baton,
1925                   apr_pool_t *scratch_pool)
1926{
1927  enum
1928    {
1929      /* estimated amount of memory used to represent one item in memory
1930       * during rev file packing */
1931      PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1932                   + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1933                   + APR_ALIGN_DEFAULT(sizeof(reference_t))
1934                   + APR_ALIGN_DEFAULT(sizeof(svn_fs_x__p2l_entry_t))
1935                   + 6 * sizeof(void*)
1936    };
1937
1938  int max_items = max_mem / PER_ITEM_MEM > INT_MAX
1939                ? INT_MAX
1940                : (int)(max_mem / PER_ITEM_MEM);
1941  apr_array_header_t *max_ids;
1942  pack_context_t context = { 0 };
1943  int i;
1944  apr_size_t item_count = 0;
1945  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1946
1947  /* set up a pack context */
1948  SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1949                                  shard_rev, max_items, batch, cancel_func,
1950                                  cancel_baton, scratch_pool));
1951
1952  /* phase 1: determine the size of the revisions to pack */
1953  SVN_ERR(svn_fs_x__l2p_get_max_ids(&max_ids, fs, shard_rev,
1954                                    context.shard_end_rev - shard_rev,
1955                                    scratch_pool, scratch_pool));
1956
1957  /* pack revisions in ranges that don't exceed MAX_MEM */
1958  for (i = 0; i < max_ids->nelts; ++i)
1959    if (   APR_ARRAY_IDX(max_ids, i, apr_uint64_t)
1960        <= (apr_uint64_t)max_items - item_count)
1961      {
1962        item_count += APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1963        context.end_rev++;
1964      }
1965    else
1966      {
1967        /* some unpacked revisions before this one? */
1968        if (context.start_rev < context.end_rev)
1969          {
1970            /* pack them intelligently (might be just 1 rev but still ...) */
1971            SVN_ERR(pack_range(&context, iterpool));
1972            SVN_ERR(reset_pack_context(&context, iterpool));
1973            item_count = 0;
1974          }
1975
1976        /* next revision range is to start with the current revision */
1977        context.start_rev = i + context.shard_rev;
1978        context.end_rev = context.start_rev + 1;
1979
1980        /* if this is a very large revision, we must place it as is */
1981        if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1982          {
1983            SVN_ERR(append_revision(&context, iterpool));
1984            context.start_rev++;
1985          }
1986        else
1987          item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1988
1989        svn_pool_clear(iterpool);
1990      }
1991
1992  /* non-empty revision range at the end? */
1993  if (context.start_rev < context.end_rev)
1994    SVN_ERR(pack_range(&context, iterpool));
1995
1996  /* last phase: finalize indexes and clean up */
1997  SVN_ERR(reset_pack_context(&context, iterpool));
1998  SVN_ERR(close_pack_context(&context, iterpool));
1999  svn_pool_destroy(iterpool);
2000
2001  return SVN_NO_ERROR;
2002}
2003
2004/* In filesystem FS, pack the revision SHARD containing exactly
2005 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
2006 * using SCRATCH_POOL for temporary allocations.  Try to limit the amount of
2007 * temporary memory needed to MAX_MEM bytes.  CANCEL_FUNC and CANCEL_BATON
2008 * are what you think they are.  Schedule necessary fsync calls in BATCH.
2009 *
2010 * If for some reason we detect a partial packing already performed, we
2011 * remove the pack file and start again.
2012 *
2013 * The actual packing will be done in a format-specific sub-function.
2014 */
2015static svn_error_t *
2016pack_rev_shard(svn_fs_t *fs,
2017               const char *pack_file_dir,
2018               const char *shard_path,
2019               apr_int64_t shard,
2020               int max_files_per_dir,
2021               apr_size_t max_mem,
2022               svn_fs_x__batch_fsync_t *batch,
2023               svn_cancel_func_t cancel_func,
2024               void *cancel_baton,
2025               apr_pool_t *scratch_pool)
2026{
2027  const char *pack_file_path;
2028  svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
2029
2030  /* Some useful paths. */
2031  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, scratch_pool);
2032
2033  /* Remove any existing pack file for this shard, since it is incomplete. */
2034  SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
2035                             scratch_pool));
2036
2037  /* Create the new directory and pack file. */
2038  SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool));
2039  SVN_ERR(svn_fs_x__batch_fsync_new_path(batch, pack_file_dir, scratch_pool));
2040
2041  /* Index information files */
2042  SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
2043                             max_mem, batch, cancel_func, cancel_baton,
2044                             scratch_pool));
2045
2046  SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, scratch_pool));
2047  SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, scratch_pool));
2048
2049  return SVN_NO_ERROR;
2050}
2051
2052/* In the file system at FS_PATH, pack the SHARD in DIR containing exactly
2053 * MAX_FILES_PER_DIR revisions, using SCRATCH_POOL temporary for allocations.
2054 * COMPRESSION_LEVEL and MAX_PACK_SIZE will be ignored in that case.
2055 * An attempt will be made to keep memory usage below MAX_MEM.
2056 *
2057 * CANCEL_FUNC and CANCEL_BATON are what you think they are; similarly
2058 * NOTIFY_FUNC and NOTIFY_BATON.
2059 *
2060 * If for some reason we detect a partial packing already performed, we
2061 * remove the pack file and start again.
2062 */
2063static svn_error_t *
2064pack_shard(const char *dir,
2065           svn_fs_t *fs,
2066           apr_int64_t shard,
2067           int max_files_per_dir,
2068           apr_off_t max_pack_size,
2069           int compression_level,
2070           apr_size_t max_mem,
2071           svn_fs_pack_notify_t notify_func,
2072           void *notify_baton,
2073           svn_cancel_func_t cancel_func,
2074           void *cancel_baton,
2075           apr_pool_t *scratch_pool)
2076{
2077  svn_fs_x__data_t *ffd = fs->fsap_data;
2078  const char *shard_path, *pack_file_dir;
2079  svn_fs_x__batch_fsync_t *batch;
2080
2081  /* Notify caller we're starting to pack this shard. */
2082  if (notify_func)
2083    SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_start,
2084                        scratch_pool));
2085
2086  /* Perform all fsyncs through this instance. */
2087  SVN_ERR(svn_fs_x__batch_fsync_create(&batch, ffd->flush_to_disk,
2088                                       scratch_pool));
2089
2090  /* Some useful paths. */
2091  pack_file_dir = svn_dirent_join(dir,
2092                  apr_psprintf(scratch_pool,
2093                               "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
2094                               shard),
2095                  scratch_pool);
2096  shard_path = svn_dirent_join(dir,
2097                      apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard),
2098                      scratch_pool);
2099
2100  /* pack the revision content */
2101  SVN_ERR(pack_rev_shard(fs, pack_file_dir, shard_path,
2102                         shard, max_files_per_dir, max_mem, batch,
2103                         cancel_func, cancel_baton, scratch_pool));
2104
2105  /* pack the revprops in an equivalent way */
2106  SVN_ERR(svn_fs_x__pack_revprops_shard(fs,
2107                                        pack_file_dir,
2108                                        shard_path,
2109                                        shard, max_files_per_dir,
2110                                        (int)(0.9 * max_pack_size),
2111                                        compression_level, batch,
2112                                        cancel_func, cancel_baton,
2113                                        scratch_pool));
2114
2115  /* Update the min-unpacked-rev file to reflect our newly packed shard. */
2116  SVN_ERR(svn_fs_x__write_min_unpacked_rev(fs,
2117                          (svn_revnum_t)((shard + 1) * max_files_per_dir),
2118                          scratch_pool));
2119  ffd->min_unpacked_rev = (svn_revnum_t)((shard + 1) * max_files_per_dir);
2120
2121  /* Ensure that packed file is written to disk.*/
2122  SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool));
2123
2124  /* Finally, remove the existing shard directories. */
2125  SVN_ERR(svn_io_remove_dir2(shard_path, TRUE,
2126                             cancel_func, cancel_baton, scratch_pool));
2127
2128  /* Notify caller we're starting to pack this shard. */
2129  if (notify_func)
2130    SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_end,
2131                        scratch_pool));
2132
2133  return SVN_NO_ERROR;
2134}
2135
2136/* Read the youngest rev and the first non-packed rev info for FS from disk.
2137   Set *FULLY_PACKED when there is no completed unpacked shard.
2138   Use SCRATCH_POOL for temporary allocations.
2139 */
2140static svn_error_t *
2141get_pack_status(svn_boolean_t *fully_packed,
2142                svn_fs_t *fs,
2143                apr_pool_t *scratch_pool)
2144{
2145  svn_fs_x__data_t *ffd = fs->fsap_data;
2146  apr_int64_t completed_shards;
2147  svn_revnum_t youngest;
2148
2149  SVN_ERR(svn_fs_x__read_min_unpacked_rev(&ffd->min_unpacked_rev, fs,
2150                                          scratch_pool));
2151
2152  SVN_ERR(svn_fs_x__youngest_rev(&youngest, fs, scratch_pool));
2153  completed_shards = (youngest + 1) / ffd->max_files_per_dir;
2154
2155  /* See if we've already completed all possible shards thus far. */
2156  if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
2157    *fully_packed = TRUE;
2158  else
2159    *fully_packed = FALSE;
2160
2161  return SVN_NO_ERROR;
2162}
2163
2164typedef struct pack_baton_t
2165{
2166  svn_fs_t *fs;
2167  apr_size_t max_mem;
2168  svn_fs_pack_notify_t notify_func;
2169  void *notify_baton;
2170  svn_cancel_func_t cancel_func;
2171  void *cancel_baton;
2172} pack_baton_t;
2173
2174
2175/* The work-horse for svn_fs_x__pack, called with the FS write lock.
2176   This implements the svn_fs_x__with_write_lock() 'body' callback
2177   type.  BATON is a 'pack_baton_t *'.
2178
2179   WARNING: if you add a call to this function, please note:
2180     The code currently assumes that any piece of code running with
2181     the write-lock set can rely on the ffd->min_unpacked_rev and
2182     ffd->min_unpacked_revprop caches to be up-to-date (and, by
2183     extension, on not having to use a retry when calling
2184     svn_fs_x__path_rev_absolute() and friends).  If you add a call
2185     to this function, consider whether you have to call
2186     update_min_unpacked_rev().
2187     See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
2188 */
2189static svn_error_t *
2190pack_body(void *baton,
2191          apr_pool_t *scratch_pool)
2192{
2193  pack_baton_t *pb = baton;
2194  svn_fs_x__data_t *ffd = pb->fs->fsap_data;
2195  apr_int64_t completed_shards;
2196  apr_int64_t i;
2197  apr_pool_t *iterpool;
2198  const char *data_path;
2199  svn_boolean_t fully_packed;
2200
2201  /* Since another process might have already packed the repo,
2202     we need to re-read the pack status. */
2203  SVN_ERR(get_pack_status(&fully_packed, pb->fs, scratch_pool));
2204  if (fully_packed)
2205    {
2206      if (pb->notify_func)
2207        SVN_ERR(pb->notify_func(pb->notify_baton,
2208                                ffd->min_unpacked_rev / ffd->max_files_per_dir,
2209                                svn_fs_pack_notify_noop, scratch_pool));
2210
2211      return SVN_NO_ERROR;
2212    }
2213
2214  completed_shards = (ffd->youngest_rev_cache + 1) / ffd->max_files_per_dir;
2215  data_path = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, scratch_pool);
2216
2217  iterpool = svn_pool_create(scratch_pool);
2218  for (i = ffd->min_unpacked_rev / ffd->max_files_per_dir;
2219       i < completed_shards;
2220       i++)
2221    {
2222      svn_pool_clear(iterpool);
2223
2224      if (pb->cancel_func)
2225        SVN_ERR(pb->cancel_func(pb->cancel_baton));
2226
2227      SVN_ERR(pack_shard(data_path,
2228                         pb->fs, i, ffd->max_files_per_dir,
2229                         ffd->revprop_pack_size,
2230                         ffd->compress_packed_revprops
2231                           ? SVN__COMPRESSION_ZLIB_DEFAULT
2232                           : SVN__COMPRESSION_NONE,
2233                         pb->max_mem,
2234                         pb->notify_func, pb->notify_baton,
2235                         pb->cancel_func, pb->cancel_baton, iterpool));
2236    }
2237
2238  svn_pool_destroy(iterpool);
2239  return SVN_NO_ERROR;
2240}
2241
2242svn_error_t *
2243svn_fs_x__pack(svn_fs_t *fs,
2244               apr_size_t max_mem,
2245               svn_fs_pack_notify_t notify_func,
2246               void *notify_baton,
2247               svn_cancel_func_t cancel_func,
2248               void *cancel_baton,
2249               apr_pool_t *scratch_pool)
2250{
2251  pack_baton_t pb = { 0 };
2252  svn_boolean_t fully_packed;
2253
2254  /* Is there we even anything to do?. */
2255  SVN_ERR(get_pack_status(&fully_packed, fs, scratch_pool));
2256  if (fully_packed)
2257    {
2258      svn_fs_x__data_t *ffd = fs->fsap_data;
2259
2260      if (notify_func)
2261        SVN_ERR(notify_func(notify_baton,
2262                            ffd->min_unpacked_rev / ffd->max_files_per_dir,
2263                            svn_fs_pack_notify_noop, scratch_pool));
2264
2265      return SVN_NO_ERROR;
2266    }
2267
2268  /* Lock the repo and start the pack process. */
2269  pb.fs = fs;
2270  pb.notify_func = notify_func;
2271  pb.notify_baton = notify_baton;
2272  pb.cancel_func = cancel_func;
2273  pb.cancel_baton = cancel_baton;
2274  pb.max_mem = max_mem ? max_mem : DEFAULT_MAX_MEM;
2275
2276  return svn_fs_x__with_pack_lock(fs, pack_body, &pb, scratch_pool);
2277}
2278