index.c revision 362181
1/* index.c indexing support for FSFS support
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include <assert.h>
24
25#include "svn_io.h"
26#include "svn_pools.h"
27#include "svn_sorts.h"
28
29#include "svn_private_config.h"
30
31#include "private/svn_sorts_private.h"
32#include "private/svn_subr_private.h"
33#include "private/svn_temp_serializer.h"
34
35#include "index.h"
36#include "pack.h"
37#include "temp_serializer.h"
38#include "util.h"
39#include "fs_fs.h"
40
41#include "../libsvn_fs/fs-loader.h"
42
43/* maximum length of a uint64 in an 7/8b encoding */
44#define ENCODED_INT_LENGTH 10
45
46/* APR is missing an APR_OFF_T_MAX.  So, define one.  We will use it to
47 * limit file offsets stored in the indexes.
48 *
49 * We assume that everything shorter than 64 bits, it is at least 32 bits.
50 * We also assume that the type is always signed meaning we only have an
51 * effective positive range of 63 or 31 bits, respectively.
52 */
53static
54const apr_uint64_t off_t_max = (sizeof(apr_off_t) == sizeof(apr_int64_t))
55                             ? APR_INT64_MAX
56                             : APR_INT32_MAX;
57
58/* We store P2L proto-index entries as 6 values, 64 bits each on disk.
59 * See also svn_fs_fs__p2l_proto_index_add_entry().
60 */
61#define P2L_PROTO_INDEX_ENTRY_SIZE (6 * sizeof(apr_uint64_t))
62
63/* We put this string in front of the L2P index header. */
64#define L2P_STREAM_PREFIX "L2P-INDEX\n"
65
66/* We put this string in front of the P2L index header. */
67#define P2L_STREAM_PREFIX "P2L-INDEX\n"
68
69/* Size of the buffer that will fit the index header prefixes. */
70#define STREAM_PREFIX_LEN MAX(sizeof(L2P_STREAM_PREFIX), \
71                              sizeof(P2L_STREAM_PREFIX))
72
73/* Page tables in the log-to-phys index file exclusively contain entries
74 * of this type to describe position and size of a given page.
75 */
76typedef struct l2p_page_table_entry_t
77{
78  /* global offset on the page within the index file */
79  apr_uint64_t offset;
80
81  /* number of mapping entries in that page */
82  apr_uint32_t entry_count;
83
84  /* size of the page on disk (in the index file) */
85  apr_uint32_t size;
86} l2p_page_table_entry_t;
87
88/* Master run-time data structure of an log-to-phys index.  It contains
89 * the page tables of every revision covered by that index - but not the
90 * pages themselves.
91 */
92typedef struct l2p_header_t
93{
94  /* first revision covered by this index */
95  svn_revnum_t first_revision;
96
97  /* number of revisions covered */
98  apr_size_t revision_count;
99
100  /* (max) number of entries per page */
101  apr_uint32_t page_size;
102
103  /* indexes into PAGE_TABLE that mark the first page of the respective
104   * revision.  PAGE_TABLE_INDEX[REVISION_COUNT] points to the end of
105   * PAGE_TABLE.
106   */
107  apr_size_t * page_table_index;
108
109  /* Page table covering all pages in the index */
110  l2p_page_table_entry_t * page_table;
111} l2p_header_t;
112
113/* Run-time data structure containing a single log-to-phys index page.
114 */
115typedef struct l2p_page_t
116{
117  /* number of entries in the OFFSETS array */
118  apr_uint32_t entry_count;
119
120  /* global file offsets (item index is the array index) within the
121   * packed or non-packed rev file.  Offset will be -1 for unused /
122   * invalid item index values. */
123  apr_uint64_t *offsets;
124} l2p_page_t;
125
126/* All of the log-to-phys proto index file consist of entries of this type.
127 */
128typedef struct l2p_proto_entry_t
129{
130  /* phys offset + 1 of the data container. 0 for "new revision" entries. */
131  apr_uint64_t offset;
132
133  /* corresponding item index. 0 for "new revision" entries. */
134  apr_uint64_t item_index;
135} l2p_proto_entry_t;
136
137/* Master run-time data structure of an phys-to-log index.  It contains
138 * an array with one offset value for each rev file cluster.
139 */
140typedef struct p2l_header_t
141{
142  /* first revision covered by the index (and rev file) */
143  svn_revnum_t first_revision;
144
145  /* number of bytes in the rev files covered by each p2l page */
146  apr_uint64_t page_size;
147
148  /* number of pages / clusters in that rev file */
149  apr_size_t page_count;
150
151  /* number of bytes in the rev file */
152  apr_uint64_t file_size;
153
154  /* offsets of the pages / cluster descriptions within the index file */
155  apr_off_t *offsets;
156} p2l_header_t;
157
158/*
159 * packed stream
160 *
161 * This is a utility object that will read files containing 7b/8b encoded
162 * unsigned integers.  It decodes them in batches to minimize overhead
163 * and supports random access to random file locations.
164 */
165
166/* How many numbers we will pre-fetch and buffer in a packed number stream.
167 */
168enum { MAX_NUMBER_PREFETCH = 64 };
169
170/* Prefetched number entry in a packed number stream.
171 */
172typedef struct value_position_pair_t
173{
174  /* prefetched number */
175  apr_uint64_t value;
176
177  /* number of bytes read, *including* this number, since the buffer start */
178  apr_size_t total_len;
179} value_position_pair_t;
180
181/* State of a prefetching packed number stream.  It will read compressed
182 * index data efficiently and present it as a series of non-packed uint64.
183 */
184struct svn_fs_fs__packed_number_stream_t
185{
186  /* underlying data file containing the packed values */
187  apr_file_t *file;
188
189  /* Offset within FILE at which the stream data starts
190   * (i.e. which offset will reported as offset 0 by packed_stream_offset). */
191  apr_off_t stream_start;
192
193  /* First offset within FILE after the stream data.
194   * Attempts to read beyond this will cause an "Unexpected End Of Stream"
195   * error. */
196  apr_off_t stream_end;
197
198  /* number of used entries in BUFFER (starting at index 0) */
199  apr_size_t used;
200
201  /* index of the next number to read from the BUFFER (0 .. USED).
202   * If CURRENT == USED, we need to read more data upon get() */
203  apr_size_t current;
204
205  /* offset in FILE from which the first entry in BUFFER has been read */
206  apr_off_t start_offset;
207
208  /* offset in FILE from which the next number has to be read */
209  apr_off_t next_offset;
210
211  /* read the file in chunks of this size */
212  apr_size_t block_size;
213
214  /* pool to be used for file ops etc. */
215  apr_pool_t *pool;
216
217  /* buffer for prefetched values */
218  value_position_pair_t buffer[MAX_NUMBER_PREFETCH];
219};
220
221/* Return an svn_error_t * object for error ERR on STREAM with the given
222 * MESSAGE string.  The latter must have a placeholder for the index file
223 * name ("%s") and the current read offset (e.g. "0x%lx").
224 */
225static svn_error_t *
226stream_error_create(svn_fs_fs__packed_number_stream_t *stream,
227                    apr_status_t err,
228                    const char *message)
229{
230  const char *file_name;
231  apr_off_t offset;
232  SVN_ERR(svn_io_file_name_get(&file_name, stream->file,
233                               stream->pool));
234  SVN_ERR(svn_io_file_get_offset(&offset, stream->file, stream->pool));
235
236  return svn_error_createf(err, NULL, message, file_name,
237                           apr_psprintf(stream->pool,
238                                        "%" APR_UINT64_T_HEX_FMT,
239                                        (apr_uint64_t)offset));
240}
241
242/* Read up to MAX_NUMBER_PREFETCH numbers from the STREAM->NEXT_OFFSET in
243 * STREAM->FILE and buffer them.
244 *
245 * We don't want GCC and others to inline this (infrequently called)
246 * function into packed_stream_get() because it prevents the latter from
247 * being inlined itself.
248 */
249SVN__PREVENT_INLINE
250static svn_error_t *
251packed_stream_read(svn_fs_fs__packed_number_stream_t *stream)
252{
253  unsigned char buffer[MAX_NUMBER_PREFETCH];
254  apr_size_t bytes_read = 0;
255  apr_size_t i;
256  value_position_pair_t *target;
257  apr_off_t block_start = 0;
258  apr_off_t block_left = 0;
259  apr_status_t err;
260
261  /* all buffered data will have been read starting here */
262  stream->start_offset = stream->next_offset;
263
264  /* packed numbers are usually not aligned to MAX_NUMBER_PREFETCH blocks,
265   * i.e. the last number has been incomplete (and not buffered in stream)
266   * and need to be re-read.  Therefore, always correct the file pointer.
267   */
268  SVN_ERR(svn_io_file_aligned_seek(stream->file, stream->block_size,
269                                   &block_start, stream->next_offset,
270                                   stream->pool));
271
272  /* prefetch at least one number but, if feasible, don't cross block
273   * boundaries.  This shall prevent jumping back and forth between two
274   * blocks because the extra data was not actually request _now_.
275   */
276  bytes_read = sizeof(buffer);
277  block_left = stream->block_size - (stream->next_offset - block_start);
278  if (block_left >= 10 && block_left < bytes_read)
279    bytes_read = (apr_size_t)block_left;
280
281  /* Don't read beyond the end of the file section that belongs to this
282   * index / stream. */
283  bytes_read = (apr_size_t)MIN(bytes_read,
284                               stream->stream_end - stream->next_offset);
285
286  err = apr_file_read(stream->file, buffer, &bytes_read);
287  if (err && !APR_STATUS_IS_EOF(err))
288    return stream_error_create(stream, err,
289      _("Can't read index file '%s' at offset 0x%s"));
290
291  /* if the last number is incomplete, trim it from the buffer */
292  while (bytes_read > 0 && buffer[bytes_read-1] >= 0x80)
293    --bytes_read;
294
295  /* we call read() only if get() requires more data.  So, there must be
296   * at least *one* further number. */
297  if SVN__PREDICT_FALSE(bytes_read == 0)
298    return stream_error_create(stream, err,
299      _("Unexpected end of index file %s at offset 0x%s"));
300
301  /* parse file buffer and expand into stream buffer */
302  target = stream->buffer;
303  for (i = 0; i < bytes_read;)
304    {
305      if (buffer[i] < 0x80)
306        {
307          /* numbers < 128 are relatively frequent and particularly easy
308           * to decode.  Give them special treatment. */
309          target->value = buffer[i];
310          ++i;
311          target->total_len = i;
312          ++target;
313        }
314      else
315        {
316          apr_uint64_t value = 0;
317          apr_uint64_t shift = 0;
318          while (buffer[i] >= 0x80)
319            {
320              value += ((apr_uint64_t)buffer[i] & 0x7f) << shift;
321              shift += 7;
322              ++i;
323            }
324
325          target->value = value + ((apr_uint64_t)buffer[i] << shift);
326          ++i;
327          target->total_len = i;
328          ++target;
329
330          /* let's catch corrupted data early.  It would surely cause
331           * havoc further down the line. */
332          if SVN__PREDICT_FALSE(shift > 8 * sizeof(value))
333            return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
334                                     _("Corrupt index: number too large"));
335       }
336    }
337
338  /* update stream state */
339  stream->used = target - stream->buffer;
340  stream->next_offset = stream->start_offset + i;
341  stream->current = 0;
342
343  return SVN_NO_ERROR;
344}
345
346/* Create and open a packed number stream reading from offsets START to
347 * END in FILE and return it in *STREAM.  Access the file in chunks of
348 * BLOCK_SIZE bytes.  Expect the stream to be prefixed by STREAM_PREFIX.
349 * Allocate *STREAM in RESULT_POOL and use SCRATCH_POOL for temporaries.
350 */
351static svn_error_t *
352packed_stream_open(svn_fs_fs__packed_number_stream_t **stream,
353                   apr_file_t *file,
354                   apr_off_t start,
355                   apr_off_t end,
356                   const char *stream_prefix,
357                   apr_size_t block_size,
358                   apr_pool_t *result_pool,
359                   apr_pool_t *scratch_pool)
360{
361  char buffer[STREAM_PREFIX_LEN + 1] = { 0 };
362  apr_size_t len = strlen(stream_prefix);
363  svn_fs_fs__packed_number_stream_t *result;
364
365  /* If this is violated, we forgot to adjust STREAM_PREFIX_LEN after
366   * changing the index header prefixes. */
367  SVN_ERR_ASSERT(len < sizeof(buffer));
368
369  /* Read the header prefix and compare it with the expected prefix */
370  SVN_ERR(svn_io_file_aligned_seek(file, block_size, NULL, start,
371                                   scratch_pool));
372  SVN_ERR(svn_io_file_read_full2(file, buffer, len, NULL, NULL,
373                                 scratch_pool));
374
375  if (strncmp(buffer, stream_prefix, len))
376    return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
377                             _("Index stream header prefix mismatch.\n"
378                               "  expected: %s"
379                               "  found: %s"), stream_prefix, buffer);
380
381  /* Construct the actual stream object. */
382  result = apr_palloc(result_pool, sizeof(*result));
383
384  result->pool = result_pool;
385  result->file = file;
386  result->stream_start = start + len;
387  result->stream_end = end;
388
389  result->used = 0;
390  result->current = 0;
391  result->start_offset = result->stream_start;
392  result->next_offset = result->stream_start;
393  result->block_size = block_size;
394
395  *stream = result;
396
397  return SVN_NO_ERROR;
398}
399
400/*
401 * The forced inline is required for performance reasons:  This is a very
402 * hot code path (called for every item we read) but e.g. GCC would rather
403 * chose to inline packed_stream_read() here, preventing packed_stream_get
404 * from being inlined itself.
405 */
406SVN__FORCE_INLINE
407static svn_error_t*
408packed_stream_get(apr_uint64_t *value,
409                  svn_fs_fs__packed_number_stream_t *stream)
410{
411  if (stream->current == stream->used)
412    SVN_ERR(packed_stream_read(stream));
413
414  *value = stream->buffer[stream->current].value;
415  ++stream->current;
416
417  return SVN_NO_ERROR;
418}
419
420/* Navigate STREAM to packed stream offset OFFSET.  There will be no checks
421 * whether the given OFFSET is valid.
422 */
423static void
424packed_stream_seek(svn_fs_fs__packed_number_stream_t *stream,
425                   apr_off_t offset)
426{
427  apr_off_t file_offset = offset + stream->stream_start;
428
429  if (   stream->used == 0
430      || offset < stream->start_offset
431      || offset >= stream->next_offset)
432    {
433      /* outside buffered data.  Next get() will read() from OFFSET. */
434      stream->start_offset = file_offset;
435      stream->next_offset = file_offset;
436      stream->current = 0;
437      stream->used = 0;
438    }
439  else
440    {
441      /* Find the suitable location in the stream buffer.
442       * Since our buffer is small, it is efficient enough to simply scan
443       * it for the desired position. */
444      apr_size_t i;
445      for (i = 0; i < stream->used; ++i)
446        if (stream->buffer[i].total_len > file_offset - stream->start_offset)
447          break;
448
449      stream->current = i;
450    }
451}
452
453/* Return the packed stream offset of at which the next number in the stream
454 * can be found.
455 */
456static apr_off_t
457packed_stream_offset(svn_fs_fs__packed_number_stream_t *stream)
458{
459  apr_off_t file_offset
460       = stream->current == 0
461       ? stream->start_offset
462       : stream->buffer[stream->current-1].total_len + stream->start_offset;
463
464  return file_offset - stream->stream_start;
465}
466
467/* Encode VALUE as 7/8b into P and return the number of bytes written.
468 * This will be used when _writing_ packed data.  packed_stream_* is for
469 * read operations only.
470 */
471static apr_size_t
472encode_uint(unsigned char *p, apr_uint64_t value)
473{
474  unsigned char *start = p;
475  while (value >= 0x80)
476    {
477      *p = (unsigned char)((value % 0x80) + 0x80);
478      value /= 0x80;
479      ++p;
480    }
481
482  *p = (unsigned char)(value % 0x80);
483  return (p - start) + 1;
484}
485
486/* Encode VALUE as 7/8b into P and return the number of bytes written.
487 * This maps signed ints onto unsigned ones.
488 */
489static apr_size_t
490encode_int(unsigned char *p, apr_int64_t value)
491{
492  return encode_uint(p, (apr_uint64_t)(value < 0 ? -1 - 2*value : 2*value));
493}
494
495/* Append VALUE to STREAM in 7/8b encoding.
496 */
497static svn_error_t *
498stream_write_encoded(svn_stream_t *stream,
499                     apr_uint64_t value)
500{
501  unsigned char encoded[ENCODED_INT_LENGTH];
502
503  apr_size_t len = encode_uint(encoded, value);
504  return svn_error_trace(svn_stream_write(stream, (char *)encoded, &len));
505}
506
507/* Map unsigned VALUE back to signed integer.
508 */
509static apr_int64_t
510decode_int(apr_uint64_t value)
511{
512  return (apr_int64_t)(value % 2 ? -1 - value / 2 : value / 2);
513}
514
515/* Write VALUE to the PROTO_INDEX file, using SCRATCH_POOL for temporary
516 * allocations.
517 *
518 * The point of this function is to ensure an architecture-independent
519 * proto-index file format.  All data is written as unsigned 64 bits ints
520 * in little endian byte order.  64 bits is the largest portable integer
521 * we have and unsigned values have well-defined conversions in C.
522 */
523static svn_error_t *
524write_uint64_to_proto_index(apr_file_t *proto_index,
525                            apr_uint64_t value,
526                            apr_pool_t *scratch_pool)
527{
528  apr_byte_t buffer[sizeof(value)];
529  int i;
530  apr_size_t written;
531
532  /* Split VALUE into 8 bytes using LE ordering. */
533  for (i = 0; i < sizeof(buffer); ++i)
534    {
535      /* Unsigned conversions are well-defined ... */
536      buffer[i] = (apr_byte_t)value;
537      value >>= CHAR_BIT;
538    }
539
540  /* Write it all to disk. */
541  SVN_ERR(svn_io_file_write_full(proto_index, buffer, sizeof(buffer),
542                                 &written, scratch_pool));
543  SVN_ERR_ASSERT(written == sizeof(buffer));
544
545  return SVN_NO_ERROR;
546}
547
548/* Read one unsigned 64 bit value from PROTO_INDEX file and return it in
549 * *VALUE_P.  If EOF is NULL, error out when trying to read beyond EOF.
550 * Use SCRATCH_POOL for temporary allocations.
551 *
552 * This function is the inverse to write_uint64_to_proto_index (see there),
553 * reading the external LE byte order and convert it into host byte order.
554 */
555static svn_error_t *
556read_uint64_from_proto_index(apr_file_t *proto_index,
557                             apr_uint64_t *value_p,
558                             svn_boolean_t *eof,
559                             apr_pool_t *scratch_pool)
560{
561  apr_byte_t buffer[sizeof(*value_p)];
562  apr_size_t bytes_read;
563
564  /* Read the full 8 bytes or our 64 bit value, unless we hit EOF.
565   * Assert that we never read partial values. */
566  SVN_ERR(svn_io_file_read_full2(proto_index, buffer, sizeof(buffer),
567                                 &bytes_read, eof, scratch_pool));
568  SVN_ERR_ASSERT((eof && *eof) || bytes_read == sizeof(buffer));
569
570  /* If we did not hit EOF, reconstruct the uint64 value and return it. */
571  if (!eof || !*eof)
572    {
573      int i;
574      apr_uint64_t value;
575
576      /* This could only overflow if CHAR_BIT had a value that is not
577       * a divisor of 64. */
578      value = 0;
579      for (i = sizeof(buffer) - 1; i >= 0; --i)
580        value = (value << CHAR_BIT) + buffer[i];
581
582      *value_p = value;
583    }
584
585  return SVN_NO_ERROR;
586}
587
588/* Convenience function similar to read_uint64_from_proto_index, but returns
589 * an uint32 value in VALUE_P.  Return an error if the value does not fit.
590 */
591static svn_error_t *
592read_uint32_from_proto_index(apr_file_t *proto_index,
593                             apr_uint32_t *value_p,
594                             svn_boolean_t *eof,
595                             apr_pool_t *scratch_pool)
596{
597  apr_uint64_t value;
598  SVN_ERR(read_uint64_from_proto_index(proto_index, &value, eof,
599                                       scratch_pool));
600  if (!eof || !*eof)
601    {
602      if (value > APR_UINT32_MAX)
603        return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
604                                _("UINT32 0x%s too large, max = 0x%s"),
605                                apr_psprintf(scratch_pool,
606                                             "%" APR_UINT64_T_HEX_FMT,
607                                             value),
608                                apr_psprintf(scratch_pool,
609                                             "%" APR_UINT64_T_HEX_FMT,
610                                             (apr_uint64_t)APR_UINT32_MAX));
611
612      /* This conversion is not lossy because the value can be represented
613       * in the target type. */
614      *value_p = (apr_uint32_t)value;
615    }
616
617  return SVN_NO_ERROR;
618}
619
620/* Convenience function similar to read_uint64_from_proto_index, but returns
621 * an off_t value in VALUE_P.  Return an error if the value does not fit.
622 */
623static svn_error_t *
624read_off_t_from_proto_index(apr_file_t *proto_index,
625                            apr_off_t *value_p,
626                            svn_boolean_t *eof,
627                            apr_pool_t *scratch_pool)
628{
629  apr_uint64_t value;
630  SVN_ERR(read_uint64_from_proto_index(proto_index, &value, eof,
631                                       scratch_pool));
632  if (!eof || !*eof)
633    {
634      if (value > off_t_max)
635        return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
636                                _("File offset 0x%s too large, max = 0x%s"),
637                                apr_psprintf(scratch_pool,
638                                             "%" APR_UINT64_T_HEX_FMT,
639                                             value),
640                                apr_psprintf(scratch_pool,
641                                             "%" APR_UINT64_T_HEX_FMT,
642                                             off_t_max));
643
644      /* Shortening conversion from unsigned to signed int is well-defined
645       * and not lossy in C because the value can be represented in the
646       * target type. */
647      *value_p = (apr_off_t)value;
648    }
649
650  return SVN_NO_ERROR;
651}
652
653/*
654 * log-to-phys index
655 */
656
657/* Append ENTRY to log-to-phys PROTO_INDEX file.
658 * Use SCRATCH_POOL for temporary allocations.
659 */
660static svn_error_t *
661write_l2p_entry_to_proto_index(apr_file_t *proto_index,
662                               l2p_proto_entry_t entry,
663                               apr_pool_t *scratch_pool)
664{
665  SVN_ERR(write_uint64_to_proto_index(proto_index, entry.offset,
666                                      scratch_pool));
667  SVN_ERR(write_uint64_to_proto_index(proto_index, entry.item_index,
668                                      scratch_pool));
669
670  return SVN_NO_ERROR;
671}
672
673/* Read *ENTRY from log-to-phys PROTO_INDEX file and indicate end-of-file
674 * in *EOF, or error out in that case if EOF is NULL.  *ENTRY is in an
675 * undefined state if an end-of-file occurred.
676 * Use SCRATCH_POOL for temporary allocations.
677 */
678static svn_error_t *
679read_l2p_entry_from_proto_index(apr_file_t *proto_index,
680                                l2p_proto_entry_t *entry,
681                                svn_boolean_t *eof,
682                                apr_pool_t *scratch_pool)
683{
684  SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->offset, eof,
685                                       scratch_pool));
686  SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->item_index, eof,
687                                       scratch_pool));
688
689  return SVN_NO_ERROR;
690}
691
692/* Write the log-2-phys index page description for the l2p_page_entry_t
693 * array ENTRIES, starting with element START up to but not including END.
694 * Write the resulting representation into BUFFER.  Use SCRATCH_POOL for
695 * temporary allocations.
696 */
697static svn_error_t *
698encode_l2p_page(apr_array_header_t *entries,
699                int start,
700                int end,
701                svn_spillbuf_t *buffer,
702                apr_pool_t *scratch_pool)
703{
704  unsigned char encoded[ENCODED_INT_LENGTH];
705  int i;
706  const apr_uint64_t *values = (const apr_uint64_t *)entries->elts;
707  apr_uint64_t last_value = 0;
708
709  /* encode items */
710  for (i = start; i < end; ++i)
711    {
712      apr_int64_t diff = values[i] - last_value;
713      last_value = values[i];
714      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
715                                  encode_int(encoded, diff), scratch_pool));
716    }
717
718  return SVN_NO_ERROR;
719}
720
721svn_error_t *
722svn_fs_fs__l2p_proto_index_open(apr_file_t **proto_index,
723                                const char *file_name,
724                                apr_pool_t *result_pool)
725{
726  SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE
727                           | APR_CREATE | APR_APPEND | APR_BUFFERED,
728                           APR_OS_DEFAULT, result_pool));
729
730  return SVN_NO_ERROR;
731}
732
733svn_error_t *
734svn_fs_fs__l2p_proto_index_add_revision(apr_file_t *proto_index,
735                                        apr_pool_t *scratch_pool)
736{
737  l2p_proto_entry_t entry;
738  entry.offset = 0;
739  entry.item_index = 0;
740
741  return svn_error_trace(write_l2p_entry_to_proto_index(proto_index, entry,
742                                                        scratch_pool));
743}
744
745svn_error_t *
746svn_fs_fs__l2p_proto_index_add_entry(apr_file_t *proto_index,
747                                     apr_off_t offset,
748                                     apr_uint64_t item_index,
749                                     apr_pool_t *scratch_pool)
750{
751  l2p_proto_entry_t entry;
752
753  /* make sure the conversion to uint64 works */
754  SVN_ERR_ASSERT(offset >= -1);
755
756  /* we support offset '-1' as a "not used" indication */
757  entry.offset = (apr_uint64_t)offset + 1;
758
759  /* make sure we can use item_index as an array index when building the
760   * final index file */
761  SVN_ERR_ASSERT(item_index < UINT_MAX / 2);
762  entry.item_index = item_index;
763
764  return svn_error_trace(write_l2p_entry_to_proto_index(proto_index, entry,
765                                                        scratch_pool));
766}
767
768svn_error_t *
769svn_fs_fs__l2p_index_append(svn_checksum_t **checksum,
770                            svn_fs_t *fs,
771                            apr_file_t *index_file,
772                            const char *proto_file_name,
773                            svn_revnum_t revision,
774                            apr_pool_t * result_pool,
775                            apr_pool_t *scratch_pool)
776{
777  fs_fs_data_t *ffd = fs->fsap_data;
778  apr_file_t *proto_index = NULL;
779  svn_stream_t *stream;
780  int i;
781  apr_uint64_t entry;
782  svn_boolean_t eof = FALSE;
783
784  int last_page_count = 0;          /* total page count at the start of
785                                       the current revision */
786
787  /* temporary data structures that collect the data which will be moved
788     to the target file in a second step */
789  apr_pool_t *local_pool = svn_pool_create(scratch_pool);
790  apr_pool_t *iterpool = svn_pool_create(local_pool);
791  apr_array_header_t *page_counts
792    = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
793  apr_array_header_t *page_sizes
794    = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
795  apr_array_header_t *entry_counts
796    = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
797
798  /* collect the item offsets and sub-item value for the current revision */
799  apr_array_header_t *entries
800    = apr_array_make(local_pool, 256, sizeof(apr_uint64_t));
801
802  /* 64k blocks, spill after 16MB */
803  svn_spillbuf_t *buffer
804    = svn_spillbuf__create(0x10000, 0x1000000, local_pool);
805
806  /* Paranoia check that makes later casting to int32 safe.
807   * The current implementation is limited to 2G entries per page. */
808  if (ffd->l2p_page_size > APR_INT32_MAX)
809    return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
810                            _("L2P index page size  %s"
811                              " exceeds current limit of 2G entries"),
812                            apr_psprintf(local_pool, "%" APR_UINT64_T_FMT,
813                                         ffd->l2p_page_size));
814
815  /* start at the beginning of the source file */
816  SVN_ERR(svn_io_file_open(&proto_index, proto_file_name,
817                           APR_READ | APR_CREATE | APR_BUFFERED,
818                           APR_OS_DEFAULT, scratch_pool));
819
820  /* process all entries until we fail due to EOF */
821  for (entry = 0; !eof; ++entry)
822    {
823      l2p_proto_entry_t proto_entry;
824
825      /* (attempt to) read the next entry from the source */
826      SVN_ERR(read_l2p_entry_from_proto_index(proto_index, &proto_entry,
827                                              &eof, local_pool));
828
829      /* handle new revision */
830      if ((entry > 0 && proto_entry.offset == 0) || eof)
831        {
832          /* dump entries, grouped into pages */
833
834          int entry_count = 0;
835          for (i = 0; i < entries->nelts; i += entry_count)
836            {
837              /* 1 page with up to L2P_PAGE_SIZE entries.
838               * fsfs.conf settings validation guarantees this to fit into
839               * our address space. */
840              apr_uint64_t last_buffer_size
841                = (apr_uint64_t)svn_spillbuf__get_size(buffer);
842
843              svn_pool_clear(iterpool);
844
845              entry_count = ffd->l2p_page_size < entries->nelts - i
846                          ? (int)ffd->l2p_page_size
847                          : entries->nelts - i;
848              SVN_ERR(encode_l2p_page(entries, i, i + entry_count,
849                                      buffer, iterpool));
850
851              APR_ARRAY_PUSH(entry_counts, apr_uint64_t) = entry_count;
852              APR_ARRAY_PUSH(page_sizes, apr_uint64_t)
853                = svn_spillbuf__get_size(buffer) - last_buffer_size;
854            }
855
856          apr_array_clear(entries);
857
858          /* store the number of pages in this revision */
859          APR_ARRAY_PUSH(page_counts, apr_uint64_t)
860            = page_sizes->nelts - last_page_count;
861
862          last_page_count = page_sizes->nelts;
863        }
864      else
865        {
866          int idx;
867
868          /* store the mapping in our array */
869          if (proto_entry.item_index > APR_INT32_MAX)
870            return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
871                                    _("Item index %s too large "
872                                      "in l2p proto index for revision %ld"),
873                                    apr_psprintf(local_pool, "%" APR_UINT64_T_FMT,
874                                                 proto_entry.item_index),
875                                    revision + page_counts->nelts);
876
877          idx = (int)proto_entry.item_index;
878          while (idx >= entries->nelts)
879            APR_ARRAY_PUSH(entries, apr_uint64_t) = 0;
880
881          APR_ARRAY_IDX(entries, idx, apr_uint64_t) = proto_entry.offset;
882        }
883    }
884
885  /* close the source file */
886  SVN_ERR(svn_io_file_close(proto_index, local_pool));
887
888  /* Paranoia check that makes later casting to int32 safe.
889   * The current implementation is limited to 2G pages per index. */
890  if (page_counts->nelts > APR_INT32_MAX)
891    return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
892                            _("L2P index page count  %d"
893                              " exceeds current limit of 2G pages"),
894                            page_counts->nelts);
895
896  /* open target stream. */
897  stream = svn_stream_checksummed2(svn_stream_from_aprfile2(index_file, TRUE,
898                                                            local_pool),
899                                   NULL, checksum, svn_checksum_md5, FALSE,
900                                   result_pool);
901
902
903  /* write header info */
904  SVN_ERR(svn_stream_puts(stream, L2P_STREAM_PREFIX));
905  SVN_ERR(stream_write_encoded(stream, revision));
906  SVN_ERR(stream_write_encoded(stream, ffd->l2p_page_size));
907  SVN_ERR(stream_write_encoded(stream, page_counts->nelts));
908  SVN_ERR(stream_write_encoded(stream, page_sizes->nelts));
909
910  /* write the revision table */
911  for (i = 0; i < page_counts->nelts; ++i)
912    {
913      apr_uint64_t value = APR_ARRAY_IDX(page_counts, i, apr_uint64_t);
914      SVN_ERR(stream_write_encoded(stream, value));
915    }
916
917  /* write the page table */
918  for (i = 0; i < page_sizes->nelts; ++i)
919    {
920      apr_uint64_t value = APR_ARRAY_IDX(page_sizes, i, apr_uint64_t);
921      SVN_ERR(stream_write_encoded(stream, value));
922      value = APR_ARRAY_IDX(entry_counts, i, apr_uint64_t);
923      SVN_ERR(stream_write_encoded(stream, value));
924    }
925
926  /* append page contents and implicitly close STREAM */
927  SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool),
928                           stream, NULL, NULL, local_pool));
929
930  svn_pool_destroy(local_pool);
931
932  return SVN_NO_ERROR;
933}
934
935/* If REV_FILE->L2P_STREAM is NULL, create a new stream for the log-to-phys
936 * index for REVISION in FS and return it in REV_FILE.
937 */
938static svn_error_t *
939auto_open_l2p_index(svn_fs_fs__revision_file_t *rev_file,
940                    svn_fs_t *fs,
941                    svn_revnum_t revision)
942{
943  if (rev_file->l2p_stream == NULL)
944    {
945      fs_fs_data_t *ffd = fs->fsap_data;
946
947      SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
948      SVN_ERR(packed_stream_open(&rev_file->l2p_stream,
949                                 rev_file->file,
950                                 rev_file->l2p_offset,
951                                 rev_file->p2l_offset,
952                                 L2P_STREAM_PREFIX,
953                                 (apr_size_t)ffd->block_size,
954                                 rev_file->pool,
955                                 rev_file->pool));
956    }
957
958  return SVN_NO_ERROR;
959}
960
961/* Read the header data structure of the log-to-phys index for REVISION
962 * in FS and return it in *HEADER, allocated in RESULT_POOL.  Use REV_FILE
963 * to access on-disk data.  Use SCRATCH_POOL for temporary allocations.
964 */
965static svn_error_t *
966get_l2p_header_body(l2p_header_t **header,
967                    svn_fs_fs__revision_file_t *rev_file,
968                    svn_fs_t *fs,
969                    svn_revnum_t revision,
970                    apr_pool_t *result_pool,
971                    apr_pool_t *scratch_pool)
972{
973  fs_fs_data_t *ffd = fs->fsap_data;
974  apr_uint64_t value;
975  apr_size_t i;
976  apr_size_t page, page_count;
977  apr_off_t offset;
978  l2p_header_t *result = apr_pcalloc(result_pool, sizeof(*result));
979  apr_size_t page_table_index;
980  svn_revnum_t next_rev;
981
982  pair_cache_key_t key;
983  key.revision = rev_file->start_revision;
984  key.second = rev_file->is_packed;
985
986  SVN_ERR(auto_open_l2p_index(rev_file, fs, revision));
987  packed_stream_seek(rev_file->l2p_stream, 0);
988
989  /* Read the table sizes.  Check the data for plausibility and
990   * consistency with other bits. */
991  SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
992  result->first_revision = (svn_revnum_t)value;
993  if (result->first_revision != rev_file->start_revision)
994    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
995                  _("Index rev / pack file revision numbers do not match"));
996
997  SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
998  result->page_size = (apr_uint32_t)value;
999  if (!result->page_size || (result->page_size & (result->page_size - 1)))
1000    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1001                            _("L2P index page size is not a power of two"));
1002
1003  SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1004  result->revision_count = (int)value;
1005  if (   result->revision_count != 1
1006      && result->revision_count != (apr_uint64_t)ffd->max_files_per_dir)
1007    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1008                            _("Invalid number of revisions in L2P index"));
1009
1010  SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1011  page_count = (apr_size_t)value;
1012  if (page_count < result->revision_count)
1013    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1014                            _("Fewer L2P index pages than revisions"));
1015  if (page_count > (rev_file->p2l_offset - rev_file->l2p_offset) / 2)
1016    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1017                            _("L2P index page count implausibly large"));
1018
1019  next_rev = result->first_revision + (svn_revnum_t)result->revision_count;
1020  if (result->first_revision > revision || next_rev <= revision)
1021    return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1022                      _("Corrupt L2P index for r%ld only covers r%ld:%ld"),
1023                      revision, result->first_revision, next_rev);
1024
1025  /* allocate the page tables */
1026  result->page_table
1027    = apr_pcalloc(result_pool, page_count * sizeof(*result->page_table));
1028  result->page_table_index
1029    = apr_pcalloc(result_pool, (result->revision_count + 1)
1030                             * sizeof(*result->page_table_index));
1031
1032  /* read per-revision page table sizes (i.e. number of pages per rev) */
1033  page_table_index = 0;
1034  result->page_table_index[0] = page_table_index;
1035
1036  for (i = 0; i < result->revision_count; ++i)
1037    {
1038      SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1039      if (value == 0)
1040        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1041                                _("Revision with no L2P index pages"));
1042
1043      page_table_index += (apr_size_t)value;
1044      if (page_table_index > page_count)
1045        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1046                                _("L2P page table exceeded"));
1047
1048      result->page_table_index[i+1] = page_table_index;
1049    }
1050
1051  if (page_table_index != page_count)
1052    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1053                 _("Revisions do not cover the full L2P index page table"));
1054
1055  /* read actual page tables */
1056  for (page = 0; page < page_count; ++page)
1057    {
1058      SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1059      if (value == 0)
1060        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1061                                _("Empty L2P index page"));
1062
1063      result->page_table[page].size = (apr_uint32_t)value;
1064      SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1065      if (value > result->page_size)
1066        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1067                                _("Page exceeds L2P index page size"));
1068
1069      result->page_table[page].entry_count = (apr_uint32_t)value;
1070    }
1071
1072  /* correct the page description offsets */
1073  offset = packed_stream_offset(rev_file->l2p_stream);
1074  for (page = 0; page < page_count; ++page)
1075    {
1076      result->page_table[page].offset = offset;
1077      offset += result->page_table[page].size;
1078    }
1079
1080  /* return and cache the header */
1081  *header = result;
1082  SVN_ERR(svn_cache__set(ffd->l2p_header_cache, &key, result, scratch_pool));
1083
1084  return SVN_NO_ERROR;
1085}
1086
1087/* Data structure that describes which l2p page info shall be extracted
1088 * from the cache and contains the fields that receive the result.
1089 */
1090typedef struct l2p_page_info_baton_t
1091{
1092  /* input data: we want the page covering (REVISION,ITEM_INDEX) */
1093  svn_revnum_t revision;
1094  apr_uint64_t item_index;
1095
1096  /* out data */
1097  /* page location and size of the page within the l2p index file */
1098  l2p_page_table_entry_t entry;
1099
1100  /* page number within the pages for REVISION (not l2p index global!) */
1101  apr_uint32_t page_no;
1102
1103  /* offset of ITEM_INDEX within that page */
1104  apr_uint32_t page_offset;
1105
1106  /* revision identifying the l2p index file, also the first rev in that */
1107  svn_revnum_t first_revision;
1108} l2p_page_info_baton_t;
1109
1110
1111/* Utility function that copies the info requested by BATON->REVISION and
1112 * BATON->ITEM_INDEX and from HEADER and PAGE_TABLE into the output fields
1113 * of *BATON.  Use SCRATCH_POOL for temporary allocations.
1114 */
1115static svn_error_t *
1116l2p_page_info_copy(l2p_page_info_baton_t *baton,
1117                   const l2p_header_t *header,
1118                   const l2p_page_table_entry_t *page_table,
1119                   const apr_size_t *page_table_index,
1120                   apr_pool_t *scratch_pool)
1121{
1122  /* revision offset within the index file */
1123  apr_size_t rel_revision = baton->revision - header->first_revision;
1124  if (rel_revision >= header->revision_count)
1125    return svn_error_createf(SVN_ERR_FS_INDEX_REVISION , NULL,
1126                             _("Revision %ld not covered by item index"),
1127                             baton->revision);
1128
1129  /* select the relevant page */
1130  if (baton->item_index < header->page_size)
1131    {
1132      /* most revs fit well into a single page */
1133      baton->page_offset = (apr_uint32_t)baton->item_index;
1134      baton->page_no = 0;
1135      baton->entry = page_table[page_table_index[rel_revision]];
1136    }
1137  else
1138    {
1139      const l2p_page_table_entry_t *first_entry;
1140      const l2p_page_table_entry_t *last_entry;
1141      apr_uint64_t max_item_index;
1142
1143      /* range of pages for this rev */
1144      first_entry = page_table + page_table_index[rel_revision];
1145      last_entry = page_table + page_table_index[rel_revision + 1];
1146
1147      /* do we hit a valid index page? */
1148      max_item_index =   (apr_uint64_t)header->page_size
1149                       * (last_entry - first_entry);
1150      if (baton->item_index >= max_item_index)
1151        return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
1152                                _("Item index %s exceeds l2p limit "
1153                                  "of %s for revision %ld"),
1154                                apr_psprintf(scratch_pool,
1155                                             "%" APR_UINT64_T_FMT,
1156                                             baton->item_index),
1157                                apr_psprintf(scratch_pool,
1158                                             "%" APR_UINT64_T_FMT,
1159                                             max_item_index),
1160                                baton->revision);
1161
1162      /* all pages are of the same size and full, except for the last one */
1163      baton->page_offset = (apr_uint32_t)(baton->item_index % header->page_size);
1164      baton->page_no = (apr_uint32_t)(baton->item_index / header->page_size);
1165      baton->entry = first_entry[baton->page_no];
1166    }
1167
1168  baton->first_revision = header->first_revision;
1169
1170  return SVN_NO_ERROR;
1171}
1172
1173/* Implement svn_cache__partial_getter_func_t: copy the data requested in
1174 * l2p_page_info_baton_t *BATON from l2p_header_t *DATA into the output
1175 * fields in *BATON.
1176 */
1177static svn_error_t *
1178l2p_page_info_access_func(void **out,
1179                          const void *data,
1180                          apr_size_t data_len,
1181                          void *baton,
1182                          apr_pool_t *result_pool)
1183{
1184  /* resolve all pointer values of in-cache data */
1185  const l2p_header_t *header = data;
1186  const l2p_page_table_entry_t *page_table
1187    = svn_temp_deserializer__ptr(header,
1188                                 (const void *const *)&header->page_table);
1189  const apr_size_t *page_table_index
1190    = svn_temp_deserializer__ptr(header,
1191                           (const void *const *)&header->page_table_index);
1192
1193  /* copy the info */
1194  return l2p_page_info_copy(baton, header, page_table, page_table_index,
1195                            result_pool);
1196}
1197
1198/* Get the page info requested in *BATON from FS and set the output fields
1199 * in *BATON.  Use REV_FILE for on-disk file access.
1200 * Use SCRATCH_POOL for temporary allocations.
1201 */
1202static svn_error_t *
1203get_l2p_page_info(l2p_page_info_baton_t *baton,
1204                  svn_fs_fs__revision_file_t *rev_file,
1205                  svn_fs_t *fs,
1206                  apr_pool_t *scratch_pool)
1207{
1208  fs_fs_data_t *ffd = fs->fsap_data;
1209  l2p_header_t *result;
1210  svn_boolean_t is_cached = FALSE;
1211  void *dummy = NULL;
1212
1213  /* try to find the info in the cache */
1214  pair_cache_key_t key;
1215  key.revision = rev_file->start_revision;
1216  key.second = rev_file->is_packed;
1217  SVN_ERR(svn_cache__get_partial((void**)&dummy, &is_cached,
1218                                 ffd->l2p_header_cache, &key,
1219                                 l2p_page_info_access_func, baton,
1220                                 scratch_pool));
1221  if (is_cached)
1222    return SVN_NO_ERROR;
1223
1224  /* read from disk, cache and copy the result */
1225  SVN_ERR(get_l2p_header_body(&result, rev_file, fs, baton->revision,
1226                              scratch_pool, scratch_pool));
1227  SVN_ERR(l2p_page_info_copy(baton, result, result->page_table,
1228                             result->page_table_index, scratch_pool));
1229
1230  return SVN_NO_ERROR;
1231}
1232
1233/* Data request structure used by l2p_page_table_access_func.
1234 */
1235typedef struct l2p_page_table_baton_t
1236{
1237  /* revision for which to read the page table */
1238  svn_revnum_t revision;
1239
1240  /* page table entries (of type l2p_page_table_entry_t).
1241   * Must be created by caller and will be filled by callee. */
1242  apr_array_header_t *pages;
1243} l2p_page_table_baton_t;
1244
1245/* Implement svn_cache__partial_getter_func_t: copy the data requested in
1246 * l2p_page_baton_t *BATON from l2p_page_t *DATA into BATON->PAGES and *OUT.
1247 */
1248static svn_error_t *
1249l2p_page_table_access_func(void **out,
1250                           const void *data,
1251                           apr_size_t data_len,
1252                           void *baton,
1253                           apr_pool_t *result_pool)
1254{
1255  /* resolve in-cache pointers */
1256  l2p_page_table_baton_t *table_baton = baton;
1257  const l2p_header_t *header = (const l2p_header_t *)data;
1258  const l2p_page_table_entry_t *page_table
1259    = svn_temp_deserializer__ptr(header,
1260                                 (const void *const *)&header->page_table);
1261  const apr_size_t *page_table_index
1262    = svn_temp_deserializer__ptr(header,
1263                           (const void *const *)&header->page_table_index);
1264
1265  /* copy the revision's page table into BATON */
1266  apr_size_t rel_revision = table_baton->revision - header->first_revision;
1267  if (rel_revision < header->revision_count)
1268    {
1269      const l2p_page_table_entry_t *entry
1270        = page_table + page_table_index[rel_revision];
1271      const l2p_page_table_entry_t *last_entry
1272        = page_table + page_table_index[rel_revision + 1];
1273
1274      for (; entry < last_entry; ++entry)
1275        APR_ARRAY_PUSH(table_baton->pages, l2p_page_table_entry_t)
1276          = *entry;
1277    }
1278
1279  /* set output as a courtesy to the caller */
1280  *out = table_baton->pages;
1281
1282  return SVN_NO_ERROR;
1283}
1284
1285/* Read the l2p index page table for REVISION in FS from cache and return
1286 * it in PAGES.  The later must be provided by the caller (and can be
1287 * re-used); existing entries will be removed before writing the result.
1288 * If the data cannot be found in the cache, the result will be empty
1289 * (it never can be empty for a valid REVISION if the data is cached).
1290 * Use the info from REV_FILE to determine pack / rev file properties.
1291 * Use SCRATCH_POOL for temporary allocations.
1292 */
1293static svn_error_t *
1294get_l2p_page_table(apr_array_header_t *pages,
1295                   svn_fs_t *fs,
1296                   svn_fs_fs__revision_file_t *rev_file,
1297                   svn_revnum_t revision,
1298                   apr_pool_t *scratch_pool)
1299{
1300  fs_fs_data_t *ffd = fs->fsap_data;
1301  svn_boolean_t is_cached = FALSE;
1302  l2p_page_table_baton_t baton;
1303
1304  pair_cache_key_t key;
1305  key.revision = rev_file->start_revision;
1306  key.second = rev_file->is_packed;
1307
1308  apr_array_clear(pages);
1309  baton.revision = revision;
1310  baton.pages = pages;
1311  SVN_ERR(svn_cache__get_partial((void**)&pages, &is_cached,
1312                                 ffd->l2p_header_cache, &key,
1313                                 l2p_page_table_access_func, &baton,
1314                                 scratch_pool));
1315
1316  return SVN_NO_ERROR;
1317}
1318
1319/* From the log-to-phys index file starting at START_REVISION in FS, read
1320 * the mapping page identified by TABLE_ENTRY and return it in *PAGE.
1321 * Use REV_FILE to access on-disk files.
1322 * Use RESULT_POOL for allocations.
1323 */
1324static svn_error_t *
1325get_l2p_page(l2p_page_t **page,
1326             svn_fs_fs__revision_file_t *rev_file,
1327             svn_fs_t *fs,
1328             svn_revnum_t start_revision,
1329             l2p_page_table_entry_t *table_entry,
1330             apr_pool_t *result_pool)
1331{
1332  apr_uint32_t i;
1333  l2p_page_t *result = apr_pcalloc(result_pool, sizeof(*result));
1334  apr_uint64_t last_value = 0;
1335
1336  /* open index file and select page */
1337  SVN_ERR(auto_open_l2p_index(rev_file, fs, start_revision));
1338  packed_stream_seek(rev_file->l2p_stream, table_entry->offset);
1339
1340  /* initialize the page content */
1341  result->entry_count = table_entry->entry_count;
1342  result->offsets = apr_pcalloc(result_pool, result->entry_count
1343                                           * sizeof(*result->offsets));
1344
1345  /* read all page entries (offsets in rev file and container sub-items) */
1346  for (i = 0; i < result->entry_count; ++i)
1347    {
1348      apr_uint64_t value = 0;
1349      SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1350      last_value += decode_int(value);
1351      result->offsets[i] = last_value - 1;
1352    }
1353
1354  /* After reading all page entries, the read cursor must have moved by
1355   * TABLE_ENTRY->SIZE bytes. */
1356  if (   packed_stream_offset(rev_file->l2p_stream)
1357      != table_entry->offset + table_entry->size)
1358    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1359                _("L2P actual page size does not match page table value."));
1360
1361  *page = result;
1362
1363  return SVN_NO_ERROR;
1364}
1365
1366/* Utility function.  Read the l2p index pages for REVISION in FS from
1367 * REV_FILE and put them into the cache.  Skip page number EXLCUDED_PAGE_NO
1368 * (use -1 for 'skip none') and pages outside the MIN_OFFSET, MAX_OFFSET
1369 * range in the l2p index file.  The index is being identified by
1370 * FIRST_REVISION.  PAGES is a scratch container provided by the caller.
1371 * SCRATCH_POOL is used for temporary allocations.
1372 *
1373 * This function may be a no-op if the header cache lookup fails / misses.
1374 */
1375static svn_error_t *
1376prefetch_l2p_pages(svn_boolean_t *end,
1377                   svn_fs_t *fs,
1378                   svn_fs_fs__revision_file_t *rev_file,
1379                   svn_revnum_t first_revision,
1380                   svn_revnum_t revision,
1381                   apr_array_header_t *pages,
1382                   int exlcuded_page_no,
1383                   apr_off_t min_offset,
1384                   apr_off_t max_offset,
1385                   apr_pool_t *scratch_pool)
1386{
1387  fs_fs_data_t *ffd = fs->fsap_data;
1388  int i;
1389  apr_pool_t *iterpool;
1390  svn_fs_fs__page_cache_key_t key = { 0 };
1391
1392  /* Parameter check. */
1393  if (min_offset < 0)
1394    min_offset = 0;
1395
1396  if (max_offset <= 0)
1397    {
1398      /* Nothing to do */
1399      *end = TRUE;
1400      return SVN_NO_ERROR;
1401    }
1402
1403  /* get the page table for REVISION from cache */
1404  *end = FALSE;
1405  SVN_ERR(get_l2p_page_table(pages, fs, rev_file, revision, scratch_pool));
1406  if (pages->nelts == 0 || rev_file->l2p_stream == NULL)
1407    {
1408      /* not found -> we can't continue without hitting the disk again */
1409      *end = TRUE;
1410      return SVN_NO_ERROR;
1411    }
1412
1413  /* prefetch pages individually until all are done or we found one in
1414   * the cache */
1415  iterpool = svn_pool_create(scratch_pool);
1416  assert(revision <= APR_UINT32_MAX);
1417  key.revision = (apr_uint32_t)revision;
1418  key.is_packed = rev_file->is_packed;
1419
1420  for (i = 0; i < pages->nelts && !*end; ++i)
1421    {
1422      svn_boolean_t is_cached;
1423
1424      l2p_page_table_entry_t *entry
1425        = &APR_ARRAY_IDX(pages, i, l2p_page_table_entry_t);
1426      svn_pool_clear(iterpool);
1427
1428      if (i == exlcuded_page_no)
1429        continue;
1430
1431      /* skip pages outside the specified index file range */
1432      if (   entry->offset < (apr_uint64_t)min_offset
1433          || entry->offset + entry->size > (apr_uint64_t)max_offset)
1434        {
1435          *end = TRUE;
1436          continue;
1437        }
1438
1439      /* page already in cache? */
1440      key.page = i;
1441      SVN_ERR(svn_cache__has_key(&is_cached, ffd->l2p_page_cache,
1442                                 &key, iterpool));
1443      if (!is_cached)
1444        {
1445          /* no in cache -> read from stream (data already buffered in APR)
1446           * and cache the result */
1447          l2p_page_t *page = NULL;
1448          SVN_ERR(get_l2p_page(&page, rev_file, fs, first_revision, entry,
1449                               iterpool));
1450
1451          SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page,
1452                                 iterpool));
1453        }
1454    }
1455
1456  svn_pool_destroy(iterpool);
1457
1458  return SVN_NO_ERROR;
1459}
1460
1461/* Request data structure for l2p_entry_access_func.
1462 */
1463typedef struct l2p_entry_baton_t
1464{
1465  /* in data */
1466  /* revision. Used for error messages only */
1467  svn_revnum_t revision;
1468
1469  /* item index to look up. Used for error messages only */
1470  apr_uint64_t item_index;
1471
1472  /* offset within the cached page */
1473  apr_uint32_t page_offset;
1474
1475  /* out data */
1476  /* absolute item or container offset in rev / pack file */
1477  apr_uint64_t offset;
1478} l2p_entry_baton_t;
1479
1480/* Return the rev / pack file offset of the item at BATON->PAGE_OFFSET in
1481 * OFFSETS of PAGE and write it to *OFFSET.
1482 */
1483static svn_error_t *
1484l2p_page_get_entry(l2p_entry_baton_t *baton,
1485                   const l2p_page_t *page,
1486                   const apr_uint64_t *offsets,
1487                   apr_pool_t *scratch_pool)
1488{
1489  /* overflow check */
1490  if (page->entry_count <= baton->page_offset)
1491    return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
1492                             _("Item index %s"
1493                               " too large in revision %ld"),
1494                             apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT,
1495                                          baton->item_index),
1496                             baton->revision);
1497
1498  /* return the result */
1499  baton->offset = offsets[baton->page_offset];
1500
1501  return SVN_NO_ERROR;
1502}
1503
1504/* Implement svn_cache__partial_getter_func_t: copy the data requested in
1505 * l2p_entry_baton_t *BATON from l2p_page_t *DATA into BATON->OFFSET.
1506 * *OUT remains unchanged.
1507 */
1508static svn_error_t *
1509l2p_entry_access_func(void **out,
1510                      const void *data,
1511                      apr_size_t data_len,
1512                      void *baton,
1513                      apr_pool_t *result_pool)
1514{
1515  /* resolve all in-cache pointers */
1516  const l2p_page_t *page = data;
1517  const apr_uint64_t *offsets
1518    = svn_temp_deserializer__ptr(page, (const void *const *)&page->offsets);
1519
1520  /* return the requested data */
1521  return l2p_page_get_entry(baton, page, offsets, result_pool);
1522}
1523
1524/* Using the log-to-phys indexes in FS, find the absolute offset in the
1525 * rev file for (REVISION, ITEM_INDEX) and return it in *OFFSET.
1526 * Use SCRATCH_POOL for temporary allocations.
1527 */
1528static svn_error_t *
1529l2p_index_lookup(apr_off_t *offset,
1530                 svn_fs_t *fs,
1531                 svn_fs_fs__revision_file_t *rev_file,
1532                 svn_revnum_t revision,
1533                 apr_uint64_t item_index,
1534                 apr_pool_t *scratch_pool)
1535{
1536  fs_fs_data_t *ffd = fs->fsap_data;
1537  l2p_page_info_baton_t info_baton;
1538  l2p_entry_baton_t page_baton;
1539  l2p_page_t *page = NULL;
1540  svn_fs_fs__page_cache_key_t key = { 0 };
1541  svn_boolean_t is_cached = FALSE;
1542  void *dummy = NULL;
1543
1544  /* read index master data structure and extract the info required to
1545   * access the l2p index page for (REVISION,ITEM_INDEX)*/
1546  info_baton.revision = revision;
1547  info_baton.item_index = item_index;
1548  SVN_ERR(get_l2p_page_info(&info_baton, rev_file, fs, scratch_pool));
1549
1550  /* try to find the page in the cache and get the OFFSET from it */
1551  page_baton.revision = revision;
1552  page_baton.item_index = item_index;
1553  page_baton.page_offset = info_baton.page_offset;
1554
1555  assert(revision <= APR_UINT32_MAX);
1556  key.revision = (apr_uint32_t)revision;
1557  key.is_packed = svn_fs_fs__is_packed_rev(fs, revision);
1558  key.page = info_baton.page_no;
1559
1560  SVN_ERR(svn_cache__get_partial(&dummy, &is_cached,
1561                                 ffd->l2p_page_cache, &key,
1562                                 l2p_entry_access_func, &page_baton,
1563                                 scratch_pool));
1564
1565  if (!is_cached)
1566    {
1567      /* we need to read the info from disk (might already be in the
1568       * APR file buffer, though) */
1569      apr_array_header_t *pages;
1570      svn_revnum_t prefetch_revision;
1571      svn_revnum_t last_revision
1572        = info_baton.first_revision
1573          + (key.is_packed ? ffd->max_files_per_dir : 1);
1574      svn_boolean_t end;
1575      apr_off_t max_offset
1576        = APR_ALIGN(info_baton.entry.offset + info_baton.entry.size,
1577                    ffd->block_size);
1578      apr_off_t min_offset = max_offset - ffd->block_size;
1579
1580      /* read the relevant page */
1581      SVN_ERR(get_l2p_page(&page, rev_file, fs, info_baton.first_revision,
1582                           &info_baton.entry, scratch_pool));
1583
1584      /* cache the page and extract the result we need */
1585      SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page, scratch_pool));
1586      SVN_ERR(l2p_page_get_entry(&page_baton, page, page->offsets,
1587                                 scratch_pool));
1588
1589      if (ffd->use_block_read)
1590        {
1591          apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1592
1593          /* prefetch pages from following and preceding revisions */
1594          pages = apr_array_make(scratch_pool, 16,
1595                                 sizeof(l2p_page_table_entry_t));
1596          end = FALSE;
1597          for (prefetch_revision = revision;
1598              prefetch_revision < last_revision && !end;
1599              ++prefetch_revision)
1600            {
1601              int excluded_page_no = prefetch_revision == revision
1602                                  ? info_baton.page_no
1603                                  : -1;
1604              svn_pool_clear(iterpool);
1605
1606              SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file,
1607                                        info_baton.first_revision,
1608                                        prefetch_revision, pages,
1609                                        excluded_page_no, min_offset,
1610                                        max_offset, iterpool));
1611            }
1612
1613          end = FALSE;
1614          for (prefetch_revision = revision-1;
1615              prefetch_revision >= info_baton.first_revision && !end;
1616              --prefetch_revision)
1617            {
1618              svn_pool_clear(iterpool);
1619
1620              SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file,
1621                                        info_baton.first_revision,
1622                                        prefetch_revision, pages, -1,
1623                                        min_offset, max_offset, iterpool));
1624            }
1625
1626          svn_pool_destroy(iterpool);
1627        }
1628    }
1629
1630  *offset = page_baton.offset;
1631
1632  return SVN_NO_ERROR;
1633}
1634
1635/* Using the log-to-phys proto index in transaction TXN_ID in FS, find the
1636 * absolute offset in the proto rev file for the given ITEM_INDEX and return
1637 * it in *OFFSET.  Use SCRATCH_POOL for temporary allocations.
1638 */
1639static svn_error_t *
1640l2p_proto_index_lookup(apr_off_t *offset,
1641                       svn_fs_t *fs,
1642                       const svn_fs_fs__id_part_t *txn_id,
1643                       apr_uint64_t item_index,
1644                       apr_pool_t *scratch_pool)
1645{
1646  svn_boolean_t eof = FALSE;
1647  apr_file_t *file = NULL;
1648  SVN_ERR(svn_io_file_open(&file,
1649                           svn_fs_fs__path_l2p_proto_index(fs, txn_id,
1650                                                           scratch_pool),
1651                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1652                           scratch_pool));
1653
1654  /* process all entries until we fail due to EOF */
1655  *offset = -1;
1656  while (!eof)
1657    {
1658      l2p_proto_entry_t entry;
1659
1660      /* (attempt to) read the next entry from the source */
1661      SVN_ERR(read_l2p_entry_from_proto_index(file, &entry, &eof,
1662                                              scratch_pool));
1663
1664      /* handle new revision */
1665      if (!eof && entry.item_index == item_index)
1666        {
1667          *offset = (apr_off_t)entry.offset - 1;
1668          break;
1669        }
1670    }
1671
1672  SVN_ERR(svn_io_file_close(file, scratch_pool));
1673
1674  return SVN_NO_ERROR;
1675}
1676
1677/* Read the log-to-phys header info of the index covering REVISION from FS
1678 * and return it in *HEADER.  REV_FILE provides the pack / rev status.
1679 * Allocate *HEADER in RESULT_POOL, use SCRATCH_POOL for temporary
1680 * allocations.
1681 */
1682static svn_error_t *
1683get_l2p_header(l2p_header_t **header,
1684               svn_fs_fs__revision_file_t *rev_file,
1685               svn_fs_t *fs,
1686               svn_revnum_t revision,
1687               apr_pool_t *result_pool,
1688               apr_pool_t *scratch_pool)
1689{
1690  fs_fs_data_t *ffd = fs->fsap_data;
1691  svn_boolean_t is_cached = FALSE;
1692
1693  /* first, try cache lookop */
1694  pair_cache_key_t key;
1695  key.revision = rev_file->start_revision;
1696  key.second = rev_file->is_packed;
1697  SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->l2p_header_cache,
1698                         &key, result_pool));
1699  if (is_cached)
1700    return SVN_NO_ERROR;
1701
1702  /* read from disk and cache the result */
1703  SVN_ERR(get_l2p_header_body(header, rev_file, fs, revision, result_pool,
1704                              scratch_pool));
1705
1706  return SVN_NO_ERROR;
1707}
1708
1709svn_error_t *
1710svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids,
1711                           svn_fs_t *fs,
1712                           svn_revnum_t start_rev,
1713                           apr_size_t count,
1714                           apr_pool_t *result_pool,
1715                           apr_pool_t *scratch_pool)
1716{
1717  l2p_header_t *header = NULL;
1718  svn_revnum_t revision;
1719  svn_revnum_t last_rev = (svn_revnum_t)(start_rev + count);
1720  svn_fs_fs__revision_file_t *rev_file;
1721  apr_pool_t *header_pool = svn_pool_create(scratch_pool);
1722
1723  /* read index master data structure for the index covering START_REV */
1724  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start_rev,
1725                                           header_pool, header_pool));
1726  SVN_ERR(get_l2p_header(&header, rev_file, fs, start_rev, header_pool,
1727                         header_pool));
1728  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1729
1730  /* Determine the length of the item index list for each rev.
1731   * Read new index headers as required. */
1732  *max_ids = apr_array_make(result_pool, (int)count, sizeof(apr_uint64_t));
1733  for (revision = start_rev; revision < last_rev; ++revision)
1734    {
1735      apr_uint64_t full_page_count;
1736      apr_uint64_t item_count;
1737      apr_size_t first_page_index, last_page_index;
1738
1739      if (revision - header->first_revision >= header->revision_count)
1740        {
1741          /* need to read the next index. Clear up memory used for the
1742           * previous one.  Note that intermittent pack runs do not change
1743           * the number of items in a revision, i.e. there is no consistency
1744           * issue here. */
1745          svn_pool_clear(header_pool);
1746          SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
1747                                                  header_pool, header_pool));
1748          SVN_ERR(get_l2p_header(&header, rev_file, fs, revision,
1749                                 header_pool, header_pool));
1750          SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1751        }
1752
1753      /* in a revision with N index pages, the first N-1 index pages are
1754       * "full", i.e. contain HEADER->PAGE_SIZE entries */
1755      first_page_index
1756         = header->page_table_index[revision - header->first_revision];
1757      last_page_index
1758         = header->page_table_index[revision - header->first_revision + 1];
1759      full_page_count = last_page_index - first_page_index - 1;
1760      item_count = full_page_count * header->page_size
1761                 + header->page_table[last_page_index - 1].entry_count;
1762
1763      APR_ARRAY_PUSH(*max_ids, apr_uint64_t) = item_count;
1764    }
1765
1766  svn_pool_destroy(header_pool);
1767  return SVN_NO_ERROR;
1768}
1769
1770svn_error_t *
1771svn_fs_fs__item_offset(apr_off_t *absolute_position,
1772                       svn_fs_t *fs,
1773                       svn_fs_fs__revision_file_t *rev_file,
1774                       svn_revnum_t revision,
1775                       const svn_fs_fs__id_part_t *txn_id,
1776                       apr_uint64_t item_index,
1777                       apr_pool_t *scratch_pool)
1778{
1779  svn_error_t *err = SVN_NO_ERROR;
1780  if (txn_id)
1781    {
1782      if (svn_fs_fs__use_log_addressing(fs))
1783        {
1784          /* the txn is going to produce a rev with logical addressing.
1785             So, we need to get our info from the (proto) index file. */
1786          SVN_ERR(l2p_proto_index_lookup(absolute_position, fs, txn_id,
1787                                         item_index, scratch_pool));
1788        }
1789      else
1790        {
1791          /* for data in txns, item_index *is* the offset */
1792          *absolute_position = item_index;
1793        }
1794    }
1795  else if (svn_fs_fs__use_log_addressing(fs))
1796    {
1797      /* ordinary index lookup */
1798      SVN_ERR(l2p_index_lookup(absolute_position, fs, rev_file, revision,
1799                               item_index, scratch_pool));
1800    }
1801  else if (rev_file->is_packed)
1802    {
1803      /* pack file with physical addressing */
1804      apr_off_t rev_offset;
1805      SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, revision,
1806                                           scratch_pool));
1807      *absolute_position = rev_offset + item_index;
1808    }
1809  else
1810    {
1811      /* for non-packed revs with physical addressing,
1812         item_index *is* the offset */
1813      *absolute_position = item_index;
1814    }
1815
1816  return svn_error_trace(err);
1817}
1818
1819/*
1820 * phys-to-log index
1821 */
1822svn_error_t *
1823svn_fs_fs__p2l_proto_index_open(apr_file_t **proto_index,
1824                                const char *file_name,
1825                                apr_pool_t *result_pool)
1826{
1827  SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE
1828                           | APR_CREATE | APR_APPEND | APR_BUFFERED,
1829                           APR_OS_DEFAULT, result_pool));
1830
1831  return SVN_NO_ERROR;
1832}
1833
1834
1835svn_error_t *
1836svn_fs_fs__p2l_proto_index_add_entry(apr_file_t *proto_index,
1837                                     const svn_fs_fs__p2l_entry_t *entry,
1838                                     apr_pool_t *scratch_pool)
1839{
1840  apr_uint64_t revision;
1841
1842  /* Make sure all signed elements of ENTRY have non-negative values.
1843   *
1844   * For file offsets and sizes, this is a given as we use them to describe
1845   * absolute positions and sizes.  For revisions, SVN_INVALID_REVNUM is
1846   * valid, hence we have to shift it by 1.
1847   */
1848  SVN_ERR_ASSERT(entry->offset >= 0);
1849  SVN_ERR_ASSERT(entry->size >= 0);
1850  SVN_ERR_ASSERT(   entry->item.revision >= 0
1851                 || entry->item.revision == SVN_INVALID_REVNUM);
1852
1853  revision = entry->item.revision == SVN_INVALID_REVNUM
1854           ? 0
1855           : ((apr_uint64_t)entry->item.revision + 1);
1856
1857  /* Now, all values will nicely convert to uint64. */
1858  /* Make sure to keep P2L_PROTO_INDEX_ENTRY_SIZE consistent with this: */
1859
1860  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->offset,
1861                                      scratch_pool));
1862  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->size,
1863                                      scratch_pool));
1864  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->type,
1865                                      scratch_pool));
1866  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->fnv1_checksum,
1867                                      scratch_pool));
1868  SVN_ERR(write_uint64_to_proto_index(proto_index, revision,
1869                                      scratch_pool));
1870  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->item.number,
1871                                      scratch_pool));
1872
1873  return SVN_NO_ERROR;
1874}
1875
1876/* Read *ENTRY from log-to-phys PROTO_INDEX file and indicate end-of-file
1877 * in *EOF, or error out in that case if EOF is NULL.  *ENTRY is in an
1878 * undefined state if an end-of-file occurred.
1879 * Use SCRATCH_POOL for temporary allocations.
1880 */
1881static svn_error_t *
1882read_p2l_entry_from_proto_index(apr_file_t *proto_index,
1883                                svn_fs_fs__p2l_entry_t *entry,
1884                                svn_boolean_t *eof,
1885                                apr_pool_t *scratch_pool)
1886{
1887  apr_uint64_t revision;
1888
1889  SVN_ERR(read_off_t_from_proto_index(proto_index, &entry->offset,
1890                                      eof, scratch_pool));
1891  SVN_ERR(read_off_t_from_proto_index(proto_index, &entry->size,
1892                                      eof, scratch_pool));
1893  SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->type,
1894                                       eof, scratch_pool));
1895  SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->fnv1_checksum,
1896                                       eof, scratch_pool));
1897  SVN_ERR(read_uint64_from_proto_index(proto_index, &revision,
1898                                       eof, scratch_pool));
1899  SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->item.number,
1900                                       eof, scratch_pool));
1901
1902  /* Do the inverse REVSION number conversion (see
1903   * svn_fs_fs__p2l_proto_index_add_entry), if we actually read a complete
1904   * record.
1905   */
1906  if (!eof || !*eof)
1907    {
1908      /* Be careful with the arithmetics here (overflows and wrap-around): */
1909      if (revision > 0 && revision - 1 > LONG_MAX)
1910        return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
1911                                _("Revision 0x%s too large, max = 0x%s"),
1912                                apr_psprintf(scratch_pool,
1913                                             "%" APR_UINT64_T_HEX_FMT,
1914                                             revision),
1915                                apr_psprintf(scratch_pool,
1916                                             "%" APR_UINT64_T_HEX_FMT,
1917                                             (apr_uint64_t)LONG_MAX));
1918
1919      /* Shortening conversion from unsigned to signed int is well-defined
1920       * and not lossy in C because the value can be represented in the
1921       * target type.  Also, cast to 'long' instead of 'svn_revnum_t' here
1922       * to provoke a compiler warning if those types should differ and we
1923       * would need to change the overflow checking logic.
1924       */
1925      entry->item.revision = revision == 0
1926                           ? SVN_INVALID_REVNUM
1927                           : (long)(revision - 1);
1928    }
1929
1930  return SVN_NO_ERROR;
1931}
1932
1933svn_error_t *
1934svn_fs_fs__p2l_proto_index_next_offset(apr_off_t *next_offset,
1935                                       apr_file_t *proto_index,
1936                                       apr_pool_t *scratch_pool)
1937{
1938  apr_off_t offset = 0;
1939
1940  /* Empty index file? */
1941  SVN_ERR(svn_io_file_seek(proto_index, APR_END, &offset, scratch_pool));
1942  if (offset == 0)
1943    {
1944      *next_offset = 0;
1945    }
1946  else
1947    {
1948      /* At least one entry.  Read last entry. */
1949      svn_fs_fs__p2l_entry_t entry;
1950      offset -= P2L_PROTO_INDEX_ENTRY_SIZE;
1951
1952      SVN_ERR(svn_io_file_seek(proto_index, APR_SET, &offset, scratch_pool));
1953      SVN_ERR(read_p2l_entry_from_proto_index(proto_index, &entry,
1954                                              NULL, scratch_pool));
1955
1956      /* Return next offset. */
1957      *next_offset = entry.offset + entry.size;
1958    }
1959
1960  return SVN_NO_ERROR;
1961}
1962
1963svn_error_t *
1964svn_fs_fs__p2l_index_append(svn_checksum_t **checksum,
1965                            svn_fs_t *fs,
1966                            apr_file_t *index_file,
1967                            const char *proto_file_name,
1968                            svn_revnum_t revision,
1969                            apr_pool_t *result_pool,
1970                            apr_pool_t *scratch_pool)
1971{
1972  fs_fs_data_t *ffd = fs->fsap_data;
1973  apr_uint64_t page_size = ffd->p2l_page_size;
1974  apr_file_t *proto_index = NULL;
1975  svn_stream_t *stream;
1976  int i;
1977  svn_boolean_t eof = FALSE;
1978  unsigned char encoded[ENCODED_INT_LENGTH];
1979  svn_revnum_t last_revision = revision;
1980  apr_uint64_t last_compound = 0;
1981
1982  apr_uint64_t last_entry_end = 0;
1983  apr_uint64_t last_page_end = 0;
1984  apr_uint64_t last_buffer_size = 0;  /* byte offset in the spill buffer at
1985                                         the begin of the current revision */
1986  apr_uint64_t file_size = 0;
1987
1988  /* temporary data structures that collect the data which will be moved
1989     to the target file in a second step */
1990  apr_pool_t *local_pool = svn_pool_create(scratch_pool);
1991  apr_array_header_t *table_sizes
1992     = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
1993
1994  /* 64k blocks, spill after 16MB */
1995  svn_spillbuf_t *buffer
1996     = svn_spillbuf__create(0x10000, 0x1000000, local_pool);
1997
1998  /* for loop temps ... */
1999  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2000
2001  /* start at the beginning of the source file */
2002  SVN_ERR(svn_io_file_open(&proto_index, proto_file_name,
2003                           APR_READ | APR_CREATE | APR_BUFFERED,
2004                           APR_OS_DEFAULT, scratch_pool));
2005
2006  /* process all entries until we fail due to EOF */
2007  while (!eof)
2008    {
2009      svn_fs_fs__p2l_entry_t entry;
2010      apr_uint64_t entry_end;
2011      svn_boolean_t new_page = svn_spillbuf__get_size(buffer) == 0;
2012      apr_uint64_t compound;
2013      apr_int64_t rev_diff, compound_diff;
2014
2015      svn_pool_clear(iterpool);
2016
2017      /* (attempt to) read the next entry from the source */
2018      SVN_ERR(read_p2l_entry_from_proto_index(proto_index, &entry,
2019                                              &eof, iterpool));
2020
2021      /* "unused" (and usually non-existent) section to cover the offsets
2022         at the end the of the last page. */
2023      if (eof)
2024        {
2025          file_size = last_entry_end;
2026
2027          entry.offset = last_entry_end;
2028          entry.size = APR_ALIGN(entry.offset, page_size) - entry.offset;
2029          entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
2030          entry.fnv1_checksum = 0;
2031          entry.item.revision = last_revision;
2032          entry.item.number = 0;
2033        }
2034      else
2035        {
2036          /* fix-up items created when the txn's target rev was unknown */
2037          if (entry.item.revision == SVN_INVALID_REVNUM)
2038            entry.item.revision = revision;
2039        }
2040
2041      /* end pages if entry is extending beyond their boundaries */
2042      entry_end = entry.offset + entry.size;
2043      while (entry_end - last_page_end > page_size)
2044        {
2045          apr_uint64_t buffer_size = svn_spillbuf__get_size(buffer);
2046          APR_ARRAY_PUSH(table_sizes, apr_uint64_t)
2047             = buffer_size - last_buffer_size;
2048
2049          last_buffer_size = buffer_size;
2050          last_page_end += page_size;
2051          new_page = TRUE;
2052        }
2053
2054      /* this entry starts a new table -> store its offset
2055         (all following entries in the same table will store sizes only) */
2056      if (new_page)
2057        {
2058          SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2059                                      encode_uint(encoded, entry.offset),
2060                                      iterpool));
2061          last_revision = revision;
2062          last_compound = 0;
2063        }
2064
2065      /* write simple item entry */
2066      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2067                                  encode_uint(encoded, entry.size),
2068                                  iterpool));
2069
2070      rev_diff = entry.item.revision - last_revision;
2071      last_revision = entry.item.revision;
2072
2073      compound = entry.item.number * 8 + entry.type;
2074      compound_diff = compound - last_compound;
2075      last_compound = compound;
2076
2077      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2078                                  encode_int(encoded, compound_diff),
2079                                  iterpool));
2080      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2081                                  encode_int(encoded, rev_diff),
2082                                  iterpool));
2083      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2084                                  encode_uint(encoded, entry.fnv1_checksum),
2085                                  iterpool));
2086
2087      last_entry_end = entry_end;
2088    }
2089
2090  /* close the source file */
2091  SVN_ERR(svn_io_file_close(proto_index, local_pool));
2092
2093  /* store length of last table */
2094  APR_ARRAY_PUSH(table_sizes, apr_uint64_t)
2095      = svn_spillbuf__get_size(buffer) - last_buffer_size;
2096
2097  /* Open target stream. */
2098  stream = svn_stream_checksummed2(svn_stream_from_aprfile2(index_file, TRUE,
2099                                                            local_pool),
2100                                   NULL, checksum, svn_checksum_md5, FALSE,
2101                                   result_pool);
2102
2103  /* write the start revision, file size and page size */
2104  SVN_ERR(svn_stream_puts(stream, P2L_STREAM_PREFIX));
2105  SVN_ERR(stream_write_encoded(stream, revision));
2106  SVN_ERR(stream_write_encoded(stream, file_size));
2107  SVN_ERR(stream_write_encoded(stream, page_size));
2108
2109  /* write the page table (actually, the sizes of each page description) */
2110  SVN_ERR(stream_write_encoded(stream, table_sizes->nelts));
2111  for (i = 0; i < table_sizes->nelts; ++i)
2112    {
2113      apr_uint64_t value = APR_ARRAY_IDX(table_sizes, i, apr_uint64_t);
2114      SVN_ERR(stream_write_encoded(stream, value));
2115    }
2116
2117  /* append page contents and implicitly close STREAM */
2118  SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool),
2119                           stream, NULL, NULL, local_pool));
2120
2121  svn_pool_destroy(iterpool);
2122  svn_pool_destroy(local_pool);
2123
2124  return SVN_NO_ERROR;
2125}
2126
2127/* If REV_FILE->P2L_STREAM is NULL, create a new stream for the phys-to-log
2128 * index for REVISION in FS using the rev / pack file provided by REV_FILE.
2129 */
2130static svn_error_t *
2131auto_open_p2l_index(svn_fs_fs__revision_file_t *rev_file,
2132                    svn_fs_t *fs,
2133                    svn_revnum_t revision)
2134{
2135  if (rev_file->p2l_stream == NULL)
2136    {
2137      fs_fs_data_t *ffd = fs->fsap_data;
2138
2139      SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
2140      SVN_ERR(packed_stream_open(&rev_file->p2l_stream,
2141                                 rev_file->file,
2142                                 rev_file->p2l_offset,
2143                                 rev_file->footer_offset,
2144                                 P2L_STREAM_PREFIX,
2145                                 (apr_size_t)ffd->block_size,
2146                                 rev_file->pool,
2147                                 rev_file->pool));
2148    }
2149
2150  return SVN_NO_ERROR;
2151}
2152
2153
2154/* Read the header data structure of the phys-to-log index for REVISION in
2155 * FS and return it in *HEADER, allocated in RESULT_POOL. Use REV_FILE to
2156 * access on-disk data.  Use SCRATCH_POOL for temporary allocations.
2157 */
2158static svn_error_t *
2159get_p2l_header(p2l_header_t **header,
2160               svn_fs_fs__revision_file_t *rev_file,
2161               svn_fs_t *fs,
2162               svn_revnum_t revision,
2163               apr_pool_t *result_pool,
2164               apr_pool_t *scratch_pool)
2165{
2166  fs_fs_data_t *ffd = fs->fsap_data;
2167  apr_uint64_t value;
2168  apr_size_t i;
2169  apr_off_t offset;
2170  p2l_header_t *result;
2171  svn_boolean_t is_cached = FALSE;
2172
2173  /* look for the header data in our cache */
2174  pair_cache_key_t key;
2175  key.revision = rev_file->start_revision;
2176  key.second = rev_file->is_packed;
2177
2178  SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->p2l_header_cache,
2179                         &key, result_pool));
2180  if (is_cached)
2181    return SVN_NO_ERROR;
2182
2183  /* not found -> must read it from disk.
2184   * Open index file or position read pointer to the begin of the file */
2185  if (rev_file->p2l_stream == NULL)
2186    SVN_ERR(auto_open_p2l_index(rev_file, fs, rev_file->start_revision));
2187  else
2188    packed_stream_seek(rev_file->p2l_stream, 0);
2189
2190  /* allocate result data structure */
2191  result = apr_pcalloc(result_pool, sizeof(*result));
2192
2193  /* Read table sizes, check them for plausibility and allocate page array. */
2194  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2195  result->first_revision = (svn_revnum_t)value;
2196  if (result->first_revision != rev_file->start_revision)
2197    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2198                  _("Index rev / pack file revision numbers do not match"));
2199
2200  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2201  result->file_size = value;
2202  if (result->file_size != (apr_uint64_t)rev_file->l2p_offset)
2203    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2204                   _("Index offset and rev / pack file size do not match"));
2205
2206  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2207  result->page_size = value;
2208  if (!result->page_size || (result->page_size & (result->page_size - 1)))
2209    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2210                            _("P2L index page size is not a power of two"));
2211
2212  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2213  result->page_count = (apr_size_t)value;
2214  if (result->page_count != (result->file_size - 1) / result->page_size + 1)
2215    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2216                   _("P2L page count does not match rev / pack file size"));
2217
2218  result->offsets
2219    = apr_pcalloc(result_pool, (result->page_count + 1) * sizeof(*result->offsets));
2220
2221  /* read page sizes and derive page description offsets from them */
2222  result->offsets[0] = 0;
2223  for (i = 0; i < result->page_count; ++i)
2224    {
2225      SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2226      result->offsets[i+1] = result->offsets[i] + (apr_off_t)value;
2227    }
2228
2229  /* correct the offset values */
2230  offset = packed_stream_offset(rev_file->p2l_stream);
2231  for (i = 0; i <= result->page_count; ++i)
2232    result->offsets[i] += offset;
2233
2234  /* cache the header data */
2235  SVN_ERR(svn_cache__set(ffd->p2l_header_cache, &key, result, scratch_pool));
2236
2237  /* return the result */
2238  *header = result;
2239
2240  return SVN_NO_ERROR;
2241}
2242
2243/* Data structure that describes which p2l page info shall be extracted
2244 * from the cache and contains the fields that receive the result.
2245 */
2246typedef struct p2l_page_info_baton_t
2247{
2248  /* input variables */
2249  /* revision identifying the index file */
2250  svn_revnum_t revision;
2251
2252  /* offset within the page in rev / pack file */
2253  apr_off_t offset;
2254
2255  /* output variables */
2256  /* page containing OFFSET */
2257  apr_size_t page_no;
2258
2259  /* first revision in this p2l index */
2260  svn_revnum_t first_revision;
2261
2262  /* offset within the p2l index file describing this page */
2263  apr_off_t start_offset;
2264
2265  /* offset within the p2l index file describing the following page */
2266  apr_off_t next_offset;
2267
2268  /* PAGE_NO * PAGE_SIZE (if <= OFFSET) */
2269  apr_off_t page_start;
2270
2271  /* total number of pages indexed */
2272  apr_size_t page_count;
2273
2274  /* size of each page in pack / rev file */
2275  apr_uint64_t page_size;
2276} p2l_page_info_baton_t;
2277
2278/* From HEADER and the list of all OFFSETS, fill BATON with the page info
2279 * requested by BATON->OFFSET.
2280 */
2281static void
2282p2l_page_info_copy(p2l_page_info_baton_t *baton,
2283                   const p2l_header_t *header,
2284                   const apr_off_t *offsets)
2285{
2286  /* if the requested offset is out of bounds, return info for
2287   * a zero-sized empty page right behind the last page.
2288   */
2289  if (baton->offset / header->page_size < header->page_count)
2290    {
2291      /* This cast is safe because the value is < header->page_count. */
2292      baton->page_no = (apr_size_t)(baton->offset / header->page_size);
2293      baton->start_offset = offsets[baton->page_no];
2294      baton->next_offset = offsets[baton->page_no + 1];
2295      baton->page_size = header->page_size;
2296    }
2297  else
2298    {
2299      /* Beyond the last page. */
2300      baton->page_no = header->page_count;
2301      baton->start_offset = offsets[baton->page_no];
2302      baton->next_offset = offsets[baton->page_no];
2303      baton->page_size = 0;
2304    }
2305
2306  baton->first_revision = header->first_revision;
2307  baton->page_start = (apr_off_t)(header->page_size * baton->page_no);
2308  baton->page_count = header->page_count;
2309}
2310
2311/* Implement svn_cache__partial_getter_func_t: extract the p2l page info
2312 * requested by BATON and return it in BATON.
2313 */
2314static svn_error_t *
2315p2l_page_info_func(void **out,
2316                   const void *data,
2317                   apr_size_t data_len,
2318                   void *baton,
2319                   apr_pool_t *result_pool)
2320{
2321  /* all the pointers to cached data we need */
2322  const p2l_header_t *header = data;
2323  const apr_off_t *offsets
2324    = svn_temp_deserializer__ptr(header,
2325                                 (const void *const *)&header->offsets);
2326
2327  /* copy data from cache to BATON */
2328  p2l_page_info_copy(baton, header, offsets);
2329  return SVN_NO_ERROR;
2330}
2331
2332/* Read the header data structure of the phys-to-log index for revision
2333 * BATON->REVISION in FS.  Return in *BATON all info relevant to read the
2334 * index page for the rev / pack file offset BATON->OFFSET.  Use REV_FILE
2335 * to access on-disk data.  Use SCRATCH_POOL for temporary allocations.
2336 */
2337static svn_error_t *
2338get_p2l_page_info(p2l_page_info_baton_t *baton,
2339                  svn_fs_fs__revision_file_t *rev_file,
2340                  svn_fs_t *fs,
2341                  apr_pool_t *scratch_pool)
2342{
2343  fs_fs_data_t *ffd = fs->fsap_data;
2344  p2l_header_t *header;
2345  svn_boolean_t is_cached = FALSE;
2346  void *dummy = NULL;
2347
2348  /* look for the header data in our cache */
2349  pair_cache_key_t key;
2350  key.revision = rev_file->start_revision;
2351  key.second = rev_file->is_packed;
2352
2353  SVN_ERR(svn_cache__get_partial(&dummy, &is_cached, ffd->p2l_header_cache,
2354                                 &key, p2l_page_info_func, baton,
2355                                 scratch_pool));
2356  if (is_cached)
2357    return SVN_NO_ERROR;
2358
2359  SVN_ERR(get_p2l_header(&header, rev_file, fs, baton->revision,
2360                         scratch_pool, scratch_pool));
2361
2362  /* copy the requested info into *BATON */
2363  p2l_page_info_copy(baton, header, header->offsets);
2364
2365  return SVN_NO_ERROR;
2366}
2367
2368/* Read a mapping entry from the phys-to-log index STREAM and append it to
2369 * RESULT.  *ITEM_INDEX contains the phys offset for the entry and will
2370 * be moved forward by the size of entry.
2371 */
2372static svn_error_t *
2373read_entry(svn_fs_fs__packed_number_stream_t *stream,
2374           apr_off_t *item_offset,
2375           svn_revnum_t *last_revision,
2376           apr_uint64_t *last_compound,
2377           apr_array_header_t *result)
2378{
2379  apr_uint64_t value;
2380
2381  svn_fs_fs__p2l_entry_t entry;
2382
2383  entry.offset = *item_offset;
2384  SVN_ERR(packed_stream_get(&value, stream));
2385  entry.size = (apr_off_t)value;
2386
2387  SVN_ERR(packed_stream_get(&value, stream));
2388  *last_compound += decode_int(value);
2389
2390  entry.type = *last_compound & 7;
2391  entry.item.number = *last_compound / 8;
2392
2393  /* Verify item type. */
2394  if (entry.type > SVN_FS_FS__ITEM_TYPE_CHANGES)
2395    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2396                            _("Invalid item type in P2L index"));
2397  if (   entry.type == SVN_FS_FS__ITEM_TYPE_CHANGES
2398      && entry.item.number != SVN_FS_FS__ITEM_INDEX_CHANGES)
2399    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2400                            _("Changed path list must have item number 1"));
2401
2402  SVN_ERR(packed_stream_get(&value, stream));
2403  *last_revision += (svn_revnum_t)decode_int(value);
2404  entry.item.revision = *last_revision;
2405
2406  SVN_ERR(packed_stream_get(&value, stream));
2407  entry.fnv1_checksum = (apr_uint32_t)value;
2408
2409  /* Truncating the checksum to 32 bits may have hidden random data in the
2410   * unused extra bits of the on-disk representation (7/8 bit representation
2411   * uses 5 bytes on disk for the 32 bit value, leaving 3 bits unused). */
2412  if (value > APR_UINT32_MAX)
2413    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2414                            _("Invalid FNV1 checksum in P2L index"));
2415
2416  /* Some of the index data for empty rev / pack file sections will not be
2417   * used during normal operation.  Thus, we have strict rules for the
2418   * contents of those unused fields. */
2419  if (entry.type == SVN_FS_FS__ITEM_TYPE_UNUSED)
2420    if (   entry.item.number != SVN_FS_FS__ITEM_INDEX_UNUSED
2421        || entry.fnv1_checksum != 0)
2422      return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2423                 _("Empty regions must have item number 0 and checksum 0"));
2424
2425  /* Corrupted SIZE values might cause arithmetic overflow.
2426   * The same can happen if you copy a repository from a system with 63 bit
2427   * file lengths to one with 31 bit file lengths. */
2428  if ((apr_uint64_t)entry.offset + (apr_uint64_t)entry.size > off_t_max)
2429    return svn_error_create(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
2430                            _("P2L index entry size overflow."));
2431
2432  APR_ARRAY_PUSH(result, svn_fs_fs__p2l_entry_t) = entry;
2433  *item_offset += entry.size;
2434
2435  return SVN_NO_ERROR;
2436}
2437
2438/* Read the phys-to-log mappings for the cluster beginning at rev file
2439 * offset PAGE_START from the index for START_REVISION in FS.  The data
2440 * can be found in the index page beginning at START_OFFSET with the next
2441 * page beginning at NEXT_OFFSET.  PAGE_SIZE is the L2P index page size.
2442 * Return the relevant index entries in *ENTRIES.  Use REV_FILE to access
2443 * on-disk data.  Allocate *ENTRIES in RESULT_POOL.
2444 */
2445static svn_error_t *
2446get_p2l_page(apr_array_header_t **entries,
2447             svn_fs_fs__revision_file_t *rev_file,
2448             svn_fs_t *fs,
2449             svn_revnum_t start_revision,
2450             apr_off_t start_offset,
2451             apr_off_t next_offset,
2452             apr_off_t page_start,
2453             apr_uint64_t page_size,
2454             apr_pool_t *result_pool)
2455{
2456  apr_uint64_t value;
2457  apr_array_header_t *result
2458    = apr_array_make(result_pool, 16, sizeof(svn_fs_fs__p2l_entry_t));
2459  apr_off_t item_offset;
2460  apr_off_t offset;
2461  svn_revnum_t last_revision;
2462  apr_uint64_t last_compound;
2463
2464  /* open index and navigate to page start */
2465  SVN_ERR(auto_open_p2l_index(rev_file, fs, start_revision));
2466  packed_stream_seek(rev_file->p2l_stream, start_offset);
2467
2468  /* read rev file offset of the first page entry (all page entries will
2469   * only store their sizes). */
2470  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2471  item_offset = (apr_off_t)value;
2472
2473  /* read all entries of this page */
2474  last_revision = start_revision;
2475  last_compound = 0;
2476
2477  /* Special case: empty pages. */
2478  if (start_offset == next_offset)
2479    {
2480      /* Empty page. This only happens if the first entry of the next page
2481       * also covers this page (and possibly more) completely. */
2482      SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset,
2483                         &last_revision, &last_compound, result));
2484    }
2485  else
2486    {
2487      /* Read non-empty page. */
2488      do
2489        {
2490          SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset,
2491                             &last_revision, &last_compound, result));
2492          offset = packed_stream_offset(rev_file->p2l_stream);
2493        }
2494      while (offset < next_offset);
2495
2496      /* We should now be exactly at the next offset, i.e. the numbers in
2497       * the stream cannot overlap into the next page description. */
2498      if (offset != next_offset)
2499        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2500             _("P2L page description overlaps with next page description"));
2501
2502      /* if we haven't covered the cluster end yet, we must read the first
2503       * entry of the next page */
2504      if (item_offset < page_start + page_size)
2505        {
2506          SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2507          item_offset = (apr_off_t)value;
2508          last_revision = start_revision;
2509          last_compound = 0;
2510          SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset,
2511                             &last_revision, &last_compound, result));
2512        }
2513    }
2514
2515  *entries = result;
2516
2517  return SVN_NO_ERROR;
2518}
2519
2520/* If it cannot be found in FS's caches, read the p2l index page selected
2521 * by BATON->OFFSET from REV_FILE.  Don't read the page if it precedes
2522 * MIN_OFFSET.  Set *END to TRUE if the caller should stop refeching.
2523 *
2524 * *BATON will be updated with the selected page's info and SCRATCH_POOL
2525 * will be used for temporary allocations.  If the data is alread in the
2526 * cache, descrease *LEAKING_BUCKET and increase it otherwise.  With that
2527 * pattern we will still read all pages from the block even if some of
2528 * them survived in the cached.
2529 */
2530static svn_error_t *
2531prefetch_p2l_page(svn_boolean_t *end,
2532                  int *leaking_bucket,
2533                  svn_fs_t *fs,
2534                  svn_fs_fs__revision_file_t *rev_file,
2535                  p2l_page_info_baton_t *baton,
2536                  apr_off_t min_offset,
2537                  apr_pool_t *scratch_pool)
2538{
2539  fs_fs_data_t *ffd = fs->fsap_data;
2540  svn_boolean_t already_cached;
2541  apr_array_header_t *page;
2542  svn_fs_fs__page_cache_key_t key = { 0 };
2543
2544  /* fetch the page info */
2545  *end = FALSE;
2546  baton->revision = baton->first_revision;
2547  SVN_ERR(get_p2l_page_info(baton, rev_file, fs, scratch_pool));
2548  if (baton->start_offset < min_offset || !rev_file->p2l_stream)
2549    {
2550      /* page outside limits -> stop prefetching */
2551      *end = TRUE;
2552      return SVN_NO_ERROR;
2553    }
2554
2555  /* do we have that page in our caches already? */
2556  assert(baton->first_revision <= APR_UINT32_MAX);
2557  key.revision = (apr_uint32_t)baton->first_revision;
2558  key.is_packed = svn_fs_fs__is_packed_rev(fs, baton->first_revision);
2559  key.page = baton->page_no;
2560  SVN_ERR(svn_cache__has_key(&already_cached, ffd->p2l_page_cache,
2561                             &key, scratch_pool));
2562
2563  /* yes, already cached */
2564  if (already_cached)
2565    {
2566      /* stop prefetching if most pages are already cached. */
2567      if (!--*leaking_bucket)
2568        *end = TRUE;
2569
2570      return SVN_NO_ERROR;
2571    }
2572
2573  ++*leaking_bucket;
2574
2575  /* read from disk */
2576  SVN_ERR(get_p2l_page(&page, rev_file, fs,
2577                       baton->first_revision,
2578                       baton->start_offset,
2579                       baton->next_offset,
2580                       baton->page_start,
2581                       baton->page_size,
2582                       scratch_pool));
2583
2584  /* and put it into our cache */
2585  SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page, scratch_pool));
2586
2587  return SVN_NO_ERROR;
2588}
2589
2590/* Lookup & construct the baton and key information that we will need for
2591 * a P2L page cache lookup.  We want the page covering OFFSET in the rev /
2592 * pack file containing REVSION in FS.  Return the results in *PAGE_INFO_P
2593 * and *KEY_P.  Read data through REV_FILE.  Use SCRATCH_POOL for temporary
2594 * allocations.
2595 */
2596static svn_error_t *
2597get_p2l_keys(p2l_page_info_baton_t *page_info_p,
2598             svn_fs_fs__page_cache_key_t *key_p,
2599             svn_fs_fs__revision_file_t *rev_file,
2600             svn_fs_t *fs,
2601             svn_revnum_t revision,
2602             apr_off_t offset,
2603             apr_pool_t *scratch_pool)
2604{
2605  p2l_page_info_baton_t page_info;
2606
2607  /* request info for the index pages that describes the pack / rev file
2608   * contents at pack / rev file position OFFSET. */
2609  page_info.offset = offset;
2610  page_info.revision = revision;
2611  SVN_ERR(get_p2l_page_info(&page_info, rev_file, fs, scratch_pool));
2612
2613  /* if the offset refers to a non-existent page, bail out */
2614  if (page_info.page_count <= page_info.page_no)
2615    return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
2616                              _("Offset %s too large in revision %ld"),
2617                              apr_off_t_toa(scratch_pool, offset), revision);
2618
2619  /* return results */
2620  if (page_info_p)
2621    *page_info_p = page_info;
2622
2623  /* construct cache key */
2624  if (key_p)
2625    {
2626      svn_fs_fs__page_cache_key_t key = { 0 };
2627      assert(page_info.first_revision <= APR_UINT32_MAX);
2628      key.revision = (apr_uint32_t)page_info.first_revision;
2629      key.is_packed = rev_file->is_packed;
2630      key.page = page_info.page_no;
2631
2632      *key_p = key;
2633    }
2634
2635  return SVN_NO_ERROR;
2636}
2637
2638/* qsort-compatible compare function that compares the OFFSET of the
2639 * svn_fs_fs__p2l_entry_t in *LHS with the apr_off_t in *RHS. */
2640static int
2641compare_start_p2l_entry(const void *lhs,
2642                        const void *rhs)
2643{
2644  const svn_fs_fs__p2l_entry_t *entry = lhs;
2645  apr_off_t start = *(const apr_off_t*)rhs;
2646  apr_off_t diff = entry->offset - start;
2647
2648  /* restrict result to int */
2649  return diff < 0 ? -1 : (diff == 0 ? 0 : 1);
2650}
2651
2652/* From the PAGE_ENTRIES array of svn_fs_fs__p2l_entry_t, ordered
2653 * by their OFFSET member, copy all elements overlapping the range
2654 * [BLOCK_START, BLOCK_END) to ENTRIES. */
2655static void
2656append_p2l_entries(apr_array_header_t *entries,
2657                   apr_array_header_t *page_entries,
2658                   apr_off_t block_start,
2659                   apr_off_t block_end)
2660{
2661  const svn_fs_fs__p2l_entry_t *entry;
2662  int idx = svn_sort__bsearch_lower_bound(page_entries, &block_start,
2663                                          compare_start_p2l_entry);
2664
2665  /* start at the first entry that overlaps with BLOCK_START */
2666  if (idx > 0)
2667    {
2668      entry = &APR_ARRAY_IDX(page_entries, idx - 1, svn_fs_fs__p2l_entry_t);
2669      if (entry->offset + entry->size > block_start)
2670        --idx;
2671    }
2672
2673  /* copy all entries covering the requested range */
2674  for ( ; idx < page_entries->nelts; ++idx)
2675    {
2676      entry = &APR_ARRAY_IDX(page_entries, idx, svn_fs_fs__p2l_entry_t);
2677      if (entry->offset >= block_end)
2678        break;
2679
2680      APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t) = *entry;
2681    }
2682}
2683
2684/* Auxilliary struct passed to p2l_entries_func selecting the relevant
2685 * data range. */
2686typedef struct p2l_entries_baton_t
2687{
2688  apr_off_t start;
2689  apr_off_t end;
2690} p2l_entries_baton_t;
2691
2692/* Implement svn_cache__partial_getter_func_t: extract p2l entries from
2693 * the page in DATA which overlap the p2l_entries_baton_t in BATON.
2694 * The target array is already provided in *OUT.
2695 */
2696static svn_error_t *
2697p2l_entries_func(void **out,
2698                 const void *data,
2699                 apr_size_t data_len,
2700                 void *baton,
2701                 apr_pool_t *result_pool)
2702{
2703  apr_array_header_t *entries = *(apr_array_header_t **)out;
2704  const apr_array_header_t *raw_page = data;
2705  p2l_entries_baton_t *block = baton;
2706
2707  /* Make PAGE a readable APR array. */
2708  apr_array_header_t page = *raw_page;
2709  page.elts = (void *)svn_temp_deserializer__ptr(raw_page,
2710                                    (const void * const *)&raw_page->elts);
2711
2712  /* append relevant information to result */
2713  append_p2l_entries(entries, &page, block->start, block->end);
2714
2715  return SVN_NO_ERROR;
2716}
2717
2718
2719/* Body of svn_fs_fs__p2l_index_lookup.  However, do a single index page
2720 * lookup and append the result to the ENTRIES array provided by the caller.
2721 * Use successive calls to cover larger ranges.
2722 */
2723static svn_error_t *
2724p2l_index_lookup(apr_array_header_t *entries,
2725                 svn_fs_fs__revision_file_t *rev_file,
2726                 svn_fs_t *fs,
2727                 svn_revnum_t revision,
2728                 apr_off_t block_start,
2729                 apr_off_t block_end,
2730                 apr_pool_t *scratch_pool)
2731{
2732  fs_fs_data_t *ffd = fs->fsap_data;
2733  svn_fs_fs__page_cache_key_t key;
2734  svn_boolean_t is_cached = FALSE;
2735  p2l_page_info_baton_t page_info;
2736  apr_array_header_t *local_result = entries;
2737
2738  /* baton selecting the relevant entries from the one page we access */
2739  p2l_entries_baton_t block;
2740  block.start = block_start;
2741  block.end = block_end;
2742
2743  /* if we requested an empty range, the result would be empty */
2744  SVN_ERR_ASSERT(block_start < block_end);
2745
2746  /* look for the fist page of the range in our cache */
2747  SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, block_start,
2748                       scratch_pool));
2749  SVN_ERR(svn_cache__get_partial((void**)&local_result, &is_cached,
2750                                 ffd->p2l_page_cache, &key, p2l_entries_func,
2751                                 &block, scratch_pool));
2752
2753  if (!is_cached)
2754    {
2755      svn_boolean_t end;
2756      apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2757      apr_off_t original_page_start = page_info.page_start;
2758      int leaking_bucket = 4;
2759      p2l_page_info_baton_t prefetch_info = page_info;
2760      apr_array_header_t *page_entries;
2761
2762      apr_off_t max_offset
2763        = APR_ALIGN(page_info.next_offset, ffd->block_size);
2764      apr_off_t min_offset
2765        = APR_ALIGN(page_info.start_offset, ffd->block_size) - ffd->block_size;
2766
2767      /* Since we read index data in larger chunks, we probably got more
2768       * page data than we requested.  Parse & cache that until either we
2769       * encounter pages already cached or reach the end of the buffer.
2770       */
2771
2772      /* pre-fetch preceding pages */
2773      if (ffd->use_block_read)
2774        {
2775          end = FALSE;
2776          prefetch_info.offset = original_page_start;
2777          while (prefetch_info.offset >= prefetch_info.page_size && !end)
2778            {
2779              svn_pool_clear(iterpool);
2780
2781              prefetch_info.offset -= prefetch_info.page_size;
2782              SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file,
2783                                        &prefetch_info, min_offset,
2784                                        iterpool));
2785            }
2786        }
2787
2788      /* fetch page from disk and put it into the cache */
2789      SVN_ERR(get_p2l_page(&page_entries, rev_file, fs,
2790                           page_info.first_revision,
2791                           page_info.start_offset,
2792                           page_info.next_offset,
2793                           page_info.page_start,
2794                           page_info.page_size, iterpool));
2795
2796      /* The last cache entry must not end beyond the range covered by
2797       * this index.  The same applies for any subset of entries. */
2798      if (page_entries->nelts)
2799        {
2800          const svn_fs_fs__p2l_entry_t *entry
2801            = &APR_ARRAY_IDX(page_entries, page_entries->nelts - 1,
2802                             svn_fs_fs__p2l_entry_t);
2803          if (  entry->offset + entry->size
2804              > page_info.page_size * page_info.page_count)
2805            return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
2806                                     _("Last P2L index entry extends beyond "
2807                                       "the last page in revision %ld."),
2808                                     revision);
2809        }
2810
2811      SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page_entries,
2812                             iterpool));
2813
2814      /* append relevant information to result */
2815      append_p2l_entries(entries, page_entries, block_start, block_end);
2816
2817      /* pre-fetch following pages */
2818      if (ffd->use_block_read)
2819        {
2820          end = FALSE;
2821          leaking_bucket = 4;
2822          prefetch_info = page_info;
2823          prefetch_info.offset = original_page_start;
2824          while (   prefetch_info.next_offset < max_offset
2825                && prefetch_info.page_no + 1 < prefetch_info.page_count
2826                && !end)
2827            {
2828              svn_pool_clear(iterpool);
2829
2830              prefetch_info.offset += prefetch_info.page_size;
2831              SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file,
2832                                        &prefetch_info, min_offset,
2833                                        iterpool));
2834            }
2835        }
2836
2837      svn_pool_destroy(iterpool);
2838    }
2839
2840  /* We access a valid page (otherwise, we had seen an error in the
2841   * get_p2l_keys request).  Hence, at least one entry must be found. */
2842  SVN_ERR_ASSERT(entries->nelts > 0);
2843
2844  /* Add an "unused" entry if it extends beyond the end of the data file.
2845   * Since the index page size might be smaller than the current data
2846   * read block size, the trailing "unused" entry in this index may not
2847   * fully cover the end of the last block. */
2848  if (page_info.page_no + 1 >= page_info.page_count)
2849    {
2850      svn_fs_fs__p2l_entry_t *entry
2851        = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t);
2852
2853      apr_off_t entry_end = entry->offset + entry->size;
2854      if (entry_end < block_end)
2855        {
2856          if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
2857            {
2858              /* extend the terminal filler */
2859              entry->size = block_end - entry->offset;
2860            }
2861          else
2862            {
2863              /* No terminal filler. Add one. */
2864              entry = apr_array_push(entries);
2865              entry->offset = entry_end;
2866              entry->size = block_end - entry_end;
2867              entry->type = SVN_FS_FS__ITEM_TYPE_UNUSED;
2868              entry->fnv1_checksum = 0;
2869              entry->item.revision = SVN_INVALID_REVNUM;
2870              entry->item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
2871            }
2872        }
2873    }
2874
2875  return SVN_NO_ERROR;
2876}
2877
2878svn_error_t *
2879svn_fs_fs__p2l_index_lookup(apr_array_header_t **entries,
2880                            svn_fs_t *fs,
2881                            svn_fs_fs__revision_file_t *rev_file,
2882                            svn_revnum_t revision,
2883                            apr_off_t block_start,
2884                            apr_off_t block_size,
2885                            apr_pool_t *result_pool,
2886                            apr_pool_t *scratch_pool)
2887{
2888  apr_off_t block_end = block_start + block_size;
2889
2890  /* the receiving container */
2891  int last_count = 0;
2892  apr_array_header_t *result = apr_array_make(result_pool, 16,
2893                                              sizeof(svn_fs_fs__p2l_entry_t));
2894
2895  /* Fetch entries page-by-page.  Since the p2l index is supposed to cover
2896   * every single byte in the rev / pack file - even unused sections -
2897   * every iteration must result in some progress. */
2898  while (block_start < block_end)
2899    {
2900      svn_fs_fs__p2l_entry_t *entry;
2901      SVN_ERR(p2l_index_lookup(result, rev_file, fs, revision, block_start,
2902                               block_end, scratch_pool));
2903      SVN_ERR_ASSERT(result->nelts > 0);
2904
2905      /* continue directly behind last item */
2906      entry = &APR_ARRAY_IDX(result, result->nelts-1, svn_fs_fs__p2l_entry_t);
2907      block_start = entry->offset + entry->size;
2908
2909      /* Some paranoia check.  Successive iterations should never return
2910       * duplicates but if it did, we might get into trouble later on. */
2911      if (last_count > 0 && last_count < result->nelts)
2912        {
2913           entry =  &APR_ARRAY_IDX(result, last_count - 1,
2914                                   svn_fs_fs__p2l_entry_t);
2915           SVN_ERR_ASSERT(APR_ARRAY_IDX(result, last_count,
2916                                        svn_fs_fs__p2l_entry_t).offset
2917                          >= entry->offset + entry->size);
2918        }
2919
2920      last_count = result->nelts;
2921    }
2922
2923  *entries = result;
2924  return SVN_NO_ERROR;
2925}
2926
2927/* compare_fn_t comparing a svn_fs_fs__p2l_entry_t at LHS with an offset
2928 * RHS.
2929 */
2930static int
2931compare_p2l_entry_offsets(const void *lhs, const void *rhs)
2932{
2933  const svn_fs_fs__p2l_entry_t *entry = (const svn_fs_fs__p2l_entry_t *)lhs;
2934  apr_off_t offset = *(const apr_off_t *)rhs;
2935
2936  return entry->offset < offset ? -1 : (entry->offset == offset ? 0 : 1);
2937}
2938
2939/* Cached data extraction utility.  DATA is a P2L index page, e.g. an APR
2940 * array of svn_fs_fs__p2l_entry_t elements.  Return the entry for the item,
2941 * allocated in RESULT_POOL, starting at OFFSET or NULL if that's not an
2942 * the start offset of any item. Use SCRATCH_POOL for temporary allocations.
2943 */
2944static svn_fs_fs__p2l_entry_t *
2945get_p2l_entry_from_cached_page(const void *data,
2946                               apr_uint64_t offset,
2947                               apr_pool_t *result_pool,
2948                               apr_pool_t *scratch_pool)
2949{
2950  /* resolve all pointer values of in-cache data */
2951  const apr_array_header_t *page = data;
2952  apr_array_header_t *entries = apr_pmemdup(scratch_pool, page,
2953                                            sizeof(*page));
2954  svn_fs_fs__p2l_entry_t *entry;
2955
2956  entries->elts = (char *)svn_temp_deserializer__ptr(page,
2957                                     (const void *const *)&page->elts);
2958
2959  /* search of the offset we want */
2960  entry = svn_sort__array_lookup(entries, &offset, NULL,
2961      (int (*)(const void *, const void *))compare_p2l_entry_offsets);
2962
2963  /* return it, if it is a perfect match */
2964  return entry ? apr_pmemdup(result_pool, entry, sizeof(*entry)) : NULL;
2965}
2966
2967/* Implements svn_cache__partial_getter_func_t for P2L index pages, copying
2968 * the entry for the apr_off_t at BATON into *OUT.  *OUT will be NULL if
2969 * there is no matching entry in the index page at DATA.
2970 */
2971static svn_error_t *
2972p2l_entry_lookup_func(void **out,
2973                      const void *data,
2974                      apr_size_t data_len,
2975                      void *baton,
2976                      apr_pool_t *result_pool)
2977{
2978  svn_fs_fs__p2l_entry_t *entry
2979    = get_p2l_entry_from_cached_page(data, *(apr_off_t *)baton, result_pool,
2980                                     result_pool);
2981
2982  *out = entry && entry->offset == *(apr_off_t *)baton
2983       ? apr_pmemdup(result_pool, entry, sizeof(*entry))
2984       : NULL;
2985
2986  return SVN_NO_ERROR;
2987}
2988
2989svn_error_t *
2990svn_fs_fs__p2l_entry_lookup(svn_fs_fs__p2l_entry_t **entry_p,
2991                            svn_fs_t *fs,
2992                            svn_fs_fs__revision_file_t *rev_file,
2993                            svn_revnum_t revision,
2994                            apr_off_t offset,
2995                            apr_pool_t *result_pool,
2996                            apr_pool_t *scratch_pool)
2997{
2998  fs_fs_data_t *ffd = fs->fsap_data;
2999  svn_fs_fs__page_cache_key_t key = { 0 };
3000  svn_boolean_t is_cached = FALSE;
3001  p2l_page_info_baton_t page_info;
3002
3003  *entry_p = NULL;
3004
3005  /* look for this info in our cache */
3006  SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, offset,
3007                       scratch_pool));
3008  SVN_ERR(svn_cache__get_partial((void**)entry_p, &is_cached,
3009                                 ffd->p2l_page_cache, &key,
3010                                 p2l_entry_lookup_func, &offset,
3011                                 result_pool));
3012  if (!is_cached)
3013    {
3014      /* do a standard index lookup.  This is will automatically prefetch
3015       * data to speed up future lookups. */
3016      apr_array_header_t *entries = apr_array_make(result_pool, 1,
3017                                                   sizeof(**entry_p));
3018      SVN_ERR(p2l_index_lookup(entries, rev_file, fs, revision, offset,
3019                               offset + 1, scratch_pool));
3020
3021      /* Find the entry that we want. */
3022      *entry_p = svn_sort__array_lookup(entries, &offset, NULL,
3023          (int (*)(const void *, const void *))compare_p2l_entry_offsets);
3024    }
3025
3026  return SVN_NO_ERROR;
3027}
3028
3029/* Implements svn_cache__partial_getter_func_t for P2L headers, setting *OUT
3030 * to the largest the first offset not covered by this P2L index.
3031 */
3032static svn_error_t *
3033p2l_get_max_offset_func(void **out,
3034                        const void *data,
3035                        apr_size_t data_len,
3036                        void *baton,
3037                        apr_pool_t *result_pool)
3038{
3039  const p2l_header_t *header = data;
3040  apr_off_t max_offset = header->file_size;
3041  *out = apr_pmemdup(result_pool, &max_offset, sizeof(max_offset));
3042
3043  return SVN_NO_ERROR;
3044}
3045
3046/* Core functionality of to svn_fs_fs__p2l_get_max_offset with identical
3047 * signature. */
3048static svn_error_t *
3049p2l_get_max_offset(apr_off_t *offset,
3050                   svn_fs_t *fs,
3051                   svn_fs_fs__revision_file_t *rev_file,
3052                   svn_revnum_t revision,
3053                   apr_pool_t *scratch_pool)
3054{
3055  fs_fs_data_t *ffd = fs->fsap_data;
3056  p2l_header_t *header;
3057  svn_boolean_t is_cached = FALSE;
3058  apr_off_t *offset_p;
3059
3060  /* look for the header data in our cache */
3061  pair_cache_key_t key;
3062  key.revision = rev_file->start_revision;
3063  key.second = rev_file->is_packed;
3064
3065  SVN_ERR(svn_cache__get_partial((void **)&offset_p, &is_cached,
3066                                 ffd->p2l_header_cache, &key,
3067                                 p2l_get_max_offset_func, NULL,
3068                                 scratch_pool));
3069  if (is_cached)
3070    {
3071      *offset = *offset_p;
3072      return SVN_NO_ERROR;
3073    }
3074
3075  SVN_ERR(get_p2l_header(&header, rev_file, fs, revision, scratch_pool,
3076                         scratch_pool));
3077  *offset = header->file_size;
3078
3079  return SVN_NO_ERROR;
3080}
3081
3082svn_error_t *
3083svn_fs_fs__p2l_get_max_offset(apr_off_t *offset,
3084                              svn_fs_t *fs,
3085                              svn_fs_fs__revision_file_t *rev_file,
3086                              svn_revnum_t revision,
3087                              apr_pool_t *scratch_pool)
3088{
3089  return svn_error_trace(p2l_get_max_offset(offset, fs, rev_file, revision,
3090                                            scratch_pool));
3091}
3092
3093/* Calculate the FNV1 checksum over the offset range in REV_FILE, covered by
3094 * ENTRY.  Store the result in ENTRY->FNV1_CHECKSUM.  Use SCRATCH_POOL for
3095 * temporary allocations. */
3096static svn_error_t *
3097calc_fnv1(svn_fs_fs__p2l_entry_t *entry,
3098          svn_fs_fs__revision_file_t *rev_file,
3099          apr_pool_t *scratch_pool)
3100{
3101  unsigned char buffer[4096];
3102  svn_checksum_t *checksum;
3103  svn_checksum_ctx_t *context
3104    = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, scratch_pool);
3105  apr_off_t size = entry->size;
3106
3107  /* Special rules apply to unused sections / items.  The data must be a
3108   * sequence of NUL bytes (not checked here) and the checksum is fixed to 0.
3109   */
3110  if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3111    {
3112      entry->fnv1_checksum = 0;
3113      return SVN_NO_ERROR;
3114    }
3115
3116  /* Read the block and feed it to the checksum calculator. */
3117  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &entry->offset,
3118                           scratch_pool));
3119  while (size > 0)
3120    {
3121      apr_size_t to_read = size > sizeof(buffer)
3122                         ? sizeof(buffer)
3123                         : (apr_size_t)size;
3124      SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, to_read, NULL,
3125                                     NULL, scratch_pool));
3126      SVN_ERR(svn_checksum_update(context, buffer, to_read));
3127      size -= to_read;
3128    }
3129
3130  /* Store final checksum in ENTRY. */
3131  SVN_ERR(svn_checksum_final(&checksum, context, scratch_pool));
3132  entry->fnv1_checksum = ntohl(*(const apr_uint32_t *)checksum->digest);
3133
3134  return SVN_NO_ERROR;
3135}
3136
3137/*
3138 * Index (re-)creation utilities.
3139 */
3140
3141svn_error_t *
3142svn_fs_fs__p2l_index_from_p2l_entries(const char **protoname,
3143                                      svn_fs_t *fs,
3144                                      svn_fs_fs__revision_file_t *rev_file,
3145                                      apr_array_header_t *entries,
3146                                      apr_pool_t *result_pool,
3147                                      apr_pool_t *scratch_pool)
3148{
3149  apr_file_t *proto_index;
3150
3151  /* Use a subpool for immediate temp file cleanup at the end of this
3152   * function. */
3153  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3154  int i;
3155
3156  /* Create a proto-index file. */
3157  SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL,
3158                                   svn_io_file_del_on_pool_cleanup,
3159                                   result_pool, scratch_pool));
3160  SVN_ERR(svn_fs_fs__p2l_proto_index_open(&proto_index, *protoname,
3161                                          scratch_pool));
3162
3163  /* Write ENTRIES to proto-index file and calculate checksums as we go. */
3164  for (i = 0; i < entries->nelts; ++i)
3165    {
3166      svn_fs_fs__p2l_entry_t *entry
3167        = APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t *);
3168      svn_pool_clear(iterpool);
3169
3170      SVN_ERR(calc_fnv1(entry, rev_file, iterpool));
3171      SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(proto_index, entry,
3172                                                   iterpool));
3173    }
3174
3175  /* Convert proto-index into final index and move it into position.
3176   * Note that REV_FILE contains the start revision of the shard file if it
3177   * has been packed while REVISION may be somewhere in the middle.  For
3178   * non-packed shards, they will have identical values. */
3179  SVN_ERR(svn_io_file_close(proto_index, iterpool));
3180
3181  /* Temp file cleanup. */
3182  svn_pool_destroy(iterpool);
3183
3184  return SVN_NO_ERROR;
3185}
3186
3187/* A svn_sort__array compatible comparator function, sorting the
3188 * svn_fs_fs__p2l_entry_t** given in LHS, RHS by revision. */
3189static int
3190compare_p2l_entry_revision(const void *lhs,
3191                           const void *rhs)
3192{
3193  const svn_fs_fs__p2l_entry_t *lhs_entry
3194    =*(const svn_fs_fs__p2l_entry_t *const *)lhs;
3195  const svn_fs_fs__p2l_entry_t *rhs_entry
3196    =*(const svn_fs_fs__p2l_entry_t *const *)rhs;
3197
3198  if (lhs_entry->item.revision < rhs_entry->item.revision)
3199    return -1;
3200
3201  return lhs_entry->item.revision == rhs_entry->item.revision ? 0 : 1;
3202}
3203
3204svn_error_t *
3205svn_fs_fs__l2p_index_from_p2l_entries(const char **protoname,
3206                                      svn_fs_t *fs,
3207                                      apr_array_header_t *entries,
3208                                      apr_pool_t *result_pool,
3209                                      apr_pool_t *scratch_pool)
3210{
3211  apr_file_t *proto_index;
3212
3213  /* Use a subpool for immediate temp file cleanup at the end of this
3214   * function. */
3215  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3216  int i;
3217  svn_revnum_t last_revision = SVN_INVALID_REVNUM;
3218
3219  /* L2P index must be written in revision order.
3220   * Sort ENTRIES accordingly. */
3221  svn_sort__array(entries, compare_p2l_entry_revision);
3222
3223  /* Create the temporary proto-rev file. */
3224  SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL,
3225                                   svn_io_file_del_on_pool_cleanup,
3226                                   result_pool, scratch_pool));
3227  SVN_ERR(svn_fs_fs__l2p_proto_index_open(&proto_index, *protoname,
3228                                          scratch_pool));
3229
3230  /*  Write all entries. */
3231  for (i = 0; i < entries->nelts; ++i)
3232    {
3233      const svn_fs_fs__p2l_entry_t *entry
3234        = APR_ARRAY_IDX(entries, i, const svn_fs_fs__p2l_entry_t *);
3235      svn_pool_clear(iterpool);
3236
3237      if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3238        continue;
3239
3240      if (last_revision != entry->item.revision)
3241        {
3242          SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(proto_index,
3243                                                          scratch_pool));
3244          last_revision = entry->item.revision;
3245        }
3246
3247      SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(proto_index,
3248                                                   entry->offset,
3249                                                   entry->item.number,
3250                                                   iterpool));
3251    }
3252
3253  /* Convert proto-index into final index and move it into position. */
3254  SVN_ERR(svn_io_file_close(proto_index, iterpool));
3255
3256  /* Temp file cleanup. */
3257  svn_pool_destroy(iterpool);
3258
3259  return SVN_NO_ERROR;
3260}
3261
3262
3263/*
3264 * Standard (de-)serialization functions
3265 */
3266
3267svn_error_t *
3268svn_fs_fs__serialize_l2p_header(void **data,
3269                                apr_size_t *data_len,
3270                                void *in,
3271                                apr_pool_t *pool)
3272{
3273  l2p_header_t *header = in;
3274  svn_temp_serializer__context_t *context;
3275  svn_stringbuf_t *serialized;
3276  apr_size_t page_count = header->page_table_index[header->revision_count];
3277  apr_size_t page_table_size = page_count * sizeof(*header->page_table);
3278  apr_size_t index_size
3279    = (header->revision_count + 1) * sizeof(*header->page_table_index);
3280  apr_size_t data_size = sizeof(*header) + index_size + page_table_size;
3281
3282  /* serialize header and all its elements */
3283  context = svn_temp_serializer__init(header,
3284                                      sizeof(*header),
3285                                      data_size + 32,
3286                                      pool);
3287
3288  /* page table index array */
3289  svn_temp_serializer__add_leaf(context,
3290                                (const void * const *)&header->page_table_index,
3291                                index_size);
3292
3293  /* page table array */
3294  svn_temp_serializer__add_leaf(context,
3295                                (const void * const *)&header->page_table,
3296                                page_table_size);
3297
3298  /* return the serialized result */
3299  serialized = svn_temp_serializer__get(context);
3300
3301  *data = serialized->data;
3302  *data_len = serialized->len;
3303
3304  return SVN_NO_ERROR;
3305}
3306
3307svn_error_t *
3308svn_fs_fs__deserialize_l2p_header(void **out,
3309                                  void *data,
3310                                  apr_size_t data_len,
3311                                  apr_pool_t *pool)
3312{
3313  l2p_header_t *header = (l2p_header_t *)data;
3314
3315  /* resolve the pointers in the struct */
3316  svn_temp_deserializer__resolve(header, (void**)&header->page_table_index);
3317  svn_temp_deserializer__resolve(header, (void**)&header->page_table);
3318
3319  /* done */
3320  *out = header;
3321
3322  return SVN_NO_ERROR;
3323}
3324
3325svn_error_t *
3326svn_fs_fs__serialize_l2p_page(void **data,
3327                              apr_size_t *data_len,
3328                              void *in,
3329                              apr_pool_t *pool)
3330{
3331  l2p_page_t *page = in;
3332  svn_temp_serializer__context_t *context;
3333  svn_stringbuf_t *serialized;
3334  apr_size_t of_table_size = page->entry_count * sizeof(*page->offsets);
3335
3336  /* serialize struct and all its elements */
3337  context = svn_temp_serializer__init(page,
3338                                      sizeof(*page),
3339                                      of_table_size + sizeof(*page) + 32,
3340                                      pool);
3341
3342  /* offsets and sub_items arrays */
3343  svn_temp_serializer__add_leaf(context,
3344                                (const void * const *)&page->offsets,
3345                                of_table_size);
3346
3347  /* return the serialized result */
3348  serialized = svn_temp_serializer__get(context);
3349
3350  *data = serialized->data;
3351  *data_len = serialized->len;
3352
3353  return SVN_NO_ERROR;
3354}
3355
3356svn_error_t *
3357svn_fs_fs__deserialize_l2p_page(void **out,
3358                                void *data,
3359                                apr_size_t data_len,
3360                                apr_pool_t *pool)
3361{
3362  l2p_page_t *page = data;
3363
3364  /* resolve the pointers in the struct */
3365  svn_temp_deserializer__resolve(page, (void**)&page->offsets);
3366
3367  /* done */
3368  *out = page;
3369
3370  return SVN_NO_ERROR;
3371}
3372
3373svn_error_t *
3374svn_fs_fs__serialize_p2l_header(void **data,
3375                                apr_size_t *data_len,
3376                                void *in,
3377                                apr_pool_t *pool)
3378{
3379  p2l_header_t *header = in;
3380  svn_temp_serializer__context_t *context;
3381  svn_stringbuf_t *serialized;
3382  apr_size_t table_size = (header->page_count + 1) * sizeof(*header->offsets);
3383
3384  /* serialize header and all its elements */
3385  context = svn_temp_serializer__init(header,
3386                                      sizeof(*header),
3387                                      table_size + sizeof(*header) + 32,
3388                                      pool);
3389
3390  /* offsets array */
3391  svn_temp_serializer__add_leaf(context,
3392                                (const void * const *)&header->offsets,
3393                                table_size);
3394
3395  /* return the serialized result */
3396  serialized = svn_temp_serializer__get(context);
3397
3398  *data = serialized->data;
3399  *data_len = serialized->len;
3400
3401  return SVN_NO_ERROR;
3402}
3403
3404svn_error_t *
3405svn_fs_fs__deserialize_p2l_header(void **out,
3406                                  void *data,
3407                                  apr_size_t data_len,
3408                                  apr_pool_t *pool)
3409{
3410  p2l_header_t *header = data;
3411
3412  /* resolve the only pointer in the struct */
3413  svn_temp_deserializer__resolve(header, (void**)&header->offsets);
3414
3415  /* done */
3416  *out = header;
3417
3418  return SVN_NO_ERROR;
3419}
3420
3421svn_error_t *
3422svn_fs_fs__serialize_p2l_page(void **data,
3423                              apr_size_t *data_len,
3424                              void *in,
3425                              apr_pool_t *pool)
3426{
3427  apr_array_header_t *page = in;
3428  svn_temp_serializer__context_t *context;
3429  svn_stringbuf_t *serialized;
3430  apr_size_t table_size = page->elt_size * page->nelts;
3431
3432  /* serialize array header and all its elements */
3433  context = svn_temp_serializer__init(page,
3434                                      sizeof(*page),
3435                                      table_size + sizeof(*page) + 32,
3436                                      pool);
3437
3438  /* items in the array */
3439  svn_temp_serializer__add_leaf(context,
3440                                (const void * const *)&page->elts,
3441                                table_size);
3442
3443  /* return the serialized result */
3444  serialized = svn_temp_serializer__get(context);
3445
3446  *data = serialized->data;
3447  *data_len = serialized->len;
3448
3449  return SVN_NO_ERROR;
3450}
3451
3452svn_error_t *
3453svn_fs_fs__deserialize_p2l_page(void **out,
3454                                void *data,
3455                                apr_size_t data_len,
3456                                apr_pool_t *pool)
3457{
3458  apr_array_header_t *page = (apr_array_header_t *)data;
3459
3460  /* resolve the only pointer in the struct */
3461  svn_temp_deserializer__resolve(page, (void**)&page->elts);
3462
3463  /* patch up members */
3464  page->pool = pool;
3465  page->nalloc = page->nelts;
3466
3467  /* done */
3468  *out = page;
3469
3470  return SVN_NO_ERROR;
3471}
3472