1/* verify.c --- verification of FSFS filesystems
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include "svn_sorts.h"
24#include "svn_checksum.h"
25#include "svn_time.h"
26#include "private/svn_subr_private.h"
27
28#include "verify.h"
29#include "fs_fs.h"
30
31#include "cached_data.h"
32#include "rep-cache.h"
33#include "util.h"
34#include "index.h"
35
36#include "../libsvn_fs/fs-loader.h"
37
38#include "svn_private_config.h"
39
40
41/** Verifying. **/
42
43/* Baton type expected by verify_walker().  The purpose is to reuse open
44 * rev / pack file handles between calls.  Its contents need to be cleaned
45 * periodically to limit resource usage.
46 */
47typedef struct verify_walker_baton_t
48{
49  /* number of calls to verify_walker() since the last clean */
50  int iteration_count;
51
52  /* number of files opened since the last clean */
53  int file_count;
54
55  /* progress notification callback to invoke periodically (may be NULL) */
56  svn_fs_progress_notify_func_t notify_func;
57
58  /* baton to use with NOTIFY_FUNC */
59  void *notify_baton;
60
61  /* remember the last revision for which we called notify_func */
62  svn_revnum_t last_notified_revision;
63
64  /* cached hint for successive calls to svn_fs_fs__check_rep() */
65  void *hint;
66
67  /* pool to use for the file handles etc. */
68  apr_pool_t *pool;
69} verify_walker_baton_t;
70
71/* Used by svn_fs_fs__verify().
72   Implements svn_fs_fs__walk_rep_reference().walker.  */
73static svn_error_t *
74verify_walker(representation_t *rep,
75              void *baton,
76              svn_fs_t *fs,
77              apr_pool_t *scratch_pool)
78{
79  verify_walker_baton_t *walker_baton = baton;
80  void *previous_hint;
81
82  /* notify and free resources periodically */
83  if (   walker_baton->iteration_count > 1000
84      || walker_baton->file_count > 16)
85    {
86      if (   walker_baton->notify_func
87          && rep->revision != walker_baton->last_notified_revision)
88        {
89          walker_baton->notify_func(rep->revision,
90                                    walker_baton->notify_baton,
91                                    scratch_pool);
92          walker_baton->last_notified_revision = rep->revision;
93        }
94
95      svn_pool_clear(walker_baton->pool);
96
97      walker_baton->iteration_count = 0;
98      walker_baton->file_count = 0;
99      walker_baton->hint = NULL;
100    }
101
102  /* access the repo data */
103  previous_hint = walker_baton->hint;
104  SVN_ERR(svn_fs_fs__check_rep(rep, fs, &walker_baton->hint,
105                               walker_baton->pool));
106
107  /* update resource usage counters */
108  walker_baton->iteration_count++;
109  if (previous_hint != walker_baton->hint)
110    walker_baton->file_count++;
111
112  return SVN_NO_ERROR;
113}
114
115/* Verify the rep cache DB's consistency with our rev / pack data.
116 * The function signature is similar to svn_fs_fs__verify.
117 * The values of START and END have already been auto-selected and
118 * verified.
119 */
120static svn_error_t *
121verify_rep_cache(svn_fs_t *fs,
122                 svn_revnum_t start,
123                 svn_revnum_t end,
124                 svn_fs_progress_notify_func_t notify_func,
125                 void *notify_baton,
126                 svn_cancel_func_t cancel_func,
127                 void *cancel_baton,
128                 apr_pool_t *pool)
129{
130  svn_boolean_t exists;
131
132  /* rep-cache verification. */
133  SVN_ERR(svn_fs_fs__exists_rep_cache(&exists, fs, pool));
134  if (exists)
135    {
136      /* provide a baton to allow the reuse of open file handles between
137         iterations (saves 2/3 of OS level file operations). */
138      verify_walker_baton_t *baton = apr_pcalloc(pool, sizeof(*baton));
139      baton->pool = svn_pool_create(pool);
140      baton->last_notified_revision = SVN_INVALID_REVNUM;
141      baton->notify_func = notify_func;
142      baton->notify_baton = notify_baton;
143
144      /* tell the user that we are now ready to do *something* */
145      if (notify_func)
146        notify_func(SVN_INVALID_REVNUM, notify_baton, baton->pool);
147
148      /* Do not attempt to walk the rep-cache database if its file does
149         not exist,  since doing so would create it --- which may confuse
150         the administrator.   Don't take any lock. */
151      SVN_ERR(svn_fs_fs__walk_rep_reference(fs, start, end,
152                                            verify_walker, baton,
153                                            cancel_func, cancel_baton,
154                                            pool));
155
156      /* walker resource cleanup */
157      svn_pool_destroy(baton->pool);
158    }
159
160  return SVN_NO_ERROR;
161}
162
163/* Verify that the MD5 checksum of the data between offsets START and END
164 * in FILE matches the EXPECTED checksum.  If there is a mismatch use the
165 * indedx NAME in the error message.  Supports cancellation with CANCEL_FUNC
166 * and CANCEL_BATON.  SCRATCH_POOL is for temporary allocations. */
167static svn_error_t *
168verify_index_checksum(apr_file_t *file,
169                      const char *name,
170                      apr_off_t start,
171                      apr_off_t end,
172                      svn_checksum_t *expected,
173                      svn_cancel_func_t cancel_func,
174                      void *cancel_baton,
175                      apr_pool_t *scratch_pool)
176{
177  unsigned char buffer[SVN__STREAM_CHUNK_SIZE];
178  apr_off_t size = end - start;
179  svn_checksum_t *actual;
180  svn_checksum_ctx_t *context
181    = svn_checksum_ctx_create(svn_checksum_md5, scratch_pool);
182
183  /* Calculate the index checksum. */
184  SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool));
185  while (size > 0)
186    {
187      apr_size_t to_read = size > sizeof(buffer)
188                         ? sizeof(buffer)
189                         : (apr_size_t)size;
190      SVN_ERR(svn_io_file_read_full2(file, buffer, to_read, NULL, NULL,
191                                     scratch_pool));
192      SVN_ERR(svn_checksum_update(context, buffer, to_read));
193      size -= to_read;
194
195      if (cancel_func)
196        SVN_ERR(cancel_func(cancel_baton));
197    }
198
199  SVN_ERR(svn_checksum_final(&actual, context, scratch_pool));
200
201  /* Verify that it matches the expected checksum. */
202  if (!svn_checksum_match(expected, actual))
203    {
204      const char *file_name;
205
206      SVN_ERR(svn_io_file_name_get(&file_name, file, scratch_pool));
207      SVN_ERR(svn_checksum_mismatch_err(expected, actual, scratch_pool,
208                                        _("%s checksum mismatch in file %s"),
209                                        name, file_name));
210    }
211
212  return SVN_NO_ERROR;
213}
214
215/* Verify the MD5 checksums of the index data in the rev / pack file
216 * containing revision START in FS.  If given, invoke CANCEL_FUNC with
217 * CANCEL_BATON at regular intervals.  Use SCRATCH_POOL for temporary
218 * allocations.
219 */
220static svn_error_t *
221verify_index_checksums(svn_fs_t *fs,
222                       svn_revnum_t start,
223                       svn_cancel_func_t cancel_func,
224                       void *cancel_baton,
225                       apr_pool_t *scratch_pool)
226{
227  svn_fs_fs__revision_file_t *rev_file;
228
229  /* Open the rev / pack file and read the footer */
230  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start,
231                                           scratch_pool, scratch_pool));
232  SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
233
234  /* Verify the index contents against the checksum from the footer. */
235  SVN_ERR(verify_index_checksum(rev_file->file, "L2P index",
236                                rev_file->l2p_offset, rev_file->p2l_offset,
237                                rev_file->l2p_checksum,
238                                cancel_func, cancel_baton, scratch_pool));
239  SVN_ERR(verify_index_checksum(rev_file->file, "P2L index",
240                                rev_file->p2l_offset, rev_file->footer_offset,
241                                rev_file->p2l_checksum,
242                                cancel_func, cancel_baton, scratch_pool));
243
244  /* Done. */
245  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
246
247  return SVN_NO_ERROR;
248}
249
250/* Verify that for all log-to-phys index entries for revisions START to
251 * START + COUNT-1 in FS there is a consistent entry in the phys-to-log
252 * index.  If given, invoke CANCEL_FUNC with CANCEL_BATON at regular
253 * intervals. Use POOL for allocations.
254 */
255static svn_error_t *
256compare_l2p_to_p2l_index(svn_fs_t *fs,
257                         svn_revnum_t start,
258                         svn_revnum_t count,
259                         svn_cancel_func_t cancel_func,
260                         void *cancel_baton,
261                         apr_pool_t *pool)
262{
263  svn_revnum_t i;
264  apr_pool_t *iterpool = svn_pool_create(pool);
265  apr_array_header_t *max_ids;
266
267  /* common file access structure */
268  svn_fs_fs__revision_file_t *rev_file;
269  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool,
270                                           iterpool));
271
272  /* determine the range of items to check for each revision */
273  SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, start, count, pool,
274                                     iterpool));
275
276  /* check all items in all revisions if the given range */
277  for (i = 0; i < max_ids->nelts; ++i)
278    {
279      apr_uint64_t k;
280      apr_uint64_t max_id = APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
281      svn_revnum_t revision = start + i;
282
283      for (k = 0; k < max_id; ++k)
284        {
285          apr_off_t offset;
286          svn_fs_fs__p2l_entry_t *p2l_entry;
287          svn_pool_clear(iterpool);
288
289          /* get L2P entry.  Ignore unused entries. */
290          SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision,
291                                         NULL, k, iterpool));
292          if (offset == -1)
293            continue;
294
295          /* find the corresponding P2L entry */
296          SVN_ERR(svn_fs_fs__p2l_entry_lookup(&p2l_entry, fs, rev_file,
297                                              revision, offset, iterpool,
298                                              iterpool));
299
300          if (p2l_entry == NULL)
301            return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
302                                     NULL,
303                                     _("p2l index entry not found for "
304                                       "PHYS %s returned by "
305                                       "l2p index for LOG r%ld:i%ld"),
306                                     apr_off_t_toa(pool, offset),
307                                     revision, (long)k);
308
309          if (   p2l_entry->item.number != k
310              || p2l_entry->item.revision != revision)
311            return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
312                                     NULL,
313                                     _("p2l index info LOG r%ld:i%ld"
314                                       " does not match "
315                                       "l2p index for LOG r%ld:i%ld"),
316                                     p2l_entry->item.revision,
317                                     (long)p2l_entry->item.number,
318                                     revision, (long)k);
319        }
320
321      if (cancel_func)
322        SVN_ERR(cancel_func(cancel_baton));
323    }
324
325  svn_pool_destroy(iterpool);
326
327  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
328
329  return SVN_NO_ERROR;
330}
331
332/* Verify that for all phys-to-log index entries for revisions START to
333 * START + COUNT-1 in FS there is a consistent entry in the log-to-phys
334 * index.  If given, invoke CANCEL_FUNC with CANCEL_BATON at regular
335 * intervals. Use POOL for allocations.
336 *
337 * Please note that we can only check on pack / rev file granularity and
338 * must only be called for a single rev / pack file.
339 */
340static svn_error_t *
341compare_p2l_to_l2p_index(svn_fs_t *fs,
342                         svn_revnum_t start,
343                         svn_revnum_t count,
344                         svn_cancel_func_t cancel_func,
345                         void *cancel_baton,
346                         apr_pool_t *pool)
347{
348  fs_fs_data_t *ffd = fs->fsap_data;
349  apr_pool_t *iterpool = svn_pool_create(pool);
350  apr_off_t max_offset;
351  apr_off_t offset = 0;
352
353  /* common file access structure */
354  svn_fs_fs__revision_file_t *rev_file;
355  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool,
356                                           iterpool));
357
358  /* get the size of the rev / pack file as covered by the P2L index */
359  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, rev_file, start,
360                                        pool));
361
362  /* for all offsets in the file, get the P2L index entries and check
363     them against the L2P index */
364  for (offset = 0; offset < max_offset; )
365    {
366      apr_array_header_t *entries;
367      svn_fs_fs__p2l_entry_t *last_entry;
368      int i;
369
370      svn_pool_clear(iterpool);
371
372      /* get all entries for the current block */
373      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, rev_file, start,
374                                          offset, ffd->p2l_page_size,
375                                          iterpool, iterpool));
376      if (entries->nelts == 0)
377        return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION,
378                                 NULL,
379                                 _("p2l does not cover offset %s"
380                                   " for revision %ld"),
381                                  apr_off_t_toa(pool, offset), start);
382
383      /* process all entries (and later continue with the next block) */
384      last_entry
385        = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t);
386      offset = last_entry->offset + last_entry->size;
387
388      for (i = 0; i < entries->nelts; ++i)
389        {
390          svn_fs_fs__p2l_entry_t *entry
391            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
392
393          /* check all sub-items for consist entries in the L2P index */
394          if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
395            {
396              /* There is no L2P entry for unused rev file sections.
397               * And its P2L index data is hardly ever used.  But we
398               * should still check whether someone tempered with it. */
399              if (   entry->item.revision != SVN_INVALID_REVNUM
400                  && (   entry->item.revision < start
401                      || entry->item.revision >= start + count))
402                return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
403                                         NULL,
404                                         _("Empty P2L entry for PHYS %s "
405                                           "refers to revision %ld outside "
406                                           "the rev / pack file (%ld-%ld)"),
407                                         apr_off_t_toa(pool, entry->offset),
408                                         entry->item.revision,
409                                         start, start + count - 1);
410            }
411          else
412            {
413              apr_off_t l2p_offset;
414              SVN_ERR(svn_fs_fs__item_offset(&l2p_offset, fs, rev_file,
415                                             entry->item.revision, NULL,
416                                             entry->item.number, iterpool));
417
418              if (l2p_offset != entry->offset)
419                return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
420                                         NULL,
421                                         _("l2p index entry PHYS %s"
422                                           "does not match p2l index value "
423                                           "LOG r%ld:i%ld for PHYS %s"),
424                                         apr_off_t_toa(pool, l2p_offset),
425                                         entry->item.revision,
426                                         (long)entry->item.number,
427                                         apr_off_t_toa(pool, entry->offset));
428            }
429        }
430
431      if (cancel_func)
432        SVN_ERR(cancel_func(cancel_baton));
433    }
434
435  svn_pool_destroy(iterpool);
436
437  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
438
439  return SVN_NO_ERROR;
440}
441
442/* Items smaller than this can be read at once into a buffer and directly
443 * be checksummed.  Larger items require stream processing.
444 * Must be a multiple of 8. */
445#define STREAM_THRESHOLD 4096
446
447/* Verify that the next SIZE bytes read from FILE are NUL.
448 * SIZE must not exceed STREAM_THRESHOLD.  Use POOL for allocations.
449 */
450static svn_error_t *
451expect_buffer_nul(apr_file_t *file,
452                  apr_off_t size,
453                  apr_pool_t *pool)
454{
455  union
456  {
457    unsigned char buffer[STREAM_THRESHOLD];
458    apr_uint64_t chunks[STREAM_THRESHOLD / sizeof(apr_uint64_t)];
459  } data;
460
461  apr_size_t i;
462  SVN_ERR_ASSERT(size <= STREAM_THRESHOLD);
463
464  /* read the whole data block; error out on failure */
465  data.chunks[(size - 1)/ sizeof(apr_uint64_t)] = 0;
466  SVN_ERR(svn_io_file_read_full2(file, data.buffer, size, NULL, NULL, pool));
467
468  /* chunky check */
469  for (i = 0; i < size / sizeof(apr_uint64_t); ++i)
470    if (data.chunks[i] != 0)
471      break;
472
473  /* byte-wise check upon mismatch or at the end of the block */
474  for (i *= sizeof(apr_uint64_t); i < size; ++i)
475    if (data.buffer[i] != 0)
476      {
477        const char *file_name;
478        apr_off_t offset;
479
480        SVN_ERR(svn_io_file_name_get(&file_name, file, pool));
481        SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, pool));
482        offset -= size - i;
483
484        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
485                                 _("Empty section in file %s contains "
486                                   "non-NUL data at offset %s"),
487                                 file_name, apr_off_t_toa(pool, offset));
488      }
489
490  return SVN_NO_ERROR;
491}
492
493/* Verify that the next SIZE bytes read from FILE are NUL.
494 * Use POOL for allocations.
495 */
496static svn_error_t *
497read_all_nul(apr_file_t *file,
498             apr_off_t size,
499             apr_pool_t *pool)
500{
501  for (; size >= STREAM_THRESHOLD; size -= STREAM_THRESHOLD)
502    SVN_ERR(expect_buffer_nul(file, STREAM_THRESHOLD, pool));
503
504  if (size)
505    SVN_ERR(expect_buffer_nul(file, size, pool));
506
507  return SVN_NO_ERROR;
508}
509
510/* Compare the ACTUAL checksum with the one expected by ENTRY.
511 * Return an error in case of mismatch.  Use the name of FILE
512 * in error message.  Allocate data in POOL.
513 */
514static svn_error_t *
515expected_checksum(apr_file_t *file,
516                  svn_fs_fs__p2l_entry_t *entry,
517                  apr_uint32_t actual,
518                  apr_pool_t *pool)
519{
520  if (actual != entry->fnv1_checksum)
521    {
522      const char *file_name;
523
524      SVN_ERR(svn_io_file_name_get(&file_name, file, pool));
525      return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
526                               _("Checksum mismatch in item at offset %s of "
527                                 "length %s bytes in file %s"),
528                               apr_off_t_toa(pool, entry->offset),
529                               apr_off_t_toa(pool, entry->size), file_name);
530    }
531
532  return SVN_NO_ERROR;
533}
534
535/* Verify that the FNV checksum over the next ENTRY->SIZE bytes read
536 * from FILE will match ENTRY's expected checksum.  SIZE must not
537 * exceed STREAM_THRESHOLD.  Use POOL for allocations.
538 */
539static svn_error_t *
540expected_buffered_checksum(apr_file_t *file,
541                           svn_fs_fs__p2l_entry_t *entry,
542                           apr_pool_t *pool)
543{
544  unsigned char buffer[STREAM_THRESHOLD];
545  SVN_ERR_ASSERT(entry->size <= STREAM_THRESHOLD);
546
547  SVN_ERR(svn_io_file_read_full2(file, buffer, (apr_size_t)entry->size,
548                                 NULL, NULL, pool));
549  SVN_ERR(expected_checksum(file, entry,
550                            svn__fnv1a_32x4(buffer, (apr_size_t)entry->size),
551                            pool));
552
553  return SVN_NO_ERROR;
554}
555
556/* Verify that the FNV checksum over the next ENTRY->SIZE bytes read from
557 * FILE will match ENTRY's expected checksum.  Use POOL for allocations.
558 */
559static svn_error_t *
560expected_streamed_checksum(apr_file_t *file,
561                           svn_fs_fs__p2l_entry_t *entry,
562                           apr_pool_t *pool)
563{
564  unsigned char buffer[STREAM_THRESHOLD];
565  svn_checksum_t *checksum;
566  svn_checksum_ctx_t *context
567    = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, pool);
568  apr_off_t size = entry->size;
569
570  while (size > 0)
571    {
572      apr_size_t to_read = size > sizeof(buffer)
573                         ? sizeof(buffer)
574                         : (apr_size_t)size;
575      SVN_ERR(svn_io_file_read_full2(file, buffer, to_read, NULL, NULL,
576                                     pool));
577      SVN_ERR(svn_checksum_update(context, buffer, to_read));
578      size -= to_read;
579    }
580
581  SVN_ERR(svn_checksum_final(&checksum, context, pool));
582  SVN_ERR(expected_checksum(file, entry,
583                            ntohl(*(const apr_uint32_t *)checksum->digest),
584                            pool));
585
586  return SVN_NO_ERROR;
587}
588
589/* Verify that for all phys-to-log index entries for revisions START to
590 * START + COUNT-1 in FS match the actual pack / rev file contents.
591 * If given, invoke CANCEL_FUNC with CANCEL_BATON at regular intervals.
592 * Use POOL for allocations.
593 *
594 * Please note that we can only check on pack / rev file granularity and
595 * must only be called for a single rev / pack file.
596 */
597static svn_error_t *
598compare_p2l_to_rev(svn_fs_t *fs,
599                   svn_revnum_t start,
600                   svn_revnum_t count,
601                   svn_cancel_func_t cancel_func,
602                   void *cancel_baton,
603                   apr_pool_t *pool)
604{
605  fs_fs_data_t *ffd = fs->fsap_data;
606  apr_pool_t *iterpool = svn_pool_create(pool);
607  apr_off_t max_offset;
608  apr_off_t offset = 0;
609  svn_fs_fs__revision_file_t *rev_file;
610
611  /* open the pack / rev file that is covered by the p2l index */
612  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool,
613                                           iterpool));
614
615  /* check file size vs. range covered by index */
616  SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
617  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, rev_file, start,
618                                        pool));
619
620  if (rev_file->l2p_offset != max_offset)
621    return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT, NULL,
622                             _("File size of %s for revision r%ld does "
623                               "not match p2l index size of %s"),
624                             apr_off_t_toa(pool, rev_file->l2p_offset), start,
625                             apr_off_t_toa(pool, max_offset));
626
627  SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, NULL, 0,
628                                   pool));
629
630  /* for all offsets in the file, get the P2L index entries and check
631     them against the L2P index */
632  for (offset = 0; offset < max_offset; )
633    {
634      apr_array_header_t *entries;
635      int i;
636
637      svn_pool_clear(iterpool);
638
639      /* get all entries for the current block */
640      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, rev_file, start,
641                                          offset, ffd->p2l_page_size,
642                                          iterpool, iterpool));
643
644      /* The above might have moved the file pointer.
645       * Ensure we actually start reading at OFFSET.  */
646      SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size,
647                                       NULL, offset, iterpool));
648
649      /* process all entries (and later continue with the next block) */
650      for (i = 0; i < entries->nelts; ++i)
651        {
652          svn_fs_fs__p2l_entry_t *entry
653            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
654
655          /* skip bits we previously checked */
656          if (i == 0 && entry->offset < offset)
657            continue;
658
659          /* skip zero-sized entries */
660          if (entry->size == 0)
661            continue;
662
663          /* p2l index must cover all rev / pack file offsets exactly once */
664          if (entry->offset != offset)
665            return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
666                                     NULL,
667                                     _("p2l index entry for revision r%ld"
668                                       " is non-contiguous between offsets "
669                                       " %s and %s"),
670                                     start,
671                                     apr_off_t_toa(pool, offset),
672                                     apr_off_t_toa(pool, entry->offset));
673
674          /* empty sections must contain NUL bytes only */
675          if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
676            {
677              /* skip filler entry at the end of the p2l index */
678              if (entry->offset != max_offset)
679                SVN_ERR(read_all_nul(rev_file->file, entry->size, pool));
680            }
681          else
682            {
683              if (entry->size < STREAM_THRESHOLD)
684                SVN_ERR(expected_buffered_checksum(rev_file->file, entry,
685                                                   pool));
686              else
687                SVN_ERR(expected_streamed_checksum(rev_file->file, entry,
688                                                   pool));
689            }
690
691          /* advance offset */
692          offset += entry->size;
693        }
694
695      if (cancel_func)
696        SVN_ERR(cancel_func(cancel_baton));
697    }
698
699  svn_pool_destroy(iterpool);
700
701  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
702
703  return SVN_NO_ERROR;
704}
705
706/* Verify that the revprops of the revisions START to END in FS can be
707 * accessed.  Invoke CANCEL_FUNC with CANCEL_BATON at regular intervals.
708 *
709 * The values of START and END have already been auto-selected and
710 * verified.
711 */
712static svn_error_t *
713verify_revprops(svn_fs_t *fs,
714                svn_revnum_t start,
715                svn_revnum_t end,
716                svn_cancel_func_t cancel_func,
717                void *cancel_baton,
718                apr_pool_t *pool)
719{
720  svn_revnum_t revision;
721  apr_pool_t *iterpool = svn_pool_create(pool);
722
723  for (revision = start; revision < end; ++revision)
724    {
725      svn_string_t *date;
726      apr_time_t timetemp;
727
728      svn_pool_clear(iterpool);
729
730      /* Access the svn:date revprop.
731       * This implies parsing all revprops for that revision. */
732      SVN_ERR(svn_fs_fs__revision_prop(&date, fs, revision,
733                                       SVN_PROP_REVISION_DATE, iterpool));
734
735      /* The time stamp is the only revprop that, if given, needs to
736       * have a valid content. */
737      if (date)
738        SVN_ERR(svn_time_from_cstring(&timetemp, date->data, iterpool));
739
740      if (cancel_func)
741        SVN_ERR(cancel_func(cancel_baton));
742    }
743
744  svn_pool_destroy(iterpool);
745
746  return SVN_NO_ERROR;
747}
748
749static svn_revnum_t
750pack_size(svn_fs_t *fs, svn_revnum_t rev)
751{
752  fs_fs_data_t *ffd = fs->fsap_data;
753
754  return rev < ffd->min_unpacked_rev ? ffd->max_files_per_dir : 1;
755}
756
757/* Verify that on-disk representation has not been tempered with (in a way
758 * that leaves the repository in a corrupted state).  This compares log-to-
759 * phys with phys-to-log indexes, verifies the low-level checksums and
760 * checks that all revprops are available.  The function signature is
761 * similar to svn_fs_fs__verify.
762 *
763 * The values of START and END have already been auto-selected and
764 * verified.  You may call this for format7 or higher repos.
765 */
766static svn_error_t *
767verify_f7_metadata_consistency(svn_fs_t *fs,
768                               svn_revnum_t start,
769                               svn_revnum_t end,
770                               svn_fs_progress_notify_func_t notify_func,
771                               void *notify_baton,
772                               svn_cancel_func_t cancel_func,
773                               void *cancel_baton,
774                               apr_pool_t *pool)
775{
776  fs_fs_data_t *ffd = fs->fsap_data;
777  svn_revnum_t revision, next_revision;
778  apr_pool_t *iterpool = svn_pool_create(pool);
779
780  for (revision = start; revision <= end; revision = next_revision)
781    {
782      svn_error_t *err = SVN_NO_ERROR;
783
784      svn_revnum_t count = pack_size(fs, revision);
785      svn_revnum_t pack_start = svn_fs_fs__packed_base_rev(fs, revision);
786      svn_revnum_t pack_end = pack_start + count;
787
788      svn_pool_clear(iterpool);
789
790      if (notify_func && (pack_start % ffd->max_files_per_dir == 0))
791        notify_func(pack_start, notify_baton, iterpool);
792
793      /* Check for external corruption to the indexes. */
794      err = verify_index_checksums(fs, pack_start, cancel_func,
795                                   cancel_baton, iterpool);
796
797      /* two-way index check */
798      if (!err)
799        err = compare_l2p_to_p2l_index(fs, pack_start, pack_end - pack_start,
800                                       cancel_func, cancel_baton, iterpool);
801      if (!err)
802        err = compare_p2l_to_l2p_index(fs, pack_start, pack_end - pack_start,
803                                       cancel_func, cancel_baton, iterpool);
804
805      /* verify in-index checksums and types vs. actual rev / pack files */
806      if (!err)
807        err = compare_p2l_to_rev(fs, pack_start, pack_end - pack_start,
808                                 cancel_func, cancel_baton, iterpool);
809
810      /* ensure that revprops are available and accessible */
811      if (!err)
812        err = verify_revprops(fs, pack_start, pack_end,
813                              cancel_func, cancel_baton, iterpool);
814
815      /* concurrent packing is one of the reasons why verification may fail.
816         Make sure, we operate on up-to-date information. */
817      if (err)
818        {
819          svn_error_t *err2
820            = svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev,
821                                               fs, pool);
822
823          /* Be careful to not leak ERR. */
824          if (err2)
825            return svn_error_trace(svn_error_compose_create(err, err2));
826        }
827
828      /* retry the whole shard if it got packed in the meantime */
829      if (err && count != pack_size(fs, revision))
830        {
831          svn_error_clear(err);
832
833          /* We could simply assign revision here but the code below is
834             more intuitive to maintainers. */
835          next_revision = svn_fs_fs__packed_base_rev(fs, revision);
836        }
837      else
838        {
839          SVN_ERR(err);
840          next_revision = pack_end;
841        }
842    }
843
844  svn_pool_destroy(iterpool);
845
846  return SVN_NO_ERROR;
847}
848
849svn_error_t *
850svn_fs_fs__verify(svn_fs_t *fs,
851                  svn_revnum_t start,
852                  svn_revnum_t end,
853                  svn_fs_progress_notify_func_t notify_func,
854                  void *notify_baton,
855                  svn_cancel_func_t cancel_func,
856                  void *cancel_baton,
857                  apr_pool_t *pool)
858{
859  fs_fs_data_t *ffd = fs->fsap_data;
860  svn_revnum_t youngest = ffd->youngest_rev_cache; /* cache is current */
861
862  /* Input validation. */
863  if (! SVN_IS_VALID_REVNUM(start))
864    start = 0;
865  if (! SVN_IS_VALID_REVNUM(end))
866    end = youngest;
867  SVN_ERR(svn_fs_fs__ensure_revision_exists(start, fs, pool));
868  SVN_ERR(svn_fs_fs__ensure_revision_exists(end, fs, pool));
869
870  /* log/phys index consistency.  We need to check them first to make
871     sure we can access the rev / pack files in format7. */
872  if (svn_fs_fs__use_log_addressing(fs))
873    SVN_ERR(verify_f7_metadata_consistency(fs, start, end,
874                                           notify_func, notify_baton,
875                                           cancel_func, cancel_baton, pool));
876
877  /* rep cache consistency */
878  if (ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT)
879    SVN_ERR(verify_rep_cache(fs, start, end, notify_func, notify_baton,
880                             cancel_func, cancel_baton, pool));
881
882  return SVN_NO_ERROR;
883}
884