verify.c revision 362181
1/* verify.c --- verification of FSFS filesystems
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include "svn_sorts.h"
24#include "svn_checksum.h"
25#include "svn_time.h"
26#include "private/svn_subr_private.h"
27
28#include "verify.h"
29#include "fs_fs.h"
30
31#include "cached_data.h"
32#include "rep-cache.h"
33#include "revprops.h"
34#include "util.h"
35#include "index.h"
36
37#include "../libsvn_fs/fs-loader.h"
38
39#include "svn_private_config.h"
40
41
42/** Verifying. **/
43
44/* Baton type expected by verify_walker().  The purpose is to reuse open
45 * rev / pack file handles between calls.  Its contents need to be cleaned
46 * periodically to limit resource usage.
47 */
48typedef struct verify_walker_baton_t
49{
50  /* number of calls to verify_walker() since the last clean */
51  int iteration_count;
52
53  /* number of files opened since the last clean */
54  int file_count;
55
56  /* progress notification callback to invoke periodically (may be NULL) */
57  svn_fs_progress_notify_func_t notify_func;
58
59  /* baton to use with NOTIFY_FUNC */
60  void *notify_baton;
61
62  /* remember the last revision for which we called notify_func */
63  svn_revnum_t last_notified_revision;
64
65  /* cached hint for successive calls to svn_fs_fs__check_rep() */
66  void *hint;
67
68  /* pool to use for the file handles etc. */
69  apr_pool_t *pool;
70} verify_walker_baton_t;
71
72/* Used by svn_fs_fs__verify().
73   Implements svn_fs_fs__walk_rep_reference().walker.  */
74static svn_error_t *
75verify_walker(representation_t *rep,
76              void *baton,
77              svn_fs_t *fs,
78              apr_pool_t *scratch_pool)
79{
80  verify_walker_baton_t *walker_baton = baton;
81  void *previous_hint;
82
83  /* notify and free resources periodically */
84  if (   walker_baton->iteration_count > 1000
85      || walker_baton->file_count > 16)
86    {
87      if (   walker_baton->notify_func
88          && rep->revision != walker_baton->last_notified_revision)
89        {
90          walker_baton->notify_func(rep->revision,
91                                    walker_baton->notify_baton,
92                                    scratch_pool);
93          walker_baton->last_notified_revision = rep->revision;
94        }
95
96      svn_pool_clear(walker_baton->pool);
97
98      walker_baton->iteration_count = 0;
99      walker_baton->file_count = 0;
100      walker_baton->hint = NULL;
101    }
102
103  /* access the repo data */
104  previous_hint = walker_baton->hint;
105  SVN_ERR(svn_fs_fs__check_rep(rep, fs, &walker_baton->hint,
106                               walker_baton->pool));
107
108  /* update resource usage counters */
109  walker_baton->iteration_count++;
110  if (previous_hint != walker_baton->hint)
111    walker_baton->file_count++;
112
113  return SVN_NO_ERROR;
114}
115
116/* Verify the rep cache DB's consistency with our rev / pack data.
117 * The function signature is similar to svn_fs_fs__verify.
118 * The values of START and END have already been auto-selected and
119 * verified.
120 */
121static svn_error_t *
122verify_rep_cache(svn_fs_t *fs,
123                 svn_revnum_t start,
124                 svn_revnum_t end,
125                 svn_fs_progress_notify_func_t notify_func,
126                 void *notify_baton,
127                 svn_cancel_func_t cancel_func,
128                 void *cancel_baton,
129                 apr_pool_t *pool)
130{
131  svn_boolean_t exists;
132
133  /* rep-cache verification. */
134  SVN_ERR(svn_fs_fs__exists_rep_cache(&exists, fs, pool));
135  if (exists)
136    {
137      /* provide a baton to allow the reuse of open file handles between
138         iterations (saves 2/3 of OS level file operations). */
139      verify_walker_baton_t *baton = apr_pcalloc(pool, sizeof(*baton));
140      baton->pool = svn_pool_create(pool);
141      baton->last_notified_revision = SVN_INVALID_REVNUM;
142      baton->notify_func = notify_func;
143      baton->notify_baton = notify_baton;
144
145      /* tell the user that we are now ready to do *something* */
146      if (notify_func)
147        notify_func(SVN_INVALID_REVNUM, notify_baton, baton->pool);
148
149      /* Do not attempt to walk the rep-cache database if its file does
150         not exist,  since doing so would create it --- which may confuse
151         the administrator.   Don't take any lock. */
152      SVN_ERR(svn_fs_fs__walk_rep_reference(fs, start, end,
153                                            verify_walker, baton,
154                                            cancel_func, cancel_baton,
155                                            pool));
156
157      /* walker resource cleanup */
158      svn_pool_destroy(baton->pool);
159    }
160
161  return SVN_NO_ERROR;
162}
163
164/* Verify that the MD5 checksum of the data between offsets START and END
165 * in FILE matches the EXPECTED checksum.  If there is a mismatch use the
166 * indedx NAME in the error message.  Supports cancellation with CANCEL_FUNC
167 * and CANCEL_BATON.  SCRATCH_POOL is for temporary allocations. */
168static svn_error_t *
169verify_index_checksum(apr_file_t *file,
170                      const char *name,
171                      apr_off_t start,
172                      apr_off_t end,
173                      svn_checksum_t *expected,
174                      svn_cancel_func_t cancel_func,
175                      void *cancel_baton,
176                      apr_pool_t *scratch_pool)
177{
178  unsigned char buffer[SVN__STREAM_CHUNK_SIZE];
179  apr_off_t size = end - start;
180  svn_checksum_t *actual;
181  svn_checksum_ctx_t *context
182    = svn_checksum_ctx_create(svn_checksum_md5, scratch_pool);
183
184  /* Calculate the index checksum. */
185  SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool));
186  while (size > 0)
187    {
188      apr_size_t to_read = size > sizeof(buffer)
189                         ? sizeof(buffer)
190                         : (apr_size_t)size;
191      SVN_ERR(svn_io_file_read_full2(file, buffer, to_read, NULL, NULL,
192                                     scratch_pool));
193      SVN_ERR(svn_checksum_update(context, buffer, to_read));
194      size -= to_read;
195
196      if (cancel_func)
197        SVN_ERR(cancel_func(cancel_baton));
198    }
199
200  SVN_ERR(svn_checksum_final(&actual, context, scratch_pool));
201
202  /* Verify that it matches the expected checksum. */
203  if (!svn_checksum_match(expected, actual))
204    {
205      const char *file_name;
206
207      SVN_ERR(svn_io_file_name_get(&file_name, file, scratch_pool));
208      SVN_ERR(svn_checksum_mismatch_err(expected, actual, scratch_pool,
209                                        _("%s checksum mismatch in file %s"),
210                                        name, file_name));
211    }
212
213  return SVN_NO_ERROR;
214}
215
216/* Verify the MD5 checksums of the index data in the rev / pack file
217 * containing revision START in FS.  If given, invoke CANCEL_FUNC with
218 * CANCEL_BATON at regular intervals.  Use SCRATCH_POOL for temporary
219 * allocations.
220 */
221static svn_error_t *
222verify_index_checksums(svn_fs_t *fs,
223                       svn_revnum_t start,
224                       svn_cancel_func_t cancel_func,
225                       void *cancel_baton,
226                       apr_pool_t *scratch_pool)
227{
228  svn_fs_fs__revision_file_t *rev_file;
229
230  /* Open the rev / pack file and read the footer */
231  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start,
232                                           scratch_pool, scratch_pool));
233  SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
234
235  /* Verify the index contents against the checksum from the footer. */
236  SVN_ERR(verify_index_checksum(rev_file->file, "L2P index",
237                                rev_file->l2p_offset, rev_file->p2l_offset,
238                                rev_file->l2p_checksum,
239                                cancel_func, cancel_baton, scratch_pool));
240  SVN_ERR(verify_index_checksum(rev_file->file, "P2L index",
241                                rev_file->p2l_offset, rev_file->footer_offset,
242                                rev_file->p2l_checksum,
243                                cancel_func, cancel_baton, scratch_pool));
244
245  /* Done. */
246  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
247
248  return SVN_NO_ERROR;
249}
250
251/* Verify that for all log-to-phys index entries for revisions START to
252 * START + COUNT-1 in FS there is a consistent entry in the phys-to-log
253 * index.  If given, invoke CANCEL_FUNC with CANCEL_BATON at regular
254 * intervals. Use POOL for allocations.
255 */
256static svn_error_t *
257compare_l2p_to_p2l_index(svn_fs_t *fs,
258                         svn_revnum_t start,
259                         svn_revnum_t count,
260                         svn_cancel_func_t cancel_func,
261                         void *cancel_baton,
262                         apr_pool_t *pool)
263{
264  svn_revnum_t i;
265  apr_pool_t *iterpool = svn_pool_create(pool);
266  apr_array_header_t *max_ids;
267
268  /* common file access structure */
269  svn_fs_fs__revision_file_t *rev_file;
270  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool,
271                                           iterpool));
272
273  /* determine the range of items to check for each revision */
274  SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, start, count, pool,
275                                     iterpool));
276
277  /* check all items in all revisions if the given range */
278  for (i = 0; i < max_ids->nelts; ++i)
279    {
280      apr_uint64_t k;
281      apr_uint64_t max_id = APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
282      svn_revnum_t revision = start + i;
283
284      for (k = 0; k < max_id; ++k)
285        {
286          apr_off_t offset;
287          svn_fs_fs__p2l_entry_t *p2l_entry;
288          svn_pool_clear(iterpool);
289
290          /* get L2P entry.  Ignore unused entries. */
291          SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision,
292                                         NULL, k, iterpool));
293          if (offset == -1)
294            continue;
295
296          /* find the corresponding P2L entry */
297          SVN_ERR(svn_fs_fs__p2l_entry_lookup(&p2l_entry, fs, rev_file,
298                                              revision, offset, iterpool,
299                                              iterpool));
300
301          if (p2l_entry == NULL)
302            return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
303                                     NULL,
304                                     _("p2l index entry not found for "
305                                       "PHYS %s returned by "
306                                       "l2p index for LOG r%ld:i%ld"),
307                                     apr_off_t_toa(pool, offset),
308                                     revision, (long)k);
309
310          if (   p2l_entry->item.number != k
311              || p2l_entry->item.revision != revision)
312            return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
313                                     NULL,
314                                     _("p2l index info LOG r%ld:i%ld"
315                                       " does not match "
316                                       "l2p index for LOG r%ld:i%ld"),
317                                     p2l_entry->item.revision,
318                                     (long)p2l_entry->item.number,
319                                     revision, (long)k);
320        }
321
322      if (cancel_func)
323        SVN_ERR(cancel_func(cancel_baton));
324    }
325
326  svn_pool_destroy(iterpool);
327
328  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
329
330  return SVN_NO_ERROR;
331}
332
333/* Verify that for all phys-to-log index entries for revisions START to
334 * START + COUNT-1 in FS there is a consistent entry in the log-to-phys
335 * index.  If given, invoke CANCEL_FUNC with CANCEL_BATON at regular
336 * intervals. Use POOL for allocations.
337 *
338 * Please note that we can only check on pack / rev file granularity and
339 * must only be called for a single rev / pack file.
340 */
341static svn_error_t *
342compare_p2l_to_l2p_index(svn_fs_t *fs,
343                         svn_revnum_t start,
344                         svn_revnum_t count,
345                         svn_cancel_func_t cancel_func,
346                         void *cancel_baton,
347                         apr_pool_t *pool)
348{
349  fs_fs_data_t *ffd = fs->fsap_data;
350  apr_pool_t *iterpool = svn_pool_create(pool);
351  apr_off_t max_offset;
352  apr_off_t offset = 0;
353
354  /* common file access structure */
355  svn_fs_fs__revision_file_t *rev_file;
356  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool,
357                                           iterpool));
358
359  /* get the size of the rev / pack file as covered by the P2L index */
360  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, rev_file, start,
361                                        pool));
362
363  /* for all offsets in the file, get the P2L index entries and check
364     them against the L2P index */
365  for (offset = 0; offset < max_offset; )
366    {
367      apr_array_header_t *entries;
368      svn_fs_fs__p2l_entry_t *last_entry;
369      int i;
370
371      svn_pool_clear(iterpool);
372
373      /* get all entries for the current block */
374      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, rev_file, start,
375                                          offset, ffd->p2l_page_size,
376                                          iterpool, iterpool));
377      if (entries->nelts == 0)
378        return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION,
379                                 NULL,
380                                 _("p2l does not cover offset %s"
381                                   " for revision %ld"),
382                                  apr_off_t_toa(pool, offset), start);
383
384      /* process all entries (and later continue with the next block) */
385      last_entry
386        = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t);
387      offset = last_entry->offset + last_entry->size;
388
389      for (i = 0; i < entries->nelts; ++i)
390        {
391          svn_fs_fs__p2l_entry_t *entry
392            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
393
394          /* check all sub-items for consist entries in the L2P index */
395          if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
396            {
397              /* There is no L2P entry for unused rev file sections.
398               * And its P2L index data is hardly ever used.  But we
399               * should still check whether someone tempered with it. */
400              if (   entry->item.revision != SVN_INVALID_REVNUM
401                  && (   entry->item.revision < start
402                      || entry->item.revision >= start + count))
403                return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
404                                         NULL,
405                                         _("Empty P2L entry for PHYS %s "
406                                           "refers to revision %ld outside "
407                                           "the rev / pack file (%ld-%ld)"),
408                                         apr_off_t_toa(pool, entry->offset),
409                                         entry->item.revision,
410                                         start, start + count - 1);
411            }
412          else
413            {
414              apr_off_t l2p_offset;
415              SVN_ERR(svn_fs_fs__item_offset(&l2p_offset, fs, rev_file,
416                                             entry->item.revision, NULL,
417                                             entry->item.number, iterpool));
418
419              if (l2p_offset != entry->offset)
420                return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
421                                         NULL,
422                                         _("l2p index entry PHYS %s"
423                                           "does not match p2l index value "
424                                           "LOG r%ld:i%ld for PHYS %s"),
425                                         apr_off_t_toa(pool, l2p_offset),
426                                         entry->item.revision,
427                                         (long)entry->item.number,
428                                         apr_off_t_toa(pool, entry->offset));
429            }
430        }
431
432      if (cancel_func)
433        SVN_ERR(cancel_func(cancel_baton));
434    }
435
436  svn_pool_destroy(iterpool);
437
438  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
439
440  return SVN_NO_ERROR;
441}
442
443/* Items smaller than this can be read at once into a buffer and directly
444 * be checksummed.  Larger items require stream processing.
445 * Must be a multiple of 8. */
446#define STREAM_THRESHOLD 4096
447
448/* Verify that the next SIZE bytes read from FILE are NUL.
449 * SIZE must not exceed STREAM_THRESHOLD.  Use POOL for allocations.
450 */
451static svn_error_t *
452expect_buffer_nul(apr_file_t *file,
453                  apr_off_t size,
454                  apr_pool_t *pool)
455{
456  union
457  {
458    unsigned char buffer[STREAM_THRESHOLD];
459    apr_uint64_t chunks[STREAM_THRESHOLD / sizeof(apr_uint64_t)];
460  } data;
461
462  apr_size_t i;
463  SVN_ERR_ASSERT(size <= STREAM_THRESHOLD);
464
465  /* read the whole data block; error out on failure */
466  data.chunks[(size - 1)/ sizeof(apr_uint64_t)] = 0;
467  SVN_ERR(svn_io_file_read_full2(file, data.buffer, (apr_size_t)size, NULL,
468                                 NULL, pool));
469
470  /* chunky check */
471  for (i = 0; i < size / sizeof(apr_uint64_t); ++i)
472    if (data.chunks[i] != 0)
473      break;
474
475  /* byte-wise check upon mismatch or at the end of the block */
476  for (i *= sizeof(apr_uint64_t); i < size; ++i)
477    if (data.buffer[i] != 0)
478      {
479        const char *file_name;
480        apr_off_t offset;
481
482        SVN_ERR(svn_io_file_name_get(&file_name, file, pool));
483        SVN_ERR(svn_io_file_get_offset(&offset, file, pool));
484        offset -= size - i;
485
486        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
487                                 _("Empty section in file %s contains "
488                                   "non-NUL data at offset %s"),
489                                 file_name, apr_off_t_toa(pool, offset));
490      }
491
492  return SVN_NO_ERROR;
493}
494
495/* Verify that the next SIZE bytes read from FILE are NUL.
496 * Use POOL for allocations.
497 */
498static svn_error_t *
499read_all_nul(apr_file_t *file,
500             apr_off_t size,
501             apr_pool_t *pool)
502{
503  for (; size >= STREAM_THRESHOLD; size -= STREAM_THRESHOLD)
504    SVN_ERR(expect_buffer_nul(file, STREAM_THRESHOLD, pool));
505
506  if (size)
507    SVN_ERR(expect_buffer_nul(file, size, pool));
508
509  return SVN_NO_ERROR;
510}
511
512/* Compare the ACTUAL checksum with the one expected by ENTRY.
513 * Return an error in case of mismatch.  Use the name of FILE
514 * in error message.  Allocate data in POOL.
515 */
516static svn_error_t *
517expected_checksum(apr_file_t *file,
518                  svn_fs_fs__p2l_entry_t *entry,
519                  apr_uint32_t actual,
520                  apr_pool_t *pool)
521{
522  if (actual != entry->fnv1_checksum)
523    {
524      const char *file_name;
525
526      SVN_ERR(svn_io_file_name_get(&file_name, file, pool));
527      return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
528                               _("Checksum mismatch in item at offset %s of "
529                                 "length %s bytes in file %s"),
530                               apr_off_t_toa(pool, entry->offset),
531                               apr_off_t_toa(pool, entry->size), file_name);
532    }
533
534  return SVN_NO_ERROR;
535}
536
537/* Verify that the FNV checksum over the next ENTRY->SIZE bytes read
538 * from FILE will match ENTRY's expected checksum.  SIZE must not
539 * exceed STREAM_THRESHOLD.  Use POOL for allocations.
540 */
541static svn_error_t *
542expected_buffered_checksum(apr_file_t *file,
543                           svn_fs_fs__p2l_entry_t *entry,
544                           apr_pool_t *pool)
545{
546  unsigned char buffer[STREAM_THRESHOLD];
547  SVN_ERR_ASSERT(entry->size <= STREAM_THRESHOLD);
548
549  SVN_ERR(svn_io_file_read_full2(file, buffer, (apr_size_t)entry->size,
550                                 NULL, NULL, pool));
551  SVN_ERR(expected_checksum(file, entry,
552                            svn__fnv1a_32x4(buffer, (apr_size_t)entry->size),
553                            pool));
554
555  return SVN_NO_ERROR;
556}
557
558/* Verify that the FNV checksum over the next ENTRY->SIZE bytes read from
559 * FILE will match ENTRY's expected checksum.  Use POOL for allocations.
560 */
561static svn_error_t *
562expected_streamed_checksum(apr_file_t *file,
563                           svn_fs_fs__p2l_entry_t *entry,
564                           apr_pool_t *pool)
565{
566  unsigned char buffer[STREAM_THRESHOLD];
567  svn_checksum_t *checksum;
568  svn_checksum_ctx_t *context
569    = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, pool);
570  apr_off_t size = entry->size;
571
572  while (size > 0)
573    {
574      apr_size_t to_read = size > sizeof(buffer)
575                         ? sizeof(buffer)
576                         : (apr_size_t)size;
577      SVN_ERR(svn_io_file_read_full2(file, buffer, to_read, NULL, NULL,
578                                     pool));
579      SVN_ERR(svn_checksum_update(context, buffer, to_read));
580      size -= to_read;
581    }
582
583  SVN_ERR(svn_checksum_final(&checksum, context, pool));
584  SVN_ERR(expected_checksum(file, entry,
585                            ntohl(*(const apr_uint32_t *)checksum->digest),
586                            pool));
587
588  return SVN_NO_ERROR;
589}
590
591/* Verify that for all phys-to-log index entries for revisions START to
592 * START + COUNT-1 in FS match the actual pack / rev file contents.
593 * If given, invoke CANCEL_FUNC with CANCEL_BATON at regular intervals.
594 * Use POOL for allocations.
595 *
596 * Please note that we can only check on pack / rev file granularity and
597 * must only be called for a single rev / pack file.
598 */
599static svn_error_t *
600compare_p2l_to_rev(svn_fs_t *fs,
601                   svn_revnum_t start,
602                   svn_revnum_t count,
603                   svn_cancel_func_t cancel_func,
604                   void *cancel_baton,
605                   apr_pool_t *pool)
606{
607  fs_fs_data_t *ffd = fs->fsap_data;
608  apr_pool_t *iterpool = svn_pool_create(pool);
609  apr_off_t max_offset;
610  apr_off_t offset = 0;
611  svn_fs_fs__revision_file_t *rev_file;
612
613  /* open the pack / rev file that is covered by the p2l index */
614  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool,
615                                           iterpool));
616
617  /* check file size vs. range covered by index */
618  SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
619  SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, rev_file, start,
620                                        pool));
621
622  if (rev_file->l2p_offset != max_offset)
623    return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT, NULL,
624                             _("File size of %s for revision r%ld does "
625                               "not match p2l index size of %s"),
626                             apr_off_t_toa(pool, rev_file->l2p_offset), start,
627                             apr_off_t_toa(pool, max_offset));
628
629  SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, NULL, 0,
630                                   pool));
631
632  /* for all offsets in the file, get the P2L index entries and check
633     them against the L2P index */
634  for (offset = 0; offset < max_offset; )
635    {
636      apr_array_header_t *entries;
637      int i;
638
639      svn_pool_clear(iterpool);
640
641      /* get all entries for the current block */
642      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, rev_file, start,
643                                          offset, ffd->p2l_page_size,
644                                          iterpool, iterpool));
645
646      /* The above might have moved the file pointer.
647       * Ensure we actually start reading at OFFSET.  */
648      SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size,
649                                       NULL, offset, iterpool));
650
651      /* process all entries (and later continue with the next block) */
652      for (i = 0; i < entries->nelts; ++i)
653        {
654          svn_fs_fs__p2l_entry_t *entry
655            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
656
657          /* skip bits we previously checked */
658          if (i == 0 && entry->offset < offset)
659            continue;
660
661          /* skip zero-sized entries */
662          if (entry->size == 0)
663            continue;
664
665          /* p2l index must cover all rev / pack file offsets exactly once */
666          if (entry->offset != offset)
667            return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT,
668                                     NULL,
669                                     _("p2l index entry for revision r%ld"
670                                       " is non-contiguous between offsets "
671                                       " %s and %s"),
672                                     start,
673                                     apr_off_t_toa(pool, offset),
674                                     apr_off_t_toa(pool, entry->offset));
675
676          /* Check type <-> item dependencies. */
677
678          /* Entry types must be within the valid range. */
679          if (entry->type >= SVN_FS_FS__ITEM_TYPE_ANY_REP)
680            return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION,
681                                     NULL,
682                                     _("p2l index entry for revision r%ld"
683                                       " at offset %s contains invalid item"
684                                       " type %u"),
685                                     start,
686                                     apr_off_t_toa(pool, offset),
687                                     (unsigned int)entry->type);
688
689          /* There can be only one changes entry and that has a fixed type
690           * and item number.  Its presence and parse-ability will be checked
691           * during later stages of the verification process. */
692          if (   (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
693              != (entry->item.number == SVN_FS_FS__ITEM_INDEX_CHANGES))
694            return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION,
695                                     NULL,
696                                     _("p2l index entry for changes in"
697                                       " revision r%ld is item"
698                                       " %"APR_UINT64_T_FMT
699                                       " of type %u at offset %s"),
700                                     entry->item.revision,
701                                     entry->item.number,
702                                     (unsigned int)entry->type,
703                                     apr_off_t_toa(pool, offset));
704
705          /* Check contents. */
706          if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
707            {
708              /* Empty sections must contain NUL bytes only.
709               * Beware of the filler at the end of the p2l index. */
710              if (entry->offset != max_offset)
711                SVN_ERR(read_all_nul(rev_file->file, entry->size, pool));
712            }
713          else
714            {
715              /* Generic contents check against checksum. */
716              if (entry->size < STREAM_THRESHOLD)
717                SVN_ERR(expected_buffered_checksum(rev_file->file, entry,
718                                                   pool));
719              else
720                SVN_ERR(expected_streamed_checksum(rev_file->file, entry,
721                                                   pool));
722            }
723
724          /* advance offset */
725          offset += entry->size;
726        }
727
728      if (cancel_func)
729        SVN_ERR(cancel_func(cancel_baton));
730    }
731
732  svn_pool_destroy(iterpool);
733
734  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
735
736  return SVN_NO_ERROR;
737}
738
739/* Verify that the revprops of the revisions START to END in FS can be
740 * accessed.  Invoke CANCEL_FUNC with CANCEL_BATON at regular intervals.
741 *
742 * The values of START and END have already been auto-selected and
743 * verified.
744 */
745static svn_error_t *
746verify_revprops(svn_fs_t *fs,
747                svn_revnum_t start,
748                svn_revnum_t end,
749                svn_cancel_func_t cancel_func,
750                void *cancel_baton,
751                apr_pool_t *pool)
752{
753  svn_revnum_t revision;
754  apr_pool_t *iterpool = svn_pool_create(pool);
755
756  /* Invalidate the revprop cache once.
757   * Use the cache inside the loop to speed up packed revprop access. */
758  svn_fs_fs__reset_revprop_cache(fs);
759
760  for (revision = start; revision < end; ++revision)
761    {
762      svn_string_t *date;
763      apr_time_t timetemp;
764
765      svn_pool_clear(iterpool);
766
767      /* Access the svn:date revprop.
768       * This implies parsing all revprops for that revision. */
769      SVN_ERR(svn_fs_fs__revision_prop(&date, fs, revision,
770                                       SVN_PROP_REVISION_DATE, FALSE,
771                                       iterpool, iterpool));
772
773      /* The time stamp is the only revprop that, if given, needs to
774       * have a valid content. */
775      if (date)
776        SVN_ERR(svn_time_from_cstring(&timetemp, date->data, iterpool));
777
778      if (cancel_func)
779        SVN_ERR(cancel_func(cancel_baton));
780    }
781
782  svn_pool_destroy(iterpool);
783
784  return SVN_NO_ERROR;
785}
786
787static svn_revnum_t
788pack_size(svn_fs_t *fs, svn_revnum_t rev)
789{
790  fs_fs_data_t *ffd = fs->fsap_data;
791
792  return rev < ffd->min_unpacked_rev ? ffd->max_files_per_dir : 1;
793}
794
/* Verify that on-disk representation has not been tampered with (in a way
796 * that leaves the repository in a corrupted state).  This compares log-to-
797 * phys with phys-to-log indexes, verifies the low-level checksums and
798 * checks that all revprops are available.  The function signature is
799 * similar to svn_fs_fs__verify.
800 *
801 * The values of START and END have already been auto-selected and
802 * verified.  You may call this for format7 or higher repos.
803 */
804static svn_error_t *
805verify_f7_metadata_consistency(svn_fs_t *fs,
806                               svn_revnum_t start,
807                               svn_revnum_t end,
808                               svn_fs_progress_notify_func_t notify_func,
809                               void *notify_baton,
810                               svn_cancel_func_t cancel_func,
811                               void *cancel_baton,
812                               apr_pool_t *pool)
813{
814  fs_fs_data_t *ffd = fs->fsap_data;
815  svn_revnum_t revision, next_revision;
816  apr_pool_t *iterpool = svn_pool_create(pool);
817
818  for (revision = start; revision <= end; revision = next_revision)
819    {
820      svn_error_t *err = SVN_NO_ERROR;
821
822      svn_revnum_t count = pack_size(fs, revision);
823      svn_revnum_t pack_start = svn_fs_fs__packed_base_rev(fs, revision);
824      svn_revnum_t pack_end = pack_start + count;
825
826      svn_pool_clear(iterpool);
827
828      if (notify_func && (pack_start % ffd->max_files_per_dir == 0))
829        notify_func(pack_start, notify_baton, iterpool);
830
831      /* Check for external corruption to the indexes. */
832      err = verify_index_checksums(fs, pack_start, cancel_func,
833                                   cancel_baton, iterpool);
834
835      /* two-way index check */
836      if (!err)
837        err = compare_l2p_to_p2l_index(fs, pack_start, pack_end - pack_start,
838                                       cancel_func, cancel_baton, iterpool);
839      if (!err)
840        err = compare_p2l_to_l2p_index(fs, pack_start, pack_end - pack_start,
841                                       cancel_func, cancel_baton, iterpool);
842
843      /* verify in-index checksums and types vs. actual rev / pack files */
844      if (!err)
845        err = compare_p2l_to_rev(fs, pack_start, pack_end - pack_start,
846                                 cancel_func, cancel_baton, iterpool);
847
848      /* ensure that revprops are available and accessible */
849      if (!err)
850        err = verify_revprops(fs, pack_start, pack_end,
851                              cancel_func, cancel_baton, iterpool);
852
853      /* concurrent packing is one of the reasons why verification may fail.
854         Make sure, we operate on up-to-date information. */
855      if (err)
856        {
857          svn_error_t *err2
858            = svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev,
859                                               fs, pool);
860
861          /* Be careful to not leak ERR. */
862          if (err2)
863            return svn_error_trace(svn_error_compose_create(err, err2));
864        }
865
866      /* retry the whole shard if it got packed in the meantime */
867      if (err && count != pack_size(fs, revision))
868        {
869          svn_error_clear(err);
870
871          /* We could simply assign revision here but the code below is
872             more intuitive to maintainers. */
873          next_revision = svn_fs_fs__packed_base_rev(fs, revision);
874        }
875      else
876        {
877          SVN_ERR(err);
878          next_revision = pack_end;
879        }
880    }
881
882  svn_pool_destroy(iterpool);
883
884  return SVN_NO_ERROR;
885}
886
887svn_error_t *
888svn_fs_fs__verify(svn_fs_t *fs,
889                  svn_revnum_t start,
890                  svn_revnum_t end,
891                  svn_fs_progress_notify_func_t notify_func,
892                  void *notify_baton,
893                  svn_cancel_func_t cancel_func,
894                  void *cancel_baton,
895                  apr_pool_t *pool)
896{
897  fs_fs_data_t *ffd = fs->fsap_data;
898
899  /* Input validation. */
900  if (! SVN_IS_VALID_REVNUM(start))
901    start = 0;
902  if (! SVN_IS_VALID_REVNUM(end))
903    {
904      SVN_ERR(svn_fs_fs__youngest_rev(&end, fs, pool));
905    }
906
907  SVN_ERR(svn_fs_fs__ensure_revision_exists(start, fs, pool));
908  SVN_ERR(svn_fs_fs__ensure_revision_exists(end, fs, pool));
909
910  /* log/phys index consistency.  We need to check them first to make
911     sure we can access the rev / pack files in format7. */
912  if (svn_fs_fs__use_log_addressing(fs))
913    SVN_ERR(verify_f7_metadata_consistency(fs, start, end,
914                                           notify_func, notify_baton,
915                                           cancel_func, cancel_baton, pool));
916
917  /* rep cache consistency */
918  if (ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT)
919    SVN_ERR(verify_rep_cache(fs, start, end, notify_func, notify_baton,
920                             cancel_func, cancel_baton, pool));
921
922  return SVN_NO_ERROR;
923}
924