verify.c revision 302408
1219820Sjeff/* verify.c --- verification of FSFS filesystems 2219820Sjeff * 3219820Sjeff * ==================================================================== 4219820Sjeff * Licensed to the Apache Software Foundation (ASF) under one 5219820Sjeff * or more contributor license agreements. See the NOTICE file 6219820Sjeff * distributed with this work for additional information 7219820Sjeff * regarding copyright ownership. The ASF licenses this file 8219820Sjeff * to you under the Apache License, Version 2.0 (the 9219820Sjeff * "License"); you may not use this file except in compliance 10219820Sjeff * with the License. You may obtain a copy of the License at 11219820Sjeff * 12219820Sjeff * http://www.apache.org/licenses/LICENSE-2.0 13219820Sjeff * 14219820Sjeff * Unless required by applicable law or agreed to in writing, 15219820Sjeff * software distributed under the License is distributed on an 16219820Sjeff * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17219820Sjeff * KIND, either express or implied. See the License for the 18219820Sjeff * specific language governing permissions and limitations 19219820Sjeff * under the License. 20219820Sjeff * ==================================================================== 21219820Sjeff */ 22219820Sjeff 23219820Sjeff#include "svn_sorts.h" 24219820Sjeff#include "svn_checksum.h" 25219820Sjeff#include "svn_time.h" 26219820Sjeff#include "private/svn_subr_private.h" 27219820Sjeff 28219820Sjeff#include "verify.h" 29219820Sjeff#include "fs_fs.h" 30219820Sjeff 31219820Sjeff#include "cached_data.h" 32219820Sjeff#include "rep-cache.h" 33219820Sjeff#include "util.h" 34219820Sjeff#include "index.h" 35219820Sjeff 36219820Sjeff#include "../libsvn_fs/fs-loader.h" 37219820Sjeff 38219820Sjeff#include "svn_private_config.h" 39219820Sjeff 40219820Sjeff 41219820Sjeff/** Verifying. **/ 42219820Sjeff 43219820Sjeff/* Baton type expected by verify_walker(). The purpose is to reuse open 44219820Sjeff * rev / pack file handles between calls. Its contents need to be cleaned 45219820Sjeff * periodically to limit resource usage. 46219820Sjeff */ 47219820Sjefftypedef struct verify_walker_baton_t 48219820Sjeff{ 49219820Sjeff /* number of calls to verify_walker() since the last clean */ 50219820Sjeff int iteration_count; 51219820Sjeff 52219820Sjeff /* number of files opened since the last clean */ 53219820Sjeff int file_count; 54219820Sjeff 55219820Sjeff /* progress notification callback to invoke periodically (may be NULL) */ 56219820Sjeff svn_fs_progress_notify_func_t notify_func; 57219820Sjeff 58219820Sjeff /* baton to use with NOTIFY_FUNC */ 59219820Sjeff void *notify_baton; 60219820Sjeff 61219820Sjeff /* remember the last revision for which we called notify_func */ 62219820Sjeff svn_revnum_t last_notified_revision; 63219820Sjeff 64219820Sjeff /* cached hint for successive calls to svn_fs_fs__check_rep() */ 65219820Sjeff void *hint; 66219820Sjeff 67219820Sjeff /* pool to use for the file handles etc. */ 68219820Sjeff apr_pool_t *pool; 69219820Sjeff} verify_walker_baton_t; 70219820Sjeff 71219820Sjeff/* Used by svn_fs_fs__verify(). 72219820Sjeff Implements svn_fs_fs__walk_rep_reference().walker. */ 73219820Sjeffstatic svn_error_t * 74219820Sjeffverify_walker(representation_t *rep, 75219820Sjeff void *baton, 76219820Sjeff svn_fs_t *fs, 77219820Sjeff apr_pool_t *scratch_pool) 78219820Sjeff{ 79219820Sjeff verify_walker_baton_t *walker_baton = baton; 80219820Sjeff void *previous_hint; 81219820Sjeff 82219820Sjeff /* notify and free resources periodically */ 83219820Sjeff if ( walker_baton->iteration_count > 1000 84219820Sjeff || walker_baton->file_count > 16) 85219820Sjeff { 86219820Sjeff if ( walker_baton->notify_func 87219820Sjeff && rep->revision != walker_baton->last_notified_revision) 88219820Sjeff { 89219820Sjeff walker_baton->notify_func(rep->revision, 90219820Sjeff walker_baton->notify_baton, 91219820Sjeff scratch_pool); 92219820Sjeff walker_baton->last_notified_revision = rep->revision; 93219820Sjeff } 94219820Sjeff 95219820Sjeff svn_pool_clear(walker_baton->pool); 96219820Sjeff 97219820Sjeff walker_baton->iteration_count = 0; 98219820Sjeff walker_baton->file_count = 0; 99219820Sjeff walker_baton->hint = NULL; 100219820Sjeff } 101219820Sjeff 102219820Sjeff /* access the repo data */ 103219820Sjeff previous_hint = walker_baton->hint; 104219820Sjeff SVN_ERR(svn_fs_fs__check_rep(rep, fs, &walker_baton->hint, 105219820Sjeff walker_baton->pool)); 106219820Sjeff 107219820Sjeff /* update resource usage counters */ 108219820Sjeff walker_baton->iteration_count++; 109219820Sjeff if (previous_hint != walker_baton->hint) 110219820Sjeff walker_baton->file_count++; 111219820Sjeff 112219820Sjeff return SVN_NO_ERROR; 113219820Sjeff} 114219820Sjeff 115219820Sjeff/* Verify the rep cache DB's consistency with our rev / pack data. 116219820Sjeff * The function signature is similar to svn_fs_fs__verify. 117219820Sjeff * The values of START and END have already been auto-selected and 118219820Sjeff * verified. 119219820Sjeff */ 120219820Sjeffstatic svn_error_t * 121219820Sjeffverify_rep_cache(svn_fs_t *fs, 122219820Sjeff svn_revnum_t start, 123219820Sjeff svn_revnum_t end, 124219820Sjeff svn_fs_progress_notify_func_t notify_func, 125219820Sjeff void *notify_baton, 126219820Sjeff svn_cancel_func_t cancel_func, 127219820Sjeff void *cancel_baton, 128219820Sjeff apr_pool_t *pool) 129219820Sjeff{ 130219820Sjeff svn_boolean_t exists; 131219820Sjeff 132219820Sjeff /* rep-cache verification. */ 133219820Sjeff SVN_ERR(svn_fs_fs__exists_rep_cache(&exists, fs, pool)); 134219820Sjeff if (exists) 135219820Sjeff { 136219820Sjeff /* provide a baton to allow the reuse of open file handles between 137219820Sjeff iterations (saves 2/3 of OS level file operations). */ 138219820Sjeff verify_walker_baton_t *baton = apr_pcalloc(pool, sizeof(*baton)); 139219820Sjeff baton->pool = svn_pool_create(pool); 140219820Sjeff baton->last_notified_revision = SVN_INVALID_REVNUM; 141219820Sjeff baton->notify_func = notify_func; 142219820Sjeff baton->notify_baton = notify_baton; 143219820Sjeff 144219820Sjeff /* tell the user that we are now ready to do *something* */ 145219820Sjeff if (notify_func) 146219820Sjeff notify_func(SVN_INVALID_REVNUM, notify_baton, baton->pool); 147219820Sjeff 148219820Sjeff /* Do not attempt to walk the rep-cache database if its file does 149219820Sjeff not exist, since doing so would create it --- which may confuse 150219820Sjeff the administrator. Don't take any lock. */ 151219820Sjeff SVN_ERR(svn_fs_fs__walk_rep_reference(fs, start, end, 152219820Sjeff verify_walker, baton, 153219820Sjeff cancel_func, cancel_baton, 154219820Sjeff pool)); 155219820Sjeff 156219820Sjeff /* walker resource cleanup */ 157219820Sjeff svn_pool_destroy(baton->pool); 158219820Sjeff } 159219820Sjeff 160219820Sjeff return SVN_NO_ERROR; 161219820Sjeff} 162219820Sjeff 163219820Sjeff/* Verify that the MD5 checksum of the data between offsets START and END 164219820Sjeff * in FILE matches the EXPECTED checksum. If there is a mismatch use the 165219820Sjeff * indedx NAME in the error message. Supports cancellation with CANCEL_FUNC 166219820Sjeff * and CANCEL_BATON. SCRATCH_POOL is for temporary allocations. */ 167219820Sjeffstatic svn_error_t * 168219820Sjeffverify_index_checksum(apr_file_t *file, 169219820Sjeff const char *name, 170219820Sjeff apr_off_t start, 171219820Sjeff apr_off_t end, 172219820Sjeff svn_checksum_t *expected, 173219820Sjeff svn_cancel_func_t cancel_func, 174219820Sjeff void *cancel_baton, 175219820Sjeff apr_pool_t *scratch_pool) 176219820Sjeff{ 177219820Sjeff unsigned char buffer[SVN__STREAM_CHUNK_SIZE]; 178219820Sjeff apr_off_t size = end - start; 179219820Sjeff svn_checksum_t *actual; 180219820Sjeff svn_checksum_ctx_t *context 181219820Sjeff = svn_checksum_ctx_create(svn_checksum_md5, scratch_pool); 182219820Sjeff 183219820Sjeff /* Calculate the index checksum. */ 184219820Sjeff SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool)); 185219820Sjeff while (size > 0) 186219820Sjeff { 187219820Sjeff apr_size_t to_read = size > sizeof(buffer) 188219820Sjeff ? sizeof(buffer) 189219820Sjeff : (apr_size_t)size; 190219820Sjeff SVN_ERR(svn_io_file_read_full2(file, buffer, to_read, NULL, NULL, 191219820Sjeff scratch_pool)); 192219820Sjeff SVN_ERR(svn_checksum_update(context, buffer, to_read)); 193219820Sjeff size -= to_read; 194219820Sjeff 195219820Sjeff if (cancel_func) 196219820Sjeff SVN_ERR(cancel_func(cancel_baton)); 197219820Sjeff } 198219820Sjeff 199219820Sjeff SVN_ERR(svn_checksum_final(&actual, context, scratch_pool)); 200219820Sjeff 201219820Sjeff /* Verify that it matches the expected checksum. */ 202219820Sjeff if (!svn_checksum_match(expected, actual)) 203219820Sjeff { 204219820Sjeff const char *file_name; 205219820Sjeff 206219820Sjeff SVN_ERR(svn_io_file_name_get(&file_name, file, scratch_pool)); 207219820Sjeff SVN_ERR(svn_checksum_mismatch_err(expected, actual, scratch_pool, 208219820Sjeff _("%s checksum mismatch in file %s"), 209219820Sjeff name, file_name)); 210219820Sjeff } 211219820Sjeff 212219820Sjeff return SVN_NO_ERROR; 213219820Sjeff} 214219820Sjeff 215219820Sjeff/* Verify the MD5 checksums of the index data in the rev / pack file 216219820Sjeff * containing revision START in FS. If given, invoke CANCEL_FUNC with 217219820Sjeff * CANCEL_BATON at regular intervals. Use SCRATCH_POOL for temporary 218219820Sjeff * allocations. 219219820Sjeff */ 220219820Sjeffstatic svn_error_t * 221219820Sjeffverify_index_checksums(svn_fs_t *fs, 222219820Sjeff svn_revnum_t start, 223219820Sjeff svn_cancel_func_t cancel_func, 224219820Sjeff void *cancel_baton, 225219820Sjeff apr_pool_t *scratch_pool) 226219820Sjeff{ 227219820Sjeff svn_fs_fs__revision_file_t *rev_file; 228219820Sjeff 229219820Sjeff /* Open the rev / pack file and read the footer */ 230219820Sjeff SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, 231219820Sjeff scratch_pool, scratch_pool)); 232219820Sjeff SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); 233219820Sjeff 234219820Sjeff /* Verify the index contents against the checksum from the footer. */ 235219820Sjeff SVN_ERR(verify_index_checksum(rev_file->file, "L2P index", 236219820Sjeff rev_file->l2p_offset, rev_file->p2l_offset, 237219820Sjeff rev_file->l2p_checksum, 238219820Sjeff cancel_func, cancel_baton, scratch_pool)); 239219820Sjeff SVN_ERR(verify_index_checksum(rev_file->file, "P2L index", 240219820Sjeff rev_file->p2l_offset, rev_file->footer_offset, 241219820Sjeff rev_file->p2l_checksum, 242219820Sjeff cancel_func, cancel_baton, scratch_pool)); 243219820Sjeff 244219820Sjeff /* Done. */ 245219820Sjeff SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 246219820Sjeff 247219820Sjeff return SVN_NO_ERROR; 248219820Sjeff} 249219820Sjeff 250219820Sjeff/* Verify that for all log-to-phys index entries for revisions START to 251219820Sjeff * START + COUNT-1 in FS there is a consistent entry in the phys-to-log 252219820Sjeff * index. If given, invoke CANCEL_FUNC with CANCEL_BATON at regular 253219820Sjeff * intervals. Use POOL for allocations. 254219820Sjeff */ 255static svn_error_t * 256compare_l2p_to_p2l_index(svn_fs_t *fs, 257 svn_revnum_t start, 258 svn_revnum_t count, 259 svn_cancel_func_t cancel_func, 260 void *cancel_baton, 261 apr_pool_t *pool) 262{ 263 svn_revnum_t i; 264 apr_pool_t *iterpool = svn_pool_create(pool); 265 apr_array_header_t *max_ids; 266 267 /* common file access structure */ 268 svn_fs_fs__revision_file_t *rev_file; 269 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool, 270 iterpool)); 271 272 /* determine the range of items to check for each revision */ 273 SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, start, count, pool, 274 iterpool)); 275 276 /* check all items in all revisions if the given range */ 277 for (i = 0; i < max_ids->nelts; ++i) 278 { 279 apr_uint64_t k; 280 apr_uint64_t max_id = APR_ARRAY_IDX(max_ids, i, apr_uint64_t); 281 svn_revnum_t revision = start + i; 282 283 for (k = 0; k < max_id; ++k) 284 { 285 apr_off_t offset; 286 svn_fs_fs__p2l_entry_t *p2l_entry; 287 svn_pool_clear(iterpool); 288 289 /* get L2P entry. Ignore unused entries. */ 290 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, 291 NULL, k, iterpool)); 292 if (offset == -1) 293 continue; 294 295 /* find the corresponding P2L entry */ 296 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&p2l_entry, fs, rev_file, 297 revision, offset, iterpool, 298 iterpool)); 299 300 if (p2l_entry == NULL) 301 return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT, 302 NULL, 303 _("p2l index entry not found for " 304 "PHYS %s returned by " 305 "l2p index for LOG r%ld:i%ld"), 306 apr_off_t_toa(pool, offset), 307 revision, (long)k); 308 309 if ( p2l_entry->item.number != k 310 || p2l_entry->item.revision != revision) 311 return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT, 312 NULL, 313 _("p2l index info LOG r%ld:i%ld" 314 " does not match " 315 "l2p index for LOG r%ld:i%ld"), 316 p2l_entry->item.revision, 317 (long)p2l_entry->item.number, 318 revision, (long)k); 319 } 320 321 if (cancel_func) 322 SVN_ERR(cancel_func(cancel_baton)); 323 } 324 325 svn_pool_destroy(iterpool); 326 327 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 328 329 return SVN_NO_ERROR; 330} 331 332/* Verify that for all phys-to-log index entries for revisions START to 333 * START + COUNT-1 in FS there is a consistent entry in the log-to-phys 334 * index. If given, invoke CANCEL_FUNC with CANCEL_BATON at regular 335 * intervals. Use POOL for allocations. 336 * 337 * Please note that we can only check on pack / rev file granularity and 338 * must only be called for a single rev / pack file. 339 */ 340static svn_error_t * 341compare_p2l_to_l2p_index(svn_fs_t *fs, 342 svn_revnum_t start, 343 svn_revnum_t count, 344 svn_cancel_func_t cancel_func, 345 void *cancel_baton, 346 apr_pool_t *pool) 347{ 348 fs_fs_data_t *ffd = fs->fsap_data; 349 apr_pool_t *iterpool = svn_pool_create(pool); 350 apr_off_t max_offset; 351 apr_off_t offset = 0; 352 353 /* common file access structure */ 354 svn_fs_fs__revision_file_t *rev_file; 355 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool, 356 iterpool)); 357 358 /* get the size of the rev / pack file as covered by the P2L index */ 359 SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, rev_file, start, 360 pool)); 361 362 /* for all offsets in the file, get the P2L index entries and check 363 them against the L2P index */ 364 for (offset = 0; offset < max_offset; ) 365 { 366 apr_array_header_t *entries; 367 svn_fs_fs__p2l_entry_t *last_entry; 368 int i; 369 370 svn_pool_clear(iterpool); 371 372 /* get all entries for the current block */ 373 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, rev_file, start, 374 offset, ffd->p2l_page_size, 375 iterpool, iterpool)); 376 if (entries->nelts == 0) 377 return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, 378 NULL, 379 _("p2l does not cover offset %s" 380 " for revision %ld"), 381 apr_off_t_toa(pool, offset), start); 382 383 /* process all entries (and later continue with the next block) */ 384 last_entry 385 = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t); 386 offset = last_entry->offset + last_entry->size; 387 388 for (i = 0; i < entries->nelts; ++i) 389 { 390 svn_fs_fs__p2l_entry_t *entry 391 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); 392 393 /* check all sub-items for consist entries in the L2P index */ 394 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) 395 { 396 /* There is no L2P entry for unused rev file sections. 397 * And its P2L index data is hardly ever used. But we 398 * should still check whether someone tempered with it. */ 399 if ( entry->item.revision != SVN_INVALID_REVNUM 400 && ( entry->item.revision < start 401 || entry->item.revision >= start + count)) 402 return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT, 403 NULL, 404 _("Empty P2L entry for PHYS %s " 405 "refers to revision %ld outside " 406 "the rev / pack file (%ld-%ld)"), 407 apr_off_t_toa(pool, entry->offset), 408 entry->item.revision, 409 start, start + count - 1); 410 } 411 else 412 { 413 apr_off_t l2p_offset; 414 SVN_ERR(svn_fs_fs__item_offset(&l2p_offset, fs, rev_file, 415 entry->item.revision, NULL, 416 entry->item.number, iterpool)); 417 418 if (l2p_offset != entry->offset) 419 return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT, 420 NULL, 421 _("l2p index entry PHYS %s" 422 "does not match p2l index value " 423 "LOG r%ld:i%ld for PHYS %s"), 424 apr_off_t_toa(pool, l2p_offset), 425 entry->item.revision, 426 (long)entry->item.number, 427 apr_off_t_toa(pool, entry->offset)); 428 } 429 } 430 431 if (cancel_func) 432 SVN_ERR(cancel_func(cancel_baton)); 433 } 434 435 svn_pool_destroy(iterpool); 436 437 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 438 439 return SVN_NO_ERROR; 440} 441 442/* Items smaller than this can be read at once into a buffer and directly 443 * be checksummed. Larger items require stream processing. 444 * Must be a multiple of 8. */ 445#define STREAM_THRESHOLD 4096 446 447/* Verify that the next SIZE bytes read from FILE are NUL. 448 * SIZE must not exceed STREAM_THRESHOLD. Use POOL for allocations. 449 */ 450static svn_error_t * 451expect_buffer_nul(apr_file_t *file, 452 apr_off_t size, 453 apr_pool_t *pool) 454{ 455 union 456 { 457 unsigned char buffer[STREAM_THRESHOLD]; 458 apr_uint64_t chunks[STREAM_THRESHOLD / sizeof(apr_uint64_t)]; 459 } data; 460 461 apr_size_t i; 462 SVN_ERR_ASSERT(size <= STREAM_THRESHOLD); 463 464 /* read the whole data block; error out on failure */ 465 data.chunks[(size - 1)/ sizeof(apr_uint64_t)] = 0; 466 SVN_ERR(svn_io_file_read_full2(file, data.buffer, size, NULL, NULL, pool)); 467 468 /* chunky check */ 469 for (i = 0; i < size / sizeof(apr_uint64_t); ++i) 470 if (data.chunks[i] != 0) 471 break; 472 473 /* byte-wise check upon mismatch or at the end of the block */ 474 for (i *= sizeof(apr_uint64_t); i < size; ++i) 475 if (data.buffer[i] != 0) 476 { 477 const char *file_name; 478 apr_off_t offset; 479 480 SVN_ERR(svn_io_file_name_get(&file_name, file, pool)); 481 SVN_ERR(svn_fs_fs__get_file_offset(&offset, file, pool)); 482 offset -= size - i; 483 484 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 485 _("Empty section in file %s contains " 486 "non-NUL data at offset %s"), 487 file_name, apr_off_t_toa(pool, offset)); 488 } 489 490 return SVN_NO_ERROR; 491} 492 493/* Verify that the next SIZE bytes read from FILE are NUL. 494 * Use POOL for allocations. 495 */ 496static svn_error_t * 497read_all_nul(apr_file_t *file, 498 apr_off_t size, 499 apr_pool_t *pool) 500{ 501 for (; size >= STREAM_THRESHOLD; size -= STREAM_THRESHOLD) 502 SVN_ERR(expect_buffer_nul(file, STREAM_THRESHOLD, pool)); 503 504 if (size) 505 SVN_ERR(expect_buffer_nul(file, size, pool)); 506 507 return SVN_NO_ERROR; 508} 509 510/* Compare the ACTUAL checksum with the one expected by ENTRY. 511 * Return an error in case of mismatch. Use the name of FILE 512 * in error message. Allocate data in POOL. 513 */ 514static svn_error_t * 515expected_checksum(apr_file_t *file, 516 svn_fs_fs__p2l_entry_t *entry, 517 apr_uint32_t actual, 518 apr_pool_t *pool) 519{ 520 if (actual != entry->fnv1_checksum) 521 { 522 const char *file_name; 523 524 SVN_ERR(svn_io_file_name_get(&file_name, file, pool)); 525 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 526 _("Checksum mismatch in item at offset %s of " 527 "length %s bytes in file %s"), 528 apr_off_t_toa(pool, entry->offset), 529 apr_off_t_toa(pool, entry->size), file_name); 530 } 531 532 return SVN_NO_ERROR; 533} 534 535/* Verify that the FNV checksum over the next ENTRY->SIZE bytes read 536 * from FILE will match ENTRY's expected checksum. SIZE must not 537 * exceed STREAM_THRESHOLD. Use POOL for allocations. 538 */ 539static svn_error_t * 540expected_buffered_checksum(apr_file_t *file, 541 svn_fs_fs__p2l_entry_t *entry, 542 apr_pool_t *pool) 543{ 544 unsigned char buffer[STREAM_THRESHOLD]; 545 SVN_ERR_ASSERT(entry->size <= STREAM_THRESHOLD); 546 547 SVN_ERR(svn_io_file_read_full2(file, buffer, (apr_size_t)entry->size, 548 NULL, NULL, pool)); 549 SVN_ERR(expected_checksum(file, entry, 550 svn__fnv1a_32x4(buffer, (apr_size_t)entry->size), 551 pool)); 552 553 return SVN_NO_ERROR; 554} 555 556/* Verify that the FNV checksum over the next ENTRY->SIZE bytes read from 557 * FILE will match ENTRY's expected checksum. Use POOL for allocations. 558 */ 559static svn_error_t * 560expected_streamed_checksum(apr_file_t *file, 561 svn_fs_fs__p2l_entry_t *entry, 562 apr_pool_t *pool) 563{ 564 unsigned char buffer[STREAM_THRESHOLD]; 565 svn_checksum_t *checksum; 566 svn_checksum_ctx_t *context 567 = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, pool); 568 apr_off_t size = entry->size; 569 570 while (size > 0) 571 { 572 apr_size_t to_read = size > sizeof(buffer) 573 ? sizeof(buffer) 574 : (apr_size_t)size; 575 SVN_ERR(svn_io_file_read_full2(file, buffer, to_read, NULL, NULL, 576 pool)); 577 SVN_ERR(svn_checksum_update(context, buffer, to_read)); 578 size -= to_read; 579 } 580 581 SVN_ERR(svn_checksum_final(&checksum, context, pool)); 582 SVN_ERR(expected_checksum(file, entry, 583 ntohl(*(const apr_uint32_t *)checksum->digest), 584 pool)); 585 586 return SVN_NO_ERROR; 587} 588 589/* Verify that for all phys-to-log index entries for revisions START to 590 * START + COUNT-1 in FS match the actual pack / rev file contents. 591 * If given, invoke CANCEL_FUNC with CANCEL_BATON at regular intervals. 592 * Use POOL for allocations. 593 * 594 * Please note that we can only check on pack / rev file granularity and 595 * must only be called for a single rev / pack file. 596 */ 597static svn_error_t * 598compare_p2l_to_rev(svn_fs_t *fs, 599 svn_revnum_t start, 600 svn_revnum_t count, 601 svn_cancel_func_t cancel_func, 602 void *cancel_baton, 603 apr_pool_t *pool) 604{ 605 fs_fs_data_t *ffd = fs->fsap_data; 606 apr_pool_t *iterpool = svn_pool_create(pool); 607 apr_off_t max_offset; 608 apr_off_t offset = 0; 609 svn_fs_fs__revision_file_t *rev_file; 610 611 /* open the pack / rev file that is covered by the p2l index */ 612 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start, pool, 613 iterpool)); 614 615 /* check file size vs. range covered by index */ 616 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); 617 SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, fs, rev_file, start, 618 pool)); 619 620 if (rev_file->l2p_offset != max_offset) 621 return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT, NULL, 622 _("File size of %s for revision r%ld does " 623 "not match p2l index size of %s"), 624 apr_off_t_toa(pool, rev_file->l2p_offset), start, 625 apr_off_t_toa(pool, max_offset)); 626 627 SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, NULL, 0, 628 pool)); 629 630 /* for all offsets in the file, get the P2L index entries and check 631 them against the L2P index */ 632 for (offset = 0; offset < max_offset; ) 633 { 634 apr_array_header_t *entries; 635 int i; 636 637 svn_pool_clear(iterpool); 638 639 /* get all entries for the current block */ 640 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, rev_file, start, 641 offset, ffd->p2l_page_size, 642 iterpool, iterpool)); 643 644 /* The above might have moved the file pointer. 645 * Ensure we actually start reading at OFFSET. */ 646 SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, 647 NULL, offset, iterpool)); 648 649 /* process all entries (and later continue with the next block) */ 650 for (i = 0; i < entries->nelts; ++i) 651 { 652 svn_fs_fs__p2l_entry_t *entry 653 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); 654 655 /* skip bits we previously checked */ 656 if (i == 0 && entry->offset < offset) 657 continue; 658 659 /* skip zero-sized entries */ 660 if (entry->size == 0) 661 continue; 662 663 /* p2l index must cover all rev / pack file offsets exactly once */ 664 if (entry->offset != offset) 665 return svn_error_createf(SVN_ERR_FS_INDEX_INCONSISTENT, 666 NULL, 667 _("p2l index entry for revision r%ld" 668 " is non-contiguous between offsets " 669 " %s and %s"), 670 start, 671 apr_off_t_toa(pool, offset), 672 apr_off_t_toa(pool, entry->offset)); 673 674 /* empty sections must contain NUL bytes only */ 675 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) 676 { 677 /* skip filler entry at the end of the p2l index */ 678 if (entry->offset != max_offset) 679 SVN_ERR(read_all_nul(rev_file->file, entry->size, pool)); 680 } 681 else 682 { 683 if (entry->size < STREAM_THRESHOLD) 684 SVN_ERR(expected_buffered_checksum(rev_file->file, entry, 685 pool)); 686 else 687 SVN_ERR(expected_streamed_checksum(rev_file->file, entry, 688 pool)); 689 } 690 691 /* advance offset */ 692 offset += entry->size; 693 } 694 695 if (cancel_func) 696 SVN_ERR(cancel_func(cancel_baton)); 697 } 698 699 svn_pool_destroy(iterpool); 700 701 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 702 703 return SVN_NO_ERROR; 704} 705 706/* Verify that the revprops of the revisions START to END in FS can be 707 * accessed. Invoke CANCEL_FUNC with CANCEL_BATON at regular intervals. 708 * 709 * The values of START and END have already been auto-selected and 710 * verified. 711 */ 712static svn_error_t * 713verify_revprops(svn_fs_t *fs, 714 svn_revnum_t start, 715 svn_revnum_t end, 716 svn_cancel_func_t cancel_func, 717 void *cancel_baton, 718 apr_pool_t *pool) 719{ 720 svn_revnum_t revision; 721 apr_pool_t *iterpool = svn_pool_create(pool); 722 723 for (revision = start; revision < end; ++revision) 724 { 725 svn_string_t *date; 726 apr_time_t timetemp; 727 728 svn_pool_clear(iterpool); 729 730 /* Access the svn:date revprop. 731 * This implies parsing all revprops for that revision. */ 732 SVN_ERR(svn_fs_fs__revision_prop(&date, fs, revision, 733 SVN_PROP_REVISION_DATE, iterpool)); 734 735 /* The time stamp is the only revprop that, if given, needs to 736 * have a valid content. */ 737 if (date) 738 SVN_ERR(svn_time_from_cstring(&timetemp, date->data, iterpool)); 739 740 if (cancel_func) 741 SVN_ERR(cancel_func(cancel_baton)); 742 } 743 744 svn_pool_destroy(iterpool); 745 746 return SVN_NO_ERROR; 747} 748 749static svn_revnum_t 750pack_size(svn_fs_t *fs, svn_revnum_t rev) 751{ 752 fs_fs_data_t *ffd = fs->fsap_data; 753 754 return rev < ffd->min_unpacked_rev ? ffd->max_files_per_dir : 1; 755} 756 757/* Verify that on-disk representation has not been tempered with (in a way 758 * that leaves the repository in a corrupted state). This compares log-to- 759 * phys with phys-to-log indexes, verifies the low-level checksums and 760 * checks that all revprops are available. The function signature is 761 * similar to svn_fs_fs__verify. 762 * 763 * The values of START and END have already been auto-selected and 764 * verified. You may call this for format7 or higher repos. 765 */ 766static svn_error_t * 767verify_f7_metadata_consistency(svn_fs_t *fs, 768 svn_revnum_t start, 769 svn_revnum_t end, 770 svn_fs_progress_notify_func_t notify_func, 771 void *notify_baton, 772 svn_cancel_func_t cancel_func, 773 void *cancel_baton, 774 apr_pool_t *pool) 775{ 776 fs_fs_data_t *ffd = fs->fsap_data; 777 svn_revnum_t revision, next_revision; 778 apr_pool_t *iterpool = svn_pool_create(pool); 779 780 for (revision = start; revision <= end; revision = next_revision) 781 { 782 svn_error_t *err = SVN_NO_ERROR; 783 784 svn_revnum_t count = pack_size(fs, revision); 785 svn_revnum_t pack_start = svn_fs_fs__packed_base_rev(fs, revision); 786 svn_revnum_t pack_end = pack_start + count; 787 788 svn_pool_clear(iterpool); 789 790 if (notify_func && (pack_start % ffd->max_files_per_dir == 0)) 791 notify_func(pack_start, notify_baton, iterpool); 792 793 /* Check for external corruption to the indexes. */ 794 err = verify_index_checksums(fs, pack_start, cancel_func, 795 cancel_baton, iterpool); 796 797 /* two-way index check */ 798 if (!err) 799 err = compare_l2p_to_p2l_index(fs, pack_start, pack_end - pack_start, 800 cancel_func, cancel_baton, iterpool); 801 if (!err) 802 err = compare_p2l_to_l2p_index(fs, pack_start, pack_end - pack_start, 803 cancel_func, cancel_baton, iterpool); 804 805 /* verify in-index checksums and types vs. actual rev / pack files */ 806 if (!err) 807 err = compare_p2l_to_rev(fs, pack_start, pack_end - pack_start, 808 cancel_func, cancel_baton, iterpool); 809 810 /* ensure that revprops are available and accessible */ 811 if (!err) 812 err = verify_revprops(fs, pack_start, pack_end, 813 cancel_func, cancel_baton, iterpool); 814 815 /* concurrent packing is one of the reasons why verification may fail. 816 Make sure, we operate on up-to-date information. */ 817 if (err) 818 { 819 svn_error_t *err2 820 = svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, 821 fs, pool); 822 823 /* Be careful to not leak ERR. */ 824 if (err2) 825 return svn_error_trace(svn_error_compose_create(err, err2)); 826 } 827 828 /* retry the whole shard if it got packed in the meantime */ 829 if (err && count != pack_size(fs, revision)) 830 { 831 svn_error_clear(err); 832 833 /* We could simply assign revision here but the code below is 834 more intuitive to maintainers. */ 835 next_revision = svn_fs_fs__packed_base_rev(fs, revision); 836 } 837 else 838 { 839 SVN_ERR(err); 840 next_revision = pack_end; 841 } 842 } 843 844 svn_pool_destroy(iterpool); 845 846 return SVN_NO_ERROR; 847} 848 849svn_error_t * 850svn_fs_fs__verify(svn_fs_t *fs, 851 svn_revnum_t start, 852 svn_revnum_t end, 853 svn_fs_progress_notify_func_t notify_func, 854 void *notify_baton, 855 svn_cancel_func_t cancel_func, 856 void *cancel_baton, 857 apr_pool_t *pool) 858{ 859 fs_fs_data_t *ffd = fs->fsap_data; 860 svn_revnum_t youngest = ffd->youngest_rev_cache; /* cache is current */ 861 862 /* Input validation. */ 863 if (! SVN_IS_VALID_REVNUM(start)) 864 start = 0; 865 if (! SVN_IS_VALID_REVNUM(end)) 866 end = youngest; 867 SVN_ERR(svn_fs_fs__ensure_revision_exists(start, fs, pool)); 868 SVN_ERR(svn_fs_fs__ensure_revision_exists(end, fs, pool)); 869 870 /* log/phys index consistency. We need to check them first to make 871 sure we can access the rev / pack files in format7. */ 872 if (svn_fs_fs__use_log_addressing(fs)) 873 SVN_ERR(verify_f7_metadata_consistency(fs, start, end, 874 notify_func, notify_baton, 875 cancel_func, cancel_baton, pool)); 876 877 /* rep cache consistency */ 878 if (ffd->format >= SVN_FS_FS__MIN_REP_SHARING_FORMAT) 879 SVN_ERR(verify_rep_cache(fs, start, end, notify_func, notify_baton, 880 cancel_func, cancel_baton, pool)); 881 882 return SVN_NO_ERROR; 883} 884