Deleted Added
full compact
dsl_scan.c (268649) dsl_scan.c (268650)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 38 unchanged lines hidden (view full) ---

47#include <sys/sa_impl.h>
48#include <sys/zfeature.h>
49#ifdef _KERNEL
50#include <sys/zfs_vfsops.h>
51#endif
52
53typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
54
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 38 unchanged lines hidden (view full) ---

47#include <sys/sa_impl.h>
48#include <sys/zfeature.h>
49#ifdef _KERNEL
50#include <sys/zfs_vfsops.h>
51#endif
52
53typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
54
55static scan_cb_t dsl_scan_defrag_cb;
56static scan_cb_t dsl_scan_scrub_cb;
55static scan_cb_t dsl_scan_scrub_cb;
57static scan_cb_t dsl_scan_remove_cb;
58static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
59static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
60
61unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
62unsigned int zfs_resilver_delay = 2; /* number of ticks to delay resilver */
63unsigned int zfs_scrub_delay = 4; /* number of ticks to delay scrub */
64unsigned int zfs_scan_idle = 50; /* idle window in clock ticks */
65
66unsigned int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */
67unsigned int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */
68unsigned int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver
69 per txg */
70boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
56static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
57static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
58
59unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
60unsigned int zfs_resilver_delay = 2; /* number of ticks to delay resilver */
61unsigned int zfs_scrub_delay = 4; /* number of ticks to delay scrub */
62unsigned int zfs_scan_idle = 50; /* idle window in clock ticks */
63
64unsigned int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */
65unsigned int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */
66unsigned int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver
67 per txg */
68boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
71boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable srub prefetching */
69boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
72
73SYSCTL_DECL(_vfs_zfs);
74TUNABLE_INT("vfs.zfs.top_maxinflight", &zfs_top_maxinflight);
75SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RW,
76 &zfs_top_maxinflight, 0, "Maximum I/Os per top-level vdev");
77TUNABLE_INT("vfs.zfs.resilver_delay", &zfs_resilver_delay);
78SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_delay, CTLFLAG_RW,
79 &zfs_resilver_delay, 0, "Number of ticks to delay resilver");

--- 1304 unchanged lines hidden (view full) ---

1384 spa_t *spa = scn->scn_dp->dp_spa;
1385 uint64_t used = 0, comp, uncomp;
1386
1387 if (spa->spa_load_state != SPA_LOAD_NONE)
1388 return (B_FALSE);
1389 if (spa_shutting_down(spa))
1390 return (B_FALSE);
1391 if (scn->scn_phys.scn_state == DSS_SCANNING ||
70
71SYSCTL_DECL(_vfs_zfs);
72TUNABLE_INT("vfs.zfs.top_maxinflight", &zfs_top_maxinflight);
73SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RW,
74 &zfs_top_maxinflight, 0, "Maximum I/Os per top-level vdev");
75TUNABLE_INT("vfs.zfs.resilver_delay", &zfs_resilver_delay);
76SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_delay, CTLFLAG_RW,
77 &zfs_resilver_delay, 0, "Number of ticks to delay resilver");

--- 1304 unchanged lines hidden (view full) ---

1382 spa_t *spa = scn->scn_dp->dp_spa;
1383 uint64_t used = 0, comp, uncomp;
1384
1385 if (spa->spa_load_state != SPA_LOAD_NONE)
1386 return (B_FALSE);
1387 if (spa_shutting_down(spa))
1388 return (B_FALSE);
1389 if (scn->scn_phys.scn_state == DSS_SCANNING ||
1392 scn->scn_async_destroying)
1390 (scn->scn_async_destroying && !scn->scn_async_stalled))
1393 return (B_TRUE);
1394
1395 if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
1396 (void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
1397 &used, &comp, &uncomp);
1398 }
1399 return (used != 0);
1400}
1401
1402void
1403dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
1404{
1405 dsl_scan_t *scn = dp->dp_scan;
1406 spa_t *spa = dp->dp_spa;
1391 return (B_TRUE);
1392
1393 if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
1394 (void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
1395 &used, &comp, &uncomp);
1396 }
1397 return (used != 0);
1398}
1399
1400void
1401dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
1402{
1403 dsl_scan_t *scn = dp->dp_scan;
1404 spa_t *spa = dp->dp_spa;
1407 int err;
1405 int err = 0;
1408
1409 /*
1410 * Check for scn_restart_txg before checking spa_load_state, so
1411 * that we can restart an old-style scan while the pool is being
1412 * imported (see dsl_scan_init).
1413 */
1414 if (scn->scn_restart_txg != 0 &&
1415 scn->scn_restart_txg <= tx->tx_txg) {
1416 pool_scan_func_t func = POOL_SCAN_SCRUB;
1417 dsl_scan_done(scn, B_FALSE, tx);
1418 if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
1419 func = POOL_SCAN_RESILVER;
1420 zfs_dbgmsg("restarting scan func=%u txg=%llu",
1421 func, tx->tx_txg);
1422 dsl_scan_setup_sync(&func, tx);
1423 }
1424
1406
1407 /*
1408 * Check for scn_restart_txg before checking spa_load_state, so
1409 * that we can restart an old-style scan while the pool is being
1410 * imported (see dsl_scan_init).
1411 */
1412 if (scn->scn_restart_txg != 0 &&
1413 scn->scn_restart_txg <= tx->tx_txg) {
1414 pool_scan_func_t func = POOL_SCAN_SCRUB;
1415 dsl_scan_done(scn, B_FALSE, tx);
1416 if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
1417 func = POOL_SCAN_RESILVER;
1418 zfs_dbgmsg("restarting scan func=%u txg=%llu",
1419 func, tx->tx_txg);
1420 dsl_scan_setup_sync(&func, tx);
1421 }
1422
1425 if (!dsl_scan_active(scn) ||
1423 /*
1424 * If the scan is inactive due to a stalled async destroy, try again.
1425 */
1426 if ((!scn->scn_async_stalled && !dsl_scan_active(scn)) ||
1426 spa_sync_pass(dp->dp_spa) > 1)
1427 return;
1428
1429 scn->scn_visited_this_txg = 0;
1430 scn->scn_pausing = B_FALSE;
1431 scn->scn_sync_start_time = gethrtime();
1432 spa->spa_scrub_active = B_TRUE;
1433
1434 /*
1427 spa_sync_pass(dp->dp_spa) > 1)
1428 return;
1429
1430 scn->scn_visited_this_txg = 0;
1431 scn->scn_pausing = B_FALSE;
1432 scn->scn_sync_start_time = gethrtime();
1433 spa->spa_scrub_active = B_TRUE;
1434
1435 /*
1435 * First process the free list. If we pause the free, don't do
1436 * any scanning. This ensures that there is no free list when
1437 * we are scanning, so the scan code doesn't have to worry about
1438 * traversing it.
1436 * First process the async destroys. If we pause, don't do
1437 * any scrubbing or resilvering. This ensures that there are no
1438 * async destroys while we are scanning, so the scan code doesn't
1439 * have to worry about traversing it. It is also faster to free the
1440 * blocks than to scrub them.
1439 */
1440 if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
1441 scn->scn_is_bptree = B_FALSE;
1442 scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
1443 NULL, ZIO_FLAG_MUSTSUCCEED);
1444 err = bpobj_iterate(&dp->dp_free_bpobj,
1445 dsl_scan_free_block_cb, scn, tx);
1446 VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
1447
1441 */
1442 if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
1443 scn->scn_is_bptree = B_FALSE;
1444 scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
1445 NULL, ZIO_FLAG_MUSTSUCCEED);
1446 err = bpobj_iterate(&dp->dp_free_bpobj,
1447 dsl_scan_free_block_cb, scn, tx);
1448 VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
1449
1448 if (err == 0 && spa_feature_is_active(spa,
1449 SPA_FEATURE_ASYNC_DESTROY)) {
1450 ASSERT(scn->scn_async_destroying);
1451 scn->scn_is_bptree = B_TRUE;
1452 scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
1453 NULL, ZIO_FLAG_MUSTSUCCEED);
1454 err = bptree_iterate(dp->dp_meta_objset,
1455 dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
1456 scn, tx);
1457 VERIFY0(zio_wait(scn->scn_zio_root));
1450 if (err != 0 && err != ERESTART)
1451 zfs_panic_recover("error %u from bpobj_iterate()", err);
1452 }
1458
1453
1459 if (err == 0) {
1460 /* finished; deactivate async destroy feature */
1461 spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY,
1462 tx);
1463 ASSERT(!spa_feature_is_active(spa,
1464 SPA_FEATURE_ASYNC_DESTROY));
1465 VERIFY0(zap_remove(dp->dp_meta_objset,
1466 DMU_POOL_DIRECTORY_OBJECT,
1467 DMU_POOL_BPTREE_OBJ, tx));
1468 VERIFY0(bptree_free(dp->dp_meta_objset,
1469 dp->dp_bptree_obj, tx));
1470 dp->dp_bptree_obj = 0;
1471 scn->scn_async_destroying = B_FALSE;
1472 }
1454 if (err == 0 && spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
1455 ASSERT(scn->scn_async_destroying);
1456 scn->scn_is_bptree = B_TRUE;
1457 scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
1458 NULL, ZIO_FLAG_MUSTSUCCEED);
1459 err = bptree_iterate(dp->dp_meta_objset,
1460 dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, scn, tx);
1461 VERIFY0(zio_wait(scn->scn_zio_root));
1462
1463 if (err == EIO || err == ECKSUM) {
1464 err = 0;
1465 } else if (err != 0 && err != ERESTART) {
1466 zfs_panic_recover("error %u from "
1467 "traverse_dataset_destroyed()", err);
1473 }
1468 }
1474 if (scn->scn_visited_this_txg) {
1475 zfs_dbgmsg("freed %llu blocks in %llums from "
1476 "free_bpobj/bptree txg %llu",
1477 (longlong_t)scn->scn_visited_this_txg,
1478 (longlong_t)
1479 NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
1480 (longlong_t)tx->tx_txg);
1481 scn->scn_visited_this_txg = 0;
1482 /*
1483 * Re-sync the ddt so that we can further modify
1484 * it when doing bprewrite.
1485 */
1486 ddt_sync(spa, tx->tx_txg);
1469
1470 /*
1471 * If we didn't make progress, mark the async destroy as
1472 * stalled, so that we will not initiate a spa_sync() on
1473 * its behalf.
1474 */
1475 scn->scn_async_stalled = (scn->scn_visited_this_txg == 0);
1476
1477 if (bptree_is_empty(dp->dp_meta_objset, dp->dp_bptree_obj)) {
1478 /* finished; deactivate async destroy feature */
1479 spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, tx);
1480 ASSERT(!spa_feature_is_active(spa,
1481 SPA_FEATURE_ASYNC_DESTROY));
1482 VERIFY0(zap_remove(dp->dp_meta_objset,
1483 DMU_POOL_DIRECTORY_OBJECT,
1484 DMU_POOL_BPTREE_OBJ, tx));
1485 VERIFY0(bptree_free(dp->dp_meta_objset,
1486 dp->dp_bptree_obj, tx));
1487 dp->dp_bptree_obj = 0;
1488 scn->scn_async_destroying = B_FALSE;
1487 }
1489 }
1488 if (err == ERESTART)
1489 return;
1490 }
1491 if (scn->scn_visited_this_txg) {
1492 zfs_dbgmsg("freed %llu blocks in %llums from "
1493 "free_bpobj/bptree txg %llu; err=%u",
1494 (longlong_t)scn->scn_visited_this_txg,
1495 (longlong_t)
1496 NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
1497 (longlong_t)tx->tx_txg, err);
1498 scn->scn_visited_this_txg = 0;
1499
1500 /*
1501 * Write out changes to the DDT that may be required as a
1502 * result of the blocks freed. This ensures that the DDT
1503 * is clean when a scrub/resilver runs.
1504 */
1505 ddt_sync(spa, tx->tx_txg);
1506 }
1507 if (err != 0)
1508 return;
1509 if (!scn->scn_async_destroying && zfs_free_leak_on_eio &&
1510 (dp->dp_free_dir->dd_phys->dd_used_bytes != 0 ||
1511 dp->dp_free_dir->dd_phys->dd_compressed_bytes != 0 ||
1512 dp->dp_free_dir->dd_phys->dd_uncompressed_bytes != 0)) {
1513 /*
1514 * We have finished background destroying, but there is still
1515 * some space left in the dp_free_dir. Transfer this leaked
1516 * space to the dp_leak_dir.
1517 */
1518 if (dp->dp_leak_dir == NULL) {
1519 rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
1520 (void) dsl_dir_create_sync(dp, dp->dp_root_dir,
1521 LEAK_DIR_NAME, tx);
1522 VERIFY0(dsl_pool_open_special_dir(dp,
1523 LEAK_DIR_NAME, &dp->dp_leak_dir));
1524 rrw_exit(&dp->dp_config_rwlock, FTAG);
1525 }
1526 dsl_dir_diduse_space(dp->dp_leak_dir, DD_USED_HEAD,
1527 dp->dp_free_dir->dd_phys->dd_used_bytes,
1528 dp->dp_free_dir->dd_phys->dd_compressed_bytes,
1529 dp->dp_free_dir->dd_phys->dd_uncompressed_bytes, tx);
1530 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
1531 -dp->dp_free_dir->dd_phys->dd_used_bytes,
1532 -dp->dp_free_dir->dd_phys->dd_compressed_bytes,
1533 -dp->dp_free_dir->dd_phys->dd_uncompressed_bytes, tx);
1534 }
1535 if (!scn->scn_async_destroying) {
1490 /* finished; verify that space accounting went to zero */
1491 ASSERT0(dp->dp_free_dir->dd_phys->dd_used_bytes);
1492 ASSERT0(dp->dp_free_dir->dd_phys->dd_compressed_bytes);
1493 ASSERT0(dp->dp_free_dir->dd_phys->dd_uncompressed_bytes);
1494 }
1495
1496 if (scn->scn_phys.scn_state != DSS_SCANNING)
1497 return;

--- 276 unchanged lines hidden ---
1536 /* finished; verify that space accounting went to zero */
1537 ASSERT0(dp->dp_free_dir->dd_phys->dd_used_bytes);
1538 ASSERT0(dp->dp_free_dir->dd_phys->dd_compressed_bytes);
1539 ASSERT0(dp->dp_free_dir->dd_phys->dd_uncompressed_bytes);
1540 }
1541
1542 if (scn->scn_phys.scn_state != DSS_SCANNING)
1543 return;

--- 276 unchanged lines hidden ---