dsl_scan.c (268649) | dsl_scan.c (268650) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 38 unchanged lines hidden (view full) --- 47#include <sys/sa_impl.h> 48#include <sys/zfeature.h> 49#ifdef _KERNEL 50#include <sys/zfs_vfsops.h> 51#endif 52 53typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); 54 | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 38 unchanged lines hidden (view full) --- 47#include <sys/sa_impl.h> 48#include <sys/zfeature.h> 49#ifdef _KERNEL 50#include <sys/zfs_vfsops.h> 51#endif 52 53typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); 54 |
55static scan_cb_t dsl_scan_defrag_cb; | |
56static scan_cb_t dsl_scan_scrub_cb; | 55static scan_cb_t dsl_scan_scrub_cb; |
57static scan_cb_t dsl_scan_remove_cb; | |
58static void dsl_scan_cancel_sync(void *, dmu_tx_t *); 59static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); 60 61unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */ 62unsigned int zfs_resilver_delay = 2; /* number of ticks to delay resilver */ 63unsigned int zfs_scrub_delay = 4; /* number of ticks to delay scrub */ 64unsigned int zfs_scan_idle = 50; /* idle window in clock ticks */ 65 66unsigned int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */ 67unsigned int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */ 68unsigned int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver 69 per txg */ 70boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ | 56static void dsl_scan_cancel_sync(void *, dmu_tx_t *); 57static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); 58 59unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */ 60unsigned int zfs_resilver_delay = 2; /* number of ticks to delay resilver */ 61unsigned int zfs_scrub_delay = 4; /* number of ticks to delay scrub */ 62unsigned int zfs_scan_idle = 50; /* idle window in clock ticks */ 63 64unsigned int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */ 65unsigned int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */ 66unsigned int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver 67 per txg */ 68boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ |
71boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetching */ | 69boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */ |
72 73SYSCTL_DECL(_vfs_zfs); 74TUNABLE_INT("vfs.zfs.top_maxinflight", &zfs_top_maxinflight); 75SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RW, 76 &zfs_top_maxinflight, 0, "Maximum I/Os per top-level vdev"); 77TUNABLE_INT("vfs.zfs.resilver_delay", &zfs_resilver_delay); 78SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_delay, CTLFLAG_RW, 79 &zfs_resilver_delay, 0, "Number of ticks to delay resilver"); --- 1304 unchanged lines hidden (view full) --- 1384 spa_t *spa = scn->scn_dp->dp_spa; 1385 uint64_t used = 0, comp, uncomp; 1386 1387 if (spa->spa_load_state != SPA_LOAD_NONE) 1388 return (B_FALSE); 1389 if (spa_shutting_down(spa)) 1390 return (B_FALSE); 1391 if (scn->scn_phys.scn_state == DSS_SCANNING || | 70 71SYSCTL_DECL(_vfs_zfs); 72TUNABLE_INT("vfs.zfs.top_maxinflight", &zfs_top_maxinflight); 73SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RW, 74 &zfs_top_maxinflight, 0, "Maximum I/Os per top-level vdev"); 75TUNABLE_INT("vfs.zfs.resilver_delay", &zfs_resilver_delay); 76SYSCTL_UINT(_vfs_zfs, OID_AUTO, resilver_delay, CTLFLAG_RW, 77 &zfs_resilver_delay, 0, "Number of ticks to delay resilver"); --- 1304 unchanged lines hidden (view full) --- 1382 spa_t *spa = scn->scn_dp->dp_spa; 1383 uint64_t used = 0, comp, uncomp; 1384 1385 if (spa->spa_load_state != SPA_LOAD_NONE) 1386 return (B_FALSE); 1387 if (spa_shutting_down(spa)) 1388 return (B_FALSE); 1389 if (scn->scn_phys.scn_state == DSS_SCANNING || |
1392 scn->scn_async_destroying) | 1390 (scn->scn_async_destroying && !scn->scn_async_stalled)) |
1393 return (B_TRUE); 1394 1395 if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) { 1396 (void) bpobj_space(&scn->scn_dp->dp_free_bpobj, 1397 &used, &comp, &uncomp); 1398 } 1399 return (used != 0); 1400} 1401 1402void 1403dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) 1404{ 1405 dsl_scan_t *scn = dp->dp_scan; 1406 spa_t *spa = dp->dp_spa; | 1391 return (B_TRUE); 1392 1393 if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) { 1394 (void) bpobj_space(&scn->scn_dp->dp_free_bpobj, 1395 &used, &comp, &uncomp); 1396 } 1397 return (used != 0); 1398} 1399 1400void 1401dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) 1402{ 1403 dsl_scan_t *scn = dp->dp_scan; 1404 spa_t *spa = dp->dp_spa; |
1407 int err; | 1405 int err = 0; |
1408 1409 /* 1410 * Check for scn_restart_txg before checking spa_load_state, so 1411 * that we can restart an old-style scan while the pool is being 1412 * imported (see dsl_scan_init). 1413 */ 1414 if (scn->scn_restart_txg != 0 && 1415 scn->scn_restart_txg <= tx->tx_txg) { 1416 pool_scan_func_t func = POOL_SCAN_SCRUB; 1417 dsl_scan_done(scn, B_FALSE, tx); 1418 if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) 1419 func = POOL_SCAN_RESILVER; 1420 zfs_dbgmsg("restarting scan func=%u txg=%llu", 1421 func, tx->tx_txg); 1422 dsl_scan_setup_sync(&func, tx); 1423 } 1424 | 1406 1407 /* 1408 * Check for scn_restart_txg before checking spa_load_state, so 1409 * that we can restart an old-style scan while the pool is being 1410 * imported (see dsl_scan_init). 1411 */ 1412 if (scn->scn_restart_txg != 0 && 1413 scn->scn_restart_txg <= tx->tx_txg) { 1414 pool_scan_func_t func = POOL_SCAN_SCRUB; 1415 dsl_scan_done(scn, B_FALSE, tx); 1416 if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) 1417 func = POOL_SCAN_RESILVER; 1418 zfs_dbgmsg("restarting scan func=%u txg=%llu", 1419 func, tx->tx_txg); 1420 dsl_scan_setup_sync(&func, tx); 1421 } 1422 |
1425 if (!dsl_scan_active(scn) || | 1423 /* 1424 * If the scan is inactive due to a stalled async destroy, try again. 1425 */ 1426 if ((!scn->scn_async_stalled && !dsl_scan_active(scn)) || |
1426 spa_sync_pass(dp->dp_spa) > 1) 1427 return; 1428 1429 scn->scn_visited_this_txg = 0; 1430 scn->scn_pausing = B_FALSE; 1431 scn->scn_sync_start_time = gethrtime(); 1432 spa->spa_scrub_active = B_TRUE; 1433 1434 /* | 1427 spa_sync_pass(dp->dp_spa) > 1) 1428 return; 1429 1430 scn->scn_visited_this_txg = 0; 1431 scn->scn_pausing = B_FALSE; 1432 scn->scn_sync_start_time = gethrtime(); 1433 spa->spa_scrub_active = B_TRUE; 1434 1435 /* |
1435 * First process the free list. If we pause the free, don't do 1436 * any scanning. This ensures that there is no free list when 1437 * we are scanning, so the scan code doesn't have to worry about 1438 * traversing it. | 1436 * First process the async destroys. If we pause, don't do 1437 * any scrubbing or resilvering. This ensures that there are no 1438 * async destroys while we are scanning, so the scan code doesn't 1439 * have to worry about traversing them. It is also faster to free the 1440 * blocks than to scrub them. |
1439 */ 1440 if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { 1441 scn->scn_is_bptree = B_FALSE; 1442 scn->scn_zio_root = zio_root(dp->dp_spa, NULL, 1443 NULL, ZIO_FLAG_MUSTSUCCEED); 1444 err = bpobj_iterate(&dp->dp_free_bpobj, 1445 dsl_scan_free_block_cb, scn, tx); 1446 VERIFY3U(0, ==, zio_wait(scn->scn_zio_root)); 1447 | 1441 */ 1442 if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { 1443 scn->scn_is_bptree = B_FALSE; 1444 scn->scn_zio_root = zio_root(dp->dp_spa, NULL, 1445 NULL, ZIO_FLAG_MUSTSUCCEED); 1446 err = bpobj_iterate(&dp->dp_free_bpobj, 1447 dsl_scan_free_block_cb, scn, tx); 1448 VERIFY3U(0, ==, zio_wait(scn->scn_zio_root)); 1449 |
1448 if (err == 0 && spa_feature_is_active(spa, 1449 SPA_FEATURE_ASYNC_DESTROY)) { 1450 ASSERT(scn->scn_async_destroying); 1451 scn->scn_is_bptree = B_TRUE; 1452 scn->scn_zio_root = zio_root(dp->dp_spa, NULL, 1453 NULL, ZIO_FLAG_MUSTSUCCEED); 1454 err = bptree_iterate(dp->dp_meta_objset, 1455 dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, 1456 scn, tx); 1457 VERIFY0(zio_wait(scn->scn_zio_root)); | 1450 if (err != 0 && err != ERESTART) 1451 zfs_panic_recover("error %u from bpobj_iterate()", err); 1452 } |
1458 | 1453 |
1459 if (err == 0) { 1460 /* finished; deactivate async destroy feature */ 1461 spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, 1462 tx); 1463 ASSERT(!spa_feature_is_active(spa, 1464 SPA_FEATURE_ASYNC_DESTROY)); 1465 VERIFY0(zap_remove(dp->dp_meta_objset, 1466 DMU_POOL_DIRECTORY_OBJECT, 1467 DMU_POOL_BPTREE_OBJ, tx)); 1468 VERIFY0(bptree_free(dp->dp_meta_objset, 1469 dp->dp_bptree_obj, tx)); 1470 dp->dp_bptree_obj = 0; 1471 scn->scn_async_destroying = B_FALSE; 1472 } | 1454 if (err == 0 && spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { 1455 ASSERT(scn->scn_async_destroying); 1456 scn->scn_is_bptree = B_TRUE; 1457 scn->scn_zio_root = zio_root(dp->dp_spa, NULL, 1458 NULL, ZIO_FLAG_MUSTSUCCEED); 1459 err = bptree_iterate(dp->dp_meta_objset, 1460 dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb, scn, tx); 1461 VERIFY0(zio_wait(scn->scn_zio_root)); 1462 1463 if (err == EIO || err == ECKSUM) { 1464 err = 0; 1465 } else if (err != 0 && err != ERESTART) { 1466 zfs_panic_recover("error %u from " 1467 "traverse_dataset_destroyed()", err); |
1473 } | 1468 } |
1474 if (scn->scn_visited_this_txg) { 1475 zfs_dbgmsg("freed %llu blocks in %llums from " 1476 "free_bpobj/bptree txg %llu", 1477 (longlong_t)scn->scn_visited_this_txg, 1478 (longlong_t) 1479 NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), 1480 (longlong_t)tx->tx_txg); 1481 scn->scn_visited_this_txg = 0; 1482 /* 1483 * Re-sync the ddt so that we can further modify 1484 * it when doing bprewrite. 1485 */ 1486 ddt_sync(spa, tx->tx_txg); | 1469 1470 /* 1471 * If we didn't make progress, mark the async destroy as 1472 * stalled, so that we will not initiate a spa_sync() on 1473 * its behalf. 1474 */ 1475 scn->scn_async_stalled = (scn->scn_visited_this_txg == 0); 1476 1477 if (bptree_is_empty(dp->dp_meta_objset, dp->dp_bptree_obj)) { 1478 /* finished; deactivate async destroy feature */ 1479 spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, tx); 1480 ASSERT(!spa_feature_is_active(spa, 1481 SPA_FEATURE_ASYNC_DESTROY)); 1482 VERIFY0(zap_remove(dp->dp_meta_objset, 1483 DMU_POOL_DIRECTORY_OBJECT, 1484 DMU_POOL_BPTREE_OBJ, tx)); 1485 VERIFY0(bptree_free(dp->dp_meta_objset, 1486 dp->dp_bptree_obj, tx)); 1487 dp->dp_bptree_obj = 0; 1488 scn->scn_async_destroying = B_FALSE; |
1487 } | 1489 } |
1488 if (err == ERESTART) 1489 return; | 1490 } 1491 if (scn->scn_visited_this_txg) { 1492 zfs_dbgmsg("freed %llu blocks in %llums from " 1493 "free_bpobj/bptree txg %llu; err=%u", 1494 (longlong_t)scn->scn_visited_this_txg, 1495 (longlong_t) 1496 NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), 1497 (longlong_t)tx->tx_txg, err); 1498 scn->scn_visited_this_txg = 0; 1499 1500 /* 1501 * Write out changes to the DDT that may be required as a 1502 * result of the blocks freed. This ensures that the DDT 1503 * is clean when a scrub/resilver runs. 1504 */ 1505 ddt_sync(spa, tx->tx_txg); 1506 } 1507 if (err != 0) 1508 return; 1509 if (!scn->scn_async_destroying && zfs_free_leak_on_eio && 1510 (dp->dp_free_dir->dd_phys->dd_used_bytes != 0 || 1511 dp->dp_free_dir->dd_phys->dd_compressed_bytes != 0 || 1512 dp->dp_free_dir->dd_phys->dd_uncompressed_bytes != 0)) { 1513 /* 1514 * We have finished background destroying, but there is still 1515 * some space left in the dp_free_dir. Transfer this leaked 1516 * space to the dp_leak_dir. 1517 */ 1518 if (dp->dp_leak_dir == NULL) { 1519 rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); 1520 (void) dsl_dir_create_sync(dp, dp->dp_root_dir, 1521 LEAK_DIR_NAME, tx); 1522 VERIFY0(dsl_pool_open_special_dir(dp, 1523 LEAK_DIR_NAME, &dp->dp_leak_dir)); 1524 rrw_exit(&dp->dp_config_rwlock, FTAG); 1525 } 1526 dsl_dir_diduse_space(dp->dp_leak_dir, DD_USED_HEAD, 1527 dp->dp_free_dir->dd_phys->dd_used_bytes, 1528 dp->dp_free_dir->dd_phys->dd_compressed_bytes, 1529 dp->dp_free_dir->dd_phys->dd_uncompressed_bytes, tx); 1530 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, 1531 -dp->dp_free_dir->dd_phys->dd_used_bytes, 1532 -dp->dp_free_dir->dd_phys->dd_compressed_bytes, 1533 -dp->dp_free_dir->dd_phys->dd_uncompressed_bytes, tx); 1534 } 1535 if (!scn->scn_async_destroying) { |
1490 /* finished; verify that space accounting went to zero */ 1491 ASSERT0(dp->dp_free_dir->dd_phys->dd_used_bytes); 1492 ASSERT0(dp->dp_free_dir->dd_phys->dd_compressed_bytes); 1493 ASSERT0(dp->dp_free_dir->dd_phys->dd_uncompressed_bytes); 1494 } 1495 1496 if (scn->scn_phys.scn_state != DSS_SCANNING) 1497 return; --- 276 unchanged lines hidden --- | 1536 /* finished; verify that space accounting went to zero */ 1537 ASSERT0(dp->dp_free_dir->dd_phys->dd_used_bytes); 1538 ASSERT0(dp->dp_free_dir->dd_phys->dd_compressed_bytes); 1539 ASSERT0(dp->dp_free_dir->dd_phys->dd_uncompressed_bytes); 1540 } 1541 1542 if (scn->scn_phys.scn_state != DSS_SCANNING) 1543 return; --- 276 unchanged lines hidden --- |