Deleted Added
full compact
vdev.c (332525) vdev.c (332530)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 157 unchanged lines hidden (view full) ---

166 * When a vdev is added, it will be divided into approximately (but no
167 * more than) this number of metaslabs.
168 */
169int metaslabs_per_vdev = 200;
170SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, metaslabs_per_vdev, CTLFLAG_RDTUN,
171 &metaslabs_per_vdev, 0,
172 "When a vdev is added, how many metaslabs the vdev should be divided into");
173
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 157 unchanged lines hidden (view full) ---

166 * When a vdev is added, it will be divided into approximately (but no
167 * more than) this number of metaslabs.
168 */
169int metaslabs_per_vdev = 200;
170SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, metaslabs_per_vdev, CTLFLAG_RDTUN,
171 &metaslabs_per_vdev, 0,
172 "When a vdev is added, how many metaslabs the vdev should be divided into");
173
174/*PRINTFLIKE2*/
175void
176vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
177{
178 va_list adx;
179 char buf[256];
180
181 va_start(adx, fmt);
182 (void) vsnprintf(buf, sizeof (buf), fmt, adx);
183 va_end(adx);
184
185 if (vd->vdev_path != NULL) {
186 zfs_dbgmsg("%s vdev '%s': %s", vd->vdev_ops->vdev_op_type,
187 vd->vdev_path, buf);
188 } else {
189 zfs_dbgmsg("%s-%llu vdev (guid %llu): %s",
190 vd->vdev_ops->vdev_op_type,
191 (u_longlong_t)vd->vdev_id,
192 (u_longlong_t)vd->vdev_guid, buf);
193 }
194}
195
174/*
175 * Given a vdev type, return the appropriate ops vector.
176 */
177static vdev_ops_t *
178vdev_getops(const char *type)
179{
180 vdev_ops_t *ops, **opspp;
181

--- 843 unchanged lines hidden (view full) ---

1025 * vdev_ms_array may be 0 if we are creating the "fake"
1026 * metaslabs for an indirect vdev for zdb's leak detection.
1027 * See zdb_leak_init().
1028 */
1029 if (txg == 0 && vd->vdev_ms_array != 0) {
1030 error = dmu_read(mos, vd->vdev_ms_array,
1031 m * sizeof (uint64_t), sizeof (uint64_t), &object,
1032 DMU_READ_PREFETCH);
196/*
197 * Given a vdev type, return the appropriate ops vector.
198 */
199static vdev_ops_t *
200vdev_getops(const char *type)
201{
202 vdev_ops_t *ops, **opspp;
203

--- 843 unchanged lines hidden (view full) ---

1047 * vdev_ms_array may be 0 if we are creating the "fake"
1048 * metaslabs for an indirect vdev for zdb's leak detection.
1049 * See zdb_leak_init().
1050 */
1051 if (txg == 0 && vd->vdev_ms_array != 0) {
1052 error = dmu_read(mos, vd->vdev_ms_array,
1053 m * sizeof (uint64_t), sizeof (uint64_t), &object,
1054 DMU_READ_PREFETCH);
1033 if (error)
1055 if (error != 0) {
1056 vdev_dbgmsg(vd, "unable to read the metaslab "
1057 "array [error=%d]", error);
1034 return (error);
1058 return (error);
1059 }
1035 }
1036
1037 error = metaslab_init(vd->vdev_mg, m, object, txg,
1038 &(vd->vdev_ms[m]));
1060 }
1061
1062 error = metaslab_init(vd->vdev_mg, m, object, txg,
1063 &(vd->vdev_ms[m]));
1039 if (error)
1064 if (error != 0) {
1065 vdev_dbgmsg(vd, "metaslab_init failed [error=%d]",
1066 error);
1040 return (error);
1067 return (error);
1068 }
1041 }
1042
1043 if (txg == 0)
1044 spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);
1045
1046 /*
1047 * If the vdev is being removed we don't activate
1048 * the metaslabs since we want to ensure that no new

--- 65 unchanged lines hidden (view full) ---

1114 vd->vdev_cant_read |= !vps->vps_readable;
1115 vd->vdev_cant_write |= !vps->vps_writeable;
1116
1117 if (vdev_readable(vd) &&
1118 (vdev_writeable(vd) || !spa_writeable(spa))) {
1119 zio->io_error = 0;
1120 } else {
1121 ASSERT(zio->io_error != 0);
1069 }
1070
1071 if (txg == 0)
1072 spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);
1073
1074 /*
1075 * If the vdev is being removed we don't activate
1076 * the metaslabs since we want to ensure that no new

--- 65 unchanged lines hidden (view full) ---

1142 vd->vdev_cant_read |= !vps->vps_readable;
1143 vd->vdev_cant_write |= !vps->vps_writeable;
1144
1145 if (vdev_readable(vd) &&
1146 (vdev_writeable(vd) || !spa_writeable(spa))) {
1147 zio->io_error = 0;
1148 } else {
1149 ASSERT(zio->io_error != 0);
1122 zfs_dbgmsg("failed probe on vdev %llu",
1123 (longlong_t)vd->vdev_id);
1150 vdev_dbgmsg(vd, "failed probe");
1124 zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
1125 spa, vd, NULL, 0, 0);
1126 zio->io_error = SET_ERROR(ENXIO);
1127 }
1128
1129 mutex_enter(&vd->vdev_probe_lock);
1130 ASSERT(vd->vdev_probe_zio == zio);
1131 vd->vdev_probe_zio = NULL;

--- 439 unchanged lines hidden (view full) ---

1571 uint64_t aux_guid = 0;
1572 nvlist_t *nvl;
1573 uint64_t txg = spa_last_synced_txg(spa) != 0 ?
1574 spa_last_synced_txg(spa) : -1ULL;
1575
1576 if ((label = vdev_label_read_config(vd, txg)) == NULL) {
1577 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
1578 VDEV_AUX_BAD_LABEL);
1151 zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
1152 spa, vd, NULL, 0, 0);
1153 zio->io_error = SET_ERROR(ENXIO);
1154 }
1155
1156 mutex_enter(&vd->vdev_probe_lock);
1157 ASSERT(vd->vdev_probe_zio == zio);
1158 vd->vdev_probe_zio = NULL;

--- 439 unchanged lines hidden (view full) ---

1598 uint64_t aux_guid = 0;
1599 nvlist_t *nvl;
1600 uint64_t txg = spa_last_synced_txg(spa) != 0 ?
1601 spa_last_synced_txg(spa) : -1ULL;
1602
1603 if ((label = vdev_label_read_config(vd, txg)) == NULL) {
1604 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
1605 VDEV_AUX_BAD_LABEL);
1606 vdev_dbgmsg(vd, "vdev_validate: failed reading config");
1579 return (0);
1580 }
1581
1582 /*
1583 * Determine if this vdev has been split off into another
1584 * pool. If so, then refuse to open it.
1585 */
1586 if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_SPLIT_GUID,
1587 &aux_guid) == 0 && aux_guid == spa_guid(spa)) {
1588 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
1589 VDEV_AUX_SPLIT_POOL);
1590 nvlist_free(label);
1607 return (0);
1608 }
1609
1610 /*
1611 * Determine if this vdev has been split off into another
1612 * pool. If so, then refuse to open it.
1613 */
1614 if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_SPLIT_GUID,
1615 &aux_guid) == 0 && aux_guid == spa_guid(spa)) {
1616 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
1617 VDEV_AUX_SPLIT_POOL);
1618 nvlist_free(label);
1619 vdev_dbgmsg(vd, "vdev_validate: vdev split into other "
1620 "pool");
1591 return (0);
1592 }
1593
1594 if (strict && (nvlist_lookup_uint64(label,
1595 ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1596 guid != spa_guid(spa))) {
1597 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
1598 VDEV_AUX_CORRUPT_DATA);
1599 nvlist_free(label);
1621 return (0);
1622 }
1623
1624 if (strict && (nvlist_lookup_uint64(label,
1625 ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1626 guid != spa_guid(spa))) {
1627 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
1628 VDEV_AUX_CORRUPT_DATA);
1629 nvlist_free(label);
1630 vdev_dbgmsg(vd, "vdev_validate: vdev label pool_guid "
1631 "doesn't match config (%llu != %llu)",
1632 (u_longlong_t)guid,
1633 (u_longlong_t)spa_guid(spa));
1600 return (0);
1601 }
1602
1603 if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvl)
1604 != 0 || nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_ORIG_GUID,
1605 &aux_guid) != 0)
1606 aux_guid = 0;
1607

--- 13 unchanged lines hidden (view full) ---

1621 &guid) != 0 ||
1622 nvlist_lookup_uint64(label, ZPOOL_CONFIG_TOP_GUID,
1623 &top_guid) != 0 ||
1624 ((vd->vdev_guid != guid && vd->vdev_guid != aux_guid) &&
1625 (vd->vdev_guid != top_guid || vd != vd->vdev_top))) {
1626 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
1627 VDEV_AUX_CORRUPT_DATA);
1628 nvlist_free(label);
1634 return (0);
1635 }
1636
1637 if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvl)
1638 != 0 || nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_ORIG_GUID,
1639 &aux_guid) != 0)
1640 aux_guid = 0;
1641

--- 13 unchanged lines hidden (view full) ---

1655 &guid) != 0 ||
1656 nvlist_lookup_uint64(label, ZPOOL_CONFIG_TOP_GUID,
1657 &top_guid) != 0 ||
1658 ((vd->vdev_guid != guid && vd->vdev_guid != aux_guid) &&
1659 (vd->vdev_guid != top_guid || vd != vd->vdev_top))) {
1660 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
1661 VDEV_AUX_CORRUPT_DATA);
1662 nvlist_free(label);
1663 vdev_dbgmsg(vd, "vdev_validate: config guid doesn't "
1664 "match label guid (%llu != %llu)",
1665 (u_longlong_t)vd->vdev_guid, (u_longlong_t)guid);
1629 return (0);
1630 }
1631
1632 if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE,
1633 &state) != 0) {
1634 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
1635 VDEV_AUX_CORRUPT_DATA);
1636 nvlist_free(label);
1666 return (0);
1667 }
1668
1669 if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE,
1670 &state) != 0) {
1671 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
1672 VDEV_AUX_CORRUPT_DATA);
1673 nvlist_free(label);
1674 vdev_dbgmsg(vd, "vdev_validate: '%s' missing",
1675 ZPOOL_CONFIG_POOL_STATE);
1637 return (0);
1638 }
1639
1640 nvlist_free(label);
1641
1642 /*
1643 * If this is a verbatim import, no need to check the
1644 * state of the pool.
1645 */
1646 if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) &&
1647 spa_load_state(spa) == SPA_LOAD_OPEN &&
1676 return (0);
1677 }
1678
1679 nvlist_free(label);
1680
1681 /*
1682 * If this is a verbatim import, no need to check the
1683 * state of the pool.
1684 */
1685 if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) &&
1686 spa_load_state(spa) == SPA_LOAD_OPEN &&
1648 state != POOL_STATE_ACTIVE)
1687 state != POOL_STATE_ACTIVE) {
1688 vdev_dbgmsg(vd, "vdev_validate: invalid pool state "
1689 "(%llu) for spa %s", (u_longlong_t)state,
1690 spa->spa_name);
1649 return (SET_ERROR(EBADF));
1691 return (SET_ERROR(EBADF));
1692 }
1650
1651 /*
1652 * If we were able to open and validate a vdev that was
1653 * previously marked permanently unavailable, clear that state
1654 * now.
1655 */
1656 if (vd->vdev_not_present)
1657 vd->vdev_not_present = 0;

--- 631 unchanged lines hidden (view full) ---

2289
2290 range_tree_destroy(rtsync);
2291
2292 /*
2293 * If the object for the space map has changed then dirty
2294 * the top level so that we update the config.
2295 */
2296 if (object != space_map_object(vd->vdev_dtl_sm)) {
1693
1694 /*
1695 * If we were able to open and validate a vdev that was
1696 * previously marked permanently unavailable, clear that state
1697 * now.
1698 */
1699 if (vd->vdev_not_present)
1700 vd->vdev_not_present = 0;

--- 631 unchanged lines hidden (view full) ---

2332
2333 range_tree_destroy(rtsync);
2334
2335 /*
2336 * If the object for the space map has changed then dirty
2337 * the top level so that we update the config.
2338 */
2339 if (object != space_map_object(vd->vdev_dtl_sm)) {
2297 zfs_dbgmsg("txg %llu, spa %s, DTL old object %llu, "
2298 "new object %llu", txg, spa_name(spa), object,
2299 space_map_object(vd->vdev_dtl_sm));
2340 vdev_dbgmsg(vd, "txg %llu, spa %s, DTL old object %llu, "
2341 "new object %llu", (u_longlong_t)txg, spa_name(spa),
2342 (u_longlong_t)object,
2343 (u_longlong_t)space_map_object(vd->vdev_dtl_sm));
2300 vdev_config_dirty(vd->vdev_top);
2301 }
2302
2303 dmu_tx_commit(tx);
2304
2305 mutex_enter(&vd->vdev_dtl_lock);
2306 space_map_update(vd->vdev_dtl_sm);
2307 mutex_exit(&vd->vdev_dtl_lock);

--- 91 unchanged lines hidden (view full) ---

2399
2400 /*
2401 * If this is a top-level vdev, initialize its metaslabs.
2402 */
2403 if (vd == vd->vdev_top && vdev_is_concrete(vd)) {
2404 if (vd->vdev_ashift == 0 || vd->vdev_asize == 0) {
2405 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
2406 VDEV_AUX_CORRUPT_DATA);
2344 vdev_config_dirty(vd->vdev_top);
2345 }
2346
2347 dmu_tx_commit(tx);
2348
2349 mutex_enter(&vd->vdev_dtl_lock);
2350 space_map_update(vd->vdev_dtl_sm);
2351 mutex_exit(&vd->vdev_dtl_lock);

--- 91 unchanged lines hidden (view full) ---

2443
2444 /*
2445 * If this is a top-level vdev, initialize its metaslabs.
2446 */
2447 if (vd == vd->vdev_top && vdev_is_concrete(vd)) {
2448 if (vd->vdev_ashift == 0 || vd->vdev_asize == 0) {
2449 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
2450 VDEV_AUX_CORRUPT_DATA);
2451 vdev_dbgmsg(vd, "vdev_load: invalid size. ashift=%llu, "
2452 "asize=%llu", (u_longlong_t)vd->vdev_ashift,
2453 (u_longlong_t)vd->vdev_asize);
2407 return (SET_ERROR(ENXIO));
2408 } else if ((error = vdev_metaslab_init(vd, 0)) != 0) {
2454 return (SET_ERROR(ENXIO));
2455 } else if ((error = vdev_metaslab_init(vd, 0)) != 0) {
2456 vdev_dbgmsg(vd, "vdev_load: metaslab_init failed "
2457 "[error=%d]", error);
2409 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
2410 VDEV_AUX_CORRUPT_DATA);
2411 return (error);
2412 }
2413 }
2414
2415 /*
2416 * If this is a leaf vdev, load its DTL.
2417 */
2418 if (vd->vdev_ops->vdev_op_leaf && (error = vdev_dtl_load(vd)) != 0) {
2419 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
2420 VDEV_AUX_CORRUPT_DATA);
2458 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
2459 VDEV_AUX_CORRUPT_DATA);
2460 return (error);
2461 }
2462 }
2463
2464 /*
2465 * If this is a leaf vdev, load its DTL.
2466 */
2467 if (vd->vdev_ops->vdev_op_leaf && (error = vdev_dtl_load(vd)) != 0) {
2468 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
2469 VDEV_AUX_CORRUPT_DATA);
2470 vdev_dbgmsg(vd, "vdev_load: vdev_dtl_load failed "
2471 "[error=%d]", error);
2421 return (error);
2422 }
2423
2424 uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
2425 if (obsolete_sm_object != 0) {
2426 objset_t *mos = vd->vdev_spa->spa_meta_objset;
2427 ASSERT(vd->vdev_asize != 0);
2428 ASSERT(vd->vdev_obsolete_sm == NULL);
2429
2430 if ((error = space_map_open(&vd->vdev_obsolete_sm, mos,
2431 obsolete_sm_object, 0, vd->vdev_asize, 0))) {
2432 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
2433 VDEV_AUX_CORRUPT_DATA);
2472 return (error);
2473 }
2474
2475 uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
2476 if (obsolete_sm_object != 0) {
2477 objset_t *mos = vd->vdev_spa->spa_meta_objset;
2478 ASSERT(vd->vdev_asize != 0);
2479 ASSERT(vd->vdev_obsolete_sm == NULL);
2480
2481 if ((error = space_map_open(&vd->vdev_obsolete_sm, mos,
2482 obsolete_sm_object, 0, vd->vdev_asize, 0))) {
2483 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
2484 VDEV_AUX_CORRUPT_DATA);
2485 vdev_dbgmsg(vd, "vdev_load: space_map_open failed for "
2486 "obsolete spacemap (obj %llu) [error=%d]",
2487 (u_longlong_t)obsolete_sm_object, error);
2434 return (error);
2435 }
2436 space_map_update(vd->vdev_obsolete_sm);
2437 }
2438
2439 return (0);
2440}
2441

--- 1358 unchanged lines hidden (view full) ---

3800 /*
3801 * Look at the head of all the pending queues,
3802 * if any I/O has been outstanding for longer than
3803 * the spa_deadman_synctime we panic the system.
3804 */
3805 fio = avl_first(&vq->vq_active_tree);
3806 delta = gethrtime() - fio->io_timestamp;
3807 if (delta > spa_deadman_synctime(spa)) {
2488 return (error);
2489 }
2490 space_map_update(vd->vdev_obsolete_sm);
2491 }
2492
2493 return (0);
2494}
2495

--- 1358 unchanged lines hidden (view full) ---

3854 /*
3855 * Look at the head of all the pending queues,
3856 * if any I/O has been outstanding for longer than
3857 * the spa_deadman_synctime we panic the system.
3858 */
3859 fio = avl_first(&vq->vq_active_tree);
3860 delta = gethrtime() - fio->io_timestamp;
3861 if (delta > spa_deadman_synctime(spa)) {
3808 zfs_dbgmsg("SLOW IO: zio timestamp %lluns, "
3809 "delta %lluns, last io %lluns",
3810 fio->io_timestamp, delta,
3862 vdev_dbgmsg(vd, "SLOW IO: zio timestamp "
3863 "%lluns, delta %lluns, last io %lluns",
3864 fio->io_timestamp, (u_longlong_t)delta,
3811 vq->vq_io_complete_ts);
3812 fm_panic("I/O to pool '%s' appears to be "
3813 "hung on vdev guid %llu at '%s'.",
3814 spa_name(spa),
3815 (long long unsigned int) vd->vdev_guid,
3816 vd->vdev_path);
3817 }
3818 }
3819 mutex_exit(&vq->vq_lock);
3820 }
3821}
3865 vq->vq_io_complete_ts);
3866 fm_panic("I/O to pool '%s' appears to be "
3867 "hung on vdev guid %llu at '%s'.",
3868 spa_name(spa),
3869 (long long unsigned int) vd->vdev_guid,
3870 vd->vdev_path);
3871 }
3872 }
3873 mutex_exit(&vq->vq_lock);
3874 }
3875}