vdev.c: revision 211931 (old, deleted lines) → revision 213197 (new, added lines)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 25 unchanged lines hidden ---

34#include <sys/uberblock_impl.h>
35#include <sys/metaslab.h>
36#include <sys/metaslab_impl.h>
37#include <sys/space_map.h>
38#include <sys/zio.h>
39#include <sys/zap.h>
40#include <sys/fs/zfs.h>
41#include <sys/arc.h>
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 25 unchanged lines hidden ---

34#include <sys/uberblock_impl.h>
35#include <sys/metaslab.h>
36#include <sys/metaslab_impl.h>
37#include <sys/space_map.h>
38#include <sys/zio.h>
39#include <sys/zap.h>
40#include <sys/fs/zfs.h>
41#include <sys/arc.h>
42#include <sys/zil.h>
42
43SYSCTL_DECL(_vfs_zfs);
44SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
45
46/*
47 * Virtual device management.
48 */
49

--- 710 unchanged lines hidden ---

760 uint64_t oldc = vd->vdev_ms_count;
761 uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift;
762 metaslab_t **mspp;
763 int error;
764
765 if (vd->vdev_ms_shift == 0) /* not being allocated from yet */
766 return (0);
767
43
44SYSCTL_DECL(_vfs_zfs);
45SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
46
47/*
48 * Virtual device management.
49 */
50

--- 710 unchanged lines hidden ---

761 uint64_t oldc = vd->vdev_ms_count;
762 uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift;
763 metaslab_t **mspp;
764 int error;
765
766 if (vd->vdev_ms_shift == 0) /* not being allocated from yet */
767 return (0);
768
769 /*
770 * Compute the raidz-deflation ratio. Note, we hard-code
771 * in 128k (1 << 17) because it is the current "typical" blocksize.
772 * Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change,
773 * or we will inconsistently account for existing bp's.
774 */
775 vd->vdev_deflate_ratio = (1 << 17) /
776 (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
777
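
For context on the ratio computed above: SPA_MINBLOCKSHIFT is 9 (512-byte minimum blocks), so a plain disk, where a 128k psize maps to a 128k asize, ends up with a deflate ratio of 131072 / 256 = 512, while a raidz vdev reports a larger asize for the same psize and therefore gets a smaller ratio. A minimal stand-alone C sketch of that arithmetic (the ~160k raidz asize is an illustrative figure for a hypothetical 4+1 single-parity group, not a value taken from this diff):

#include <stdio.h>
#include <stdint.h>

#define MINBLOCKSHIFT 9  /* 512-byte minimum block, mirroring SPA_MINBLOCKSHIFT */

/* deflate ratio = 128k / (asize of a 128k block, counted in 512-byte units) */
static uint64_t
deflate_ratio(uint64_t asize_of_128k)
{
        return ((uint64_t)(1 << 17) / (asize_of_128k >> MINBLOCKSHIFT));
}

int
main(void)
{
        /* plain disk: a 128k psize maps to a 128k asize */
        printf("plain vdev ratio: %ju\n", (uintmax_t)deflate_ratio(1 << 17));
        /* hypothetical raidz 4+1 group: a 128k block occupies roughly 160k of asize */
        printf("raidz-ish ratio:  %ju\n", (uintmax_t)deflate_ratio(160 * 1024));
        return (0);
}

This prints 512 and 409; the smaller the ratio, the more a raw raidz delta is deflated when it is later charged against the pool's dspace accounting.
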
768 ASSERT(oldc <= newc);
769
770 if (vd->vdev_islog)
771 mc = spa->spa_log_class;
772 else
773 mc = spa->spa_normal_class;
774
775 if (vd->vdev_mg == NULL)

--- 217 unchanged lines hidden ---

993
994 ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
995
996 ASSERT(vd->vdev_state == VDEV_STATE_CLOSED ||
997 vd->vdev_state == VDEV_STATE_CANT_OPEN ||
998 vd->vdev_state == VDEV_STATE_OFFLINE);
999
1000 vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
778 ASSERT(oldc <= newc);
779
780 if (vd->vdev_islog)
781 mc = spa->spa_log_class;
782 else
783 mc = spa->spa_normal_class;
784
785 if (vd->vdev_mg == NULL)

--- 217 unchanged lines hidden ---

1003
1004 ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
1005
1006 ASSERT(vd->vdev_state == VDEV_STATE_CLOSED ||
1007 vd->vdev_state == VDEV_STATE_CANT_OPEN ||
1008 vd->vdev_state == VDEV_STATE_OFFLINE);
1009
1010 vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
1011 vd->vdev_cant_read = B_FALSE;
1012 vd->vdev_cant_write = B_FALSE;
1001
1002 if (!vd->vdev_removed && vd->vdev_faulted) {
1003 ASSERT(vd->vdev_children == 0);
1004 vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
1005 VDEV_AUX_ERR_EXCEEDED);
1006 return (ENXIO);
1007 } else if (vd->vdev_offline) {
1008 ASSERT(vd->vdev_children == 0);

--- 99 unchanged lines hidden ---

1108 if (vd->vdev_ops->vdev_op_leaf &&
1109 (error = zio_wait(vdev_probe(vd, NULL))) != 0) {
1110 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
1111 VDEV_AUX_IO_FAILURE);
1112 return (error);
1113 }
1114
1115 /*
1013
1014 if (!vd->vdev_removed && vd->vdev_faulted) {
1015 ASSERT(vd->vdev_children == 0);
1016 vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
1017 VDEV_AUX_ERR_EXCEEDED);
1018 return (ENXIO);
1019 } else if (vd->vdev_offline) {
1020 ASSERT(vd->vdev_children == 0);

--- 99 unchanged lines hidden ---

1120 if (vd->vdev_ops->vdev_op_leaf &&
1121 (error = zio_wait(vdev_probe(vd, NULL))) != 0) {
1122 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
1123 VDEV_AUX_IO_FAILURE);
1124 return (error);
1125 }
1126
1127 /*
1116 * If this is a top-level vdev, compute the raidz-deflation
1117 * ratio. Note, we hard-code in 128k (1<<17) because it is the
1118 * current "typical" blocksize. Even if SPA_MAXBLOCKSIZE
1119 * changes, this algorithm must never change, or we will
1120 * inconsistently account for existing bp's.
1121 */
1122 if (vd->vdev_top == vd) {
1123 vd->vdev_deflate_ratio = (1<<17) /
1124 (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
1125 }
1126
1127 /*
1128 * If a leaf vdev has a DTL, and seems healthy, then kick off a
1129 * resilver. But don't do this if we are doing a reopen for a scrub,
1130 * since this would just restart the scrub we are already doing.
1131 */
1132 if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
1133 vdev_resilver_needed(vd, NULL, NULL))
1134 spa_async_request(spa, SPA_ASYNC_RESILVER);
1135

--- 796 unchanged lines hidden ---

1932 vd->vdev_unspare = B_TRUE;
1933
1934 return (spa_vdev_state_exit(spa, vd, 0));
1935}
1936
1937int
1938vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
1939{
1128 * If a leaf vdev has a DTL, and seems healthy, then kick off a
1129 * resilver. But don't do this if we are doing a reopen for a scrub,
1130 * since this would just restart the scrub we are already doing.
1131 */
1132 if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
1133 vdev_resilver_needed(vd, NULL, NULL))
1134 spa_async_request(spa, SPA_ASYNC_RESILVER);
1135

--- 796 unchanged lines hidden ---

1932 vd->vdev_unspare = B_TRUE;
1933
1934 return (spa_vdev_state_exit(spa, vd, 0));
1935}
1936
1937int
1938vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
1939{
1940 vdev_t *vd;
1940 vdev_t *vd, *tvd;
1941 int error;
1941
1942 spa_vdev_state_enter(spa);
1943
1944 if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
1945 return (spa_vdev_state_exit(spa, NULL, ENODEV));
1946
1947 if (!vd->vdev_ops->vdev_op_leaf)
1948 return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
1949
1942
1943 spa_vdev_state_enter(spa);
1944
1945 if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
1946 return (spa_vdev_state_exit(spa, NULL, ENODEV));
1947
1948 if (!vd->vdev_ops->vdev_op_leaf)
1949 return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
1950
1951 tvd = vd->vdev_top;
1952
1950 /*
1951 * If the device isn't already offline, try to offline it.
1952 */
1953 if (!vd->vdev_offline) {
1954 /*
1955 * If this device has the only valid copy of some data,
1953 /*
1954 * If the device isn't already offline, try to offline it.
1955 */
1956 if (!vd->vdev_offline) {
1957 /*
1958 * If this device has the only valid copy of some data,
1956 * don't allow it to be offlined.
1959 * don't allow it to be offlined. Log devices are always
1960 * expendable.
1957 */
1961 */
1958 if (vd->vdev_aux == NULL && vdev_dtl_required(vd))
1962 if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
1963 vdev_dtl_required(vd))
1959 return (spa_vdev_state_exit(spa, NULL, EBUSY));
1960
1961 /*
1962 * Offline this device and reopen its top-level vdev.
1964 return (spa_vdev_state_exit(spa, NULL, EBUSY));
1965
1966 /*
1967 * Offline this device and reopen its top-level vdev.
1963 * If this action results in the top-level vdev becoming
1964 * unusable, undo it and fail the request.
1968 * If the top-level vdev is a log device then just offline
1969 * it. Otherwise, if this action results in the top-level
1970 * vdev becoming unusable, undo it and fail the request.
1965 */
1966 vd->vdev_offline = B_TRUE;
1971 */
1972 vd->vdev_offline = B_TRUE;
1967 vdev_reopen(vd->vdev_top);
1968 if (vd->vdev_aux == NULL && vdev_is_dead(vd->vdev_top)) {
1973 vdev_reopen(tvd);
1974
1975 if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
1976 vdev_is_dead(tvd)) {
1969 vd->vdev_offline = B_FALSE;
1977 vd->vdev_offline = B_FALSE;
1970 vdev_reopen(vd->vdev_top);
1978 vdev_reopen(tvd);
1971 return (spa_vdev_state_exit(spa, NULL, EBUSY));
1972 }
1973 }
1974
1975 vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY);
1976
1979 return (spa_vdev_state_exit(spa, NULL, EBUSY));
1980 }
1981 }
1982
1983 vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY);
1984
1977 return (spa_vdev_state_exit(spa, vd, 0));
1985 if (!tvd->vdev_islog || !vdev_is_dead(tvd))
1986 return (spa_vdev_state_exit(spa, vd, 0));
1987
1988 (void) spa_vdev_state_exit(spa, vd, 0);
1989
1990 error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
1991 NULL, DS_FIND_CHILDREN);
1992 if (error) {
1993 (void) vdev_online(spa, guid, 0, NULL);
1994 return (error);
1995 }
1996 /*
1997 * If we successfully offlined the log device then we need to
1998 * sync out the current txg so that the "stubby" block can be
1999 * removed by zil_sync().
2000 */
2001 txg_wait_synced(spa->spa_dsl_pool, 0);
2002 return (0);
1978}
1979
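
For illustration, the log-device branch added above is reached through the ordinary offline path; a minimal user-space sketch of driving it via libzfs follows (the pool name "tank" and device name "gpt/slog0" are hypothetical placeholders, and the program assumes it is linked against libzfs):

#include <libzfs.h>

int
main(void)
{
        libzfs_handle_t *hdl = libzfs_init();
        zpool_handle_t *zhp;
        int error = 1;

        if (hdl == NULL)
                return (1);
        if ((zhp = zpool_open(hdl, "tank")) != NULL) {
                /* B_TRUE asks for a temporary offline, like "zpool offline -t" */
                error = zpool_vdev_offline(zhp, "gpt/slog0", B_TRUE);
                zpool_close(zhp);
        }
        libzfs_fini(hdl);
        return (error == 0 ? 0 : 1);
}
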
1980/*
1981 * Clear the error counts associated with this vdev. Unlike vdev_online() and
1982 * vdev_offline(), we assume the spa config is locked. We also clear all
1983 * children. If 'vd' is NULL, then the user wants to clear all vdevs.
1984 */
1985void

--- 288 unchanged lines hidden ---

2274
2275 /*
2276 * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion
2277 * factor. We must calculate this here and not at the root vdev
2278 * because the root vdev's psize-to-asize is simply the max of its
2279 * children's, thus not accurate enough for us.
2280 */
2281 ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
2003}
2004
2005/*
2006 * Clear the error counts associated with this vdev. Unlike vdev_online() and
2007 * vdev_offline(), we assume the spa config is locked. We also clear all
2008 * children. If 'vd' is NULL, then the user wants to clear all vdevs.
2009 */
2010void

--- 288 unchanged lines hidden (view full) ---

2299
2300 /*
2301 * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion
2302 * factor. We must calculate this here and not at the root vdev
2303 * because the root vdev's psize-to-asize is simply the max of its
2304 * children's, thus not accurate enough for us.
2305 */
2306 ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
2307 ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache);
2282 dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
2283 vd->vdev_deflate_ratio;
2284
2285 mutex_enter(&vd->vdev_stat_lock);
2286 vd->vdev_stat.vs_space += space_delta;
2287 vd->vdev_stat.vs_alloc += alloc_delta;
2288 vd->vdev_stat.vs_dspace += dspace_delta;
2289 mutex_exit(&vd->vdev_stat_lock);
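
To make the inverse-expansion step concrete: the ASSERT above guarantees dspace_delta is a multiple of SPA_MINBLOCKSIZE, so on a plain disk (deflate ratio 512) the shift-and-multiply returns the delta unchanged, while on a raidz vdev with a ratio of, say, 409 (a hypothetical 4+1 single-parity layout) a 1 GiB raw delta becomes 2097152 * 409 bytes, roughly 818 MiB, i.e. only the share of the raw space that actually holds data.
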

--- 336 unchanged lines hidden ---

2626 * For FreeBSD, we can boot from any configuration. There is a
2627 * limitation that the boot filesystem must be either uncompressed or
2628 * compressed with lzjb compression, but I'm not sure how to enforce
2629 * that here.
2630 */
2631boolean_t
2632vdev_is_bootable(vdev_t *vd)
2633{
2308 dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
2309 vd->vdev_deflate_ratio;
2310
2311 mutex_enter(&vd->vdev_stat_lock);
2312 vd->vdev_stat.vs_space += space_delta;
2313 vd->vdev_stat.vs_alloc += alloc_delta;
2314 vd->vdev_stat.vs_dspace += dspace_delta;
2315 mutex_exit(&vd->vdev_stat_lock);

--- 336 unchanged lines hidden ---

2652 * For FreeBSD, we can boot from any configuration. There is a
2653 * limitation that the boot filesystem must be either uncompressed or
2654 * compressed with lzjb compression, but I'm not sure how to enforce
2655 * that here.
2656 */
2657boolean_t
2658vdev_is_bootable(vdev_t *vd)
2659{
2634#ifdef __FreeBSD_version
2635 return (B_TRUE);
2636#else
2637 int c;
2638
2660#ifdef sun
2639 if (!vd->vdev_ops->vdev_op_leaf) {
2640 char *vdev_type = vd->vdev_ops->vdev_op_type;
2641
2642 if (strcmp(vdev_type, VDEV_TYPE_ROOT) == 0 &&
2643 vd->vdev_children > 1) {
2644 return (B_FALSE);
2645 } else if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 ||
2646 strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) {
2647 return (B_FALSE);
2648 }
2649 } else if (vd->vdev_wholedisk == 1) {
2650 return (B_FALSE);
2651 }
2652
2653 for (c = 0; c < vd->vdev_children; c++) {
2654 if (!vdev_is_bootable(vd->vdev_child[c]))
2655 return (B_FALSE);
2656 }
2661 if (!vd->vdev_ops->vdev_op_leaf) {
2662 char *vdev_type = vd->vdev_ops->vdev_op_type;
2663
2664 if (strcmp(vdev_type, VDEV_TYPE_ROOT) == 0 &&
2665 vd->vdev_children > 1) {
2666 return (B_FALSE);
2667 } else if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 ||
2668 strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) {
2669 return (B_FALSE);
2670 }
2671 } else if (vd->vdev_wholedisk == 1) {
2672 return (B_FALSE);
2673 }
2674
2675 for (c = 0; c < vd->vdev_children; c++) {
2676 if (!vdev_is_bootable(vd->vdev_child[c]))
2677 return (B_FALSE);
2678 }
2679#endif /* sun */
2657 return (B_TRUE);
2680 return (B_TRUE);
2658#endif
2659}
2681}
2682
2683void
2684vdev_load_log_state(vdev_t *vd, nvlist_t *nv)
2685{
2686 uint_t c, children;
2687 nvlist_t **child;
2688 uint64_t val;
2689 spa_t *spa = vd->vdev_spa;
2690
2691 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
2692 &child, &children) == 0) {
2693 for (c = 0; c < children; c++)
2694 vdev_load_log_state(vd->vdev_child[c], child[c]);
2695 }
2696
2697 if (vd->vdev_ops->vdev_op_leaf && nvlist_lookup_uint64(nv,
2698 ZPOOL_CONFIG_OFFLINE, &val) == 0 && val) {
2699
2700 /*
2701 * It would be nice to call vdev_offline()
2702 * directly but the pool isn't fully loaded and
2703 * the txg threads have not been started yet.
2704 */
2705 spa_config_enter(spa, SCL_STATE_ALL, FTAG, RW_WRITER);
2706 vd->vdev_offline = val;
2707 vdev_reopen(vd->vdev_top);
2708 spa_config_exit(spa, SCL_STATE_ALL, FTAG);
2709 }
2710}
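
vdev_load_log_state() above recurses over the ZPOOL_CONFIG_CHILDREN nvlist array and then re-applies any recorded offline state to leaf vdevs. A minimal user-space sketch of that same nvlist-walking pattern, using only libnvpair (the "children" and "offline" key strings and the tiny tree built in main() are illustrative stand-ins for the real ZPOOL_CONFIG_* names and pool config, not values taken from this diff):

#include <stdio.h>
#include <libnvpair.h>

/*
 * Walk an nvlist tree the way vdev_load_log_state() does: recurse into the
 * "children" array first, then act on any node that carries a non-zero
 * "offline" value.
 */
static void
walk(nvlist_t *nv, int depth)
{
        nvlist_t **child;
        uint_t c, children;
        uint64_t off;

        if (nvlist_lookup_nvlist_array(nv, "children", &child, &children) == 0) {
                for (c = 0; c < children; c++)
                        walk(child[c], depth + 1);
        }
        if (nvlist_lookup_uint64(nv, "offline", &off) == 0 && off != 0)
                printf("node at depth %d was marked offline\n", depth);
}

int
main(void)
{
        nvlist_t *root, *leaf, *kids[1];

        if (nvlist_alloc(&root, NV_UNIQUE_NAME, 0) != 0 ||
            nvlist_alloc(&leaf, NV_UNIQUE_NAME, 0) != 0)
                return (1);
        (void) nvlist_add_uint64(leaf, "offline", 1);
        kids[0] = leaf;
        /* nvlist_add_nvlist_array() copies the children into root */
        (void) nvlist_add_nvlist_array(root, "children", kids, 1);

        walk(root, 0);

        nvlist_free(leaf);
        nvlist_free(root);
        return (0);
}
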