vdev.c (211931) | vdev.c (213197) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 25 unchanged lines hidden (view full) --- 34#include <sys/uberblock_impl.h> 35#include <sys/metaslab.h> 36#include <sys/metaslab_impl.h> 37#include <sys/space_map.h> 38#include <sys/zio.h> 39#include <sys/zap.h> 40#include <sys/fs/zfs.h> 41#include <sys/arc.h> | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 25 unchanged lines hidden (view full) --- 34#include <sys/uberblock_impl.h> 35#include <sys/metaslab.h> 36#include <sys/metaslab_impl.h> 37#include <sys/space_map.h> 38#include <sys/zio.h> 39#include <sys/zap.h> 40#include <sys/fs/zfs.h> 41#include <sys/arc.h> |
42#include <sys/zil.h> |
|
42 43SYSCTL_DECL(_vfs_zfs); 44SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV"); 45 46/* 47 * Virtual device management. 48 */ 49 --- 710 unchanged lines hidden (view full) --- 760 uint64_t oldc = vd->vdev_ms_count; 761 uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift; 762 metaslab_t **mspp; 763 int error; 764 765 if (vd->vdev_ms_shift == 0) /* not being allocated from yet */ 766 return (0); 767 | 43 44SYSCTL_DECL(_vfs_zfs); 45SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV"); 46 47/* 48 * Virtual device management. 49 */ 50 --- 710 unchanged lines hidden (view full) --- 761 uint64_t oldc = vd->vdev_ms_count; 762 uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift; 763 metaslab_t **mspp; 764 int error; 765 766 if (vd->vdev_ms_shift == 0) /* not being allocated from yet */ 767 return (0); 768 |
769 /* 770 * Compute the raidz-deflation ratio. Note, we hard-code 771 * in 128k (1 << 17) because it is the current "typical" blocksize. 772 * Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change, 773 * or we will inconsistently account for existing bp's. 774 */ 775 vd->vdev_deflate_ratio = (1 << 17) / 776 (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT); 777 |
|
768 ASSERT(oldc <= newc); 769 770 if (vd->vdev_islog) 771 mc = spa->spa_log_class; 772 else 773 mc = spa->spa_normal_class; 774 775 if (vd->vdev_mg == NULL) --- 217 unchanged lines hidden (view full) --- 993 994 ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); 995 996 ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || 997 vd->vdev_state == VDEV_STATE_CANT_OPEN || 998 vd->vdev_state == VDEV_STATE_OFFLINE); 999 1000 vd->vdev_stat.vs_aux = VDEV_AUX_NONE; | 778 ASSERT(oldc <= newc); 779 780 if (vd->vdev_islog) 781 mc = spa->spa_log_class; 782 else 783 mc = spa->spa_normal_class; 784 785 if (vd->vdev_mg == NULL) --- 217 unchanged lines hidden (view full) --- 1003 1004 ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); 1005 1006 ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || 1007 vd->vdev_state == VDEV_STATE_CANT_OPEN || 1008 vd->vdev_state == VDEV_STATE_OFFLINE); 1009 1010 vd->vdev_stat.vs_aux = VDEV_AUX_NONE; |
1011 vd->vdev_cant_read = B_FALSE; 1012 vd->vdev_cant_write = B_FALSE; |
|
1001 1002 if (!vd->vdev_removed && vd->vdev_faulted) { 1003 ASSERT(vd->vdev_children == 0); 1004 vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, 1005 VDEV_AUX_ERR_EXCEEDED); 1006 return (ENXIO); 1007 } else if (vd->vdev_offline) { 1008 ASSERT(vd->vdev_children == 0); --- 99 unchanged lines hidden (view full) --- 1108 if (vd->vdev_ops->vdev_op_leaf && 1109 (error = zio_wait(vdev_probe(vd, NULL))) != 0) { 1110 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, 1111 VDEV_AUX_IO_FAILURE); 1112 return (error); 1113 } 1114 1115 /* | 1013 1014 if (!vd->vdev_removed && vd->vdev_faulted) { 1015 ASSERT(vd->vdev_children == 0); 1016 vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED, 1017 VDEV_AUX_ERR_EXCEEDED); 1018 return (ENXIO); 1019 } else if (vd->vdev_offline) { 1020 ASSERT(vd->vdev_children == 0); --- 99 unchanged lines hidden (view full) --- 1120 if (vd->vdev_ops->vdev_op_leaf && 1121 (error = zio_wait(vdev_probe(vd, NULL))) != 0) { 1122 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, 1123 VDEV_AUX_IO_FAILURE); 1124 return (error); 1125 } 1126 1127 /* |
1116 * If this is a top-level vdev, compute the raidz-deflation 1117 * ratio. Note, we hard-code in 128k (1<<17) because it is the 1118 * current "typical" blocksize. Even if SPA_MAXBLOCKSIZE 1119 * changes, this algorithm must never change, or we will 1120 * inconsistently account for existing bp's. 1121 */ 1122 if (vd->vdev_top == vd) { 1123 vd->vdev_deflate_ratio = (1<<17) / 1124 (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT); 1125 } 1126 1127 /* | |
1128 * If a leaf vdev has a DTL, and seems healthy, then kick off a 1129 * resilver. But don't do this if we are doing a reopen for a scrub, 1130 * since this would just restart the scrub we are already doing. 1131 */ 1132 if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && 1133 vdev_resilver_needed(vd, NULL, NULL)) 1134 spa_async_request(spa, SPA_ASYNC_RESILVER); 1135 --- 796 unchanged lines hidden (view full) --- 1932 vd->vdev_unspare = B_TRUE; 1933 1934 return (spa_vdev_state_exit(spa, vd, 0)); 1935} 1936 1937int 1938vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags) 1939{ | 1128 * If a leaf vdev has a DTL, and seems healthy, then kick off a 1129 * resilver. But don't do this if we are doing a reopen for a scrub, 1130 * since this would just restart the scrub we are already doing. 1131 */ 1132 if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && 1133 vdev_resilver_needed(vd, NULL, NULL)) 1134 spa_async_request(spa, SPA_ASYNC_RESILVER); 1135 --- 796 unchanged lines hidden (view full) --- 1932 vd->vdev_unspare = B_TRUE; 1933 1934 return (spa_vdev_state_exit(spa, vd, 0)); 1935} 1936 1937int 1938vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags) 1939{ |
1940 vdev_t *vd; | 1940 vdev_t *vd, *tvd; 1941 int error; |
1941 1942 spa_vdev_state_enter(spa); 1943 1944 if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 1945 return (spa_vdev_state_exit(spa, NULL, ENODEV)); 1946 1947 if (!vd->vdev_ops->vdev_op_leaf) 1948 return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); 1949 | 1942 1943 spa_vdev_state_enter(spa); 1944 1945 if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 1946 return (spa_vdev_state_exit(spa, NULL, ENODEV)); 1947 1948 if (!vd->vdev_ops->vdev_op_leaf) 1949 return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); 1950 |
1951 tvd = vd->vdev_top; 1952 |
|
1950 /* 1951 * If the device isn't already offline, try to offline it. 1952 */ 1953 if (!vd->vdev_offline) { 1954 /* 1955 * If this device has the only valid copy of some data, | 1953 /* 1954 * If the device isn't already offline, try to offline it. 1955 */ 1956 if (!vd->vdev_offline) { 1957 /* 1958 * If this device has the only valid copy of some data, |
1956 * don't allow it to be offlined. | 1959 * don't allow it to be offlined. Log devices are always 1960 * expendable. |
1957 */ | 1961 */ |
1958 if (vd->vdev_aux == NULL && vdev_dtl_required(vd)) | 1962 if (!tvd->vdev_islog && vd->vdev_aux == NULL && 1963 vdev_dtl_required(vd)) |
1959 return (spa_vdev_state_exit(spa, NULL, EBUSY)); 1960 1961 /* 1962 * Offline this device and reopen its top-level vdev. | 1964 return (spa_vdev_state_exit(spa, NULL, EBUSY)); 1965 1966 /* 1967 * Offline this device and reopen its top-level vdev. |
1963 * If this action results in the top-level vdev becoming 1964 * unusable, undo it and fail the request. | 1968 * If the top-level vdev is a log device then just offline 1969 * it. Otherwise, if this action results in the top-level 1970 * vdev becoming unusable, undo it and fail the request. |
1965 */ 1966 vd->vdev_offline = B_TRUE; | 1971 */ 1972 vd->vdev_offline = B_TRUE; |
1967 vdev_reopen(vd->vdev_top); 1968 if (vd->vdev_aux == NULL && vdev_is_dead(vd->vdev_top)) { | 1973 vdev_reopen(tvd); 1974 1975 if (!tvd->vdev_islog && vd->vdev_aux == NULL && 1976 vdev_is_dead(tvd)) { |
1969 vd->vdev_offline = B_FALSE; | 1977 vd->vdev_offline = B_FALSE; |
1970 vdev_reopen(vd->vdev_top); | 1978 vdev_reopen(tvd); |
1971 return (spa_vdev_state_exit(spa, NULL, EBUSY)); 1972 } 1973 } 1974 1975 vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY); 1976 | 1979 return (spa_vdev_state_exit(spa, NULL, EBUSY)); 1980 } 1981 } 1982 1983 vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY); 1984 |
1977 return (spa_vdev_state_exit(spa, vd, 0)); | 1985 if (!tvd->vdev_islog || !vdev_is_dead(tvd)) 1986 return (spa_vdev_state_exit(spa, vd, 0)); 1987 1988 (void) spa_vdev_state_exit(spa, vd, 0); 1989 1990 error = dmu_objset_find(spa_name(spa), zil_vdev_offline, 1991 NULL, DS_FIND_CHILDREN); 1992 if (error) { 1993 (void) vdev_online(spa, guid, 0, NULL); 1994 return (error); 1995 } 1996 /* 1997 * If we successfully offlined the log device then we need to 1998 * sync out the current txg so that the "stubby" block can be 1999 * removed by zil_sync(). 2000 */ 2001 txg_wait_synced(spa->spa_dsl_pool, 0); 2002 return (0); |
1978} 1979 1980/* 1981 * Clear the error counts associated with this vdev. Unlike vdev_online() and 1982 * vdev_offline(), we assume the spa config is locked. We also clear all 1983 * children. If 'vd' is NULL, then the user wants to clear all vdevs. 1984 */ 1985void --- 288 unchanged lines hidden (view full) --- 2274 2275 /* 2276 * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion 2277 * factor. We must calculate this here and not at the root vdev 2278 * because the root vdev's psize-to-asize is simply the max of its 2279 * childrens', thus not accurate enough for us. 2280 */ 2281 ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0); | 2003} 2004 2005/* 2006 * Clear the error counts associated with this vdev. Unlike vdev_online() and 2007 * vdev_offline(), we assume the spa config is locked. We also clear all 2008 * children. If 'vd' is NULL, then the user wants to clear all vdevs. 2009 */ 2010void --- 288 unchanged lines hidden (view full) --- 2299 2300 /* 2301 * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion 2302 * factor. We must calculate this here and not at the root vdev 2303 * because the root vdev's psize-to-asize is simply the max of its 2304 * childrens', thus not accurate enough for us. 2305 */ 2306 ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0); |
2307 ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache); |
|
2282 dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) * 2283 vd->vdev_deflate_ratio; 2284 2285 mutex_enter(&vd->vdev_stat_lock); 2286 vd->vdev_stat.vs_space += space_delta; 2287 vd->vdev_stat.vs_alloc += alloc_delta; 2288 vd->vdev_stat.vs_dspace += dspace_delta; 2289 mutex_exit(&vd->vdev_stat_lock); --- 336 unchanged lines hidden (view full) --- 2626 * For FreeBSD, we can boot from any configuration. There is a 2627 * limitation that the boot filesystem must be either uncompressed or 2628 * compressed with lzjb compression but I'm not sure how to enforce 2629 * that here. 2630 */ 2631boolean_t 2632vdev_is_bootable(vdev_t *vd) 2633{ | 2308 dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) * 2309 vd->vdev_deflate_ratio; 2310 2311 mutex_enter(&vd->vdev_stat_lock); 2312 vd->vdev_stat.vs_space += space_delta; 2313 vd->vdev_stat.vs_alloc += alloc_delta; 2314 vd->vdev_stat.vs_dspace += dspace_delta; 2315 mutex_exit(&vd->vdev_stat_lock); --- 336 unchanged lines hidden (view full) --- 2652 * For FreeBSD, we can boot from any configuration. There is a 2653 * limitation that the boot filesystem must be either uncompressed or 2654 * compressed with lzjb compression but I'm not sure how to enforce 2655 * that here. 2656 */ 2657boolean_t 2658vdev_is_bootable(vdev_t *vd) 2659{ |
2634#ifdef __FreeBSD_version 2635 return (B_TRUE); 2636#else 2637 int c; 2638 | 2660#ifdef sun |
2639 if (!vd->vdev_ops->vdev_op_leaf) { 2640 char *vdev_type = vd->vdev_ops->vdev_op_type; 2641 2642 if (strcmp(vdev_type, VDEV_TYPE_ROOT) == 0 && 2643 vd->vdev_children > 1) { 2644 return (B_FALSE); 2645 } else if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 || 2646 strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) { 2647 return (B_FALSE); 2648 } 2649 } else if (vd->vdev_wholedisk == 1) { 2650 return (B_FALSE); 2651 } 2652 2653 for (c = 0; c < vd->vdev_children; c++) { 2654 if (!vdev_is_bootable(vd->vdev_child[c])) 2655 return (B_FALSE); 2656 } | 2661 if (!vd->vdev_ops->vdev_op_leaf) { 2662 char *vdev_type = vd->vdev_ops->vdev_op_type; 2663 2664 if (strcmp(vdev_type, VDEV_TYPE_ROOT) == 0 && 2665 vd->vdev_children > 1) { 2666 return (B_FALSE); 2667 } else if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 || 2668 strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) { 2669 return (B_FALSE); 2670 } 2671 } else if (vd->vdev_wholedisk == 1) { 2672 return (B_FALSE); 2673 } 2674 2675 for (c = 0; c < vd->vdev_children; c++) { 2676 if (!vdev_is_bootable(vd->vdev_child[c])) 2677 return (B_FALSE); 2678 } |
2679#endif /* sun */ |
|
2657 return (B_TRUE); | 2680 return (B_TRUE); |
2658#endif | |
2659} | 2681} |
2682 2683void 2684vdev_load_log_state(vdev_t *vd, nvlist_t *nv) 2685{ 2686 uint_t c, children; 2687 nvlist_t **child; 2688 uint64_t val; 2689 spa_t *spa = vd->vdev_spa; 2690 2691 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 2692 &child, &children) == 0) { 2693 for (c = 0; c < children; c++) 2694 vdev_load_log_state(vd->vdev_child[c], child[c]); 2695 } 2696 2697 if (vd->vdev_ops->vdev_op_leaf && nvlist_lookup_uint64(nv, 2698 ZPOOL_CONFIG_OFFLINE, &val) == 0 && val) { 2699 2700 /* 2701 * It would be nice to call vdev_offline() 2702 * directly but the pool isn't fully loaded and 2703 * the txg threads have not been started yet. 2704 */ 2705 spa_config_enter(spa, SCL_STATE_ALL, FTAG, RW_WRITER); 2706 vd->vdev_offline = val; 2707 vdev_reopen(vd->vdev_top); 2708 spa_config_exit(spa, SCL_STATE_ALL, FTAG); 2709 } 2710} |
|