Cross Reference: /freebsd-11.0-release/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa

Deleted Added

sdiff udiff text old ( 243503 ) new ( 247265 )

full compact

spa_misc.c (243503)	spa_misc.c (247265)
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 / 21/ 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012 by Delphix. All rights reserved. 24 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.	1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 / 21/ 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
23 * Copyright (c) 2012 by Delphix. All rights reserved. 24 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
	25 * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
25 */ 26 27#include <sys/zfs_context.h> 28#include <sys/spa_impl.h>	26 */ 27 28#include <sys/zfs_context.h> 29#include <sys/spa_impl.h>
	30#include <sys/spa_boot.h>
29#include <sys/zio.h> 30#include <sys/zio_checksum.h> 31#include <sys/zio_compress.h> 32#include <sys/dmu.h> 33#include <sys/dmu_tx.h> 34#include <sys/zap.h> 35#include <sys/zil.h> 36#include <sys/vdev_impl.h> 37#include <sys/metaslab.h> 38#include <sys/uberblock_impl.h> 39#include <sys/txg.h> 40#include <sys/avl.h> 41#include <sys/unique.h> 42#include <sys/dsl_pool.h> 43#include <sys/dsl_dir.h> 44#include <sys/dsl_prop.h> 45#include <sys/dsl_scan.h> 46#include <sys/fs/zfs.h> 47#include <sys/metaslab_impl.h> 48#include <sys/arc.h> 49#include <sys/ddt.h> 50#include "zfs_prop.h" 51#include "zfeature_common.h" 52 53/* 54 * SPA locking 55 * 56 * There are four basic locks for managing spa_t structures: 57 * 58 * spa_namespace_lock (global mutex) 59 * 60 * This lock must be acquired to do any of the following: 61 * 62 * - Lookup a spa_t by name 63 * - Add or remove a spa_t from the namespace 64 * - Increase spa_refcount from non-zero 65 * - Check if spa_refcount is zero 66 * - Rename a spa_t 67 * - add/remove/attach/detach devices 68 * - Held for the duration of create/destroy/import/export 69 * 70 * It does not need to handle recursion. A create or destroy may 71 * reference objects (files or zvols) in other pools, but by 72 * definition they must have an existing reference, and will never need 73 * to lookup a spa_t by name. 74 * 75 * spa_refcount (per-spa refcount_t protected by mutex) 76 * 77 * This reference count keep track of any active users of the spa_t. The 78 * spa_t cannot be destroyed or freed while this is non-zero. Internally, 79 * the refcount is never really 'zero' - opening a pool implicitly keeps 80 * some references in the DMU. Internally we check against spa_minref, but 81 * present the image of a zero/non-zero value to consumers. 
82 * 83 * spa_config_lock[] (per-spa array of rwlocks) 84 * 85 * This protects the spa_t from config changes, and must be held in 86 * the following circumstances: 87 * 88 * - RW_READER to perform I/O to the spa 89 * - RW_WRITER to change the vdev config 90 * 91 * The locking order is fairly straightforward: 92 * 93 * spa_namespace_lock -> spa_refcount 94 * 95 * The namespace lock must be acquired to increase the refcount from 0 96 * or to check if it is zero. 97 * 98 * spa_refcount -> spa_config_lock[] 99 * 100 * There must be at least one valid reference on the spa_t to acquire 101 * the config lock. 102 * 103 * spa_namespace_lock -> spa_config_lock[] 104 * 105 * The namespace lock must always be taken before the config lock. 106 * 107 * 108 * The spa_namespace_lock can be acquired directly and is globally visible. 109 * 110 * The namespace is manipulated using the following functions, all of which 111 * require the spa_namespace_lock to be held. 112 * 113 * spa_lookup() Lookup a spa_t by name. 114 * 115 * spa_add() Create a new spa_t in the namespace. 116 * 117 * spa_remove() Remove a spa_t from the namespace. This also 118 * frees up any memory associated with the spa_t. 119 * 120 * spa_next() Returns the next spa_t in the system, or the 121 * first if NULL is passed. 122 * 123 * spa_evict_all() Shutdown and remove all spa_t structures in 124 * the system. 125 * 126 * spa_guid_exists() Determine whether a pool/device guid exists. 127 * 128 * The spa_refcount is manipulated using the following functions: 129 * 130 * spa_open_ref() Adds a reference to the given spa_t. Must be 131 * called with spa_namespace_lock held if the 132 * refcount is currently zero. 133 * 134 * spa_close() Remove a reference from the spa_t. This will 135 * not free the spa_t or remove it from the 136 * namespace. No locking is required. 137 * 138 * spa_refcount_zero() Returns true if the refcount is currently 139 * zero. Must be called with spa_namespace_lock 140 * held. 
141 * 142 * The spa_config_lock[] is an array of rwlocks, ordered as follows: 143 * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV. 144 * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}(). 145 * 146 * To read the configuration, it suffices to hold one of these locks as reader. 147 * To modify the configuration, you must hold all locks as writer. To modify 148 * vdev state without altering the vdev tree's topology (e.g. online/offline), 149 * you must hold SCL_STATE and SCL_ZIO as writer. 150 * 151 * We use these distinct config locks to avoid recursive lock entry. 152 * For example, spa_sync() (which holds SCL_CONFIG as reader) induces 153 * block allocations (SCL_ALLOC), which may require reading space maps 154 * from disk (dmu_read() -> zio_read() -> SCL_ZIO). 155 * 156 * The spa config locks cannot be normal rwlocks because we need the 157 * ability to hand off ownership. For example, SCL_ZIO is acquired 158 * by the issuing thread and later released by an interrupt thread. 159 * They do, however, obey the usual write-wanted semantics to prevent 160 * writer (i.e. system administrator) starvation. 161 * 162 * The lock acquisition rules are as follows: 163 * 164 * SCL_CONFIG 165 * Protects changes to the vdev tree topology, such as vdev 166 * add/remove/attach/detach. Protects the dirty config list 167 * (spa_config_dirty_list) and the set of spares and l2arc devices. 168 * 169 * SCL_STATE 170 * Protects changes to pool state and vdev state, such as vdev 171 * online/offline/fault/degrade/clear. Protects the dirty state list 172 * (spa_state_dirty_list) and global pool state (spa_state). 173 * 174 * SCL_ALLOC 175 * Protects changes to metaslab groups and classes. 176 * Held as reader by metaslab_alloc() and metaslab_claim(). 177 * 178 * SCL_ZIO 179 * Held by bp-level zios (those which have no io_vd upon entry) 180 * to prevent changes to the vdev tree. 
The bp-level zio implicitly 181 * protects all of its vdev child zios, which do not hold SCL_ZIO. 182 * 183 * SCL_FREE 184 * Protects changes to metaslab groups and classes. 185 * Held as reader by metaslab_free(). SCL_FREE is distinct from 186 * SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free 187 * blocks in zio_done() while another i/o that holds either 188 * SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete. 189 * 190 * SCL_VDEV 191 * Held as reader to prevent changes to the vdev tree during trivial 192 * inquiries such as bp_get_dsize(). SCL_VDEV is distinct from the 193 * other locks, and lower than all of them, to ensure that it's safe 194 * to acquire regardless of caller context. 195 * 196 * In addition, the following rules apply: 197 * 198 * (a) spa_props_lock protects pool properties, spa_config and spa_config_list. 199 * The lock ordering is SCL_CONFIG > spa_props_lock. 200 * 201 * (b) I/O operations on leaf vdevs. For any zio operation that takes 202 * an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(), 203 * or zio_write_phys() -- the caller must ensure that the config 204 * cannot change in the interim, and that the vdev cannot be reopened. 205 * SCL_STATE as reader suffices for both. 206 * 207 * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit(). 208 * 209 * spa_vdev_enter() Acquire the namespace lock and the config lock 210 * for writing. 211 * 212 * spa_vdev_exit() Release the config lock, wait for all I/O 213 * to complete, sync the updated configs to the 214 * cache, and release the namespace lock. 215 * 216 * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit(). 217 * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual 218 * locking is, always, based on spa_namespace_lock and spa_config_lock[]. 219 * 220 * spa_rename() is also implemented within this file since it requires 221 * manipulation of the namespace. 
222 / 223* 224static avl_tree_t spa_namespace_avl; 225kmutex_t spa_namespace_lock; 226static kcondvar_t spa_namespace_cv; 227static int spa_active_count; 228int spa_max_replication_override = SPA_DVAS_PER_BP; 229 230static kmutex_t spa_spare_lock; 231static avl_tree_t spa_spare_avl; 232static kmutex_t spa_l2cache_lock; 233static avl_tree_t spa_l2cache_avl; 234 235kmem_cache_t spa_buffer_pool; 236int spa_mode_global; 237* 238#ifdef ZFS_DEBUG 239/* Everything except dprintf is on by default in debug builds / 240int zfs_flags = ~ZFS_DEBUG_DPRINTF; 241#else 242int zfs_flags = 0; 243#endif 244* 245/* 246 * zfs_recover can be set to nonzero to attempt to recover from 247 * otherwise-fatal errors, typically caused by on-disk corruption. When 248 * set, calls to zfs_panic_recover() will turn into warning messages. 249 / 250int zfs_recover = 0; 251SYSCTL_DECL(_vfs_zfs); 252TUNABLE_INT("vfs.zfs.recover", &zfs_recover); 253SYSCTL_INT(_vfs_zfs, OID_AUTO, recover, CTLFLAG_RDTUN, &zfs_recover, 0, 254* "Try to recover from otherwise-fatal errors."); 255	31#include <sys/zio.h> 32#include <sys/zio_checksum.h> 33#include <sys/zio_compress.h> 34#include <sys/dmu.h> 35#include <sys/dmu_tx.h> 36#include <sys/zap.h> 37#include <sys/zil.h> 38#include <sys/vdev_impl.h> 39#include <sys/metaslab.h> 40#include <sys/uberblock_impl.h> 41#include <sys/txg.h> 42#include <sys/avl.h> 43#include <sys/unique.h> 44#include <sys/dsl_pool.h> 45#include <sys/dsl_dir.h> 46#include <sys/dsl_prop.h> 47#include <sys/dsl_scan.h> 48#include <sys/fs/zfs.h> 49#include <sys/metaslab_impl.h> 50#include <sys/arc.h> 51#include <sys/ddt.h> 52#include "zfs_prop.h" 53#include "zfeature_common.h" 54 55/* 56 * SPA locking 57 * 58 * There are four basic locks for managing spa_t structures: 59 * 60 * spa_namespace_lock (global mutex) 61 * 62 * This lock must be acquired to do any of the following: 63 * 64 * - Lookup a spa_t by name 65 * - Add or remove a spa_t from the namespace 66 * - Increase spa_refcount from non-zero 
67 * - Check if spa_refcount is zero 68 * - Rename a spa_t 69 * - add/remove/attach/detach devices 70 * - Held for the duration of create/destroy/import/export 71 * 72 * It does not need to handle recursion. A create or destroy may 73 * reference objects (files or zvols) in other pools, but by 74 * definition they must have an existing reference, and will never need 75 * to lookup a spa_t by name. 76 * 77 * spa_refcount (per-spa refcount_t protected by mutex) 78 * 79 * This reference count keep track of any active users of the spa_t. The 80 * spa_t cannot be destroyed or freed while this is non-zero. Internally, 81 * the refcount is never really 'zero' - opening a pool implicitly keeps 82 * some references in the DMU. Internally we check against spa_minref, but 83 * present the image of a zero/non-zero value to consumers. 84 * 85 * spa_config_lock[] (per-spa array of rwlocks) 86 * 87 * This protects the spa_t from config changes, and must be held in 88 * the following circumstances: 89 * 90 * - RW_READER to perform I/O to the spa 91 * - RW_WRITER to change the vdev config 92 * 93 * The locking order is fairly straightforward: 94 * 95 * spa_namespace_lock -> spa_refcount 96 * 97 * The namespace lock must be acquired to increase the refcount from 0 98 * or to check if it is zero. 99 * 100 * spa_refcount -> spa_config_lock[] 101 * 102 * There must be at least one valid reference on the spa_t to acquire 103 * the config lock. 104 * 105 * spa_namespace_lock -> spa_config_lock[] 106 * 107 * The namespace lock must always be taken before the config lock. 108 * 109 * 110 * The spa_namespace_lock can be acquired directly and is globally visible. 111 * 112 * The namespace is manipulated using the following functions, all of which 113 * require the spa_namespace_lock to be held. 114 * 115 * spa_lookup() Lookup a spa_t by name. 116 * 117 * spa_add() Create a new spa_t in the namespace. 118 * 119 * spa_remove() Remove a spa_t from the namespace. 
This also 120 * frees up any memory associated with the spa_t. 121 * 122 * spa_next() Returns the next spa_t in the system, or the 123 * first if NULL is passed. 124 * 125 * spa_evict_all() Shutdown and remove all spa_t structures in 126 * the system. 127 * 128 * spa_guid_exists() Determine whether a pool/device guid exists. 129 * 130 * The spa_refcount is manipulated using the following functions: 131 * 132 * spa_open_ref() Adds a reference to the given spa_t. Must be 133 * called with spa_namespace_lock held if the 134 * refcount is currently zero. 135 * 136 * spa_close() Remove a reference from the spa_t. This will 137 * not free the spa_t or remove it from the 138 * namespace. No locking is required. 139 * 140 * spa_refcount_zero() Returns true if the refcount is currently 141 * zero. Must be called with spa_namespace_lock 142 * held. 143 * 144 * The spa_config_lock[] is an array of rwlocks, ordered as follows: 145 * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV. 146 * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}(). 147 * 148 * To read the configuration, it suffices to hold one of these locks as reader. 149 * To modify the configuration, you must hold all locks as writer. To modify 150 * vdev state without altering the vdev tree's topology (e.g. online/offline), 151 * you must hold SCL_STATE and SCL_ZIO as writer. 152 * 153 * We use these distinct config locks to avoid recursive lock entry. 154 * For example, spa_sync() (which holds SCL_CONFIG as reader) induces 155 * block allocations (SCL_ALLOC), which may require reading space maps 156 * from disk (dmu_read() -> zio_read() -> SCL_ZIO). 157 * 158 * The spa config locks cannot be normal rwlocks because we need the 159 * ability to hand off ownership. For example, SCL_ZIO is acquired 160 * by the issuing thread and later released by an interrupt thread. 161 * They do, however, obey the usual write-wanted semantics to prevent 162 * writer (i.e. 
system administrator) starvation. 163 * 164 * The lock acquisition rules are as follows: 165 * 166 * SCL_CONFIG 167 * Protects changes to the vdev tree topology, such as vdev 168 * add/remove/attach/detach. Protects the dirty config list 169 * (spa_config_dirty_list) and the set of spares and l2arc devices. 170 * 171 * SCL_STATE 172 * Protects changes to pool state and vdev state, such as vdev 173 * online/offline/fault/degrade/clear. Protects the dirty state list 174 * (spa_state_dirty_list) and global pool state (spa_state). 175 * 176 * SCL_ALLOC 177 * Protects changes to metaslab groups and classes. 178 * Held as reader by metaslab_alloc() and metaslab_claim(). 179 * 180 * SCL_ZIO 181 * Held by bp-level zios (those which have no io_vd upon entry) 182 * to prevent changes to the vdev tree. The bp-level zio implicitly 183 * protects all of its vdev child zios, which do not hold SCL_ZIO. 184 * 185 * SCL_FREE 186 * Protects changes to metaslab groups and classes. 187 * Held as reader by metaslab_free(). SCL_FREE is distinct from 188 * SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free 189 * blocks in zio_done() while another i/o that holds either 190 * SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete. 191 * 192 * SCL_VDEV 193 * Held as reader to prevent changes to the vdev tree during trivial 194 * inquiries such as bp_get_dsize(). SCL_VDEV is distinct from the 195 * other locks, and lower than all of them, to ensure that it's safe 196 * to acquire regardless of caller context. 197 * 198 * In addition, the following rules apply: 199 * 200 * (a) spa_props_lock protects pool properties, spa_config and spa_config_list. 201 * The lock ordering is SCL_CONFIG > spa_props_lock. 202 * 203 * (b) I/O operations on leaf vdevs. 
For any zio operation that takes 204 * an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(), 205 * or zio_write_phys() -- the caller must ensure that the config 206 * cannot change in the interim, and that the vdev cannot be reopened. 207 * SCL_STATE as reader suffices for both. 208 * 209 * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit(). 210 * 211 * spa_vdev_enter() Acquire the namespace lock and the config lock 212 * for writing. 213 * 214 * spa_vdev_exit() Release the config lock, wait for all I/O 215 * to complete, sync the updated configs to the 216 * cache, and release the namespace lock. 217 * 218 * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit(). 219 * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual 220 * locking is, always, based on spa_namespace_lock and spa_config_lock[]. 221 * 222 * spa_rename() is also implemented within this file since it requires 223 * manipulation of the namespace. 224 / 225* 226static avl_tree_t spa_namespace_avl; 227kmutex_t spa_namespace_lock; 228static kcondvar_t spa_namespace_cv; 229static int spa_active_count; 230int spa_max_replication_override = SPA_DVAS_PER_BP; 231 232static kmutex_t spa_spare_lock; 233static avl_tree_t spa_spare_avl; 234static kmutex_t spa_l2cache_lock; 235static avl_tree_t spa_l2cache_avl; 236 237kmem_cache_t spa_buffer_pool; 238int spa_mode_global; 239* 240#ifdef ZFS_DEBUG 241/* Everything except dprintf is on by default in debug builds / 242int zfs_flags = ~ZFS_DEBUG_DPRINTF; 243#else 244int zfs_flags = 0; 245#endif 246* 247/* 248 * zfs_recover can be set to nonzero to attempt to recover from 249 * otherwise-fatal errors, typically caused by on-disk corruption. When 250 * set, calls to zfs_panic_recover() will turn into warning messages. 
251 / 252int zfs_recover = 0; 253SYSCTL_DECL(_vfs_zfs); 254TUNABLE_INT("vfs.zfs.recover", &zfs_recover); 255SYSCTL_INT(_vfs_zfs, OID_AUTO, recover, CTLFLAG_RDTUN, &zfs_recover, 0, 256* "Try to recover from otherwise-fatal errors."); 257
	258extern int zfs_txg_synctime_ms;
256 257/*	259 260/*
	261 * Expiration time in units of zfs_txg_synctime_ms. This value has two 262 * meanings. First it is used to determine when the spa_deadman logic 263 * should fire. By default the spa_deadman will fire if spa_sync has 264 * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds). 265 * Secondly, the value determines if an I/O is considered "hung". 266 * Any I/O that has not completed in zfs_deadman_synctime is considered 267 * "hung", resulting in a system panic. By default that limit is also 268 * 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds). 269 / 270uint64_t zfs_deadman_synctime = 1000ULL; 271TUNABLE_QUAD("vfs.zfs.deadman_synctime", &zfs_deadman_synctime); 272SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_synctime, CTLFLAG_RDTUN, 273* &zfs_deadman_synctime, 0, 274 "Stalled ZFS I/O expiration time in units of vfs.zfs.txg_synctime_ms"); 275 276/* 277 * Default value of -1 for zfs_deadman_enabled is resolved in 278 * zfs_deadman_init() 279 / 280int zfs_deadman_enabled = -1; 281TUNABLE_INT("vfs.zfs.deadman_enabled", &zfs_deadman_enabled); 282SYSCTL_INT(_vfs_zfs, OID_AUTO, deadman_enabled, CTLFLAG_RDTUN, 283* &zfs_deadman_enabled, 0, "Kernel panic on stalled ZFS I/O"); 284 285#ifndef illumos 286#ifdef _KERNEL 287static void 288zfs_deadman_init() 289{ 290 /* 291 * If zfs_deadman_enabled was left at -1, enable the deadman only on 292 * i386/amd64 outside a virtual machine; otherwise disable it by default 293 / 294* if (zfs_deadman_enabled == -1) { 295#if defined(__amd64__) \|\| defined(__i386__) 296 zfs_deadman_enabled = (vm_guest == VM_GUEST_NO) ? 1 : 0; 297#else 298 zfs_deadman_enabled = 0; 299#endif 300 } 301} 302#endif /* _KERNEL / 303#endif / !illumos / 304* 305/*
258 * ========================================================================== 259 * SPA config locking 260 * ========================================================================== 261 / 262static void 263spa_config_lock_init(spa_t spa) 264{ 265 for (int i = 0; i < SCL_LOCKS; i++) { 266 spa_config_lock_t scl = &spa->spa_config_lock[i]; 267* mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL); 268 cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL); 269 refcount_create(&scl->scl_count); 270 scl->scl_writer = NULL; 271 scl->scl_write_wanted = 0; 272 } 273} 274 275static void 276spa_config_lock_destroy(spa_t spa) 277{ 278* for (int i = 0; i < SCL_LOCKS; i++) { 279 spa_config_lock_t scl = &spa->spa_config_lock[i]; 280* mutex_destroy(&scl->scl_lock); 281 cv_destroy(&scl->scl_cv); 282 refcount_destroy(&scl->scl_count); 283 ASSERT(scl->scl_writer == NULL); 284 ASSERT(scl->scl_write_wanted == 0); 285 } 286} 287 288int 289spa_config_tryenter(spa_t spa, int locks, void tag, krw_t rw) 290{ 291 for (int i = 0; i < SCL_LOCKS; i++) { 292 spa_config_lock_t scl = &spa->spa_config_lock[i]; 293* if (!(locks & (1 << i))) 294 continue; 295 mutex_enter(&scl->scl_lock); 296 if (rw == RW_READER) { 297 if (scl->scl_writer \|\| scl->scl_write_wanted) { 298 mutex_exit(&scl->scl_lock); 299 spa_config_exit(spa, locks ^ (1 << i), tag); 300 return (0); 301 } 302 } else { 303 ASSERT(scl->scl_writer != curthread); 304 if (!refcount_is_zero(&scl->scl_count)) { 305 mutex_exit(&scl->scl_lock); 306 spa_config_exit(spa, locks ^ (1 << i), tag); 307 return (0); 308 } 309 scl->scl_writer = curthread; 310 } 311 (void) refcount_add(&scl->scl_count, tag); 312 mutex_exit(&scl->scl_lock); 313 } 314 return (1); 315} 316 317void 318spa_config_enter(spa_t spa, int locks, void tag, krw_t rw) 319{ 320 int wlocks_held = 0; 321 322 for (int i = 0; i < SCL_LOCKS; i++) { 323 spa_config_lock_t scl = &spa->spa_config_lock[i]; 324* if (scl->scl_writer == curthread) 325 wlocks_held \|= (1 << i); 326 if (!(locks & (1 << 
i))) 327 continue; 328 mutex_enter(&scl->scl_lock); 329 if (rw == RW_READER) { 330 while (scl->scl_writer \|\| scl->scl_write_wanted) { 331 cv_wait(&scl->scl_cv, &scl->scl_lock); 332 } 333 } else { 334 ASSERT(scl->scl_writer != curthread); 335 while (!refcount_is_zero(&scl->scl_count)) { 336 scl->scl_write_wanted++; 337 cv_wait(&scl->scl_cv, &scl->scl_lock); 338 scl->scl_write_wanted--; 339 } 340 scl->scl_writer = curthread; 341 } 342 (void) refcount_add(&scl->scl_count, tag); 343 mutex_exit(&scl->scl_lock); 344 } 345 ASSERT(wlocks_held <= locks); 346} 347 348void 349spa_config_exit(spa_t spa, int locks, void tag) 350{ 351 for (int i = SCL_LOCKS - 1; i >= 0; i--) { 352 spa_config_lock_t scl = &spa->spa_config_lock[i]; 353* if (!(locks & (1 << i))) 354 continue; 355 mutex_enter(&scl->scl_lock); 356 ASSERT(!refcount_is_zero(&scl->scl_count)); 357 if (refcount_remove(&scl->scl_count, tag) == 0) { 358 ASSERT(scl->scl_writer == NULL \|\| 359 scl->scl_writer == curthread); 360 scl->scl_writer = NULL; /* OK in either case / 361* cv_broadcast(&scl->scl_cv); 362 } 363 mutex_exit(&scl->scl_lock); 364 } 365} 366 367int 368spa_config_held(spa_t spa, int locks, krw_t rw) 369{ 370* int locks_held = 0; 371 372 for (int i = 0; i < SCL_LOCKS; i++) { 373 spa_config_lock_t scl = &spa->spa_config_lock[i]; 374* if (!(locks & (1 << i))) 375 continue; 376 if ((rw == RW_READER && !refcount_is_zero(&scl->scl_count)) \|\| 377 (rw == RW_WRITER && scl->scl_writer == curthread)) 378 locks_held \|= 1 << i; 379 } 380 381 return (locks_held); 382} 383 384/* 385 * ========================================================================== 386 * SPA namespace functions 387 * ========================================================================== 388 / 389* 390/* 391 * Lookup the named spa_t in the AVL tree. The spa_namespace_lock must be held. 392 * Returns NULL if no matching spa_t is found. 
393 / 394spa_t 395spa_lookup(const char name) 396{ 397* static spa_t search; /* spa_t is large; don't allocate on stack / 398* spa_t spa; 399* avl_index_t where; 400 char c; 401 char cp; 402* 403 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 404 405 /* 406 * If it's a full dataset name, figure out the pool name and 407 * just use that. 408 / 409* cp = strpbrk(name, "/@"); 410 if (cp) { 411 c = cp; 412* cp = '\0'; 413* } 414 415 (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); 416 spa = avl_find(&spa_namespace_avl, &search, &where); 417 418 if (cp) 419 cp = c; 420* 421 return (spa); 422} 423 424/*	306 * ========================================================================== 307 * SPA config locking 308 * ========================================================================== 309 / 310static void 311spa_config_lock_init(spa_t spa) 312{ 313 for (int i = 0; i < SCL_LOCKS; i++) { 314 spa_config_lock_t scl = &spa->spa_config_lock[i]; 315* mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL); 316 cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL); 317 refcount_create(&scl->scl_count); 318 scl->scl_writer = NULL; 319 scl->scl_write_wanted = 0; 320 } 321} 322 323static void 324spa_config_lock_destroy(spa_t spa) 325{ 326* for (int i = 0; i < SCL_LOCKS; i++) { 327 spa_config_lock_t scl = &spa->spa_config_lock[i]; 328* mutex_destroy(&scl->scl_lock); 329 cv_destroy(&scl->scl_cv); 330 refcount_destroy(&scl->scl_count); 331 ASSERT(scl->scl_writer == NULL); 332 ASSERT(scl->scl_write_wanted == 0); 333 } 334} 335 336int 337spa_config_tryenter(spa_t spa, int locks, void tag, krw_t rw) 338{ 339 for (int i = 0; i < SCL_LOCKS; i++) { 340 spa_config_lock_t scl = &spa->spa_config_lock[i]; 341* if (!(locks & (1 << i))) 342 continue; 343 mutex_enter(&scl->scl_lock); 344 if (rw == RW_READER) { 345 if (scl->scl_writer \|\| scl->scl_write_wanted) { 346 mutex_exit(&scl->scl_lock); 347 spa_config_exit(spa, locks ^ (1 << i), tag); 348 return (0); 349 } 350 } else { 351 
ASSERT(scl->scl_writer != curthread); 352 if (!refcount_is_zero(&scl->scl_count)) { 353 mutex_exit(&scl->scl_lock); 354 spa_config_exit(spa, locks ^ (1 << i), tag); 355 return (0); 356 } 357 scl->scl_writer = curthread; 358 } 359 (void) refcount_add(&scl->scl_count, tag); 360 mutex_exit(&scl->scl_lock); 361 } 362 return (1); 363} 364 365void 366spa_config_enter(spa_t spa, int locks, void tag, krw_t rw) 367{ 368 int wlocks_held = 0; 369 370 for (int i = 0; i < SCL_LOCKS; i++) { 371 spa_config_lock_t scl = &spa->spa_config_lock[i]; 372* if (scl->scl_writer == curthread) 373 wlocks_held \|= (1 << i); 374 if (!(locks & (1 << i))) 375 continue; 376 mutex_enter(&scl->scl_lock); 377 if (rw == RW_READER) { 378 while (scl->scl_writer \|\| scl->scl_write_wanted) { 379 cv_wait(&scl->scl_cv, &scl->scl_lock); 380 } 381 } else { 382 ASSERT(scl->scl_writer != curthread); 383 while (!refcount_is_zero(&scl->scl_count)) { 384 scl->scl_write_wanted++; 385 cv_wait(&scl->scl_cv, &scl->scl_lock); 386 scl->scl_write_wanted--; 387 } 388 scl->scl_writer = curthread; 389 } 390 (void) refcount_add(&scl->scl_count, tag); 391 mutex_exit(&scl->scl_lock); 392 } 393 ASSERT(wlocks_held <= locks); 394} 395 396void 397spa_config_exit(spa_t spa, int locks, void tag) 398{ 399 for (int i = SCL_LOCKS - 1; i >= 0; i--) { 400 spa_config_lock_t scl = &spa->spa_config_lock[i]; 401* if (!(locks & (1 << i))) 402 continue; 403 mutex_enter(&scl->scl_lock); 404 ASSERT(!refcount_is_zero(&scl->scl_count)); 405 if (refcount_remove(&scl->scl_count, tag) == 0) { 406 ASSERT(scl->scl_writer == NULL \|\| 407 scl->scl_writer == curthread); 408 scl->scl_writer = NULL; /* OK in either case / 409* cv_broadcast(&scl->scl_cv); 410 } 411 mutex_exit(&scl->scl_lock); 412 } 413} 414 415int 416spa_config_held(spa_t spa, int locks, krw_t rw) 417{ 418* int locks_held = 0; 419 420 for (int i = 0; i < SCL_LOCKS; i++) { 421 spa_config_lock_t scl = &spa->spa_config_lock[i]; 422* if (!(locks & (1 << i))) 423 continue; 424 if ((rw == 
RW_READER && !refcount_is_zero(&scl->scl_count)) \|\| 425 (rw == RW_WRITER && scl->scl_writer == curthread)) 426 locks_held \|= 1 << i; 427 } 428 429 return (locks_held); 430} 431 432/* 433 * ========================================================================== 434 * SPA namespace functions 435 * ========================================================================== 436 / 437* 438/* 439 * Lookup the named spa_t in the AVL tree. The spa_namespace_lock must be held. 440 * Returns NULL if no matching spa_t is found. 441 / 442spa_t 443spa_lookup(const char name) 444{ 445* static spa_t search; /* spa_t is large; don't allocate on stack / 446* spa_t spa; 447* avl_index_t where; 448 char c; 449 char cp; 450* 451 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 452 453 /* 454 * If it's a full dataset name, figure out the pool name and 455 * just use that. 456 / 457* cp = strpbrk(name, "/@"); 458 if (cp) { 459 c = cp; 460* cp = '\0'; 461* } 462 463 (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); 464 spa = avl_find(&spa_namespace_avl, &search, &where); 465 466 if (cp) 467 cp = c; 468* 469 return (spa); 470} 471 472/*
	473 * Fires when spa_sync has not completed within zfs_deadman_synctime (a 474 * multiple of zfs_txg_synctime_ms). If the zfs_deadman_enabled flag is set 475 * then it inspects all vdev queues looking for potentially hung I/Os. 476 / 477void 478spa_deadman(void arg) 479{ 480 spa_t spa = arg; 481* 482 zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu", 483 (gethrtime() - spa->spa_sync_starttime) / NANOSEC, 484 ++spa->spa_deadman_calls); 485 if (zfs_deadman_enabled) 486 vdev_deadman(spa->spa_root_vdev); 487} 488 489/*
425 * Create an uninitialized spa_t with the given name. Requires 426 * spa_namespace_lock. The caller must ensure that the spa_t doesn't already 427 * exist by calling spa_lookup() first. 428 / 429spa_t 430spa_add(const char name, nvlist_t config, const char altroot) 431{ 432* spa_t spa; 433* spa_config_dirent_t *dp;	490 * Create an uninitialized spa_t with the given name. Requires 491 * spa_namespace_lock. The caller must ensure that the spa_t doesn't already 492 * exist by calling spa_lookup() first. 493 / 494spa_t 495spa_add(const char name, nvlist_t config, const char altroot) 496{ 497* spa_t spa; 498* spa_config_dirent_t *dp;
	499#ifdef illumos 500 cyc_handler_t hdlr; 501 cyc_time_t when; 502#endif
434 435 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 436 437 spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP); 438 439 mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); 440 mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); 441 mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL); 442 mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); 443 mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL); 444 mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); 445 mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL); 446 mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL); 447 mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL); 448 449 cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); 450 cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL); 451 cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL); 452 cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL); 453 454 for (int t = 0; t < TXG_SIZE; t++) 455 bplist_create(&spa->spa_free_bplist[t]); 456 457 (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name)); 458 spa->spa_state = POOL_STATE_UNINITIALIZED; 459 spa->spa_freeze_txg = UINT64_MAX; 460 spa->spa_final_txg = UINT64_MAX; 461 spa->spa_load_max_txg = UINT64_MAX; 462 spa->spa_proc = &p0; 463 spa->spa_proc_state = SPA_PROC_NONE; 464	503 504 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 505 506 spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP); 507 508 mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); 509 mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); 510 mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL); 511 mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); 512 mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL); 513 mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); 514 mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL); 515 mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL); 516 mutex_init(&spa->spa_vdev_top_lock, 
NULL, MUTEX_DEFAULT, NULL); 517 518 cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); 519 cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL); 520 cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL); 521 cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL); 522 523 for (int t = 0; t < TXG_SIZE; t++) 524 bplist_create(&spa->spa_free_bplist[t]); 525 526 (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name)); 527 spa->spa_state = POOL_STATE_UNINITIALIZED; 528 spa->spa_freeze_txg = UINT64_MAX; 529 spa->spa_final_txg = UINT64_MAX; 530 spa->spa_load_max_txg = UINT64_MAX; 531 spa->spa_proc = &p0; 532 spa->spa_proc_state = SPA_PROC_NONE; 533
	534#ifdef illumos 535 hdlr.cyh_func = spa_deadman; 536 hdlr.cyh_arg = spa; 537 hdlr.cyh_level = CY_LOW_LEVEL; 538#endif 539 540 spa->spa_deadman_synctime = zfs_deadman_synctime * 541 zfs_txg_synctime_ms * MICROSEC; 542 543#ifdef illumos 544 /* 545 * This determines how often we need to check for hung I/Os after 546 * the cyclic has already fired. Since checking for hung I/Os is 547 * an expensive operation we don't want to check too frequently. 548 * Instead wait for 5 synctimes before checking again. 549 / 550* when.cyt_interval = 5ULL * zfs_txg_synctime_ms * MICROSEC; 551 when.cyt_when = CY_INFINITY; 552 mutex_enter(&cpu_lock); 553 spa->spa_deadman_cycid = cyclic_add(&hdlr, &when); 554 mutex_exit(&cpu_lock); 555#else /* !illumos / 556#ifdef _KERNEL 557* callout_init(&spa->spa_deadman_cycid, CALLOUT_MPSAFE); 558#endif 559#endif
465 refcount_create(&spa->spa_refcount); 466 spa_config_lock_init(spa); 467 468 avl_add(&spa_namespace_avl, spa); 469 470 /* 471 * Set the alternate root, if there is one. 472 / 473* if (altroot) { 474 spa->spa_root = spa_strdup(altroot); 475 spa_active_count++; 476 } 477 478 /* 479 * Every pool starts with the default cachefile 480 / 481* list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t), 482 offsetof(spa_config_dirent_t, scd_link)); 483 484 dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP); 485 dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path); 486 list_insert_head(&spa->spa_config_list, dp); 487 488 VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, 489 KM_SLEEP) == 0); 490 491 if (config != NULL) { 492 nvlist_t features; 493* 494 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, 495 &features) == 0) { 496 VERIFY(nvlist_dup(features, &spa->spa_label_features, 497 0) == 0); 498 } 499 500 VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); 501 } 502 503 if (spa->spa_label_features == NULL) { 504 VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME, 505 KM_SLEEP) == 0); 506 } 507 508 return (spa); 509} 510 511/* 512 * Removes a spa_t from the namespace, freeing up any memory used. Requires 513 * spa_namespace_lock. This is called only after the spa_t has been closed and 514 * deactivated. 
515 / 516void 517spa_remove(spa_t spa) 518{ 519 spa_config_dirent_t dp; 520* 521 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 522 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 523 524 nvlist_free(spa->spa_config_splitting); 525 526 avl_remove(&spa_namespace_avl, spa); 527 cv_broadcast(&spa_namespace_cv); 528 529 if (spa->spa_root) { 530 spa_strfree(spa->spa_root); 531 spa_active_count--; 532 } 533 534 while ((dp = list_head(&spa->spa_config_list)) != NULL) { 535 list_remove(&spa->spa_config_list, dp); 536 if (dp->scd_path != NULL) 537 spa_strfree(dp->scd_path); 538 kmem_free(dp, sizeof (spa_config_dirent_t)); 539 } 540 541 list_destroy(&spa->spa_config_list); 542 543 nvlist_free(spa->spa_label_features); 544 nvlist_free(spa->spa_load_info); 545 spa_config_set(spa, NULL); 546	560 refcount_create(&spa->spa_refcount); 561 spa_config_lock_init(spa); 562 563 avl_add(&spa_namespace_avl, spa); 564 565 /* 566 * Set the alternate root, if there is one. 567 / 568* if (altroot) { 569 spa->spa_root = spa_strdup(altroot); 570 spa_active_count++; 571 } 572 573 /* 574 * Every pool starts with the default cachefile 575 / 576* list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t), 577 offsetof(spa_config_dirent_t, scd_link)); 578 579 dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP); 580 dp->scd_path = altroot ? 
NULL : spa_strdup(spa_config_path); 581 list_insert_head(&spa->spa_config_list, dp); 582 583 VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, 584 KM_SLEEP) == 0); 585 586 if (config != NULL) { 587 nvlist_t features; 588* 589 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, 590 &features) == 0) { 591 VERIFY(nvlist_dup(features, &spa->spa_label_features, 592 0) == 0); 593 } 594 595 VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); 596 } 597 598 if (spa->spa_label_features == NULL) { 599 VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME, 600 KM_SLEEP) == 0); 601 } 602 603 return (spa); 604} 605 606/* 607 * Removes a spa_t from the namespace, freeing up any memory used. Requires 608 * spa_namespace_lock. This is called only after the spa_t has been closed and 609 * deactivated. 610 / 611void 612spa_remove(spa_t spa) 613{ 614 spa_config_dirent_t dp; 615* 616 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 617 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 618 619 nvlist_free(spa->spa_config_splitting); 620 621 avl_remove(&spa_namespace_avl, spa); 622 cv_broadcast(&spa_namespace_cv); 623 624 if (spa->spa_root) { 625 spa_strfree(spa->spa_root); 626 spa_active_count--; 627 } 628 629 while ((dp = list_head(&spa->spa_config_list)) != NULL) { 630 list_remove(&spa->spa_config_list, dp); 631 if (dp->scd_path != NULL) 632 spa_strfree(dp->scd_path); 633 kmem_free(dp, sizeof (spa_config_dirent_t)); 634 } 635 636 list_destroy(&spa->spa_config_list); 637 638 nvlist_free(spa->spa_label_features); 639 nvlist_free(spa->spa_load_info); 640 spa_config_set(spa, NULL); 641
	642#ifdef illumos 643 mutex_enter(&cpu_lock); 644 if (spa->spa_deadman_cycid != CYCLIC_NONE) 645 cyclic_remove(spa->spa_deadman_cycid); 646 mutex_exit(&cpu_lock); 647 spa->spa_deadman_cycid = CYCLIC_NONE; 648#else /* !illumos / 649#ifdef _KERNEL 650* callout_drain(&spa->spa_deadman_cycid); 651#endif 652#endif 653
547 refcount_destroy(&spa->spa_refcount); 548 549 spa_config_lock_destroy(spa); 550 551 for (int t = 0; t < TXG_SIZE; t++) 552 bplist_destroy(&spa->spa_free_bplist[t]); 553 554 cv_destroy(&spa->spa_async_cv); 555 cv_destroy(&spa->spa_proc_cv); 556 cv_destroy(&spa->spa_scrub_io_cv); 557 cv_destroy(&spa->spa_suspend_cv); 558 559 mutex_destroy(&spa->spa_async_lock); 560 mutex_destroy(&spa->spa_errlist_lock); 561 mutex_destroy(&spa->spa_errlog_lock); 562 mutex_destroy(&spa->spa_history_lock); 563 mutex_destroy(&spa->spa_proc_lock); 564 mutex_destroy(&spa->spa_props_lock); 565 mutex_destroy(&spa->spa_scrub_lock); 566 mutex_destroy(&spa->spa_suspend_lock); 567 mutex_destroy(&spa->spa_vdev_top_lock); 568 569 kmem_free(spa, sizeof (spa_t)); 570} 571 572/* 573 * Given a pool, return the next pool in the namespace, or NULL if there is 574 * none. If 'prev' is NULL, return the first pool. 575 / 576spa_t 577spa_next(spa_t prev) 578{ 579* ASSERT(MUTEX_HELD(&spa_namespace_lock)); 580 581 if (prev) 582 return (AVL_NEXT(&spa_namespace_avl, prev)); 583 else 584 return (avl_first(&spa_namespace_avl)); 585} 586 587/* 588 * ========================================================================== 589 * SPA refcount functions 590 * ========================================================================== 591 / 592* 593/* 594 * Add a reference to the given spa_t. Must have at least one reference, or 595 * have the namespace lock held. 596 / 597void 598spa_open_ref(spa_t spa, void tag) 599{ 600* ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref \|\| 601 MUTEX_HELD(&spa_namespace_lock)); 602 (void) refcount_add(&spa->spa_refcount, tag); 603} 604 605/* 606 * Remove a reference to the given spa_t. Must have at least one reference, or 607 * have the namespace lock held. 
608 / 609void 610spa_close(spa_t spa, void tag) 611{ 612* ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref \|\| 613 MUTEX_HELD(&spa_namespace_lock)); 614 (void) refcount_remove(&spa->spa_refcount, tag); 615} 616 617/* 618 * Check to see if the spa refcount is zero. Must be called with 619 * spa_namespace_lock held. We really compare against spa_minref, which is the 620 * number of references acquired when opening a pool 621 / 622boolean_t 623spa_refcount_zero(spa_t spa) 624{ 625 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 626 627 return (refcount_count(&spa->spa_refcount) == spa->spa_minref); 628} 629 630/* 631 * ========================================================================== 632 * SPA spare and l2cache tracking 633 * ========================================================================== 634 / 635* 636/* 637 * Hot spares and cache devices are tracked using the same code below, 638 * for 'auxiliary' devices. 639 / 640* 641typedef struct spa_aux { 642 uint64_t aux_guid; 643 uint64_t aux_pool; 644 avl_node_t aux_avl; 645 int aux_count; 646} spa_aux_t; 647 648static int 649spa_aux_compare(const void a, const void b) 650{ 651 const spa_aux_t sa = a; 652* const spa_aux_t sb = b; 653* 654 if (sa->aux_guid < sb->aux_guid) 655 return (-1); 656 else if (sa->aux_guid > sb->aux_guid) 657 return (1); 658 else 659 return (0); 660} 661 662void 663spa_aux_add(vdev_t vd, avl_tree_t avl) 664{ 665 avl_index_t where; 666 spa_aux_t search; 667 spa_aux_t aux; 668* 669 search.aux_guid = vd->vdev_guid; 670 if ((aux = avl_find(avl, &search, &where)) != NULL) { 671 aux->aux_count++; 672 } else { 673 aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP); 674 aux->aux_guid = vd->vdev_guid; 675 aux->aux_count = 1; 676 avl_insert(avl, aux, where); 677 } 678} 679 680void 681spa_aux_remove(vdev_t vd, avl_tree_t avl) 682{ 683 spa_aux_t search; 684 spa_aux_t aux; 685* avl_index_t where; 686 687 search.aux_guid = vd->vdev_guid; 688 aux = avl_find(avl, &search, &where); 689 690 
ASSERT(aux != NULL); 691 692 if (--aux->aux_count == 0) { 693 avl_remove(avl, aux); 694 kmem_free(aux, sizeof (spa_aux_t)); 695 } else if (aux->aux_pool == spa_guid(vd->vdev_spa)) { 696 aux->aux_pool = 0ULL; 697 } 698} 699 700boolean_t 701spa_aux_exists(uint64_t guid, uint64_t pool, int refcnt, avl_tree_t avl) 702{ 703* spa_aux_t search, found; 704* 705 search.aux_guid = guid; 706 found = avl_find(avl, &search, NULL); 707 708 if (pool) { 709 if (found) 710 pool = found->aux_pool; 711* else 712 pool = 0ULL; 713* } 714 715 if (refcnt) { 716 if (found) 717 refcnt = found->aux_count; 718* else 719 refcnt = 0; 720* } 721 722 return (found != NULL); 723} 724 725void 726spa_aux_activate(vdev_t vd, avl_tree_t avl) 727{ 728 spa_aux_t search, found; 729* avl_index_t where; 730 731 search.aux_guid = vd->vdev_guid; 732 found = avl_find(avl, &search, &where); 733 ASSERT(found != NULL); 734 ASSERT(found->aux_pool == 0ULL); 735 736 found->aux_pool = spa_guid(vd->vdev_spa); 737} 738 739/* 740 * Spares are tracked globally due to the following constraints: 741 * 742 * - A spare may be part of multiple pools. 743 * - A spare may be added to a pool even if it's actively in use within 744 * another pool. 745 * - A spare in use in any pool can only be the source of a replacement if 746 * the target is a spare in the same pool. 747 * 748 * We keep track of all spares on the system through the use of a reference 749 * counted AVL tree. When a vdev is added as a spare, or used as a replacement 750 * spare, then we bump the reference count in the AVL tree. In addition, we set 751 * the 'vdev_isspare' member to indicate that the device is a spare (active or 752 * inactive). When a spare is made active (used to replace a device in the 753 * pool), we also keep track of which pool its been made a part of. 754 * 755 * The 'spa_spare_lock' protects the AVL tree. These functions are normally 756 * called under the spa_namespace lock as part of vdev reconfiguration. 
The 757 * separate spare lock exists for the status query path, which does not need to 758 * be completely consistent with respect to other vdev configuration changes. 759 / 760* 761static int 762spa_spare_compare(const void a, const void b) 763{ 764 return (spa_aux_compare(a, b)); 765} 766 767void 768spa_spare_add(vdev_t vd) 769{ 770* mutex_enter(&spa_spare_lock); 771 ASSERT(!vd->vdev_isspare); 772 spa_aux_add(vd, &spa_spare_avl); 773 vd->vdev_isspare = B_TRUE; 774 mutex_exit(&spa_spare_lock); 775} 776 777void 778spa_spare_remove(vdev_t vd) 779{ 780* mutex_enter(&spa_spare_lock); 781 ASSERT(vd->vdev_isspare); 782 spa_aux_remove(vd, &spa_spare_avl); 783 vd->vdev_isspare = B_FALSE; 784 mutex_exit(&spa_spare_lock); 785} 786 787boolean_t 788spa_spare_exists(uint64_t guid, uint64_t pool, int refcnt) 789{ 790 boolean_t found; 791 792 mutex_enter(&spa_spare_lock); 793 found = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl); 794 mutex_exit(&spa_spare_lock); 795 796 return (found); 797} 798 799void 800spa_spare_activate(vdev_t vd) 801{ 802* mutex_enter(&spa_spare_lock); 803 ASSERT(vd->vdev_isspare); 804 spa_aux_activate(vd, &spa_spare_avl); 805 mutex_exit(&spa_spare_lock); 806} 807 808/* 809 * Level 2 ARC devices are tracked globally for the same reasons as spares. 810 * Cache devices currently only support one pool per cache device, and so 811 * for these devices the aux reference count is currently unused beyond 1. 
812 / 813* 814static int 815spa_l2cache_compare(const void a, const void b) 816{ 817 return (spa_aux_compare(a, b)); 818} 819 820void 821spa_l2cache_add(vdev_t vd) 822{ 823* mutex_enter(&spa_l2cache_lock); 824 ASSERT(!vd->vdev_isl2cache); 825 spa_aux_add(vd, &spa_l2cache_avl); 826 vd->vdev_isl2cache = B_TRUE; 827 mutex_exit(&spa_l2cache_lock); 828} 829 830void 831spa_l2cache_remove(vdev_t vd) 832{ 833* mutex_enter(&spa_l2cache_lock); 834 ASSERT(vd->vdev_isl2cache); 835 spa_aux_remove(vd, &spa_l2cache_avl); 836 vd->vdev_isl2cache = B_FALSE; 837 mutex_exit(&spa_l2cache_lock); 838} 839 840boolean_t 841spa_l2cache_exists(uint64_t guid, uint64_t pool) 842{ 843* boolean_t found; 844 845 mutex_enter(&spa_l2cache_lock); 846 found = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl); 847 mutex_exit(&spa_l2cache_lock); 848 849 return (found); 850} 851 852void 853spa_l2cache_activate(vdev_t vd) 854{ 855* mutex_enter(&spa_l2cache_lock); 856 ASSERT(vd->vdev_isl2cache); 857 spa_aux_activate(vd, &spa_l2cache_avl); 858 mutex_exit(&spa_l2cache_lock); 859} 860 861/* 862 * ========================================================================== 863 * SPA vdev locking 864 * ========================================================================== 865 / 866* 867/* 868 * Lock the given spa_t for the purpose of adding or removing a vdev. 869 * Grabs the global spa_namespace_lock plus the spa config lock for writing. 870 * It returns the next transaction group for the spa_t. 871 / 872uint64_t 873spa_vdev_enter(spa_t spa) 874{ 875 mutex_enter(&spa->spa_vdev_top_lock); 876 mutex_enter(&spa_namespace_lock); 877 return (spa_vdev_config_enter(spa)); 878} 879 880/* 881 * Internal implementation for spa_vdev_enter(). Used when a vdev 882 * operation requires multiple syncs (i.e. removing a device) while 883 * keeping the spa_namespace_lock held. 
884 / 885uint64_t 886spa_vdev_config_enter(spa_t spa) 887{ 888 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 889 890 spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); 891 892 return (spa_last_synced_txg(spa) + 1); 893} 894 895/* 896 * Used in combination with spa_vdev_config_enter() to allow the syncing 897 * of multiple transactions without releasing the spa_namespace_lock. 898 / 899void 900spa_vdev_config_exit(spa_t spa, vdev_t vd, uint64_t txg, int error, char tag) 901{ 902 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 903 904 int config_changed = B_FALSE; 905 906 ASSERT(txg > spa_last_synced_txg(spa)); 907 908 spa->spa_pending_vdev = NULL; 909 910 /* 911 * Reassess the DTLs. 912 / 913* vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE); 914 915 if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) { 916 config_changed = B_TRUE; 917 spa->spa_config_generation++; 918 } 919 920 /* 921 * Verify the metaslab classes. 922 / 923* ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0); 924 ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0); 925 926 spa_config_exit(spa, SCL_ALL, spa); 927 928 /* 929 * Panic the system if the specified tag requires it. This 930 * is useful for ensuring that configurations are updated 931 * transactionally. 932 / 933* if (zio_injection_enabled) 934 zio_handle_panic_injection(spa, tag, 0); 935 936 /* 937 * Note: this txg_wait_synced() is important because it ensures 938 * that there won't be more than one config change per txg. 939 * This allows us to use the txg as the generation number. 940 / 941* if (error == 0) 942 txg_wait_synced(spa->spa_dsl_pool, txg); 943 944 if (vd != NULL) { 945 ASSERT(!vd->vdev_detached \|\| vd->vdev_dtl_smo.smo_object == 0); 946 spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); 947 vdev_free(vd); 948 spa_config_exit(spa, SCL_ALL, spa); 949 } 950 951 /* 952 * If the config changed, update the config cache. 
953 / 954* if (config_changed) 955 spa_config_sync(spa, B_FALSE, B_TRUE); 956} 957 958/* 959 * Unlock the spa_t after adding or removing a vdev. Besides undoing the 960 * locking of spa_vdev_enter(), we also want make sure the transactions have 961 * synced to disk, and then update the global configuration cache with the new 962 * information. 963 / 964int 965spa_vdev_exit(spa_t spa, vdev_t vd, uint64_t txg, int error) 966{ 967* spa_vdev_config_exit(spa, vd, txg, error, FTAG); 968 mutex_exit(&spa_namespace_lock); 969 mutex_exit(&spa->spa_vdev_top_lock); 970 971 return (error); 972} 973 974/* 975 * Lock the given spa_t for the purpose of changing vdev state. 976 / 977void 978spa_vdev_state_enter(spa_t spa, int oplocks) 979{ 980 int locks = SCL_STATE_ALL \| oplocks; 981 982 /* 983 * Root pools may need to read of the underlying devfs filesystem 984 * when opening up a vdev. Unfortunately if we're holding the 985 * SCL_ZIO lock it will result in a deadlock when we try to issue 986 * the read from the root filesystem. Instead we "prefetch" 987 * the associated vnodes that we need prior to opening the 988 * underlying devices and cache them so that we can prevent 989 * any I/O when we are doing the actual open. 990 / 991* if (spa_is_root(spa)) { 992 int low = locks & ~(SCL_ZIO - 1); 993 int high = locks & ~low; 994 995 spa_config_enter(spa, high, spa, RW_WRITER); 996 vdev_hold(spa->spa_root_vdev); 997 spa_config_enter(spa, low, spa, RW_WRITER); 998 } else { 999 spa_config_enter(spa, locks, spa, RW_WRITER); 1000 } 1001 spa->spa_vdev_locks = locks; 1002} 1003 1004int 1005spa_vdev_state_exit(spa_t spa, vdev_t vd, int error) 1006{ 1007 boolean_t config_changed = B_FALSE; 1008 1009 if (vd != NULL \|\| error == 0) 1010 vdev_dtl_reassess(vd ? 
vd->vdev_top : spa->spa_root_vdev, 1011 0, 0, B_FALSE); 1012 1013 if (vd != NULL) { 1014 vdev_state_dirty(vd->vdev_top); 1015 config_changed = B_TRUE; 1016 spa->spa_config_generation++; 1017 } 1018 1019 if (spa_is_root(spa)) 1020 vdev_rele(spa->spa_root_vdev); 1021 1022 ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL); 1023 spa_config_exit(spa, spa->spa_vdev_locks, spa); 1024 1025 /* 1026 * If anything changed, wait for it to sync. This ensures that, 1027 * from the system administrator's perspective, zpool(1M) commands 1028 * are synchronous. This is important for things like zpool offline: 1029 * when the command completes, you expect no further I/O from ZFS. 1030 / 1031* if (vd != NULL) 1032 txg_wait_synced(spa->spa_dsl_pool, 0); 1033 1034 /* 1035 * If the config changed, update the config cache. 1036 / 1037* if (config_changed) { 1038 mutex_enter(&spa_namespace_lock); 1039 spa_config_sync(spa, B_FALSE, B_TRUE); 1040 mutex_exit(&spa_namespace_lock); 1041 } 1042 1043 return (error); 1044} 1045 1046/* 1047 * ========================================================================== 1048 * Miscellaneous functions 1049 * ========================================================================== 1050 / 1051* 1052void 1053spa_activate_mos_feature(spa_t spa, const char feature) 1054{ 1055 (void) nvlist_add_boolean(spa->spa_label_features, feature); 1056 vdev_config_dirty(spa->spa_root_vdev); 1057} 1058 1059void 1060spa_deactivate_mos_feature(spa_t spa, const char feature) 1061{ 1062 (void) nvlist_remove_all(spa->spa_label_features, feature); 1063 vdev_config_dirty(spa->spa_root_vdev); 1064} 1065 1066/* 1067 * Rename a spa_t. 1068 / 1069int 1070spa_rename(const char name, const char newname) 1071{ 1072* spa_t spa; 1073* int err; 1074 1075 /* 1076 * Lookup the spa_t and grab the config lock for writing. We need to 1077 * actually open the pool so that we can sync out the necessary labels. 
1078 * It's OK to call spa_open() with the namespace lock held because we 1079 * allow recursive calls for other reasons. 1080 / 1081* mutex_enter(&spa_namespace_lock); 1082 if ((err = spa_open(name, &spa, FTAG)) != 0) { 1083 mutex_exit(&spa_namespace_lock); 1084 return (err); 1085 } 1086 1087 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1088 1089 avl_remove(&spa_namespace_avl, spa); 1090 (void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name)); 1091 avl_add(&spa_namespace_avl, spa); 1092 1093 /* 1094 * Sync all labels to disk with the new names by marking the root vdev 1095 * dirty and waiting for it to sync. It will pick up the new pool name 1096 * during the sync. 1097 / 1098* vdev_config_dirty(spa->spa_root_vdev); 1099 1100 spa_config_exit(spa, SCL_ALL, FTAG); 1101 1102 txg_wait_synced(spa->spa_dsl_pool, 0); 1103 1104 /* 1105 * Sync the updated config cache. 1106 / 1107* spa_config_sync(spa, B_FALSE, B_TRUE); 1108 1109 spa_close(spa, FTAG); 1110 1111 mutex_exit(&spa_namespace_lock); 1112 1113 return (0); 1114} 1115 1116/* 1117 * Return the spa_t associated with given pool_guid, if it exists. If 1118 * device_guid is non-zero, determine whether the pool exists and contains 1119 * a device with the specified device_guid. 1120 / 1121spa_t 1122spa_by_guid(uint64_t pool_guid, uint64_t device_guid) 1123{ 1124 spa_t spa; 1125* avl_tree_t t = &spa_namespace_avl; 1126* 1127 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1128 1129 for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { 1130 if (spa->spa_state == POOL_STATE_UNINITIALIZED) 1131 continue; 1132 if (spa->spa_root_vdev == NULL) 1133 continue; 1134 if (spa_guid(spa) == pool_guid) { 1135 if (device_guid == 0) 1136 break; 1137 1138 if (vdev_lookup_by_guid(spa->spa_root_vdev, 1139 device_guid) != NULL) 1140 break; 1141 1142 /* 1143 * Check any devices we may be in the process of adding. 
1144 / 1145* if (spa->spa_pending_vdev) { 1146 if (vdev_lookup_by_guid(spa->spa_pending_vdev, 1147 device_guid) != NULL) 1148 break; 1149 } 1150 } 1151 } 1152 1153 return (spa); 1154} 1155 1156/* 1157 * Determine whether a pool with the given pool_guid exists. 1158 / 1159boolean_t 1160spa_guid_exists(uint64_t pool_guid, uint64_t device_guid) 1161{ 1162* return (spa_by_guid(pool_guid, device_guid) != NULL); 1163} 1164 1165char * 1166spa_strdup(const char s) 1167{ 1168* size_t len; 1169 char new; 1170* 1171 len = strlen(s); 1172 new = kmem_alloc(len + 1, KM_SLEEP); 1173 bcopy(s, new, len); 1174 new[len] = '\0'; 1175 1176 return (new); 1177} 1178 1179void 1180spa_strfree(char s) 1181{ 1182* kmem_free(s, strlen(s) + 1); 1183} 1184 1185uint64_t 1186spa_get_random(uint64_t range) 1187{ 1188 uint64_t r; 1189 1190 ASSERT(range != 0); 1191 1192 (void) random_get_pseudo_bytes((void )&r, sizeof (uint64_t)); 1193* 1194 return (r % range); 1195} 1196 1197uint64_t 1198spa_generate_guid(spa_t spa) 1199{ 1200* uint64_t guid = spa_get_random(-1ULL); 1201 1202 if (spa != NULL) { 1203 while (guid == 0 \|\| spa_guid_exists(spa_guid(spa), guid)) 1204 guid = spa_get_random(-1ULL); 1205 } else { 1206 while (guid == 0 \|\| spa_guid_exists(guid, 0)) 1207 guid = spa_get_random(-1ULL); 1208 } 1209 1210 return (guid); 1211} 1212 1213void 1214sprintf_blkptr(char buf, const blkptr_t bp) 1215{ 1216 char type[256]; 1217 char checksum = NULL; 1218* char compress = NULL; 1219* 1220 if (bp != NULL) { 1221 if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) { 1222 dmu_object_byteswap_t bswap = 1223 DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); 1224 (void) snprintf(type, sizeof (type), "bswap %s %s", 1225 DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ? 
1226 "metadata" : "data", 1227 dmu_ot_byteswap[bswap].ob_name); 1228 } else { 1229 (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name, 1230 sizeof (type)); 1231 } 1232 checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; 1233 compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name; 1234 } 1235 1236 SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress); 1237} 1238 1239void 1240spa_freeze(spa_t spa) 1241{ 1242* uint64_t freeze_txg = 0; 1243 1244 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1245 if (spa->spa_freeze_txg == UINT64_MAX) { 1246 freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE; 1247 spa->spa_freeze_txg = freeze_txg; 1248 } 1249 spa_config_exit(spa, SCL_ALL, FTAG); 1250 if (freeze_txg != 0) 1251 txg_wait_synced(spa_get_dsl(spa), freeze_txg); 1252} 1253 1254void 1255zfs_panic_recover(const char fmt, ...) 1256{ 1257* va_list adx; 1258 1259 va_start(adx, fmt); 1260 vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx); 1261 va_end(adx); 1262} 1263 1264/* 1265 * This is a stripped-down version of strtoull, suitable only for converting 1266 * lowercase hexidecimal numbers that don't overflow. 
1267 / 1268uint64_t 1269zfs_strtonum(const char str, char *nptr) 1270{ 1271* uint64_t val = 0; 1272 char c; 1273 int digit; 1274 1275 while ((c = str) != '\0') { 1276* if (c >= '0' && c <= '9') 1277 digit = c - '0'; 1278 else if (c >= 'a' && c <= 'f') 1279 digit = 10 + c - 'a'; 1280 else 1281 break; 1282 1283 val = 16; 1284* val += digit; 1285 1286 str++; 1287 } 1288 1289 if (nptr) 1290 nptr = (char )str; 1291 1292 return (val); 1293} 1294 1295/* 1296 * ========================================================================== 1297 * Accessor functions 1298 * ========================================================================== 1299 / 1300* 1301boolean_t 1302spa_shutting_down(spa_t spa) 1303{ 1304* return (spa->spa_async_suspended); 1305} 1306 1307dsl_pool_t * 1308spa_get_dsl(spa_t spa) 1309{ 1310* return (spa->spa_dsl_pool); 1311} 1312 1313boolean_t 1314spa_is_initializing(spa_t spa) 1315{ 1316* return (spa->spa_is_initializing); 1317} 1318 1319blkptr_t * 1320spa_get_rootblkptr(spa_t spa) 1321{ 1322* return (&spa->spa_ubsync.ub_rootbp); 1323} 1324 1325void 1326spa_set_rootblkptr(spa_t spa, const blkptr_t bp) 1327{ 1328 spa->spa_uberblock.ub_rootbp = bp; 1329} 1330* 1331void 1332spa_altroot(spa_t spa, char buf, size_t buflen) 1333{ 1334 if (spa->spa_root == NULL) 1335 buf[0] = '\0'; 1336 else 1337 (void) strncpy(buf, spa->spa_root, buflen); 1338} 1339 1340int 1341spa_sync_pass(spa_t spa) 1342{ 1343* return (spa->spa_sync_pass); 1344} 1345 1346char * 1347spa_name(spa_t spa) 1348{ 1349* return (spa->spa_name); 1350} 1351 1352uint64_t 1353spa_guid(spa_t spa) 1354{ 1355* dsl_pool_t dp = spa_get_dsl(spa); 1356* uint64_t guid; 1357 1358 /* 1359 * If we fail to parse the config during spa_load(), we can go through 1360 * the error path (which posts an ereport) and end up here with no root 1361 * vdev. We stash the original pool guid in 'spa_config_guid' to handle 1362 * this case. 
1363 / 1364* if (spa->spa_root_vdev == NULL) 1365 return (spa->spa_config_guid); 1366 1367 guid = spa->spa_last_synced_guid != 0 ? 1368 spa->spa_last_synced_guid : spa->spa_root_vdev->vdev_guid; 1369 1370 /* 1371 * Return the most recently synced out guid unless we're 1372 * in syncing context. 1373 / 1374* if (dp && dsl_pool_sync_context(dp)) 1375 return (spa->spa_root_vdev->vdev_guid); 1376 else 1377 return (guid); 1378} 1379 1380uint64_t 1381spa_load_guid(spa_t spa) 1382{ 1383* /* 1384 * This is a GUID that exists solely as a reference for the 1385 * purposes of the arc. It is generated at load time, and 1386 * is never written to persistent storage. 1387 / 1388* return (spa->spa_load_guid); 1389} 1390 1391uint64_t 1392spa_last_synced_txg(spa_t spa) 1393{ 1394* return (spa->spa_ubsync.ub_txg); 1395} 1396 1397uint64_t 1398spa_first_txg(spa_t spa) 1399{ 1400* return (spa->spa_first_txg); 1401} 1402 1403uint64_t 1404spa_syncing_txg(spa_t spa) 1405{ 1406* return (spa->spa_syncing_txg); 1407} 1408 1409pool_state_t 1410spa_state(spa_t spa) 1411{ 1412* return (spa->spa_state); 1413} 1414 1415spa_load_state_t 1416spa_load_state(spa_t spa) 1417{ 1418* return (spa->spa_load_state); 1419} 1420 1421uint64_t 1422spa_freeze_txg(spa_t spa) 1423{ 1424* return (spa->spa_freeze_txg); 1425} 1426 1427/* ARGSUSED / 1428uint64_t 1429spa_get_asize(spa_t spa, uint64_t lsize) 1430{ 1431 /* 1432 * The worst case is single-sector max-parity RAID-Z blocks, in which 1433 * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1) 1434 * times the size; so just assume that. Add to this the fact that 1435 * we can have up to 3 DVAs per bp, and one more factor of 2 because 1436 * the block may be dittoed with up to 3 DVAs by ddt_sync(). 
1437 / 1438* return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2); 1439} 1440 1441uint64_t 1442spa_get_dspace(spa_t spa) 1443{ 1444* return (spa->spa_dspace); 1445} 1446 1447void 1448spa_update_dspace(spa_t spa) 1449{ 1450* spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) + 1451 ddt_get_dedup_dspace(spa); 1452} 1453 1454/* 1455 * Return the failure mode that has been set to this pool. The default 1456 * behavior will be to block all I/Os when a complete failure occurs. 1457 / 1458uint8_t 1459spa_get_failmode(spa_t spa) 1460{ 1461 return (spa->spa_failmode); 1462} 1463 1464boolean_t 1465spa_suspended(spa_t spa) 1466{ 1467* return (spa->spa_suspended); 1468} 1469 1470uint64_t 1471spa_version(spa_t spa) 1472{ 1473* return (spa->spa_ubsync.ub_version); 1474} 1475 1476boolean_t 1477spa_deflate(spa_t spa) 1478{ 1479* return (spa->spa_deflate); 1480} 1481 1482metaslab_class_t * 1483spa_normal_class(spa_t spa) 1484{ 1485* return (spa->spa_normal_class); 1486} 1487 1488metaslab_class_t * 1489spa_log_class(spa_t spa) 1490{ 1491* return (spa->spa_log_class); 1492} 1493 1494int 1495spa_max_replication(spa_t spa) 1496{ 1497* /* 1498 * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to 1499 * handle BPs with more than one DVA allocated. Set our max 1500 * replication level accordingly. 
1501 / 1502* if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS) 1503 return (1); 1504 return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override)); 1505} 1506 1507int 1508spa_prev_software_version(spa_t spa) 1509{ 1510* return (spa->spa_prev_software_version); 1511} 1512 1513uint64_t	654 refcount_destroy(&spa->spa_refcount); 655 656 spa_config_lock_destroy(spa); 657 658 for (int t = 0; t < TXG_SIZE; t++) 659 bplist_destroy(&spa->spa_free_bplist[t]); 660 661 cv_destroy(&spa->spa_async_cv); 662 cv_destroy(&spa->spa_proc_cv); 663 cv_destroy(&spa->spa_scrub_io_cv); 664 cv_destroy(&spa->spa_suspend_cv); 665 666 mutex_destroy(&spa->spa_async_lock); 667 mutex_destroy(&spa->spa_errlist_lock); 668 mutex_destroy(&spa->spa_errlog_lock); 669 mutex_destroy(&spa->spa_history_lock); 670 mutex_destroy(&spa->spa_proc_lock); 671 mutex_destroy(&spa->spa_props_lock); 672 mutex_destroy(&spa->spa_scrub_lock); 673 mutex_destroy(&spa->spa_suspend_lock); 674 mutex_destroy(&spa->spa_vdev_top_lock); 675 676 kmem_free(spa, sizeof (spa_t)); 677} 678 679/* 680 * Given a pool, return the next pool in the namespace, or NULL if there is 681 * none. If 'prev' is NULL, return the first pool. 682 / 683spa_t 684spa_next(spa_t prev) 685{ 686* ASSERT(MUTEX_HELD(&spa_namespace_lock)); 687 688 if (prev) 689 return (AVL_NEXT(&spa_namespace_avl, prev)); 690 else 691 return (avl_first(&spa_namespace_avl)); 692} 693 694/* 695 * ========================================================================== 696 * SPA refcount functions 697 * ========================================================================== 698 / 699* 700/* 701 * Add a reference to the given spa_t. Must have at least one reference, or 702 * have the namespace lock held. 703 / 704void 705spa_open_ref(spa_t spa, void tag) 706{ 707* ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref \|\| 708 MUTEX_HELD(&spa_namespace_lock)); 709 (void) refcount_add(&spa->spa_refcount, tag); 710} 711 712/* 713 * Remove a reference to the given spa_t. 
Must have at least one reference, or 714 * have the namespace lock held. 715 / 716void 717spa_close(spa_t spa, void tag) 718{ 719* ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref \|\| 720 MUTEX_HELD(&spa_namespace_lock)); 721 (void) refcount_remove(&spa->spa_refcount, tag); 722} 723 724/* 725 * Check to see if the spa refcount is zero. Must be called with 726 * spa_namespace_lock held. We really compare against spa_minref, which is the 727 * number of references acquired when opening a pool 728 / 729boolean_t 730spa_refcount_zero(spa_t spa) 731{ 732 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 733 734 return (refcount_count(&spa->spa_refcount) == spa->spa_minref); 735} 736 737/* 738 * ========================================================================== 739 * SPA spare and l2cache tracking 740 * ========================================================================== 741 / 742* 743/* 744 * Hot spares and cache devices are tracked using the same code below, 745 * for 'auxiliary' devices. 
746 / 747* 748typedef struct spa_aux { 749 uint64_t aux_guid; 750 uint64_t aux_pool; 751 avl_node_t aux_avl; 752 int aux_count; 753} spa_aux_t; 754 755static int 756spa_aux_compare(const void a, const void b) 757{ 758 const spa_aux_t sa = a; 759* const spa_aux_t sb = b; 760* 761 if (sa->aux_guid < sb->aux_guid) 762 return (-1); 763 else if (sa->aux_guid > sb->aux_guid) 764 return (1); 765 else 766 return (0); 767} 768 769void 770spa_aux_add(vdev_t vd, avl_tree_t avl) 771{ 772 avl_index_t where; 773 spa_aux_t search; 774 spa_aux_t aux; 775* 776 search.aux_guid = vd->vdev_guid; 777 if ((aux = avl_find(avl, &search, &where)) != NULL) { 778 aux->aux_count++; 779 } else { 780 aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP); 781 aux->aux_guid = vd->vdev_guid; 782 aux->aux_count = 1; 783 avl_insert(avl, aux, where); 784 } 785} 786 787void 788spa_aux_remove(vdev_t vd, avl_tree_t avl) 789{ 790 spa_aux_t search; 791 spa_aux_t aux; 792* avl_index_t where; 793 794 search.aux_guid = vd->vdev_guid; 795 aux = avl_find(avl, &search, &where); 796 797 ASSERT(aux != NULL); 798 799 if (--aux->aux_count == 0) { 800 avl_remove(avl, aux); 801 kmem_free(aux, sizeof (spa_aux_t)); 802 } else if (aux->aux_pool == spa_guid(vd->vdev_spa)) { 803 aux->aux_pool = 0ULL; 804 } 805} 806 807boolean_t 808spa_aux_exists(uint64_t guid, uint64_t pool, int refcnt, avl_tree_t avl) 809{ 810* spa_aux_t search, found; 811* 812 search.aux_guid = guid; 813 found = avl_find(avl, &search, NULL); 814 815 if (pool) { 816 if (found) 817 pool = found->aux_pool; 818* else 819 pool = 0ULL; 820* } 821 822 if (refcnt) { 823 if (found) 824 refcnt = found->aux_count; 825* else 826 refcnt = 0; 827* } 828 829 return (found != NULL); 830} 831 832void 833spa_aux_activate(vdev_t vd, avl_tree_t avl) 834{ 835 spa_aux_t search, found; 836* avl_index_t where; 837 838 search.aux_guid = vd->vdev_guid; 839 found = avl_find(avl, &search, &where); 840 ASSERT(found != NULL); 841 ASSERT(found->aux_pool == 0ULL); 842 843 found->aux_pool = 
spa_guid(vd->vdev_spa); 844} 845 846/* 847 * Spares are tracked globally due to the following constraints: 848 * 849 * - A spare may be part of multiple pools. 850 * - A spare may be added to a pool even if it's actively in use within 851 * another pool. 852 * - A spare in use in any pool can only be the source of a replacement if 853 * the target is a spare in the same pool. 854 * 855 * We keep track of all spares on the system through the use of a reference 856 * counted AVL tree. When a vdev is added as a spare, or used as a replacement 857 * spare, then we bump the reference count in the AVL tree. In addition, we set 858 * the 'vdev_isspare' member to indicate that the device is a spare (active or 859 * inactive). When a spare is made active (used to replace a device in the 860 * pool), we also keep track of which pool its been made a part of. 861 * 862 * The 'spa_spare_lock' protects the AVL tree. These functions are normally 863 * called under the spa_namespace lock as part of vdev reconfiguration. The 864 * separate spare lock exists for the status query path, which does not need to 865 * be completely consistent with respect to other vdev configuration changes. 
866 / 867* 868static int 869spa_spare_compare(const void a, const void b) 870{ 871 return (spa_aux_compare(a, b)); 872} 873 874void 875spa_spare_add(vdev_t vd) 876{ 877* mutex_enter(&spa_spare_lock); 878 ASSERT(!vd->vdev_isspare); 879 spa_aux_add(vd, &spa_spare_avl); 880 vd->vdev_isspare = B_TRUE; 881 mutex_exit(&spa_spare_lock); 882} 883 884void 885spa_spare_remove(vdev_t vd) 886{ 887* mutex_enter(&spa_spare_lock); 888 ASSERT(vd->vdev_isspare); 889 spa_aux_remove(vd, &spa_spare_avl); 890 vd->vdev_isspare = B_FALSE; 891 mutex_exit(&spa_spare_lock); 892} 893 894boolean_t 895spa_spare_exists(uint64_t guid, uint64_t pool, int refcnt) 896{ 897 boolean_t found; 898 899 mutex_enter(&spa_spare_lock); 900 found = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl); 901 mutex_exit(&spa_spare_lock); 902 903 return (found); 904} 905 906void 907spa_spare_activate(vdev_t vd) 908{ 909* mutex_enter(&spa_spare_lock); 910 ASSERT(vd->vdev_isspare); 911 spa_aux_activate(vd, &spa_spare_avl); 912 mutex_exit(&spa_spare_lock); 913} 914 915/* 916 * Level 2 ARC devices are tracked globally for the same reasons as spares. 917 * Cache devices currently only support one pool per cache device, and so 918 * for these devices the aux reference count is currently unused beyond 1. 
919 / 920* 921static int 922spa_l2cache_compare(const void a, const void b) 923{ 924 return (spa_aux_compare(a, b)); 925} 926 927void 928spa_l2cache_add(vdev_t vd) 929{ 930* mutex_enter(&spa_l2cache_lock); 931 ASSERT(!vd->vdev_isl2cache); 932 spa_aux_add(vd, &spa_l2cache_avl); 933 vd->vdev_isl2cache = B_TRUE; 934 mutex_exit(&spa_l2cache_lock); 935} 936 937void 938spa_l2cache_remove(vdev_t vd) 939{ 940* mutex_enter(&spa_l2cache_lock); 941 ASSERT(vd->vdev_isl2cache); 942 spa_aux_remove(vd, &spa_l2cache_avl); 943 vd->vdev_isl2cache = B_FALSE; 944 mutex_exit(&spa_l2cache_lock); 945} 946 947boolean_t 948spa_l2cache_exists(uint64_t guid, uint64_t pool) 949{ 950* boolean_t found; 951 952 mutex_enter(&spa_l2cache_lock); 953 found = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl); 954 mutex_exit(&spa_l2cache_lock); 955 956 return (found); 957} 958 959void 960spa_l2cache_activate(vdev_t vd) 961{ 962* mutex_enter(&spa_l2cache_lock); 963 ASSERT(vd->vdev_isl2cache); 964 spa_aux_activate(vd, &spa_l2cache_avl); 965 mutex_exit(&spa_l2cache_lock); 966} 967 968/* 969 * ========================================================================== 970 * SPA vdev locking 971 * ========================================================================== 972 / 973* 974/* 975 * Lock the given spa_t for the purpose of adding or removing a vdev. 976 * Grabs the global spa_namespace_lock plus the spa config lock for writing. 977 * It returns the next transaction group for the spa_t. 978 / 979uint64_t 980spa_vdev_enter(spa_t spa) 981{ 982 mutex_enter(&spa->spa_vdev_top_lock); 983 mutex_enter(&spa_namespace_lock); 984 return (spa_vdev_config_enter(spa)); 985} 986 987/* 988 * Internal implementation for spa_vdev_enter(). Used when a vdev 989 * operation requires multiple syncs (i.e. removing a device) while 990 * keeping the spa_namespace_lock held. 
991 / 992uint64_t 993spa_vdev_config_enter(spa_t spa) 994{ 995 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 996 997 spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); 998 999 return (spa_last_synced_txg(spa) + 1); 1000} 1001 1002/* 1003 * Used in combination with spa_vdev_config_enter() to allow the syncing 1004 * of multiple transactions without releasing the spa_namespace_lock. 1005 / 1006void 1007spa_vdev_config_exit(spa_t spa, vdev_t vd, uint64_t txg, int error, char tag) 1008{ 1009 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1010 1011 int config_changed = B_FALSE; 1012 1013 ASSERT(txg > spa_last_synced_txg(spa)); 1014 1015 spa->spa_pending_vdev = NULL; 1016 1017 /* 1018 * Reassess the DTLs. 1019 / 1020* vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE); 1021 1022 if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) { 1023 config_changed = B_TRUE; 1024 spa->spa_config_generation++; 1025 } 1026 1027 /* 1028 * Verify the metaslab classes. 1029 / 1030* ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0); 1031 ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0); 1032 1033 spa_config_exit(spa, SCL_ALL, spa); 1034 1035 /* 1036 * Panic the system if the specified tag requires it. This 1037 * is useful for ensuring that configurations are updated 1038 * transactionally. 1039 / 1040* if (zio_injection_enabled) 1041 zio_handle_panic_injection(spa, tag, 0); 1042 1043 /* 1044 * Note: this txg_wait_synced() is important because it ensures 1045 * that there won't be more than one config change per txg. 1046 * This allows us to use the txg as the generation number. 1047 / 1048* if (error == 0) 1049 txg_wait_synced(spa->spa_dsl_pool, txg); 1050 1051 if (vd != NULL) { 1052 ASSERT(!vd->vdev_detached \|\| vd->vdev_dtl_smo.smo_object == 0); 1053 spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); 1054 vdev_free(vd); 1055 spa_config_exit(spa, SCL_ALL, spa); 1056 } 1057 1058 /* 1059 * If the config changed, update the config cache. 
1060 / 1061* if (config_changed) 1062 spa_config_sync(spa, B_FALSE, B_TRUE); 1063} 1064 1065/* 1066 * Unlock the spa_t after adding or removing a vdev. Besides undoing the 1067 * locking of spa_vdev_enter(), we also want make sure the transactions have 1068 * synced to disk, and then update the global configuration cache with the new 1069 * information. 1070 / 1071int 1072spa_vdev_exit(spa_t spa, vdev_t vd, uint64_t txg, int error) 1073{ 1074* spa_vdev_config_exit(spa, vd, txg, error, FTAG); 1075 mutex_exit(&spa_namespace_lock); 1076 mutex_exit(&spa->spa_vdev_top_lock); 1077 1078 return (error); 1079} 1080 1081/* 1082 * Lock the given spa_t for the purpose of changing vdev state. 1083 / 1084void 1085spa_vdev_state_enter(spa_t spa, int oplocks) 1086{ 1087 int locks = SCL_STATE_ALL \| oplocks; 1088 1089 /* 1090 * Root pools may need to read of the underlying devfs filesystem 1091 * when opening up a vdev. Unfortunately if we're holding the 1092 * SCL_ZIO lock it will result in a deadlock when we try to issue 1093 * the read from the root filesystem. Instead we "prefetch" 1094 * the associated vnodes that we need prior to opening the 1095 * underlying devices and cache them so that we can prevent 1096 * any I/O when we are doing the actual open. 1097 / 1098* if (spa_is_root(spa)) { 1099 int low = locks & ~(SCL_ZIO - 1); 1100 int high = locks & ~low; 1101 1102 spa_config_enter(spa, high, spa, RW_WRITER); 1103 vdev_hold(spa->spa_root_vdev); 1104 spa_config_enter(spa, low, spa, RW_WRITER); 1105 } else { 1106 spa_config_enter(spa, locks, spa, RW_WRITER); 1107 } 1108 spa->spa_vdev_locks = locks; 1109} 1110 1111int 1112spa_vdev_state_exit(spa_t spa, vdev_t vd, int error) 1113{ 1114 boolean_t config_changed = B_FALSE; 1115 1116 if (vd != NULL \|\| error == 0) 1117 vdev_dtl_reassess(vd ? 
vd->vdev_top : spa->spa_root_vdev, 1118 0, 0, B_FALSE); 1119 1120 if (vd != NULL) { 1121 vdev_state_dirty(vd->vdev_top); 1122 config_changed = B_TRUE; 1123 spa->spa_config_generation++; 1124 } 1125 1126 if (spa_is_root(spa)) 1127 vdev_rele(spa->spa_root_vdev); 1128 1129 ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL); 1130 spa_config_exit(spa, spa->spa_vdev_locks, spa); 1131 1132 /* 1133 * If anything changed, wait for it to sync. This ensures that, 1134 * from the system administrator's perspective, zpool(1M) commands 1135 * are synchronous. This is important for things like zpool offline: 1136 * when the command completes, you expect no further I/O from ZFS. 1137 / 1138* if (vd != NULL) 1139 txg_wait_synced(spa->spa_dsl_pool, 0); 1140 1141 /* 1142 * If the config changed, update the config cache. 1143 / 1144* if (config_changed) { 1145 mutex_enter(&spa_namespace_lock); 1146 spa_config_sync(spa, B_FALSE, B_TRUE); 1147 mutex_exit(&spa_namespace_lock); 1148 } 1149 1150 return (error); 1151} 1152 1153/* 1154 * ========================================================================== 1155 * Miscellaneous functions 1156 * ========================================================================== 1157 / 1158* 1159void 1160spa_activate_mos_feature(spa_t spa, const char feature) 1161{ 1162 (void) nvlist_add_boolean(spa->spa_label_features, feature); 1163 vdev_config_dirty(spa->spa_root_vdev); 1164} 1165 1166void 1167spa_deactivate_mos_feature(spa_t spa, const char feature) 1168{ 1169 (void) nvlist_remove_all(spa->spa_label_features, feature); 1170 vdev_config_dirty(spa->spa_root_vdev); 1171} 1172 1173/* 1174 * Rename a spa_t. 1175 / 1176int 1177spa_rename(const char name, const char newname) 1178{ 1179* spa_t spa; 1180* int err; 1181 1182 /* 1183 * Lookup the spa_t and grab the config lock for writing. We need to 1184 * actually open the pool so that we can sync out the necessary labels. 
1185 * It's OK to call spa_open() with the namespace lock held because we 1186 * allow recursive calls for other reasons. 1187 / 1188* mutex_enter(&spa_namespace_lock); 1189 if ((err = spa_open(name, &spa, FTAG)) != 0) { 1190 mutex_exit(&spa_namespace_lock); 1191 return (err); 1192 } 1193 1194 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1195 1196 avl_remove(&spa_namespace_avl, spa); 1197 (void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name)); 1198 avl_add(&spa_namespace_avl, spa); 1199 1200 /* 1201 * Sync all labels to disk with the new names by marking the root vdev 1202 * dirty and waiting for it to sync. It will pick up the new pool name 1203 * during the sync. 1204 / 1205* vdev_config_dirty(spa->spa_root_vdev); 1206 1207 spa_config_exit(spa, SCL_ALL, FTAG); 1208 1209 txg_wait_synced(spa->spa_dsl_pool, 0); 1210 1211 /* 1212 * Sync the updated config cache. 1213 / 1214* spa_config_sync(spa, B_FALSE, B_TRUE); 1215 1216 spa_close(spa, FTAG); 1217 1218 mutex_exit(&spa_namespace_lock); 1219 1220 return (0); 1221} 1222 1223/* 1224 * Return the spa_t associated with given pool_guid, if it exists. If 1225 * device_guid is non-zero, determine whether the pool exists and contains 1226 * a device with the specified device_guid. 1227 / 1228spa_t 1229spa_by_guid(uint64_t pool_guid, uint64_t device_guid) 1230{ 1231 spa_t spa; 1232* avl_tree_t t = &spa_namespace_avl; 1233* 1234 ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1235 1236 for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { 1237 if (spa->spa_state == POOL_STATE_UNINITIALIZED) 1238 continue; 1239 if (spa->spa_root_vdev == NULL) 1240 continue; 1241 if (spa_guid(spa) == pool_guid) { 1242 if (device_guid == 0) 1243 break; 1244 1245 if (vdev_lookup_by_guid(spa->spa_root_vdev, 1246 device_guid) != NULL) 1247 break; 1248 1249 /* 1250 * Check any devices we may be in the process of adding. 
1251 / 1252* if (spa->spa_pending_vdev) { 1253 if (vdev_lookup_by_guid(spa->spa_pending_vdev, 1254 device_guid) != NULL) 1255 break; 1256 } 1257 } 1258 } 1259 1260 return (spa); 1261} 1262 1263/* 1264 * Determine whether a pool with the given pool_guid exists. 1265 / 1266boolean_t 1267spa_guid_exists(uint64_t pool_guid, uint64_t device_guid) 1268{ 1269* return (spa_by_guid(pool_guid, device_guid) != NULL); 1270} 1271 1272char * 1273spa_strdup(const char s) 1274{ 1275* size_t len; 1276 char new; 1277* 1278 len = strlen(s); 1279 new = kmem_alloc(len + 1, KM_SLEEP); 1280 bcopy(s, new, len); 1281 new[len] = '\0'; 1282 1283 return (new); 1284} 1285 1286void 1287spa_strfree(char s) 1288{ 1289* kmem_free(s, strlen(s) + 1); 1290} 1291 1292uint64_t 1293spa_get_random(uint64_t range) 1294{ 1295 uint64_t r; 1296 1297 ASSERT(range != 0); 1298 1299 (void) random_get_pseudo_bytes((void )&r, sizeof (uint64_t)); 1300* 1301 return (r % range); 1302} 1303 1304uint64_t 1305spa_generate_guid(spa_t spa) 1306{ 1307* uint64_t guid = spa_get_random(-1ULL); 1308 1309 if (spa != NULL) { 1310 while (guid == 0 \|\| spa_guid_exists(spa_guid(spa), guid)) 1311 guid = spa_get_random(-1ULL); 1312 } else { 1313 while (guid == 0 \|\| spa_guid_exists(guid, 0)) 1314 guid = spa_get_random(-1ULL); 1315 } 1316 1317 return (guid); 1318} 1319 1320void 1321sprintf_blkptr(char buf, const blkptr_t bp) 1322{ 1323 char type[256]; 1324 char checksum = NULL; 1325* char compress = NULL; 1326* 1327 if (bp != NULL) { 1328 if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) { 1329 dmu_object_byteswap_t bswap = 1330 DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); 1331 (void) snprintf(type, sizeof (type), "bswap %s %s", 1332 DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ? 
1333 "metadata" : "data", 1334 dmu_ot_byteswap[bswap].ob_name); 1335 } else { 1336 (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name, 1337 sizeof (type)); 1338 } 1339 checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; 1340 compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name; 1341 } 1342 1343 SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress); 1344} 1345 1346void 1347spa_freeze(spa_t spa) 1348{ 1349* uint64_t freeze_txg = 0; 1350 1351 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1352 if (spa->spa_freeze_txg == UINT64_MAX) { 1353 freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE; 1354 spa->spa_freeze_txg = freeze_txg; 1355 } 1356 spa_config_exit(spa, SCL_ALL, FTAG); 1357 if (freeze_txg != 0) 1358 txg_wait_synced(spa_get_dsl(spa), freeze_txg); 1359} 1360 1361void 1362zfs_panic_recover(const char fmt, ...) 1363{ 1364* va_list adx; 1365 1366 va_start(adx, fmt); 1367 vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx); 1368 va_end(adx); 1369} 1370 1371/* 1372 * This is a stripped-down version of strtoull, suitable only for converting 1373 * lowercase hexidecimal numbers that don't overflow. 
1374 / 1375uint64_t 1376zfs_strtonum(const char str, char *nptr) 1377{ 1378* uint64_t val = 0; 1379 char c; 1380 int digit; 1381 1382 while ((c = str) != '\0') { 1383* if (c >= '0' && c <= '9') 1384 digit = c - '0'; 1385 else if (c >= 'a' && c <= 'f') 1386 digit = 10 + c - 'a'; 1387 else 1388 break; 1389 1390 val = 16; 1391* val += digit; 1392 1393 str++; 1394 } 1395 1396 if (nptr) 1397 nptr = (char )str; 1398 1399 return (val); 1400} 1401 1402/* 1403 * ========================================================================== 1404 * Accessor functions 1405 * ========================================================================== 1406 / 1407* 1408boolean_t 1409spa_shutting_down(spa_t spa) 1410{ 1411* return (spa->spa_async_suspended); 1412} 1413 1414dsl_pool_t * 1415spa_get_dsl(spa_t spa) 1416{ 1417* return (spa->spa_dsl_pool); 1418} 1419 1420boolean_t 1421spa_is_initializing(spa_t spa) 1422{ 1423* return (spa->spa_is_initializing); 1424} 1425 1426blkptr_t * 1427spa_get_rootblkptr(spa_t spa) 1428{ 1429* return (&spa->spa_ubsync.ub_rootbp); 1430} 1431 1432void 1433spa_set_rootblkptr(spa_t spa, const blkptr_t bp) 1434{ 1435 spa->spa_uberblock.ub_rootbp = bp; 1436} 1437* 1438void 1439spa_altroot(spa_t spa, char buf, size_t buflen) 1440{ 1441 if (spa->spa_root == NULL) 1442 buf[0] = '\0'; 1443 else 1444 (void) strncpy(buf, spa->spa_root, buflen); 1445} 1446 1447int 1448spa_sync_pass(spa_t spa) 1449{ 1450* return (spa->spa_sync_pass); 1451} 1452 1453char * 1454spa_name(spa_t spa) 1455{ 1456* return (spa->spa_name); 1457} 1458 1459uint64_t 1460spa_guid(spa_t spa) 1461{ 1462* dsl_pool_t dp = spa_get_dsl(spa); 1463* uint64_t guid; 1464 1465 /* 1466 * If we fail to parse the config during spa_load(), we can go through 1467 * the error path (which posts an ereport) and end up here with no root 1468 * vdev. We stash the original pool guid in 'spa_config_guid' to handle 1469 * this case. 
1470 / 1471* if (spa->spa_root_vdev == NULL) 1472 return (spa->spa_config_guid); 1473 1474 guid = spa->spa_last_synced_guid != 0 ? 1475 spa->spa_last_synced_guid : spa->spa_root_vdev->vdev_guid; 1476 1477 /* 1478 * Return the most recently synced out guid unless we're 1479 * in syncing context. 1480 / 1481* if (dp && dsl_pool_sync_context(dp)) 1482 return (spa->spa_root_vdev->vdev_guid); 1483 else 1484 return (guid); 1485} 1486 1487uint64_t 1488spa_load_guid(spa_t spa) 1489{ 1490* /* 1491 * This is a GUID that exists solely as a reference for the 1492 * purposes of the arc. It is generated at load time, and 1493 * is never written to persistent storage. 1494 / 1495* return (spa->spa_load_guid); 1496} 1497 1498uint64_t 1499spa_last_synced_txg(spa_t spa) 1500{ 1501* return (spa->spa_ubsync.ub_txg); 1502} 1503 1504uint64_t 1505spa_first_txg(spa_t spa) 1506{ 1507* return (spa->spa_first_txg); 1508} 1509 1510uint64_t 1511spa_syncing_txg(spa_t spa) 1512{ 1513* return (spa->spa_syncing_txg); 1514} 1515 1516pool_state_t 1517spa_state(spa_t spa) 1518{ 1519* return (spa->spa_state); 1520} 1521 1522spa_load_state_t 1523spa_load_state(spa_t spa) 1524{ 1525* return (spa->spa_load_state); 1526} 1527 1528uint64_t 1529spa_freeze_txg(spa_t spa) 1530{ 1531* return (spa->spa_freeze_txg); 1532} 1533 1534/* ARGSUSED / 1535uint64_t 1536spa_get_asize(spa_t spa, uint64_t lsize) 1537{ 1538 /* 1539 * The worst case is single-sector max-parity RAID-Z blocks, in which 1540 * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1) 1541 * times the size; so just assume that. Add to this the fact that 1542 * we can have up to 3 DVAs per bp, and one more factor of 2 because 1543 * the block may be dittoed with up to 3 DVAs by ddt_sync(). 
1544 / 1545* return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2); 1546} 1547 1548uint64_t 1549spa_get_dspace(spa_t spa) 1550{ 1551* return (spa->spa_dspace); 1552} 1553 1554void 1555spa_update_dspace(spa_t spa) 1556{ 1557* spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) + 1558 ddt_get_dedup_dspace(spa); 1559} 1560 1561/* 1562 * Return the failure mode that has been set to this pool. The default 1563 * behavior will be to block all I/Os when a complete failure occurs. 1564 / 1565uint8_t 1566spa_get_failmode(spa_t spa) 1567{ 1568 return (spa->spa_failmode); 1569} 1570 1571boolean_t 1572spa_suspended(spa_t spa) 1573{ 1574* return (spa->spa_suspended); 1575} 1576 1577uint64_t 1578spa_version(spa_t spa) 1579{ 1580* return (spa->spa_ubsync.ub_version); 1581} 1582 1583boolean_t 1584spa_deflate(spa_t spa) 1585{ 1586* return (spa->spa_deflate); 1587} 1588 1589metaslab_class_t * 1590spa_normal_class(spa_t spa) 1591{ 1592* return (spa->spa_normal_class); 1593} 1594 1595metaslab_class_t * 1596spa_log_class(spa_t spa) 1597{ 1598* return (spa->spa_log_class); 1599} 1600 1601int 1602spa_max_replication(spa_t spa) 1603{ 1604* /* 1605 * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to 1606 * handle BPs with more than one DVA allocated. Set our max 1607 * replication level accordingly. 1608 / 1609* if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS) 1610 return (1); 1611 return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override)); 1612} 1613 1614int 1615spa_prev_software_version(spa_t spa) 1616{ 1617* return (spa->spa_prev_software_version); 1618} 1619 1620uint64_t
	1621spa_deadman_synctime(spa_t spa) 1622{ 1623* return (spa->spa_deadman_synctime); 1624} 1625 1626uint64_t
1514dva_get_dsize_sync(spa_t spa, const dva_t dva) 1515{ 1516 uint64_t asize = DVA_GET_ASIZE(dva); 1517 uint64_t dsize = asize; 1518 1519 ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); 1520 1521 if (asize != 0 && spa->spa_deflate) { 1522 vdev_t vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva)); 1523* dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio; 1524 } 1525 1526 return (dsize); 1527} 1528 1529uint64_t 1530bp_get_dsize_sync(spa_t spa, const blkptr_t bp) 1531{ 1532 uint64_t dsize = 0; 1533 1534 for (int d = 0; d < SPA_DVAS_PER_BP; d++) 1535 dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); 1536 1537 return (dsize); 1538} 1539 1540uint64_t 1541bp_get_dsize(spa_t spa, const blkptr_t bp) 1542{ 1543 uint64_t dsize = 0; 1544 1545 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 1546 1547 for (int d = 0; d < SPA_DVAS_PER_BP; d++) 1548 dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); 1549 1550 spa_config_exit(spa, SCL_VDEV, FTAG); 1551 1552 return (dsize); 1553} 1554 1555/* 1556 * ========================================================================== 1557 * Initialization and Termination 1558 * ========================================================================== 1559 / 1560* 1561static int 1562spa_name_compare(const void a1, const void a2) 1563{ 1564 const spa_t s1 = a1; 1565* const spa_t s2 = a2; 1566* int s; 1567 1568 s = strcmp(s1->spa_name, s2->spa_name); 1569 if (s > 0) 1570 return (1); 1571 if (s < 0) 1572 return (-1); 1573 return (0); 1574} 1575 1576int 1577spa_busy(void) 1578{ 1579 return (spa_active_count); 1580} 1581 1582void 1583spa_boot_init() 1584{ 1585 spa_config_load(); 1586} 1587 1588void 1589spa_init(int mode) 1590{ 1591 mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); 1592 mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL); 1593 mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL); 1594 cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL); 1595 1596 avl_create(&spa_namespace_avl, spa_name_compare, sizeof 
(spa_t), 1597 offsetof(spa_t, spa_avl)); 1598 1599 avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t), 1600 offsetof(spa_aux_t, aux_avl)); 1601 1602 avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t), 1603 offsetof(spa_aux_t, aux_avl)); 1604 1605 spa_mode_global = mode; 1606 1607#ifdef illumos	1627dva_get_dsize_sync(spa_t spa, const dva_t dva) 1628{ 1629 uint64_t asize = DVA_GET_ASIZE(dva); 1630 uint64_t dsize = asize; 1631 1632 ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); 1633 1634 if (asize != 0 && spa->spa_deflate) { 1635 vdev_t vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva)); 1636* dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio; 1637 } 1638 1639 return (dsize); 1640} 1641 1642uint64_t 1643bp_get_dsize_sync(spa_t spa, const blkptr_t bp) 1644{ 1645 uint64_t dsize = 0; 1646 1647 for (int d = 0; d < SPA_DVAS_PER_BP; d++) 1648 dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); 1649 1650 return (dsize); 1651} 1652 1653uint64_t 1654bp_get_dsize(spa_t spa, const blkptr_t bp) 1655{ 1656 uint64_t dsize = 0; 1657 1658 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); 1659 1660 for (int d = 0; d < SPA_DVAS_PER_BP; d++) 1661 dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); 1662 1663 spa_config_exit(spa, SCL_VDEV, FTAG); 1664 1665 return (dsize); 1666} 1667 1668/* 1669 * ========================================================================== 1670 * Initialization and Termination 1671 * ========================================================================== 1672 / 1673* 1674static int 1675spa_name_compare(const void a1, const void a2) 1676{ 1677 const spa_t s1 = a1; 1678* const spa_t s2 = a2; 1679* int s; 1680 1681 s = strcmp(s1->spa_name, s2->spa_name); 1682 if (s > 0) 1683 return (1); 1684 if (s < 0) 1685 return (-1); 1686 return (0); 1687} 1688 1689int 1690spa_busy(void) 1691{ 1692 return (spa_active_count); 1693} 1694 1695void 1696spa_boot_init() 1697{ 1698 spa_config_load(); 1699} 1700 1701void 
1702spa_init(int mode) 1703{ 1704 mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); 1705 mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL); 1706 mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL); 1707 cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL); 1708 1709 avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t), 1710 offsetof(spa_t, spa_avl)); 1711 1712 avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t), 1713 offsetof(spa_aux_t, aux_avl)); 1714 1715 avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t), 1716 offsetof(spa_aux_t, aux_avl)); 1717 1718 spa_mode_global = mode; 1719 1720#ifdef illumos
1608#ifndef _KERNEL	1721#ifdef _KERNEL 1722 spa_arch_init(); 1723#else
1609 if (spa_mode_global != FREAD && dprintf_find_string("watch")) { 1610 arc_procfd = open("/proc/self/ctl", O_WRONLY); 1611 if (arc_procfd == -1) { 1612 perror("could not enable watchpoints: " 1613 "opening /proc/self/ctl failed: "); 1614 } else { 1615 arc_watch = B_TRUE; 1616 } 1617 } 1618#endif 1619#endif /* illumos / 1620* refcount_sysinit(); 1621 unique_init(); 1622 space_map_init(); 1623 zio_init(); 1624 dmu_init(); 1625 zil_init(); 1626 vdev_cache_stat_init(); 1627 zfs_prop_init(); 1628 zpool_prop_init(); 1629 zpool_feature_init(); 1630 spa_config_load(); 1631 l2arc_start();	1724 if (spa_mode_global != FREAD && dprintf_find_string("watch")) { 1725 arc_procfd = open("/proc/self/ctl", O_WRONLY); 1726 if (arc_procfd == -1) { 1727 perror("could not enable watchpoints: " 1728 "opening /proc/self/ctl failed: "); 1729 } else { 1730 arc_watch = B_TRUE; 1731 } 1732 } 1733#endif 1734#endif /* illumos / 1735* refcount_sysinit(); 1736 unique_init(); 1737 space_map_init(); 1738 zio_init(); 1739 dmu_init(); 1740 zil_init(); 1741 vdev_cache_stat_init(); 1742 zfs_prop_init(); 1743 zpool_prop_init(); 1744 zpool_feature_init(); 1745 spa_config_load(); 1746 l2arc_start();
	1747#ifndef illumos 1748#ifdef _KERNEL 1749 zfs_deadman_init(); 1750#endif 1751#endif /* !illumos */
1632} 1633 1634void 1635spa_fini(void) 1636{ 1637 l2arc_stop(); 1638 1639 spa_evict_all(); 1640 1641 vdev_cache_stat_fini(); 1642 zil_fini(); 1643 dmu_fini(); 1644 zio_fini(); 1645 space_map_fini(); 1646 unique_fini(); 1647 refcount_fini(); 1648 1649 avl_destroy(&spa_namespace_avl); 1650 avl_destroy(&spa_spare_avl); 1651 avl_destroy(&spa_l2cache_avl); 1652 1653 cv_destroy(&spa_namespace_cv); 1654 mutex_destroy(&spa_namespace_lock); 1655 mutex_destroy(&spa_spare_lock); 1656 mutex_destroy(&spa_l2cache_lock); 1657} 1658 1659/* 1660 * Return whether this pool has slogs. No locking needed. 1661 * It's not a problem if the wrong answer is returned as it's only for 1662 * performance and not correctness 1663 / 1664boolean_t 1665spa_has_slogs(spa_t spa) 1666{ 1667 return (spa->spa_log_class->mc_rotor != NULL); 1668} 1669 1670spa_log_state_t 1671spa_get_log_state(spa_t spa) 1672{ 1673* return (spa->spa_log_state); 1674} 1675 1676void 1677spa_set_log_state(spa_t spa, spa_log_state_t state) 1678{ 1679* spa->spa_log_state = state; 1680} 1681 1682boolean_t 1683spa_is_root(spa_t spa) 1684{ 1685* return (spa->spa_is_root); 1686} 1687 1688boolean_t 1689spa_writeable(spa_t spa) 1690{ 1691* return (!!(spa->spa_mode & FWRITE)); 1692} 1693 1694int 1695spa_mode(spa_t spa) 1696{ 1697* return (spa->spa_mode); 1698} 1699 1700uint64_t 1701spa_bootfs(spa_t spa) 1702{ 1703* return (spa->spa_bootfs); 1704} 1705 1706uint64_t 1707spa_delegation(spa_t spa) 1708{ 1709* return (spa->spa_delegation); 1710} 1711 1712objset_t * 1713spa_meta_objset(spa_t spa) 1714{ 1715* return (spa->spa_meta_objset); 1716} 1717 1718enum zio_checksum 1719spa_dedup_checksum(spa_t spa) 1720{ 1721* return (spa->spa_dedup_checksum); 1722} 1723 1724/* 1725 * Reset pool scan stat per scan pass (or reboot). 
1726 / 1727void 1728spa_scan_stat_init(spa_t spa) 1729{ 1730 /* data not stored on disk / 1731* spa->spa_scan_pass_start = gethrestime_sec(); 1732 spa->spa_scan_pass_exam = 0; 1733 vdev_scan_stat_init(spa->spa_root_vdev); 1734} 1735 1736/* 1737 * Get scan stats for zpool status reports 1738 / 1739int 1740spa_scan_get_stats(spa_t spa, pool_scan_stat_t ps) 1741{ 1742* dsl_scan_t scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL; 1743* 1744 if (scn == NULL \|\| scn->scn_phys.scn_func == POOL_SCAN_NONE) 1745 return (ENOENT); 1746 bzero(ps, sizeof (pool_scan_stat_t)); 1747 1748 /* data stored on disk / 1749* ps->pss_func = scn->scn_phys.scn_func; 1750 ps->pss_start_time = scn->scn_phys.scn_start_time; 1751 ps->pss_end_time = scn->scn_phys.scn_end_time; 1752 ps->pss_to_examine = scn->scn_phys.scn_to_examine; 1753 ps->pss_examined = scn->scn_phys.scn_examined; 1754 ps->pss_to_process = scn->scn_phys.scn_to_process; 1755 ps->pss_processed = scn->scn_phys.scn_processed; 1756 ps->pss_errors = scn->scn_phys.scn_errors; 1757 ps->pss_state = scn->scn_phys.scn_state; 1758 1759 /* data not stored on disk / 1760* ps->pss_pass_start = spa->spa_scan_pass_start; 1761 ps->pss_pass_exam = spa->spa_scan_pass_exam; 1762 1763 return (0); 1764} 1765 1766boolean_t 1767spa_debug_enabled(spa_t spa) 1768{ 1769* return (spa->spa_debug); 1770}	1752} 1753 1754void 1755spa_fini(void) 1756{ 1757 l2arc_stop(); 1758 1759 spa_evict_all(); 1760 1761 vdev_cache_stat_fini(); 1762 zil_fini(); 1763 dmu_fini(); 1764 zio_fini(); 1765 space_map_fini(); 1766 unique_fini(); 1767 refcount_fini(); 1768 1769 avl_destroy(&spa_namespace_avl); 1770 avl_destroy(&spa_spare_avl); 1771 avl_destroy(&spa_l2cache_avl); 1772 1773 cv_destroy(&spa_namespace_cv); 1774 mutex_destroy(&spa_namespace_lock); 1775 mutex_destroy(&spa_spare_lock); 1776 mutex_destroy(&spa_l2cache_lock); 1777} 1778 1779/* 1780 * Return whether this pool has slogs. No locking needed. 
1781 * It's not a problem if the wrong answer is returned as it's only for 1782 * performance and not correctness 1783 / 1784boolean_t 1785spa_has_slogs(spa_t spa) 1786{ 1787 return (spa->spa_log_class->mc_rotor != NULL); 1788} 1789 1790spa_log_state_t 1791spa_get_log_state(spa_t spa) 1792{ 1793* return (spa->spa_log_state); 1794} 1795 1796void 1797spa_set_log_state(spa_t spa, spa_log_state_t state) 1798{ 1799* spa->spa_log_state = state; 1800} 1801 1802boolean_t 1803spa_is_root(spa_t spa) 1804{ 1805* return (spa->spa_is_root); 1806} 1807 1808boolean_t 1809spa_writeable(spa_t spa) 1810{ 1811* return (!!(spa->spa_mode & FWRITE)); 1812} 1813 1814int 1815spa_mode(spa_t spa) 1816{ 1817* return (spa->spa_mode); 1818} 1819 1820uint64_t 1821spa_bootfs(spa_t spa) 1822{ 1823* return (spa->spa_bootfs); 1824} 1825 1826uint64_t 1827spa_delegation(spa_t spa) 1828{ 1829* return (spa->spa_delegation); 1830} 1831 1832objset_t * 1833spa_meta_objset(spa_t spa) 1834{ 1835* return (spa->spa_meta_objset); 1836} 1837 1838enum zio_checksum 1839spa_dedup_checksum(spa_t spa) 1840{ 1841* return (spa->spa_dedup_checksum); 1842} 1843 1844/* 1845 * Reset pool scan stat per scan pass (or reboot). 1846 / 1847void 1848spa_scan_stat_init(spa_t spa) 1849{ 1850 /* data not stored on disk / 1851* spa->spa_scan_pass_start = gethrestime_sec(); 1852 spa->spa_scan_pass_exam = 0; 1853 vdev_scan_stat_init(spa->spa_root_vdev); 1854} 1855 1856/* 1857 * Get scan stats for zpool status reports 1858 / 1859int 1860spa_scan_get_stats(spa_t spa, pool_scan_stat_t ps) 1861{ 1862* dsl_scan_t scn = spa->spa_dsl_pool ? 
spa->spa_dsl_pool->dp_scan : NULL; 1863* 1864 if (scn == NULL \|\| scn->scn_phys.scn_func == POOL_SCAN_NONE) 1865 return (ENOENT); 1866 bzero(ps, sizeof (pool_scan_stat_t)); 1867 1868 /* data stored on disk / 1869* ps->pss_func = scn->scn_phys.scn_func; 1870 ps->pss_start_time = scn->scn_phys.scn_start_time; 1871 ps->pss_end_time = scn->scn_phys.scn_end_time; 1872 ps->pss_to_examine = scn->scn_phys.scn_to_examine; 1873 ps->pss_examined = scn->scn_phys.scn_examined; 1874 ps->pss_to_process = scn->scn_phys.scn_to_process; 1875 ps->pss_processed = scn->scn_phys.scn_processed; 1876 ps->pss_errors = scn->scn_phys.scn_errors; 1877 ps->pss_state = scn->scn_phys.scn_state; 1878 1879 /* data not stored on disk / 1880* ps->pss_pass_start = spa->spa_scan_pass_start; 1881 ps->pss_pass_exam = spa->spa_scan_pass_exam; 1882 1883 return (0); 1884} 1885 1886boolean_t 1887spa_debug_enabled(spa_t spa) 1888{ 1889* return (spa->spa_debug); 1890}