spa.c revision 207670
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd 22168404Spjd/* 23185029Spjd * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24168404Spjd * Use is subject to license terms. 25168404Spjd */ 26168404Spjd 27168404Spjd/* 28168404Spjd * This file contains all the routines used when modifying on-disk SPA state. 29168404Spjd * This includes opening, importing, destroying, exporting a pool, and syncing a 30168404Spjd * pool. 31168404Spjd */ 32168404Spjd 33168404Spjd#include <sys/zfs_context.h> 34168404Spjd#include <sys/fm/fs/zfs.h> 35168404Spjd#include <sys/spa_impl.h> 36168404Spjd#include <sys/zio.h> 37168404Spjd#include <sys/zio_checksum.h> 38168404Spjd#include <sys/zio_compress.h> 39168404Spjd#include <sys/dmu.h> 40168404Spjd#include <sys/dmu_tx.h> 41168404Spjd#include <sys/zap.h> 42168404Spjd#include <sys/zil.h> 43168404Spjd#include <sys/vdev_impl.h> 44168404Spjd#include <sys/metaslab.h> 45168404Spjd#include <sys/uberblock_impl.h> 46168404Spjd#include <sys/txg.h> 47168404Spjd#include <sys/avl.h> 48168404Spjd#include <sys/dmu_traverse.h> 49168404Spjd#include <sys/dmu_objset.h> 50168404Spjd#include <sys/unique.h> 51168404Spjd#include <sys/dsl_pool.h> 52168404Spjd#include <sys/dsl_dataset.h> 53168404Spjd#include <sys/dsl_dir.h> 54168404Spjd#include <sys/dsl_prop.h> 55168404Spjd#include <sys/dsl_synctask.h> 56168404Spjd#include <sys/fs/zfs.h> 57185029Spjd#include <sys/arc.h> 58168404Spjd#include <sys/callb.h> 59168962Spjd#include <sys/sunddi.h> 60185029Spjd#include <sys/spa_boot.h> 61168404Spjd 62185029Spjd#include "zfs_prop.h" 63185029Spjd#include "zfs_comutil.h" 64168404Spjd 65204073Spjd/* Check hostid on import? */ 66204073Spjdstatic int check_hostid = 1; 67204073Spjd 68204073SpjdSYSCTL_DECL(_vfs_zfs); 69204073SpjdTUNABLE_INT("vfs.zfs.check_hostid", &check_hostid); 70204073SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RW, &check_hostid, 0, 71204073Spjd "Check hostid on import?"); 72204073Spjd 73185029Spjdint zio_taskq_threads[ZIO_TYPES][ZIO_TASKQ_TYPES] = { 74185029Spjd /* ISSUE INTR */ 75185029Spjd { 1, 1 }, /* ZIO_TYPE_NULL */ 76185029Spjd { 1, 8 }, /* ZIO_TYPE_READ */ 77185029Spjd { 8, 1 }, /* ZIO_TYPE_WRITE */ 78185029Spjd { 1, 1 }, /* ZIO_TYPE_FREE */ 79185029Spjd { 1, 1 }, /* ZIO_TYPE_CLAIM */ 80185029Spjd { 1, 1 }, /* ZIO_TYPE_IOCTL */ 81185029Spjd}; 82168712Spjd 83185029Spjdstatic void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx); 84185029Spjdstatic boolean_t spa_has_active_shared_spare(spa_t *spa); 85185029Spjd 86168404Spjd/* 87168404Spjd * ========================================================================== 88185029Spjd * SPA properties routines 89185029Spjd * ========================================================================== 90185029Spjd */ 91185029Spjd 92185029Spjd/* 93185029Spjd * Add a (source=src, propname=propval) list to an nvlist. 94185029Spjd */ 95185029Spjdstatic void 96185029Spjdspa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 97185029Spjd uint64_t intval, zprop_source_t src) 98185029Spjd{ 99185029Spjd const char *propname = zpool_prop_to_name(prop); 100185029Spjd nvlist_t *propval; 101185029Spjd 102185029Spjd VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 103185029Spjd VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 104185029Spjd 105185029Spjd if (strval != NULL) 106185029Spjd VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 107185029Spjd else 108185029Spjd VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 109185029Spjd 110185029Spjd VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 111185029Spjd nvlist_free(propval); 112185029Spjd} 113185029Spjd 114185029Spjd/* 115185029Spjd * Get property values from the spa configuration. 116185029Spjd */ 117185029Spjdstatic void 118185029Spjdspa_prop_get_config(spa_t *spa, nvlist_t **nvp) 119185029Spjd{ 120185029Spjd uint64_t size = spa_get_space(spa); 121185029Spjd uint64_t used = spa_get_alloc(spa); 122185029Spjd uint64_t cap, version; 123185029Spjd zprop_source_t src = ZPROP_SRC_NONE; 124185029Spjd spa_config_dirent_t *dp; 125185029Spjd 126185029Spjd ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 127185029Spjd 128185029Spjd /* 129185029Spjd * readonly properties 130185029Spjd */ 131185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 132185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 133185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_USED, NULL, used, src); 134185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_AVAILABLE, NULL, size - used, src); 135185029Spjd 136185029Spjd cap = (size == 0) ? 0 : (used * 100 / size); 137185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 138185029Spjd 139185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 140185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 141185029Spjd spa->spa_root_vdev->vdev_state, src); 142185029Spjd 143185029Spjd /* 144185029Spjd * settable properties that are not stored in the pool property object. 145185029Spjd */ 146185029Spjd version = spa_version(spa); 147185029Spjd if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 148185029Spjd src = ZPROP_SRC_DEFAULT; 149185029Spjd else 150185029Spjd src = ZPROP_SRC_LOCAL; 151185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 152185029Spjd 153185029Spjd if (spa->spa_root != NULL) 154185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 155185029Spjd 0, ZPROP_SRC_LOCAL); 156185029Spjd 157185029Spjd if ((dp = list_head(&spa->spa_config_list)) != NULL) { 158185029Spjd if (dp->scd_path == NULL) { 159185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 160185029Spjd "none", 0, ZPROP_SRC_LOCAL); 161185029Spjd } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 162185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 163185029Spjd dp->scd_path, 0, ZPROP_SRC_LOCAL); 164185029Spjd } 165185029Spjd } 166185029Spjd} 167185029Spjd 168185029Spjd/* 169185029Spjd * Get zpool property values. 170185029Spjd */ 171185029Spjdint 172185029Spjdspa_prop_get(spa_t *spa, nvlist_t **nvp) 173185029Spjd{ 174185029Spjd zap_cursor_t zc; 175185029Spjd zap_attribute_t za; 176185029Spjd objset_t *mos = spa->spa_meta_objset; 177185029Spjd int err; 178185029Spjd 179185029Spjd VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 180185029Spjd 181185029Spjd mutex_enter(&spa->spa_props_lock); 182185029Spjd 183185029Spjd /* 184185029Spjd * Get properties from the spa config. 185185029Spjd */ 186185029Spjd spa_prop_get_config(spa, nvp); 187185029Spjd 188185029Spjd /* If no pool property object, no more prop to get. */ 189185029Spjd if (spa->spa_pool_props_object == 0) { 190185029Spjd mutex_exit(&spa->spa_props_lock); 191185029Spjd return (0); 192185029Spjd } 193185029Spjd 194185029Spjd /* 195185029Spjd * Get properties from the MOS pool property object. 196185029Spjd */ 197185029Spjd for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 198185029Spjd (err = zap_cursor_retrieve(&zc, &za)) == 0; 199185029Spjd zap_cursor_advance(&zc)) { 200185029Spjd uint64_t intval = 0; 201185029Spjd char *strval = NULL; 202185029Spjd zprop_source_t src = ZPROP_SRC_DEFAULT; 203185029Spjd zpool_prop_t prop; 204185029Spjd 205185029Spjd if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 206185029Spjd continue; 207185029Spjd 208185029Spjd switch (za.za_integer_length) { 209185029Spjd case 8: 210185029Spjd /* integer property */ 211185029Spjd if (za.za_first_integer != 212185029Spjd zpool_prop_default_numeric(prop)) 213185029Spjd src = ZPROP_SRC_LOCAL; 214185029Spjd 215185029Spjd if (prop == ZPOOL_PROP_BOOTFS) { 216185029Spjd dsl_pool_t *dp; 217185029Spjd dsl_dataset_t *ds = NULL; 218185029Spjd 219185029Spjd dp = spa_get_dsl(spa); 220185029Spjd rw_enter(&dp->dp_config_rwlock, RW_READER); 221185029Spjd if (err = dsl_dataset_hold_obj(dp, 222185029Spjd za.za_first_integer, FTAG, &ds)) { 223185029Spjd rw_exit(&dp->dp_config_rwlock); 224185029Spjd break; 225185029Spjd } 226185029Spjd 227185029Spjd strval = kmem_alloc( 228185029Spjd MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 229185029Spjd KM_SLEEP); 230185029Spjd dsl_dataset_name(ds, strval); 231185029Spjd dsl_dataset_rele(ds, FTAG); 232185029Spjd rw_exit(&dp->dp_config_rwlock); 233185029Spjd } else { 234185029Spjd strval = NULL; 235185029Spjd intval = za.za_first_integer; 236185029Spjd } 237185029Spjd 238185029Spjd spa_prop_add_list(*nvp, prop, strval, intval, src); 239185029Spjd 240185029Spjd if (strval != NULL) 241185029Spjd kmem_free(strval, 242185029Spjd MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 243185029Spjd 244185029Spjd break; 245185029Spjd 246185029Spjd case 1: 247185029Spjd /* string property */ 248185029Spjd strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 249185029Spjd err = zap_lookup(mos, spa->spa_pool_props_object, 250185029Spjd za.za_name, 1, za.za_num_integers, strval); 251185029Spjd if (err) { 252185029Spjd kmem_free(strval, za.za_num_integers); 253185029Spjd break; 254185029Spjd } 255185029Spjd spa_prop_add_list(*nvp, prop, strval, 0, src); 256185029Spjd kmem_free(strval, za.za_num_integers); 257185029Spjd break; 258185029Spjd 259185029Spjd default: 260185029Spjd break; 261185029Spjd } 262185029Spjd } 263185029Spjd zap_cursor_fini(&zc); 264185029Spjd mutex_exit(&spa->spa_props_lock); 265185029Spjdout: 266185029Spjd if (err && err != ENOENT) { 267185029Spjd nvlist_free(*nvp); 268185029Spjd *nvp = NULL; 269185029Spjd return (err); 270185029Spjd } 271185029Spjd 272185029Spjd return (0); 273185029Spjd} 274185029Spjd 275185029Spjd/* 276185029Spjd * Validate the given pool properties nvlist and modify the list 277185029Spjd * for the property values to be set. 278185029Spjd */ 279185029Spjdstatic int 280185029Spjdspa_prop_validate(spa_t *spa, nvlist_t *props) 281185029Spjd{ 282185029Spjd nvpair_t *elem; 283185029Spjd int error = 0, reset_bootfs = 0; 284185029Spjd uint64_t objnum; 285185029Spjd 286185029Spjd elem = NULL; 287185029Spjd while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 288185029Spjd zpool_prop_t prop; 289185029Spjd char *propname, *strval; 290185029Spjd uint64_t intval; 291185029Spjd objset_t *os; 292185029Spjd char *slash; 293185029Spjd 294185029Spjd propname = nvpair_name(elem); 295185029Spjd 296185029Spjd if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) 297185029Spjd return (EINVAL); 298185029Spjd 299185029Spjd switch (prop) { 300185029Spjd case ZPOOL_PROP_VERSION: 301185029Spjd error = nvpair_value_uint64(elem, &intval); 302185029Spjd if (!error && 303185029Spjd (intval < spa_version(spa) || intval > SPA_VERSION)) 304185029Spjd error = EINVAL; 305185029Spjd break; 306185029Spjd 307185029Spjd case ZPOOL_PROP_DELEGATION: 308185029Spjd case ZPOOL_PROP_AUTOREPLACE: 309185029Spjd case ZPOOL_PROP_LISTSNAPS: 310185029Spjd error = nvpair_value_uint64(elem, &intval); 311185029Spjd if (!error && intval > 1) 312185029Spjd error = EINVAL; 313185029Spjd break; 314185029Spjd 315185029Spjd case ZPOOL_PROP_BOOTFS: 316185029Spjd if (spa_version(spa) < SPA_VERSION_BOOTFS) { 317185029Spjd error = ENOTSUP; 318185029Spjd break; 319185029Spjd } 320185029Spjd 321185029Spjd /* 322185029Spjd * Make sure the vdev config is bootable 323185029Spjd */ 324185029Spjd if (!vdev_is_bootable(spa->spa_root_vdev)) { 325185029Spjd error = ENOTSUP; 326185029Spjd break; 327185029Spjd } 328185029Spjd 329185029Spjd reset_bootfs = 1; 330185029Spjd 331185029Spjd error = nvpair_value_string(elem, &strval); 332185029Spjd 333185029Spjd if (!error) { 334185029Spjd uint64_t compress; 335185029Spjd 336185029Spjd if (strval == NULL || strval[0] == '\0') { 337185029Spjd objnum = zpool_prop_default_numeric( 338185029Spjd ZPOOL_PROP_BOOTFS); 339185029Spjd break; 340185029Spjd } 341185029Spjd 342185029Spjd if (error = dmu_objset_open(strval, DMU_OST_ZFS, 343185029Spjd DS_MODE_USER | DS_MODE_READONLY, &os)) 344185029Spjd break; 345185029Spjd 346185029Spjd /* We don't support gzip bootable datasets */ 347185029Spjd if ((error = dsl_prop_get_integer(strval, 348185029Spjd zfs_prop_to_name(ZFS_PROP_COMPRESSION), 349185029Spjd &compress, NULL)) == 0 && 350185029Spjd !BOOTFS_COMPRESS_VALID(compress)) { 351185029Spjd error = ENOTSUP; 352185029Spjd } else { 353185029Spjd objnum = dmu_objset_id(os); 354185029Spjd } 355185029Spjd dmu_objset_close(os); 356185029Spjd } 357185029Spjd break; 358185029Spjd 359185029Spjd case ZPOOL_PROP_FAILUREMODE: 360185029Spjd error = nvpair_value_uint64(elem, &intval); 361185029Spjd if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 362185029Spjd intval > ZIO_FAILURE_MODE_PANIC)) 363185029Spjd error = EINVAL; 364185029Spjd 365185029Spjd /* 366185029Spjd * This is a special case which only occurs when 367185029Spjd * the pool has completely failed. This allows 368185029Spjd * the user to change the in-core failmode property 369185029Spjd * without syncing it out to disk (I/Os might 370185029Spjd * currently be blocked). We do this by returning 371185029Spjd * EIO to the caller (spa_prop_set) to trick it 372185029Spjd * into thinking we encountered a property validation 373185029Spjd * error. 374185029Spjd */ 375185029Spjd if (!error && spa_suspended(spa)) { 376185029Spjd spa->spa_failmode = intval; 377185029Spjd error = EIO; 378185029Spjd } 379185029Spjd break; 380185029Spjd 381185029Spjd case ZPOOL_PROP_CACHEFILE: 382185029Spjd if ((error = nvpair_value_string(elem, &strval)) != 0) 383185029Spjd break; 384185029Spjd 385185029Spjd if (strval[0] == '\0') 386185029Spjd break; 387185029Spjd 388185029Spjd if (strcmp(strval, "none") == 0) 389185029Spjd break; 390185029Spjd 391185029Spjd if (strval[0] != '/') { 392185029Spjd error = EINVAL; 393185029Spjd break; 394185029Spjd } 395185029Spjd 396185029Spjd slash = strrchr(strval, '/'); 397185029Spjd ASSERT(slash != NULL); 398185029Spjd 399185029Spjd if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 400185029Spjd strcmp(slash, "/..") == 0) 401185029Spjd error = EINVAL; 402185029Spjd break; 403185029Spjd } 404185029Spjd 405185029Spjd if (error) 406185029Spjd break; 407185029Spjd } 408185029Spjd 409185029Spjd if (!error && reset_bootfs) { 410185029Spjd error = nvlist_remove(props, 411185029Spjd zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 412185029Spjd 413185029Spjd if (!error) { 414185029Spjd error = nvlist_add_uint64(props, 415185029Spjd zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 416185029Spjd } 417185029Spjd } 418185029Spjd 419185029Spjd return (error); 420185029Spjd} 421185029Spjd 422185029Spjdint 423185029Spjdspa_prop_set(spa_t *spa, nvlist_t *nvp) 424185029Spjd{ 425185029Spjd int error; 426185029Spjd 427185029Spjd if ((error = spa_prop_validate(spa, nvp)) != 0) 428185029Spjd return (error); 429185029Spjd 430185029Spjd return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 431185029Spjd spa, nvp, 3)); 432185029Spjd} 433185029Spjd 434185029Spjd/* 435185029Spjd * If the bootfs property value is dsobj, clear it. 436185029Spjd */ 437185029Spjdvoid 438185029Spjdspa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 439185029Spjd{ 440185029Spjd if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 441185029Spjd VERIFY(zap_remove(spa->spa_meta_objset, 442185029Spjd spa->spa_pool_props_object, 443185029Spjd zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 444185029Spjd spa->spa_bootfs = 0; 445185029Spjd } 446185029Spjd} 447185029Spjd 448185029Spjd/* 449185029Spjd * ========================================================================== 450168404Spjd * SPA state manipulation (open/create/destroy/import/export) 451168404Spjd * ========================================================================== 452168404Spjd */ 453168404Spjd 454168404Spjdstatic int 455168404Spjdspa_error_entry_compare(const void *a, const void *b) 456168404Spjd{ 457168404Spjd spa_error_entry_t *sa = (spa_error_entry_t *)a; 458168404Spjd spa_error_entry_t *sb = (spa_error_entry_t *)b; 459168404Spjd int ret; 460168404Spjd 461168404Spjd ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 462168404Spjd sizeof (zbookmark_t)); 463168404Spjd 464168404Spjd if (ret < 0) 465168404Spjd return (-1); 466168404Spjd else if (ret > 0) 467168404Spjd return (1); 468168404Spjd else 469168404Spjd return (0); 470168404Spjd} 471168404Spjd 472168404Spjd/* 473168404Spjd * Utility function which retrieves copies of the current logs and 474168404Spjd * re-initializes them in the process. 475168404Spjd */ 476168404Spjdvoid 477168404Spjdspa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 478168404Spjd{ 479168404Spjd ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 480168404Spjd 481168404Spjd bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 482168404Spjd bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 483168404Spjd 484168404Spjd avl_create(&spa->spa_errlist_scrub, 485168404Spjd spa_error_entry_compare, sizeof (spa_error_entry_t), 486168404Spjd offsetof(spa_error_entry_t, se_avl)); 487168404Spjd avl_create(&spa->spa_errlist_last, 488168404Spjd spa_error_entry_compare, sizeof (spa_error_entry_t), 489168404Spjd offsetof(spa_error_entry_t, se_avl)); 490168404Spjd} 491168404Spjd 492168404Spjd/* 493168404Spjd * Activate an uninitialized pool. 494168404Spjd */ 495168404Spjdstatic void 496168404Spjdspa_activate(spa_t *spa) 497168404Spjd{ 498168404Spjd 499168404Spjd ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 500168404Spjd 501168404Spjd spa->spa_state = POOL_STATE_ACTIVE; 502168404Spjd 503168404Spjd spa->spa_normal_class = metaslab_class_create(); 504185029Spjd spa->spa_log_class = metaslab_class_create(); 505168404Spjd 506185029Spjd for (int t = 0; t < ZIO_TYPES; t++) { 507185029Spjd for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 508185029Spjd spa->spa_zio_taskq[t][q] = taskq_create("spa_zio", 509185029Spjd zio_taskq_threads[t][q], maxclsyspri, 50, 510185029Spjd INT_MAX, TASKQ_PREPOPULATE); 511185029Spjd } 512168404Spjd } 513168404Spjd 514185029Spjd list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 515185029Spjd offsetof(vdev_t, vdev_config_dirty_node)); 516185029Spjd list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 517185029Spjd offsetof(vdev_t, vdev_state_dirty_node)); 518168404Spjd 519168404Spjd txg_list_create(&spa->spa_vdev_txg_list, 520168404Spjd offsetof(struct vdev, vdev_txg_node)); 521168404Spjd 522168404Spjd avl_create(&spa->spa_errlist_scrub, 523168404Spjd spa_error_entry_compare, sizeof (spa_error_entry_t), 524168404Spjd offsetof(spa_error_entry_t, se_avl)); 525168404Spjd avl_create(&spa->spa_errlist_last, 526168404Spjd spa_error_entry_compare, sizeof (spa_error_entry_t), 527168404Spjd offsetof(spa_error_entry_t, se_avl)); 528168404Spjd} 529168404Spjd 530168404Spjd/* 531168404Spjd * Opposite of spa_activate(). 532168404Spjd */ 533168404Spjdstatic void 534168404Spjdspa_deactivate(spa_t *spa) 535168404Spjd{ 536168404Spjd ASSERT(spa->spa_sync_on == B_FALSE); 537168404Spjd ASSERT(spa->spa_dsl_pool == NULL); 538168404Spjd ASSERT(spa->spa_root_vdev == NULL); 539168404Spjd 540168404Spjd ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 541168404Spjd 542168404Spjd txg_list_destroy(&spa->spa_vdev_txg_list); 543168404Spjd 544185029Spjd list_destroy(&spa->spa_config_dirty_list); 545185029Spjd list_destroy(&spa->spa_state_dirty_list); 546168404Spjd 547185029Spjd for (int t = 0; t < ZIO_TYPES; t++) { 548185029Spjd for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 549185029Spjd taskq_destroy(spa->spa_zio_taskq[t][q]); 550185029Spjd spa->spa_zio_taskq[t][q] = NULL; 551185029Spjd } 552168404Spjd } 553168404Spjd 554168404Spjd metaslab_class_destroy(spa->spa_normal_class); 555168404Spjd spa->spa_normal_class = NULL; 556168404Spjd 557185029Spjd metaslab_class_destroy(spa->spa_log_class); 558185029Spjd spa->spa_log_class = NULL; 559185029Spjd 560168404Spjd /* 561168404Spjd * If this was part of an import or the open otherwise failed, we may 562168404Spjd * still have errors left in the queues. Empty them just in case. 563168404Spjd */ 564168404Spjd spa_errlog_drain(spa); 565168404Spjd 566168404Spjd avl_destroy(&spa->spa_errlist_scrub); 567168404Spjd avl_destroy(&spa->spa_errlist_last); 568168404Spjd 569168404Spjd spa->spa_state = POOL_STATE_UNINITIALIZED; 570168404Spjd} 571168404Spjd 572168404Spjd/* 573168404Spjd * Verify a pool configuration, and construct the vdev tree appropriately. This 574168404Spjd * will create all the necessary vdevs in the appropriate layout, with each vdev 575168404Spjd * in the CLOSED state. This will prep the pool before open/creation/import. 576168404Spjd * All vdev validation is done by the vdev_alloc() routine. 577168404Spjd */ 578168404Spjdstatic int 579168404Spjdspa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 580168404Spjd uint_t id, int atype) 581168404Spjd{ 582168404Spjd nvlist_t **child; 583168404Spjd uint_t c, children; 584168404Spjd int error; 585168404Spjd 586168404Spjd if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 587168404Spjd return (error); 588168404Spjd 589168404Spjd if ((*vdp)->vdev_ops->vdev_op_leaf) 590168404Spjd return (0); 591168404Spjd 592185029Spjd error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 593185029Spjd &child, &children); 594185029Spjd 595185029Spjd if (error == ENOENT) 596185029Spjd return (0); 597185029Spjd 598185029Spjd if (error) { 599168404Spjd vdev_free(*vdp); 600168404Spjd *vdp = NULL; 601168404Spjd return (EINVAL); 602168404Spjd } 603168404Spjd 604168404Spjd for (c = 0; c < children; c++) { 605168404Spjd vdev_t *vd; 606168404Spjd if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 607168404Spjd atype)) != 0) { 608168404Spjd vdev_free(*vdp); 609168404Spjd *vdp = NULL; 610168404Spjd return (error); 611168404Spjd } 612168404Spjd } 613168404Spjd 614168404Spjd ASSERT(*vdp != NULL); 615168404Spjd 616168404Spjd return (0); 617168404Spjd} 618168404Spjd 619168404Spjd/* 620168404Spjd * Opposite of spa_load(). 621168404Spjd */ 622168404Spjdstatic void 623168404Spjdspa_unload(spa_t *spa) 624168404Spjd{ 625168404Spjd int i; 626168404Spjd 627185029Spjd ASSERT(MUTEX_HELD(&spa_namespace_lock)); 628185029Spjd 629168404Spjd /* 630168404Spjd * Stop async tasks. 631168404Spjd */ 632168404Spjd spa_async_suspend(spa); 633168404Spjd 634168404Spjd /* 635168404Spjd * Stop syncing. 636168404Spjd */ 637168404Spjd if (spa->spa_sync_on) { 638168404Spjd txg_sync_stop(spa->spa_dsl_pool); 639168404Spjd spa->spa_sync_on = B_FALSE; 640168404Spjd } 641168404Spjd 642168404Spjd /* 643185029Spjd * Wait for any outstanding async I/O to complete. 644168404Spjd */ 645185029Spjd mutex_enter(&spa->spa_async_root_lock); 646185029Spjd while (spa->spa_async_root_count != 0) 647185029Spjd cv_wait(&spa->spa_async_root_cv, &spa->spa_async_root_lock); 648185029Spjd mutex_exit(&spa->spa_async_root_lock); 649168404Spjd 650168404Spjd /* 651185029Spjd * Drop and purge level 2 cache 652185029Spjd */ 653185029Spjd spa_l2cache_drop(spa); 654185029Spjd 655185029Spjd /* 656168404Spjd * Close the dsl pool. 657168404Spjd */ 658168404Spjd if (spa->spa_dsl_pool) { 659168404Spjd dsl_pool_close(spa->spa_dsl_pool); 660168404Spjd spa->spa_dsl_pool = NULL; 661168404Spjd } 662168404Spjd 663168404Spjd /* 664168404Spjd * Close all vdevs. 665168404Spjd */ 666168404Spjd if (spa->spa_root_vdev) 667168404Spjd vdev_free(spa->spa_root_vdev); 668168404Spjd ASSERT(spa->spa_root_vdev == NULL); 669168404Spjd 670185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) 671185029Spjd vdev_free(spa->spa_spares.sav_vdevs[i]); 672185029Spjd if (spa->spa_spares.sav_vdevs) { 673185029Spjd kmem_free(spa->spa_spares.sav_vdevs, 674185029Spjd spa->spa_spares.sav_count * sizeof (void *)); 675185029Spjd spa->spa_spares.sav_vdevs = NULL; 676168404Spjd } 677185029Spjd if (spa->spa_spares.sav_config) { 678185029Spjd nvlist_free(spa->spa_spares.sav_config); 679185029Spjd spa->spa_spares.sav_config = NULL; 680168404Spjd } 681185029Spjd spa->spa_spares.sav_count = 0; 682168404Spjd 683185029Spjd for (i = 0; i < spa->spa_l2cache.sav_count; i++) 684185029Spjd vdev_free(spa->spa_l2cache.sav_vdevs[i]); 685185029Spjd if (spa->spa_l2cache.sav_vdevs) { 686185029Spjd kmem_free(spa->spa_l2cache.sav_vdevs, 687185029Spjd spa->spa_l2cache.sav_count * sizeof (void *)); 688185029Spjd spa->spa_l2cache.sav_vdevs = NULL; 689185029Spjd } 690185029Spjd if (spa->spa_l2cache.sav_config) { 691185029Spjd nvlist_free(spa->spa_l2cache.sav_config); 692185029Spjd spa->spa_l2cache.sav_config = NULL; 693185029Spjd } 694185029Spjd spa->spa_l2cache.sav_count = 0; 695185029Spjd 696168404Spjd spa->spa_async_suspended = 0; 697168404Spjd} 698168404Spjd 699168404Spjd/* 700168404Spjd * Load (or re-load) the current list of vdevs describing the active spares for 701168404Spjd * this pool. When this is called, we have some form of basic information in 702185029Spjd * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and 703185029Spjd * then re-generate a more complete list including status information. 704168404Spjd */ 705168404Spjdstatic void 706168404Spjdspa_load_spares(spa_t *spa) 707168404Spjd{ 708168404Spjd nvlist_t **spares; 709168404Spjd uint_t nspares; 710168404Spjd int i; 711168404Spjd vdev_t *vd, *tvd; 712168404Spjd 713185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 714185029Spjd 715168404Spjd /* 716168404Spjd * First, close and free any existing spare vdevs. 717168404Spjd */ 718185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) { 719185029Spjd vd = spa->spa_spares.sav_vdevs[i]; 720168404Spjd 721168404Spjd /* Undo the call to spa_activate() below */ 722185029Spjd if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 723185029Spjd B_FALSE)) != NULL && tvd->vdev_isspare) 724168404Spjd spa_spare_remove(tvd); 725168404Spjd vdev_close(vd); 726168404Spjd vdev_free(vd); 727168404Spjd } 728168404Spjd 729185029Spjd if (spa->spa_spares.sav_vdevs) 730185029Spjd kmem_free(spa->spa_spares.sav_vdevs, 731185029Spjd spa->spa_spares.sav_count * sizeof (void *)); 732168404Spjd 733185029Spjd if (spa->spa_spares.sav_config == NULL) 734168404Spjd nspares = 0; 735168404Spjd else 736185029Spjd VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 737168404Spjd ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 738168404Spjd 739185029Spjd spa->spa_spares.sav_count = (int)nspares; 740185029Spjd spa->spa_spares.sav_vdevs = NULL; 741168404Spjd 742168404Spjd if (nspares == 0) 743168404Spjd return; 744168404Spjd 745168404Spjd /* 746168404Spjd * Construct the array of vdevs, opening them to get status in the 747168404Spjd * process. For each spare, there is potentially two different vdev_t 748168404Spjd * structures associated with it: one in the list of spares (used only 749168404Spjd * for basic validation purposes) and one in the active vdev 750168404Spjd * configuration (if it's spared in). During this phase we open and 751168404Spjd * validate each vdev on the spare list. If the vdev also exists in the 752168404Spjd * active configuration, then we also mark this vdev as an active spare. 753168404Spjd */ 754185029Spjd spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 755185029Spjd KM_SLEEP); 756185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) { 757168404Spjd VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 758168404Spjd VDEV_ALLOC_SPARE) == 0); 759168404Spjd ASSERT(vd != NULL); 760168404Spjd 761185029Spjd spa->spa_spares.sav_vdevs[i] = vd; 762168404Spjd 763185029Spjd if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 764185029Spjd B_FALSE)) != NULL) { 765168404Spjd if (!tvd->vdev_isspare) 766168404Spjd spa_spare_add(tvd); 767168404Spjd 768168404Spjd /* 769168404Spjd * We only mark the spare active if we were successfully 770168404Spjd * able to load the vdev. Otherwise, importing a pool 771168404Spjd * with a bad active spare would result in strange 772168404Spjd * behavior, because multiple pool would think the spare 773168404Spjd * is actively in use. 774168404Spjd * 775168404Spjd * There is a vulnerability here to an equally bizarre 776168404Spjd * circumstance, where a dead active spare is later 777168404Spjd * brought back to life (onlined or otherwise). Given 778168404Spjd * the rarity of this scenario, and the extra complexity 779168404Spjd * it adds, we ignore the possibility. 780168404Spjd */ 781168404Spjd if (!vdev_is_dead(tvd)) 782168404Spjd spa_spare_activate(tvd); 783168404Spjd } 784168404Spjd 785185029Spjd vd->vdev_top = vd; 786185029Spjd 787168404Spjd if (vdev_open(vd) != 0) 788168404Spjd continue; 789168404Spjd 790185029Spjd if (vdev_validate_aux(vd) == 0) 791185029Spjd spa_spare_add(vd); 792168404Spjd } 793168404Spjd 794168404Spjd /* 795168404Spjd * Recompute the stashed list of spares, with status information 796168404Spjd * this time. 797168404Spjd */ 798185029Spjd VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 799168404Spjd DATA_TYPE_NVLIST_ARRAY) == 0); 800168404Spjd 801185029Spjd spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 802185029Spjd KM_SLEEP); 803185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) 804185029Spjd spares[i] = vdev_config_generate(spa, 805185029Spjd spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE); 806185029Spjd VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 807185029Spjd ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 808185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) 809168404Spjd nvlist_free(spares[i]); 810185029Spjd kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 811168404Spjd} 812168404Spjd 813185029Spjd/* 814185029Spjd * Load (or re-load) the current list of vdevs describing the active l2cache for 815185029Spjd * this pool. When this is called, we have some form of basic information in 816185029Spjd * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 817185029Spjd * then re-generate a more complete list including status information. 818185029Spjd * Devices which are already active have their details maintained, and are 819185029Spjd * not re-opened. 820185029Spjd */ 821185029Spjdstatic void 822185029Spjdspa_load_l2cache(spa_t *spa) 823185029Spjd{ 824185029Spjd nvlist_t **l2cache; 825185029Spjd uint_t nl2cache; 826185029Spjd int i, j, oldnvdevs; 827185029Spjd uint64_t guid, size; 828185029Spjd vdev_t *vd, **oldvdevs, **newvdevs; 829185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 830185029Spjd 831185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 832185029Spjd 833185029Spjd if (sav->sav_config != NULL) { 834185029Spjd VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 835185029Spjd ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 836185029Spjd newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 837185029Spjd } else { 838185029Spjd nl2cache = 0; 839185029Spjd } 840185029Spjd 841185029Spjd oldvdevs = sav->sav_vdevs; 842185029Spjd oldnvdevs = sav->sav_count; 843185029Spjd sav->sav_vdevs = NULL; 844185029Spjd sav->sav_count = 0; 845185029Spjd 846185029Spjd /* 847185029Spjd * Process new nvlist of vdevs. 848185029Spjd */ 849185029Spjd for (i = 0; i < nl2cache; i++) { 850185029Spjd VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 851185029Spjd &guid) == 0); 852185029Spjd 853185029Spjd newvdevs[i] = NULL; 854185029Spjd for (j = 0; j < oldnvdevs; j++) { 855185029Spjd vd = oldvdevs[j]; 856185029Spjd if (vd != NULL && guid == vd->vdev_guid) { 857185029Spjd /* 858185029Spjd * Retain previous vdev for add/remove ops. 859185029Spjd */ 860185029Spjd newvdevs[i] = vd; 861185029Spjd oldvdevs[j] = NULL; 862185029Spjd break; 863185029Spjd } 864185029Spjd } 865185029Spjd 866185029Spjd if (newvdevs[i] == NULL) { 867185029Spjd /* 868185029Spjd * Create new vdev 869185029Spjd */ 870185029Spjd VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 871185029Spjd VDEV_ALLOC_L2CACHE) == 0); 872185029Spjd ASSERT(vd != NULL); 873185029Spjd newvdevs[i] = vd; 874185029Spjd 875185029Spjd /* 876185029Spjd * Commit this vdev as an l2cache device, 877185029Spjd * even if it fails to open. 878185029Spjd */ 879185029Spjd spa_l2cache_add(vd); 880185029Spjd 881185029Spjd vd->vdev_top = vd; 882185029Spjd vd->vdev_aux = sav; 883185029Spjd 884185029Spjd spa_l2cache_activate(vd); 885185029Spjd 886185029Spjd if (vdev_open(vd) != 0) 887185029Spjd continue; 888185029Spjd 889185029Spjd (void) vdev_validate_aux(vd); 890185029Spjd 891185029Spjd if (!vdev_is_dead(vd)) { 892185029Spjd size = vdev_get_rsize(vd); 893185029Spjd l2arc_add_vdev(spa, vd, 894185029Spjd VDEV_LABEL_START_SIZE, 895185029Spjd size - VDEV_LABEL_START_SIZE); 896185029Spjd } 897185029Spjd } 898185029Spjd } 899185029Spjd 900185029Spjd /* 901185029Spjd * Purge vdevs that were dropped 902185029Spjd */ 903185029Spjd for (i = 0; i < oldnvdevs; i++) { 904185029Spjd uint64_t pool; 905185029Spjd 906185029Spjd vd = oldvdevs[i]; 907185029Spjd if (vd != NULL) { 908185029Spjd if ((spa_mode & FWRITE) && 909185029Spjd spa_l2cache_exists(vd->vdev_guid, &pool) && 910185029Spjd pool != 0ULL && 911185029Spjd l2arc_vdev_present(vd)) { 912185029Spjd l2arc_remove_vdev(vd); 913185029Spjd } 914185029Spjd (void) vdev_close(vd); 915185029Spjd spa_l2cache_remove(vd); 916185029Spjd } 917185029Spjd } 918185029Spjd 919185029Spjd if (oldvdevs) 920185029Spjd kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 921185029Spjd 922185029Spjd if (sav->sav_config == NULL) 923185029Spjd goto out; 924185029Spjd 925185029Spjd sav->sav_vdevs = newvdevs; 926185029Spjd sav->sav_count = (int)nl2cache; 927185029Spjd 928185029Spjd /* 929185029Spjd * Recompute the stashed list of l2cache devices, with status 930185029Spjd * information this time. 931185029Spjd */ 932185029Spjd VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 933185029Spjd DATA_TYPE_NVLIST_ARRAY) == 0); 934185029Spjd 935185029Spjd l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 936185029Spjd for (i = 0; i < sav->sav_count; i++) 937185029Spjd l2cache[i] = vdev_config_generate(spa, 938185029Spjd sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE); 939185029Spjd VERIFY(nvlist_add_nvlist_array(sav->sav_config, 940185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 941185029Spjdout: 942185029Spjd for (i = 0; i < sav->sav_count; i++) 943185029Spjd nvlist_free(l2cache[i]); 944185029Spjd if (sav->sav_count) 945185029Spjd kmem_free(l2cache, sav->sav_count * sizeof (void *)); 946185029Spjd} 947185029Spjd 948168404Spjdstatic int 949168404Spjdload_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 950168404Spjd{ 951168404Spjd dmu_buf_t *db; 952168404Spjd char *packed = NULL; 953168404Spjd size_t nvsize = 0; 954168404Spjd int error; 955168404Spjd *value = NULL; 956168404Spjd 957168404Spjd VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 958168404Spjd nvsize = *(uint64_t *)db->db_data; 959168404Spjd dmu_buf_rele(db, FTAG); 960168404Spjd 961168404Spjd packed = kmem_alloc(nvsize, KM_SLEEP); 962168404Spjd error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed); 963168404Spjd if (error == 0) 964168404Spjd error = nvlist_unpack(packed, nvsize, value, 0); 965168404Spjd kmem_free(packed, nvsize); 966168404Spjd 967168404Spjd return (error); 968168404Spjd} 969168404Spjd 970168404Spjd/* 971185029Spjd * Checks to see if the given vdev could not be opened, in which case we post a 972185029Spjd * sysevent to notify the autoreplace code that the device has been removed. 973185029Spjd */ 974185029Spjdstatic void 975185029Spjdspa_check_removed(vdev_t *vd) 976185029Spjd{ 977185029Spjd int c; 978185029Spjd 979185029Spjd for (c = 0; c < vd->vdev_children; c++) 980185029Spjd spa_check_removed(vd->vdev_child[c]); 981185029Spjd 982185029Spjd if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 983185029Spjd zfs_post_autoreplace(vd->vdev_spa, vd); 984185029Spjd spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 985185029Spjd } 986185029Spjd} 987185029Spjd 988185029Spjd/* 989185029Spjd * Check for missing log devices 990185029Spjd */ 991185029Spjdint 992185029Spjdspa_check_logs(spa_t *spa) 993185029Spjd{ 994185029Spjd switch (spa->spa_log_state) { 995185029Spjd case SPA_LOG_MISSING: 996185029Spjd /* need to recheck in case slog has been restored */ 997185029Spjd case SPA_LOG_UNKNOWN: 998185029Spjd if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 999185029Spjd DS_FIND_CHILDREN)) { 1000185029Spjd spa->spa_log_state = SPA_LOG_MISSING; 1001185029Spjd return (1); 1002185029Spjd } 1003185029Spjd break; 1004185029Spjd 1005185029Spjd case SPA_LOG_CLEAR: 1006185029Spjd (void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL, 1007185029Spjd DS_FIND_CHILDREN); 1008185029Spjd break; 1009185029Spjd } 1010185029Spjd spa->spa_log_state = SPA_LOG_GOOD; 1011185029Spjd return (0); 1012185029Spjd} 1013185029Spjd 1014185029Spjd/* 1015168404Spjd * Load an existing storage pool, using the pool's builtin spa_config as a 1016168404Spjd * source of configuration information. 1017168404Spjd */ 1018168404Spjdstatic int 1019168404Spjdspa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) 1020168404Spjd{ 1021168404Spjd int error = 0; 1022168404Spjd nvlist_t *nvroot = NULL; 1023168404Spjd vdev_t *rvd; 1024168404Spjd uberblock_t *ub = &spa->spa_uberblock; 1025168404Spjd uint64_t config_cache_txg = spa->spa_config_txg; 1026168404Spjd uint64_t pool_guid; 1027168404Spjd uint64_t version; 1028185029Spjd uint64_t autoreplace = 0; 1029185029Spjd char *ereport = FM_EREPORT_ZFS_POOL; 1030168404Spjd 1031185029Spjd ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1032185029Spjd 1033168404Spjd spa->spa_load_state = state; 1034168404Spjd 1035168404Spjd if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) || 1036168404Spjd nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { 1037168404Spjd error = EINVAL; 1038168404Spjd goto out; 1039168404Spjd } 1040168404Spjd 1041168404Spjd /* 1042168404Spjd * Versioning wasn't explicitly added to the label until later, so if 1043168404Spjd * it's not present treat it as the initial version. 1044168404Spjd */ 1045168404Spjd if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0) 1046185029Spjd version = SPA_VERSION_INITIAL; 1047168404Spjd 1048168404Spjd (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1049168404Spjd &spa->spa_config_txg); 1050168404Spjd 1051168404Spjd if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1052168404Spjd spa_guid_exists(pool_guid, 0)) { 1053168404Spjd error = EEXIST; 1054168404Spjd goto out; 1055168404Spjd } 1056168404Spjd 1057168404Spjd spa->spa_load_guid = pool_guid; 1058168404Spjd 1059168404Spjd /* 1060168404Spjd * Parse the configuration into a vdev tree. We explicitly set the 1061168404Spjd * value that will be returned by spa_version() since parsing the 1062168404Spjd * configuration requires knowing the version number. 1063168404Spjd */ 1064185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1065168404Spjd spa->spa_ubsync.ub_version = version; 1066168404Spjd error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD); 1067185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1068168404Spjd 1069168404Spjd if (error != 0) 1070168404Spjd goto out; 1071168404Spjd 1072168404Spjd ASSERT(spa->spa_root_vdev == rvd); 1073168404Spjd ASSERT(spa_guid(spa) == pool_guid); 1074168404Spjd 1075168404Spjd /* 1076168404Spjd * Try to open all vdevs, loading each label in the process. 1077168404Spjd */ 1078185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1079168926Spjd error = vdev_open(rvd); 1080185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1081168926Spjd if (error != 0) 1082168404Spjd goto out; 1083168404Spjd 1084168404Spjd /* 1085168404Spjd * Validate the labels for all leaf vdevs. We need to grab the config 1086185029Spjd * lock because all label I/O is done with ZIO_FLAG_CONFIG_WRITER. 1087168404Spjd */ 1088185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1089168404Spjd error = vdev_validate(rvd); 1090185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1091168404Spjd 1092168926Spjd if (error != 0) 1093168404Spjd goto out; 1094168404Spjd 1095168404Spjd if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1096168404Spjd error = ENXIO; 1097168404Spjd goto out; 1098168404Spjd } 1099168404Spjd 1100168404Spjd /* 1101168404Spjd * Find the best uberblock. 1102168404Spjd */ 1103185029Spjd vdev_uberblock_load(NULL, rvd, ub); 1104168404Spjd 1105168404Spjd /* 1106168404Spjd * If we weren't able to find a single valid uberblock, return failure. 1107168404Spjd */ 1108168404Spjd if (ub->ub_txg == 0) { 1109168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1110168404Spjd VDEV_AUX_CORRUPT_DATA); 1111168404Spjd error = ENXIO; 1112168404Spjd goto out; 1113168404Spjd } 1114168404Spjd 1115168404Spjd /* 1116168404Spjd * If the pool is newer than the code, we can't open it. 1117168404Spjd */ 1118185029Spjd if (ub->ub_version > SPA_VERSION) { 1119168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1120168404Spjd VDEV_AUX_VERSION_NEWER); 1121168404Spjd error = ENOTSUP; 1122168404Spjd goto out; 1123168404Spjd } 1124168404Spjd 1125168404Spjd /* 1126168404Spjd * If the vdev guid sum doesn't match the uberblock, we have an 1127168404Spjd * incomplete configuration. 1128168404Spjd */ 1129168404Spjd if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) { 1130168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1131168404Spjd VDEV_AUX_BAD_GUID_SUM); 1132168404Spjd error = ENXIO; 1133168404Spjd goto out; 1134168404Spjd } 1135168404Spjd 1136168404Spjd /* 1137168404Spjd * Initialize internal SPA structures. 1138168404Spjd */ 1139168404Spjd spa->spa_state = POOL_STATE_ACTIVE; 1140168404Spjd spa->spa_ubsync = spa->spa_uberblock; 1141168404Spjd spa->spa_first_txg = spa_last_synced_txg(spa) + 1; 1142168404Spjd error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 1143168404Spjd if (error) { 1144168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1145168404Spjd VDEV_AUX_CORRUPT_DATA); 1146168404Spjd goto out; 1147168404Spjd } 1148168404Spjd spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 1149168404Spjd 1150168404Spjd if (zap_lookup(spa->spa_meta_objset, 1151168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 1152168404Spjd sizeof (uint64_t), 1, &spa->spa_config_object) != 0) { 1153168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1154168404Spjd VDEV_AUX_CORRUPT_DATA); 1155168404Spjd error = EIO; 1156168404Spjd goto out; 1157168404Spjd } 1158168404Spjd 1159168404Spjd if (!mosconfig) { 1160168404Spjd nvlist_t *newconfig; 1161168498Spjd uint64_t hostid; 1162168404Spjd 1163168404Spjd if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { 1164168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1165168404Spjd VDEV_AUX_CORRUPT_DATA); 1166168404Spjd error = EIO; 1167168404Spjd goto out; 1168168404Spjd } 1169168404Spjd 1170185029Spjd if (!spa_is_root(spa) && nvlist_lookup_uint64(newconfig, 1171185029Spjd ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 1172168498Spjd char *hostname; 1173168498Spjd unsigned long myhostid = 0; 1174168498Spjd 1175168498Spjd VERIFY(nvlist_lookup_string(newconfig, 1176168498Spjd ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 1177168498Spjd 1178168498Spjd (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 1179204073Spjd if (check_hostid && hostid != 0 && myhostid != 0 && 1180185029Spjd (unsigned long)hostid != myhostid) { 1181168498Spjd cmn_err(CE_WARN, "pool '%s' could not be " 1182168498Spjd "loaded as it was last accessed by " 1183185029Spjd "another system (host: %s hostid: 0x%lx). " 1184168498Spjd "See: http://www.sun.com/msg/ZFS-8000-EY", 1185185029Spjd spa_name(spa), hostname, 1186168498Spjd (unsigned long)hostid); 1187168498Spjd error = EBADF; 1188168498Spjd goto out; 1189168498Spjd } 1190168498Spjd } 1191168498Spjd 1192168404Spjd spa_config_set(spa, newconfig); 1193168404Spjd spa_unload(spa); 1194168404Spjd spa_deactivate(spa); 1195168404Spjd spa_activate(spa); 1196168404Spjd 1197168404Spjd return (spa_load(spa, newconfig, state, B_TRUE)); 1198168404Spjd } 1199168404Spjd 1200168404Spjd if (zap_lookup(spa->spa_meta_objset, 1201168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 1202168404Spjd sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj) != 0) { 1203168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1204168404Spjd VDEV_AUX_CORRUPT_DATA); 1205168404Spjd error = EIO; 1206168404Spjd goto out; 1207168404Spjd } 1208168404Spjd 1209168404Spjd /* 1210168404Spjd * Load the bit that tells us to use the new accounting function 1211168404Spjd * (raid-z deflation). If we have an older pool, this will not 1212168404Spjd * be present. 1213168404Spjd */ 1214168404Spjd error = zap_lookup(spa->spa_meta_objset, 1215168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 1216168404Spjd sizeof (uint64_t), 1, &spa->spa_deflate); 1217168404Spjd if (error != 0 && error != ENOENT) { 1218168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1219168404Spjd VDEV_AUX_CORRUPT_DATA); 1220168404Spjd error = EIO; 1221168404Spjd goto out; 1222168404Spjd } 1223168404Spjd 1224168404Spjd /* 1225168404Spjd * Load the persistent error log. If we have an older pool, this will 1226168404Spjd * not be present. 1227168404Spjd */ 1228168404Spjd error = zap_lookup(spa->spa_meta_objset, 1229168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST, 1230168404Spjd sizeof (uint64_t), 1, &spa->spa_errlog_last); 1231168404Spjd if (error != 0 && error != ENOENT) { 1232168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1233168404Spjd VDEV_AUX_CORRUPT_DATA); 1234168404Spjd error = EIO; 1235168404Spjd goto out; 1236168404Spjd } 1237168404Spjd 1238168404Spjd error = zap_lookup(spa->spa_meta_objset, 1239168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB, 1240168404Spjd sizeof (uint64_t), 1, &spa->spa_errlog_scrub); 1241168404Spjd if (error != 0 && error != ENOENT) { 1242168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1243168404Spjd VDEV_AUX_CORRUPT_DATA); 1244168404Spjd error = EIO; 1245168404Spjd goto out; 1246168404Spjd } 1247168404Spjd 1248168404Spjd /* 1249168404Spjd * Load the history object. If we have an older pool, this 1250168404Spjd * will not be present. 1251168404Spjd */ 1252168404Spjd error = zap_lookup(spa->spa_meta_objset, 1253168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY, 1254168404Spjd sizeof (uint64_t), 1, &spa->spa_history); 1255168404Spjd if (error != 0 && error != ENOENT) { 1256168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1257168404Spjd VDEV_AUX_CORRUPT_DATA); 1258168404Spjd error = EIO; 1259168404Spjd goto out; 1260168404Spjd } 1261168404Spjd 1262168404Spjd /* 1263168404Spjd * Load any hot spares for this pool. 1264168404Spjd */ 1265168404Spjd error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1266185029Spjd DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object); 1267168404Spjd if (error != 0 && error != ENOENT) { 1268168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1269168404Spjd VDEV_AUX_CORRUPT_DATA); 1270168404Spjd error = EIO; 1271168404Spjd goto out; 1272168404Spjd } 1273168404Spjd if (error == 0) { 1274185029Spjd ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 1275185029Spjd if (load_nvlist(spa, spa->spa_spares.sav_object, 1276185029Spjd &spa->spa_spares.sav_config) != 0) { 1277168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1278168404Spjd VDEV_AUX_CORRUPT_DATA); 1279168404Spjd error = EIO; 1280168404Spjd goto out; 1281168404Spjd } 1282168404Spjd 1283185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1284168404Spjd spa_load_spares(spa); 1285185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1286168404Spjd } 1287168404Spjd 1288185029Spjd /* 1289185029Spjd * Load any level 2 ARC devices for this pool. 1290185029Spjd */ 1291168404Spjd error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1292185029Spjd DMU_POOL_L2CACHE, sizeof (uint64_t), 1, 1293185029Spjd &spa->spa_l2cache.sav_object); 1294185029Spjd if (error != 0 && error != ENOENT) { 1295185029Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1296185029Spjd VDEV_AUX_CORRUPT_DATA); 1297185029Spjd error = EIO; 1298185029Spjd goto out; 1299185029Spjd } 1300185029Spjd if (error == 0) { 1301185029Spjd ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 1302185029Spjd if (load_nvlist(spa, spa->spa_l2cache.sav_object, 1303185029Spjd &spa->spa_l2cache.sav_config) != 0) { 1304185029Spjd vdev_set_state(rvd, B_TRUE, 1305185029Spjd VDEV_STATE_CANT_OPEN, 1306185029Spjd VDEV_AUX_CORRUPT_DATA); 1307185029Spjd error = EIO; 1308185029Spjd goto out; 1309185029Spjd } 1310185029Spjd 1311185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1312185029Spjd spa_load_l2cache(spa); 1313185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1314185029Spjd } 1315185029Spjd 1316185029Spjd if (spa_check_logs(spa)) { 1317185029Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1318185029Spjd VDEV_AUX_BAD_LOG); 1319185029Spjd error = ENXIO; 1320185029Spjd ereport = FM_EREPORT_ZFS_LOG_REPLAY; 1321185029Spjd goto out; 1322185029Spjd } 1323185029Spjd 1324185029Spjd 1325185029Spjd spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 1326185029Spjd 1327185029Spjd error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1328168404Spjd DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object); 1329168404Spjd 1330168404Spjd if (error && error != ENOENT) { 1331168404Spjd vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, 1332168404Spjd VDEV_AUX_CORRUPT_DATA); 1333168404Spjd error = EIO; 1334168404Spjd goto out; 1335168404Spjd } 1336168404Spjd 1337168404Spjd if (error == 0) { 1338168404Spjd (void) zap_lookup(spa->spa_meta_objset, 1339168404Spjd spa->spa_pool_props_object, 1340185029Spjd zpool_prop_to_name(ZPOOL_PROP_BOOTFS), 1341168404Spjd sizeof (uint64_t), 1, &spa->spa_bootfs); 1342185029Spjd (void) zap_lookup(spa->spa_meta_objset, 1343185029Spjd spa->spa_pool_props_object, 1344185029Spjd zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1345185029Spjd sizeof (uint64_t), 1, &autoreplace); 1346185029Spjd (void) zap_lookup(spa->spa_meta_objset, 1347185029Spjd spa->spa_pool_props_object, 1348185029Spjd zpool_prop_to_name(ZPOOL_PROP_DELEGATION), 1349185029Spjd sizeof (uint64_t), 1, &spa->spa_delegation); 1350185029Spjd (void) zap_lookup(spa->spa_meta_objset, 1351185029Spjd spa->spa_pool_props_object, 1352185029Spjd zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), 1353185029Spjd sizeof (uint64_t), 1, &spa->spa_failmode); 1354168404Spjd } 1355168404Spjd 1356168404Spjd /* 1357185029Spjd * If the 'autoreplace' property is set, then post a resource notifying 1358185029Spjd * the ZFS DE that it should not issue any faults for unopenable 1359185029Spjd * devices. We also iterate over the vdevs, and post a sysevent for any 1360185029Spjd * unopenable vdevs so that the normal autoreplace handler can take 1361185029Spjd * over. 1362185029Spjd */ 1363185029Spjd if (autoreplace && state != SPA_LOAD_TRYIMPORT) 1364185029Spjd spa_check_removed(spa->spa_root_vdev); 1365185029Spjd 1366185029Spjd /* 1367168404Spjd * Load the vdev state for all toplevel vdevs. 1368168404Spjd */ 1369168404Spjd vdev_load(rvd); 1370168404Spjd 1371168404Spjd /* 1372168404Spjd * Propagate the leaf DTLs we just loaded all the way up the tree. 1373168404Spjd */ 1374185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1375168404Spjd vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 1376185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1377168404Spjd 1378168404Spjd /* 1379168404Spjd * Check the state of the root vdev. If it can't be opened, it 1380168404Spjd * indicates one or more toplevel vdevs are faulted. 1381168404Spjd */ 1382168404Spjd if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { 1383168404Spjd error = ENXIO; 1384168404Spjd goto out; 1385168404Spjd } 1386168404Spjd 1387168404Spjd if ((spa_mode & FWRITE) && state != SPA_LOAD_TRYIMPORT) { 1388168404Spjd dmu_tx_t *tx; 1389168404Spjd int need_update = B_FALSE; 1390168404Spjd int c; 1391168404Spjd 1392168404Spjd /* 1393168404Spjd * Claim log blocks that haven't been committed yet. 1394168404Spjd * This must all happen in a single txg. 1395168404Spjd */ 1396168404Spjd tx = dmu_tx_create_assigned(spa_get_dsl(spa), 1397168404Spjd spa_first_txg(spa)); 1398185029Spjd (void) dmu_objset_find(spa_name(spa), 1399168404Spjd zil_claim, tx, DS_FIND_CHILDREN); 1400168404Spjd dmu_tx_commit(tx); 1401168404Spjd 1402168404Spjd spa->spa_sync_on = B_TRUE; 1403168404Spjd txg_sync_start(spa->spa_dsl_pool); 1404168404Spjd 1405168404Spjd /* 1406168404Spjd * Wait for all claims to sync. 1407168404Spjd */ 1408168404Spjd txg_wait_synced(spa->spa_dsl_pool, 0); 1409168404Spjd 1410168404Spjd /* 1411168404Spjd * If the config cache is stale, or we have uninitialized 1412168404Spjd * metaslabs (see spa_vdev_add()), then update the config. 1413168404Spjd */ 1414168404Spjd if (config_cache_txg != spa->spa_config_txg || 1415168404Spjd state == SPA_LOAD_IMPORT) 1416168404Spjd need_update = B_TRUE; 1417168404Spjd 1418168404Spjd for (c = 0; c < rvd->vdev_children; c++) 1419168404Spjd if (rvd->vdev_child[c]->vdev_ms_array == 0) 1420168404Spjd need_update = B_TRUE; 1421168404Spjd 1422168404Spjd /* 1423168404Spjd * Update the config cache asychronously in case we're the 1424168404Spjd * root pool, in which case the config cache isn't writable yet. 1425168404Spjd */ 1426168404Spjd if (need_update) 1427168404Spjd spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 1428168404Spjd } 1429168404Spjd 1430168404Spjd error = 0; 1431168404Spjdout: 1432185029Spjd spa->spa_minref = refcount_count(&spa->spa_refcount); 1433168404Spjd if (error && error != EBADF) 1434185029Spjd zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 1435168404Spjd spa->spa_load_state = SPA_LOAD_NONE; 1436168404Spjd spa->spa_ena = 0; 1437168404Spjd 1438168404Spjd return (error); 1439168404Spjd} 1440168404Spjd 1441168404Spjd/* 1442168404Spjd * Pool Open/Import 1443168404Spjd * 1444168404Spjd * The import case is identical to an open except that the configuration is sent 1445168404Spjd * down from userland, instead of grabbed from the configuration cache. For the 1446168404Spjd * case of an open, the pool configuration will exist in the 1447185029Spjd * POOL_STATE_UNINITIALIZED state. 1448168404Spjd * 1449168404Spjd * The stats information (gen/count/ustats) is used to gather vdev statistics at 1450168404Spjd * the same time open the pool, without having to keep around the spa_t in some 1451168404Spjd * ambiguous state. 1452168404Spjd */ 1453168404Spjdstatic int 1454168404Spjdspa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) 1455168404Spjd{ 1456168404Spjd spa_t *spa; 1457168404Spjd int error; 1458168404Spjd int locked = B_FALSE; 1459168404Spjd 1460168404Spjd *spapp = NULL; 1461168404Spjd 1462168404Spjd /* 1463168404Spjd * As disgusting as this is, we need to support recursive calls to this 1464168404Spjd * function because dsl_dir_open() is called during spa_load(), and ends 1465168404Spjd * up calling spa_open() again. The real fix is to figure out how to 1466168404Spjd * avoid dsl_dir_open() calling this in the first place. 1467168404Spjd */ 1468168404Spjd if (mutex_owner(&spa_namespace_lock) != curthread) { 1469168404Spjd mutex_enter(&spa_namespace_lock); 1470168404Spjd locked = B_TRUE; 1471168404Spjd } 1472168404Spjd 1473168404Spjd if ((spa = spa_lookup(pool)) == NULL) { 1474168404Spjd if (locked) 1475168404Spjd mutex_exit(&spa_namespace_lock); 1476168404Spjd return (ENOENT); 1477168404Spjd } 1478168404Spjd if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 1479168404Spjd 1480168404Spjd spa_activate(spa); 1481168404Spjd 1482168404Spjd error = spa_load(spa, spa->spa_config, SPA_LOAD_OPEN, B_FALSE); 1483168404Spjd 1484168404Spjd if (error == EBADF) { 1485168404Spjd /* 1486168404Spjd * If vdev_validate() returns failure (indicated by 1487168404Spjd * EBADF), it indicates that one of the vdevs indicates 1488168404Spjd * that the pool has been exported or destroyed. If 1489168404Spjd * this is the case, the config cache is out of sync and 1490168404Spjd * we should remove the pool from the namespace. 1491168404Spjd */ 1492168404Spjd spa_unload(spa); 1493168404Spjd spa_deactivate(spa); 1494185029Spjd spa_config_sync(spa, B_TRUE, B_TRUE); 1495168404Spjd spa_remove(spa); 1496168404Spjd if (locked) 1497168404Spjd mutex_exit(&spa_namespace_lock); 1498168404Spjd return (ENOENT); 1499168404Spjd } 1500168404Spjd 1501168404Spjd if (error) { 1502168404Spjd /* 1503168404Spjd * We can't open the pool, but we still have useful 1504168404Spjd * information: the state of each vdev after the 1505168404Spjd * attempted vdev_open(). Return this to the user. 1506168404Spjd */ 1507185029Spjd if (config != NULL && spa->spa_root_vdev != NULL) 1508168404Spjd *config = spa_config_generate(spa, NULL, -1ULL, 1509168404Spjd B_TRUE); 1510168404Spjd spa_unload(spa); 1511168404Spjd spa_deactivate(spa); 1512168404Spjd spa->spa_last_open_failed = B_TRUE; 1513168404Spjd if (locked) 1514168404Spjd mutex_exit(&spa_namespace_lock); 1515168404Spjd *spapp = NULL; 1516168404Spjd return (error); 1517168404Spjd } else { 1518168404Spjd spa->spa_last_open_failed = B_FALSE; 1519168404Spjd } 1520168404Spjd } 1521168404Spjd 1522168404Spjd spa_open_ref(spa, tag); 1523185029Spjd 1524168404Spjd if (locked) 1525168404Spjd mutex_exit(&spa_namespace_lock); 1526168404Spjd 1527168404Spjd *spapp = spa; 1528168404Spjd 1529185029Spjd if (config != NULL) 1530168404Spjd *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 1531168404Spjd 1532168404Spjd return (0); 1533168404Spjd} 1534168404Spjd 1535168404Spjdint 1536168404Spjdspa_open(const char *name, spa_t **spapp, void *tag) 1537168404Spjd{ 1538168404Spjd return (spa_open_common(name, spapp, tag, NULL)); 1539168404Spjd} 1540168404Spjd 1541168404Spjd/* 1542168404Spjd * Lookup the given spa_t, incrementing the inject count in the process, 1543168404Spjd * preventing it from being exported or destroyed. 1544168404Spjd */ 1545168404Spjdspa_t * 1546168404Spjdspa_inject_addref(char *name) 1547168404Spjd{ 1548168404Spjd spa_t *spa; 1549168404Spjd 1550168404Spjd mutex_enter(&spa_namespace_lock); 1551168404Spjd if ((spa = spa_lookup(name)) == NULL) { 1552168404Spjd mutex_exit(&spa_namespace_lock); 1553168404Spjd return (NULL); 1554168404Spjd } 1555168404Spjd spa->spa_inject_ref++; 1556168404Spjd mutex_exit(&spa_namespace_lock); 1557168404Spjd 1558168404Spjd return (spa); 1559168404Spjd} 1560168404Spjd 1561168404Spjdvoid 1562168404Spjdspa_inject_delref(spa_t *spa) 1563168404Spjd{ 1564168404Spjd mutex_enter(&spa_namespace_lock); 1565168404Spjd spa->spa_inject_ref--; 1566168404Spjd mutex_exit(&spa_namespace_lock); 1567168404Spjd} 1568168404Spjd 1569185029Spjd/* 1570185029Spjd * Add spares device information to the nvlist. 1571185029Spjd */ 1572168404Spjdstatic void 1573168404Spjdspa_add_spares(spa_t *spa, nvlist_t *config) 1574168404Spjd{ 1575168404Spjd nvlist_t **spares; 1576168404Spjd uint_t i, nspares; 1577168404Spjd nvlist_t *nvroot; 1578168404Spjd uint64_t guid; 1579168404Spjd vdev_stat_t *vs; 1580168404Spjd uint_t vsc; 1581168404Spjd uint64_t pool; 1582168404Spjd 1583185029Spjd if (spa->spa_spares.sav_count == 0) 1584168404Spjd return; 1585168404Spjd 1586168404Spjd VERIFY(nvlist_lookup_nvlist(config, 1587168404Spjd ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 1588185029Spjd VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 1589168404Spjd ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 1590168404Spjd if (nspares != 0) { 1591168404Spjd VERIFY(nvlist_add_nvlist_array(nvroot, 1592168404Spjd ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 1593168404Spjd VERIFY(nvlist_lookup_nvlist_array(nvroot, 1594168404Spjd ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 1595168404Spjd 1596168404Spjd /* 1597168404Spjd * Go through and find any spares which have since been 1598168404Spjd * repurposed as an active spare. If this is the case, update 1599168404Spjd * their status appropriately. 1600168404Spjd */ 1601168404Spjd for (i = 0; i < nspares; i++) { 1602168404Spjd VERIFY(nvlist_lookup_uint64(spares[i], 1603168404Spjd ZPOOL_CONFIG_GUID, &guid) == 0); 1604185029Spjd if (spa_spare_exists(guid, &pool, NULL) && 1605185029Spjd pool != 0ULL) { 1606168404Spjd VERIFY(nvlist_lookup_uint64_array( 1607168404Spjd spares[i], ZPOOL_CONFIG_STATS, 1608168404Spjd (uint64_t **)&vs, &vsc) == 0); 1609168404Spjd vs->vs_state = VDEV_STATE_CANT_OPEN; 1610168404Spjd vs->vs_aux = VDEV_AUX_SPARED; 1611168404Spjd } 1612168404Spjd } 1613168404Spjd } 1614168404Spjd} 1615168404Spjd 1616185029Spjd/* 1617185029Spjd * Add l2cache device information to the nvlist, including vdev stats. 1618185029Spjd */ 1619185029Spjdstatic void 1620185029Spjdspa_add_l2cache(spa_t *spa, nvlist_t *config) 1621185029Spjd{ 1622185029Spjd nvlist_t **l2cache; 1623185029Spjd uint_t i, j, nl2cache; 1624185029Spjd nvlist_t *nvroot; 1625185029Spjd uint64_t guid; 1626185029Spjd vdev_t *vd; 1627185029Spjd vdev_stat_t *vs; 1628185029Spjd uint_t vsc; 1629185029Spjd 1630185029Spjd if (spa->spa_l2cache.sav_count == 0) 1631185029Spjd return; 1632185029Spjd 1633185029Spjd spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 1634185029Spjd 1635185029Spjd VERIFY(nvlist_lookup_nvlist(config, 1636185029Spjd ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 1637185029Spjd VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 1638185029Spjd ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 1639185029Spjd if (nl2cache != 0) { 1640185029Spjd VERIFY(nvlist_add_nvlist_array(nvroot, 1641185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 1642185029Spjd VERIFY(nvlist_lookup_nvlist_array(nvroot, 1643185029Spjd ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 1644185029Spjd 1645185029Spjd /* 1646185029Spjd * Update level 2 cache device stats. 1647185029Spjd */ 1648185029Spjd 1649185029Spjd for (i = 0; i < nl2cache; i++) { 1650185029Spjd VERIFY(nvlist_lookup_uint64(l2cache[i], 1651185029Spjd ZPOOL_CONFIG_GUID, &guid) == 0); 1652185029Spjd 1653185029Spjd vd = NULL; 1654185029Spjd for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 1655185029Spjd if (guid == 1656185029Spjd spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 1657185029Spjd vd = spa->spa_l2cache.sav_vdevs[j]; 1658185029Spjd break; 1659185029Spjd } 1660185029Spjd } 1661185029Spjd ASSERT(vd != NULL); 1662185029Spjd 1663185029Spjd VERIFY(nvlist_lookup_uint64_array(l2cache[i], 1664185029Spjd ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); 1665185029Spjd vdev_get_stats(vd, vs); 1666185029Spjd } 1667185029Spjd } 1668185029Spjd 1669185029Spjd spa_config_exit(spa, SCL_CONFIG, FTAG); 1670185029Spjd} 1671185029Spjd 1672168404Spjdint 1673168404Spjdspa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) 1674168404Spjd{ 1675168404Spjd int error; 1676168404Spjd spa_t *spa; 1677168404Spjd 1678168404Spjd *config = NULL; 1679168404Spjd error = spa_open_common(name, &spa, FTAG, config); 1680168404Spjd 1681168404Spjd if (spa && *config != NULL) { 1682168404Spjd VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, 1683168404Spjd spa_get_errlog_size(spa)) == 0); 1684168404Spjd 1685185029Spjd if (spa_suspended(spa)) 1686185029Spjd VERIFY(nvlist_add_uint64(*config, 1687185029Spjd ZPOOL_CONFIG_SUSPENDED, spa->spa_failmode) == 0); 1688185029Spjd 1689168404Spjd spa_add_spares(spa, *config); 1690185029Spjd spa_add_l2cache(spa, *config); 1691168404Spjd } 1692168404Spjd 1693168404Spjd /* 1694168404Spjd * We want to get the alternate root even for faulted pools, so we cheat 1695168404Spjd * and call spa_lookup() directly. 1696168404Spjd */ 1697168404Spjd if (altroot) { 1698168404Spjd if (spa == NULL) { 1699168404Spjd mutex_enter(&spa_namespace_lock); 1700168404Spjd spa = spa_lookup(name); 1701168404Spjd if (spa) 1702168404Spjd spa_altroot(spa, altroot, buflen); 1703168404Spjd else 1704168404Spjd altroot[0] = '\0'; 1705168404Spjd spa = NULL; 1706168404Spjd mutex_exit(&spa_namespace_lock); 1707168404Spjd } else { 1708168404Spjd spa_altroot(spa, altroot, buflen); 1709168404Spjd } 1710168404Spjd } 1711168404Spjd 1712168404Spjd if (spa != NULL) 1713168404Spjd spa_close(spa, FTAG); 1714168404Spjd 1715168404Spjd return (error); 1716168404Spjd} 1717168404Spjd 1718168404Spjd/* 1719185029Spjd * Validate that the auxiliary device array is well formed. We must have an 1720185029Spjd * array of nvlists, each which describes a valid leaf vdev. If this is an 1721185029Spjd * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 1722185029Spjd * specified, as long as they are well-formed. 1723168404Spjd */ 1724168404Spjdstatic int 1725185029Spjdspa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 1726185029Spjd spa_aux_vdev_t *sav, const char *config, uint64_t version, 1727185029Spjd vdev_labeltype_t label) 1728168404Spjd{ 1729185029Spjd nvlist_t **dev; 1730185029Spjd uint_t i, ndev; 1731168404Spjd vdev_t *vd; 1732168404Spjd int error; 1733168404Spjd 1734185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1735185029Spjd 1736168404Spjd /* 1737185029Spjd * It's acceptable to have no devs specified. 1738168404Spjd */ 1739185029Spjd if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 1740168404Spjd return (0); 1741168404Spjd 1742185029Spjd if (ndev == 0) 1743168404Spjd return (EINVAL); 1744168404Spjd 1745168404Spjd /* 1746185029Spjd * Make sure the pool is formatted with a version that supports this 1747185029Spjd * device type. 1748168404Spjd */ 1749185029Spjd if (spa_version(spa) < version) 1750168404Spjd return (ENOTSUP); 1751168404Spjd 1752168404Spjd /* 1753185029Spjd * Set the pending device list so we correctly handle device in-use 1754168404Spjd * checking. 1755168404Spjd */ 1756185029Spjd sav->sav_pending = dev; 1757185029Spjd sav->sav_npending = ndev; 1758168404Spjd 1759185029Spjd for (i = 0; i < ndev; i++) { 1760185029Spjd if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 1761168404Spjd mode)) != 0) 1762168404Spjd goto out; 1763168404Spjd 1764168404Spjd if (!vd->vdev_ops->vdev_op_leaf) { 1765168404Spjd vdev_free(vd); 1766168404Spjd error = EINVAL; 1767168404Spjd goto out; 1768168404Spjd } 1769168404Spjd 1770185029Spjd /* 1771185029Spjd * The L2ARC currently only supports disk devices in 1772185029Spjd * kernel context. For user-level testing, we allow it. 1773185029Spjd */ 1774185029Spjd#ifdef _KERNEL 1775185029Spjd if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 1776185029Spjd strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 1777185029Spjd error = ENOTBLK; 1778185029Spjd goto out; 1779185029Spjd } 1780185029Spjd#endif 1781168404Spjd vd->vdev_top = vd; 1782168404Spjd 1783168404Spjd if ((error = vdev_open(vd)) == 0 && 1784185029Spjd (error = vdev_label_init(vd, crtxg, label)) == 0) { 1785185029Spjd VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 1786168404Spjd vd->vdev_guid) == 0); 1787168404Spjd } 1788168404Spjd 1789168404Spjd vdev_free(vd); 1790168404Spjd 1791185029Spjd if (error && 1792185029Spjd (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 1793168404Spjd goto out; 1794168404Spjd else 1795168404Spjd error = 0; 1796168404Spjd } 1797168404Spjd 1798168404Spjdout: 1799185029Spjd sav->sav_pending = NULL; 1800185029Spjd sav->sav_npending = 0; 1801168404Spjd return (error); 1802168404Spjd} 1803168404Spjd 1804185029Spjdstatic int 1805185029Spjdspa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 1806185029Spjd{ 1807185029Spjd int error; 1808185029Spjd 1809185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1810185029Spjd 1811185029Spjd if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 1812185029Spjd &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 1813185029Spjd VDEV_LABEL_SPARE)) != 0) { 1814185029Spjd return (error); 1815185029Spjd } 1816185029Spjd 1817185029Spjd return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 1818185029Spjd &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 1819185029Spjd VDEV_LABEL_L2CACHE)); 1820185029Spjd} 1821185029Spjd 1822185029Spjdstatic void 1823185029Spjdspa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 1824185029Spjd const char *config) 1825185029Spjd{ 1826185029Spjd int i; 1827185029Spjd 1828185029Spjd if (sav->sav_config != NULL) { 1829185029Spjd nvlist_t **olddevs; 1830185029Spjd uint_t oldndevs; 1831185029Spjd nvlist_t **newdevs; 1832185029Spjd 1833185029Spjd /* 1834185029Spjd * Generate new dev list by concatentating with the 1835185029Spjd * current dev list. 1836185029Spjd */ 1837185029Spjd VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 1838185029Spjd &olddevs, &oldndevs) == 0); 1839185029Spjd 1840185029Spjd newdevs = kmem_alloc(sizeof (void *) * 1841185029Spjd (ndevs + oldndevs), KM_SLEEP); 1842185029Spjd for (i = 0; i < oldndevs; i++) 1843185029Spjd VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 1844185029Spjd KM_SLEEP) == 0); 1845185029Spjd for (i = 0; i < ndevs; i++) 1846185029Spjd VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 1847185029Spjd KM_SLEEP) == 0); 1848185029Spjd 1849185029Spjd VERIFY(nvlist_remove(sav->sav_config, config, 1850185029Spjd DATA_TYPE_NVLIST_ARRAY) == 0); 1851185029Spjd 1852185029Spjd VERIFY(nvlist_add_nvlist_array(sav->sav_config, 1853185029Spjd config, newdevs, ndevs + oldndevs) == 0); 1854185029Spjd for (i = 0; i < oldndevs + ndevs; i++) 1855185029Spjd nvlist_free(newdevs[i]); 1856185029Spjd kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 1857185029Spjd } else { 1858185029Spjd /* 1859185029Spjd * Generate a new dev list. 1860185029Spjd */ 1861185029Spjd VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 1862185029Spjd KM_SLEEP) == 0); 1863185029Spjd VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 1864185029Spjd devs, ndevs) == 0); 1865185029Spjd } 1866185029Spjd} 1867185029Spjd 1868168404Spjd/* 1869185029Spjd * Stop and drop level 2 ARC devices 1870185029Spjd */ 1871185029Spjdvoid 1872185029Spjdspa_l2cache_drop(spa_t *spa) 1873185029Spjd{ 1874185029Spjd vdev_t *vd; 1875185029Spjd int i; 1876185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 1877185029Spjd 1878185029Spjd for (i = 0; i < sav->sav_count; i++) { 1879185029Spjd uint64_t pool; 1880185029Spjd 1881185029Spjd vd = sav->sav_vdevs[i]; 1882185029Spjd ASSERT(vd != NULL); 1883185029Spjd 1884185029Spjd if ((spa_mode & FWRITE) && 1885185029Spjd spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL && 1886185029Spjd l2arc_vdev_present(vd)) { 1887185029Spjd l2arc_remove_vdev(vd); 1888185029Spjd } 1889185029Spjd if (vd->vdev_isl2cache) 1890185029Spjd spa_l2cache_remove(vd); 1891185029Spjd vdev_clear_stats(vd); 1892185029Spjd (void) vdev_close(vd); 1893185029Spjd } 1894185029Spjd} 1895185029Spjd 1896185029Spjd/* 1897168404Spjd * Pool Creation 1898168404Spjd */ 1899168404Spjdint 1900185029Spjdspa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 1901185029Spjd const char *history_str, nvlist_t *zplprops) 1902168404Spjd{ 1903168404Spjd spa_t *spa; 1904185029Spjd char *altroot = NULL; 1905168404Spjd vdev_t *rvd; 1906168404Spjd dsl_pool_t *dp; 1907168404Spjd dmu_tx_t *tx; 1908168404Spjd int c, error = 0; 1909168404Spjd uint64_t txg = TXG_INITIAL; 1910185029Spjd nvlist_t **spares, **l2cache; 1911185029Spjd uint_t nspares, nl2cache; 1912185029Spjd uint64_t version; 1913168404Spjd 1914168404Spjd /* 1915168404Spjd * If this pool already exists, return failure. 1916168404Spjd */ 1917168404Spjd mutex_enter(&spa_namespace_lock); 1918168404Spjd if (spa_lookup(pool) != NULL) { 1919168404Spjd mutex_exit(&spa_namespace_lock); 1920168404Spjd return (EEXIST); 1921168404Spjd } 1922168404Spjd 1923168404Spjd /* 1924168404Spjd * Allocate a new spa_t structure. 1925168404Spjd */ 1926185029Spjd (void) nvlist_lookup_string(props, 1927185029Spjd zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 1928168404Spjd spa = spa_add(pool, altroot); 1929168404Spjd spa_activate(spa); 1930168404Spjd 1931168404Spjd spa->spa_uberblock.ub_txg = txg - 1; 1932185029Spjd 1933185029Spjd if (props && (error = spa_prop_validate(spa, props))) { 1934185029Spjd spa_unload(spa); 1935185029Spjd spa_deactivate(spa); 1936185029Spjd spa_remove(spa); 1937185029Spjd mutex_exit(&spa_namespace_lock); 1938185029Spjd return (error); 1939185029Spjd } 1940185029Spjd 1941185029Spjd if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), 1942185029Spjd &version) != 0) 1943185029Spjd version = SPA_VERSION; 1944185029Spjd ASSERT(version <= SPA_VERSION); 1945185029Spjd spa->spa_uberblock.ub_version = version; 1946168404Spjd spa->spa_ubsync = spa->spa_uberblock; 1947168404Spjd 1948168404Spjd /* 1949168404Spjd * Create the root vdev. 1950168404Spjd */ 1951185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1952168404Spjd 1953168404Spjd error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 1954168404Spjd 1955168404Spjd ASSERT(error != 0 || rvd != NULL); 1956168404Spjd ASSERT(error != 0 || spa->spa_root_vdev == rvd); 1957168404Spjd 1958185029Spjd if (error == 0 && !zfs_allocatable_devs(nvroot)) 1959168404Spjd error = EINVAL; 1960168404Spjd 1961168404Spjd if (error == 0 && 1962168404Spjd (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 1963185029Spjd (error = spa_validate_aux(spa, nvroot, txg, 1964168404Spjd VDEV_ALLOC_ADD)) == 0) { 1965168404Spjd for (c = 0; c < rvd->vdev_children; c++) 1966168404Spjd vdev_init(rvd->vdev_child[c], txg); 1967168404Spjd vdev_config_dirty(rvd); 1968168404Spjd } 1969168404Spjd 1970185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1971168404Spjd 1972168404Spjd if (error != 0) { 1973168404Spjd spa_unload(spa); 1974168404Spjd spa_deactivate(spa); 1975168404Spjd spa_remove(spa); 1976168404Spjd mutex_exit(&spa_namespace_lock); 1977168404Spjd return (error); 1978168404Spjd } 1979168404Spjd 1980168404Spjd /* 1981168404Spjd * Get the list of spares, if specified. 1982168404Spjd */ 1983168404Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 1984168404Spjd &spares, &nspares) == 0) { 1985185029Spjd VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 1986168404Spjd KM_SLEEP) == 0); 1987185029Spjd VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 1988168404Spjd ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 1989185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1990168404Spjd spa_load_spares(spa); 1991185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1992185029Spjd spa->spa_spares.sav_sync = B_TRUE; 1993168404Spjd } 1994168404Spjd 1995185029Spjd /* 1996185029Spjd * Get the list of level 2 cache devices, if specified. 1997185029Spjd */ 1998185029Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 1999185029Spjd &l2cache, &nl2cache) == 0) { 2000185029Spjd VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 2001185029Spjd NV_UNIQUE_NAME, KM_SLEEP) == 0); 2002185029Spjd VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 2003185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2004185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2005185029Spjd spa_load_l2cache(spa); 2006185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2007185029Spjd spa->spa_l2cache.sav_sync = B_TRUE; 2008185029Spjd } 2009185029Spjd 2010185029Spjd spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 2011168404Spjd spa->spa_meta_objset = dp->dp_meta_objset; 2012168404Spjd 2013168404Spjd tx = dmu_tx_create_assigned(dp, txg); 2014168404Spjd 2015168404Spjd /* 2016168404Spjd * Create the pool config object. 2017168404Spjd */ 2018168404Spjd spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 2019185029Spjd DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 2020168404Spjd DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 2021168404Spjd 2022168404Spjd if (zap_add(spa->spa_meta_objset, 2023168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 2024168404Spjd sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 2025168404Spjd cmn_err(CE_PANIC, "failed to add pool config"); 2026168404Spjd } 2027168404Spjd 2028185029Spjd /* Newly created pools with the right version are always deflated. */ 2029185029Spjd if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 2030185029Spjd spa->spa_deflate = TRUE; 2031185029Spjd if (zap_add(spa->spa_meta_objset, 2032185029Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 2033185029Spjd sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 2034185029Spjd cmn_err(CE_PANIC, "failed to add deflate"); 2035185029Spjd } 2036168404Spjd } 2037168404Spjd 2038168404Spjd /* 2039168404Spjd * Create the deferred-free bplist object. Turn off compression 2040168404Spjd * because sync-to-convergence takes longer if the blocksize 2041168404Spjd * keeps changing. 2042168404Spjd */ 2043168404Spjd spa->spa_sync_bplist_obj = bplist_create(spa->spa_meta_objset, 2044168404Spjd 1 << 14, tx); 2045168404Spjd dmu_object_set_compress(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 2046168404Spjd ZIO_COMPRESS_OFF, tx); 2047168404Spjd 2048168404Spjd if (zap_add(spa->spa_meta_objset, 2049168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST, 2050168404Spjd sizeof (uint64_t), 1, &spa->spa_sync_bplist_obj, tx) != 0) { 2051168404Spjd cmn_err(CE_PANIC, "failed to add bplist"); 2052168404Spjd } 2053168404Spjd 2054168404Spjd /* 2055168404Spjd * Create the pool's history object. 2056168404Spjd */ 2057185029Spjd if (version >= SPA_VERSION_ZPOOL_HISTORY) 2058185029Spjd spa_history_create_obj(spa, tx); 2059168404Spjd 2060185029Spjd /* 2061185029Spjd * Set pool properties. 2062185029Spjd */ 2063185029Spjd spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 2064185029Spjd spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 2065185029Spjd spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 2066185029Spjd if (props) 2067185029Spjd spa_sync_props(spa, props, CRED(), tx); 2068185029Spjd 2069168404Spjd dmu_tx_commit(tx); 2070168404Spjd 2071168404Spjd spa->spa_sync_on = B_TRUE; 2072168404Spjd txg_sync_start(spa->spa_dsl_pool); 2073168404Spjd 2074168404Spjd /* 2075168404Spjd * We explicitly wait for the first transaction to complete so that our 2076168404Spjd * bean counters are appropriately updated. 2077168404Spjd */ 2078168404Spjd txg_wait_synced(spa->spa_dsl_pool, txg); 2079168404Spjd 2080185029Spjd spa_config_sync(spa, B_FALSE, B_TRUE); 2081168404Spjd 2082185029Spjd if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 2083185029Spjd (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 2084185029Spjd 2085168404Spjd mutex_exit(&spa_namespace_lock); 2086168404Spjd 2087185029Spjd spa->spa_minref = refcount_count(&spa->spa_refcount); 2088185029Spjd 2089168404Spjd return (0); 2090168404Spjd} 2091168404Spjd 2092168404Spjd/* 2093168404Spjd * Import the given pool into the system. We set up the necessary spa_t and 2094168404Spjd * then call spa_load() to do the dirty work. 2095168404Spjd */ 2096185029Spjdstatic int 2097185029Spjdspa_import_common(const char *pool, nvlist_t *config, nvlist_t *props, 2098185029Spjd boolean_t isroot, boolean_t allowfaulted) 2099168404Spjd{ 2100168404Spjd spa_t *spa; 2101185029Spjd char *altroot = NULL; 2102185029Spjd int error, loaderr; 2103168404Spjd nvlist_t *nvroot; 2104185029Spjd nvlist_t **spares, **l2cache; 2105185029Spjd uint_t nspares, nl2cache; 2106168404Spjd 2107168404Spjd /* 2108168404Spjd * If a pool with this name exists, return failure. 2109168404Spjd */ 2110168404Spjd mutex_enter(&spa_namespace_lock); 2111168404Spjd if (spa_lookup(pool) != NULL) { 2112168404Spjd mutex_exit(&spa_namespace_lock); 2113168404Spjd return (EEXIST); 2114168404Spjd } 2115168404Spjd 2116168404Spjd /* 2117168404Spjd * Create and initialize the spa structure. 2118168404Spjd */ 2119185029Spjd (void) nvlist_lookup_string(props, 2120185029Spjd zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 2121168404Spjd spa = spa_add(pool, altroot); 2122168404Spjd spa_activate(spa); 2123168404Spjd 2124185029Spjd if (allowfaulted) 2125185029Spjd spa->spa_import_faulted = B_TRUE; 2126185029Spjd spa->spa_is_root = isroot; 2127185029Spjd 2128168404Spjd /* 2129168404Spjd * Pass off the heavy lifting to spa_load(). 2130185029Spjd * Pass TRUE for mosconfig (unless this is a root pool) because 2131185029Spjd * the user-supplied config is actually the one to trust when 2132185029Spjd * doing an import. 2133168404Spjd */ 2134185029Spjd loaderr = error = spa_load(spa, config, SPA_LOAD_IMPORT, !isroot); 2135168404Spjd 2136185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2137168404Spjd /* 2138168404Spjd * Toss any existing sparelist, as it doesn't have any validity anymore, 2139168404Spjd * and conflicts with spa_has_spare(). 2140168404Spjd */ 2141185029Spjd if (!isroot && spa->spa_spares.sav_config) { 2142185029Spjd nvlist_free(spa->spa_spares.sav_config); 2143185029Spjd spa->spa_spares.sav_config = NULL; 2144168404Spjd spa_load_spares(spa); 2145168404Spjd } 2146185029Spjd if (!isroot && spa->spa_l2cache.sav_config) { 2147185029Spjd nvlist_free(spa->spa_l2cache.sav_config); 2148185029Spjd spa->spa_l2cache.sav_config = NULL; 2149185029Spjd spa_load_l2cache(spa); 2150185029Spjd } 2151168404Spjd 2152168404Spjd VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 2153168404Spjd &nvroot) == 0); 2154168404Spjd if (error == 0) 2155185029Spjd error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE); 2156185029Spjd if (error == 0) 2157185029Spjd error = spa_validate_aux(spa, nvroot, -1ULL, 2158185029Spjd VDEV_ALLOC_L2CACHE); 2159185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2160168404Spjd 2161185029Spjd if (error != 0 || (props && (error = spa_prop_set(spa, props)))) { 2162185029Spjd if (loaderr != 0 && loaderr != EINVAL && allowfaulted) { 2163185029Spjd /* 2164185029Spjd * If we failed to load the pool, but 'allowfaulted' is 2165185029Spjd * set, then manually set the config as if the config 2166185029Spjd * passed in was specified in the cache file. 2167185029Spjd */ 2168185029Spjd error = 0; 2169185029Spjd spa->spa_import_faulted = B_FALSE; 2170185029Spjd if (spa->spa_config == NULL) 2171185029Spjd spa->spa_config = spa_config_generate(spa, 2172185029Spjd NULL, -1ULL, B_TRUE); 2173185029Spjd spa_unload(spa); 2174185029Spjd spa_deactivate(spa); 2175185029Spjd spa_config_sync(spa, B_FALSE, B_TRUE); 2176185029Spjd } else { 2177185029Spjd spa_unload(spa); 2178185029Spjd spa_deactivate(spa); 2179185029Spjd spa_remove(spa); 2180185029Spjd } 2181168404Spjd mutex_exit(&spa_namespace_lock); 2182168404Spjd return (error); 2183168404Spjd } 2184168404Spjd 2185168404Spjd /* 2186185029Spjd * Override any spares and level 2 cache devices as specified by 2187185029Spjd * the user, as these may have correct device names/devids, etc. 2188168404Spjd */ 2189168404Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 2190168404Spjd &spares, &nspares) == 0) { 2191185029Spjd if (spa->spa_spares.sav_config) 2192185029Spjd VERIFY(nvlist_remove(spa->spa_spares.sav_config, 2193168404Spjd ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); 2194168404Spjd else 2195185029Spjd VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, 2196168404Spjd NV_UNIQUE_NAME, KM_SLEEP) == 0); 2197185029Spjd VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 2198168404Spjd ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 2199185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2200168404Spjd spa_load_spares(spa); 2201185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2202185029Spjd spa->spa_spares.sav_sync = B_TRUE; 2203168404Spjd } 2204185029Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 2205185029Spjd &l2cache, &nl2cache) == 0) { 2206185029Spjd if (spa->spa_l2cache.sav_config) 2207185029Spjd VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, 2208185029Spjd ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); 2209185029Spjd else 2210185029Spjd VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 2211185029Spjd NV_UNIQUE_NAME, KM_SLEEP) == 0); 2212185029Spjd VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 2213185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 2214185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2215185029Spjd spa_load_l2cache(spa); 2216185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2217185029Spjd spa->spa_l2cache.sav_sync = B_TRUE; 2218185029Spjd } 2219168404Spjd 2220185029Spjd if (spa_mode & FWRITE) { 2221185029Spjd /* 2222185029Spjd * Update the config cache to include the newly-imported pool. 2223185029Spjd */ 2224185029Spjd spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, isroot); 2225185029Spjd } 2226185029Spjd 2227185029Spjd spa->spa_import_faulted = B_FALSE; 2228185029Spjd mutex_exit(&spa_namespace_lock); 2229185029Spjd 2230185029Spjd return (0); 2231185029Spjd} 2232185029Spjd 2233185029Spjd#if defined(sun) 2234185029Spjd#ifdef _KERNEL 2235185029Spjd/* 2236185029Spjd * Build a "root" vdev for a top level vdev read in from a rootpool 2237185029Spjd * device label. 2238185029Spjd */ 2239185029Spjdstatic void 2240185029Spjdspa_build_rootpool_config(nvlist_t *config) 2241185029Spjd{ 2242185029Spjd nvlist_t *nvtop, *nvroot; 2243185029Spjd uint64_t pgid; 2244185029Spjd 2245168404Spjd /* 2246185029Spjd * Add this top-level vdev to the child array. 2247168404Spjd */ 2248185029Spjd VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) 2249185029Spjd == 0); 2250185029Spjd VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) 2251185029Spjd == 0); 2252168404Spjd 2253185029Spjd /* 2254185029Spjd * Put this pool's top-level vdevs into a root vdev. 2255185029Spjd */ 2256185029Spjd VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2257185029Spjd VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) 2258185029Spjd == 0); 2259185029Spjd VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 2260185029Spjd VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 2261185029Spjd VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 2262185029Spjd &nvtop, 1) == 0); 2263168404Spjd 2264168404Spjd /* 2265185029Spjd * Replace the existing vdev_tree with the new root vdev in 2266185029Spjd * this pool's configuration (remove the old, add the new). 2267168404Spjd */ 2268185029Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 2269185029Spjd nvlist_free(nvroot); 2270185029Spjd} 2271168404Spjd 2272185029Spjd/* 2273185029Spjd * Get the root pool information from the root disk, then import the root pool 2274185029Spjd * during the system boot up time. 2275185029Spjd */ 2276185029Spjdextern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 2277185029Spjd 2278185029Spjdint 2279185029Spjdspa_check_rootconf(char *devpath, char *devid, nvlist_t **bestconf, 2280185029Spjd uint64_t *besttxg) 2281185029Spjd{ 2282185029Spjd nvlist_t *config; 2283185029Spjd uint64_t txg; 2284185029Spjd int error; 2285185029Spjd 2286185029Spjd if (error = vdev_disk_read_rootlabel(devpath, devid, &config)) 2287185029Spjd return (error); 2288185029Spjd 2289185029Spjd VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 2290185029Spjd 2291185029Spjd if (bestconf != NULL) 2292185029Spjd *bestconf = config; 2293185029Spjd else 2294185029Spjd nvlist_free(config); 2295185029Spjd *besttxg = txg; 2296168404Spjd return (0); 2297168404Spjd} 2298168404Spjd 2299185029Spjdboolean_t 2300185029Spjdspa_rootdev_validate(nvlist_t *nv) 2301185029Spjd{ 2302185029Spjd uint64_t ival; 2303185029Spjd 2304185029Spjd if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || 2305185029Spjd nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || 2306185029Spjd nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) 2307185029Spjd return (B_FALSE); 2308185029Spjd 2309185029Spjd return (B_TRUE); 2310185029Spjd} 2311185029Spjd 2312185029Spjd 2313168404Spjd/* 2314185029Spjd * Given the boot device's physical path or devid, check if the device 2315185029Spjd * is in a valid state. If so, return the configuration from the vdev 2316185029Spjd * label. 2317185029Spjd */ 2318185029Spjdint 2319185029Spjdspa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf) 2320185029Spjd{ 2321185029Spjd nvlist_t *conf = NULL; 2322185029Spjd uint64_t txg = 0; 2323185029Spjd nvlist_t *nvtop, **child; 2324185029Spjd char *type; 2325185029Spjd char *bootpath = NULL; 2326185029Spjd uint_t children, c; 2327185029Spjd char *tmp; 2328185029Spjd int error; 2329185029Spjd 2330185029Spjd if (devpath && ((tmp = strchr(devpath, ' ')) != NULL)) 2331185029Spjd *tmp = '\0'; 2332185029Spjd if (error = spa_check_rootconf(devpath, devid, &conf, &txg)) { 2333185029Spjd cmn_err(CE_NOTE, "error reading device label"); 2334185029Spjd return (error); 2335185029Spjd } 2336185029Spjd if (txg == 0) { 2337185029Spjd cmn_err(CE_NOTE, "this device is detached"); 2338185029Spjd nvlist_free(conf); 2339185029Spjd return (EINVAL); 2340185029Spjd } 2341185029Spjd 2342185029Spjd VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE, 2343185029Spjd &nvtop) == 0); 2344185029Spjd VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0); 2345185029Spjd 2346185029Spjd if (strcmp(type, VDEV_TYPE_DISK) == 0) { 2347185029Spjd if (spa_rootdev_validate(nvtop)) { 2348185029Spjd goto out; 2349185029Spjd } else { 2350185029Spjd nvlist_free(conf); 2351185029Spjd return (EINVAL); 2352185029Spjd } 2353185029Spjd } 2354185029Spjd 2355185029Spjd ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0); 2356185029Spjd 2357185029Spjd VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN, 2358185029Spjd &child, &children) == 0); 2359185029Spjd 2360185029Spjd /* 2361185029Spjd * Go thru vdevs in the mirror to see if the given device 2362185029Spjd * has the most recent txg. Only the device with the most 2363185029Spjd * recent txg has valid information and should be booted. 2364185029Spjd */ 2365185029Spjd for (c = 0; c < children; c++) { 2366185029Spjd char *cdevid, *cpath; 2367185029Spjd uint64_t tmptxg; 2368185029Spjd 2369185029Spjd if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH, 2370185029Spjd &cpath) != 0) 2371185029Spjd return (EINVAL); 2372185029Spjd if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_DEVID, 2373185029Spjd &cdevid) != 0) 2374185029Spjd return (EINVAL); 2375185029Spjd if ((spa_check_rootconf(cpath, cdevid, NULL, 2376185029Spjd &tmptxg) == 0) && (tmptxg > txg)) { 2377185029Spjd txg = tmptxg; 2378185029Spjd VERIFY(nvlist_lookup_string(child[c], 2379185029Spjd ZPOOL_CONFIG_PATH, &bootpath) == 0); 2380185029Spjd } 2381185029Spjd } 2382185029Spjd 2383185029Spjd /* Does the best device match the one we've booted from? */ 2384185029Spjd if (bootpath) { 2385185029Spjd cmn_err(CE_NOTE, "try booting from '%s'", bootpath); 2386185029Spjd return (EINVAL); 2387185029Spjd } 2388185029Spjdout: 2389185029Spjd *bestconf = conf; 2390185029Spjd return (0); 2391185029Spjd} 2392185029Spjd 2393185029Spjd/* 2394185029Spjd * Import a root pool. 2395185029Spjd * 2396185029Spjd * For x86. devpath_list will consist of devid and/or physpath name of 2397185029Spjd * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 2398185029Spjd * The GRUB "findroot" command will return the vdev we should boot. 2399185029Spjd * 2400185029Spjd * For Sparc, devpath_list consists the physpath name of the booting device 2401185029Spjd * no matter the rootpool is a single device pool or a mirrored pool. 2402185029Spjd * e.g. 2403185029Spjd * "/pci@1f,0/ide@d/disk@0,0:a" 2404185029Spjd */ 2405185029Spjdint 2406185029Spjdspa_import_rootpool(char *devpath, char *devid) 2407185029Spjd{ 2408185029Spjd nvlist_t *conf = NULL; 2409185029Spjd char *pname; 2410185029Spjd int error; 2411185029Spjd 2412185029Spjd /* 2413185029Spjd * Get the vdev pathname and configuation from the most 2414185029Spjd * recently updated vdev (highest txg). 2415185029Spjd */ 2416185029Spjd if (error = spa_get_rootconf(devpath, devid, &conf)) 2417185029Spjd goto msg_out; 2418185029Spjd 2419185029Spjd /* 2420185029Spjd * Add type "root" vdev to the config. 2421185029Spjd */ 2422185029Spjd spa_build_rootpool_config(conf); 2423185029Spjd 2424185029Spjd VERIFY(nvlist_lookup_string(conf, ZPOOL_CONFIG_POOL_NAME, &pname) == 0); 2425185029Spjd 2426185029Spjd /* 2427185029Spjd * We specify 'allowfaulted' for this to be treated like spa_open() 2428185029Spjd * instead of spa_import(). This prevents us from marking vdevs as 2429185029Spjd * persistently unavailable, and generates FMA ereports as if it were a 2430185029Spjd * pool open, not import. 2431185029Spjd */ 2432185029Spjd error = spa_import_common(pname, conf, NULL, B_TRUE, B_TRUE); 2433185029Spjd if (error == EEXIST) 2434185029Spjd error = 0; 2435185029Spjd 2436185029Spjd nvlist_free(conf); 2437185029Spjd return (error); 2438185029Spjd 2439185029Spjdmsg_out: 2440185029Spjd cmn_err(CE_NOTE, "\n" 2441185029Spjd " *************************************************** \n" 2442185029Spjd " * This device is not bootable! * \n" 2443185029Spjd " * It is either offlined or detached or faulted. * \n" 2444185029Spjd " * Please try to boot from a different device. * \n" 2445185029Spjd " *************************************************** "); 2446185029Spjd 2447185029Spjd return (error); 2448185029Spjd} 2449185029Spjd#endif 2450185029Spjd#endif 2451185029Spjd 2452185029Spjd/* 2453185029Spjd * Import a non-root pool into the system. 2454185029Spjd */ 2455185029Spjdint 2456185029Spjdspa_import(const char *pool, nvlist_t *config, nvlist_t *props) 2457185029Spjd{ 2458185029Spjd return (spa_import_common(pool, config, props, B_FALSE, B_FALSE)); 2459185029Spjd} 2460185029Spjd 2461185029Spjdint 2462185029Spjdspa_import_faulted(const char *pool, nvlist_t *config, nvlist_t *props) 2463185029Spjd{ 2464185029Spjd return (spa_import_common(pool, config, props, B_FALSE, B_TRUE)); 2465185029Spjd} 2466185029Spjd 2467185029Spjd 2468185029Spjd/* 2469168404Spjd * This (illegal) pool name is used when temporarily importing a spa_t in order 2470168404Spjd * to get the vdev stats associated with the imported devices. 2471168404Spjd */ 2472168404Spjd#define TRYIMPORT_NAME "$import" 2473168404Spjd 2474168404Spjdnvlist_t * 2475168404Spjdspa_tryimport(nvlist_t *tryconfig) 2476168404Spjd{ 2477168404Spjd nvlist_t *config = NULL; 2478168404Spjd char *poolname; 2479168404Spjd spa_t *spa; 2480168404Spjd uint64_t state; 2481168404Spjd 2482168404Spjd if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) 2483168404Spjd return (NULL); 2484168404Spjd 2485168404Spjd if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) 2486168404Spjd return (NULL); 2487168404Spjd 2488168404Spjd /* 2489168404Spjd * Create and initialize the spa structure. 2490168404Spjd */ 2491168404Spjd mutex_enter(&spa_namespace_lock); 2492168404Spjd spa = spa_add(TRYIMPORT_NAME, NULL); 2493168404Spjd spa_activate(spa); 2494168404Spjd 2495168404Spjd /* 2496168404Spjd * Pass off the heavy lifting to spa_load(). 2497168404Spjd * Pass TRUE for mosconfig because the user-supplied config 2498168404Spjd * is actually the one to trust when doing an import. 2499168404Spjd */ 2500168404Spjd (void) spa_load(spa, tryconfig, SPA_LOAD_TRYIMPORT, B_TRUE); 2501168404Spjd 2502168404Spjd /* 2503168404Spjd * If 'tryconfig' was at least parsable, return the current config. 2504168404Spjd */ 2505168404Spjd if (spa->spa_root_vdev != NULL) { 2506168404Spjd config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2507168404Spjd VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 2508168404Spjd poolname) == 0); 2509168404Spjd VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 2510168404Spjd state) == 0); 2511168498Spjd VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 2512168498Spjd spa->spa_uberblock.ub_timestamp) == 0); 2513168404Spjd 2514168404Spjd /* 2515185029Spjd * If the bootfs property exists on this pool then we 2516185029Spjd * copy it out so that external consumers can tell which 2517185029Spjd * pools are bootable. 2518168404Spjd */ 2519185029Spjd if (spa->spa_bootfs) { 2520185029Spjd char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2521185029Spjd 2522185029Spjd /* 2523185029Spjd * We have to play games with the name since the 2524185029Spjd * pool was opened as TRYIMPORT_NAME. 2525185029Spjd */ 2526185029Spjd if (dsl_dsobj_to_dsname(spa_name(spa), 2527185029Spjd spa->spa_bootfs, tmpname) == 0) { 2528185029Spjd char *cp; 2529185029Spjd char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2530185029Spjd 2531185029Spjd cp = strchr(tmpname, '/'); 2532185029Spjd if (cp == NULL) { 2533185029Spjd (void) strlcpy(dsname, tmpname, 2534185029Spjd MAXPATHLEN); 2535185029Spjd } else { 2536185029Spjd (void) snprintf(dsname, MAXPATHLEN, 2537185029Spjd "%s/%s", poolname, ++cp); 2538185029Spjd } 2539185029Spjd VERIFY(nvlist_add_string(config, 2540185029Spjd ZPOOL_CONFIG_BOOTFS, dsname) == 0); 2541185029Spjd kmem_free(dsname, MAXPATHLEN); 2542185029Spjd } 2543185029Spjd kmem_free(tmpname, MAXPATHLEN); 2544185029Spjd } 2545185029Spjd 2546185029Spjd /* 2547185029Spjd * Add the list of hot spares and level 2 cache devices. 2548185029Spjd */ 2549168404Spjd spa_add_spares(spa, config); 2550185029Spjd spa_add_l2cache(spa, config); 2551168404Spjd } 2552168404Spjd 2553168404Spjd spa_unload(spa); 2554168404Spjd spa_deactivate(spa); 2555168404Spjd spa_remove(spa); 2556168404Spjd mutex_exit(&spa_namespace_lock); 2557168404Spjd 2558168404Spjd return (config); 2559168404Spjd} 2560168404Spjd 2561168404Spjd/* 2562168404Spjd * Pool export/destroy 2563168404Spjd * 2564168404Spjd * The act of destroying or exporting a pool is very simple. We make sure there 2565168404Spjd * is no more pending I/O and any references to the pool are gone. Then, we 2566168404Spjd * update the pool state and sync all the labels to disk, removing the 2567207670Smm * configuration from the cache afterwards. If the 'hardforce' flag is set, then 2568207670Smm * we don't sync the labels or remove the configuration cache. 2569168404Spjd */ 2570168404Spjdstatic int 2571185029Spjdspa_export_common(char *pool, int new_state, nvlist_t **oldconfig, 2572207670Smm boolean_t force, boolean_t hardforce) 2573168404Spjd{ 2574168404Spjd spa_t *spa; 2575168404Spjd 2576168404Spjd if (oldconfig) 2577168404Spjd *oldconfig = NULL; 2578168404Spjd 2579168404Spjd if (!(spa_mode & FWRITE)) 2580168404Spjd return (EROFS); 2581168404Spjd 2582168404Spjd mutex_enter(&spa_namespace_lock); 2583168404Spjd if ((spa = spa_lookup(pool)) == NULL) { 2584168404Spjd mutex_exit(&spa_namespace_lock); 2585168404Spjd return (ENOENT); 2586168404Spjd } 2587168404Spjd 2588168404Spjd /* 2589168404Spjd * Put a hold on the pool, drop the namespace lock, stop async tasks, 2590168404Spjd * reacquire the namespace lock, and see if we can export. 2591168404Spjd */ 2592168404Spjd spa_open_ref(spa, FTAG); 2593168404Spjd mutex_exit(&spa_namespace_lock); 2594168404Spjd spa_async_suspend(spa); 2595168404Spjd mutex_enter(&spa_namespace_lock); 2596168404Spjd spa_close(spa, FTAG); 2597168404Spjd 2598168404Spjd /* 2599168404Spjd * The pool will be in core if it's openable, 2600168404Spjd * in which case we can modify its state. 2601168404Spjd */ 2602168404Spjd if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 2603168404Spjd /* 2604168404Spjd * Objsets may be open only because they're dirty, so we 2605168404Spjd * have to force it to sync before checking spa_refcnt. 2606168404Spjd */ 2607168404Spjd txg_wait_synced(spa->spa_dsl_pool, 0); 2608168404Spjd 2609168404Spjd /* 2610168404Spjd * A pool cannot be exported or destroyed if there are active 2611168404Spjd * references. If we are resetting a pool, allow references by 2612168404Spjd * fault injection handlers. 2613168404Spjd */ 2614168404Spjd if (!spa_refcount_zero(spa) || 2615168404Spjd (spa->spa_inject_ref != 0 && 2616168404Spjd new_state != POOL_STATE_UNINITIALIZED)) { 2617168404Spjd spa_async_resume(spa); 2618168404Spjd mutex_exit(&spa_namespace_lock); 2619168404Spjd return (EBUSY); 2620168404Spjd } 2621168404Spjd 2622185029Spjd /* 2623185029Spjd * A pool cannot be exported if it has an active shared spare. 2624185029Spjd * This is to prevent other pools stealing the active spare 2625185029Spjd * from an exported pool. At user's own will, such pool can 2626185029Spjd * be forcedly exported. 2627185029Spjd */ 2628185029Spjd if (!force && new_state == POOL_STATE_EXPORTED && 2629185029Spjd spa_has_active_shared_spare(spa)) { 2630185029Spjd spa_async_resume(spa); 2631185029Spjd mutex_exit(&spa_namespace_lock); 2632185029Spjd return (EXDEV); 2633185029Spjd } 2634168404Spjd 2635168404Spjd /* 2636168404Spjd * We want this to be reflected on every label, 2637168404Spjd * so mark them all dirty. spa_unload() will do the 2638168404Spjd * final sync that pushes these changes out. 2639168404Spjd */ 2640207670Smm if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { 2641185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2642168404Spjd spa->spa_state = new_state; 2643168404Spjd spa->spa_final_txg = spa_last_synced_txg(spa) + 1; 2644168404Spjd vdev_config_dirty(spa->spa_root_vdev); 2645185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2646168404Spjd } 2647168404Spjd } 2648168404Spjd 2649185029Spjd spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 2650185029Spjd 2651168404Spjd if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 2652168404Spjd spa_unload(spa); 2653168404Spjd spa_deactivate(spa); 2654168404Spjd } 2655168404Spjd 2656168404Spjd if (oldconfig && spa->spa_config) 2657168404Spjd VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 2658168404Spjd 2659168404Spjd if (new_state != POOL_STATE_UNINITIALIZED) { 2660207670Smm if (!hardforce) 2661207670Smm spa_config_sync(spa, B_TRUE, B_TRUE); 2662168404Spjd spa_remove(spa); 2663168404Spjd } 2664168404Spjd mutex_exit(&spa_namespace_lock); 2665168404Spjd 2666168404Spjd return (0); 2667168404Spjd} 2668168404Spjd 2669168404Spjd/* 2670168404Spjd * Destroy a storage pool. 2671168404Spjd */ 2672168404Spjdint 2673168404Spjdspa_destroy(char *pool) 2674168404Spjd{ 2675207670Smm return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, 2676207670Smm B_FALSE, B_FALSE)); 2677168404Spjd} 2678168404Spjd 2679168404Spjd/* 2680168404Spjd * Export a storage pool. 2681168404Spjd */ 2682168404Spjdint 2683207670Smmspa_export(char *pool, nvlist_t **oldconfig, boolean_t force, 2684207670Smm boolean_t hardforce) 2685168404Spjd{ 2686207670Smm return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, 2687207670Smm force, hardforce)); 2688168404Spjd} 2689168404Spjd 2690168404Spjd/* 2691168404Spjd * Similar to spa_export(), this unloads the spa_t without actually removing it 2692168404Spjd * from the namespace in any way. 2693168404Spjd */ 2694168404Spjdint 2695168404Spjdspa_reset(char *pool) 2696168404Spjd{ 2697185029Spjd return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, 2698207670Smm B_FALSE, B_FALSE)); 2699168404Spjd} 2700168404Spjd 2701168404Spjd/* 2702168404Spjd * ========================================================================== 2703168404Spjd * Device manipulation 2704168404Spjd * ========================================================================== 2705168404Spjd */ 2706168404Spjd 2707168404Spjd/* 2708185029Spjd * Add a device to a storage pool. 2709168404Spjd */ 2710168404Spjdint 2711168404Spjdspa_vdev_add(spa_t *spa, nvlist_t *nvroot) 2712168404Spjd{ 2713168404Spjd uint64_t txg; 2714168404Spjd int c, error; 2715168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2716168404Spjd vdev_t *vd, *tvd; 2717185029Spjd nvlist_t **spares, **l2cache; 2718185029Spjd uint_t nspares, nl2cache; 2719168404Spjd 2720168404Spjd txg = spa_vdev_enter(spa); 2721168404Spjd 2722168404Spjd if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 2723168404Spjd VDEV_ALLOC_ADD)) != 0) 2724168404Spjd return (spa_vdev_exit(spa, NULL, txg, error)); 2725168404Spjd 2726185029Spjd spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ 2727168404Spjd 2728185029Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 2729185029Spjd &nspares) != 0) 2730168404Spjd nspares = 0; 2731168404Spjd 2732185029Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, 2733185029Spjd &nl2cache) != 0) 2734185029Spjd nl2cache = 0; 2735185029Spjd 2736185029Spjd if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) 2737168404Spjd return (spa_vdev_exit(spa, vd, txg, EINVAL)); 2738168404Spjd 2739185029Spjd if (vd->vdev_children != 0 && 2740185029Spjd (error = vdev_create(vd, txg, B_FALSE)) != 0) 2741185029Spjd return (spa_vdev_exit(spa, vd, txg, error)); 2742168404Spjd 2743168404Spjd /* 2744185029Spjd * We must validate the spares and l2cache devices after checking the 2745185029Spjd * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 2746168404Spjd */ 2747185029Spjd if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) 2748168404Spjd return (spa_vdev_exit(spa, vd, txg, error)); 2749168404Spjd 2750168404Spjd /* 2751168404Spjd * Transfer each new top-level vdev from vd to rvd. 2752168404Spjd */ 2753168404Spjd for (c = 0; c < vd->vdev_children; c++) { 2754168404Spjd tvd = vd->vdev_child[c]; 2755168404Spjd vdev_remove_child(vd, tvd); 2756168404Spjd tvd->vdev_id = rvd->vdev_children; 2757168404Spjd vdev_add_child(rvd, tvd); 2758168404Spjd vdev_config_dirty(tvd); 2759168404Spjd } 2760168404Spjd 2761168404Spjd if (nspares != 0) { 2762185029Spjd spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, 2763185029Spjd ZPOOL_CONFIG_SPARES); 2764168404Spjd spa_load_spares(spa); 2765185029Spjd spa->spa_spares.sav_sync = B_TRUE; 2766168404Spjd } 2767168404Spjd 2768185029Spjd if (nl2cache != 0) { 2769185029Spjd spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, 2770185029Spjd ZPOOL_CONFIG_L2CACHE); 2771185029Spjd spa_load_l2cache(spa); 2772185029Spjd spa->spa_l2cache.sav_sync = B_TRUE; 2773185029Spjd } 2774185029Spjd 2775168404Spjd /* 2776168404Spjd * We have to be careful when adding new vdevs to an existing pool. 2777168404Spjd * If other threads start allocating from these vdevs before we 2778168404Spjd * sync the config cache, and we lose power, then upon reboot we may 2779168404Spjd * fail to open the pool because there are DVAs that the config cache 2780168404Spjd * can't translate. Therefore, we first add the vdevs without 2781168404Spjd * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 2782168404Spjd * and then let spa_config_update() initialize the new metaslabs. 2783168404Spjd * 2784168404Spjd * spa_load() checks for added-but-not-initialized vdevs, so that 2785168404Spjd * if we lose power at any point in this sequence, the remaining 2786168404Spjd * steps will be completed the next time we load the pool. 2787168404Spjd */ 2788168404Spjd (void) spa_vdev_exit(spa, vd, txg, 0); 2789168404Spjd 2790168404Spjd mutex_enter(&spa_namespace_lock); 2791168404Spjd spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 2792168404Spjd mutex_exit(&spa_namespace_lock); 2793168404Spjd 2794168404Spjd return (0); 2795168404Spjd} 2796168404Spjd 2797168404Spjd/* 2798168404Spjd * Attach a device to a mirror. The arguments are the path to any device 2799168404Spjd * in the mirror, and the nvroot for the new device. If the path specifies 2800168404Spjd * a device that is not mirrored, we automatically insert the mirror vdev. 2801168404Spjd * 2802168404Spjd * If 'replacing' is specified, the new device is intended to replace the 2803168404Spjd * existing device; in this case the two devices are made into their own 2804185029Spjd * mirror using the 'replacing' vdev, which is functionally identical to 2805168404Spjd * the mirror vdev (it actually reuses all the same ops) but has a few 2806168404Spjd * extra rules: you can't attach to it after it's been created, and upon 2807168404Spjd * completion of resilvering, the first disk (the one being replaced) 2808168404Spjd * is automatically detached. 2809168404Spjd */ 2810168404Spjdint 2811168404Spjdspa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 2812168404Spjd{ 2813168404Spjd uint64_t txg, open_txg; 2814168404Spjd vdev_t *rvd = spa->spa_root_vdev; 2815168404Spjd vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 2816168404Spjd vdev_ops_t *pvops; 2817185029Spjd dmu_tx_t *tx; 2818185029Spjd char *oldvdpath, *newvdpath; 2819185029Spjd int newvd_isspare; 2820185029Spjd int error; 2821168404Spjd 2822168404Spjd txg = spa_vdev_enter(spa); 2823168404Spjd 2824185029Spjd oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 2825168404Spjd 2826168404Spjd if (oldvd == NULL) 2827168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 2828168404Spjd 2829168404Spjd if (!oldvd->vdev_ops->vdev_op_leaf) 2830168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 2831168404Spjd 2832168404Spjd pvd = oldvd->vdev_parent; 2833168404Spjd 2834168404Spjd if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 2835185029Spjd VDEV_ALLOC_ADD)) != 0) 2836185029Spjd return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 2837185029Spjd 2838185029Spjd if (newrootvd->vdev_children != 1) 2839168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 2840168404Spjd 2841168404Spjd newvd = newrootvd->vdev_child[0]; 2842168404Spjd 2843168404Spjd if (!newvd->vdev_ops->vdev_op_leaf) 2844168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 2845168404Spjd 2846168404Spjd if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 2847168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, error)); 2848168404Spjd 2849185029Spjd /* 2850185029Spjd * Spares can't replace logs 2851185029Spjd */ 2852185029Spjd if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) 2853185029Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 2854185029Spjd 2855168404Spjd if (!replacing) { 2856168404Spjd /* 2857168404Spjd * For attach, the only allowable parent is a mirror or the root 2858168404Spjd * vdev. 2859168404Spjd */ 2860168404Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 2861168404Spjd pvd->vdev_ops != &vdev_root_ops) 2862168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 2863168404Spjd 2864168404Spjd pvops = &vdev_mirror_ops; 2865168404Spjd } else { 2866168404Spjd /* 2867168404Spjd * Active hot spares can only be replaced by inactive hot 2868168404Spjd * spares. 2869168404Spjd */ 2870168404Spjd if (pvd->vdev_ops == &vdev_spare_ops && 2871168404Spjd pvd->vdev_child[1] == oldvd && 2872168404Spjd !spa_has_spare(spa, newvd->vdev_guid)) 2873168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 2874168404Spjd 2875168404Spjd /* 2876168404Spjd * If the source is a hot spare, and the parent isn't already a 2877168404Spjd * spare, then we want to create a new hot spare. Otherwise, we 2878168404Spjd * want to create a replacing vdev. The user is not allowed to 2879168404Spjd * attach to a spared vdev child unless the 'isspare' state is 2880168404Spjd * the same (spare replaces spare, non-spare replaces 2881168404Spjd * non-spare). 2882168404Spjd */ 2883168404Spjd if (pvd->vdev_ops == &vdev_replacing_ops) 2884168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 2885168404Spjd else if (pvd->vdev_ops == &vdev_spare_ops && 2886168404Spjd newvd->vdev_isspare != oldvd->vdev_isspare) 2887168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 2888168404Spjd else if (pvd->vdev_ops != &vdev_spare_ops && 2889168404Spjd newvd->vdev_isspare) 2890168404Spjd pvops = &vdev_spare_ops; 2891168404Spjd else 2892168404Spjd pvops = &vdev_replacing_ops; 2893168404Spjd } 2894168404Spjd 2895168404Spjd /* 2896168404Spjd * Compare the new device size with the replaceable/attachable 2897168404Spjd * device size. 2898168404Spjd */ 2899168404Spjd if (newvd->vdev_psize < vdev_get_rsize(oldvd)) 2900168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 2901168404Spjd 2902168404Spjd /* 2903168404Spjd * The new device cannot have a higher alignment requirement 2904168404Spjd * than the top-level vdev. 2905168404Spjd */ 2906168404Spjd if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 2907168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 2908168404Spjd 2909168404Spjd /* 2910168404Spjd * If this is an in-place replacement, update oldvd's path and devid 2911168404Spjd * to make it distinguishable from newvd, and unopenable from now on. 2912168404Spjd */ 2913168404Spjd if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 2914168404Spjd spa_strfree(oldvd->vdev_path); 2915168404Spjd oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 2916168404Spjd KM_SLEEP); 2917168404Spjd (void) sprintf(oldvd->vdev_path, "%s/%s", 2918168404Spjd newvd->vdev_path, "old"); 2919168404Spjd if (oldvd->vdev_devid != NULL) { 2920168404Spjd spa_strfree(oldvd->vdev_devid); 2921168404Spjd oldvd->vdev_devid = NULL; 2922168404Spjd } 2923168404Spjd } 2924168404Spjd 2925168404Spjd /* 2926168404Spjd * If the parent is not a mirror, or if we're replacing, insert the new 2927168404Spjd * mirror/replacing/spare vdev above oldvd. 2928168404Spjd */ 2929168404Spjd if (pvd->vdev_ops != pvops) 2930168404Spjd pvd = vdev_add_parent(oldvd, pvops); 2931168404Spjd 2932168404Spjd ASSERT(pvd->vdev_top->vdev_parent == rvd); 2933168404Spjd ASSERT(pvd->vdev_ops == pvops); 2934168404Spjd ASSERT(oldvd->vdev_parent == pvd); 2935168404Spjd 2936168404Spjd /* 2937168404Spjd * Extract the new device from its root and add it to pvd. 2938168404Spjd */ 2939168404Spjd vdev_remove_child(newrootvd, newvd); 2940168404Spjd newvd->vdev_id = pvd->vdev_children; 2941168404Spjd vdev_add_child(pvd, newvd); 2942168404Spjd 2943168404Spjd /* 2944168404Spjd * If newvd is smaller than oldvd, but larger than its rsize, 2945168404Spjd * the addition of newvd may have decreased our parent's asize. 2946168404Spjd */ 2947168404Spjd pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize); 2948168404Spjd 2949168404Spjd tvd = newvd->vdev_top; 2950168404Spjd ASSERT(pvd->vdev_top == tvd); 2951168404Spjd ASSERT(tvd->vdev_parent == rvd); 2952168404Spjd 2953168404Spjd vdev_config_dirty(tvd); 2954168404Spjd 2955168404Spjd /* 2956168404Spjd * Set newvd's DTL to [TXG_INITIAL, open_txg]. It will propagate 2957168404Spjd * upward when spa_vdev_exit() calls vdev_dtl_reassess(). 2958168404Spjd */ 2959168404Spjd open_txg = txg + TXG_CONCURRENT_STATES - 1; 2960168404Spjd 2961168404Spjd mutex_enter(&newvd->vdev_dtl_lock); 2962168404Spjd space_map_add(&newvd->vdev_dtl_map, TXG_INITIAL, 2963168404Spjd open_txg - TXG_INITIAL + 1); 2964168404Spjd mutex_exit(&newvd->vdev_dtl_lock); 2965168404Spjd 2966168404Spjd if (newvd->vdev_isspare) 2967168404Spjd spa_spare_activate(newvd); 2968185029Spjd oldvdpath = spa_strdup(oldvd->vdev_path); 2969185029Spjd newvdpath = spa_strdup(newvd->vdev_path); 2970185029Spjd newvd_isspare = newvd->vdev_isspare; 2971168404Spjd 2972168404Spjd /* 2973168404Spjd * Mark newvd's DTL dirty in this txg. 2974168404Spjd */ 2975168404Spjd vdev_dirty(tvd, VDD_DTL, newvd, txg); 2976168404Spjd 2977168404Spjd (void) spa_vdev_exit(spa, newrootvd, open_txg, 0); 2978168404Spjd 2979185029Spjd tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 2980185029Spjd if (dmu_tx_assign(tx, TXG_WAIT) == 0) { 2981185029Spjd spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, tx, 2982185029Spjd CRED(), "%s vdev=%s %s vdev=%s", 2983185029Spjd replacing && newvd_isspare ? "spare in" : 2984185029Spjd replacing ? "replace" : "attach", newvdpath, 2985185029Spjd replacing ? "for" : "to", oldvdpath); 2986185029Spjd dmu_tx_commit(tx); 2987185029Spjd } else { 2988185029Spjd dmu_tx_abort(tx); 2989185029Spjd } 2990185029Spjd 2991185029Spjd spa_strfree(oldvdpath); 2992185029Spjd spa_strfree(newvdpath); 2993185029Spjd 2994168404Spjd /* 2995168404Spjd * Kick off a resilver to update newvd. 2996168404Spjd */ 2997185029Spjd VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0); 2998168404Spjd 2999168404Spjd return (0); 3000168404Spjd} 3001168404Spjd 3002168404Spjd/* 3003168404Spjd * Detach a device from a mirror or replacing vdev. 3004168404Spjd * If 'replace_done' is specified, only detach if the parent 3005168404Spjd * is a replacing vdev. 3006168404Spjd */ 3007168404Spjdint 3008168404Spjdspa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done) 3009168404Spjd{ 3010168404Spjd uint64_t txg; 3011168404Spjd int c, t, error; 3012168404Spjd vdev_t *rvd = spa->spa_root_vdev; 3013168404Spjd vdev_t *vd, *pvd, *cvd, *tvd; 3014168404Spjd boolean_t unspare = B_FALSE; 3015168404Spjd uint64_t unspare_guid; 3016185029Spjd size_t len; 3017168404Spjd 3018168404Spjd txg = spa_vdev_enter(spa); 3019168404Spjd 3020185029Spjd vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3021168404Spjd 3022168404Spjd if (vd == NULL) 3023168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 3024168404Spjd 3025168404Spjd if (!vd->vdev_ops->vdev_op_leaf) 3026168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3027168404Spjd 3028168404Spjd pvd = vd->vdev_parent; 3029168404Spjd 3030168404Spjd /* 3031168404Spjd * If replace_done is specified, only remove this device if it's 3032168404Spjd * the first child of a replacing vdev. For the 'spare' vdev, either 3033168404Spjd * disk can be removed. 3034168404Spjd */ 3035168404Spjd if (replace_done) { 3036168404Spjd if (pvd->vdev_ops == &vdev_replacing_ops) { 3037168404Spjd if (vd->vdev_id != 0) 3038168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3039168404Spjd } else if (pvd->vdev_ops != &vdev_spare_ops) { 3040168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3041168404Spjd } 3042168404Spjd } 3043168404Spjd 3044168404Spjd ASSERT(pvd->vdev_ops != &vdev_spare_ops || 3045185029Spjd spa_version(spa) >= SPA_VERSION_SPARES); 3046168404Spjd 3047168404Spjd /* 3048168404Spjd * Only mirror, replacing, and spare vdevs support detach. 3049168404Spjd */ 3050168404Spjd if (pvd->vdev_ops != &vdev_replacing_ops && 3051168404Spjd pvd->vdev_ops != &vdev_mirror_ops && 3052168404Spjd pvd->vdev_ops != &vdev_spare_ops) 3053168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3054168404Spjd 3055168404Spjd /* 3056168404Spjd * If there's only one replica, you can't detach it. 3057168404Spjd */ 3058168404Spjd if (pvd->vdev_children <= 1) 3059168404Spjd return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3060168404Spjd 3061168404Spjd /* 3062168404Spjd * If all siblings have non-empty DTLs, this device may have the only 3063168404Spjd * valid copy of the data, which means we cannot safely detach it. 3064168404Spjd * 3065168404Spjd * XXX -- as in the vdev_offline() case, we really want a more 3066168404Spjd * precise DTL check. 3067168404Spjd */ 3068168404Spjd for (c = 0; c < pvd->vdev_children; c++) { 3069168404Spjd uint64_t dirty; 3070168404Spjd 3071168404Spjd cvd = pvd->vdev_child[c]; 3072168404Spjd if (cvd == vd) 3073168404Spjd continue; 3074168404Spjd if (vdev_is_dead(cvd)) 3075168404Spjd continue; 3076168404Spjd mutex_enter(&cvd->vdev_dtl_lock); 3077168404Spjd dirty = cvd->vdev_dtl_map.sm_space | 3078168404Spjd cvd->vdev_dtl_scrub.sm_space; 3079168404Spjd mutex_exit(&cvd->vdev_dtl_lock); 3080168404Spjd if (!dirty) 3081168404Spjd break; 3082168404Spjd } 3083168404Spjd 3084185029Spjd if (c == pvd->vdev_children) 3085185029Spjd return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 3086185029Spjd 3087168404Spjd /* 3088185029Spjd * If we are detaching the second disk from a replacing vdev, then 3089185029Spjd * check to see if we changed the original vdev's path to have "/old" 3090185029Spjd * at the end in spa_vdev_attach(). If so, undo that change now. 3091168404Spjd */ 3092185029Spjd if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && 3093185029Spjd pvd->vdev_child[0]->vdev_path != NULL && 3094185029Spjd pvd->vdev_child[1]->vdev_path != NULL) { 3095185029Spjd ASSERT(pvd->vdev_child[1] == vd); 3096185029Spjd cvd = pvd->vdev_child[0]; 3097185029Spjd len = strlen(vd->vdev_path); 3098185029Spjd if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 3099185029Spjd strcmp(cvd->vdev_path + len, "/old") == 0) { 3100185029Spjd spa_strfree(cvd->vdev_path); 3101185029Spjd cvd->vdev_path = spa_strdup(vd->vdev_path); 3102185029Spjd } 3103185029Spjd } 3104168404Spjd 3105168404Spjd /* 3106168404Spjd * If we are detaching the original disk from a spare, then it implies 3107168404Spjd * that the spare should become a real disk, and be removed from the 3108168404Spjd * active spare list for the pool. 3109168404Spjd */ 3110168404Spjd if (pvd->vdev_ops == &vdev_spare_ops && 3111168404Spjd vd->vdev_id == 0) 3112168404Spjd unspare = B_TRUE; 3113168404Spjd 3114168404Spjd /* 3115168404Spjd * Erase the disk labels so the disk can be used for other things. 3116168404Spjd * This must be done after all other error cases are handled, 3117168404Spjd * but before we disembowel vd (so we can still do I/O to it). 3118168404Spjd * But if we can't do it, don't treat the error as fatal -- 3119168404Spjd * it may be that the unwritability of the disk is the reason 3120168404Spjd * it's being detached! 3121168404Spjd */ 3122168404Spjd error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 3123168404Spjd 3124168404Spjd /* 3125168404Spjd * Remove vd from its parent and compact the parent's children. 3126168404Spjd */ 3127168404Spjd vdev_remove_child(pvd, vd); 3128168404Spjd vdev_compact_children(pvd); 3129168404Spjd 3130168404Spjd /* 3131168404Spjd * Remember one of the remaining children so we can get tvd below. 3132168404Spjd */ 3133168404Spjd cvd = pvd->vdev_child[0]; 3134168404Spjd 3135168404Spjd /* 3136168404Spjd * If we need to remove the remaining child from the list of hot spares, 3137168404Spjd * do it now, marking the vdev as no longer a spare in the process. We 3138168404Spjd * must do this before vdev_remove_parent(), because that can change the 3139168404Spjd * GUID if it creates a new toplevel GUID. 3140168404Spjd */ 3141168404Spjd if (unspare) { 3142168404Spjd ASSERT(cvd->vdev_isspare); 3143168404Spjd spa_spare_remove(cvd); 3144168404Spjd unspare_guid = cvd->vdev_guid; 3145168404Spjd } 3146168404Spjd 3147168404Spjd /* 3148168404Spjd * If the parent mirror/replacing vdev only has one child, 3149168404Spjd * the parent is no longer needed. Remove it from the tree. 3150168404Spjd */ 3151168404Spjd if (pvd->vdev_children == 1) 3152168404Spjd vdev_remove_parent(cvd); 3153168404Spjd 3154168404Spjd /* 3155168404Spjd * We don't set tvd until now because the parent we just removed 3156168404Spjd * may have been the previous top-level vdev. 3157168404Spjd */ 3158168404Spjd tvd = cvd->vdev_top; 3159168404Spjd ASSERT(tvd->vdev_parent == rvd); 3160168404Spjd 3161168404Spjd /* 3162168404Spjd * Reevaluate the parent vdev state. 3163168404Spjd */ 3164185029Spjd vdev_propagate_state(cvd); 3165168404Spjd 3166168404Spjd /* 3167168404Spjd * If the device we just detached was smaller than the others, it may be 3168168404Spjd * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init() 3169168404Spjd * can't fail because the existing metaslabs are already in core, so 3170168404Spjd * there's nothing to read from disk. 3171168404Spjd */ 3172168404Spjd VERIFY(vdev_metaslab_init(tvd, txg) == 0); 3173168404Spjd 3174168404Spjd vdev_config_dirty(tvd); 3175168404Spjd 3176168404Spjd /* 3177168404Spjd * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 3178168404Spjd * vd->vdev_detached is set and free vd's DTL object in syncing context. 3179168404Spjd * But first make sure we're not on any *other* txg's DTL list, to 3180168404Spjd * prevent vd from being accessed after it's freed. 3181168404Spjd */ 3182168404Spjd for (t = 0; t < TXG_SIZE; t++) 3183168404Spjd (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 3184168404Spjd vd->vdev_detached = B_TRUE; 3185168404Spjd vdev_dirty(tvd, VDD_DTL, vd, txg); 3186168404Spjd 3187185029Spjd spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 3188185029Spjd 3189168404Spjd error = spa_vdev_exit(spa, vd, txg, 0); 3190168404Spjd 3191168404Spjd /* 3192168404Spjd * If this was the removal of the original device in a hot spare vdev, 3193168404Spjd * then we want to go through and remove the device from the hot spare 3194168404Spjd * list of every other pool. 3195168404Spjd */ 3196168404Spjd if (unspare) { 3197168404Spjd spa = NULL; 3198168404Spjd mutex_enter(&spa_namespace_lock); 3199168404Spjd while ((spa = spa_next(spa)) != NULL) { 3200168404Spjd if (spa->spa_state != POOL_STATE_ACTIVE) 3201168404Spjd continue; 3202185029Spjd spa_open_ref(spa, FTAG); 3203185029Spjd mutex_exit(&spa_namespace_lock); 3204168404Spjd (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 3205185029Spjd mutex_enter(&spa_namespace_lock); 3206185029Spjd spa_close(spa, FTAG); 3207168404Spjd } 3208168404Spjd mutex_exit(&spa_namespace_lock); 3209168404Spjd } 3210168404Spjd 3211168404Spjd return (error); 3212168404Spjd} 3213168404Spjd 3214185029Spjdstatic nvlist_t * 3215185029Spjdspa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 3216185029Spjd{ 3217185029Spjd for (int i = 0; i < count; i++) { 3218185029Spjd uint64_t guid; 3219185029Spjd 3220185029Spjd VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 3221185029Spjd &guid) == 0); 3222185029Spjd 3223185029Spjd if (guid == target_guid) 3224185029Spjd return (nvpp[i]); 3225185029Spjd } 3226185029Spjd 3227185029Spjd return (NULL); 3228185029Spjd} 3229185029Spjd 3230185029Spjdstatic void 3231185029Spjdspa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 3232185029Spjd nvlist_t *dev_to_remove) 3233185029Spjd{ 3234185029Spjd nvlist_t **newdev = NULL; 3235185029Spjd 3236185029Spjd if (count > 1) 3237185029Spjd newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); 3238185029Spjd 3239185029Spjd for (int i = 0, j = 0; i < count; i++) { 3240185029Spjd if (dev[i] == dev_to_remove) 3241185029Spjd continue; 3242185029Spjd VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 3243185029Spjd } 3244185029Spjd 3245185029Spjd VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 3246185029Spjd VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 3247185029Spjd 3248185029Spjd for (int i = 0; i < count - 1; i++) 3249185029Spjd nvlist_free(newdev[i]); 3250185029Spjd 3251185029Spjd if (count > 1) 3252185029Spjd kmem_free(newdev, (count - 1) * sizeof (void *)); 3253185029Spjd} 3254185029Spjd 3255168404Spjd/* 3256168404Spjd * Remove a device from the pool. Currently, this supports removing only hot 3257185029Spjd * spares and level 2 ARC devices. 3258168404Spjd */ 3259168404Spjdint 3260168404Spjdspa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 3261168404Spjd{ 3262168404Spjd vdev_t *vd; 3263185029Spjd nvlist_t **spares, **l2cache, *nv; 3264185029Spjd uint_t nspares, nl2cache; 3265185029Spjd uint64_t txg; 3266185029Spjd int error = 0; 3267168404Spjd 3268185029Spjd txg = spa_vdev_enter(spa); 3269168404Spjd 3270185029Spjd vd = spa_lookup_by_guid(spa, guid, B_FALSE); 3271168404Spjd 3272185029Spjd if (spa->spa_spares.sav_vdevs != NULL && 3273185029Spjd nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 3274185029Spjd ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 3275185029Spjd (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 3276185029Spjd /* 3277185029Spjd * Only remove the hot spare if it's not currently in use 3278185029Spjd * in this pool. 3279185029Spjd */ 3280185029Spjd if (vd == NULL || unspare) { 3281185029Spjd spa_vdev_remove_aux(spa->spa_spares.sav_config, 3282185029Spjd ZPOOL_CONFIG_SPARES, spares, nspares, nv); 3283185029Spjd spa_load_spares(spa); 3284185029Spjd spa->spa_spares.sav_sync = B_TRUE; 3285185029Spjd } else { 3286185029Spjd error = EBUSY; 3287168404Spjd } 3288185029Spjd } else if (spa->spa_l2cache.sav_vdevs != NULL && 3289185029Spjd nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 3290185029Spjd ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 3291185029Spjd (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 3292185029Spjd /* 3293185029Spjd * Cache devices can always be removed. 3294185029Spjd */ 3295185029Spjd spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 3296185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 3297185029Spjd spa_load_l2cache(spa); 3298185029Spjd spa->spa_l2cache.sav_sync = B_TRUE; 3299185029Spjd } else if (vd != NULL) { 3300185029Spjd /* 3301185029Spjd * Normal vdevs cannot be removed (yet). 3302185029Spjd */ 3303185029Spjd error = ENOTSUP; 3304168404Spjd } else { 3305185029Spjd /* 3306185029Spjd * There is no vdev of any kind with the specified guid. 3307185029Spjd */ 3308185029Spjd error = ENOENT; 3309168404Spjd } 3310168404Spjd 3311185029Spjd return (spa_vdev_exit(spa, NULL, txg, error)); 3312168404Spjd} 3313168404Spjd 3314168404Spjd/* 3315185029Spjd * Find any device that's done replacing, or a vdev marked 'unspare' that's 3316185029Spjd * current spared, so we can detach it. 3317168404Spjd */ 3318168404Spjdstatic vdev_t * 3319185029Spjdspa_vdev_resilver_done_hunt(vdev_t *vd) 3320168404Spjd{ 3321168404Spjd vdev_t *newvd, *oldvd; 3322168404Spjd int c; 3323168404Spjd 3324168404Spjd for (c = 0; c < vd->vdev_children; c++) { 3325185029Spjd oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 3326168404Spjd if (oldvd != NULL) 3327168404Spjd return (oldvd); 3328168404Spjd } 3329168404Spjd 3330185029Spjd /* 3331185029Spjd * Check for a completed replacement. 3332185029Spjd */ 3333168404Spjd if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { 3334168404Spjd oldvd = vd->vdev_child[0]; 3335168404Spjd newvd = vd->vdev_child[1]; 3336168404Spjd 3337168404Spjd mutex_enter(&newvd->vdev_dtl_lock); 3338168404Spjd if (newvd->vdev_dtl_map.sm_space == 0 && 3339168404Spjd newvd->vdev_dtl_scrub.sm_space == 0) { 3340168404Spjd mutex_exit(&newvd->vdev_dtl_lock); 3341168404Spjd return (oldvd); 3342168404Spjd } 3343168404Spjd mutex_exit(&newvd->vdev_dtl_lock); 3344168404Spjd } 3345168404Spjd 3346185029Spjd /* 3347185029Spjd * Check for a completed resilver with the 'unspare' flag set. 3348185029Spjd */ 3349185029Spjd if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { 3350185029Spjd newvd = vd->vdev_child[0]; 3351185029Spjd oldvd = vd->vdev_child[1]; 3352185029Spjd 3353185029Spjd mutex_enter(&newvd->vdev_dtl_lock); 3354185029Spjd if (newvd->vdev_unspare && 3355185029Spjd newvd->vdev_dtl_map.sm_space == 0 && 3356185029Spjd newvd->vdev_dtl_scrub.sm_space == 0) { 3357185029Spjd newvd->vdev_unspare = 0; 3358185029Spjd mutex_exit(&newvd->vdev_dtl_lock); 3359185029Spjd return (oldvd); 3360185029Spjd } 3361185029Spjd mutex_exit(&newvd->vdev_dtl_lock); 3362185029Spjd } 3363185029Spjd 3364168404Spjd return (NULL); 3365168404Spjd} 3366168404Spjd 3367168404Spjdstatic void 3368185029Spjdspa_vdev_resilver_done(spa_t *spa) 3369168404Spjd{ 3370168404Spjd vdev_t *vd; 3371168404Spjd vdev_t *pvd; 3372168404Spjd uint64_t guid; 3373168404Spjd uint64_t pguid = 0; 3374168404Spjd 3375185029Spjd spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3376168404Spjd 3377185029Spjd while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 3378168404Spjd guid = vd->vdev_guid; 3379168404Spjd /* 3380168404Spjd * If we have just finished replacing a hot spared device, then 3381168404Spjd * we need to detach the parent's first child (the original hot 3382168404Spjd * spare) as well. 3383168404Spjd */ 3384168404Spjd pvd = vd->vdev_parent; 3385168404Spjd if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops && 3386168404Spjd pvd->vdev_id == 0) { 3387168404Spjd ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 3388168404Spjd ASSERT(pvd->vdev_parent->vdev_children == 2); 3389168404Spjd pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid; 3390168404Spjd } 3391185029Spjd spa_config_exit(spa, SCL_CONFIG, FTAG); 3392168404Spjd if (spa_vdev_detach(spa, guid, B_TRUE) != 0) 3393168404Spjd return; 3394168404Spjd if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0) 3395168404Spjd return; 3396185029Spjd spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3397168404Spjd } 3398168404Spjd 3399185029Spjd spa_config_exit(spa, SCL_CONFIG, FTAG); 3400168404Spjd} 3401168404Spjd 3402168404Spjd/* 3403168404Spjd * Update the stored path for this vdev. Dirty the vdev configuration, relying 3404168404Spjd * on spa_vdev_enter/exit() to synchronize the labels and cache. 3405168404Spjd */ 3406168404Spjdint 3407168404Spjdspa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 3408168404Spjd{ 3409185029Spjd vdev_t *vd; 3410168404Spjd uint64_t txg; 3411168404Spjd 3412168404Spjd txg = spa_vdev_enter(spa); 3413168404Spjd 3414185029Spjd if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) { 3415168404Spjd /* 3416185029Spjd * Determine if this is a reference to a hot spare device. If 3417185029Spjd * it is, update the path manually as there is no associated 3418185029Spjd * vdev_t that can be synced to disk. 3419168404Spjd */ 3420168404Spjd nvlist_t **spares; 3421168404Spjd uint_t i, nspares; 3422185029Spjd 3423185029Spjd if (spa->spa_spares.sav_config != NULL) { 3424185029Spjd VERIFY(nvlist_lookup_nvlist_array( 3425185029Spjd spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 3426185029Spjd &spares, &nspares) == 0); 3427168404Spjd for (i = 0; i < nspares; i++) { 3428168404Spjd uint64_t theguid; 3429168404Spjd VERIFY(nvlist_lookup_uint64(spares[i], 3430168404Spjd ZPOOL_CONFIG_GUID, &theguid) == 0); 3431185029Spjd if (theguid == guid) { 3432185029Spjd VERIFY(nvlist_add_string(spares[i], 3433185029Spjd ZPOOL_CONFIG_PATH, newpath) == 0); 3434185029Spjd spa_load_spares(spa); 3435185029Spjd spa->spa_spares.sav_sync = B_TRUE; 3436185029Spjd return (spa_vdev_exit(spa, NULL, txg, 3437185029Spjd 0)); 3438185029Spjd } 3439168404Spjd } 3440185029Spjd } 3441168404Spjd 3442185029Spjd return (spa_vdev_exit(spa, NULL, txg, ENOENT)); 3443168404Spjd } 3444168404Spjd 3445168404Spjd if (!vd->vdev_ops->vdev_op_leaf) 3446168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 3447168404Spjd 3448168404Spjd spa_strfree(vd->vdev_path); 3449168404Spjd vd->vdev_path = spa_strdup(newpath); 3450168404Spjd 3451168404Spjd vdev_config_dirty(vd->vdev_top); 3452168404Spjd 3453168404Spjd return (spa_vdev_exit(spa, NULL, txg, 0)); 3454168404Spjd} 3455168404Spjd 3456168404Spjd/* 3457168404Spjd * ========================================================================== 3458168404Spjd * SPA Scrubbing 3459168404Spjd * ========================================================================== 3460168404Spjd */ 3461168404Spjd 3462168404Spjdint 3463185029Spjdspa_scrub(spa_t *spa, pool_scrub_type_t type) 3464168404Spjd{ 3465185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 3466168404Spjd 3467168404Spjd if ((uint_t)type >= POOL_SCRUB_TYPES) 3468168404Spjd return (ENOTSUP); 3469168404Spjd 3470168404Spjd /* 3471185029Spjd * If a resilver was requested, but there is no DTL on a 3472185029Spjd * writeable leaf device, we have nothing to do. 3473168404Spjd */ 3474185029Spjd if (type == POOL_SCRUB_RESILVER && 3475185029Spjd !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 3476185029Spjd spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 3477168404Spjd return (0); 3478168404Spjd } 3479168404Spjd 3480185029Spjd if (type == POOL_SCRUB_EVERYTHING && 3481185029Spjd spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE && 3482185029Spjd spa->spa_dsl_pool->dp_scrub_isresilver) 3483185029Spjd return (EBUSY); 3484168404Spjd 3485185029Spjd if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) { 3486185029Spjd return (dsl_pool_scrub_clean(spa->spa_dsl_pool)); 3487185029Spjd } else if (type == POOL_SCRUB_NONE) { 3488185029Spjd return (dsl_pool_scrub_cancel(spa->spa_dsl_pool)); 3489168404Spjd } else { 3490185029Spjd return (EINVAL); 3491168404Spjd } 3492168404Spjd} 3493168404Spjd 3494168404Spjd/* 3495168404Spjd * ========================================================================== 3496168404Spjd * SPA async task processing 3497168404Spjd * ========================================================================== 3498168404Spjd */ 3499168404Spjd 3500168404Spjdstatic void 3501185029Spjdspa_async_remove(spa_t *spa, vdev_t *vd) 3502168404Spjd{ 3503185029Spjd if (vd->vdev_remove_wanted) { 3504185029Spjd vd->vdev_remove_wanted = 0; 3505185029Spjd vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 3506185029Spjd vdev_clear(spa, vd); 3507185029Spjd vdev_state_dirty(vd->vdev_top); 3508185029Spjd } 3509168404Spjd 3510185029Spjd for (int c = 0; c < vd->vdev_children; c++) 3511185029Spjd spa_async_remove(spa, vd->vdev_child[c]); 3512185029Spjd} 3513168404Spjd 3514185029Spjdstatic void 3515185029Spjdspa_async_probe(spa_t *spa, vdev_t *vd) 3516185029Spjd{ 3517185029Spjd if (vd->vdev_probe_wanted) { 3518185029Spjd vd->vdev_probe_wanted = 0; 3519185029Spjd vdev_reopen(vd); /* vdev_open() does the actual probe */ 3520168404Spjd } 3521168404Spjd 3522185029Spjd for (int c = 0; c < vd->vdev_children; c++) 3523185029Spjd spa_async_probe(spa, vd->vdev_child[c]); 3524168404Spjd} 3525168404Spjd 3526168404Spjdstatic void 3527168404Spjdspa_async_thread(void *arg) 3528168404Spjd{ 3529168404Spjd spa_t *spa = arg; 3530168404Spjd int tasks; 3531168404Spjd 3532168404Spjd ASSERT(spa->spa_sync_on); 3533168404Spjd 3534168404Spjd mutex_enter(&spa->spa_async_lock); 3535168404Spjd tasks = spa->spa_async_tasks; 3536168404Spjd spa->spa_async_tasks = 0; 3537168404Spjd mutex_exit(&spa->spa_async_lock); 3538168404Spjd 3539168404Spjd /* 3540168404Spjd * See if the config needs to be updated. 3541168404Spjd */ 3542168404Spjd if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 3543168404Spjd mutex_enter(&spa_namespace_lock); 3544168404Spjd spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 3545168404Spjd mutex_exit(&spa_namespace_lock); 3546168404Spjd } 3547168404Spjd 3548168404Spjd /* 3549185029Spjd * See if any devices need to be marked REMOVED. 3550168404Spjd */ 3551185029Spjd if (tasks & SPA_ASYNC_REMOVE) { 3552185029Spjd spa_vdev_state_enter(spa); 3553185029Spjd spa_async_remove(spa, spa->spa_root_vdev); 3554185029Spjd for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 3555185029Spjd spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 3556185029Spjd for (int i = 0; i < spa->spa_spares.sav_count; i++) 3557185029Spjd spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 3558185029Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 3559185029Spjd } 3560168404Spjd 3561168404Spjd /* 3562185029Spjd * See if any devices need to be probed. 3563168404Spjd */ 3564185029Spjd if (tasks & SPA_ASYNC_PROBE) { 3565185029Spjd spa_vdev_state_enter(spa); 3566185029Spjd spa_async_probe(spa, spa->spa_root_vdev); 3567185029Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 3568185029Spjd } 3569168404Spjd 3570168404Spjd /* 3571185029Spjd * If any devices are done replacing, detach them. 3572168404Spjd */ 3573185029Spjd if (tasks & SPA_ASYNC_RESILVER_DONE) 3574185029Spjd spa_vdev_resilver_done(spa); 3575168404Spjd 3576168404Spjd /* 3577168404Spjd * Kick off a resilver. 3578168404Spjd */ 3579168404Spjd if (tasks & SPA_ASYNC_RESILVER) 3580185029Spjd VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0); 3581168404Spjd 3582168404Spjd /* 3583168404Spjd * Let the world know that we're done. 3584168404Spjd */ 3585168404Spjd mutex_enter(&spa->spa_async_lock); 3586168404Spjd spa->spa_async_thread = NULL; 3587168404Spjd cv_broadcast(&spa->spa_async_cv); 3588168404Spjd mutex_exit(&spa->spa_async_lock); 3589168404Spjd thread_exit(); 3590168404Spjd} 3591168404Spjd 3592168404Spjdvoid 3593168404Spjdspa_async_suspend(spa_t *spa) 3594168404Spjd{ 3595168404Spjd mutex_enter(&spa->spa_async_lock); 3596168404Spjd spa->spa_async_suspended++; 3597168404Spjd while (spa->spa_async_thread != NULL) 3598168404Spjd cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 3599168404Spjd mutex_exit(&spa->spa_async_lock); 3600168404Spjd} 3601168404Spjd 3602168404Spjdvoid 3603168404Spjdspa_async_resume(spa_t *spa) 3604168404Spjd{ 3605168404Spjd mutex_enter(&spa->spa_async_lock); 3606168404Spjd ASSERT(spa->spa_async_suspended != 0); 3607168404Spjd spa->spa_async_suspended--; 3608168404Spjd mutex_exit(&spa->spa_async_lock); 3609168404Spjd} 3610168404Spjd 3611168404Spjdstatic void 3612168404Spjdspa_async_dispatch(spa_t *spa) 3613168404Spjd{ 3614168404Spjd mutex_enter(&spa->spa_async_lock); 3615168404Spjd if (spa->spa_async_tasks && !spa->spa_async_suspended && 3616168404Spjd spa->spa_async_thread == NULL && 3617168404Spjd rootdir != NULL && !vn_is_readonly(rootdir)) 3618168404Spjd spa->spa_async_thread = thread_create(NULL, 0, 3619168404Spjd spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 3620168404Spjd mutex_exit(&spa->spa_async_lock); 3621168404Spjd} 3622168404Spjd 3623168404Spjdvoid 3624168404Spjdspa_async_request(spa_t *spa, int task) 3625168404Spjd{ 3626168404Spjd mutex_enter(&spa->spa_async_lock); 3627168404Spjd spa->spa_async_tasks |= task; 3628168404Spjd mutex_exit(&spa->spa_async_lock); 3629168404Spjd} 3630168404Spjd 3631168404Spjd/* 3632168404Spjd * ========================================================================== 3633168404Spjd * SPA syncing routines 3634168404Spjd * ========================================================================== 3635168404Spjd */ 3636168404Spjd 3637168404Spjdstatic void 3638168404Spjdspa_sync_deferred_frees(spa_t *spa, uint64_t txg) 3639168404Spjd{ 3640168404Spjd bplist_t *bpl = &spa->spa_sync_bplist; 3641168404Spjd dmu_tx_t *tx; 3642168404Spjd blkptr_t blk; 3643168404Spjd uint64_t itor = 0; 3644168404Spjd zio_t *zio; 3645168404Spjd int error; 3646168404Spjd uint8_t c = 1; 3647168404Spjd 3648185029Spjd zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 3649168404Spjd 3650185029Spjd while (bplist_iterate(bpl, &itor, &blk) == 0) { 3651185029Spjd ASSERT(blk.blk_birth < txg); 3652185029Spjd zio_nowait(zio_free(zio, spa, txg, &blk, NULL, NULL, 3653185029Spjd ZIO_FLAG_MUSTSUCCEED)); 3654185029Spjd } 3655168404Spjd 3656168404Spjd error = zio_wait(zio); 3657168404Spjd ASSERT3U(error, ==, 0); 3658168404Spjd 3659168404Spjd tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); 3660168404Spjd bplist_vacate(bpl, tx); 3661168404Spjd 3662168404Spjd /* 3663168404Spjd * Pre-dirty the first block so we sync to convergence faster. 3664168404Spjd * (Usually only the first block is needed.) 3665168404Spjd */ 3666168404Spjd dmu_write(spa->spa_meta_objset, spa->spa_sync_bplist_obj, 0, 1, &c, tx); 3667168404Spjd dmu_tx_commit(tx); 3668168404Spjd} 3669168404Spjd 3670168404Spjdstatic void 3671168404Spjdspa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 3672168404Spjd{ 3673168404Spjd char *packed = NULL; 3674185029Spjd size_t bufsize; 3675168404Spjd size_t nvsize = 0; 3676168404Spjd dmu_buf_t *db; 3677168404Spjd 3678168404Spjd VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 3679168404Spjd 3680185029Spjd /* 3681185029Spjd * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 3682185029Spjd * information. This avoids the dbuf_will_dirty() path and 3683185029Spjd * saves us a pre-read to get data we don't actually care about. 3684185029Spjd */ 3685185029Spjd bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE); 3686185029Spjd packed = kmem_alloc(bufsize, KM_SLEEP); 3687168404Spjd 3688168404Spjd VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 3689168404Spjd KM_SLEEP) == 0); 3690185029Spjd bzero(packed + nvsize, bufsize - nvsize); 3691168404Spjd 3692185029Spjd dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 3693168404Spjd 3694185029Spjd kmem_free(packed, bufsize); 3695168404Spjd 3696168404Spjd VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 3697168404Spjd dmu_buf_will_dirty(db, tx); 3698168404Spjd *(uint64_t *)db->db_data = nvsize; 3699168404Spjd dmu_buf_rele(db, FTAG); 3700168404Spjd} 3701168404Spjd 3702168404Spjdstatic void 3703185029Spjdspa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 3704185029Spjd const char *config, const char *entry) 3705168404Spjd{ 3706168404Spjd nvlist_t *nvroot; 3707185029Spjd nvlist_t **list; 3708168404Spjd int i; 3709168404Spjd 3710185029Spjd if (!sav->sav_sync) 3711168404Spjd return; 3712168404Spjd 3713168404Spjd /* 3714185029Spjd * Update the MOS nvlist describing the list of available devices. 3715185029Spjd * spa_validate_aux() will have already made sure this nvlist is 3716185029Spjd * valid and the vdevs are labeled appropriately. 3717168404Spjd */ 3718185029Spjd if (sav->sav_object == 0) { 3719185029Spjd sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 3720185029Spjd DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 3721185029Spjd sizeof (uint64_t), tx); 3722168404Spjd VERIFY(zap_update(spa->spa_meta_objset, 3723185029Spjd DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 3724185029Spjd &sav->sav_object, tx) == 0); 3725168404Spjd } 3726168404Spjd 3727168404Spjd VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 3728185029Spjd if (sav->sav_count == 0) { 3729185029Spjd VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 3730168404Spjd } else { 3731185029Spjd list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 3732185029Spjd for (i = 0; i < sav->sav_count; i++) 3733185029Spjd list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 3734185029Spjd B_FALSE, B_FALSE, B_TRUE); 3735185029Spjd VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 3736185029Spjd sav->sav_count) == 0); 3737185029Spjd for (i = 0; i < sav->sav_count; i++) 3738185029Spjd nvlist_free(list[i]); 3739185029Spjd kmem_free(list, sav->sav_count * sizeof (void *)); 3740168404Spjd } 3741168404Spjd 3742185029Spjd spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 3743168404Spjd nvlist_free(nvroot); 3744168404Spjd 3745185029Spjd sav->sav_sync = B_FALSE; 3746168404Spjd} 3747168404Spjd 3748168404Spjdstatic void 3749168404Spjdspa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 3750168404Spjd{ 3751168404Spjd nvlist_t *config; 3752168404Spjd 3753185029Spjd if (list_is_empty(&spa->spa_config_dirty_list)) 3754168404Spjd return; 3755168404Spjd 3756185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 3757168404Spjd 3758185029Spjd config = spa_config_generate(spa, spa->spa_root_vdev, 3759185029Spjd dmu_tx_get_txg(tx), B_FALSE); 3760185029Spjd 3761185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 3762185029Spjd 3763168404Spjd if (spa->spa_config_syncing) 3764168404Spjd nvlist_free(spa->spa_config_syncing); 3765168404Spjd spa->spa_config_syncing = config; 3766168404Spjd 3767168404Spjd spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 3768168404Spjd} 3769168404Spjd 3770185029Spjd/* 3771185029Spjd * Set zpool properties. 3772185029Spjd */ 3773168404Spjdstatic void 3774185029Spjdspa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3775168404Spjd{ 3776168404Spjd spa_t *spa = arg1; 3777185029Spjd objset_t *mos = spa->spa_meta_objset; 3778168404Spjd nvlist_t *nvp = arg2; 3779185029Spjd nvpair_t *elem; 3780185029Spjd uint64_t intval; 3781185029Spjd char *strval; 3782185029Spjd zpool_prop_t prop; 3783185029Spjd const char *propname; 3784185029Spjd zprop_type_t proptype; 3785185029Spjd spa_config_dirent_t *dp; 3786168404Spjd 3787168404Spjd mutex_enter(&spa->spa_props_lock); 3788168404Spjd 3789185029Spjd elem = NULL; 3790185029Spjd while ((elem = nvlist_next_nvpair(nvp, elem))) { 3791185029Spjd switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 3792185029Spjd case ZPOOL_PROP_VERSION: 3793185029Spjd /* 3794185029Spjd * Only set version for non-zpool-creation cases 3795185029Spjd * (set/import). spa_create() needs special care 3796185029Spjd * for version setting. 3797185029Spjd */ 3798185029Spjd if (tx->tx_txg != TXG_INITIAL) { 3799185029Spjd VERIFY(nvpair_value_uint64(elem, 3800185029Spjd &intval) == 0); 3801185029Spjd ASSERT(intval <= SPA_VERSION); 3802185029Spjd ASSERT(intval >= spa_version(spa)); 3803185029Spjd spa->spa_uberblock.ub_version = intval; 3804185029Spjd vdev_config_dirty(spa->spa_root_vdev); 3805185029Spjd } 3806185029Spjd break; 3807168404Spjd 3808185029Spjd case ZPOOL_PROP_ALTROOT: 3809185029Spjd /* 3810185029Spjd * 'altroot' is a non-persistent property. It should 3811185029Spjd * have been set temporarily at creation or import time. 3812185029Spjd */ 3813185029Spjd ASSERT(spa->spa_root != NULL); 3814185029Spjd break; 3815168404Spjd 3816185029Spjd case ZPOOL_PROP_CACHEFILE: 3817185029Spjd /* 3818185029Spjd * 'cachefile' is a non-persistent property, but note 3819185029Spjd * an async request that the config cache needs to be 3820185029Spjd * udpated. 3821185029Spjd */ 3822185029Spjd VERIFY(nvpair_value_string(elem, &strval) == 0); 3823185029Spjd 3824185029Spjd dp = kmem_alloc(sizeof (spa_config_dirent_t), KM_SLEEP); 3825185029Spjd 3826185029Spjd if (strval[0] == '\0') 3827185029Spjd dp->scd_path = spa_strdup(spa_config_path); 3828185029Spjd else if (strcmp(strval, "none") == 0) 3829185029Spjd dp->scd_path = NULL; 3830185029Spjd else 3831185029Spjd dp->scd_path = spa_strdup(strval); 3832185029Spjd 3833185029Spjd list_insert_head(&spa->spa_config_list, dp); 3834185029Spjd spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 3835168404Spjd break; 3836185029Spjd default: 3837185029Spjd /* 3838185029Spjd * Set pool property values in the poolprops mos object. 3839185029Spjd */ 3840185029Spjd if (spa->spa_pool_props_object == 0) { 3841185029Spjd objset_t *mos = spa->spa_meta_objset; 3842185029Spjd 3843185029Spjd VERIFY((spa->spa_pool_props_object = 3844185029Spjd zap_create(mos, DMU_OT_POOL_PROPS, 3845185029Spjd DMU_OT_NONE, 0, tx)) > 0); 3846185029Spjd 3847185029Spjd VERIFY(zap_update(mos, 3848185029Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 3849185029Spjd 8, 1, &spa->spa_pool_props_object, tx) 3850185029Spjd == 0); 3851185029Spjd } 3852185029Spjd 3853185029Spjd /* normalize the property name */ 3854185029Spjd propname = zpool_prop_to_name(prop); 3855185029Spjd proptype = zpool_prop_get_type(prop); 3856185029Spjd 3857185029Spjd if (nvpair_type(elem) == DATA_TYPE_STRING) { 3858185029Spjd ASSERT(proptype == PROP_TYPE_STRING); 3859185029Spjd VERIFY(nvpair_value_string(elem, &strval) == 0); 3860185029Spjd VERIFY(zap_update(mos, 3861185029Spjd spa->spa_pool_props_object, propname, 3862185029Spjd 1, strlen(strval) + 1, strval, tx) == 0); 3863185029Spjd 3864185029Spjd } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 3865185029Spjd VERIFY(nvpair_value_uint64(elem, &intval) == 0); 3866185029Spjd 3867185029Spjd if (proptype == PROP_TYPE_INDEX) { 3868185029Spjd const char *unused; 3869185029Spjd VERIFY(zpool_prop_index_to_string( 3870185029Spjd prop, intval, &unused) == 0); 3871185029Spjd } 3872185029Spjd VERIFY(zap_update(mos, 3873185029Spjd spa->spa_pool_props_object, propname, 3874185029Spjd 8, 1, &intval, tx) == 0); 3875185029Spjd } else { 3876185029Spjd ASSERT(0); /* not allowed */ 3877185029Spjd } 3878185029Spjd 3879185029Spjd switch (prop) { 3880185029Spjd case ZPOOL_PROP_DELEGATION: 3881185029Spjd spa->spa_delegation = intval; 3882185029Spjd break; 3883185029Spjd case ZPOOL_PROP_BOOTFS: 3884185029Spjd spa->spa_bootfs = intval; 3885185029Spjd break; 3886185029Spjd case ZPOOL_PROP_FAILUREMODE: 3887185029Spjd spa->spa_failmode = intval; 3888185029Spjd break; 3889185029Spjd default: 3890185029Spjd break; 3891185029Spjd } 3892168404Spjd } 3893185029Spjd 3894185029Spjd /* log internal history if this is not a zpool create */ 3895185029Spjd if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 3896185029Spjd tx->tx_txg != TXG_INITIAL) { 3897185029Spjd spa_history_internal_log(LOG_POOL_PROPSET, 3898185029Spjd spa, tx, cr, "%s %lld %s", 3899185029Spjd nvpair_name(elem), intval, spa_name(spa)); 3900185029Spjd } 3901168404Spjd } 3902185029Spjd 3903185029Spjd mutex_exit(&spa->spa_props_lock); 3904168404Spjd} 3905168404Spjd 3906168404Spjd/* 3907168404Spjd * Sync the specified transaction group. New blocks may be dirtied as 3908168404Spjd * part of the process, so we iterate until it converges. 3909168404Spjd */ 3910168404Spjdvoid 3911168404Spjdspa_sync(spa_t *spa, uint64_t txg) 3912168404Spjd{ 3913168404Spjd dsl_pool_t *dp = spa->spa_dsl_pool; 3914168404Spjd objset_t *mos = spa->spa_meta_objset; 3915168404Spjd bplist_t *bpl = &spa->spa_sync_bplist; 3916168404Spjd vdev_t *rvd = spa->spa_root_vdev; 3917168404Spjd vdev_t *vd; 3918168404Spjd dmu_tx_t *tx; 3919168404Spjd int dirty_vdevs; 3920185029Spjd int error; 3921168404Spjd 3922168404Spjd /* 3923168404Spjd * Lock out configuration changes. 3924168404Spjd */ 3925185029Spjd spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3926168404Spjd 3927168404Spjd spa->spa_syncing_txg = txg; 3928168404Spjd spa->spa_sync_pass = 0; 3929168404Spjd 3930185029Spjd /* 3931185029Spjd * If there are any pending vdev state changes, convert them 3932185029Spjd * into config changes that go out with this transaction group. 3933185029Spjd */ 3934185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 3935185029Spjd while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { 3936185029Spjd vdev_state_clean(vd); 3937185029Spjd vdev_config_dirty(vd); 3938185029Spjd } 3939185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 3940185029Spjd 3941168404Spjd VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj)); 3942168404Spjd 3943168404Spjd tx = dmu_tx_create_assigned(dp, txg); 3944168404Spjd 3945168404Spjd /* 3946185029Spjd * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, 3947168404Spjd * set spa_deflate if we have no raid-z vdevs. 3948168404Spjd */ 3949185029Spjd if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && 3950185029Spjd spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { 3951168404Spjd int i; 3952168404Spjd 3953168404Spjd for (i = 0; i < rvd->vdev_children; i++) { 3954168404Spjd vd = rvd->vdev_child[i]; 3955168404Spjd if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) 3956168404Spjd break; 3957168404Spjd } 3958168404Spjd if (i == rvd->vdev_children) { 3959168404Spjd spa->spa_deflate = TRUE; 3960168404Spjd VERIFY(0 == zap_add(spa->spa_meta_objset, 3961168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 3962168404Spjd sizeof (uint64_t), 1, &spa->spa_deflate, tx)); 3963168404Spjd } 3964168404Spjd } 3965168404Spjd 3966185029Spjd if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 3967185029Spjd spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 3968185029Spjd dsl_pool_create_origin(dp, tx); 3969185029Spjd 3970185029Spjd /* Keeping the origin open increases spa_minref */ 3971185029Spjd spa->spa_minref += 3; 3972185029Spjd } 3973185029Spjd 3974185029Spjd if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 3975185029Spjd spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 3976185029Spjd dsl_pool_upgrade_clones(dp, tx); 3977185029Spjd } 3978185029Spjd 3979168404Spjd /* 3980168404Spjd * If anything has changed in this txg, push the deferred frees 3981168404Spjd * from the previous txg. If not, leave them alone so that we 3982168404Spjd * don't generate work on an otherwise idle system. 3983168404Spjd */ 3984168404Spjd if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || 3985168404Spjd !txg_list_empty(&dp->dp_dirty_dirs, txg) || 3986168404Spjd !txg_list_empty(&dp->dp_sync_tasks, txg)) 3987168404Spjd spa_sync_deferred_frees(spa, txg); 3988168404Spjd 3989168404Spjd /* 3990168404Spjd * Iterate to convergence. 3991168404Spjd */ 3992168404Spjd do { 3993168404Spjd spa->spa_sync_pass++; 3994168404Spjd 3995168404Spjd spa_sync_config_object(spa, tx); 3996185029Spjd spa_sync_aux_dev(spa, &spa->spa_spares, tx, 3997185029Spjd ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); 3998185029Spjd spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, 3999185029Spjd ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); 4000168404Spjd spa_errlog_sync(spa, txg); 4001168404Spjd dsl_pool_sync(dp, txg); 4002168404Spjd 4003168404Spjd dirty_vdevs = 0; 4004168404Spjd while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) { 4005168404Spjd vdev_sync(vd, txg); 4006168404Spjd dirty_vdevs++; 4007168404Spjd } 4008168404Spjd 4009168404Spjd bplist_sync(bpl, tx); 4010168404Spjd } while (dirty_vdevs); 4011168404Spjd 4012168404Spjd bplist_close(bpl); 4013168404Spjd 4014168404Spjd dprintf("txg %llu passes %d\n", txg, spa->spa_sync_pass); 4015168404Spjd 4016168404Spjd /* 4017168404Spjd * Rewrite the vdev configuration (which includes the uberblock) 4018168404Spjd * to commit the transaction group. 4019168404Spjd * 4020185029Spjd * If there are no dirty vdevs, we sync the uberblock to a few 4021185029Spjd * random top-level vdevs that are known to be visible in the 4022185029Spjd * config cache (see spa_vdev_add() for a complete description). 4023185029Spjd * If there *are* dirty vdevs, sync the uberblock to all vdevs. 4024168404Spjd */ 4025185029Spjd for (;;) { 4026185029Spjd /* 4027185029Spjd * We hold SCL_STATE to prevent vdev open/close/etc. 4028185029Spjd * while we're attempting to write the vdev labels. 4029185029Spjd */ 4030185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 4031168404Spjd 4032185029Spjd if (list_is_empty(&spa->spa_config_dirty_list)) { 4033185029Spjd vdev_t *svd[SPA_DVAS_PER_BP]; 4034185029Spjd int svdcount = 0; 4035185029Spjd int children = rvd->vdev_children; 4036185029Spjd int c0 = spa_get_random(children); 4037185029Spjd int c; 4038185029Spjd 4039185029Spjd for (c = 0; c < children; c++) { 4040185029Spjd vd = rvd->vdev_child[(c0 + c) % children]; 4041185029Spjd if (vd->vdev_ms_array == 0 || vd->vdev_islog) 4042185029Spjd continue; 4043185029Spjd svd[svdcount++] = vd; 4044185029Spjd if (svdcount == SPA_DVAS_PER_BP) 4045185029Spjd break; 4046185029Spjd } 4047185029Spjd error = vdev_config_sync(svd, svdcount, txg); 4048185029Spjd } else { 4049185029Spjd error = vdev_config_sync(rvd->vdev_child, 4050185029Spjd rvd->vdev_children, txg); 4051168404Spjd } 4052185029Spjd 4053185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 4054185029Spjd 4055185029Spjd if (error == 0) 4056185029Spjd break; 4057185029Spjd zio_suspend(spa, NULL); 4058185029Spjd zio_resume_wait(spa); 4059168404Spjd } 4060168404Spjd dmu_tx_commit(tx); 4061168404Spjd 4062168404Spjd /* 4063168404Spjd * Clear the dirty config list. 4064168404Spjd */ 4065185029Spjd while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) 4066168404Spjd vdev_config_clean(vd); 4067168404Spjd 4068168404Spjd /* 4069168404Spjd * Now that the new config has synced transactionally, 4070168404Spjd * let it become visible to the config cache. 4071168404Spjd */ 4072168404Spjd if (spa->spa_config_syncing != NULL) { 4073168404Spjd spa_config_set(spa, spa->spa_config_syncing); 4074168404Spjd spa->spa_config_txg = txg; 4075168404Spjd spa->spa_config_syncing = NULL; 4076168404Spjd } 4077168404Spjd 4078185029Spjd spa->spa_traverse_wanted = B_TRUE; 4079168404Spjd rw_enter(&spa->spa_traverse_lock, RW_WRITER); 4080185029Spjd spa->spa_traverse_wanted = B_FALSE; 4081168404Spjd spa->spa_ubsync = spa->spa_uberblock; 4082168404Spjd rw_exit(&spa->spa_traverse_lock); 4083168404Spjd 4084168404Spjd /* 4085168404Spjd * Clean up the ZIL records for the synced txg. 4086168404Spjd */ 4087168404Spjd dsl_pool_zil_clean(dp); 4088168404Spjd 4089168404Spjd /* 4090168404Spjd * Update usable space statistics. 4091168404Spjd */ 4092168404Spjd while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) 4093168404Spjd vdev_sync_done(vd, txg); 4094168404Spjd 4095168404Spjd /* 4096168404Spjd * It had better be the case that we didn't dirty anything 4097168404Spjd * since vdev_config_sync(). 4098168404Spjd */ 4099168404Spjd ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); 4100168404Spjd ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); 4101168404Spjd ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); 4102168404Spjd ASSERT(bpl->bpl_queue == NULL); 4103168404Spjd 4104185029Spjd spa_config_exit(spa, SCL_CONFIG, FTAG); 4105168404Spjd 4106168404Spjd /* 4107168404Spjd * If any async tasks have been requested, kick them off. 4108168404Spjd */ 4109168404Spjd spa_async_dispatch(spa); 4110168404Spjd} 4111168404Spjd 4112168404Spjd/* 4113168404Spjd * Sync all pools. We don't want to hold the namespace lock across these 4114168404Spjd * operations, so we take a reference on the spa_t and drop the lock during the 4115168404Spjd * sync. 4116168404Spjd */ 4117168404Spjdvoid 4118168404Spjdspa_sync_allpools(void) 4119168404Spjd{ 4120168404Spjd spa_t *spa = NULL; 4121168404Spjd mutex_enter(&spa_namespace_lock); 4122168404Spjd while ((spa = spa_next(spa)) != NULL) { 4123185029Spjd if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa)) 4124168404Spjd continue; 4125168404Spjd spa_open_ref(spa, FTAG); 4126168404Spjd mutex_exit(&spa_namespace_lock); 4127168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 4128168404Spjd mutex_enter(&spa_namespace_lock); 4129168404Spjd spa_close(spa, FTAG); 4130168404Spjd } 4131168404Spjd mutex_exit(&spa_namespace_lock); 4132168404Spjd} 4133168404Spjd 4134168404Spjd/* 4135168404Spjd * ========================================================================== 4136168404Spjd * Miscellaneous routines 4137168404Spjd * ========================================================================== 4138168404Spjd */ 4139168404Spjd 4140168404Spjd/* 4141168404Spjd * Remove all pools in the system. 4142168404Spjd */ 4143168404Spjdvoid 4144168404Spjdspa_evict_all(void) 4145168404Spjd{ 4146168404Spjd spa_t *spa; 4147168404Spjd 4148168404Spjd /* 4149168404Spjd * Remove all cached state. All pools should be closed now, 4150168404Spjd * so every spa in the AVL tree should be unreferenced. 4151168404Spjd */ 4152168404Spjd mutex_enter(&spa_namespace_lock); 4153168404Spjd while ((spa = spa_next(NULL)) != NULL) { 4154168404Spjd /* 4155168404Spjd * Stop async tasks. The async thread may need to detach 4156168404Spjd * a device that's been replaced, which requires grabbing 4157168404Spjd * spa_namespace_lock, so we must drop it here. 4158168404Spjd */ 4159168404Spjd spa_open_ref(spa, FTAG); 4160168404Spjd mutex_exit(&spa_namespace_lock); 4161168404Spjd spa_async_suspend(spa); 4162168404Spjd mutex_enter(&spa_namespace_lock); 4163168404Spjd spa_close(spa, FTAG); 4164168404Spjd 4165168404Spjd if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4166168404Spjd spa_unload(spa); 4167168404Spjd spa_deactivate(spa); 4168168404Spjd } 4169168404Spjd spa_remove(spa); 4170168404Spjd } 4171168404Spjd mutex_exit(&spa_namespace_lock); 4172168404Spjd} 4173168404Spjd 4174168404Spjdvdev_t * 4175185029Spjdspa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t l2cache) 4176168404Spjd{ 4177185029Spjd vdev_t *vd; 4178185029Spjd int i; 4179185029Spjd 4180185029Spjd if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) 4181185029Spjd return (vd); 4182185029Spjd 4183185029Spjd if (l2cache) { 4184185029Spjd for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 4185185029Spjd vd = spa->spa_l2cache.sav_vdevs[i]; 4186185029Spjd if (vd->vdev_guid == guid) 4187185029Spjd return (vd); 4188185029Spjd } 4189185029Spjd } 4190185029Spjd 4191185029Spjd return (NULL); 4192168404Spjd} 4193168404Spjd 4194168404Spjdvoid 4195185029Spjdspa_upgrade(spa_t *spa, uint64_t version) 4196168404Spjd{ 4197185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4198168404Spjd 4199168404Spjd /* 4200168404Spjd * This should only be called for a non-faulted pool, and since a 4201168404Spjd * future version would result in an unopenable pool, this shouldn't be 4202168404Spjd * possible. 4203168404Spjd */ 4204185029Spjd ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); 4205185029Spjd ASSERT(version >= spa->spa_uberblock.ub_version); 4206168404Spjd 4207185029Spjd spa->spa_uberblock.ub_version = version; 4208168404Spjd vdev_config_dirty(spa->spa_root_vdev); 4209168404Spjd 4210185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 4211168404Spjd 4212168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 4213168404Spjd} 4214168404Spjd 4215168404Spjdboolean_t 4216168404Spjdspa_has_spare(spa_t *spa, uint64_t guid) 4217168404Spjd{ 4218168404Spjd int i; 4219168404Spjd uint64_t spareguid; 4220185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 4221168404Spjd 4222185029Spjd for (i = 0; i < sav->sav_count; i++) 4223185029Spjd if (sav->sav_vdevs[i]->vdev_guid == guid) 4224168404Spjd return (B_TRUE); 4225168404Spjd 4226185029Spjd for (i = 0; i < sav->sav_npending; i++) { 4227185029Spjd if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 4228185029Spjd &spareguid) == 0 && spareguid == guid) 4229168404Spjd return (B_TRUE); 4230168404Spjd } 4231168404Spjd 4232168404Spjd return (B_FALSE); 4233168404Spjd} 4234168404Spjd 4235185029Spjd/* 4236185029Spjd * Check if a pool has an active shared spare device. 4237185029Spjd * Note: reference count of an active spare is 2, as a spare and as a replace 4238185029Spjd */ 4239185029Spjdstatic boolean_t 4240185029Spjdspa_has_active_shared_spare(spa_t *spa) 4241168404Spjd{ 4242185029Spjd int i, refcnt; 4243185029Spjd uint64_t pool; 4244185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 4245185029Spjd 4246185029Spjd for (i = 0; i < sav->sav_count; i++) { 4247185029Spjd if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 4248185029Spjd &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 4249185029Spjd refcnt > 2) 4250185029Spjd return (B_TRUE); 4251185029Spjd } 4252185029Spjd 4253185029Spjd return (B_FALSE); 4254168404Spjd} 4255168404Spjd 4256185029Spjd/* 4257185029Spjd * Post a sysevent corresponding to the given event. The 'name' must be one of 4258185029Spjd * the event definitions in sys/sysevent/eventdefs.h. The payload will be 4259185029Spjd * filled in from the spa and (optionally) the vdev. This doesn't do anything 4260185029Spjd * in the userland libzpool, as we don't want consumers to misinterpret ztest 4261185029Spjd * or zdb as real changes. 4262185029Spjd */ 4263185029Spjdvoid 4264185029Spjdspa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 4265168404Spjd{ 4266185029Spjd#if 0 4267185029Spjd#ifdef _KERNEL 4268185029Spjd sysevent_t *ev; 4269185029Spjd sysevent_attr_list_t *attr = NULL; 4270185029Spjd sysevent_value_t value; 4271185029Spjd sysevent_id_t eid; 4272168404Spjd 4273185029Spjd ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 4274185029Spjd SE_SLEEP); 4275168404Spjd 4276185029Spjd value.value_type = SE_DATA_TYPE_STRING; 4277185029Spjd value.value.sv_string = spa_name(spa); 4278185029Spjd if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 4279185029Spjd goto done; 4280168404Spjd 4281185029Spjd value.value_type = SE_DATA_TYPE_UINT64; 4282185029Spjd value.value.sv_uint64 = spa_guid(spa); 4283185029Spjd if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 4284185029Spjd goto done; 4285168404Spjd 4286185029Spjd if (vd) { 4287185029Spjd value.value_type = SE_DATA_TYPE_UINT64; 4288185029Spjd value.value.sv_uint64 = vd->vdev_guid; 4289185029Spjd if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 4290185029Spjd SE_SLEEP) != 0) 4291185029Spjd goto done; 4292168404Spjd 4293185029Spjd if (vd->vdev_path) { 4294185029Spjd value.value_type = SE_DATA_TYPE_STRING; 4295185029Spjd value.value.sv_string = vd->vdev_path; 4296185029Spjd if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 4297185029Spjd &value, SE_SLEEP) != 0) 4298185029Spjd goto done; 4299168404Spjd } 4300168404Spjd } 4301168404Spjd 4302185029Spjd if (sysevent_attach_attributes(ev, attr) != 0) 4303185029Spjd goto done; 4304185029Spjd attr = NULL; 4305168404Spjd 4306185029Spjd (void) log_sysevent(ev, SE_SLEEP, &eid); 4307185029Spjd 4308185029Spjddone: 4309185029Spjd if (attr) 4310185029Spjd sysevent_free_attr(attr); 4311185029Spjd sysevent_free(ev); 4312185029Spjd#endif 4313185029Spjd#endif 4314168404Spjd} 4315