spa.c revision 247592
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd 22168404Spjd/* 23219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24236155Smm * Copyright (c) 2012 by Delphix. All rights reserved. 25247265Smm * Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved. 26168404Spjd */ 27168404Spjd 28168404Spjd/* 29168404Spjd * This file contains all the routines used when modifying on-disk SPA state. 30168404Spjd * This includes opening, importing, destroying, exporting a pool, and syncing a 31168404Spjd * pool. 
32168404Spjd */ 33168404Spjd 34168404Spjd#include <sys/zfs_context.h> 35168404Spjd#include <sys/fm/fs/zfs.h> 36168404Spjd#include <sys/spa_impl.h> 37168404Spjd#include <sys/zio.h> 38168404Spjd#include <sys/zio_checksum.h> 39168404Spjd#include <sys/dmu.h> 40168404Spjd#include <sys/dmu_tx.h> 41168404Spjd#include <sys/zap.h> 42168404Spjd#include <sys/zil.h> 43219089Spjd#include <sys/ddt.h> 44168404Spjd#include <sys/vdev_impl.h> 45168404Spjd#include <sys/metaslab.h> 46219089Spjd#include <sys/metaslab_impl.h> 47168404Spjd#include <sys/uberblock_impl.h> 48168404Spjd#include <sys/txg.h> 49168404Spjd#include <sys/avl.h> 50168404Spjd#include <sys/dmu_traverse.h> 51168404Spjd#include <sys/dmu_objset.h> 52168404Spjd#include <sys/unique.h> 53168404Spjd#include <sys/dsl_pool.h> 54168404Spjd#include <sys/dsl_dataset.h> 55168404Spjd#include <sys/dsl_dir.h> 56168404Spjd#include <sys/dsl_prop.h> 57168404Spjd#include <sys/dsl_synctask.h> 58168404Spjd#include <sys/fs/zfs.h> 59185029Spjd#include <sys/arc.h> 60168404Spjd#include <sys/callb.h> 61185029Spjd#include <sys/spa_boot.h> 62219089Spjd#include <sys/zfs_ioctl.h> 63219089Spjd#include <sys/dsl_scan.h> 64236884Smm#include <sys/zfeature.h> 65219089Spjd#include <sys/zvol.h> 66240868Spjd#include <sys/trim_map.h> 67168404Spjd 68219089Spjd#ifdef _KERNEL 69219089Spjd#include <sys/callb.h> 70219089Spjd#include <sys/cpupart.h> 71219089Spjd#include <sys/zone.h> 72219089Spjd#endif /* _KERNEL */ 73219089Spjd 74185029Spjd#include "zfs_prop.h" 75185029Spjd#include "zfs_comutil.h" 76168404Spjd 77204073Spjd/* Check hostid on import? 
*/ 78204073Spjdstatic int check_hostid = 1; 79204073Spjd 80204073SpjdSYSCTL_DECL(_vfs_zfs); 81204073SpjdTUNABLE_INT("vfs.zfs.check_hostid", &check_hostid); 82204073SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RW, &check_hostid, 0, 83204073Spjd "Check hostid on import?"); 84204073Spjd 85219089Spjdtypedef enum zti_modes { 86209962Smm zti_mode_fixed, /* value is # of threads (min 1) */ 87209962Smm zti_mode_online_percent, /* value is % of online CPUs */ 88219089Spjd zti_mode_batch, /* cpu-intensive; value is ignored */ 89211931Smm zti_mode_null, /* don't create a taskq */ 90209962Smm zti_nmodes 91219089Spjd} zti_modes_t; 92168712Spjd 93211931Smm#define ZTI_FIX(n) { zti_mode_fixed, (n) } 94211931Smm#define ZTI_PCT(n) { zti_mode_online_percent, (n) } 95219089Spjd#define ZTI_BATCH { zti_mode_batch, 0 } 96211931Smm#define ZTI_NULL { zti_mode_null, 0 } 97209962Smm 98211931Smm#define ZTI_ONE ZTI_FIX(1) 99209962Smm 100209962Smmtypedef struct zio_taskq_info { 101211931Smm enum zti_modes zti_mode; 102211931Smm uint_t zti_value; 103209962Smm} zio_taskq_info_t; 104209962Smm 105209962Smmstatic const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { 106219089Spjd "issue", "issue_high", "intr", "intr_high" 107209962Smm}; 108209962Smm 109211931Smm/* 110211931Smm * Define the taskq threads for the following I/O types: 111211931Smm * NULL, READ, WRITE, FREE, CLAIM, and IOCTL 112211931Smm */ 113211931Smmconst zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { 114211931Smm /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ 115211931Smm { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 116219089Spjd { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, 117219089Spjd { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) }, 118219089Spjd { ZTI_FIX(100), ZTI_NULL, ZTI_ONE, ZTI_NULL }, 119211931Smm { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 120211931Smm { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, 121209962Smm}; 122209962Smm 123236884Smmstatic dsl_syncfunc_t spa_sync_version; 124219089Spjdstatic 
dsl_syncfunc_t spa_sync_props; 125239620Smmstatic dsl_checkfunc_t spa_change_guid_check; 126239620Smmstatic dsl_syncfunc_t spa_change_guid_sync; 127185029Spjdstatic boolean_t spa_has_active_shared_spare(spa_t *spa); 128219089Spjdstatic int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, 129219089Spjd spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, 130219089Spjd char **ereport); 131219089Spjdstatic void spa_vdev_resilver_done(spa_t *spa); 132185029Spjd 133219089Spjduint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */ 134219089Spjd#ifdef PSRSET_BIND 135219089Spjdid_t zio_taskq_psrset_bind = PS_NONE; 136219089Spjd#endif 137219089Spjd#ifdef SYSDC 138219089Spjdboolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ 139219089Spjd#endif 140219089Spjduint_t zio_taskq_basedc = 80; /* base duty cycle */ 141219089Spjd 142219089Spjdboolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ 143243503Smmextern int zfs_sync_pass_deferred_free; 144219089Spjd 145247265Smm#ifndef illumos 146247265Smmextern void spa_deadman(void *arg); 147247265Smm#endif 148247265Smm 149168404Spjd/* 150219089Spjd * This (illegal) pool name is used when temporarily importing a spa_t in order 151219089Spjd * to get the vdev stats associated with the imported devices. 152219089Spjd */ 153219089Spjd#define TRYIMPORT_NAME "$import" 154219089Spjd 155219089Spjd/* 156168404Spjd * ========================================================================== 157185029Spjd * SPA properties routines 158185029Spjd * ========================================================================== 159185029Spjd */ 160185029Spjd 161185029Spjd/* 162185029Spjd * Add a (source=src, propname=propval) list to an nvlist. 
163185029Spjd */ 164185029Spjdstatic void 165185029Spjdspa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, 166185029Spjd uint64_t intval, zprop_source_t src) 167185029Spjd{ 168185029Spjd const char *propname = zpool_prop_to_name(prop); 169185029Spjd nvlist_t *propval; 170185029Spjd 171185029Spjd VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); 172185029Spjd VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); 173185029Spjd 174185029Spjd if (strval != NULL) 175185029Spjd VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); 176185029Spjd else 177185029Spjd VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); 178185029Spjd 179185029Spjd VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); 180185029Spjd nvlist_free(propval); 181185029Spjd} 182185029Spjd 183185029Spjd/* 184185029Spjd * Get property values from the spa configuration. 185185029Spjd */ 186185029Spjdstatic void 187185029Spjdspa_prop_get_config(spa_t *spa, nvlist_t **nvp) 188185029Spjd{ 189236155Smm vdev_t *rvd = spa->spa_root_vdev; 190236884Smm dsl_pool_t *pool = spa->spa_dsl_pool; 191209962Smm uint64_t size; 192219089Spjd uint64_t alloc; 193236155Smm uint64_t space; 194185029Spjd uint64_t cap, version; 195185029Spjd zprop_source_t src = ZPROP_SRC_NONE; 196185029Spjd spa_config_dirent_t *dp; 197185029Spjd 198185029Spjd ASSERT(MUTEX_HELD(&spa->spa_props_lock)); 199185029Spjd 200236155Smm if (rvd != NULL) { 201219089Spjd alloc = metaslab_class_get_alloc(spa_normal_class(spa)); 202219089Spjd size = metaslab_class_get_space(spa_normal_class(spa)); 203209962Smm spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); 204209962Smm spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); 205219089Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); 206219089Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, 207219089Spjd size - alloc, src); 208236155Smm 209236155Smm space = 0; 210236155Smm for (int c = 0; c < rvd->vdev_children; 
c++) { 211236155Smm vdev_t *tvd = rvd->vdev_child[c]; 212236155Smm space += tvd->vdev_max_asize - tvd->vdev_asize; 213236155Smm } 214236155Smm spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, space, 215236155Smm src); 216236155Smm 217219089Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, 218219089Spjd (spa_mode(spa) == FREAD), src); 219185029Spjd 220219089Spjd cap = (size == 0) ? 0 : (alloc * 100 / size); 221209962Smm spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); 222185029Spjd 223219089Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, 224219089Spjd ddt_get_pool_dedup_ratio(spa), src); 225219089Spjd 226209962Smm spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, 227236155Smm rvd->vdev_state, src); 228209962Smm 229209962Smm version = spa_version(spa); 230209962Smm if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) 231209962Smm src = ZPROP_SRC_DEFAULT; 232209962Smm else 233209962Smm src = ZPROP_SRC_LOCAL; 234209962Smm spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); 235209962Smm } 236209962Smm 237236884Smm if (pool != NULL) { 238236884Smm dsl_dir_t *freedir = pool->dp_free_dir; 239236884Smm 240236884Smm /* 241236884Smm * The $FREE directory was introduced in SPA_VERSION_DEADLISTS, 242236884Smm * when opening pools before this version freedir will be NULL. 
243236884Smm */ 244236884Smm if (freedir != NULL) { 245236884Smm spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, 246236884Smm freedir->dd_phys->dd_used_bytes, src); 247236884Smm } else { 248236884Smm spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, 249236884Smm NULL, 0, src); 250236884Smm } 251236884Smm } 252236884Smm 253185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); 254185029Spjd 255228103Smm if (spa->spa_comment != NULL) { 256228103Smm spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, 257228103Smm 0, ZPROP_SRC_LOCAL); 258228103Smm } 259228103Smm 260185029Spjd if (spa->spa_root != NULL) 261185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 262185029Spjd 0, ZPROP_SRC_LOCAL); 263185029Spjd 264185029Spjd if ((dp = list_head(&spa->spa_config_list)) != NULL) { 265185029Spjd if (dp->scd_path == NULL) { 266185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 267185029Spjd "none", 0, ZPROP_SRC_LOCAL); 268185029Spjd } else if (strcmp(dp->scd_path, spa_config_path) != 0) { 269185029Spjd spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, 270185029Spjd dp->scd_path, 0, ZPROP_SRC_LOCAL); 271185029Spjd } 272185029Spjd } 273185029Spjd} 274185029Spjd 275185029Spjd/* 276185029Spjd * Get zpool property values. 277185029Spjd */ 278185029Spjdint 279185029Spjdspa_prop_get(spa_t *spa, nvlist_t **nvp) 280185029Spjd{ 281219089Spjd objset_t *mos = spa->spa_meta_objset; 282185029Spjd zap_cursor_t zc; 283185029Spjd zap_attribute_t za; 284185029Spjd int err; 285185029Spjd 286185029Spjd VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 287185029Spjd 288185029Spjd mutex_enter(&spa->spa_props_lock); 289185029Spjd 290185029Spjd /* 291185029Spjd * Get properties from the spa config. 292185029Spjd */ 293185029Spjd spa_prop_get_config(spa, nvp); 294185029Spjd 295185029Spjd /* If no pool property object, no more prop to get. 
*/ 296219089Spjd if (mos == NULL || spa->spa_pool_props_object == 0) { 297185029Spjd mutex_exit(&spa->spa_props_lock); 298185029Spjd return (0); 299185029Spjd } 300185029Spjd 301185029Spjd /* 302185029Spjd * Get properties from the MOS pool property object. 303185029Spjd */ 304185029Spjd for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); 305185029Spjd (err = zap_cursor_retrieve(&zc, &za)) == 0; 306185029Spjd zap_cursor_advance(&zc)) { 307185029Spjd uint64_t intval = 0; 308185029Spjd char *strval = NULL; 309185029Spjd zprop_source_t src = ZPROP_SRC_DEFAULT; 310185029Spjd zpool_prop_t prop; 311185029Spjd 312185029Spjd if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL) 313185029Spjd continue; 314185029Spjd 315185029Spjd switch (za.za_integer_length) { 316185029Spjd case 8: 317185029Spjd /* integer property */ 318185029Spjd if (za.za_first_integer != 319185029Spjd zpool_prop_default_numeric(prop)) 320185029Spjd src = ZPROP_SRC_LOCAL; 321185029Spjd 322185029Spjd if (prop == ZPOOL_PROP_BOOTFS) { 323185029Spjd dsl_pool_t *dp; 324185029Spjd dsl_dataset_t *ds = NULL; 325185029Spjd 326185029Spjd dp = spa_get_dsl(spa); 327185029Spjd rw_enter(&dp->dp_config_rwlock, RW_READER); 328185029Spjd if (err = dsl_dataset_hold_obj(dp, 329185029Spjd za.za_first_integer, FTAG, &ds)) { 330185029Spjd rw_exit(&dp->dp_config_rwlock); 331185029Spjd break; 332185029Spjd } 333185029Spjd 334185029Spjd strval = kmem_alloc( 335185029Spjd MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, 336185029Spjd KM_SLEEP); 337185029Spjd dsl_dataset_name(ds, strval); 338185029Spjd dsl_dataset_rele(ds, FTAG); 339185029Spjd rw_exit(&dp->dp_config_rwlock); 340185029Spjd } else { 341185029Spjd strval = NULL; 342185029Spjd intval = za.za_first_integer; 343185029Spjd } 344185029Spjd 345185029Spjd spa_prop_add_list(*nvp, prop, strval, intval, src); 346185029Spjd 347185029Spjd if (strval != NULL) 348185029Spjd kmem_free(strval, 349185029Spjd MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); 350185029Spjd 351185029Spjd 
break; 352185029Spjd 353185029Spjd case 1: 354185029Spjd /* string property */ 355185029Spjd strval = kmem_alloc(za.za_num_integers, KM_SLEEP); 356185029Spjd err = zap_lookup(mos, spa->spa_pool_props_object, 357185029Spjd za.za_name, 1, za.za_num_integers, strval); 358185029Spjd if (err) { 359185029Spjd kmem_free(strval, za.za_num_integers); 360185029Spjd break; 361185029Spjd } 362185029Spjd spa_prop_add_list(*nvp, prop, strval, 0, src); 363185029Spjd kmem_free(strval, za.za_num_integers); 364185029Spjd break; 365185029Spjd 366185029Spjd default: 367185029Spjd break; 368185029Spjd } 369185029Spjd } 370185029Spjd zap_cursor_fini(&zc); 371185029Spjd mutex_exit(&spa->spa_props_lock); 372185029Spjdout: 373185029Spjd if (err && err != ENOENT) { 374185029Spjd nvlist_free(*nvp); 375185029Spjd *nvp = NULL; 376185029Spjd return (err); 377185029Spjd } 378185029Spjd 379185029Spjd return (0); 380185029Spjd} 381185029Spjd 382185029Spjd/* 383185029Spjd * Validate the given pool properties nvlist and modify the list 384185029Spjd * for the property values to be set. 385185029Spjd */ 386185029Spjdstatic int 387185029Spjdspa_prop_validate(spa_t *spa, nvlist_t *props) 388185029Spjd{ 389185029Spjd nvpair_t *elem; 390185029Spjd int error = 0, reset_bootfs = 0; 391247187Smm uint64_t objnum = 0; 392236884Smm boolean_t has_feature = B_FALSE; 393185029Spjd 394185029Spjd elem = NULL; 395185029Spjd while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { 396185029Spjd uint64_t intval; 397236884Smm char *strval, *slash, *check, *fname; 398236884Smm const char *propname = nvpair_name(elem); 399236884Smm zpool_prop_t prop = zpool_name_to_prop(propname); 400185029Spjd 401236884Smm switch (prop) { 402236884Smm case ZPROP_INVAL: 403236884Smm if (!zpool_prop_feature(propname)) { 404236884Smm error = EINVAL; 405236884Smm break; 406236884Smm } 407185029Spjd 408236884Smm /* 409236884Smm * Sanitize the input. 
410236884Smm */ 411236884Smm if (nvpair_type(elem) != DATA_TYPE_UINT64) { 412236884Smm error = EINVAL; 413236884Smm break; 414236884Smm } 415185029Spjd 416236884Smm if (nvpair_value_uint64(elem, &intval) != 0) { 417236884Smm error = EINVAL; 418236884Smm break; 419236884Smm } 420236884Smm 421236884Smm if (intval != 0) { 422236884Smm error = EINVAL; 423236884Smm break; 424236884Smm } 425236884Smm 426236884Smm fname = strchr(propname, '@') + 1; 427236884Smm if (zfeature_lookup_name(fname, NULL) != 0) { 428236884Smm error = EINVAL; 429236884Smm break; 430236884Smm } 431236884Smm 432236884Smm has_feature = B_TRUE; 433236884Smm break; 434236884Smm 435185029Spjd case ZPOOL_PROP_VERSION: 436185029Spjd error = nvpair_value_uint64(elem, &intval); 437185029Spjd if (!error && 438236884Smm (intval < spa_version(spa) || 439236884Smm intval > SPA_VERSION_BEFORE_FEATURES || 440236884Smm has_feature)) 441185029Spjd error = EINVAL; 442185029Spjd break; 443185029Spjd 444185029Spjd case ZPOOL_PROP_DELEGATION: 445185029Spjd case ZPOOL_PROP_AUTOREPLACE: 446185029Spjd case ZPOOL_PROP_LISTSNAPS: 447219089Spjd case ZPOOL_PROP_AUTOEXPAND: 448185029Spjd error = nvpair_value_uint64(elem, &intval); 449185029Spjd if (!error && intval > 1) 450185029Spjd error = EINVAL; 451185029Spjd break; 452185029Spjd 453185029Spjd case ZPOOL_PROP_BOOTFS: 454209962Smm /* 455209962Smm * If the pool version is less than SPA_VERSION_BOOTFS, 456209962Smm * or the pool is still being created (version == 0), 457209962Smm * the bootfs property cannot be set. 
458209962Smm */ 459185029Spjd if (spa_version(spa) < SPA_VERSION_BOOTFS) { 460185029Spjd error = ENOTSUP; 461185029Spjd break; 462185029Spjd } 463185029Spjd 464185029Spjd /* 465185029Spjd * Make sure the vdev config is bootable 466185029Spjd */ 467185029Spjd if (!vdev_is_bootable(spa->spa_root_vdev)) { 468185029Spjd error = ENOTSUP; 469185029Spjd break; 470185029Spjd } 471185029Spjd 472185029Spjd reset_bootfs = 1; 473185029Spjd 474185029Spjd error = nvpair_value_string(elem, &strval); 475185029Spjd 476185029Spjd if (!error) { 477236884Smm objset_t *os; 478185029Spjd uint64_t compress; 479185029Spjd 480185029Spjd if (strval == NULL || strval[0] == '\0') { 481185029Spjd objnum = zpool_prop_default_numeric( 482185029Spjd ZPOOL_PROP_BOOTFS); 483185029Spjd break; 484185029Spjd } 485185029Spjd 486219089Spjd if (error = dmu_objset_hold(strval, FTAG, &os)) 487185029Spjd break; 488185029Spjd 489219089Spjd /* Must be ZPL and not gzip compressed. */ 490219089Spjd 491219089Spjd if (dmu_objset_type(os) != DMU_OST_ZFS) { 492219089Spjd error = ENOTSUP; 493219089Spjd } else if ((error = dsl_prop_get_integer(strval, 494185029Spjd zfs_prop_to_name(ZFS_PROP_COMPRESSION), 495185029Spjd &compress, NULL)) == 0 && 496185029Spjd !BOOTFS_COMPRESS_VALID(compress)) { 497185029Spjd error = ENOTSUP; 498185029Spjd } else { 499185029Spjd objnum = dmu_objset_id(os); 500185029Spjd } 501219089Spjd dmu_objset_rele(os, FTAG); 502185029Spjd } 503185029Spjd break; 504185029Spjd 505185029Spjd case ZPOOL_PROP_FAILUREMODE: 506185029Spjd error = nvpair_value_uint64(elem, &intval); 507185029Spjd if (!error && (intval < ZIO_FAILURE_MODE_WAIT || 508185029Spjd intval > ZIO_FAILURE_MODE_PANIC)) 509185029Spjd error = EINVAL; 510185029Spjd 511185029Spjd /* 512185029Spjd * This is a special case which only occurs when 513185029Spjd * the pool has completely failed. 
This allows 514185029Spjd * the user to change the in-core failmode property 515185029Spjd * without syncing it out to disk (I/Os might 516185029Spjd * currently be blocked). We do this by returning 517185029Spjd * EIO to the caller (spa_prop_set) to trick it 518185029Spjd * into thinking we encountered a property validation 519185029Spjd * error. 520185029Spjd */ 521185029Spjd if (!error && spa_suspended(spa)) { 522185029Spjd spa->spa_failmode = intval; 523185029Spjd error = EIO; 524185029Spjd } 525185029Spjd break; 526185029Spjd 527185029Spjd case ZPOOL_PROP_CACHEFILE: 528185029Spjd if ((error = nvpair_value_string(elem, &strval)) != 0) 529185029Spjd break; 530185029Spjd 531185029Spjd if (strval[0] == '\0') 532185029Spjd break; 533185029Spjd 534185029Spjd if (strcmp(strval, "none") == 0) 535185029Spjd break; 536185029Spjd 537185029Spjd if (strval[0] != '/') { 538185029Spjd error = EINVAL; 539185029Spjd break; 540185029Spjd } 541185029Spjd 542185029Spjd slash = strrchr(strval, '/'); 543185029Spjd ASSERT(slash != NULL); 544185029Spjd 545185029Spjd if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || 546185029Spjd strcmp(slash, "/..") == 0) 547185029Spjd error = EINVAL; 548185029Spjd break; 549219089Spjd 550228103Smm case ZPOOL_PROP_COMMENT: 551228103Smm if ((error = nvpair_value_string(elem, &strval)) != 0) 552228103Smm break; 553228103Smm for (check = strval; *check != '\0'; check++) { 554228103Smm /* 555228103Smm * The kernel doesn't have an easy isprint() 556228103Smm * check. For this kernel check, we merely 557228103Smm * check ASCII apart from DEL. Fix this if 558228103Smm * there is an easy-to-use kernel isprint(). 
559228103Smm */ 560228103Smm if (*check >= 0x7f) { 561228103Smm error = EINVAL; 562228103Smm break; 563228103Smm } 564228103Smm check++; 565228103Smm } 566228103Smm if (strlen(strval) > ZPROP_MAX_COMMENT) 567228103Smm error = E2BIG; 568228103Smm break; 569228103Smm 570219089Spjd case ZPOOL_PROP_DEDUPDITTO: 571219089Spjd if (spa_version(spa) < SPA_VERSION_DEDUP) 572219089Spjd error = ENOTSUP; 573219089Spjd else 574219089Spjd error = nvpair_value_uint64(elem, &intval); 575219089Spjd if (error == 0 && 576219089Spjd intval != 0 && intval < ZIO_DEDUPDITTO_MIN) 577219089Spjd error = EINVAL; 578219089Spjd break; 579185029Spjd } 580185029Spjd 581185029Spjd if (error) 582185029Spjd break; 583185029Spjd } 584185029Spjd 585185029Spjd if (!error && reset_bootfs) { 586185029Spjd error = nvlist_remove(props, 587185029Spjd zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); 588185029Spjd 589185029Spjd if (!error) { 590185029Spjd error = nvlist_add_uint64(props, 591185029Spjd zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); 592185029Spjd } 593185029Spjd } 594185029Spjd 595185029Spjd return (error); 596185029Spjd} 597185029Spjd 598209962Smmvoid 599209962Smmspa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) 600209962Smm{ 601209962Smm char *cachefile; 602209962Smm spa_config_dirent_t *dp; 603209962Smm 604209962Smm if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), 605209962Smm &cachefile) != 0) 606209962Smm return; 607209962Smm 608209962Smm dp = kmem_alloc(sizeof (spa_config_dirent_t), 609209962Smm KM_SLEEP); 610209962Smm 611209962Smm if (cachefile[0] == '\0') 612209962Smm dp->scd_path = spa_strdup(spa_config_path); 613209962Smm else if (strcmp(cachefile, "none") == 0) 614209962Smm dp->scd_path = NULL; 615209962Smm else 616209962Smm dp->scd_path = spa_strdup(cachefile); 617209962Smm 618209962Smm list_insert_head(&spa->spa_config_list, dp); 619209962Smm if (need_sync) 620209962Smm spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 621209962Smm} 
622209962Smm 623185029Spjdint 624185029Spjdspa_prop_set(spa_t *spa, nvlist_t *nvp) 625185029Spjd{ 626185029Spjd int error; 627236884Smm nvpair_t *elem = NULL; 628209962Smm boolean_t need_sync = B_FALSE; 629185029Spjd 630185029Spjd if ((error = spa_prop_validate(spa, nvp)) != 0) 631185029Spjd return (error); 632185029Spjd 633209962Smm while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { 634236884Smm zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); 635209962Smm 636219089Spjd if (prop == ZPOOL_PROP_CACHEFILE || 637219089Spjd prop == ZPOOL_PROP_ALTROOT || 638219089Spjd prop == ZPOOL_PROP_READONLY) 639209962Smm continue; 640209962Smm 641236884Smm if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) { 642236884Smm uint64_t ver; 643236884Smm 644236884Smm if (prop == ZPOOL_PROP_VERSION) { 645236884Smm VERIFY(nvpair_value_uint64(elem, &ver) == 0); 646236884Smm } else { 647236884Smm ASSERT(zpool_prop_feature(nvpair_name(elem))); 648236884Smm ver = SPA_VERSION_FEATURES; 649236884Smm need_sync = B_TRUE; 650236884Smm } 651236884Smm 652236884Smm /* Save time if the version is already set. */ 653236884Smm if (ver == spa_version(spa)) 654236884Smm continue; 655236884Smm 656236884Smm /* 657236884Smm * In addition to the pool directory object, we might 658236884Smm * create the pool properties object, the features for 659236884Smm * read object, the features for write object, or the 660236884Smm * feature descriptions object. 
661236884Smm */ 662236884Smm error = dsl_sync_task_do(spa_get_dsl(spa), NULL, 663236884Smm spa_sync_version, spa, &ver, 6); 664236884Smm if (error) 665236884Smm return (error); 666236884Smm continue; 667236884Smm } 668236884Smm 669209962Smm need_sync = B_TRUE; 670209962Smm break; 671209962Smm } 672209962Smm 673236884Smm if (need_sync) { 674209962Smm return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props, 675236884Smm spa, nvp, 6)); 676236884Smm } 677236884Smm 678236884Smm return (0); 679185029Spjd} 680185029Spjd 681185029Spjd/* 682185029Spjd * If the bootfs property value is dsobj, clear it. 683185029Spjd */ 684185029Spjdvoid 685185029Spjdspa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) 686185029Spjd{ 687185029Spjd if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { 688185029Spjd VERIFY(zap_remove(spa->spa_meta_objset, 689185029Spjd spa->spa_pool_props_object, 690185029Spjd zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); 691185029Spjd spa->spa_bootfs = 0; 692185029Spjd } 693185029Spjd} 694185029Spjd 695239620Smm/*ARGSUSED*/ 696239620Smmstatic int 697239620Smmspa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx) 698239620Smm{ 699239620Smm spa_t *spa = arg1; 700239620Smm uint64_t *newguid = arg2; 701239620Smm vdev_t *rvd = spa->spa_root_vdev; 702239620Smm uint64_t vdev_state; 703239620Smm 704239620Smm spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 705239620Smm vdev_state = rvd->vdev_state; 706239620Smm spa_config_exit(spa, SCL_STATE, FTAG); 707239620Smm 708239620Smm if (vdev_state != VDEV_STATE_HEALTHY) 709239620Smm return (ENXIO); 710239620Smm 711239620Smm ASSERT3U(spa_guid(spa), !=, *newguid); 712239620Smm 713239620Smm return (0); 714239620Smm} 715239620Smm 716239620Smmstatic void 717239620Smmspa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) 718239620Smm{ 719239620Smm spa_t *spa = arg1; 720239620Smm uint64_t *newguid = arg2; 721239620Smm uint64_t oldguid; 722239620Smm vdev_t *rvd = spa->spa_root_vdev; 
723239620Smm 724239620Smm oldguid = spa_guid(spa); 725239620Smm 726239620Smm spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 727239620Smm rvd->vdev_guid = *newguid; 728239620Smm rvd->vdev_guid_sum += (*newguid - oldguid); 729239620Smm vdev_config_dirty(rvd); 730239620Smm spa_config_exit(spa, SCL_STATE, FTAG); 731239620Smm 732239620Smm#ifdef __FreeBSD__ 733239620Smm /* 734239620Smm * TODO: until recent illumos logging changes are merged 735239620Smm * log reguid as pool property change 736239620Smm */ 737239620Smm spa_history_log_internal(LOG_POOL_PROPSET, spa, tx, 738239620Smm "guid change old=%llu new=%llu", oldguid, *newguid); 739239620Smm#else 740239620Smm spa_history_log_internal(spa, "guid change", tx, "old=%lld new=%lld", 741239620Smm oldguid, *newguid); 742239620Smm#endif 743239620Smm} 744239620Smm 745185029Spjd/* 746228103Smm * Change the GUID for the pool. This is done so that we can later 747228103Smm * re-import a pool built from a clone of our own vdevs. We will modify 748228103Smm * the root vdev's guid, our own pool guid, and then mark all of our 749228103Smm * vdevs dirty. Note that we must make sure that all our vdevs are 750228103Smm * online when we do this, or else any vdevs that weren't present 751228103Smm * would be orphaned from our pool. We are also going to issue a 752228103Smm * sysevent to update any watchers. 
753228103Smm */ 754228103Smmint 755228103Smmspa_change_guid(spa_t *spa) 756228103Smm{ 757239620Smm int error; 758239620Smm uint64_t guid; 759228103Smm 760239620Smm mutex_enter(&spa_namespace_lock); 761239620Smm guid = spa_generate_guid(NULL); 762228103Smm 763239620Smm error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check, 764239620Smm spa_change_guid_sync, spa, &guid, 5); 765228103Smm 766239620Smm if (error == 0) { 767239620Smm spa_config_sync(spa, B_FALSE, B_TRUE); 768239620Smm spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID); 769239620Smm } 770228103Smm 771239620Smm mutex_exit(&spa_namespace_lock); 772228103Smm 773239620Smm return (error); 774228103Smm} 775228103Smm 776228103Smm/* 777185029Spjd * ========================================================================== 778168404Spjd * SPA state manipulation (open/create/destroy/import/export) 779168404Spjd * ========================================================================== 780168404Spjd */ 781168404Spjd 782168404Spjdstatic int 783168404Spjdspa_error_entry_compare(const void *a, const void *b) 784168404Spjd{ 785168404Spjd spa_error_entry_t *sa = (spa_error_entry_t *)a; 786168404Spjd spa_error_entry_t *sb = (spa_error_entry_t *)b; 787168404Spjd int ret; 788168404Spjd 789168404Spjd ret = bcmp(&sa->se_bookmark, &sb->se_bookmark, 790168404Spjd sizeof (zbookmark_t)); 791168404Spjd 792168404Spjd if (ret < 0) 793168404Spjd return (-1); 794168404Spjd else if (ret > 0) 795168404Spjd return (1); 796168404Spjd else 797168404Spjd return (0); 798168404Spjd} 799168404Spjd 800168404Spjd/* 801168404Spjd * Utility function which retrieves copies of the current logs and 802168404Spjd * re-initializes them in the process. 
803168404Spjd */ 804168404Spjdvoid 805168404Spjdspa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) 806168404Spjd{ 807168404Spjd ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); 808168404Spjd 809168404Spjd bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); 810168404Spjd bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); 811168404Spjd 812168404Spjd avl_create(&spa->spa_errlist_scrub, 813168404Spjd spa_error_entry_compare, sizeof (spa_error_entry_t), 814168404Spjd offsetof(spa_error_entry_t, se_avl)); 815168404Spjd avl_create(&spa->spa_errlist_last, 816168404Spjd spa_error_entry_compare, sizeof (spa_error_entry_t), 817168404Spjd offsetof(spa_error_entry_t, se_avl)); 818168404Spjd} 819168404Spjd 820219089Spjdstatic taskq_t * 821219089Spjdspa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode, 822219089Spjd uint_t value) 823168404Spjd{ 824219089Spjd uint_t flags = TASKQ_PREPOPULATE; 825219089Spjd boolean_t batch = B_FALSE; 826168404Spjd 827219089Spjd switch (mode) { 828219089Spjd case zti_mode_null: 829219089Spjd return (NULL); /* no taskq needed */ 830168404Spjd 831219089Spjd case zti_mode_fixed: 832219089Spjd ASSERT3U(value, >=, 1); 833219089Spjd value = MAX(value, 1); 834219089Spjd break; 835168404Spjd 836219089Spjd case zti_mode_batch: 837219089Spjd batch = B_TRUE; 838219089Spjd flags |= TASKQ_THREADS_CPU_PCT; 839219089Spjd value = zio_taskq_batch_pct; 840219089Spjd break; 841219089Spjd 842219089Spjd case zti_mode_online_percent: 843219089Spjd flags |= TASKQ_THREADS_CPU_PCT; 844219089Spjd break; 845219089Spjd 846219089Spjd default: 847219089Spjd panic("unrecognized mode for %s taskq (%u:%u) in " 848219089Spjd "spa_activate()", 849219089Spjd name, mode, value); 850219089Spjd break; 851219089Spjd } 852219089Spjd 853219089Spjd#ifdef SYSDC 854219089Spjd if (zio_taskq_sysdc && spa->spa_proc != &p0) { 855219089Spjd if (batch) 856219089Spjd flags |= TASKQ_DC_BATCH; 857219089Spjd 858219089Spjd return (taskq_create_sysdc(name, value, 50, 
INT_MAX, 859219089Spjd spa->spa_proc, zio_taskq_basedc, flags)); 860219089Spjd } 861219089Spjd#endif 862219089Spjd return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX, 863219089Spjd spa->spa_proc, flags)); 864219089Spjd} 865219089Spjd 866219089Spjdstatic void 867219089Spjdspa_create_zio_taskqs(spa_t *spa) 868219089Spjd{ 869185029Spjd for (int t = 0; t < ZIO_TYPES; t++) { 870185029Spjd for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 871211931Smm const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; 872211931Smm enum zti_modes mode = ztip->zti_mode; 873211931Smm uint_t value = ztip->zti_value; 874209962Smm char name[32]; 875209962Smm 876209962Smm (void) snprintf(name, sizeof (name), 877211931Smm "%s_%s", zio_type_name[t], zio_taskq_types[q]); 878209962Smm 879219089Spjd spa->spa_zio_taskq[t][q] = 880219089Spjd spa_taskq_create(spa, name, mode, value); 881219089Spjd } 882219089Spjd } 883219089Spjd} 884209962Smm 885219089Spjd#ifdef _KERNEL 886219089Spjd#ifdef SPA_PROCESS 887219089Spjdstatic void 888219089Spjdspa_thread(void *arg) 889219089Spjd{ 890219089Spjd callb_cpr_t cprinfo; 891209962Smm 892219089Spjd spa_t *spa = arg; 893219089Spjd user_t *pu = PTOU(curproc); 894209962Smm 895219089Spjd CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, 896219089Spjd spa->spa_name); 897209962Smm 898219089Spjd ASSERT(curproc != &p0); 899219089Spjd (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), 900219089Spjd "zpool-%s", spa->spa_name); 901219089Spjd (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); 902211931Smm 903219089Spjd#ifdef PSRSET_BIND 904219089Spjd /* bind this thread to the requested psrset */ 905219089Spjd if (zio_taskq_psrset_bind != PS_NONE) { 906219089Spjd pool_lock(); 907219089Spjd mutex_enter(&cpu_lock); 908219089Spjd mutex_enter(&pidlock); 909219089Spjd mutex_enter(&curproc->p_lock); 910219089Spjd 911219089Spjd if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, 912219089Spjd 0, NULL, NULL) == 0) { 913219089Spjd 
curthread->t_bind_pset = zio_taskq_psrset_bind; 914219089Spjd } else { 915219089Spjd cmn_err(CE_WARN, 916219089Spjd "Couldn't bind process for zfs pool \"%s\" to " 917219089Spjd "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); 918219089Spjd } 919219089Spjd 920219089Spjd mutex_exit(&curproc->p_lock); 921219089Spjd mutex_exit(&pidlock); 922219089Spjd mutex_exit(&cpu_lock); 923219089Spjd pool_unlock(); 924219089Spjd } 925219089Spjd#endif 926219089Spjd 927219089Spjd#ifdef SYSDC 928219089Spjd if (zio_taskq_sysdc) { 929219089Spjd sysdc_thread_enter(curthread, 100, 0); 930219089Spjd } 931219089Spjd#endif 932219089Spjd 933219089Spjd spa->spa_proc = curproc; 934219089Spjd spa->spa_did = curthread->t_did; 935219089Spjd 936219089Spjd spa_create_zio_taskqs(spa); 937219089Spjd 938219089Spjd mutex_enter(&spa->spa_proc_lock); 939219089Spjd ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); 940219089Spjd 941219089Spjd spa->spa_proc_state = SPA_PROC_ACTIVE; 942219089Spjd cv_broadcast(&spa->spa_proc_cv); 943219089Spjd 944219089Spjd CALLB_CPR_SAFE_BEGIN(&cprinfo); 945219089Spjd while (spa->spa_proc_state == SPA_PROC_ACTIVE) 946219089Spjd cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); 947219089Spjd CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); 948219089Spjd 949219089Spjd ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); 950219089Spjd spa->spa_proc_state = SPA_PROC_GONE; 951219089Spjd spa->spa_proc = &p0; 952219089Spjd cv_broadcast(&spa->spa_proc_cv); 953219089Spjd CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ 954219089Spjd 955219089Spjd mutex_enter(&curproc->p_lock); 956219089Spjd lwp_exit(); 957219089Spjd} 958219089Spjd#endif /* SPA_PROCESS */ 959219089Spjd#endif 960219089Spjd 961219089Spjd/* 962219089Spjd * Activate an uninitialized pool. 
963219089Spjd */ 964219089Spjdstatic void 965219089Spjdspa_activate(spa_t *spa, int mode) 966219089Spjd{ 967219089Spjd ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); 968219089Spjd 969219089Spjd spa->spa_state = POOL_STATE_ACTIVE; 970219089Spjd spa->spa_mode = mode; 971219089Spjd 972219089Spjd spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); 973219089Spjd spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); 974219089Spjd 975219089Spjd /* Try to create a covering process */ 976219089Spjd mutex_enter(&spa->spa_proc_lock); 977219089Spjd ASSERT(spa->spa_proc_state == SPA_PROC_NONE); 978219089Spjd ASSERT(spa->spa_proc == &p0); 979219089Spjd spa->spa_did = 0; 980219089Spjd 981219089Spjd#ifdef SPA_PROCESS 982219089Spjd /* Only create a process if we're going to be around a while. */ 983219089Spjd if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { 984219089Spjd if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, 985219089Spjd NULL, 0) == 0) { 986219089Spjd spa->spa_proc_state = SPA_PROC_CREATED; 987219089Spjd while (spa->spa_proc_state == SPA_PROC_CREATED) { 988219089Spjd cv_wait(&spa->spa_proc_cv, 989219089Spjd &spa->spa_proc_lock); 990209962Smm } 991219089Spjd ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); 992219089Spjd ASSERT(spa->spa_proc != &p0); 993219089Spjd ASSERT(spa->spa_did != 0); 994219089Spjd } else { 995219089Spjd#ifdef _KERNEL 996219089Spjd cmn_err(CE_WARN, 997219089Spjd "Couldn't create process for zfs pool \"%s\"\n", 998219089Spjd spa->spa_name); 999219089Spjd#endif 1000185029Spjd } 1001168404Spjd } 1002219089Spjd#endif /* SPA_PROCESS */ 1003219089Spjd mutex_exit(&spa->spa_proc_lock); 1004168404Spjd 1005219089Spjd /* If we didn't create a process, we need to create our taskqs. 
*/ 1006219089Spjd ASSERT(spa->spa_proc == &p0); 1007219089Spjd if (spa->spa_proc == &p0) { 1008219089Spjd spa_create_zio_taskqs(spa); 1009219089Spjd } 1010219089Spjd 1011240868Spjd /* 1012240868Spjd * Start TRIM thread. 1013240868Spjd */ 1014240868Spjd trim_thread_create(spa); 1015240868Spjd 1016185029Spjd list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), 1017185029Spjd offsetof(vdev_t, vdev_config_dirty_node)); 1018185029Spjd list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), 1019185029Spjd offsetof(vdev_t, vdev_state_dirty_node)); 1020168404Spjd 1021168404Spjd txg_list_create(&spa->spa_vdev_txg_list, 1022168404Spjd offsetof(struct vdev, vdev_txg_node)); 1023168404Spjd 1024168404Spjd avl_create(&spa->spa_errlist_scrub, 1025168404Spjd spa_error_entry_compare, sizeof (spa_error_entry_t), 1026168404Spjd offsetof(spa_error_entry_t, se_avl)); 1027168404Spjd avl_create(&spa->spa_errlist_last, 1028168404Spjd spa_error_entry_compare, sizeof (spa_error_entry_t), 1029168404Spjd offsetof(spa_error_entry_t, se_avl)); 1030168404Spjd} 1031168404Spjd 1032168404Spjd/* 1033168404Spjd * Opposite of spa_activate(). 1034168404Spjd */ 1035168404Spjdstatic void 1036168404Spjdspa_deactivate(spa_t *spa) 1037168404Spjd{ 1038168404Spjd ASSERT(spa->spa_sync_on == B_FALSE); 1039168404Spjd ASSERT(spa->spa_dsl_pool == NULL); 1040168404Spjd ASSERT(spa->spa_root_vdev == NULL); 1041209962Smm ASSERT(spa->spa_async_zio_root == NULL); 1042168404Spjd ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); 1043168404Spjd 1044240868Spjd /* 1045240868Spjd * Stop TRIM thread in case spa_unload() wasn't called directly 1046240868Spjd * before spa_deactivate(). 
1047240868Spjd */ 1048240868Spjd trim_thread_destroy(spa); 1049240868Spjd 1050168404Spjd txg_list_destroy(&spa->spa_vdev_txg_list); 1051168404Spjd 1052185029Spjd list_destroy(&spa->spa_config_dirty_list); 1053185029Spjd list_destroy(&spa->spa_state_dirty_list); 1054168404Spjd 1055185029Spjd for (int t = 0; t < ZIO_TYPES; t++) { 1056185029Spjd for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { 1057211931Smm if (spa->spa_zio_taskq[t][q] != NULL) 1058211931Smm taskq_destroy(spa->spa_zio_taskq[t][q]); 1059185029Spjd spa->spa_zio_taskq[t][q] = NULL; 1060185029Spjd } 1061168404Spjd } 1062168404Spjd 1063168404Spjd metaslab_class_destroy(spa->spa_normal_class); 1064168404Spjd spa->spa_normal_class = NULL; 1065168404Spjd 1066185029Spjd metaslab_class_destroy(spa->spa_log_class); 1067185029Spjd spa->spa_log_class = NULL; 1068185029Spjd 1069168404Spjd /* 1070168404Spjd * If this was part of an import or the open otherwise failed, we may 1071168404Spjd * still have errors left in the queues. Empty them just in case. 
1072168404Spjd */ 1073168404Spjd spa_errlog_drain(spa); 1074168404Spjd 1075168404Spjd avl_destroy(&spa->spa_errlist_scrub); 1076168404Spjd avl_destroy(&spa->spa_errlist_last); 1077168404Spjd 1078168404Spjd spa->spa_state = POOL_STATE_UNINITIALIZED; 1079219089Spjd 1080219089Spjd mutex_enter(&spa->spa_proc_lock); 1081219089Spjd if (spa->spa_proc_state != SPA_PROC_NONE) { 1082219089Spjd ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); 1083219089Spjd spa->spa_proc_state = SPA_PROC_DEACTIVATE; 1084219089Spjd cv_broadcast(&spa->spa_proc_cv); 1085219089Spjd while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { 1086219089Spjd ASSERT(spa->spa_proc != &p0); 1087219089Spjd cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); 1088219089Spjd } 1089219089Spjd ASSERT(spa->spa_proc_state == SPA_PROC_GONE); 1090219089Spjd spa->spa_proc_state = SPA_PROC_NONE; 1091219089Spjd } 1092219089Spjd ASSERT(spa->spa_proc == &p0); 1093219089Spjd mutex_exit(&spa->spa_proc_lock); 1094219089Spjd 1095219089Spjd#ifdef SPA_PROCESS 1096219089Spjd /* 1097219089Spjd * We want to make sure spa_thread() has actually exited the ZFS 1098219089Spjd * module, so that the module can't be unloaded out from underneath 1099219089Spjd * it. 1100219089Spjd */ 1101219089Spjd if (spa->spa_did != 0) { 1102219089Spjd thread_join(spa->spa_did); 1103219089Spjd spa->spa_did = 0; 1104219089Spjd } 1105219089Spjd#endif /* SPA_PROCESS */ 1106168404Spjd} 1107168404Spjd 1108168404Spjd/* 1109168404Spjd * Verify a pool configuration, and construct the vdev tree appropriately. This 1110168404Spjd * will create all the necessary vdevs in the appropriate layout, with each vdev 1111168404Spjd * in the CLOSED state. This will prep the pool before open/creation/import. 1112168404Spjd * All vdev validation is done by the vdev_alloc() routine. 
1113168404Spjd */ 1114168404Spjdstatic int 1115168404Spjdspa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, 1116168404Spjd uint_t id, int atype) 1117168404Spjd{ 1118168404Spjd nvlist_t **child; 1119219089Spjd uint_t children; 1120168404Spjd int error; 1121168404Spjd 1122168404Spjd if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) 1123168404Spjd return (error); 1124168404Spjd 1125168404Spjd if ((*vdp)->vdev_ops->vdev_op_leaf) 1126168404Spjd return (0); 1127168404Spjd 1128185029Spjd error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 1129185029Spjd &child, &children); 1130185029Spjd 1131185029Spjd if (error == ENOENT) 1132185029Spjd return (0); 1133185029Spjd 1134185029Spjd if (error) { 1135168404Spjd vdev_free(*vdp); 1136168404Spjd *vdp = NULL; 1137168404Spjd return (EINVAL); 1138168404Spjd } 1139168404Spjd 1140219089Spjd for (int c = 0; c < children; c++) { 1141168404Spjd vdev_t *vd; 1142168404Spjd if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, 1143168404Spjd atype)) != 0) { 1144168404Spjd vdev_free(*vdp); 1145168404Spjd *vdp = NULL; 1146168404Spjd return (error); 1147168404Spjd } 1148168404Spjd } 1149168404Spjd 1150168404Spjd ASSERT(*vdp != NULL); 1151168404Spjd 1152168404Spjd return (0); 1153168404Spjd} 1154168404Spjd 1155168404Spjd/* 1156168404Spjd * Opposite of spa_load(). 1157168404Spjd */ 1158168404Spjdstatic void 1159168404Spjdspa_unload(spa_t *spa) 1160168404Spjd{ 1161168404Spjd int i; 1162168404Spjd 1163185029Spjd ASSERT(MUTEX_HELD(&spa_namespace_lock)); 1164185029Spjd 1165168404Spjd /* 1166240868Spjd * Stop TRIM thread. 1167240868Spjd */ 1168240868Spjd trim_thread_destroy(spa); 1169240868Spjd 1170240868Spjd /* 1171168404Spjd * Stop async tasks. 1172168404Spjd */ 1173168404Spjd spa_async_suspend(spa); 1174168404Spjd 1175168404Spjd /* 1176168404Spjd * Stop syncing. 
1177168404Spjd */ 1178168404Spjd if (spa->spa_sync_on) { 1179168404Spjd txg_sync_stop(spa->spa_dsl_pool); 1180168404Spjd spa->spa_sync_on = B_FALSE; 1181168404Spjd } 1182168404Spjd 1183168404Spjd /* 1184185029Spjd * Wait for any outstanding async I/O to complete. 1185168404Spjd */ 1186209962Smm if (spa->spa_async_zio_root != NULL) { 1187209962Smm (void) zio_wait(spa->spa_async_zio_root); 1188209962Smm spa->spa_async_zio_root = NULL; 1189209962Smm } 1190168404Spjd 1191219089Spjd bpobj_close(&spa->spa_deferred_bpobj); 1192219089Spjd 1193168404Spjd /* 1194168404Spjd * Close the dsl pool. 1195168404Spjd */ 1196168404Spjd if (spa->spa_dsl_pool) { 1197168404Spjd dsl_pool_close(spa->spa_dsl_pool); 1198168404Spjd spa->spa_dsl_pool = NULL; 1199219089Spjd spa->spa_meta_objset = NULL; 1200168404Spjd } 1201168404Spjd 1202219089Spjd ddt_unload(spa); 1203219089Spjd 1204209962Smm spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1205209962Smm 1206168404Spjd /* 1207209962Smm * Drop and purge level 2 cache 1208209962Smm */ 1209209962Smm spa_l2cache_drop(spa); 1210209962Smm 1211209962Smm /* 1212168404Spjd * Close all vdevs. 
1213168404Spjd */ 1214168404Spjd if (spa->spa_root_vdev) 1215168404Spjd vdev_free(spa->spa_root_vdev); 1216168404Spjd ASSERT(spa->spa_root_vdev == NULL); 1217168404Spjd 1218185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) 1219185029Spjd vdev_free(spa->spa_spares.sav_vdevs[i]); 1220185029Spjd if (spa->spa_spares.sav_vdevs) { 1221185029Spjd kmem_free(spa->spa_spares.sav_vdevs, 1222185029Spjd spa->spa_spares.sav_count * sizeof (void *)); 1223185029Spjd spa->spa_spares.sav_vdevs = NULL; 1224168404Spjd } 1225185029Spjd if (spa->spa_spares.sav_config) { 1226185029Spjd nvlist_free(spa->spa_spares.sav_config); 1227185029Spjd spa->spa_spares.sav_config = NULL; 1228168404Spjd } 1229185029Spjd spa->spa_spares.sav_count = 0; 1230168404Spjd 1231230514Smm for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 1232230514Smm vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); 1233185029Spjd vdev_free(spa->spa_l2cache.sav_vdevs[i]); 1234230514Smm } 1235185029Spjd if (spa->spa_l2cache.sav_vdevs) { 1236185029Spjd kmem_free(spa->spa_l2cache.sav_vdevs, 1237185029Spjd spa->spa_l2cache.sav_count * sizeof (void *)); 1238185029Spjd spa->spa_l2cache.sav_vdevs = NULL; 1239185029Spjd } 1240185029Spjd if (spa->spa_l2cache.sav_config) { 1241185029Spjd nvlist_free(spa->spa_l2cache.sav_config); 1242185029Spjd spa->spa_l2cache.sav_config = NULL; 1243185029Spjd } 1244185029Spjd spa->spa_l2cache.sav_count = 0; 1245185029Spjd 1246168404Spjd spa->spa_async_suspended = 0; 1247209962Smm 1248228103Smm if (spa->spa_comment != NULL) { 1249228103Smm spa_strfree(spa->spa_comment); 1250228103Smm spa->spa_comment = NULL; 1251228103Smm } 1252228103Smm 1253209962Smm spa_config_exit(spa, SCL_ALL, FTAG); 1254168404Spjd} 1255168404Spjd 1256168404Spjd/* 1257168404Spjd * Load (or re-load) the current list of vdevs describing the active spares for 1258168404Spjd * this pool. When this is called, we have some form of basic information in 1259185029Spjd * 'spa_spares.sav_config'. 
We parse this into vdevs, try to open them, and 1260185029Spjd * then re-generate a more complete list including status information. 1261168404Spjd */ 1262168404Spjdstatic void 1263168404Spjdspa_load_spares(spa_t *spa) 1264168404Spjd{ 1265168404Spjd nvlist_t **spares; 1266168404Spjd uint_t nspares; 1267168404Spjd int i; 1268168404Spjd vdev_t *vd, *tvd; 1269168404Spjd 1270185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1271185029Spjd 1272168404Spjd /* 1273168404Spjd * First, close and free any existing spare vdevs. 1274168404Spjd */ 1275185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) { 1276185029Spjd vd = spa->spa_spares.sav_vdevs[i]; 1277168404Spjd 1278168404Spjd /* Undo the call to spa_activate() below */ 1279185029Spjd if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 1280185029Spjd B_FALSE)) != NULL && tvd->vdev_isspare) 1281168404Spjd spa_spare_remove(tvd); 1282168404Spjd vdev_close(vd); 1283168404Spjd vdev_free(vd); 1284168404Spjd } 1285168404Spjd 1286185029Spjd if (spa->spa_spares.sav_vdevs) 1287185029Spjd kmem_free(spa->spa_spares.sav_vdevs, 1288185029Spjd spa->spa_spares.sav_count * sizeof (void *)); 1289168404Spjd 1290185029Spjd if (spa->spa_spares.sav_config == NULL) 1291168404Spjd nspares = 0; 1292168404Spjd else 1293185029Spjd VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 1294168404Spjd ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 1295168404Spjd 1296185029Spjd spa->spa_spares.sav_count = (int)nspares; 1297185029Spjd spa->spa_spares.sav_vdevs = NULL; 1298168404Spjd 1299168404Spjd if (nspares == 0) 1300168404Spjd return; 1301168404Spjd 1302168404Spjd /* 1303168404Spjd * Construct the array of vdevs, opening them to get status in the 1304168404Spjd * process. 
For each spare, there is potentially two different vdev_t 1305168404Spjd * structures associated with it: one in the list of spares (used only 1306168404Spjd * for basic validation purposes) and one in the active vdev 1307168404Spjd * configuration (if it's spared in). During this phase we open and 1308168404Spjd * validate each vdev on the spare list. If the vdev also exists in the 1309168404Spjd * active configuration, then we also mark this vdev as an active spare. 1310168404Spjd */ 1311185029Spjd spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), 1312185029Spjd KM_SLEEP); 1313185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) { 1314168404Spjd VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, 1315168404Spjd VDEV_ALLOC_SPARE) == 0); 1316168404Spjd ASSERT(vd != NULL); 1317168404Spjd 1318185029Spjd spa->spa_spares.sav_vdevs[i] = vd; 1319168404Spjd 1320185029Spjd if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, 1321185029Spjd B_FALSE)) != NULL) { 1322168404Spjd if (!tvd->vdev_isspare) 1323168404Spjd spa_spare_add(tvd); 1324168404Spjd 1325168404Spjd /* 1326168404Spjd * We only mark the spare active if we were successfully 1327168404Spjd * able to load the vdev. Otherwise, importing a pool 1328168404Spjd * with a bad active spare would result in strange 1329168404Spjd * behavior, because multiple pool would think the spare 1330168404Spjd * is actively in use. 1331168404Spjd * 1332168404Spjd * There is a vulnerability here to an equally bizarre 1333168404Spjd * circumstance, where a dead active spare is later 1334168404Spjd * brought back to life (onlined or otherwise). Given 1335168404Spjd * the rarity of this scenario, and the extra complexity 1336168404Spjd * it adds, we ignore the possibility. 
1337168404Spjd */ 1338168404Spjd if (!vdev_is_dead(tvd)) 1339168404Spjd spa_spare_activate(tvd); 1340168404Spjd } 1341168404Spjd 1342185029Spjd vd->vdev_top = vd; 1343209962Smm vd->vdev_aux = &spa->spa_spares; 1344185029Spjd 1345168404Spjd if (vdev_open(vd) != 0) 1346168404Spjd continue; 1347168404Spjd 1348185029Spjd if (vdev_validate_aux(vd) == 0) 1349185029Spjd spa_spare_add(vd); 1350168404Spjd } 1351168404Spjd 1352168404Spjd /* 1353168404Spjd * Recompute the stashed list of spares, with status information 1354168404Spjd * this time. 1355168404Spjd */ 1356185029Spjd VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, 1357168404Spjd DATA_TYPE_NVLIST_ARRAY) == 0); 1358168404Spjd 1359185029Spjd spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), 1360185029Spjd KM_SLEEP); 1361185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) 1362185029Spjd spares[i] = vdev_config_generate(spa, 1363219089Spjd spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); 1364185029Spjd VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 1365185029Spjd ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); 1366185029Spjd for (i = 0; i < spa->spa_spares.sav_count; i++) 1367168404Spjd nvlist_free(spares[i]); 1368185029Spjd kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); 1369168404Spjd} 1370168404Spjd 1371185029Spjd/* 1372185029Spjd * Load (or re-load) the current list of vdevs describing the active l2cache for 1373185029Spjd * this pool. When this is called, we have some form of basic information in 1374185029Spjd * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and 1375185029Spjd * then re-generate a more complete list including status information. 1376185029Spjd * Devices which are already active have their details maintained, and are 1377185029Spjd * not re-opened. 
1378185029Spjd */ 1379185029Spjdstatic void 1380185029Spjdspa_load_l2cache(spa_t *spa) 1381185029Spjd{ 1382185029Spjd nvlist_t **l2cache; 1383185029Spjd uint_t nl2cache; 1384185029Spjd int i, j, oldnvdevs; 1385219089Spjd uint64_t guid; 1386185029Spjd vdev_t *vd, **oldvdevs, **newvdevs; 1387185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 1388185029Spjd 1389185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 1390185029Spjd 1391185029Spjd if (sav->sav_config != NULL) { 1392185029Spjd VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, 1393185029Spjd ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 1394185029Spjd newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); 1395185029Spjd } else { 1396185029Spjd nl2cache = 0; 1397247187Smm newvdevs = NULL; 1398185029Spjd } 1399185029Spjd 1400185029Spjd oldvdevs = sav->sav_vdevs; 1401185029Spjd oldnvdevs = sav->sav_count; 1402185029Spjd sav->sav_vdevs = NULL; 1403185029Spjd sav->sav_count = 0; 1404185029Spjd 1405185029Spjd /* 1406185029Spjd * Process new nvlist of vdevs. 1407185029Spjd */ 1408185029Spjd for (i = 0; i < nl2cache; i++) { 1409185029Spjd VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, 1410185029Spjd &guid) == 0); 1411185029Spjd 1412185029Spjd newvdevs[i] = NULL; 1413185029Spjd for (j = 0; j < oldnvdevs; j++) { 1414185029Spjd vd = oldvdevs[j]; 1415185029Spjd if (vd != NULL && guid == vd->vdev_guid) { 1416185029Spjd /* 1417185029Spjd * Retain previous vdev for add/remove ops. 
1418185029Spjd */ 1419185029Spjd newvdevs[i] = vd; 1420185029Spjd oldvdevs[j] = NULL; 1421185029Spjd break; 1422185029Spjd } 1423185029Spjd } 1424185029Spjd 1425185029Spjd if (newvdevs[i] == NULL) { 1426185029Spjd /* 1427185029Spjd * Create new vdev 1428185029Spjd */ 1429185029Spjd VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, 1430185029Spjd VDEV_ALLOC_L2CACHE) == 0); 1431185029Spjd ASSERT(vd != NULL); 1432185029Spjd newvdevs[i] = vd; 1433185029Spjd 1434185029Spjd /* 1435185029Spjd * Commit this vdev as an l2cache device, 1436185029Spjd * even if it fails to open. 1437185029Spjd */ 1438185029Spjd spa_l2cache_add(vd); 1439185029Spjd 1440185029Spjd vd->vdev_top = vd; 1441185029Spjd vd->vdev_aux = sav; 1442185029Spjd 1443185029Spjd spa_l2cache_activate(vd); 1444185029Spjd 1445185029Spjd if (vdev_open(vd) != 0) 1446185029Spjd continue; 1447185029Spjd 1448185029Spjd (void) vdev_validate_aux(vd); 1449185029Spjd 1450219089Spjd if (!vdev_is_dead(vd)) 1451219089Spjd l2arc_add_vdev(spa, vd); 1452185029Spjd } 1453185029Spjd } 1454185029Spjd 1455185029Spjd /* 1456185029Spjd * Purge vdevs that were dropped 1457185029Spjd */ 1458185029Spjd for (i = 0; i < oldnvdevs; i++) { 1459185029Spjd uint64_t pool; 1460185029Spjd 1461185029Spjd vd = oldvdevs[i]; 1462185029Spjd if (vd != NULL) { 1463230514Smm ASSERT(vd->vdev_isl2cache); 1464230514Smm 1465209962Smm if (spa_l2cache_exists(vd->vdev_guid, &pool) && 1466209962Smm pool != 0ULL && l2arc_vdev_present(vd)) 1467185029Spjd l2arc_remove_vdev(vd); 1468230514Smm vdev_clear_stats(vd); 1469230514Smm vdev_free(vd); 1470185029Spjd } 1471185029Spjd } 1472185029Spjd 1473185029Spjd if (oldvdevs) 1474185029Spjd kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); 1475185029Spjd 1476185029Spjd if (sav->sav_config == NULL) 1477185029Spjd goto out; 1478185029Spjd 1479185029Spjd sav->sav_vdevs = newvdevs; 1480185029Spjd sav->sav_count = (int)nl2cache; 1481185029Spjd 1482185029Spjd /* 1483185029Spjd * Recompute the stashed list of l2cache 
devices, with status 1484185029Spjd * information this time. 1485185029Spjd */ 1486185029Spjd VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, 1487185029Spjd DATA_TYPE_NVLIST_ARRAY) == 0); 1488185029Spjd 1489185029Spjd l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 1490185029Spjd for (i = 0; i < sav->sav_count; i++) 1491185029Spjd l2cache[i] = vdev_config_generate(spa, 1492219089Spjd sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); 1493185029Spjd VERIFY(nvlist_add_nvlist_array(sav->sav_config, 1494185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); 1495185029Spjdout: 1496185029Spjd for (i = 0; i < sav->sav_count; i++) 1497185029Spjd nvlist_free(l2cache[i]); 1498185029Spjd if (sav->sav_count) 1499185029Spjd kmem_free(l2cache, sav->sav_count * sizeof (void *)); 1500185029Spjd} 1501185029Spjd 1502168404Spjdstatic int 1503168404Spjdload_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) 1504168404Spjd{ 1505168404Spjd dmu_buf_t *db; 1506168404Spjd char *packed = NULL; 1507168404Spjd size_t nvsize = 0; 1508168404Spjd int error; 1509168404Spjd *value = NULL; 1510168404Spjd 1511168404Spjd VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 1512168404Spjd nvsize = *(uint64_t *)db->db_data; 1513168404Spjd dmu_buf_rele(db, FTAG); 1514168404Spjd 1515168404Spjd packed = kmem_alloc(nvsize, KM_SLEEP); 1516209962Smm error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, 1517209962Smm DMU_READ_PREFETCH); 1518168404Spjd if (error == 0) 1519168404Spjd error = nvlist_unpack(packed, nvsize, value, 0); 1520168404Spjd kmem_free(packed, nvsize); 1521168404Spjd 1522168404Spjd return (error); 1523168404Spjd} 1524168404Spjd 1525168404Spjd/* 1526185029Spjd * Checks to see if the given vdev could not be opened, in which case we post a 1527185029Spjd * sysevent to notify the autoreplace code that the device has been removed. 
1528185029Spjd */ 1529185029Spjdstatic void 1530185029Spjdspa_check_removed(vdev_t *vd) 1531185029Spjd{ 1532219089Spjd for (int c = 0; c < vd->vdev_children; c++) 1533185029Spjd spa_check_removed(vd->vdev_child[c]); 1534185029Spjd 1535185029Spjd if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { 1536185029Spjd zfs_post_autoreplace(vd->vdev_spa, vd); 1537185029Spjd spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); 1538185029Spjd } 1539185029Spjd} 1540185029Spjd 1541185029Spjd/* 1542219089Spjd * Validate the current config against the MOS config 1543213197Smm */ 1544219089Spjdstatic boolean_t 1545219089Spjdspa_config_valid(spa_t *spa, nvlist_t *config) 1546213197Smm{ 1547219089Spjd vdev_t *mrvd, *rvd = spa->spa_root_vdev; 1548219089Spjd nvlist_t *nv; 1549213197Smm 1550219089Spjd VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0); 1551213197Smm 1552219089Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1553219089Spjd VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); 1554219089Spjd 1555219089Spjd ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children); 1556219089Spjd 1557219089Spjd /* 1558219089Spjd * If we're doing a normal import, then build up any additional 1559219089Spjd * diagnostic information about missing devices in this config. 1560219089Spjd * We'll pass this up to the user for further processing. 
1561219089Spjd */ 1562219089Spjd if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { 1563219089Spjd nvlist_t **child, *nv; 1564219089Spjd uint64_t idx = 0; 1565219089Spjd 1566219089Spjd child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **), 1567219089Spjd KM_SLEEP); 1568219089Spjd VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1569219089Spjd 1570219089Spjd for (int c = 0; c < rvd->vdev_children; c++) { 1571219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 1572219089Spjd vdev_t *mtvd = mrvd->vdev_child[c]; 1573219089Spjd 1574219089Spjd if (tvd->vdev_ops == &vdev_missing_ops && 1575219089Spjd mtvd->vdev_ops != &vdev_missing_ops && 1576219089Spjd mtvd->vdev_islog) 1577219089Spjd child[idx++] = vdev_config_generate(spa, mtvd, 1578219089Spjd B_FALSE, 0); 1579219089Spjd } 1580219089Spjd 1581219089Spjd if (idx) { 1582219089Spjd VERIFY(nvlist_add_nvlist_array(nv, 1583219089Spjd ZPOOL_CONFIG_CHILDREN, child, idx) == 0); 1584219089Spjd VERIFY(nvlist_add_nvlist(spa->spa_load_info, 1585219089Spjd ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0); 1586219089Spjd 1587219089Spjd for (int i = 0; i < idx; i++) 1588219089Spjd nvlist_free(child[i]); 1589219089Spjd } 1590219089Spjd nvlist_free(nv); 1591219089Spjd kmem_free(child, rvd->vdev_children * sizeof (char **)); 1592219089Spjd } 1593219089Spjd 1594219089Spjd /* 1595219089Spjd * Compare the root vdev tree with the information we have 1596219089Spjd * from the MOS config (mrvd). Check each top-level vdev 1597219089Spjd * with the corresponding MOS config top-level (mtvd). 1598219089Spjd */ 1599219089Spjd for (int c = 0; c < rvd->vdev_children; c++) { 1600213197Smm vdev_t *tvd = rvd->vdev_child[c]; 1601219089Spjd vdev_t *mtvd = mrvd->vdev_child[c]; 1602213197Smm 1603219089Spjd /* 1604219089Spjd * Resolve any "missing" vdevs in the current configuration. 1605219089Spjd * If we find that the MOS config has more accurate information 1606219089Spjd * about the top-level vdev then use that vdev instead. 
1607219089Spjd */ 1608219089Spjd if (tvd->vdev_ops == &vdev_missing_ops && 1609219089Spjd mtvd->vdev_ops != &vdev_missing_ops) { 1610219089Spjd 1611219089Spjd if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) 1612219089Spjd continue; 1613219089Spjd 1614219089Spjd /* 1615219089Spjd * Device specific actions. 1616219089Spjd */ 1617219089Spjd if (mtvd->vdev_islog) { 1618219089Spjd spa_set_log_state(spa, SPA_LOG_CLEAR); 1619219089Spjd } else { 1620219089Spjd /* 1621219089Spjd * XXX - once we have 'readonly' pool 1622219089Spjd * support we should be able to handle 1623219089Spjd * missing data devices by transitioning 1624219089Spjd * the pool to readonly. 1625219089Spjd */ 1626219089Spjd continue; 1627219089Spjd } 1628219089Spjd 1629219089Spjd /* 1630219089Spjd * Swap the missing vdev with the data we were 1631219089Spjd * able to obtain from the MOS config. 1632219089Spjd */ 1633219089Spjd vdev_remove_child(rvd, tvd); 1634219089Spjd vdev_remove_child(mrvd, mtvd); 1635219089Spjd 1636219089Spjd vdev_add_child(rvd, mtvd); 1637219089Spjd vdev_add_child(mrvd, tvd); 1638219089Spjd 1639219089Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1640219089Spjd vdev_load(mtvd); 1641219089Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 1642219089Spjd 1643219089Spjd vdev_reopen(rvd); 1644219089Spjd } else if (mtvd->vdev_islog) { 1645219089Spjd /* 1646219089Spjd * Load the slog device's state from the MOS config 1647219089Spjd * since it's possible that the label does not 1648219089Spjd * contain the most up-to-date information. 1649219089Spjd */ 1650219089Spjd vdev_load_log_state(tvd, mtvd); 1651219089Spjd vdev_reopen(tvd); 1652219089Spjd } 1653213197Smm } 1654219089Spjd vdev_free(mrvd); 1655219089Spjd spa_config_exit(spa, SCL_ALL, FTAG); 1656219089Spjd 1657219089Spjd /* 1658219089Spjd * Ensure we were able to validate the config. 
1659219089Spjd */ 1660219089Spjd return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum); 1661213197Smm} 1662213197Smm 1663213197Smm/* 1664185029Spjd * Check for missing log devices 1665185029Spjd */ 1666219089Spjdstatic int 1667185029Spjdspa_check_logs(spa_t *spa) 1668185029Spjd{ 1669185029Spjd switch (spa->spa_log_state) { 1670185029Spjd case SPA_LOG_MISSING: 1671185029Spjd /* need to recheck in case slog has been restored */ 1672185029Spjd case SPA_LOG_UNKNOWN: 1673185029Spjd if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL, 1674185029Spjd DS_FIND_CHILDREN)) { 1675219089Spjd spa_set_log_state(spa, SPA_LOG_MISSING); 1676185029Spjd return (1); 1677185029Spjd } 1678185029Spjd break; 1679185029Spjd } 1680185029Spjd return (0); 1681185029Spjd} 1682185029Spjd 1683219089Spjdstatic boolean_t 1684219089Spjdspa_passivate_log(spa_t *spa) 1685219089Spjd{ 1686219089Spjd vdev_t *rvd = spa->spa_root_vdev; 1687219089Spjd boolean_t slog_found = B_FALSE; 1688219089Spjd 1689219089Spjd ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); 1690219089Spjd 1691219089Spjd if (!spa_has_slogs(spa)) 1692219089Spjd return (B_FALSE); 1693219089Spjd 1694219089Spjd for (int c = 0; c < rvd->vdev_children; c++) { 1695219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 1696219089Spjd metaslab_group_t *mg = tvd->vdev_mg; 1697219089Spjd 1698219089Spjd if (tvd->vdev_islog) { 1699219089Spjd metaslab_group_passivate(mg); 1700219089Spjd slog_found = B_TRUE; 1701219089Spjd } 1702219089Spjd } 1703219089Spjd 1704219089Spjd return (slog_found); 1705219089Spjd} 1706219089Spjd 1707219089Spjdstatic void 1708219089Spjdspa_activate_log(spa_t *spa) 1709219089Spjd{ 1710219089Spjd vdev_t *rvd = spa->spa_root_vdev; 1711219089Spjd 1712219089Spjd ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); 1713219089Spjd 1714219089Spjd for (int c = 0; c < rvd->vdev_children; c++) { 1715219089Spjd vdev_t *tvd = rvd->vdev_child[c]; 1716219089Spjd metaslab_group_t *mg = tvd->vdev_mg; 1717219089Spjd 1718219089Spjd if 
(tvd->vdev_islog) 1719219089Spjd metaslab_group_activate(mg); 1720219089Spjd } 1721219089Spjd} 1722219089Spjd 1723219089Spjdint 1724219089Spjdspa_offline_log(spa_t *spa) 1725219089Spjd{ 1726219089Spjd int error = 0; 1727219089Spjd 1728219089Spjd if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline, 1729219089Spjd NULL, DS_FIND_CHILDREN)) == 0) { 1730219089Spjd 1731219089Spjd /* 1732219089Spjd * We successfully offlined the log device, sync out the 1733219089Spjd * current txg so that the "stubby" block can be removed 1734219089Spjd * by zil_sync(). 1735219089Spjd */ 1736219089Spjd txg_wait_synced(spa->spa_dsl_pool, 0); 1737219089Spjd } 1738219089Spjd return (error); 1739219089Spjd} 1740219089Spjd 1741219089Spjdstatic void 1742219089Spjdspa_aux_check_removed(spa_aux_vdev_t *sav) 1743219089Spjd{ 1744219089Spjd int i; 1745219089Spjd 1746219089Spjd for (i = 0; i < sav->sav_count; i++) 1747219089Spjd spa_check_removed(sav->sav_vdevs[i]); 1748219089Spjd} 1749219089Spjd 1750219089Spjdvoid 1751219089Spjdspa_claim_notify(zio_t *zio) 1752219089Spjd{ 1753219089Spjd spa_t *spa = zio->io_spa; 1754219089Spjd 1755219089Spjd if (zio->io_error) 1756219089Spjd return; 1757219089Spjd 1758219089Spjd mutex_enter(&spa->spa_props_lock); /* any mutex will do */ 1759219089Spjd if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) 1760219089Spjd spa->spa_claim_max_txg = zio->io_bp->blk_birth; 1761219089Spjd mutex_exit(&spa->spa_props_lock); 1762219089Spjd} 1763219089Spjd 1764219089Spjdtypedef struct spa_load_error { 1765219089Spjd uint64_t sle_meta_count; 1766219089Spjd uint64_t sle_data_count; 1767219089Spjd} spa_load_error_t; 1768219089Spjd 1769219089Spjdstatic void 1770219089Spjdspa_load_verify_done(zio_t *zio) 1771219089Spjd{ 1772219089Spjd blkptr_t *bp = zio->io_bp; 1773219089Spjd spa_load_error_t *sle = zio->io_private; 1774219089Spjd dmu_object_type_t type = BP_GET_TYPE(bp); 1775219089Spjd int error = zio->io_error; 1776219089Spjd 1777219089Spjd if (error) { 1778236884Smm if 
((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && 1779219089Spjd type != DMU_OT_INTENT_LOG) 1780219089Spjd atomic_add_64(&sle->sle_meta_count, 1); 1781219089Spjd else 1782219089Spjd atomic_add_64(&sle->sle_data_count, 1); 1783219089Spjd } 1784219089Spjd zio_data_buf_free(zio->io_data, zio->io_size); 1785219089Spjd} 1786219089Spjd 1787219089Spjd/*ARGSUSED*/ 1788219089Spjdstatic int 1789219089Spjdspa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 1790246666Smm const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 1791219089Spjd{ 1792219089Spjd if (bp != NULL) { 1793219089Spjd zio_t *rio = arg; 1794219089Spjd size_t size = BP_GET_PSIZE(bp); 1795219089Spjd void *data = zio_data_buf_alloc(size); 1796219089Spjd 1797219089Spjd zio_nowait(zio_read(rio, spa, bp, data, size, 1798219089Spjd spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, 1799219089Spjd ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | 1800219089Spjd ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); 1801219089Spjd } 1802219089Spjd return (0); 1803219089Spjd} 1804219089Spjd 1805219089Spjdstatic int 1806219089Spjdspa_load_verify(spa_t *spa) 1807219089Spjd{ 1808219089Spjd zio_t *rio; 1809219089Spjd spa_load_error_t sle = { 0 }; 1810219089Spjd zpool_rewind_policy_t policy; 1811219089Spjd boolean_t verify_ok = B_FALSE; 1812219089Spjd int error; 1813219089Spjd 1814219089Spjd zpool_get_rewind_policy(spa->spa_config, &policy); 1815219089Spjd 1816219089Spjd if (policy.zrp_request & ZPOOL_NEVER_REWIND) 1817219089Spjd return (0); 1818219089Spjd 1819219089Spjd rio = zio_root(spa, NULL, &sle, 1820219089Spjd ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); 1821219089Spjd 1822219089Spjd error = traverse_pool(spa, spa->spa_verify_min_txg, 1823219089Spjd TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio); 1824219089Spjd 1825219089Spjd (void) zio_wait(rio); 1826219089Spjd 1827219089Spjd spa->spa_load_meta_errors = sle.sle_meta_count; 1828219089Spjd spa->spa_load_data_errors = sle.sle_data_count; 1829219089Spjd 
1830219089Spjd if (!error && sle.sle_meta_count <= policy.zrp_maxmeta && 1831219089Spjd sle.sle_data_count <= policy.zrp_maxdata) { 1832219089Spjd int64_t loss = 0; 1833219089Spjd 1834219089Spjd verify_ok = B_TRUE; 1835219089Spjd spa->spa_load_txg = spa->spa_uberblock.ub_txg; 1836219089Spjd spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; 1837219089Spjd 1838219089Spjd loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; 1839219089Spjd VERIFY(nvlist_add_uint64(spa->spa_load_info, 1840219089Spjd ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); 1841219089Spjd VERIFY(nvlist_add_int64(spa->spa_load_info, 1842219089Spjd ZPOOL_CONFIG_REWIND_TIME, loss) == 0); 1843219089Spjd VERIFY(nvlist_add_uint64(spa->spa_load_info, 1844219089Spjd ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); 1845219089Spjd } else { 1846219089Spjd spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; 1847219089Spjd } 1848219089Spjd 1849219089Spjd if (error) { 1850219089Spjd if (error != ENXIO && error != EIO) 1851219089Spjd error = EIO; 1852219089Spjd return (error); 1853219089Spjd } 1854219089Spjd 1855219089Spjd return (verify_ok ? 0 : EIO); 1856219089Spjd} 1857219089Spjd 1858185029Spjd/* 1859219089Spjd * Find a value in the pool props object. 1860168404Spjd */ 1861219089Spjdstatic void 1862219089Spjdspa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) 1863219089Spjd{ 1864219089Spjd (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, 1865219089Spjd zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); 1866219089Spjd} 1867219089Spjd 1868219089Spjd/* 1869219089Spjd * Find a value in the pool directory object. 
1870219089Spjd */ 1871168404Spjdstatic int 1872219089Spjdspa_dir_prop(spa_t *spa, const char *name, uint64_t *val) 1873168404Spjd{ 1874219089Spjd return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 1875219089Spjd name, sizeof (uint64_t), 1, val)); 1876219089Spjd} 1877168404Spjd 1878219089Spjdstatic int 1879219089Spjdspa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) 1880219089Spjd{ 1881219089Spjd vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); 1882219089Spjd return (err); 1883219089Spjd} 1884219089Spjd 1885219089Spjd/* 1886219089Spjd * Fix up config after a partly-completed split. This is done with the 1887219089Spjd * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off 1888219089Spjd * pool have that entry in their config, but only the splitting one contains 1889219089Spjd * a list of all the guids of the vdevs that are being split off. 1890219089Spjd * 1891219089Spjd * This function determines what to do with that list: either rejoin 1892219089Spjd * all the disks to the pool, or complete the splitting process. To attempt 1893219089Spjd * the rejoin, each disk that is offlined is marked online again, and 1894219089Spjd * we do a reopen() call. If the vdev label for every disk that was 1895219089Spjd * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) 1896219089Spjd * then we call vdev_split() on each disk, and complete the split. 1897219089Spjd * 1898219089Spjd * Otherwise we leave the config alone, with all the vdevs in place in 1899219089Spjd * the original pool. 
1900219089Spjd */ 1901219089Spjdstatic void 1902219089Spjdspa_try_repair(spa_t *spa, nvlist_t *config) 1903219089Spjd{ 1904219089Spjd uint_t extracted; 1905219089Spjd uint64_t *glist; 1906219089Spjd uint_t i, gcount; 1907219089Spjd nvlist_t *nvl; 1908219089Spjd vdev_t **vd; 1909219089Spjd boolean_t attempt_reopen; 1910219089Spjd 1911219089Spjd if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) 1912219089Spjd return; 1913219089Spjd 1914219089Spjd /* check that the config is complete */ 1915219089Spjd if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, 1916219089Spjd &glist, &gcount) != 0) 1917219089Spjd return; 1918219089Spjd 1919219089Spjd vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); 1920219089Spjd 1921219089Spjd /* attempt to online all the vdevs & validate */ 1922219089Spjd attempt_reopen = B_TRUE; 1923219089Spjd for (i = 0; i < gcount; i++) { 1924219089Spjd if (glist[i] == 0) /* vdev is hole */ 1925219089Spjd continue; 1926219089Spjd 1927219089Spjd vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); 1928219089Spjd if (vd[i] == NULL) { 1929219089Spjd /* 1930219089Spjd * Don't bother attempting to reopen the disks; 1931219089Spjd * just do the split. 
1932219089Spjd */ 1933219089Spjd attempt_reopen = B_FALSE; 1934219089Spjd } else { 1935219089Spjd /* attempt to re-online it */ 1936219089Spjd vd[i]->vdev_offline = B_FALSE; 1937219089Spjd } 1938219089Spjd } 1939219089Spjd 1940219089Spjd if (attempt_reopen) { 1941219089Spjd vdev_reopen(spa->spa_root_vdev); 1942219089Spjd 1943219089Spjd /* check each device to see what state it's in */ 1944219089Spjd for (extracted = 0, i = 0; i < gcount; i++) { 1945219089Spjd if (vd[i] != NULL && 1946219089Spjd vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) 1947219089Spjd break; 1948219089Spjd ++extracted; 1949219089Spjd } 1950219089Spjd } 1951219089Spjd 1952209962Smm /* 1953219089Spjd * If every disk has been moved to the new pool, or if we never 1954219089Spjd * even attempted to look at them, then we split them off for 1955219089Spjd * good. 1956209962Smm */ 1957219089Spjd if (!attempt_reopen || gcount == extracted) { 1958219089Spjd for (i = 0; i < gcount; i++) 1959219089Spjd if (vd[i] != NULL) 1960219089Spjd vdev_split(vd[i]); 1961219089Spjd vdev_reopen(spa->spa_root_vdev); 1962219089Spjd } 1963209962Smm 1964219089Spjd kmem_free(vd, gcount * sizeof (vdev_t *)); 1965219089Spjd} 1966185029Spjd 1967219089Spjdstatic int 1968219089Spjdspa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, 1969219089Spjd boolean_t mosconfig) 1970219089Spjd{ 1971219089Spjd nvlist_t *config = spa->spa_config; 1972219089Spjd char *ereport = FM_EREPORT_ZFS_POOL; 1973228103Smm char *comment; 1974219089Spjd int error; 1975219089Spjd uint64_t pool_guid; 1976219089Spjd nvlist_t *nvl; 1977168404Spjd 1978219089Spjd if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) 1979219089Spjd return (EINVAL); 1980168404Spjd 1981228103Smm ASSERT(spa->spa_comment == NULL); 1982228103Smm if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) 1983228103Smm spa->spa_comment = spa_strdup(comment); 1984228103Smm 1985168404Spjd /* 1986168404Spjd * Versioning wasn't explicitly added 
to the label until later, so if 1987168404Spjd * it's not present treat it as the initial version. 1988168404Spjd */ 1989219089Spjd if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 1990219089Spjd &spa->spa_ubsync.ub_version) != 0) 1991219089Spjd spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; 1992168404Spjd 1993168404Spjd (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 1994168404Spjd &spa->spa_config_txg); 1995168404Spjd 1996168404Spjd if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) && 1997168404Spjd spa_guid_exists(pool_guid, 0)) { 1998168404Spjd error = EEXIST; 1999219089Spjd } else { 2000228103Smm spa->spa_config_guid = pool_guid; 2001219089Spjd 2002219089Spjd if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, 2003219089Spjd &nvl) == 0) { 2004219089Spjd VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting, 2005219089Spjd KM_SLEEP) == 0); 2006219089Spjd } 2007219089Spjd 2008236884Smm nvlist_free(spa->spa_load_info); 2009236884Smm spa->spa_load_info = fnvlist_alloc(); 2010236884Smm 2011219089Spjd gethrestime(&spa->spa_loaded_ts); 2012219089Spjd error = spa_load_impl(spa, pool_guid, config, state, type, 2013219089Spjd mosconfig, &ereport); 2014168404Spjd } 2015168404Spjd 2016219089Spjd spa->spa_minref = refcount_count(&spa->spa_refcount); 2017219089Spjd if (error) { 2018219089Spjd if (error != EEXIST) { 2019219089Spjd spa->spa_loaded_ts.tv_sec = 0; 2020219089Spjd spa->spa_loaded_ts.tv_nsec = 0; 2021219089Spjd } 2022219089Spjd if (error != EBADF) { 2023219089Spjd zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); 2024219089Spjd } 2025219089Spjd } 2026219089Spjd spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; 2027219089Spjd spa->spa_ena = 0; 2028168404Spjd 2029219089Spjd return (error); 2030219089Spjd} 2031219089Spjd 2032219089Spjd/* 2033219089Spjd * Load an existing storage pool, using the pool's builtin spa_config as a 2034219089Spjd * source of configuration information. 
2035219089Spjd */ 2036219089Spjdstatic int 2037219089Spjdspa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, 2038219089Spjd spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, 2039219089Spjd char **ereport) 2040219089Spjd{ 2041219089Spjd int error = 0; 2042219089Spjd nvlist_t *nvroot = NULL; 2043236884Smm nvlist_t *label; 2044219089Spjd vdev_t *rvd; 2045219089Spjd uberblock_t *ub = &spa->spa_uberblock; 2046219089Spjd uint64_t children, config_cache_txg = spa->spa_config_txg; 2047219089Spjd int orig_mode = spa->spa_mode; 2048219089Spjd int parse; 2049219089Spjd uint64_t obj; 2050236884Smm boolean_t missing_feat_write = B_FALSE; 2051219089Spjd 2052168404Spjd /* 2053219089Spjd * If this is an untrusted config, access the pool in read-only mode. 2054219089Spjd * This prevents things like resilvering recently removed devices. 2055219089Spjd */ 2056219089Spjd if (!mosconfig) 2057219089Spjd spa->spa_mode = FREAD; 2058219089Spjd 2059219089Spjd ASSERT(MUTEX_HELD(&spa_namespace_lock)); 2060219089Spjd 2061219089Spjd spa->spa_load_state = state; 2062219089Spjd 2063219089Spjd if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot)) 2064219089Spjd return (EINVAL); 2065219089Spjd 2066219089Spjd parse = (type == SPA_IMPORT_EXISTING ? 2067219089Spjd VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); 2068219089Spjd 2069219089Spjd /* 2070209962Smm * Create "The Godfather" zio to hold all async IOs 2071209962Smm */ 2072209962Smm spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 2073209962Smm ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 2074209962Smm 2075209962Smm /* 2076168404Spjd * Parse the configuration into a vdev tree. We explicitly set the 2077168404Spjd * value that will be returned by spa_version() since parsing the 2078168404Spjd * configuration requires knowing the version number. 
2079168404Spjd */ 2080185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2081219089Spjd error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse); 2082185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2083168404Spjd 2084168404Spjd if (error != 0) 2085219089Spjd return (error); 2086168404Spjd 2087168404Spjd ASSERT(spa->spa_root_vdev == rvd); 2088168404Spjd 2089219089Spjd if (type != SPA_IMPORT_ASSEMBLE) { 2090219089Spjd ASSERT(spa_guid(spa) == pool_guid); 2091219089Spjd } 2092219089Spjd 2093168404Spjd /* 2094168404Spjd * Try to open all vdevs, loading each label in the process. 2095168404Spjd */ 2096185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2097168926Spjd error = vdev_open(rvd); 2098185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2099168926Spjd if (error != 0) 2100219089Spjd return (error); 2101168404Spjd 2102168404Spjd /* 2103209962Smm * We need to validate the vdev labels against the configuration that 2104209962Smm * we have in hand, which is dependent on the setting of mosconfig. If 2105209962Smm * mosconfig is true then we're validating the vdev labels based on 2106219089Spjd * that config. Otherwise, we're validating against the cached config 2107209962Smm * (zpool.cache) that was read when we loaded the zfs module, and then 2108209962Smm * later we will recursively call spa_load() and validate against 2109209962Smm * the vdev config. 2110219089Spjd * 2111219089Spjd * If we're assembling a new pool that's been split off from an 2112219089Spjd * existing pool, the labels haven't yet been updated so we skip 2113219089Spjd * validation for now. 
2114168404Spjd */ 2115219089Spjd if (type != SPA_IMPORT_ASSEMBLE) { 2116219089Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2117230514Smm error = vdev_validate(rvd, mosconfig); 2118219089Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2119168404Spjd 2120219089Spjd if (error != 0) 2121219089Spjd return (error); 2122219089Spjd 2123219089Spjd if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) 2124219089Spjd return (ENXIO); 2125168404Spjd } 2126168404Spjd 2127168404Spjd /* 2128168404Spjd * Find the best uberblock. 2129168404Spjd */ 2130236884Smm vdev_uberblock_load(rvd, ub, &label); 2131168404Spjd 2132168404Spjd /* 2133168404Spjd * If we weren't able to find a single valid uberblock, return failure. 2134168404Spjd */ 2135236884Smm if (ub->ub_txg == 0) { 2136236884Smm nvlist_free(label); 2137219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); 2138236884Smm } 2139168404Spjd 2140168404Spjd /* 2141236884Smm * If the pool has an unsupported version we can't open it. 2142168404Spjd */ 2143236884Smm if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { 2144236884Smm nvlist_free(label); 2145219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); 2146236884Smm } 2147168404Spjd 2148236884Smm if (ub->ub_version >= SPA_VERSION_FEATURES) { 2149236884Smm nvlist_t *features; 2150236884Smm 2151236884Smm /* 2152236884Smm * If we weren't able to find what's necessary for reading the 2153236884Smm * MOS in the label, return failure. 2154236884Smm */ 2155236884Smm if (label == NULL || nvlist_lookup_nvlist(label, 2156236884Smm ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) { 2157236884Smm nvlist_free(label); 2158236884Smm return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, 2159236884Smm ENXIO)); 2160236884Smm } 2161236884Smm 2162236884Smm /* 2163236884Smm * Update our in-core representation with the definitive values 2164236884Smm * from the label. 
2165236884Smm */ 2166236884Smm nvlist_free(spa->spa_label_features); 2167236884Smm VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); 2168236884Smm } 2169236884Smm 2170236884Smm nvlist_free(label); 2171236884Smm 2172168404Spjd /* 2173236884Smm * Look through entries in the label nvlist's features_for_read. If 2174236884Smm * there is a feature listed there which we don't understand then we 2175236884Smm * cannot open a pool. 2176236884Smm */ 2177236884Smm if (ub->ub_version >= SPA_VERSION_FEATURES) { 2178236884Smm nvlist_t *unsup_feat; 2179236884Smm 2180236884Smm VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == 2181236884Smm 0); 2182236884Smm 2183236884Smm for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, 2184236884Smm NULL); nvp != NULL; 2185236884Smm nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { 2186236884Smm if (!zfeature_is_supported(nvpair_name(nvp))) { 2187236884Smm VERIFY(nvlist_add_string(unsup_feat, 2188236884Smm nvpair_name(nvp), "") == 0); 2189236884Smm } 2190236884Smm } 2191236884Smm 2192236884Smm if (!nvlist_empty(unsup_feat)) { 2193236884Smm VERIFY(nvlist_add_nvlist(spa->spa_load_info, 2194236884Smm ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); 2195236884Smm nvlist_free(unsup_feat); 2196236884Smm return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, 2197236884Smm ENOTSUP)); 2198236884Smm } 2199236884Smm 2200236884Smm nvlist_free(unsup_feat); 2201236884Smm } 2202236884Smm 2203236884Smm /* 2204168404Spjd * If the vdev guid sum doesn't match the uberblock, we have an 2205219089Spjd * incomplete configuration. We first check to see if the pool 2206219089Spjd * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN). 2207219089Spjd * If it is, defer the vdev_guid_sum check till later so we 2208219089Spjd * can handle missing vdevs. 
2209168404Spjd */ 2210219089Spjd if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, 2211219089Spjd &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE && 2212219089Spjd rvd->vdev_guid_sum != ub->ub_guid_sum) 2213219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); 2214219089Spjd 2215219089Spjd if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { 2216219089Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2217219089Spjd spa_try_repair(spa, config); 2218219089Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2219219089Spjd nvlist_free(spa->spa_config_splitting); 2220219089Spjd spa->spa_config_splitting = NULL; 2221168404Spjd } 2222168404Spjd 2223168404Spjd /* 2224168404Spjd * Initialize internal SPA structures. 2225168404Spjd */ 2226168404Spjd spa->spa_state = POOL_STATE_ACTIVE; 2227168404Spjd spa->spa_ubsync = spa->spa_uberblock; 2228219089Spjd spa->spa_verify_min_txg = spa->spa_extreme_rewind ? 2229219089Spjd TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; 2230219089Spjd spa->spa_first_txg = spa->spa_last_ubsync_txg ? 
2231219089Spjd spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; 2232219089Spjd spa->spa_claim_max_txg = spa->spa_first_txg; 2233219089Spjd spa->spa_prev_software_version = ub->ub_software_version; 2234219089Spjd 2235236884Smm error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); 2236219089Spjd if (error) 2237219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2238168404Spjd spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; 2239168404Spjd 2240219089Spjd if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0) 2241219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2242168404Spjd 2243236884Smm if (spa_version(spa) >= SPA_VERSION_FEATURES) { 2244236884Smm boolean_t missing_feat_read = B_FALSE; 2245238926Smm nvlist_t *unsup_feat, *enabled_feat; 2246236884Smm 2247236884Smm if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, 2248236884Smm &spa->spa_feat_for_read_obj) != 0) { 2249236884Smm return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2250236884Smm } 2251236884Smm 2252236884Smm if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, 2253236884Smm &spa->spa_feat_for_write_obj) != 0) { 2254236884Smm return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2255236884Smm } 2256236884Smm 2257236884Smm if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, 2258236884Smm &spa->spa_feat_desc_obj) != 0) { 2259236884Smm return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2260236884Smm } 2261236884Smm 2262238926Smm enabled_feat = fnvlist_alloc(); 2263238926Smm unsup_feat = fnvlist_alloc(); 2264236884Smm 2265236884Smm if (!feature_is_supported(spa->spa_meta_objset, 2266236884Smm spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj, 2267238926Smm unsup_feat, enabled_feat)) 2268236884Smm missing_feat_read = B_TRUE; 2269236884Smm 2270236884Smm if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) { 2271236884Smm if (!feature_is_supported(spa->spa_meta_objset, 2272236884Smm spa->spa_feat_for_write_obj, 
spa->spa_feat_desc_obj, 2273238926Smm unsup_feat, enabled_feat)) { 2274236884Smm missing_feat_write = B_TRUE; 2275238926Smm } 2276236884Smm } 2277236884Smm 2278238926Smm fnvlist_add_nvlist(spa->spa_load_info, 2279238926Smm ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); 2280238926Smm 2281236884Smm if (!nvlist_empty(unsup_feat)) { 2282238926Smm fnvlist_add_nvlist(spa->spa_load_info, 2283238926Smm ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); 2284236884Smm } 2285236884Smm 2286238926Smm fnvlist_free(enabled_feat); 2287238926Smm fnvlist_free(unsup_feat); 2288236884Smm 2289236884Smm if (!missing_feat_read) { 2290236884Smm fnvlist_add_boolean(spa->spa_load_info, 2291236884Smm ZPOOL_CONFIG_CAN_RDONLY); 2292236884Smm } 2293236884Smm 2294236884Smm /* 2295236884Smm * If the state is SPA_LOAD_TRYIMPORT, our objective is 2296236884Smm * twofold: to determine whether the pool is available for 2297236884Smm * import in read-write mode and (if it is not) whether the 2298236884Smm * pool is available for import in read-only mode. If the pool 2299236884Smm * is available for import in read-write mode, it is displayed 2300236884Smm * as available in userland; if it is not available for import 2301236884Smm * in read-only mode, it is displayed as unavailable in 2302236884Smm * userland. If the pool is available for import in read-only 2303236884Smm * mode but not read-write mode, it is displayed as unavailable 2304236884Smm * in userland with a special note that the pool is actually 2305236884Smm * available for open in read-only mode. 2306236884Smm * 2307236884Smm * As a result, if the state is SPA_LOAD_TRYIMPORT and we are 2308236884Smm * missing a feature for write, we must first determine whether 2309236884Smm * the pool can be opened read-only before returning to 2310236884Smm * userland in order to know whether to display the 2311236884Smm * abovementioned note. 
2312236884Smm */ 2313236884Smm if (missing_feat_read || (missing_feat_write && 2314236884Smm spa_writeable(spa))) { 2315236884Smm return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, 2316236884Smm ENOTSUP)); 2317236884Smm } 2318236884Smm } 2319236884Smm 2320236884Smm spa->spa_is_initializing = B_TRUE; 2321236884Smm error = dsl_pool_open(spa->spa_dsl_pool); 2322236884Smm spa->spa_is_initializing = B_FALSE; 2323236884Smm if (error != 0) 2324236884Smm return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2325236884Smm 2326168404Spjd if (!mosconfig) { 2327168498Spjd uint64_t hostid; 2328219089Spjd nvlist_t *policy = NULL, *nvconfig; 2329168404Spjd 2330219089Spjd if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) 2331219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2332168404Spjd 2333219089Spjd if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, 2334185029Spjd ZPOOL_CONFIG_HOSTID, &hostid) == 0) { 2335168498Spjd char *hostname; 2336168498Spjd unsigned long myhostid = 0; 2337168498Spjd 2338219089Spjd VERIFY(nvlist_lookup_string(nvconfig, 2339168498Spjd ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); 2340168498Spjd 2341219089Spjd#ifdef _KERNEL 2342219089Spjd myhostid = zone_get_hostid(NULL); 2343219089Spjd#else /* _KERNEL */ 2344219089Spjd /* 2345219089Spjd * We're emulating the system's hostid in userland, so 2346219089Spjd * we can't use zone_get_hostid(). 2347219089Spjd */ 2348168498Spjd (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); 2349219089Spjd#endif /* _KERNEL */ 2350204073Spjd if (check_hostid && hostid != 0 && myhostid != 0 && 2351219089Spjd hostid != myhostid) { 2352219089Spjd nvlist_free(nvconfig); 2353168498Spjd cmn_err(CE_WARN, "pool '%s' could not be " 2354168498Spjd "loaded as it was last accessed by " 2355185029Spjd "another system (host: %s hostid: 0x%lx). 
" 2356236146Smm "See: http://illumos.org/msg/ZFS-8000-EY", 2357185029Spjd spa_name(spa), hostname, 2358168498Spjd (unsigned long)hostid); 2359219089Spjd return (EBADF); 2360168498Spjd } 2361168498Spjd } 2362219089Spjd if (nvlist_lookup_nvlist(spa->spa_config, 2363219089Spjd ZPOOL_REWIND_POLICY, &policy) == 0) 2364219089Spjd VERIFY(nvlist_add_nvlist(nvconfig, 2365219089Spjd ZPOOL_REWIND_POLICY, policy) == 0); 2366168498Spjd 2367219089Spjd spa_config_set(spa, nvconfig); 2368168404Spjd spa_unload(spa); 2369168404Spjd spa_deactivate(spa); 2370209962Smm spa_activate(spa, orig_mode); 2371168404Spjd 2372219089Spjd return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE)); 2373168404Spjd } 2374168404Spjd 2375219089Spjd if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0) 2376219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2377219089Spjd error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); 2378219089Spjd if (error != 0) 2379219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2380168404Spjd 2381168404Spjd /* 2382168404Spjd * Load the bit that tells us to use the new accounting function 2383168404Spjd * (raid-z deflation). If we have an older pool, this will not 2384168404Spjd * be present. 2385168404Spjd */ 2386219089Spjd error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate); 2387219089Spjd if (error != 0 && error != ENOENT) 2388219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2389168404Spjd 2390219089Spjd error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, 2391219089Spjd &spa->spa_creation_version); 2392219089Spjd if (error != 0 && error != ENOENT) 2393219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2394219089Spjd 2395168404Spjd /* 2396168404Spjd * Load the persistent error log. If we have an older pool, this will 2397168404Spjd * not be present. 
2398168404Spjd */ 2399219089Spjd error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last); 2400219089Spjd if (error != 0 && error != ENOENT) 2401219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2402168404Spjd 2403219089Spjd error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, 2404219089Spjd &spa->spa_errlog_scrub); 2405219089Spjd if (error != 0 && error != ENOENT) 2406219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2407168404Spjd 2408168404Spjd /* 2409168404Spjd * Load the history object. If we have an older pool, this 2410168404Spjd * will not be present. 2411168404Spjd */ 2412219089Spjd error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history); 2413219089Spjd if (error != 0 && error != ENOENT) 2414219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2415168404Spjd 2416168404Spjd /* 2417219089Spjd * If we're assembling the pool from the split-off vdevs of 2418219089Spjd * an existing pool, we don't want to attach the spares & cache 2419219089Spjd * devices. 2420219089Spjd */ 2421219089Spjd 2422219089Spjd /* 2423168404Spjd * Load any hot spares for this pool. 
2424168404Spjd */ 2425219089Spjd error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object); 2426219089Spjd if (error != 0 && error != ENOENT) 2427219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2428219089Spjd if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { 2429185029Spjd ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); 2430185029Spjd if (load_nvlist(spa, spa->spa_spares.sav_object, 2431219089Spjd &spa->spa_spares.sav_config) != 0) 2432219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2433168404Spjd 2434185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2435168404Spjd spa_load_spares(spa); 2436185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2437219089Spjd } else if (error == 0) { 2438219089Spjd spa->spa_spares.sav_sync = B_TRUE; 2439168404Spjd } 2440168404Spjd 2441185029Spjd /* 2442185029Spjd * Load any level 2 ARC devices for this pool. 2443185029Spjd */ 2444219089Spjd error = spa_dir_prop(spa, DMU_POOL_L2CACHE, 2445185029Spjd &spa->spa_l2cache.sav_object); 2446219089Spjd if (error != 0 && error != ENOENT) 2447219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2448219089Spjd if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { 2449185029Spjd ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); 2450185029Spjd if (load_nvlist(spa, spa->spa_l2cache.sav_object, 2451219089Spjd &spa->spa_l2cache.sav_config) != 0) 2452219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2453185029Spjd 2454185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2455185029Spjd spa_load_l2cache(spa); 2456185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2457219089Spjd } else if (error == 0) { 2458219089Spjd spa->spa_l2cache.sav_sync = B_TRUE; 2459185029Spjd } 2460185029Spjd 2461219089Spjd spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 2462213197Smm 2463219089Spjd error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object); 2464219089Spjd if (error && error != ENOENT) 
2465219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2466185029Spjd 2467219089Spjd if (error == 0) { 2468219089Spjd uint64_t autoreplace; 2469185029Spjd 2470219089Spjd spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); 2471219089Spjd spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); 2472219089Spjd spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); 2473219089Spjd spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); 2474219089Spjd spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); 2475219089Spjd spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, 2476219089Spjd &spa->spa_dedup_ditto); 2477185029Spjd 2478219089Spjd spa->spa_autoreplace = (autoreplace != 0); 2479168404Spjd } 2480168404Spjd 2481168404Spjd /* 2482185029Spjd * If the 'autoreplace' property is set, then post a resource notifying 2483185029Spjd * the ZFS DE that it should not issue any faults for unopenable 2484185029Spjd * devices. We also iterate over the vdevs, and post a sysevent for any 2485185029Spjd * unopenable vdevs so that the normal autoreplace handler can take 2486185029Spjd * over. 2487185029Spjd */ 2488219089Spjd if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) { 2489185029Spjd spa_check_removed(spa->spa_root_vdev); 2490219089Spjd /* 2491219089Spjd * For the import case, this is done in spa_import(), because 2492219089Spjd * at this point we're using the spare definitions from 2493219089Spjd * the MOS config, not necessarily from the userland config. 2494219089Spjd */ 2495219089Spjd if (state != SPA_LOAD_IMPORT) { 2496219089Spjd spa_aux_check_removed(&spa->spa_spares); 2497219089Spjd spa_aux_check_removed(&spa->spa_l2cache); 2498219089Spjd } 2499219089Spjd } 2500185029Spjd 2501185029Spjd /* 2502168404Spjd * Load the vdev state for all toplevel vdevs. 2503168404Spjd */ 2504168404Spjd vdev_load(rvd); 2505168404Spjd 2506168404Spjd /* 2507168404Spjd * Propagate the leaf DTLs we just loaded all the way up the tree. 
2508168404Spjd */ 2509185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 2510168404Spjd vdev_dtl_reassess(rvd, 0, 0, B_FALSE); 2511185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 2512168404Spjd 2513168404Spjd /* 2514219089Spjd * Load the DDTs (dedup tables). 2515168404Spjd */ 2516219089Spjd error = ddt_load(spa); 2517219089Spjd if (error != 0) 2518219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2519219089Spjd 2520219089Spjd spa_update_dspace(spa); 2521219089Spjd 2522219089Spjd /* 2523219089Spjd * Validate the config, using the MOS config to fill in any 2524219089Spjd * information which might be missing. If we fail to validate 2525219089Spjd * the config then declare the pool unfit for use. If we're 2526219089Spjd * assembling a pool from a split, the log is not transferred 2527219089Spjd * over. 2528219089Spjd */ 2529219089Spjd if (type != SPA_IMPORT_ASSEMBLE) { 2530219089Spjd nvlist_t *nvconfig; 2531219089Spjd 2532219089Spjd if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) 2533219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); 2534219089Spjd 2535219089Spjd if (!spa_config_valid(spa, nvconfig)) { 2536219089Spjd nvlist_free(nvconfig); 2537219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, 2538219089Spjd ENXIO)); 2539219089Spjd } 2540219089Spjd nvlist_free(nvconfig); 2541219089Spjd 2542219089Spjd /* 2543236884Smm * Now that we've validated the config, check the state of the 2544219089Spjd * root vdev. If it can't be opened, it indicates one or 2545219089Spjd * more toplevel vdevs are faulted. 
2546219089Spjd */ 2547219089Spjd if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) 2548219089Spjd return (ENXIO); 2549219089Spjd 2550219089Spjd if (spa_check_logs(spa)) { 2551219089Spjd *ereport = FM_EREPORT_ZFS_LOG_REPLAY; 2552219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO)); 2553219089Spjd } 2554168404Spjd } 2555168404Spjd 2556236884Smm if (missing_feat_write) { 2557236884Smm ASSERT(state == SPA_LOAD_TRYIMPORT); 2558236884Smm 2559236884Smm /* 2560236884Smm * At this point, we know that we can open the pool in 2561236884Smm * read-only mode but not read-write mode. We now have enough 2562236884Smm * information and can return to userland. 2563236884Smm */ 2564236884Smm return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); 2565236884Smm } 2566236884Smm 2567219089Spjd /* 2568219089Spjd * We've successfully opened the pool, verify that we're ready 2569219089Spjd * to start pushing transactions. 2570219089Spjd */ 2571219089Spjd if (state != SPA_LOAD_TRYIMPORT) { 2572219089Spjd if (error = spa_load_verify(spa)) 2573219089Spjd return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, 2574219089Spjd error)); 2575219089Spjd } 2576219089Spjd 2577219089Spjd if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER || 2578219089Spjd spa->spa_load_max_txg == UINT64_MAX)) { 2579168404Spjd dmu_tx_t *tx; 2580168404Spjd int need_update = B_FALSE; 2581168404Spjd 2582209962Smm ASSERT(state != SPA_LOAD_TRYIMPORT); 2583209962Smm 2584168404Spjd /* 2585168404Spjd * Claim log blocks that haven't been committed yet. 2586168404Spjd * This must all happen in a single txg. 2587219089Spjd * Note: spa_claim_max_txg is updated by spa_claim_notify(), 2588219089Spjd * invoked from zil_claim_log_block()'s i/o done callback. 2589219089Spjd * Price of rollback is that we abandon the log. 
2590168404Spjd */ 2591219089Spjd spa->spa_claiming = B_TRUE; 2592219089Spjd 2593168404Spjd tx = dmu_tx_create_assigned(spa_get_dsl(spa), 2594168404Spjd spa_first_txg(spa)); 2595185029Spjd (void) dmu_objset_find(spa_name(spa), 2596168404Spjd zil_claim, tx, DS_FIND_CHILDREN); 2597168404Spjd dmu_tx_commit(tx); 2598168404Spjd 2599219089Spjd spa->spa_claiming = B_FALSE; 2600219089Spjd 2601219089Spjd spa_set_log_state(spa, SPA_LOG_GOOD); 2602168404Spjd spa->spa_sync_on = B_TRUE; 2603168404Spjd txg_sync_start(spa->spa_dsl_pool); 2604168404Spjd 2605168404Spjd /* 2606219089Spjd * Wait for all claims to sync. We sync up to the highest 2607219089Spjd * claimed log block birth time so that claimed log blocks 2608219089Spjd * don't appear to be from the future. spa_claim_max_txg 2609219089Spjd * will have been set for us by either zil_check_log_chain() 2610219089Spjd * (invoked from spa_check_logs()) or zil_claim() above. 2611168404Spjd */ 2612219089Spjd txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); 2613168404Spjd 2614168404Spjd /* 2615168404Spjd * If the config cache is stale, or we have uninitialized 2616168404Spjd * metaslabs (see spa_vdev_add()), then update the config. 2617209962Smm * 2618219089Spjd * If this is a verbatim import, trust the current 2619209962Smm * in-core spa_config and update the disk labels. 2620168404Spjd */ 2621168404Spjd if (config_cache_txg != spa->spa_config_txg || 2622219089Spjd state == SPA_LOAD_IMPORT || 2623219089Spjd state == SPA_LOAD_RECOVER || 2624219089Spjd (spa->spa_import_flags & ZFS_IMPORT_VERBATIM)) 2625168404Spjd need_update = B_TRUE; 2626168404Spjd 2627209962Smm for (int c = 0; c < rvd->vdev_children; c++) 2628168404Spjd if (rvd->vdev_child[c]->vdev_ms_array == 0) 2629168404Spjd need_update = B_TRUE; 2630168404Spjd 2631168404Spjd /* 2632168404Spjd * Update the config cache asychronously in case we're the 2633168404Spjd * root pool, in which case the config cache isn't writable yet. 
2634168404Spjd */ 2635168404Spjd if (need_update) 2636168404Spjd spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); 2637208683Spjd 2638208683Spjd /* 2639208683Spjd * Check all DTLs to see if anything needs resilvering. 2640208683Spjd */ 2641219089Spjd if (!dsl_scan_resilvering(spa->spa_dsl_pool) && 2642219089Spjd vdev_resilver_needed(rvd, NULL, NULL)) 2643208683Spjd spa_async_request(spa, SPA_ASYNC_RESILVER); 2644219089Spjd 2645219089Spjd /* 2646219089Spjd * Delete any inconsistent datasets. 2647219089Spjd */ 2648219089Spjd (void) dmu_objset_find(spa_name(spa), 2649219089Spjd dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); 2650219089Spjd 2651219089Spjd /* 2652219089Spjd * Clean up any stale temporary dataset userrefs. 2653219089Spjd */ 2654219089Spjd dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); 2655168404Spjd } 2656168404Spjd 2657219089Spjd return (0); 2658219089Spjd} 2659168404Spjd 2660219089Spjdstatic int 2661219089Spjdspa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) 2662219089Spjd{ 2663219089Spjd int mode = spa->spa_mode; 2664219089Spjd 2665219089Spjd spa_unload(spa); 2666219089Spjd spa_deactivate(spa); 2667219089Spjd 2668219089Spjd spa->spa_load_max_txg--; 2669219089Spjd 2670219089Spjd spa_activate(spa, mode); 2671219089Spjd spa_async_suspend(spa); 2672219089Spjd 2673219089Spjd return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig)); 2674168404Spjd} 2675168404Spjd 2676236884Smm/* 2677236884Smm * If spa_load() fails this function will try loading prior txg's. If 2678236884Smm * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool 2679236884Smm * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this 2680236884Smm * function will not rewind the pool and will return the same error as 2681236884Smm * spa_load(). 
2682236884Smm */ 2683219089Spjdstatic int 2684219089Spjdspa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, 2685219089Spjd uint64_t max_request, int rewind_flags) 2686219089Spjd{ 2687236884Smm nvlist_t *loadinfo = NULL; 2688219089Spjd nvlist_t *config = NULL; 2689219089Spjd int load_error, rewind_error; 2690219089Spjd uint64_t safe_rewind_txg; 2691219089Spjd uint64_t min_txg; 2692219089Spjd 2693219089Spjd if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { 2694219089Spjd spa->spa_load_max_txg = spa->spa_load_txg; 2695219089Spjd spa_set_log_state(spa, SPA_LOG_CLEAR); 2696219089Spjd } else { 2697219089Spjd spa->spa_load_max_txg = max_request; 2698219089Spjd } 2699219089Spjd 2700219089Spjd load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, 2701219089Spjd mosconfig); 2702219089Spjd if (load_error == 0) 2703219089Spjd return (0); 2704219089Spjd 2705219089Spjd if (spa->spa_root_vdev != NULL) 2706219089Spjd config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2707219089Spjd 2708219089Spjd spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; 2709219089Spjd spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; 2710219089Spjd 2711219089Spjd if (rewind_flags & ZPOOL_NEVER_REWIND) { 2712219089Spjd nvlist_free(config); 2713219089Spjd return (load_error); 2714219089Spjd } 2715219089Spjd 2716236884Smm if (state == SPA_LOAD_RECOVER) { 2717236884Smm /* Price of rolling back is discarding txgs, including log */ 2718219089Spjd spa_set_log_state(spa, SPA_LOG_CLEAR); 2719236884Smm } else { 2720236884Smm /* 2721236884Smm * If we aren't rolling back save the load info from our first 2722236884Smm * import attempt so that we can restore it after attempting 2723236884Smm * to rewind. 
2724236884Smm */ 2725236884Smm loadinfo = spa->spa_load_info; 2726236884Smm spa->spa_load_info = fnvlist_alloc(); 2727236884Smm } 2728219089Spjd 2729219089Spjd spa->spa_load_max_txg = spa->spa_last_ubsync_txg; 2730219089Spjd safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; 2731219089Spjd min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? 2732219089Spjd TXG_INITIAL : safe_rewind_txg; 2733219089Spjd 2734219089Spjd /* 2735219089Spjd * Continue as long as we're finding errors, we're still within 2736219089Spjd * the acceptable rewind range, and we're still finding uberblocks 2737219089Spjd */ 2738219089Spjd while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && 2739219089Spjd spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { 2740219089Spjd if (spa->spa_load_max_txg < safe_rewind_txg) 2741219089Spjd spa->spa_extreme_rewind = B_TRUE; 2742219089Spjd rewind_error = spa_load_retry(spa, state, mosconfig); 2743219089Spjd } 2744219089Spjd 2745219089Spjd spa->spa_extreme_rewind = B_FALSE; 2746219089Spjd spa->spa_load_max_txg = UINT64_MAX; 2747219089Spjd 2748219089Spjd if (config && (rewind_error || state != SPA_LOAD_RECOVER)) 2749219089Spjd spa_config_set(spa, config); 2750219089Spjd 2751236884Smm if (state == SPA_LOAD_RECOVER) { 2752236884Smm ASSERT3P(loadinfo, ==, NULL); 2753236884Smm return (rewind_error); 2754236884Smm } else { 2755236884Smm /* Store the rewind info as part of the initial load info */ 2756236884Smm fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, 2757236884Smm spa->spa_load_info); 2758236884Smm 2759236884Smm /* Restore the initial load info */ 2760236884Smm fnvlist_free(spa->spa_load_info); 2761236884Smm spa->spa_load_info = loadinfo; 2762236884Smm 2763236884Smm return (load_error); 2764236884Smm } 2765219089Spjd} 2766219089Spjd 2767168404Spjd/* 2768168404Spjd * Pool Open/Import 2769168404Spjd * 2770168404Spjd * The import case is identical to an open except that the configuration is sent 2771168404Spjd * down from userland, 
instead of grabbed from the configuration cache. For the 2772168404Spjd * case of an open, the pool configuration will exist in the 2773185029Spjd * POOL_STATE_UNINITIALIZED state. 2774168404Spjd * 2775168404Spjd * The stats information (gen/count/ustats) is used to gather vdev statistics at 2776168404Spjd * the same time open the pool, without having to keep around the spa_t in some 2777168404Spjd * ambiguous state. 2778168404Spjd */ 2779168404Spjdstatic int 2780219089Spjdspa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, 2781219089Spjd nvlist_t **config) 2782168404Spjd{ 2783168404Spjd spa_t *spa; 2784219089Spjd spa_load_state_t state = SPA_LOAD_OPEN; 2785168404Spjd int error; 2786168404Spjd int locked = B_FALSE; 2787219089Spjd int firstopen = B_FALSE; 2788168404Spjd 2789168404Spjd *spapp = NULL; 2790168404Spjd 2791168404Spjd /* 2792168404Spjd * As disgusting as this is, we need to support recursive calls to this 2793168404Spjd * function because dsl_dir_open() is called during spa_load(), and ends 2794168404Spjd * up calling spa_open() again. The real fix is to figure out how to 2795168404Spjd * avoid dsl_dir_open() calling this in the first place. 2796168404Spjd */ 2797168404Spjd if (mutex_owner(&spa_namespace_lock) != curthread) { 2798168404Spjd mutex_enter(&spa_namespace_lock); 2799168404Spjd locked = B_TRUE; 2800168404Spjd } 2801168404Spjd 2802168404Spjd if ((spa = spa_lookup(pool)) == NULL) { 2803168404Spjd if (locked) 2804168404Spjd mutex_exit(&spa_namespace_lock); 2805168404Spjd return (ENOENT); 2806168404Spjd } 2807219089Spjd 2808168404Spjd if (spa->spa_state == POOL_STATE_UNINITIALIZED) { 2809219089Spjd zpool_rewind_policy_t policy; 2810168404Spjd 2811219089Spjd firstopen = B_TRUE; 2812219089Spjd 2813219089Spjd zpool_get_rewind_policy(nvpolicy ? 
nvpolicy : spa->spa_config, 2814219089Spjd &policy); 2815219089Spjd if (policy.zrp_request & ZPOOL_DO_REWIND) 2816219089Spjd state = SPA_LOAD_RECOVER; 2817219089Spjd 2818209962Smm spa_activate(spa, spa_mode_global); 2819168404Spjd 2820219089Spjd if (state != SPA_LOAD_RECOVER) 2821219089Spjd spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; 2822168404Spjd 2823219089Spjd error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg, 2824219089Spjd policy.zrp_request); 2825219089Spjd 2826168404Spjd if (error == EBADF) { 2827168404Spjd /* 2828168404Spjd * If vdev_validate() returns failure (indicated by 2829168404Spjd * EBADF), it indicates that one of the vdevs indicates 2830168404Spjd * that the pool has been exported or destroyed. If 2831168404Spjd * this is the case, the config cache is out of sync and 2832168404Spjd * we should remove the pool from the namespace. 2833168404Spjd */ 2834168404Spjd spa_unload(spa); 2835168404Spjd spa_deactivate(spa); 2836185029Spjd spa_config_sync(spa, B_TRUE, B_TRUE); 2837168404Spjd spa_remove(spa); 2838168404Spjd if (locked) 2839168404Spjd mutex_exit(&spa_namespace_lock); 2840168404Spjd return (ENOENT); 2841168404Spjd } 2842168404Spjd 2843168404Spjd if (error) { 2844168404Spjd /* 2845168404Spjd * We can't open the pool, but we still have useful 2846168404Spjd * information: the state of each vdev after the 2847168404Spjd * attempted vdev_open(). Return this to the user. 
2848168404Spjd */ 2849219089Spjd if (config != NULL && spa->spa_config) { 2850219089Spjd VERIFY(nvlist_dup(spa->spa_config, config, 2851219089Spjd KM_SLEEP) == 0); 2852219089Spjd VERIFY(nvlist_add_nvlist(*config, 2853219089Spjd ZPOOL_CONFIG_LOAD_INFO, 2854219089Spjd spa->spa_load_info) == 0); 2855219089Spjd } 2856168404Spjd spa_unload(spa); 2857168404Spjd spa_deactivate(spa); 2858219089Spjd spa->spa_last_open_failed = error; 2859168404Spjd if (locked) 2860168404Spjd mutex_exit(&spa_namespace_lock); 2861168404Spjd *spapp = NULL; 2862168404Spjd return (error); 2863168404Spjd } 2864168404Spjd } 2865168404Spjd 2866168404Spjd spa_open_ref(spa, tag); 2867185029Spjd 2868219089Spjd if (config != NULL) 2869219089Spjd *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 2870219089Spjd 2871219089Spjd /* 2872219089Spjd * If we've recovered the pool, pass back any information we 2873219089Spjd * gathered while doing the load. 2874219089Spjd */ 2875219089Spjd if (state == SPA_LOAD_RECOVER) { 2876219089Spjd VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, 2877219089Spjd spa->spa_load_info) == 0); 2878219089Spjd } 2879219089Spjd 2880219089Spjd if (locked) { 2881219089Spjd spa->spa_last_open_failed = 0; 2882219089Spjd spa->spa_last_ubsync_txg = 0; 2883219089Spjd spa->spa_load_txg = 0; 2884168404Spjd mutex_exit(&spa_namespace_lock); 2885219089Spjd#ifdef __FreeBSD__ 2886219089Spjd#ifdef _KERNEL 2887219089Spjd if (firstopen) 2888219089Spjd zvol_create_minors(pool); 2889219089Spjd#endif 2890219089Spjd#endif 2891219089Spjd } 2892168404Spjd 2893168404Spjd *spapp = spa; 2894168404Spjd 2895168404Spjd return (0); 2896168404Spjd} 2897168404Spjd 2898168404Spjdint 2899219089Spjdspa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, 2900219089Spjd nvlist_t **config) 2901219089Spjd{ 2902219089Spjd return (spa_open_common(name, spapp, tag, policy, config)); 2903219089Spjd} 2904219089Spjd 2905219089Spjdint 2906168404Spjdspa_open(const char *name, spa_t **spapp, 
void *tag) 2907168404Spjd{ 2908219089Spjd return (spa_open_common(name, spapp, tag, NULL, NULL)); 2909168404Spjd} 2910168404Spjd 2911168404Spjd/* 2912168404Spjd * Lookup the given spa_t, incrementing the inject count in the process, 2913168404Spjd * preventing it from being exported or destroyed. 2914168404Spjd */ 2915168404Spjdspa_t * 2916168404Spjdspa_inject_addref(char *name) 2917168404Spjd{ 2918168404Spjd spa_t *spa; 2919168404Spjd 2920168404Spjd mutex_enter(&spa_namespace_lock); 2921168404Spjd if ((spa = spa_lookup(name)) == NULL) { 2922168404Spjd mutex_exit(&spa_namespace_lock); 2923168404Spjd return (NULL); 2924168404Spjd } 2925168404Spjd spa->spa_inject_ref++; 2926168404Spjd mutex_exit(&spa_namespace_lock); 2927168404Spjd 2928168404Spjd return (spa); 2929168404Spjd} 2930168404Spjd 2931168404Spjdvoid 2932168404Spjdspa_inject_delref(spa_t *spa) 2933168404Spjd{ 2934168404Spjd mutex_enter(&spa_namespace_lock); 2935168404Spjd spa->spa_inject_ref--; 2936168404Spjd mutex_exit(&spa_namespace_lock); 2937168404Spjd} 2938168404Spjd 2939185029Spjd/* 2940185029Spjd * Add spares device information to the nvlist. 
2941185029Spjd */ 2942168404Spjdstatic void 2943168404Spjdspa_add_spares(spa_t *spa, nvlist_t *config) 2944168404Spjd{ 2945168404Spjd nvlist_t **spares; 2946168404Spjd uint_t i, nspares; 2947168404Spjd nvlist_t *nvroot; 2948168404Spjd uint64_t guid; 2949168404Spjd vdev_stat_t *vs; 2950168404Spjd uint_t vsc; 2951168404Spjd uint64_t pool; 2952168404Spjd 2953209962Smm ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 2954209962Smm 2955185029Spjd if (spa->spa_spares.sav_count == 0) 2956168404Spjd return; 2957168404Spjd 2958168404Spjd VERIFY(nvlist_lookup_nvlist(config, 2959168404Spjd ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 2960185029Spjd VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 2961168404Spjd ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 2962168404Spjd if (nspares != 0) { 2963168404Spjd VERIFY(nvlist_add_nvlist_array(nvroot, 2964168404Spjd ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 2965168404Spjd VERIFY(nvlist_lookup_nvlist_array(nvroot, 2966168404Spjd ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); 2967168404Spjd 2968168404Spjd /* 2969168404Spjd * Go through and find any spares which have since been 2970168404Spjd * repurposed as an active spare. If this is the case, update 2971168404Spjd * their status appropriately. 2972168404Spjd */ 2973168404Spjd for (i = 0; i < nspares; i++) { 2974168404Spjd VERIFY(nvlist_lookup_uint64(spares[i], 2975168404Spjd ZPOOL_CONFIG_GUID, &guid) == 0); 2976185029Spjd if (spa_spare_exists(guid, &pool, NULL) && 2977185029Spjd pool != 0ULL) { 2978168404Spjd VERIFY(nvlist_lookup_uint64_array( 2979219089Spjd spares[i], ZPOOL_CONFIG_VDEV_STATS, 2980168404Spjd (uint64_t **)&vs, &vsc) == 0); 2981168404Spjd vs->vs_state = VDEV_STATE_CANT_OPEN; 2982168404Spjd vs->vs_aux = VDEV_AUX_SPARED; 2983168404Spjd } 2984168404Spjd } 2985168404Spjd } 2986168404Spjd} 2987168404Spjd 2988185029Spjd/* 2989185029Spjd * Add l2cache device information to the nvlist, including vdev stats. 
2990185029Spjd */ 2991185029Spjdstatic void 2992185029Spjdspa_add_l2cache(spa_t *spa, nvlist_t *config) 2993185029Spjd{ 2994185029Spjd nvlist_t **l2cache; 2995185029Spjd uint_t i, j, nl2cache; 2996185029Spjd nvlist_t *nvroot; 2997185029Spjd uint64_t guid; 2998185029Spjd vdev_t *vd; 2999185029Spjd vdev_stat_t *vs; 3000185029Spjd uint_t vsc; 3001185029Spjd 3002209962Smm ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 3003209962Smm 3004185029Spjd if (spa->spa_l2cache.sav_count == 0) 3005185029Spjd return; 3006185029Spjd 3007185029Spjd VERIFY(nvlist_lookup_nvlist(config, 3008185029Spjd ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 3009185029Spjd VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 3010185029Spjd ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 3011185029Spjd if (nl2cache != 0) { 3012185029Spjd VERIFY(nvlist_add_nvlist_array(nvroot, 3013185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 3014185029Spjd VERIFY(nvlist_lookup_nvlist_array(nvroot, 3015185029Spjd ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); 3016185029Spjd 3017185029Spjd /* 3018185029Spjd * Update level 2 cache device stats. 
3019185029Spjd */ 3020185029Spjd 3021185029Spjd for (i = 0; i < nl2cache; i++) { 3022185029Spjd VERIFY(nvlist_lookup_uint64(l2cache[i], 3023185029Spjd ZPOOL_CONFIG_GUID, &guid) == 0); 3024185029Spjd 3025185029Spjd vd = NULL; 3026185029Spjd for (j = 0; j < spa->spa_l2cache.sav_count; j++) { 3027185029Spjd if (guid == 3028185029Spjd spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { 3029185029Spjd vd = spa->spa_l2cache.sav_vdevs[j]; 3030185029Spjd break; 3031185029Spjd } 3032185029Spjd } 3033185029Spjd ASSERT(vd != NULL); 3034185029Spjd 3035185029Spjd VERIFY(nvlist_lookup_uint64_array(l2cache[i], 3036219089Spjd ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) 3037219089Spjd == 0); 3038185029Spjd vdev_get_stats(vd, vs); 3039185029Spjd } 3040185029Spjd } 3041185029Spjd} 3042185029Spjd 3043236884Smmstatic void 3044236884Smmspa_add_feature_stats(spa_t *spa, nvlist_t *config) 3045236884Smm{ 3046236884Smm nvlist_t *features; 3047236884Smm zap_cursor_t zc; 3048236884Smm zap_attribute_t za; 3049236884Smm 3050236884Smm ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); 3051236884Smm VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0); 3052236884Smm 3053236884Smm if (spa->spa_feat_for_read_obj != 0) { 3054236884Smm for (zap_cursor_init(&zc, spa->spa_meta_objset, 3055236884Smm spa->spa_feat_for_read_obj); 3056236884Smm zap_cursor_retrieve(&zc, &za) == 0; 3057236884Smm zap_cursor_advance(&zc)) { 3058236884Smm ASSERT(za.za_integer_length == sizeof (uint64_t) && 3059236884Smm za.za_num_integers == 1); 3060236884Smm VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, 3061236884Smm za.za_first_integer)); 3062236884Smm } 3063236884Smm zap_cursor_fini(&zc); 3064236884Smm } 3065236884Smm 3066236884Smm if (spa->spa_feat_for_write_obj != 0) { 3067236884Smm for (zap_cursor_init(&zc, spa->spa_meta_objset, 3068236884Smm spa->spa_feat_for_write_obj); 3069236884Smm zap_cursor_retrieve(&zc, &za) == 0; 3070236884Smm zap_cursor_advance(&zc)) { 3071236884Smm ASSERT(za.za_integer_length 
== sizeof (uint64_t) && 3072236884Smm za.za_num_integers == 1); 3073236884Smm VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, 3074236884Smm za.za_first_integer)); 3075236884Smm } 3076236884Smm zap_cursor_fini(&zc); 3077236884Smm } 3078236884Smm 3079236884Smm VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, 3080236884Smm features) == 0); 3081236884Smm nvlist_free(features); 3082236884Smm} 3083236884Smm 3084168404Spjdint 3085236884Smmspa_get_stats(const char *name, nvlist_t **config, 3086236884Smm char *altroot, size_t buflen) 3087168404Spjd{ 3088168404Spjd int error; 3089168404Spjd spa_t *spa; 3090168404Spjd 3091168404Spjd *config = NULL; 3092219089Spjd error = spa_open_common(name, &spa, FTAG, NULL, config); 3093168404Spjd 3094209962Smm if (spa != NULL) { 3095209962Smm /* 3096209962Smm * This still leaves a window of inconsistency where the spares 3097209962Smm * or l2cache devices could change and the config would be 3098209962Smm * self-inconsistent. 3099209962Smm */ 3100209962Smm spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 3101168404Spjd 3102209962Smm if (*config != NULL) { 3103219089Spjd uint64_t loadtimes[2]; 3104219089Spjd 3105219089Spjd loadtimes[0] = spa->spa_loaded_ts.tv_sec; 3106219089Spjd loadtimes[1] = spa->spa_loaded_ts.tv_nsec; 3107219089Spjd VERIFY(nvlist_add_uint64_array(*config, 3108219089Spjd ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); 3109219089Spjd 3110185029Spjd VERIFY(nvlist_add_uint64(*config, 3111209962Smm ZPOOL_CONFIG_ERRCOUNT, 3112209962Smm spa_get_errlog_size(spa)) == 0); 3113185029Spjd 3114209962Smm if (spa_suspended(spa)) 3115209962Smm VERIFY(nvlist_add_uint64(*config, 3116209962Smm ZPOOL_CONFIG_SUSPENDED, 3117209962Smm spa->spa_failmode) == 0); 3118209962Smm 3119209962Smm spa_add_spares(spa, *config); 3120209962Smm spa_add_l2cache(spa, *config); 3121236884Smm spa_add_feature_stats(spa, *config); 3122209962Smm } 3123168404Spjd } 3124168404Spjd 3125168404Spjd /* 3126168404Spjd * We want to get the alternate 
root even for faulted pools, so we cheat 3127168404Spjd * and call spa_lookup() directly. 3128168404Spjd */ 3129168404Spjd if (altroot) { 3130168404Spjd if (spa == NULL) { 3131168404Spjd mutex_enter(&spa_namespace_lock); 3132168404Spjd spa = spa_lookup(name); 3133168404Spjd if (spa) 3134168404Spjd spa_altroot(spa, altroot, buflen); 3135168404Spjd else 3136168404Spjd altroot[0] = '\0'; 3137168404Spjd spa = NULL; 3138168404Spjd mutex_exit(&spa_namespace_lock); 3139168404Spjd } else { 3140168404Spjd spa_altroot(spa, altroot, buflen); 3141168404Spjd } 3142168404Spjd } 3143168404Spjd 3144209962Smm if (spa != NULL) { 3145209962Smm spa_config_exit(spa, SCL_CONFIG, FTAG); 3146168404Spjd spa_close(spa, FTAG); 3147209962Smm } 3148168404Spjd 3149168404Spjd return (error); 3150168404Spjd} 3151168404Spjd 3152168404Spjd/* 3153185029Spjd * Validate that the auxiliary device array is well formed. We must have an 3154185029Spjd * array of nvlists, each which describes a valid leaf vdev. If this is an 3155185029Spjd * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be 3156185029Spjd * specified, as long as they are well-formed. 3157168404Spjd */ 3158168404Spjdstatic int 3159185029Spjdspa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, 3160185029Spjd spa_aux_vdev_t *sav, const char *config, uint64_t version, 3161185029Spjd vdev_labeltype_t label) 3162168404Spjd{ 3163185029Spjd nvlist_t **dev; 3164185029Spjd uint_t i, ndev; 3165168404Spjd vdev_t *vd; 3166168404Spjd int error; 3167168404Spjd 3168185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3169185029Spjd 3170168404Spjd /* 3171185029Spjd * It's acceptable to have no devs specified. 
3172168404Spjd */ 3173185029Spjd if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) 3174168404Spjd return (0); 3175168404Spjd 3176185029Spjd if (ndev == 0) 3177168404Spjd return (EINVAL); 3178168404Spjd 3179168404Spjd /* 3180185029Spjd * Make sure the pool is formatted with a version that supports this 3181185029Spjd * device type. 3182168404Spjd */ 3183185029Spjd if (spa_version(spa) < version) 3184168404Spjd return (ENOTSUP); 3185168404Spjd 3186168404Spjd /* 3187185029Spjd * Set the pending device list so we correctly handle device in-use 3188168404Spjd * checking. 3189168404Spjd */ 3190185029Spjd sav->sav_pending = dev; 3191185029Spjd sav->sav_npending = ndev; 3192168404Spjd 3193185029Spjd for (i = 0; i < ndev; i++) { 3194185029Spjd if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, 3195168404Spjd mode)) != 0) 3196168404Spjd goto out; 3197168404Spjd 3198168404Spjd if (!vd->vdev_ops->vdev_op_leaf) { 3199168404Spjd vdev_free(vd); 3200168404Spjd error = EINVAL; 3201168404Spjd goto out; 3202168404Spjd } 3203168404Spjd 3204185029Spjd /* 3205185029Spjd * The L2ARC currently only supports disk devices in 3206185029Spjd * kernel context. For user-level testing, we allow it. 
3207185029Spjd */ 3208185029Spjd#ifdef _KERNEL 3209185029Spjd if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && 3210185029Spjd strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { 3211185029Spjd error = ENOTBLK; 3212230514Smm vdev_free(vd); 3213185029Spjd goto out; 3214185029Spjd } 3215185029Spjd#endif 3216168404Spjd vd->vdev_top = vd; 3217168404Spjd 3218168404Spjd if ((error = vdev_open(vd)) == 0 && 3219185029Spjd (error = vdev_label_init(vd, crtxg, label)) == 0) { 3220185029Spjd VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, 3221168404Spjd vd->vdev_guid) == 0); 3222168404Spjd } 3223168404Spjd 3224168404Spjd vdev_free(vd); 3225168404Spjd 3226185029Spjd if (error && 3227185029Spjd (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) 3228168404Spjd goto out; 3229168404Spjd else 3230168404Spjd error = 0; 3231168404Spjd } 3232168404Spjd 3233168404Spjdout: 3234185029Spjd sav->sav_pending = NULL; 3235185029Spjd sav->sav_npending = 0; 3236168404Spjd return (error); 3237168404Spjd} 3238168404Spjd 3239185029Spjdstatic int 3240185029Spjdspa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) 3241185029Spjd{ 3242185029Spjd int error; 3243185029Spjd 3244185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 3245185029Spjd 3246185029Spjd if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, 3247185029Spjd &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, 3248185029Spjd VDEV_LABEL_SPARE)) != 0) { 3249185029Spjd return (error); 3250185029Spjd } 3251185029Spjd 3252185029Spjd return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, 3253185029Spjd &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, 3254185029Spjd VDEV_LABEL_L2CACHE)); 3255185029Spjd} 3256185029Spjd 3257185029Spjdstatic void 3258185029Spjdspa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, 3259185029Spjd const char *config) 3260185029Spjd{ 3261185029Spjd int i; 3262185029Spjd 3263185029Spjd if (sav->sav_config != NULL) { 
3264185029Spjd nvlist_t **olddevs; 3265185029Spjd uint_t oldndevs; 3266185029Spjd nvlist_t **newdevs; 3267185029Spjd 3268185029Spjd /* 3269185029Spjd * Generate new dev list by concatentating with the 3270185029Spjd * current dev list. 3271185029Spjd */ 3272185029Spjd VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, 3273185029Spjd &olddevs, &oldndevs) == 0); 3274185029Spjd 3275185029Spjd newdevs = kmem_alloc(sizeof (void *) * 3276185029Spjd (ndevs + oldndevs), KM_SLEEP); 3277185029Spjd for (i = 0; i < oldndevs; i++) 3278185029Spjd VERIFY(nvlist_dup(olddevs[i], &newdevs[i], 3279185029Spjd KM_SLEEP) == 0); 3280185029Spjd for (i = 0; i < ndevs; i++) 3281185029Spjd VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], 3282185029Spjd KM_SLEEP) == 0); 3283185029Spjd 3284185029Spjd VERIFY(nvlist_remove(sav->sav_config, config, 3285185029Spjd DATA_TYPE_NVLIST_ARRAY) == 0); 3286185029Spjd 3287185029Spjd VERIFY(nvlist_add_nvlist_array(sav->sav_config, 3288185029Spjd config, newdevs, ndevs + oldndevs) == 0); 3289185029Spjd for (i = 0; i < oldndevs + ndevs; i++) 3290185029Spjd nvlist_free(newdevs[i]); 3291185029Spjd kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); 3292185029Spjd } else { 3293185029Spjd /* 3294185029Spjd * Generate a new dev list. 
3295185029Spjd */ 3296185029Spjd VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, 3297185029Spjd KM_SLEEP) == 0); 3298185029Spjd VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, 3299185029Spjd devs, ndevs) == 0); 3300185029Spjd } 3301185029Spjd} 3302185029Spjd 3303168404Spjd/* 3304185029Spjd * Stop and drop level 2 ARC devices 3305185029Spjd */ 3306185029Spjdvoid 3307185029Spjdspa_l2cache_drop(spa_t *spa) 3308185029Spjd{ 3309185029Spjd vdev_t *vd; 3310185029Spjd int i; 3311185029Spjd spa_aux_vdev_t *sav = &spa->spa_l2cache; 3312185029Spjd 3313185029Spjd for (i = 0; i < sav->sav_count; i++) { 3314185029Spjd uint64_t pool; 3315185029Spjd 3316185029Spjd vd = sav->sav_vdevs[i]; 3317185029Spjd ASSERT(vd != NULL); 3318185029Spjd 3319209962Smm if (spa_l2cache_exists(vd->vdev_guid, &pool) && 3320209962Smm pool != 0ULL && l2arc_vdev_present(vd)) 3321185029Spjd l2arc_remove_vdev(vd); 3322185029Spjd } 3323185029Spjd} 3324185029Spjd 3325185029Spjd/* 3326168404Spjd * Pool Creation 3327168404Spjd */ 3328168404Spjdint 3329185029Spjdspa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, 3330185029Spjd const char *history_str, nvlist_t *zplprops) 3331168404Spjd{ 3332168404Spjd spa_t *spa; 3333185029Spjd char *altroot = NULL; 3334168404Spjd vdev_t *rvd; 3335168404Spjd dsl_pool_t *dp; 3336168404Spjd dmu_tx_t *tx; 3337219089Spjd int error = 0; 3338168404Spjd uint64_t txg = TXG_INITIAL; 3339185029Spjd nvlist_t **spares, **l2cache; 3340185029Spjd uint_t nspares, nl2cache; 3341219089Spjd uint64_t version, obj; 3342236884Smm boolean_t has_features; 3343168404Spjd 3344168404Spjd /* 3345168404Spjd * If this pool already exists, return failure. 3346168404Spjd */ 3347168404Spjd mutex_enter(&spa_namespace_lock); 3348168404Spjd if (spa_lookup(pool) != NULL) { 3349168404Spjd mutex_exit(&spa_namespace_lock); 3350168404Spjd return (EEXIST); 3351168404Spjd } 3352168404Spjd 3353168404Spjd /* 3354168404Spjd * Allocate a new spa_t structure. 
3355168404Spjd */ 3356185029Spjd (void) nvlist_lookup_string(props, 3357185029Spjd zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 3358219089Spjd spa = spa_add(pool, NULL, altroot); 3359209962Smm spa_activate(spa, spa_mode_global); 3360168404Spjd 3361185029Spjd if (props && (error = spa_prop_validate(spa, props))) { 3362185029Spjd spa_deactivate(spa); 3363185029Spjd spa_remove(spa); 3364185029Spjd mutex_exit(&spa_namespace_lock); 3365185029Spjd return (error); 3366185029Spjd } 3367185029Spjd 3368236884Smm has_features = B_FALSE; 3369236884Smm for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); 3370236884Smm elem != NULL; elem = nvlist_next_nvpair(props, elem)) { 3371236884Smm if (zpool_prop_feature(nvpair_name(elem))) 3372236884Smm has_features = B_TRUE; 3373236884Smm } 3374236884Smm 3375236884Smm if (has_features || nvlist_lookup_uint64(props, 3376236884Smm zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { 3377185029Spjd version = SPA_VERSION; 3378236884Smm } 3379236884Smm ASSERT(SPA_VERSION_IS_SUPPORTED(version)); 3380219089Spjd 3381219089Spjd spa->spa_first_txg = txg; 3382219089Spjd spa->spa_uberblock.ub_txg = txg - 1; 3383185029Spjd spa->spa_uberblock.ub_version = version; 3384168404Spjd spa->spa_ubsync = spa->spa_uberblock; 3385168404Spjd 3386168404Spjd /* 3387209962Smm * Create "The Godfather" zio to hold all async IOs 3388209962Smm */ 3389209962Smm spa->spa_async_zio_root = zio_root(spa, NULL, NULL, 3390209962Smm ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); 3391209962Smm 3392209962Smm /* 3393168404Spjd * Create the root vdev. 
3394168404Spjd */ 3395185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3396168404Spjd 3397168404Spjd error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); 3398168404Spjd 3399168404Spjd ASSERT(error != 0 || rvd != NULL); 3400168404Spjd ASSERT(error != 0 || spa->spa_root_vdev == rvd); 3401168404Spjd 3402185029Spjd if (error == 0 && !zfs_allocatable_devs(nvroot)) 3403168404Spjd error = EINVAL; 3404168404Spjd 3405168404Spjd if (error == 0 && 3406168404Spjd (error = vdev_create(rvd, txg, B_FALSE)) == 0 && 3407185029Spjd (error = spa_validate_aux(spa, nvroot, txg, 3408168404Spjd VDEV_ALLOC_ADD)) == 0) { 3409219089Spjd for (int c = 0; c < rvd->vdev_children; c++) { 3410219089Spjd vdev_metaslab_set_size(rvd->vdev_child[c]); 3411219089Spjd vdev_expand(rvd->vdev_child[c], txg); 3412219089Spjd } 3413168404Spjd } 3414168404Spjd 3415185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 3416168404Spjd 3417168404Spjd if (error != 0) { 3418168404Spjd spa_unload(spa); 3419168404Spjd spa_deactivate(spa); 3420168404Spjd spa_remove(spa); 3421168404Spjd mutex_exit(&spa_namespace_lock); 3422168404Spjd return (error); 3423168404Spjd } 3424168404Spjd 3425168404Spjd /* 3426168404Spjd * Get the list of spares, if specified. 3427168404Spjd */ 3428168404Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 3429168404Spjd &spares, &nspares) == 0) { 3430185029Spjd VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, 3431168404Spjd KM_SLEEP) == 0); 3432185029Spjd VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 3433168404Spjd ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 3434185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3435168404Spjd spa_load_spares(spa); 3436185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 3437185029Spjd spa->spa_spares.sav_sync = B_TRUE; 3438168404Spjd } 3439168404Spjd 3440185029Spjd /* 3441185029Spjd * Get the list of level 2 cache devices, if specified. 
3442185029Spjd */ 3443185029Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 3444185029Spjd &l2cache, &nl2cache) == 0) { 3445185029Spjd VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 3446185029Spjd NV_UNIQUE_NAME, KM_SLEEP) == 0); 3447185029Spjd VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 3448185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 3449185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3450185029Spjd spa_load_l2cache(spa); 3451185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 3452185029Spjd spa->spa_l2cache.sav_sync = B_TRUE; 3453185029Spjd } 3454185029Spjd 3455236884Smm spa->spa_is_initializing = B_TRUE; 3456185029Spjd spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); 3457168404Spjd spa->spa_meta_objset = dp->dp_meta_objset; 3458236884Smm spa->spa_is_initializing = B_FALSE; 3459168404Spjd 3460219089Spjd /* 3461219089Spjd * Create DDTs (dedup tables). 3462219089Spjd */ 3463219089Spjd ddt_create(spa); 3464219089Spjd 3465219089Spjd spa_update_dspace(spa); 3466219089Spjd 3467168404Spjd tx = dmu_tx_create_assigned(dp, txg); 3468168404Spjd 3469168404Spjd /* 3470168404Spjd * Create the pool config object. 
3471168404Spjd */ 3472168404Spjd spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, 3473185029Spjd DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, 3474168404Spjd DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); 3475168404Spjd 3476168404Spjd if (zap_add(spa->spa_meta_objset, 3477168404Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, 3478168404Spjd sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { 3479168404Spjd cmn_err(CE_PANIC, "failed to add pool config"); 3480168404Spjd } 3481168404Spjd 3482236884Smm if (spa_version(spa) >= SPA_VERSION_FEATURES) 3483236884Smm spa_feature_create_zap_objects(spa, tx); 3484236884Smm 3485219089Spjd if (zap_add(spa->spa_meta_objset, 3486219089Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, 3487219089Spjd sizeof (uint64_t), 1, &version, tx) != 0) { 3488219089Spjd cmn_err(CE_PANIC, "failed to add pool version"); 3489219089Spjd } 3490219089Spjd 3491185029Spjd /* Newly created pools with the right version are always deflated. */ 3492185029Spjd if (version >= SPA_VERSION_RAIDZ_DEFLATE) { 3493185029Spjd spa->spa_deflate = TRUE; 3494185029Spjd if (zap_add(spa->spa_meta_objset, 3495185029Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, 3496185029Spjd sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { 3497185029Spjd cmn_err(CE_PANIC, "failed to add deflate"); 3498185029Spjd } 3499168404Spjd } 3500168404Spjd 3501168404Spjd /* 3502219089Spjd * Create the deferred-free bpobj. Turn off compression 3503168404Spjd * because sync-to-convergence takes longer if the blocksize 3504168404Spjd * keeps changing. 
3505168404Spjd */ 3506219089Spjd obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); 3507219089Spjd dmu_object_set_compress(spa->spa_meta_objset, obj, 3508168404Spjd ZIO_COMPRESS_OFF, tx); 3509168404Spjd if (zap_add(spa->spa_meta_objset, 3510219089Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, 3511219089Spjd sizeof (uint64_t), 1, &obj, tx) != 0) { 3512219089Spjd cmn_err(CE_PANIC, "failed to add bpobj"); 3513168404Spjd } 3514219089Spjd VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, 3515219089Spjd spa->spa_meta_objset, obj)); 3516168404Spjd 3517168404Spjd /* 3518168404Spjd * Create the pool's history object. 3519168404Spjd */ 3520185029Spjd if (version >= SPA_VERSION_ZPOOL_HISTORY) 3521185029Spjd spa_history_create_obj(spa, tx); 3522168404Spjd 3523185029Spjd /* 3524185029Spjd * Set pool properties. 3525185029Spjd */ 3526185029Spjd spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); 3527185029Spjd spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); 3528185029Spjd spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); 3529219089Spjd spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); 3530219089Spjd 3531209962Smm if (props != NULL) { 3532209962Smm spa_configfile_set(spa, props, B_FALSE); 3533219089Spjd spa_sync_props(spa, props, tx); 3534209962Smm } 3535185029Spjd 3536168404Spjd dmu_tx_commit(tx); 3537168404Spjd 3538168404Spjd spa->spa_sync_on = B_TRUE; 3539168404Spjd txg_sync_start(spa->spa_dsl_pool); 3540168404Spjd 3541168404Spjd /* 3542168404Spjd * We explicitly wait for the first transaction to complete so that our 3543168404Spjd * bean counters are appropriately updated. 
3544168404Spjd */ 3545168404Spjd txg_wait_synced(spa->spa_dsl_pool, txg); 3546168404Spjd 3547185029Spjd spa_config_sync(spa, B_FALSE, B_TRUE); 3548168404Spjd 3549185029Spjd if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL) 3550185029Spjd (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE); 3551219089Spjd spa_history_log_version(spa, LOG_POOL_CREATE); 3552185029Spjd 3553208442Smm spa->spa_minref = refcount_count(&spa->spa_refcount); 3554208442Smm 3555168404Spjd mutex_exit(&spa_namespace_lock); 3556168404Spjd 3557168404Spjd return (0); 3558168404Spjd} 3559168404Spjd 3560241286Savg#ifdef _KERNEL 3561219089Spjd#if defined(sun) 3562185029Spjd/* 3563219089Spjd * Get the root pool information from the root disk, then import the root pool 3564219089Spjd * during the system boot up time. 3565185029Spjd */ 3566219089Spjdextern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); 3567219089Spjd 3568219089Spjdstatic nvlist_t * 3569219089Spjdspa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) 3570185029Spjd{ 3571219089Spjd nvlist_t *config; 3572185029Spjd nvlist_t *nvtop, *nvroot; 3573185029Spjd uint64_t pgid; 3574185029Spjd 3575219089Spjd if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) 3576219089Spjd return (NULL); 3577219089Spjd 3578168404Spjd /* 3579185029Spjd * Add this top-level vdev to the child array. 3580168404Spjd */ 3581219089Spjd VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 3582219089Spjd &nvtop) == 0); 3583219089Spjd VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 3584219089Spjd &pgid) == 0); 3585219089Spjd VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); 3586168404Spjd 3587185029Spjd /* 3588185029Spjd * Put this pool's top-level vdevs into a root vdev. 
3589185029Spjd */ 3590185029Spjd VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 3591219089Spjd VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 3592219089Spjd VDEV_TYPE_ROOT) == 0); 3593185029Spjd VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 3594185029Spjd VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 3595185029Spjd VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 3596185029Spjd &nvtop, 1) == 0); 3597168404Spjd 3598168404Spjd /* 3599185029Spjd * Replace the existing vdev_tree with the new root vdev in 3600185029Spjd * this pool's configuration (remove the old, add the new). 3601168404Spjd */ 3602185029Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 3603185029Spjd nvlist_free(nvroot); 3604219089Spjd return (config); 3605185029Spjd} 3606168404Spjd 3607185029Spjd/* 3608219089Spjd * Walk the vdev tree and see if we can find a device with "better" 3609219089Spjd * configuration. A configuration is "better" if the label on that 3610219089Spjd * device has a more recent txg. 3611185029Spjd */ 3612219089Spjdstatic void 3613219089Spjdspa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) 3614185029Spjd{ 3615219089Spjd for (int c = 0; c < vd->vdev_children; c++) 3616219089Spjd spa_alt_rootvdev(vd->vdev_child[c], avd, txg); 3617185029Spjd 3618219089Spjd if (vd->vdev_ops->vdev_op_leaf) { 3619219089Spjd nvlist_t *label; 3620219089Spjd uint64_t label_txg; 3621185029Spjd 3622219089Spjd if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, 3623219089Spjd &label) != 0) 3624219089Spjd return; 3625185029Spjd 3626219089Spjd VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, 3627219089Spjd &label_txg) == 0); 3628168404Spjd 3629219089Spjd /* 3630219089Spjd * Do we have a better boot device? 
3631219089Spjd */ 3632219089Spjd if (label_txg > *txg) { 3633219089Spjd *txg = label_txg; 3634219089Spjd *avd = vd; 3635185029Spjd } 3636219089Spjd nvlist_free(label); 3637185029Spjd } 3638185029Spjd} 3639185029Spjd 3640185029Spjd/* 3641185029Spjd * Import a root pool. 3642185029Spjd * 3643185029Spjd * For x86. devpath_list will consist of devid and/or physpath name of 3644185029Spjd * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 3645185029Spjd * The GRUB "findroot" command will return the vdev we should boot. 3646185029Spjd * 3647185029Spjd * For Sparc, devpath_list consists the physpath name of the booting device 3648185029Spjd * no matter the rootpool is a single device pool or a mirrored pool. 3649185029Spjd * e.g. 3650185029Spjd * "/pci@1f,0/ide@d/disk@0,0:a" 3651185029Spjd */ 3652185029Spjdint 3653185029Spjdspa_import_rootpool(char *devpath, char *devid) 3654185029Spjd{ 3655219089Spjd spa_t *spa; 3656219089Spjd vdev_t *rvd, *bvd, *avd = NULL; 3657219089Spjd nvlist_t *config, *nvtop; 3658219089Spjd uint64_t guid, txg; 3659185029Spjd char *pname; 3660185029Spjd int error; 3661185029Spjd 3662185029Spjd /* 3663219089Spjd * Read the label from the boot device and generate a configuration. 
3664185029Spjd */ 3665219089Spjd config = spa_generate_rootconf(devpath, devid, &guid); 3666219089Spjd#if defined(_OBP) && defined(_KERNEL) 3667219089Spjd if (config == NULL) { 3668219089Spjd if (strstr(devpath, "/iscsi/ssd") != NULL) { 3669219089Spjd /* iscsi boot */ 3670219089Spjd get_iscsi_bootpath_phy(devpath); 3671219089Spjd config = spa_generate_rootconf(devpath, devid, &guid); 3672219089Spjd } 3673219089Spjd } 3674219089Spjd#endif 3675219089Spjd if (config == NULL) { 3676236884Smm cmn_err(CE_NOTE, "Cannot read the pool label from '%s'", 3677219089Spjd devpath); 3678219089Spjd return (EIO); 3679219089Spjd } 3680185029Spjd 3681219089Spjd VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 3682219089Spjd &pname) == 0); 3683219089Spjd VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); 3684185029Spjd 3685209962Smm mutex_enter(&spa_namespace_lock); 3686209962Smm if ((spa = spa_lookup(pname)) != NULL) { 3687209962Smm /* 3688209962Smm * Remove the existing root pool from the namespace so that we 3689209962Smm * can replace it with the correct config we just read in. 3690209962Smm */ 3691209962Smm spa_remove(spa); 3692209962Smm } 3693185029Spjd 3694219089Spjd spa = spa_add(pname, config, NULL); 3695209962Smm spa->spa_is_root = B_TRUE; 3696219089Spjd spa->spa_import_flags = ZFS_IMPORT_VERBATIM; 3697209962Smm 3698219089Spjd /* 3699219089Spjd * Build up a vdev tree based on the boot device's label config. 
3700219089Spjd */ 3701219089Spjd VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 3702219089Spjd &nvtop) == 0); 3703219089Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3704219089Spjd error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, 3705219089Spjd VDEV_ALLOC_ROOTPOOL); 3706219089Spjd spa_config_exit(spa, SCL_ALL, FTAG); 3707219089Spjd if (error) { 3708209962Smm mutex_exit(&spa_namespace_lock); 3709219089Spjd nvlist_free(config); 3710219089Spjd cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", 3711219089Spjd pname); 3712219089Spjd return (error); 3713209962Smm } 3714209962Smm 3715219089Spjd /* 3716219089Spjd * Get the boot vdev. 3717219089Spjd */ 3718219089Spjd if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { 3719219089Spjd cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", 3720219089Spjd (u_longlong_t)guid); 3721219089Spjd error = ENOENT; 3722219089Spjd goto out; 3723219089Spjd } 3724209962Smm 3725219089Spjd /* 3726219089Spjd * Determine if there is a better boot device. 3727219089Spjd */ 3728219089Spjd avd = bvd; 3729219089Spjd spa_alt_rootvdev(rvd, &avd, &txg); 3730219089Spjd if (avd != bvd) { 3731219089Spjd cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " 3732219089Spjd "try booting from '%s'", avd->vdev_path); 3733219089Spjd error = EINVAL; 3734219089Spjd goto out; 3735219089Spjd } 3736209962Smm 3737219089Spjd /* 3738219089Spjd * If the boot device is part of a spare vdev then ensure that 3739219089Spjd * we're booting off the active spare. 3740219089Spjd */ 3741219089Spjd if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && 3742219089Spjd !bvd->vdev_isspare) { 3743219089Spjd cmn_err(CE_NOTE, "The boot device is currently spared. 
Please " 3744219089Spjd "try booting from '%s'", 3745219089Spjd bvd->vdev_parent-> 3746219089Spjd vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); 3747219089Spjd error = EINVAL; 3748219089Spjd goto out; 3749219089Spjd } 3750209962Smm 3751219089Spjd error = 0; 3752219089Spjd spa_history_log_version(spa, LOG_POOL_IMPORT); 3753219089Spjdout: 3754219089Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3755219089Spjd vdev_free(rvd); 3756219089Spjd spa_config_exit(spa, SCL_ALL, FTAG); 3757209962Smm mutex_exit(&spa_namespace_lock); 3758209962Smm 3759219089Spjd nvlist_free(config); 3760219089Spjd return (error); 3761185029Spjd} 3762185029Spjd 3763241286Savg#else 3764241286Savg 3765243502Savgextern int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs, 3766243502Savg uint64_t *count); 3767241286Savg 3768241286Savgstatic nvlist_t * 3769241286Savgspa_generate_rootconf(const char *name) 3770241286Savg{ 3771243502Savg nvlist_t **configs, **tops; 3772241286Savg nvlist_t *config; 3773243502Savg nvlist_t *best_cfg, *nvtop, *nvroot; 3774243502Savg uint64_t *holes; 3775243502Savg uint64_t best_txg; 3776243213Savg uint64_t nchildren; 3777241286Savg uint64_t pgid; 3778243502Savg uint64_t count; 3779243502Savg uint64_t i; 3780243502Savg uint_t nholes; 3781241286Savg 3782243502Savg if (vdev_geom_read_pool_label(name, &configs, &count) != 0) 3783241286Savg return (NULL); 3784241286Savg 3785243502Savg ASSERT3U(count, !=, 0); 3786243502Savg best_txg = 0; 3787243502Savg for (i = 0; i < count; i++) { 3788243502Savg uint64_t txg; 3789243502Savg 3790243502Savg VERIFY(nvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG, 3791243502Savg &txg) == 0); 3792243502Savg if (txg > best_txg) { 3793243502Savg best_txg = txg; 3794243502Savg best_cfg = configs[i]; 3795243502Savg } 3796243502Savg } 3797243502Savg 3798241286Savg /* 3799243213Savg * Multi-vdev root pool configuration discovery is not supported yet. 
3800243213Savg */ 3801245945Savg nchildren = 1; 3802245945Savg nvlist_lookup_uint64(best_cfg, ZPOOL_CONFIG_VDEV_CHILDREN, &nchildren); 3803243502Savg holes = NULL; 3804243502Savg nvlist_lookup_uint64_array(best_cfg, ZPOOL_CONFIG_HOLE_ARRAY, 3805243502Savg &holes, &nholes); 3806243502Savg 3807244635Savg tops = kmem_zalloc(nchildren * sizeof(void *), KM_SLEEP); 3808243502Savg for (i = 0; i < nchildren; i++) { 3809243502Savg if (i >= count) 3810243502Savg break; 3811243502Savg if (configs[i] == NULL) 3812243502Savg continue; 3813243502Savg VERIFY(nvlist_lookup_nvlist(configs[i], ZPOOL_CONFIG_VDEV_TREE, 3814243502Savg &nvtop) == 0); 3815243502Savg nvlist_dup(nvtop, &tops[i], KM_SLEEP); 3816243213Savg } 3817243502Savg for (i = 0; holes != NULL && i < nholes; i++) { 3818243502Savg if (i >= nchildren) 3819243502Savg continue; 3820243502Savg if (tops[holes[i]] != NULL) 3821243502Savg continue; 3822243502Savg nvlist_alloc(&tops[holes[i]], NV_UNIQUE_NAME, KM_SLEEP); 3823243502Savg VERIFY(nvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE, 3824243502Savg VDEV_TYPE_HOLE) == 0); 3825243502Savg VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID, 3826243502Savg holes[i]) == 0); 3827243502Savg VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID, 3828243502Savg 0) == 0); 3829243502Savg } 3830243502Savg for (i = 0; i < nchildren; i++) { 3831243502Savg if (tops[i] != NULL) 3832243502Savg continue; 3833243502Savg nvlist_alloc(&tops[i], NV_UNIQUE_NAME, KM_SLEEP); 3834243502Savg VERIFY(nvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE, 3835243502Savg VDEV_TYPE_MISSING) == 0); 3836243502Savg VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID, 3837243502Savg i) == 0); 3838243502Savg VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID, 3839243502Savg 0) == 0); 3840243502Savg } 3841243213Savg 3842243213Savg /* 3843243502Savg * Create pool config based on the best vdev config. 
3844241286Savg */ 3845243502Savg nvlist_dup(best_cfg, &config, KM_SLEEP); 3846241286Savg 3847241286Savg /* 3848241286Savg * Put this pool's top-level vdevs into a root vdev. 3849241286Savg */ 3850243502Savg VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 3851243502Savg &pgid) == 0); 3852241286Savg VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 3853241286Savg VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 3854241286Savg VDEV_TYPE_ROOT) == 0); 3855241286Savg VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 3856241286Savg VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); 3857241286Savg VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 3858243502Savg tops, nchildren) == 0); 3859241286Savg 3860241286Savg /* 3861241286Savg * Replace the existing vdev_tree with the new root vdev in 3862241286Savg * this pool's configuration (remove the old, add the new). 3863241286Savg */ 3864241286Savg VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); 3865243502Savg 3866243502Savg /* 3867243502Savg * Drop vdev config elements that should not be present at pool level. 
3868243502Savg */ 3869243502Savg nvlist_remove(config, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64); 3870243502Savg nvlist_remove(config, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64); 3871243502Savg 3872243502Savg for (i = 0; i < count; i++) 3873243502Savg nvlist_free(configs[i]); 3874243502Savg kmem_free(configs, count * sizeof(void *)); 3875243502Savg for (i = 0; i < nchildren; i++) 3876243502Savg nvlist_free(tops[i]); 3877243502Savg kmem_free(tops, nchildren * sizeof(void *)); 3878241286Savg nvlist_free(nvroot); 3879241286Savg return (config); 3880241286Savg} 3881241286Savg 3882241286Savgint 3883241286Savgspa_import_rootpool(const char *name) 3884241286Savg{ 3885241286Savg spa_t *spa; 3886241286Savg vdev_t *rvd, *bvd, *avd = NULL; 3887241286Savg nvlist_t *config, *nvtop; 3888241286Savg uint64_t txg; 3889241286Savg char *pname; 3890241286Savg int error; 3891241286Savg 3892241286Savg /* 3893241286Savg * Read the label from the boot device and generate a configuration. 3894241286Savg */ 3895241286Savg config = spa_generate_rootconf(name); 3896243213Savg 3897243213Savg mutex_enter(&spa_namespace_lock); 3898243213Savg if (config != NULL) { 3899243213Savg VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 3900243213Savg &pname) == 0 && strcmp(name, pname) == 0); 3901243213Savg VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) 3902243213Savg == 0); 3903243213Savg 3904243213Savg if ((spa = spa_lookup(pname)) != NULL) { 3905243213Savg /* 3906243213Savg * Remove the existing root pool from the namespace so 3907243213Savg * that we can replace it with the correct config 3908243213Savg * we just read in. 3909243213Savg */ 3910243213Savg spa_remove(spa); 3911243213Savg } 3912243213Savg spa = spa_add(pname, config, NULL); 3913243501Savg 3914243501Savg /* 3915243501Savg * Set spa_ubsync.ub_version as it can be used in vdev_alloc() 3916243501Savg * via spa_version(). 
3917243501Savg */ 3918243501Savg if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 3919243501Savg &spa->spa_ubsync.ub_version) != 0) 3920243501Savg spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; 3921243213Savg } else if ((spa = spa_lookup(name)) == NULL) { 3922241286Savg cmn_err(CE_NOTE, "Cannot find the pool label for '%s'", 3923241286Savg name); 3924241286Savg return (EIO); 3925243213Savg } else { 3926243213Savg VERIFY(nvlist_dup(spa->spa_config, &config, KM_SLEEP) == 0); 3927241286Savg } 3928241286Savg spa->spa_is_root = B_TRUE; 3929241286Savg spa->spa_import_flags = ZFS_IMPORT_VERBATIM; 3930241286Savg 3931241286Savg /* 3932241286Savg * Build up a vdev tree based on the boot device's label config. 3933241286Savg */ 3934241286Savg VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 3935241286Savg &nvtop) == 0); 3936241286Savg spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3937241286Savg error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, 3938241286Savg VDEV_ALLOC_ROOTPOOL); 3939241286Savg spa_config_exit(spa, SCL_ALL, FTAG); 3940241286Savg if (error) { 3941241286Savg mutex_exit(&spa_namespace_lock); 3942241286Savg nvlist_free(config); 3943241286Savg cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", 3944241286Savg pname); 3945241286Savg return (error); 3946241286Savg } 3947241286Savg 3948241286Savg spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 3949241286Savg vdev_free(rvd); 3950241286Savg spa_config_exit(spa, SCL_ALL, FTAG); 3951241286Savg mutex_exit(&spa_namespace_lock); 3952241286Savg 3953243213Savg nvlist_free(config); 3954243213Savg return (0); 3955241286Savg} 3956241286Savg 3957241286Savg#endif /* sun */ 3958219089Spjd#endif 3959219089Spjd 3960209962Smm/* 3961209962Smm * Import a non-root pool into the system. 
3962209962Smm */ 3963185029Spjdint 3964219089Spjdspa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) 3965185029Spjd{ 3966209962Smm spa_t *spa; 3967209962Smm char *altroot = NULL; 3968219089Spjd spa_load_state_t state = SPA_LOAD_IMPORT; 3969219089Spjd zpool_rewind_policy_t policy; 3970219089Spjd uint64_t mode = spa_mode_global; 3971219089Spjd uint64_t readonly = B_FALSE; 3972209962Smm int error; 3973209962Smm nvlist_t *nvroot; 3974209962Smm nvlist_t **spares, **l2cache; 3975209962Smm uint_t nspares, nl2cache; 3976209962Smm 3977209962Smm /* 3978209962Smm * If a pool with this name exists, return failure. 3979209962Smm */ 3980209962Smm mutex_enter(&spa_namespace_lock); 3981219089Spjd if (spa_lookup(pool) != NULL) { 3982209962Smm mutex_exit(&spa_namespace_lock); 3983209962Smm return (EEXIST); 3984209962Smm } 3985209962Smm 3986209962Smm /* 3987209962Smm * Create and initialize the spa structure. 3988209962Smm */ 3989209962Smm (void) nvlist_lookup_string(props, 3990209962Smm zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 3991219089Spjd (void) nvlist_lookup_uint64(props, 3992219089Spjd zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); 3993219089Spjd if (readonly) 3994219089Spjd mode = FREAD; 3995219089Spjd spa = spa_add(pool, config, altroot); 3996219089Spjd spa->spa_import_flags = flags; 3997209962Smm 3998209962Smm /* 3999219089Spjd * Verbatim import - Take a pool and insert it into the namespace 4000219089Spjd * as if it had been loaded at boot. 
4001219089Spjd */ 4002219089Spjd if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { 4003219089Spjd if (props != NULL) 4004219089Spjd spa_configfile_set(spa, props, B_FALSE); 4005219089Spjd 4006219089Spjd spa_config_sync(spa, B_FALSE, B_TRUE); 4007219089Spjd 4008219089Spjd mutex_exit(&spa_namespace_lock); 4009219089Spjd spa_history_log_version(spa, LOG_POOL_IMPORT); 4010219089Spjd 4011219089Spjd return (0); 4012219089Spjd } 4013219089Spjd 4014219089Spjd spa_activate(spa, mode); 4015219089Spjd 4016219089Spjd /* 4017209962Smm * Don't start async tasks until we know everything is healthy. 4018209962Smm */ 4019209962Smm spa_async_suspend(spa); 4020209962Smm 4021219089Spjd zpool_get_rewind_policy(config, &policy); 4022219089Spjd if (policy.zrp_request & ZPOOL_DO_REWIND) 4023219089Spjd state = SPA_LOAD_RECOVER; 4024219089Spjd 4025209962Smm /* 4026209962Smm * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig 4027209962Smm * because the user-supplied config is actually the one to trust when 4028209962Smm * doing an import. 4029209962Smm */ 4030219089Spjd if (state != SPA_LOAD_RECOVER) 4031219089Spjd spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; 4032209962Smm 4033219089Spjd error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg, 4034219089Spjd policy.zrp_request); 4035219089Spjd 4036219089Spjd /* 4037219089Spjd * Propagate anything learned while loading the pool and pass it 4038219089Spjd * back to caller (i.e. rewind info, missing devices, etc). 4039219089Spjd */ 4040219089Spjd VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, 4041219089Spjd spa->spa_load_info) == 0); 4042219089Spjd 4043209962Smm spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4044209962Smm /* 4045209962Smm * Toss any existing sparelist, as it doesn't have any validity 4046209962Smm * anymore, and conflicts with spa_has_spare(). 
4047209962Smm */ 4048209962Smm if (spa->spa_spares.sav_config) { 4049209962Smm nvlist_free(spa->spa_spares.sav_config); 4050209962Smm spa->spa_spares.sav_config = NULL; 4051209962Smm spa_load_spares(spa); 4052209962Smm } 4053209962Smm if (spa->spa_l2cache.sav_config) { 4054209962Smm nvlist_free(spa->spa_l2cache.sav_config); 4055209962Smm spa->spa_l2cache.sav_config = NULL; 4056209962Smm spa_load_l2cache(spa); 4057209962Smm } 4058209962Smm 4059209962Smm VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 4060209962Smm &nvroot) == 0); 4061209962Smm if (error == 0) 4062209962Smm error = spa_validate_aux(spa, nvroot, -1ULL, 4063209962Smm VDEV_ALLOC_SPARE); 4064209962Smm if (error == 0) 4065209962Smm error = spa_validate_aux(spa, nvroot, -1ULL, 4066209962Smm VDEV_ALLOC_L2CACHE); 4067209962Smm spa_config_exit(spa, SCL_ALL, FTAG); 4068209962Smm 4069209962Smm if (props != NULL) 4070209962Smm spa_configfile_set(spa, props, B_FALSE); 4071209962Smm 4072209962Smm if (error != 0 || (props && spa_writeable(spa) && 4073209962Smm (error = spa_prop_set(spa, props)))) { 4074209962Smm spa_unload(spa); 4075209962Smm spa_deactivate(spa); 4076209962Smm spa_remove(spa); 4077209962Smm mutex_exit(&spa_namespace_lock); 4078209962Smm return (error); 4079209962Smm } 4080209962Smm 4081209962Smm spa_async_resume(spa); 4082209962Smm 4083209962Smm /* 4084209962Smm * Override any spares and level 2 cache devices as specified by 4085209962Smm * the user, as these may have correct device names/devids, etc. 
4086209962Smm */ 4087209962Smm if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 4088209962Smm &spares, &nspares) == 0) { 4089209962Smm if (spa->spa_spares.sav_config) 4090209962Smm VERIFY(nvlist_remove(spa->spa_spares.sav_config, 4091209962Smm ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); 4092209962Smm else 4093209962Smm VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, 4094209962Smm NV_UNIQUE_NAME, KM_SLEEP) == 0); 4095209962Smm VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, 4096209962Smm ZPOOL_CONFIG_SPARES, spares, nspares) == 0); 4097209962Smm spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4098209962Smm spa_load_spares(spa); 4099209962Smm spa_config_exit(spa, SCL_ALL, FTAG); 4100209962Smm spa->spa_spares.sav_sync = B_TRUE; 4101209962Smm } 4102209962Smm if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, 4103209962Smm &l2cache, &nl2cache) == 0) { 4104209962Smm if (spa->spa_l2cache.sav_config) 4105209962Smm VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, 4106209962Smm ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); 4107209962Smm else 4108209962Smm VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, 4109209962Smm NV_UNIQUE_NAME, KM_SLEEP) == 0); 4110209962Smm VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, 4111209962Smm ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); 4112209962Smm spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4113209962Smm spa_load_l2cache(spa); 4114209962Smm spa_config_exit(spa, SCL_ALL, FTAG); 4115209962Smm spa->spa_l2cache.sav_sync = B_TRUE; 4116209962Smm } 4117209962Smm 4118219089Spjd /* 4119219089Spjd * Check for any removed devices. 4120219089Spjd */ 4121219089Spjd if (spa->spa_autoreplace) { 4122219089Spjd spa_aux_check_removed(&spa->spa_spares); 4123219089Spjd spa_aux_check_removed(&spa->spa_l2cache); 4124219089Spjd } 4125219089Spjd 4126209962Smm if (spa_writeable(spa)) { 4127209962Smm /* 4128209962Smm * Update the config cache to include the newly-imported pool. 
4129209962Smm */ 4130209962Smm spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 4131209962Smm } 4132209962Smm 4133219089Spjd /* 4134219089Spjd * It's possible that the pool was expanded while it was exported. 4135219089Spjd * We kick off an async task to handle this for us. 4136219089Spjd */ 4137219089Spjd spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); 4138219089Spjd 4139209962Smm mutex_exit(&spa_namespace_lock); 4140219089Spjd spa_history_log_version(spa, LOG_POOL_IMPORT); 4141209962Smm 4142219089Spjd#ifdef __FreeBSD__ 4143219089Spjd#ifdef _KERNEL 4144219089Spjd zvol_create_minors(pool); 4145219089Spjd#endif 4146219089Spjd#endif 4147209962Smm return (0); 4148185029Spjd} 4149185029Spjd 4150168404Spjdnvlist_t * 4151168404Spjdspa_tryimport(nvlist_t *tryconfig) 4152168404Spjd{ 4153168404Spjd nvlist_t *config = NULL; 4154168404Spjd char *poolname; 4155168404Spjd spa_t *spa; 4156168404Spjd uint64_t state; 4157208443Smm int error; 4158168404Spjd 4159168404Spjd if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) 4160168404Spjd return (NULL); 4161168404Spjd 4162168404Spjd if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) 4163168404Spjd return (NULL); 4164168404Spjd 4165168404Spjd /* 4166168404Spjd * Create and initialize the spa structure. 4167168404Spjd */ 4168168404Spjd mutex_enter(&spa_namespace_lock); 4169219089Spjd spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); 4170209962Smm spa_activate(spa, FREAD); 4171168404Spjd 4172168404Spjd /* 4173168404Spjd * Pass off the heavy lifting to spa_load(). 4174168404Spjd * Pass TRUE for mosconfig because the user-supplied config 4175168404Spjd * is actually the one to trust when doing an import. 4176168404Spjd */ 4177219089Spjd error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE); 4178168404Spjd 4179168404Spjd /* 4180168404Spjd * If 'tryconfig' was at least parsable, return the current config. 
4181168404Spjd */ 4182168404Spjd if (spa->spa_root_vdev != NULL) { 4183168404Spjd config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); 4184168404Spjd VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, 4185168404Spjd poolname) == 0); 4186168404Spjd VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 4187168404Spjd state) == 0); 4188168498Spjd VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, 4189168498Spjd spa->spa_uberblock.ub_timestamp) == 0); 4190236884Smm VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, 4191236884Smm spa->spa_load_info) == 0); 4192168404Spjd 4193168404Spjd /* 4194185029Spjd * If the bootfs property exists on this pool then we 4195185029Spjd * copy it out so that external consumers can tell which 4196185029Spjd * pools are bootable. 4197168404Spjd */ 4198208443Smm if ((!error || error == EEXIST) && spa->spa_bootfs) { 4199185029Spjd char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4200185029Spjd 4201185029Spjd /* 4202185029Spjd * We have to play games with the name since the 4203185029Spjd * pool was opened as TRYIMPORT_NAME. 4204185029Spjd */ 4205185029Spjd if (dsl_dsobj_to_dsname(spa_name(spa), 4206185029Spjd spa->spa_bootfs, tmpname) == 0) { 4207185029Spjd char *cp; 4208185029Spjd char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4209185029Spjd 4210185029Spjd cp = strchr(tmpname, '/'); 4211185029Spjd if (cp == NULL) { 4212185029Spjd (void) strlcpy(dsname, tmpname, 4213185029Spjd MAXPATHLEN); 4214185029Spjd } else { 4215185029Spjd (void) snprintf(dsname, MAXPATHLEN, 4216185029Spjd "%s/%s", poolname, ++cp); 4217185029Spjd } 4218185029Spjd VERIFY(nvlist_add_string(config, 4219185029Spjd ZPOOL_CONFIG_BOOTFS, dsname) == 0); 4220185029Spjd kmem_free(dsname, MAXPATHLEN); 4221185029Spjd } 4222185029Spjd kmem_free(tmpname, MAXPATHLEN); 4223185029Spjd } 4224185029Spjd 4225185029Spjd /* 4226185029Spjd * Add the list of hot spares and level 2 cache devices. 
4227185029Spjd */ 4228209962Smm spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 4229168404Spjd spa_add_spares(spa, config); 4230185029Spjd spa_add_l2cache(spa, config); 4231209962Smm spa_config_exit(spa, SCL_CONFIG, FTAG); 4232168404Spjd } 4233168404Spjd 4234168404Spjd spa_unload(spa); 4235168404Spjd spa_deactivate(spa); 4236168404Spjd spa_remove(spa); 4237168404Spjd mutex_exit(&spa_namespace_lock); 4238168404Spjd 4239168404Spjd return (config); 4240168404Spjd} 4241168404Spjd 4242168404Spjd/* 4243168404Spjd * Pool export/destroy 4244168404Spjd * 4245168404Spjd * The act of destroying or exporting a pool is very simple. We make sure there 4246168404Spjd * is no more pending I/O and any references to the pool are gone. Then, we 4247168404Spjd * update the pool state and sync all the labels to disk, removing the 4248207670Smm * configuration from the cache afterwards. If the 'hardforce' flag is set, then 4249207670Smm * we don't sync the labels or remove the configuration cache. 4250168404Spjd */ 4251168404Spjdstatic int 4252185029Spjdspa_export_common(char *pool, int new_state, nvlist_t **oldconfig, 4253207670Smm boolean_t force, boolean_t hardforce) 4254168404Spjd{ 4255168404Spjd spa_t *spa; 4256168404Spjd 4257168404Spjd if (oldconfig) 4258168404Spjd *oldconfig = NULL; 4259168404Spjd 4260209962Smm if (!(spa_mode_global & FWRITE)) 4261168404Spjd return (EROFS); 4262168404Spjd 4263168404Spjd mutex_enter(&spa_namespace_lock); 4264168404Spjd if ((spa = spa_lookup(pool)) == NULL) { 4265168404Spjd mutex_exit(&spa_namespace_lock); 4266168404Spjd return (ENOENT); 4267168404Spjd } 4268168404Spjd 4269168404Spjd /* 4270168404Spjd * Put a hold on the pool, drop the namespace lock, stop async tasks, 4271168404Spjd * reacquire the namespace lock, and see if we can export. 
4272168404Spjd */ 4273168404Spjd spa_open_ref(spa, FTAG); 4274168404Spjd mutex_exit(&spa_namespace_lock); 4275168404Spjd spa_async_suspend(spa); 4276168404Spjd mutex_enter(&spa_namespace_lock); 4277168404Spjd spa_close(spa, FTAG); 4278168404Spjd 4279168404Spjd /* 4280168404Spjd * The pool will be in core if it's openable, 4281168404Spjd * in which case we can modify its state. 4282168404Spjd */ 4283168404Spjd if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { 4284168404Spjd /* 4285168404Spjd * Objsets may be open only because they're dirty, so we 4286168404Spjd * have to force it to sync before checking spa_refcnt. 4287168404Spjd */ 4288168404Spjd txg_wait_synced(spa->spa_dsl_pool, 0); 4289168404Spjd 4290168404Spjd /* 4291168404Spjd * A pool cannot be exported or destroyed if there are active 4292168404Spjd * references. If we are resetting a pool, allow references by 4293168404Spjd * fault injection handlers. 4294168404Spjd */ 4295168404Spjd if (!spa_refcount_zero(spa) || 4296168404Spjd (spa->spa_inject_ref != 0 && 4297168404Spjd new_state != POOL_STATE_UNINITIALIZED)) { 4298168404Spjd spa_async_resume(spa); 4299168404Spjd mutex_exit(&spa_namespace_lock); 4300168404Spjd return (EBUSY); 4301168404Spjd } 4302168404Spjd 4303185029Spjd /* 4304185029Spjd * A pool cannot be exported if it has an active shared spare. 4305185029Spjd * This is to prevent other pools stealing the active spare 4306185029Spjd * from an exported pool. At user's own will, such pool can 4307185029Spjd * be forcedly exported. 4308185029Spjd */ 4309185029Spjd if (!force && new_state == POOL_STATE_EXPORTED && 4310185029Spjd spa_has_active_shared_spare(spa)) { 4311185029Spjd spa_async_resume(spa); 4312185029Spjd mutex_exit(&spa_namespace_lock); 4313185029Spjd return (EXDEV); 4314185029Spjd } 4315168404Spjd 4316168404Spjd /* 4317168404Spjd * We want this to be reflected on every label, 4318168404Spjd * so mark them all dirty. 
spa_unload() will do the 4319168404Spjd * final sync that pushes these changes out. 4320168404Spjd */ 4321207670Smm if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { 4322185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 4323168404Spjd spa->spa_state = new_state; 4324219089Spjd spa->spa_final_txg = spa_last_synced_txg(spa) + 4325219089Spjd TXG_DEFER_SIZE + 1; 4326168404Spjd vdev_config_dirty(spa->spa_root_vdev); 4327185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 4328168404Spjd } 4329168404Spjd } 4330168404Spjd 4331185029Spjd spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); 4332185029Spjd 4333168404Spjd if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 4334168404Spjd spa_unload(spa); 4335168404Spjd spa_deactivate(spa); 4336168404Spjd } 4337168404Spjd 4338168404Spjd if (oldconfig && spa->spa_config) 4339168404Spjd VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); 4340168404Spjd 4341168404Spjd if (new_state != POOL_STATE_UNINITIALIZED) { 4342207670Smm if (!hardforce) 4343207670Smm spa_config_sync(spa, B_TRUE, B_TRUE); 4344168404Spjd spa_remove(spa); 4345168404Spjd } 4346168404Spjd mutex_exit(&spa_namespace_lock); 4347168404Spjd 4348168404Spjd return (0); 4349168404Spjd} 4350168404Spjd 4351168404Spjd/* 4352168404Spjd * Destroy a storage pool. 4353168404Spjd */ 4354168404Spjdint 4355168404Spjdspa_destroy(char *pool) 4356168404Spjd{ 4357207670Smm return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, 4358207670Smm B_FALSE, B_FALSE)); 4359168404Spjd} 4360168404Spjd 4361168404Spjd/* 4362168404Spjd * Export a storage pool. 
4363168404Spjd */ 4364168404Spjdint 4365207670Smmspa_export(char *pool, nvlist_t **oldconfig, boolean_t force, 4366207670Smm boolean_t hardforce) 4367168404Spjd{ 4368207670Smm return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, 4369207670Smm force, hardforce)); 4370168404Spjd} 4371168404Spjd 4372168404Spjd/* 4373168404Spjd * Similar to spa_export(), this unloads the spa_t without actually removing it 4374168404Spjd * from the namespace in any way. 4375168404Spjd */ 4376168404Spjdint 4377168404Spjdspa_reset(char *pool) 4378168404Spjd{ 4379185029Spjd return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, 4380207670Smm B_FALSE, B_FALSE)); 4381168404Spjd} 4382168404Spjd 4383168404Spjd/* 4384168404Spjd * ========================================================================== 4385168404Spjd * Device manipulation 4386168404Spjd * ========================================================================== 4387168404Spjd */ 4388168404Spjd 4389168404Spjd/* 4390185029Spjd * Add a device to a storage pool. 
4391168404Spjd */ 4392168404Spjdint 4393168404Spjdspa_vdev_add(spa_t *spa, nvlist_t *nvroot) 4394168404Spjd{ 4395219089Spjd uint64_t txg, id; 4396209962Smm int error; 4397168404Spjd vdev_t *rvd = spa->spa_root_vdev; 4398168404Spjd vdev_t *vd, *tvd; 4399185029Spjd nvlist_t **spares, **l2cache; 4400185029Spjd uint_t nspares, nl2cache; 4401168404Spjd 4402219089Spjd ASSERT(spa_writeable(spa)); 4403219089Spjd 4404168404Spjd txg = spa_vdev_enter(spa); 4405168404Spjd 4406168404Spjd if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, 4407168404Spjd VDEV_ALLOC_ADD)) != 0) 4408168404Spjd return (spa_vdev_exit(spa, NULL, txg, error)); 4409168404Spjd 4410185029Spjd spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ 4411168404Spjd 4412185029Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, 4413185029Spjd &nspares) != 0) 4414168404Spjd nspares = 0; 4415168404Spjd 4416185029Spjd if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, 4417185029Spjd &nl2cache) != 0) 4418185029Spjd nl2cache = 0; 4419185029Spjd 4420185029Spjd if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) 4421168404Spjd return (spa_vdev_exit(spa, vd, txg, EINVAL)); 4422168404Spjd 4423185029Spjd if (vd->vdev_children != 0 && 4424185029Spjd (error = vdev_create(vd, txg, B_FALSE)) != 0) 4425185029Spjd return (spa_vdev_exit(spa, vd, txg, error)); 4426168404Spjd 4427168404Spjd /* 4428185029Spjd * We must validate the spares and l2cache devices after checking the 4429185029Spjd * children. Otherwise, vdev_inuse() will blindly overwrite the spare. 4430168404Spjd */ 4431185029Spjd if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) 4432168404Spjd return (spa_vdev_exit(spa, vd, txg, error)); 4433168404Spjd 4434168404Spjd /* 4435168404Spjd * Transfer each new top-level vdev from vd to rvd. 
4436168404Spjd */ 4437209962Smm for (int c = 0; c < vd->vdev_children; c++) { 4438219089Spjd 4439219089Spjd /* 4440219089Spjd * Set the vdev id to the first hole, if one exists. 4441219089Spjd */ 4442219089Spjd for (id = 0; id < rvd->vdev_children; id++) { 4443219089Spjd if (rvd->vdev_child[id]->vdev_ishole) { 4444219089Spjd vdev_free(rvd->vdev_child[id]); 4445219089Spjd break; 4446219089Spjd } 4447219089Spjd } 4448168404Spjd tvd = vd->vdev_child[c]; 4449168404Spjd vdev_remove_child(vd, tvd); 4450219089Spjd tvd->vdev_id = id; 4451168404Spjd vdev_add_child(rvd, tvd); 4452168404Spjd vdev_config_dirty(tvd); 4453168404Spjd } 4454168404Spjd 4455168404Spjd if (nspares != 0) { 4456185029Spjd spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, 4457185029Spjd ZPOOL_CONFIG_SPARES); 4458168404Spjd spa_load_spares(spa); 4459185029Spjd spa->spa_spares.sav_sync = B_TRUE; 4460168404Spjd } 4461168404Spjd 4462185029Spjd if (nl2cache != 0) { 4463185029Spjd spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, 4464185029Spjd ZPOOL_CONFIG_L2CACHE); 4465185029Spjd spa_load_l2cache(spa); 4466185029Spjd spa->spa_l2cache.sav_sync = B_TRUE; 4467185029Spjd } 4468185029Spjd 4469168404Spjd /* 4470168404Spjd * We have to be careful when adding new vdevs to an existing pool. 4471168404Spjd * If other threads start allocating from these vdevs before we 4472168404Spjd * sync the config cache, and we lose power, then upon reboot we may 4473168404Spjd * fail to open the pool because there are DVAs that the config cache 4474168404Spjd * can't translate. Therefore, we first add the vdevs without 4475168404Spjd * initializing metaslabs; sync the config cache (via spa_vdev_exit()); 4476168404Spjd * and then let spa_config_update() initialize the new metaslabs. 
4477168404Spjd * 4478168404Spjd * spa_load() checks for added-but-not-initialized vdevs, so that 4479168404Spjd * if we lose power at any point in this sequence, the remaining 4480168404Spjd * steps will be completed the next time we load the pool. 4481168404Spjd */ 4482168404Spjd (void) spa_vdev_exit(spa, vd, txg, 0); 4483168404Spjd 4484168404Spjd mutex_enter(&spa_namespace_lock); 4485168404Spjd spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 4486168404Spjd mutex_exit(&spa_namespace_lock); 4487168404Spjd 4488168404Spjd return (0); 4489168404Spjd} 4490168404Spjd 4491168404Spjd/* 4492168404Spjd * Attach a device to a mirror. The arguments are the path to any device 4493168404Spjd * in the mirror, and the nvroot for the new device. If the path specifies 4494168404Spjd * a device that is not mirrored, we automatically insert the mirror vdev. 4495168404Spjd * 4496168404Spjd * If 'replacing' is specified, the new device is intended to replace the 4497168404Spjd * existing device; in this case the two devices are made into their own 4498185029Spjd * mirror using the 'replacing' vdev, which is functionally identical to 4499168404Spjd * the mirror vdev (it actually reuses all the same ops) but has a few 4500168404Spjd * extra rules: you can't attach to it after it's been created, and upon 4501168404Spjd * completion of resilvering, the first disk (the one being replaced) 4502168404Spjd * is automatically detached. 
4503168404Spjd */ 4504168404Spjdint 4505168404Spjdspa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) 4506168404Spjd{ 4507219089Spjd uint64_t txg, dtl_max_txg; 4508168404Spjd vdev_t *rvd = spa->spa_root_vdev; 4509168404Spjd vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; 4510168404Spjd vdev_ops_t *pvops; 4511185029Spjd char *oldvdpath, *newvdpath; 4512185029Spjd int newvd_isspare; 4513185029Spjd int error; 4514168404Spjd 4515219089Spjd ASSERT(spa_writeable(spa)); 4516219089Spjd 4517168404Spjd txg = spa_vdev_enter(spa); 4518168404Spjd 4519185029Spjd oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); 4520168404Spjd 4521168404Spjd if (oldvd == NULL) 4522168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 4523168404Spjd 4524168404Spjd if (!oldvd->vdev_ops->vdev_op_leaf) 4525168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 4526168404Spjd 4527168404Spjd pvd = oldvd->vdev_parent; 4528168404Spjd 4529168404Spjd if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, 4530230514Smm VDEV_ALLOC_ATTACH)) != 0) 4531185029Spjd return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 4532185029Spjd 4533185029Spjd if (newrootvd->vdev_children != 1) 4534168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 4535168404Spjd 4536168404Spjd newvd = newrootvd->vdev_child[0]; 4537168404Spjd 4538168404Spjd if (!newvd->vdev_ops->vdev_op_leaf) 4539168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); 4540168404Spjd 4541168404Spjd if ((error = vdev_create(newrootvd, txg, replacing)) != 0) 4542168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, error)); 4543168404Spjd 4544185029Spjd /* 4545185029Spjd * Spares can't replace logs 4546185029Spjd */ 4547185029Spjd if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) 4548185029Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 4549185029Spjd 4550168404Spjd if (!replacing) { 4551168404Spjd /* 4552168404Spjd * For attach, the only allowable parent is a mirror or the root 
4553168404Spjd * vdev. 4554168404Spjd */ 4555168404Spjd if (pvd->vdev_ops != &vdev_mirror_ops && 4556168404Spjd pvd->vdev_ops != &vdev_root_ops) 4557168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 4558168404Spjd 4559168404Spjd pvops = &vdev_mirror_ops; 4560168404Spjd } else { 4561168404Spjd /* 4562168404Spjd * Active hot spares can only be replaced by inactive hot 4563168404Spjd * spares. 4564168404Spjd */ 4565168404Spjd if (pvd->vdev_ops == &vdev_spare_ops && 4566219089Spjd oldvd->vdev_isspare && 4567168404Spjd !spa_has_spare(spa, newvd->vdev_guid)) 4568168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 4569168404Spjd 4570168404Spjd /* 4571168404Spjd * If the source is a hot spare, and the parent isn't already a 4572168404Spjd * spare, then we want to create a new hot spare. Otherwise, we 4573168404Spjd * want to create a replacing vdev. The user is not allowed to 4574168404Spjd * attach to a spared vdev child unless the 'isspare' state is 4575168404Spjd * the same (spare replaces spare, non-spare replaces 4576168404Spjd * non-spare). 4577168404Spjd */ 4578219089Spjd if (pvd->vdev_ops == &vdev_replacing_ops && 4579219089Spjd spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { 4580168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 4581219089Spjd } else if (pvd->vdev_ops == &vdev_spare_ops && 4582219089Spjd newvd->vdev_isspare != oldvd->vdev_isspare) { 4583168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); 4584219089Spjd } 4585219089Spjd 4586219089Spjd if (newvd->vdev_isspare) 4587168404Spjd pvops = &vdev_spare_ops; 4588168404Spjd else 4589168404Spjd pvops = &vdev_replacing_ops; 4590168404Spjd } 4591168404Spjd 4592168404Spjd /* 4593219089Spjd * Make sure the new device is big enough. 
4594168404Spjd */ 4595219089Spjd if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) 4596168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); 4597168404Spjd 4598168404Spjd /* 4599168404Spjd * The new device cannot have a higher alignment requirement 4600168404Spjd * than the top-level vdev. 4601168404Spjd */ 4602168404Spjd if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) 4603168404Spjd return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); 4604168404Spjd 4605168404Spjd /* 4606168404Spjd * If this is an in-place replacement, update oldvd's path and devid 4607168404Spjd * to make it distinguishable from newvd, and unopenable from now on. 4608168404Spjd */ 4609168404Spjd if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { 4610168404Spjd spa_strfree(oldvd->vdev_path); 4611168404Spjd oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, 4612168404Spjd KM_SLEEP); 4613168404Spjd (void) sprintf(oldvd->vdev_path, "%s/%s", 4614168404Spjd newvd->vdev_path, "old"); 4615168404Spjd if (oldvd->vdev_devid != NULL) { 4616168404Spjd spa_strfree(oldvd->vdev_devid); 4617168404Spjd oldvd->vdev_devid = NULL; 4618168404Spjd } 4619168404Spjd } 4620168404Spjd 4621219089Spjd /* mark the device being resilvered */ 4622219089Spjd newvd->vdev_resilvering = B_TRUE; 4623219089Spjd 4624168404Spjd /* 4625168404Spjd * If the parent is not a mirror, or if we're replacing, insert the new 4626168404Spjd * mirror/replacing/spare vdev above oldvd. 4627168404Spjd */ 4628168404Spjd if (pvd->vdev_ops != pvops) 4629168404Spjd pvd = vdev_add_parent(oldvd, pvops); 4630168404Spjd 4631168404Spjd ASSERT(pvd->vdev_top->vdev_parent == rvd); 4632168404Spjd ASSERT(pvd->vdev_ops == pvops); 4633168404Spjd ASSERT(oldvd->vdev_parent == pvd); 4634168404Spjd 4635168404Spjd /* 4636168404Spjd * Extract the new device from its root and add it to pvd. 
4637168404Spjd */ 4638168404Spjd vdev_remove_child(newrootvd, newvd); 4639168404Spjd newvd->vdev_id = pvd->vdev_children; 4640219089Spjd newvd->vdev_crtxg = oldvd->vdev_crtxg; 4641168404Spjd vdev_add_child(pvd, newvd); 4642168404Spjd 4643168404Spjd tvd = newvd->vdev_top; 4644168404Spjd ASSERT(pvd->vdev_top == tvd); 4645168404Spjd ASSERT(tvd->vdev_parent == rvd); 4646168404Spjd 4647168404Spjd vdev_config_dirty(tvd); 4648168404Spjd 4649168404Spjd /* 4650219089Spjd * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account 4651219089Spjd * for any dmu_sync-ed blocks. It will propagate upward when 4652219089Spjd * spa_vdev_exit() calls vdev_dtl_reassess(). 4653168404Spjd */ 4654219089Spjd dtl_max_txg = txg + TXG_CONCURRENT_STATES; 4655168404Spjd 4656219089Spjd vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, 4657219089Spjd dtl_max_txg - TXG_INITIAL); 4658168404Spjd 4659209962Smm if (newvd->vdev_isspare) { 4660168404Spjd spa_spare_activate(newvd); 4661209962Smm spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); 4662209962Smm } 4663209962Smm 4664185029Spjd oldvdpath = spa_strdup(oldvd->vdev_path); 4665185029Spjd newvdpath = spa_strdup(newvd->vdev_path); 4666185029Spjd newvd_isspare = newvd->vdev_isspare; 4667168404Spjd 4668168404Spjd /* 4669168404Spjd * Mark newvd's DTL dirty in this txg. 4670168404Spjd */ 4671168404Spjd vdev_dirty(tvd, VDD_DTL, newvd, txg); 4672168404Spjd 4673219089Spjd /* 4674219089Spjd * Restart the resilver 4675219089Spjd */ 4676219089Spjd dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); 4677168404Spjd 4678219089Spjd /* 4679219089Spjd * Commit the config 4680219089Spjd */ 4681219089Spjd (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); 4682185029Spjd 4683219089Spjd spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL, 4684219089Spjd "%s vdev=%s %s vdev=%s", 4685219089Spjd replacing && newvd_isspare ? "spare in" : 4686219089Spjd replacing ? "replace" : "attach", newvdpath, 4687219089Spjd replacing ? 
"for" : "to", oldvdpath); 4688219089Spjd 4689185029Spjd spa_strfree(oldvdpath); 4690185029Spjd spa_strfree(newvdpath); 4691185029Spjd 4692219089Spjd if (spa->spa_bootfs) 4693219089Spjd spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH); 4694168404Spjd 4695168404Spjd return (0); 4696168404Spjd} 4697168404Spjd 4698168404Spjd/* 4699168404Spjd * Detach a device from a mirror or replacing vdev. 4700168404Spjd * If 'replace_done' is specified, only detach if the parent 4701168404Spjd * is a replacing vdev. 4702168404Spjd */ 4703168404Spjdint 4704209962Smmspa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) 4705168404Spjd{ 4706168404Spjd uint64_t txg; 4707209962Smm int error; 4708168404Spjd vdev_t *rvd = spa->spa_root_vdev; 4709168404Spjd vdev_t *vd, *pvd, *cvd, *tvd; 4710168404Spjd boolean_t unspare = B_FALSE; 4711247187Smm uint64_t unspare_guid = 0; 4712219089Spjd char *vdpath; 4713168404Spjd 4714219089Spjd ASSERT(spa_writeable(spa)); 4715219089Spjd 4716168404Spjd txg = spa_vdev_enter(spa); 4717168404Spjd 4718185029Spjd vd = spa_lookup_by_guid(spa, guid, B_FALSE); 4719168404Spjd 4720168404Spjd if (vd == NULL) 4721168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENODEV)); 4722168404Spjd 4723168404Spjd if (!vd->vdev_ops->vdev_op_leaf) 4724168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 4725168404Spjd 4726168404Spjd pvd = vd->vdev_parent; 4727168404Spjd 4728168404Spjd /* 4729209962Smm * If the parent/child relationship is not as expected, don't do it. 4730209962Smm * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing 4731209962Smm * vdev that's replacing B with C. The user's intent in replacing 4732209962Smm * is to go from M(A,B) to M(A,C). If the user decides to cancel 4733209962Smm * the replace by detaching C, the expected behavior is to end up 4734209962Smm * M(A,B). But suppose that right after deciding to detach C, 4735209962Smm * the replacement of B completes. 
We would have M(A,C), and then 4736209962Smm * ask to detach C, which would leave us with just A -- not what 4737209962Smm * the user wanted. To prevent this, we make sure that the 4738209962Smm * parent/child relationship hasn't changed -- in this example, 4739209962Smm * that C's parent is still the replacing vdev R. 4740209962Smm */ 4741209962Smm if (pvd->vdev_guid != pguid && pguid != 0) 4742209962Smm return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 4743209962Smm 4744209962Smm /* 4745219089Spjd * Only 'replacing' or 'spare' vdevs can be replaced. 4746168404Spjd */ 4747219089Spjd if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && 4748219089Spjd pvd->vdev_ops != &vdev_spare_ops) 4749219089Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 4750168404Spjd 4751168404Spjd ASSERT(pvd->vdev_ops != &vdev_spare_ops || 4752185029Spjd spa_version(spa) >= SPA_VERSION_SPARES); 4753168404Spjd 4754168404Spjd /* 4755168404Spjd * Only mirror, replacing, and spare vdevs support detach. 4756168404Spjd */ 4757168404Spjd if (pvd->vdev_ops != &vdev_replacing_ops && 4758168404Spjd pvd->vdev_ops != &vdev_mirror_ops && 4759168404Spjd pvd->vdev_ops != &vdev_spare_ops) 4760168404Spjd return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); 4761168404Spjd 4762168404Spjd /* 4763209962Smm * If this device has the only valid copy of some data, 4764209962Smm * we cannot safely detach it. 4765168404Spjd */ 4766209962Smm if (vdev_dtl_required(vd)) 4767168404Spjd return (spa_vdev_exit(spa, NULL, txg, EBUSY)); 4768168404Spjd 4769209962Smm ASSERT(pvd->vdev_children >= 2); 4770168404Spjd 4771168404Spjd /* 4772185029Spjd * If we are detaching the second disk from a replacing vdev, then 4773185029Spjd * check to see if we changed the original vdev's path to have "/old" 4774185029Spjd * at the end in spa_vdev_attach(). If so, undo that change now. 
4775168404Spjd */ 4776219089Spjd if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && 4777219089Spjd vd->vdev_path != NULL) { 4778219089Spjd size_t len = strlen(vd->vdev_path); 4779219089Spjd 4780219089Spjd for (int c = 0; c < pvd->vdev_children; c++) { 4781219089Spjd cvd = pvd->vdev_child[c]; 4782219089Spjd 4783219089Spjd if (cvd == vd || cvd->vdev_path == NULL) 4784219089Spjd continue; 4785219089Spjd 4786219089Spjd if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && 4787219089Spjd strcmp(cvd->vdev_path + len, "/old") == 0) { 4788219089Spjd spa_strfree(cvd->vdev_path); 4789219089Spjd cvd->vdev_path = spa_strdup(vd->vdev_path); 4790219089Spjd break; 4791219089Spjd } 4792185029Spjd } 4793185029Spjd } 4794168404Spjd 4795168404Spjd /* 4796168404Spjd * If we are detaching the original disk from a spare, then it implies 4797168404Spjd * that the spare should become a real disk, and be removed from the 4798168404Spjd * active spare list for the pool. 4799168404Spjd */ 4800168404Spjd if (pvd->vdev_ops == &vdev_spare_ops && 4801219089Spjd vd->vdev_id == 0 && 4802219089Spjd pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) 4803168404Spjd unspare = B_TRUE; 4804168404Spjd 4805168404Spjd /* 4806168404Spjd * Erase the disk labels so the disk can be used for other things. 4807168404Spjd * This must be done after all other error cases are handled, 4808168404Spjd * but before we disembowel vd (so we can still do I/O to it). 4809168404Spjd * But if we can't do it, don't treat the error as fatal -- 4810168404Spjd * it may be that the unwritability of the disk is the reason 4811168404Spjd * it's being detached! 4812168404Spjd */ 4813168404Spjd error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 4814168404Spjd 4815168404Spjd /* 4816168404Spjd * Remove vd from its parent and compact the parent's children. 
4817168404Spjd */ 4818168404Spjd vdev_remove_child(pvd, vd); 4819168404Spjd vdev_compact_children(pvd); 4820168404Spjd 4821168404Spjd /* 4822168404Spjd * Remember one of the remaining children so we can get tvd below. 4823168404Spjd */ 4824219089Spjd cvd = pvd->vdev_child[pvd->vdev_children - 1]; 4825168404Spjd 4826168404Spjd /* 4827168404Spjd * If we need to remove the remaining child from the list of hot spares, 4828209962Smm * do it now, marking the vdev as no longer a spare in the process. 4829209962Smm * We must do this before vdev_remove_parent(), because that can 4830209962Smm * change the GUID if it creates a new toplevel GUID. For a similar 4831209962Smm * reason, we must remove the spare now, in the same txg as the detach; 4832209962Smm * otherwise someone could attach a new sibling, change the GUID, and 4833209962Smm * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. 4834168404Spjd */ 4835168404Spjd if (unspare) { 4836168404Spjd ASSERT(cvd->vdev_isspare); 4837168404Spjd spa_spare_remove(cvd); 4838168404Spjd unspare_guid = cvd->vdev_guid; 4839209962Smm (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); 4840219089Spjd cvd->vdev_unspare = B_TRUE; 4841168404Spjd } 4842168404Spjd 4843168404Spjd /* 4844168404Spjd * If the parent mirror/replacing vdev only has one child, 4845168404Spjd * the parent is no longer needed. Remove it from the tree. 4846168404Spjd */ 4847219089Spjd if (pvd->vdev_children == 1) { 4848219089Spjd if (pvd->vdev_ops == &vdev_spare_ops) 4849219089Spjd cvd->vdev_unspare = B_FALSE; 4850168404Spjd vdev_remove_parent(cvd); 4851219089Spjd cvd->vdev_resilvering = B_FALSE; 4852219089Spjd } 4853168404Spjd 4854219089Spjd 4855168404Spjd /* 4856168404Spjd * We don't set tvd until now because the parent we just removed 4857168404Spjd * may have been the previous top-level vdev. 
4858168404Spjd */ 4859168404Spjd tvd = cvd->vdev_top; 4860168404Spjd ASSERT(tvd->vdev_parent == rvd); 4861168404Spjd 4862168404Spjd /* 4863168404Spjd * Reevaluate the parent vdev state. 4864168404Spjd */ 4865185029Spjd vdev_propagate_state(cvd); 4866168404Spjd 4867168404Spjd /* 4868219089Spjd * If the 'autoexpand' property is set on the pool then automatically 4869219089Spjd * try to expand the size of the pool. For example if the device we 4870219089Spjd * just detached was smaller than the others, it may be possible to 4871219089Spjd * add metaslabs (i.e. grow the pool). We need to reopen the vdev 4872219089Spjd * first so that we can obtain the updated sizes of the leaf vdevs. 4873168404Spjd */ 4874219089Spjd if (spa->spa_autoexpand) { 4875219089Spjd vdev_reopen(tvd); 4876219089Spjd vdev_expand(tvd, txg); 4877219089Spjd } 4878168404Spjd 4879168404Spjd vdev_config_dirty(tvd); 4880168404Spjd 4881168404Spjd /* 4882168404Spjd * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that 4883168404Spjd * vd->vdev_detached is set and free vd's DTL object in syncing context. 4884168404Spjd * But first make sure we're not on any *other* txg's DTL list, to 4885168404Spjd * prevent vd from being accessed after it's freed. 
4886168404Spjd */ 4887219089Spjd vdpath = spa_strdup(vd->vdev_path); 4888209962Smm for (int t = 0; t < TXG_SIZE; t++) 4889168404Spjd (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t); 4890168404Spjd vd->vdev_detached = B_TRUE; 4891168404Spjd vdev_dirty(tvd, VDD_DTL, vd, txg); 4892168404Spjd 4893185029Spjd spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); 4894185029Spjd 4895219089Spjd /* hang on to the spa before we release the lock */ 4896219089Spjd spa_open_ref(spa, FTAG); 4897219089Spjd 4898168404Spjd error = spa_vdev_exit(spa, vd, txg, 0); 4899168404Spjd 4900219089Spjd spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, 4901219089Spjd "vdev=%s", vdpath); 4902219089Spjd spa_strfree(vdpath); 4903219089Spjd 4904168404Spjd /* 4905168404Spjd * If this was the removal of the original device in a hot spare vdev, 4906168404Spjd * then we want to go through and remove the device from the hot spare 4907168404Spjd * list of every other pool. 4908168404Spjd */ 4909168404Spjd if (unspare) { 4910219089Spjd spa_t *altspa = NULL; 4911219089Spjd 4912168404Spjd mutex_enter(&spa_namespace_lock); 4913219089Spjd while ((altspa = spa_next(altspa)) != NULL) { 4914219089Spjd if (altspa->spa_state != POOL_STATE_ACTIVE || 4915219089Spjd altspa == spa) 4916168404Spjd continue; 4917219089Spjd 4918219089Spjd spa_open_ref(altspa, FTAG); 4919185029Spjd mutex_exit(&spa_namespace_lock); 4920219089Spjd (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); 4921185029Spjd mutex_enter(&spa_namespace_lock); 4922219089Spjd spa_close(altspa, FTAG); 4923168404Spjd } 4924168404Spjd mutex_exit(&spa_namespace_lock); 4925219089Spjd 4926219089Spjd /* search the rest of the vdevs for spares to remove */ 4927219089Spjd spa_vdev_resilver_done(spa); 4928168404Spjd } 4929168404Spjd 4930219089Spjd /* all done with the spa; OK to release */ 4931219089Spjd mutex_enter(&spa_namespace_lock); 4932219089Spjd spa_close(spa, FTAG); 4933219089Spjd mutex_exit(&spa_namespace_lock); 4934219089Spjd 4935168404Spjd 
return (error); 4936168404Spjd} 4937168404Spjd 4938219089Spjd/* 4939219089Spjd * Split a set of devices from their mirrors, and create a new pool from them. 4940219089Spjd */ 4941219089Spjdint 4942219089Spjdspa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, 4943219089Spjd nvlist_t *props, boolean_t exp) 4944219089Spjd{ 4945219089Spjd int error = 0; 4946219089Spjd uint64_t txg, *glist; 4947219089Spjd spa_t *newspa; 4948219089Spjd uint_t c, children, lastlog; 4949219089Spjd nvlist_t **child, *nvl, *tmp; 4950219089Spjd dmu_tx_t *tx; 4951219089Spjd char *altroot = NULL; 4952219089Spjd vdev_t *rvd, **vml = NULL; /* vdev modify list */ 4953219089Spjd boolean_t activate_slog; 4954219089Spjd 4955219089Spjd ASSERT(spa_writeable(spa)); 4956219089Spjd 4957219089Spjd txg = spa_vdev_enter(spa); 4958219089Spjd 4959219089Spjd /* clear the log and flush everything up to now */ 4960219089Spjd activate_slog = spa_passivate_log(spa); 4961219089Spjd (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 4962219089Spjd error = spa_offline_log(spa); 4963219089Spjd txg = spa_vdev_config_enter(spa); 4964219089Spjd 4965219089Spjd if (activate_slog) 4966219089Spjd spa_activate_log(spa); 4967219089Spjd 4968219089Spjd if (error != 0) 4969219089Spjd return (spa_vdev_exit(spa, NULL, txg, error)); 4970219089Spjd 4971219089Spjd /* check new spa name before going any further */ 4972219089Spjd if (spa_lookup(newname) != NULL) 4973219089Spjd return (spa_vdev_exit(spa, NULL, txg, EEXIST)); 4974219089Spjd 4975219089Spjd /* 4976219089Spjd * scan through all the children to ensure they're all mirrors 4977219089Spjd */ 4978219089Spjd if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || 4979219089Spjd nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, 4980219089Spjd &children) != 0) 4981219089Spjd return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 4982219089Spjd 4983219089Spjd /* first, check to ensure we've got the right child count */ 4984219089Spjd rvd = 
spa->spa_root_vdev; 4985219089Spjd lastlog = 0; 4986219089Spjd for (c = 0; c < rvd->vdev_children; c++) { 4987219089Spjd vdev_t *vd = rvd->vdev_child[c]; 4988219089Spjd 4989219089Spjd /* don't count the holes & logs as children */ 4990219089Spjd if (vd->vdev_islog || vd->vdev_ishole) { 4991219089Spjd if (lastlog == 0) 4992219089Spjd lastlog = c; 4993219089Spjd continue; 4994219089Spjd } 4995219089Spjd 4996219089Spjd lastlog = 0; 4997219089Spjd } 4998219089Spjd if (children != (lastlog != 0 ? lastlog : rvd->vdev_children)) 4999219089Spjd return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 5000219089Spjd 5001219089Spjd /* next, ensure no spare or cache devices are part of the split */ 5002219089Spjd if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || 5003219089Spjd nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) 5004219089Spjd return (spa_vdev_exit(spa, NULL, txg, EINVAL)); 5005219089Spjd 5006219089Spjd vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); 5007219089Spjd glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); 5008219089Spjd 5009219089Spjd /* then, loop over each vdev and validate it */ 5010219089Spjd for (c = 0; c < children; c++) { 5011219089Spjd uint64_t is_hole = 0; 5012219089Spjd 5013219089Spjd (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, 5014219089Spjd &is_hole); 5015219089Spjd 5016219089Spjd if (is_hole != 0) { 5017219089Spjd if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || 5018219089Spjd spa->spa_root_vdev->vdev_child[c]->vdev_islog) { 5019219089Spjd continue; 5020219089Spjd } else { 5021219089Spjd error = EINVAL; 5022219089Spjd break; 5023219089Spjd } 5024219089Spjd } 5025219089Spjd 5026219089Spjd /* which disk is going to be split? 
*/ 5027219089Spjd if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, 5028219089Spjd &glist[c]) != 0) { 5029219089Spjd error = EINVAL; 5030219089Spjd break; 5031219089Spjd } 5032219089Spjd 5033219089Spjd /* look it up in the spa */ 5034219089Spjd vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); 5035219089Spjd if (vml[c] == NULL) { 5036219089Spjd error = ENODEV; 5037219089Spjd break; 5038219089Spjd } 5039219089Spjd 5040219089Spjd /* make sure there's nothing stopping the split */ 5041219089Spjd if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || 5042219089Spjd vml[c]->vdev_islog || 5043219089Spjd vml[c]->vdev_ishole || 5044219089Spjd vml[c]->vdev_isspare || 5045219089Spjd vml[c]->vdev_isl2cache || 5046219089Spjd !vdev_writeable(vml[c]) || 5047219089Spjd vml[c]->vdev_children != 0 || 5048219089Spjd vml[c]->vdev_state != VDEV_STATE_HEALTHY || 5049219089Spjd c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { 5050219089Spjd error = EINVAL; 5051219089Spjd break; 5052219089Spjd } 5053219089Spjd 5054219089Spjd if (vdev_dtl_required(vml[c])) { 5055219089Spjd error = EBUSY; 5056219089Spjd break; 5057219089Spjd } 5058219089Spjd 5059219089Spjd /* we need certain info from the top level */ 5060219089Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, 5061219089Spjd vml[c]->vdev_top->vdev_ms_array) == 0); 5062219089Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, 5063219089Spjd vml[c]->vdev_top->vdev_ms_shift) == 0); 5064219089Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, 5065219089Spjd vml[c]->vdev_top->vdev_asize) == 0); 5066219089Spjd VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, 5067219089Spjd vml[c]->vdev_top->vdev_ashift) == 0); 5068219089Spjd } 5069219089Spjd 5070219089Spjd if (error != 0) { 5071219089Spjd kmem_free(vml, children * sizeof (vdev_t *)); 5072219089Spjd kmem_free(glist, children * sizeof (uint64_t)); 5073219089Spjd return (spa_vdev_exit(spa, NULL, txg, error)); 5074219089Spjd } 
5075219089Spjd 5076219089Spjd /* stop writers from using the disks */ 5077219089Spjd for (c = 0; c < children; c++) { 5078219089Spjd if (vml[c] != NULL) 5079219089Spjd vml[c]->vdev_offline = B_TRUE; 5080219089Spjd } 5081219089Spjd vdev_reopen(spa->spa_root_vdev); 5082219089Spjd 5083219089Spjd /* 5084219089Spjd * Temporarily record the splitting vdevs in the spa config. This 5085219089Spjd * will disappear once the config is regenerated. 5086219089Spjd */ 5087219089Spjd VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 5088219089Spjd VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, 5089219089Spjd glist, children) == 0); 5090219089Spjd kmem_free(glist, children * sizeof (uint64_t)); 5091219089Spjd 5092219089Spjd mutex_enter(&spa->spa_props_lock); 5093219089Spjd VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, 5094219089Spjd nvl) == 0); 5095219089Spjd mutex_exit(&spa->spa_props_lock); 5096219089Spjd spa->spa_config_splitting = nvl; 5097219089Spjd vdev_config_dirty(spa->spa_root_vdev); 5098219089Spjd 5099219089Spjd /* configure and create the new pool */ 5100219089Spjd VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); 5101219089Spjd VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, 5102219089Spjd exp ? 
POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); 5103219089Spjd VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, 5104219089Spjd spa_version(spa)) == 0); 5105219089Spjd VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, 5106219089Spjd spa->spa_config_txg) == 0); 5107219089Spjd VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, 5108219089Spjd spa_generate_guid(NULL)) == 0); 5109219089Spjd (void) nvlist_lookup_string(props, 5110219089Spjd zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); 5111219089Spjd 5112219089Spjd /* add the new pool to the namespace */ 5113219089Spjd newspa = spa_add(newname, config, altroot); 5114219089Spjd newspa->spa_config_txg = spa->spa_config_txg; 5115219089Spjd spa_set_log_state(newspa, SPA_LOG_CLEAR); 5116219089Spjd 5117219089Spjd /* release the spa config lock, retaining the namespace lock */ 5118219089Spjd spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 5119219089Spjd 5120219089Spjd if (zio_injection_enabled) 5121219089Spjd zio_handle_panic_injection(spa, FTAG, 1); 5122219089Spjd 5123219089Spjd spa_activate(newspa, spa_mode_global); 5124219089Spjd spa_async_suspend(newspa); 5125219089Spjd 5126219089Spjd#ifndef sun 5127219089Spjd /* mark that we are creating new spa by splitting */ 5128219089Spjd newspa->spa_splitting_newspa = B_TRUE; 5129219089Spjd#endif 5130219089Spjd /* create the new pool from the disks of the original pool */ 5131219089Spjd error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE); 5132219089Spjd#ifndef sun 5133219089Spjd newspa->spa_splitting_newspa = B_FALSE; 5134219089Spjd#endif 5135219089Spjd if (error) 5136219089Spjd goto out; 5137219089Spjd 5138219089Spjd /* if that worked, generate a real config for the new pool */ 5139219089Spjd if (newspa->spa_root_vdev != NULL) { 5140219089Spjd VERIFY(nvlist_alloc(&newspa->spa_config_splitting, 5141219089Spjd NV_UNIQUE_NAME, KM_SLEEP) == 0); 5142219089Spjd VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, 5143219089Spjd 
ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); 5144219089Spjd spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, 5145219089Spjd B_TRUE)); 5146219089Spjd } 5147219089Spjd 5148219089Spjd /* set the props */ 5149219089Spjd if (props != NULL) { 5150219089Spjd spa_configfile_set(newspa, props, B_FALSE); 5151219089Spjd error = spa_prop_set(newspa, props); 5152219089Spjd if (error) 5153219089Spjd goto out; 5154219089Spjd } 5155219089Spjd 5156219089Spjd /* flush everything */ 5157219089Spjd txg = spa_vdev_config_enter(newspa); 5158219089Spjd vdev_config_dirty(newspa->spa_root_vdev); 5159219089Spjd (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); 5160219089Spjd 5161219089Spjd if (zio_injection_enabled) 5162219089Spjd zio_handle_panic_injection(spa, FTAG, 2); 5163219089Spjd 5164219089Spjd spa_async_resume(newspa); 5165219089Spjd 5166219089Spjd /* finally, update the original pool's config */ 5167219089Spjd txg = spa_vdev_config_enter(spa); 5168219089Spjd tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); 5169219089Spjd error = dmu_tx_assign(tx, TXG_WAIT); 5170219089Spjd if (error != 0) 5171219089Spjd dmu_tx_abort(tx); 5172219089Spjd for (c = 0; c < children; c++) { 5173219089Spjd if (vml[c] != NULL) { 5174219089Spjd vdev_split(vml[c]); 5175219089Spjd if (error == 0) 5176219089Spjd spa_history_log_internal(LOG_POOL_VDEV_DETACH, 5177219089Spjd spa, tx, "vdev=%s", 5178219089Spjd vml[c]->vdev_path); 5179219089Spjd vdev_free(vml[c]); 5180219089Spjd } 5181219089Spjd } 5182219089Spjd vdev_config_dirty(spa->spa_root_vdev); 5183219089Spjd spa->spa_config_splitting = NULL; 5184219089Spjd nvlist_free(nvl); 5185219089Spjd if (error == 0) 5186219089Spjd dmu_tx_commit(tx); 5187219089Spjd (void) spa_vdev_exit(spa, NULL, txg, 0); 5188219089Spjd 5189219089Spjd if (zio_injection_enabled) 5190219089Spjd zio_handle_panic_injection(spa, FTAG, 3); 5191219089Spjd 5192219089Spjd /* split is complete; log a history record */ 5193219089Spjd 
spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL, 5194219089Spjd "split new pool %s from pool %s", newname, spa_name(spa)); 5195219089Spjd 5196219089Spjd kmem_free(vml, children * sizeof (vdev_t *)); 5197219089Spjd 5198219089Spjd /* if we're not going to mount the filesystems in userland, export */ 5199219089Spjd if (exp) 5200219089Spjd error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, 5201219089Spjd B_FALSE, B_FALSE); 5202219089Spjd 5203219089Spjd return (error); 5204219089Spjd 5205219089Spjdout: 5206219089Spjd spa_unload(newspa); 5207219089Spjd spa_deactivate(newspa); 5208219089Spjd spa_remove(newspa); 5209219089Spjd 5210219089Spjd txg = spa_vdev_config_enter(spa); 5211219089Spjd 5212219089Spjd /* re-online all offlined disks */ 5213219089Spjd for (c = 0; c < children; c++) { 5214219089Spjd if (vml[c] != NULL) 5215219089Spjd vml[c]->vdev_offline = B_FALSE; 5216219089Spjd } 5217219089Spjd vdev_reopen(spa->spa_root_vdev); 5218219089Spjd 5219219089Spjd nvlist_free(spa->spa_config_splitting); 5220219089Spjd spa->spa_config_splitting = NULL; 5221219089Spjd (void) spa_vdev_exit(spa, NULL, txg, error); 5222219089Spjd 5223219089Spjd kmem_free(vml, children * sizeof (vdev_t *)); 5224219089Spjd return (error); 5225219089Spjd} 5226219089Spjd 5227185029Spjdstatic nvlist_t * 5228185029Spjdspa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid) 5229185029Spjd{ 5230185029Spjd for (int i = 0; i < count; i++) { 5231185029Spjd uint64_t guid; 5232185029Spjd 5233185029Spjd VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID, 5234185029Spjd &guid) == 0); 5235185029Spjd 5236185029Spjd if (guid == target_guid) 5237185029Spjd return (nvpp[i]); 5238185029Spjd } 5239185029Spjd 5240185029Spjd return (NULL); 5241185029Spjd} 5242185029Spjd 5243185029Spjdstatic void 5244185029Spjdspa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, 5245185029Spjd nvlist_t *dev_to_remove) 5246185029Spjd{ 5247185029Spjd nvlist_t **newdev = NULL; 
5248185029Spjd 5249185029Spjd if (count > 1) 5250185029Spjd newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); 5251185029Spjd 5252185029Spjd for (int i = 0, j = 0; i < count; i++) { 5253185029Spjd if (dev[i] == dev_to_remove) 5254185029Spjd continue; 5255185029Spjd VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); 5256185029Spjd } 5257185029Spjd 5258185029Spjd VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); 5259185029Spjd VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0); 5260185029Spjd 5261185029Spjd for (int i = 0; i < count - 1; i++) 5262185029Spjd nvlist_free(newdev[i]); 5263185029Spjd 5264185029Spjd if (count > 1) 5265185029Spjd kmem_free(newdev, (count - 1) * sizeof (void *)); 5266185029Spjd} 5267185029Spjd 5268168404Spjd/* 5269219089Spjd * Evacuate the device. 5270219089Spjd */ 5271219089Spjdstatic int 5272219089Spjdspa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) 5273219089Spjd{ 5274219089Spjd uint64_t txg; 5275219089Spjd int error = 0; 5276219089Spjd 5277219089Spjd ASSERT(MUTEX_HELD(&spa_namespace_lock)); 5278219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 5279219089Spjd ASSERT(vd == vd->vdev_top); 5280219089Spjd 5281219089Spjd /* 5282219089Spjd * Evacuate the device. We don't hold the config lock as writer 5283219089Spjd * since we need to do I/O but we do keep the 5284219089Spjd * spa_namespace_lock held. Once this completes the device 5285219089Spjd * should no longer have any blocks allocated on it. 5286219089Spjd */ 5287219089Spjd if (vd->vdev_islog) { 5288219089Spjd if (vd->vdev_stat.vs_alloc != 0) 5289219089Spjd error = spa_offline_log(spa); 5290219089Spjd } else { 5291219089Spjd error = ENOTSUP; 5292219089Spjd } 5293219089Spjd 5294219089Spjd if (error) 5295219089Spjd return (error); 5296219089Spjd 5297219089Spjd /* 5298219089Spjd * The evacuation succeeded. Remove any remaining MOS metadata 5299219089Spjd * associated with this vdev, and wait for these changes to sync. 
5300219089Spjd */ 5301240415Smm ASSERT0(vd->vdev_stat.vs_alloc); 5302219089Spjd txg = spa_vdev_config_enter(spa); 5303219089Spjd vd->vdev_removing = B_TRUE; 5304219089Spjd vdev_dirty(vd, 0, NULL, txg); 5305219089Spjd vdev_config_dirty(vd); 5306219089Spjd spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); 5307219089Spjd 5308219089Spjd return (0); 5309219089Spjd} 5310219089Spjd 5311219089Spjd/* 5312219089Spjd * Complete the removal by cleaning up the namespace. 5313219089Spjd */ 5314219089Spjdstatic void 5315219089Spjdspa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd) 5316219089Spjd{ 5317219089Spjd vdev_t *rvd = spa->spa_root_vdev; 5318219089Spjd uint64_t id = vd->vdev_id; 5319219089Spjd boolean_t last_vdev = (id == (rvd->vdev_children - 1)); 5320219089Spjd 5321219089Spjd ASSERT(MUTEX_HELD(&spa_namespace_lock)); 5322219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 5323219089Spjd ASSERT(vd == vd->vdev_top); 5324219089Spjd 5325219089Spjd /* 5326219089Spjd * Only remove any devices which are empty. 5327219089Spjd */ 5328219089Spjd if (vd->vdev_stat.vs_alloc != 0) 5329219089Spjd return; 5330219089Spjd 5331219089Spjd (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); 5332219089Spjd 5333219089Spjd if (list_link_active(&vd->vdev_state_dirty_node)) 5334219089Spjd vdev_state_clean(vd); 5335219089Spjd if (list_link_active(&vd->vdev_config_dirty_node)) 5336219089Spjd vdev_config_clean(vd); 5337219089Spjd 5338219089Spjd vdev_free(vd); 5339219089Spjd 5340219089Spjd if (last_vdev) { 5341219089Spjd vdev_compact_children(rvd); 5342219089Spjd } else { 5343219089Spjd vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); 5344219089Spjd vdev_add_child(rvd, vd); 5345219089Spjd } 5346219089Spjd vdev_config_dirty(rvd); 5347219089Spjd 5348219089Spjd /* 5349219089Spjd * Reassess the health of our root vdev. 
5350219089Spjd */ 5351219089Spjd vdev_reopen(rvd); 5352219089Spjd} 5353219089Spjd 5354219089Spjd/* 5355219089Spjd * Remove a device from the pool - 5356219089Spjd * 5357219089Spjd * Removing a device from the vdev namespace requires several steps 5358219089Spjd * and can take a significant amount of time. As a result we use 5359219089Spjd * the spa_vdev_config_[enter/exit] functions which allow us to 5360219089Spjd * grab and release the spa_config_lock while still holding the namespace 5361219089Spjd * lock. During each step the configuration is synced out. 5362219089Spjd */ 5363219089Spjd 5364219089Spjd/* 5365168404Spjd * Remove a device from the pool. Currently, this supports removing only hot 5366219089Spjd * spares, slogs, and level 2 ARC devices. 5367168404Spjd */ 5368168404Spjdint 5369168404Spjdspa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) 5370168404Spjd{ 5371168404Spjd vdev_t *vd; 5372219089Spjd metaslab_group_t *mg; 5373185029Spjd nvlist_t **spares, **l2cache, *nv; 5374219089Spjd uint64_t txg = 0; 5375185029Spjd uint_t nspares, nl2cache; 5376185029Spjd int error = 0; 5377209962Smm boolean_t locked = MUTEX_HELD(&spa_namespace_lock); 5378168404Spjd 5379219089Spjd ASSERT(spa_writeable(spa)); 5380219089Spjd 5381209962Smm if (!locked) 5382209962Smm txg = spa_vdev_enter(spa); 5383168404Spjd 5384185029Spjd vd = spa_lookup_by_guid(spa, guid, B_FALSE); 5385168404Spjd 5386185029Spjd if (spa->spa_spares.sav_vdevs != NULL && 5387185029Spjd nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, 5388185029Spjd ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 && 5389185029Spjd (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) { 5390185029Spjd /* 5391185029Spjd * Only remove the hot spare if it's not currently in use 5392185029Spjd * in this pool. 
5393185029Spjd */ 5394185029Spjd if (vd == NULL || unspare) { 5395185029Spjd spa_vdev_remove_aux(spa->spa_spares.sav_config, 5396185029Spjd ZPOOL_CONFIG_SPARES, spares, nspares, nv); 5397185029Spjd spa_load_spares(spa); 5398185029Spjd spa->spa_spares.sav_sync = B_TRUE; 5399185029Spjd } else { 5400185029Spjd error = EBUSY; 5401168404Spjd } 5402185029Spjd } else if (spa->spa_l2cache.sav_vdevs != NULL && 5403185029Spjd nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, 5404185029Spjd ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 && 5405185029Spjd (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) { 5406185029Spjd /* 5407185029Spjd * Cache devices can always be removed. 5408185029Spjd */ 5409185029Spjd spa_vdev_remove_aux(spa->spa_l2cache.sav_config, 5410185029Spjd ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); 5411185029Spjd spa_load_l2cache(spa); 5412185029Spjd spa->spa_l2cache.sav_sync = B_TRUE; 5413219089Spjd } else if (vd != NULL && vd->vdev_islog) { 5414219089Spjd ASSERT(!locked); 5415219089Spjd ASSERT(vd == vd->vdev_top); 5416219089Spjd 5417219089Spjd /* 5418219089Spjd * XXX - Once we have bp-rewrite this should 5419219089Spjd * become the common case. 5420219089Spjd */ 5421219089Spjd 5422219089Spjd mg = vd->vdev_mg; 5423219089Spjd 5424219089Spjd /* 5425219089Spjd * Stop allocating from this vdev. 5426219089Spjd */ 5427219089Spjd metaslab_group_passivate(mg); 5428219089Spjd 5429219089Spjd /* 5430219089Spjd * Wait for the youngest allocations and frees to sync, 5431219089Spjd * and then wait for the deferral of those frees to finish. 5432219089Spjd */ 5433219089Spjd spa_vdev_config_exit(spa, NULL, 5434219089Spjd txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG); 5435219089Spjd 5436219089Spjd /* 5437219089Spjd * Attempt to evacuate the vdev. 
5438219089Spjd */ 5439219089Spjd error = spa_vdev_remove_evacuate(spa, vd); 5440219089Spjd 5441219089Spjd txg = spa_vdev_config_enter(spa); 5442219089Spjd 5443219089Spjd /* 5444219089Spjd * If we couldn't evacuate the vdev, unwind. 5445219089Spjd */ 5446219089Spjd if (error) { 5447219089Spjd metaslab_group_activate(mg); 5448219089Spjd return (spa_vdev_exit(spa, NULL, txg, error)); 5449219089Spjd } 5450219089Spjd 5451219089Spjd /* 5452219089Spjd * Clean up the vdev namespace. 5453219089Spjd */ 5454219089Spjd spa_vdev_remove_from_namespace(spa, vd); 5455219089Spjd 5456185029Spjd } else if (vd != NULL) { 5457185029Spjd /* 5458185029Spjd * Normal vdevs cannot be removed (yet). 5459185029Spjd */ 5460185029Spjd error = ENOTSUP; 5461168404Spjd } else { 5462185029Spjd /* 5463185029Spjd * There is no vdev of any kind with the specified guid. 5464185029Spjd */ 5465185029Spjd error = ENOENT; 5466168404Spjd } 5467168404Spjd 5468209962Smm if (!locked) 5469209962Smm return (spa_vdev_exit(spa, NULL, txg, error)); 5470209962Smm 5471209962Smm return (error); 5472168404Spjd} 5473168404Spjd 5474168404Spjd/* 5475185029Spjd * Find any device that's done replacing, or a vdev marked 'unspare' that's 5476185029Spjd * current spared, so we can detach it. 5477168404Spjd */ 5478168404Spjdstatic vdev_t * 5479185029Spjdspa_vdev_resilver_done_hunt(vdev_t *vd) 5480168404Spjd{ 5481168404Spjd vdev_t *newvd, *oldvd; 5482168404Spjd 5483219089Spjd for (int c = 0; c < vd->vdev_children; c++) { 5484185029Spjd oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); 5485168404Spjd if (oldvd != NULL) 5486168404Spjd return (oldvd); 5487168404Spjd } 5488168404Spjd 5489185029Spjd /* 5490219089Spjd * Check for a completed replacement. We always consider the first 5491219089Spjd * vdev in the list to be the oldest vdev, and the last one to be 5492219089Spjd * the newest (see spa_vdev_attach() for how that works). 
In 5493219089Spjd * the case where the newest vdev is faulted, we will not automatically 5494219089Spjd * remove it after a resilver completes. This is OK as it will require 5495219089Spjd * user intervention to determine which disk the admin wishes to keep. 5496185029Spjd */ 5497219089Spjd if (vd->vdev_ops == &vdev_replacing_ops) { 5498219089Spjd ASSERT(vd->vdev_children > 1); 5499219089Spjd 5500219089Spjd newvd = vd->vdev_child[vd->vdev_children - 1]; 5501168404Spjd oldvd = vd->vdev_child[0]; 5502168404Spjd 5503209962Smm if (vdev_dtl_empty(newvd, DTL_MISSING) && 5504219089Spjd vdev_dtl_empty(newvd, DTL_OUTAGE) && 5505209962Smm !vdev_dtl_required(oldvd)) 5506168404Spjd return (oldvd); 5507168404Spjd } 5508168404Spjd 5509185029Spjd /* 5510185029Spjd * Check for a completed resilver with the 'unspare' flag set. 5511185029Spjd */ 5512219089Spjd if (vd->vdev_ops == &vdev_spare_ops) { 5513219089Spjd vdev_t *first = vd->vdev_child[0]; 5514219089Spjd vdev_t *last = vd->vdev_child[vd->vdev_children - 1]; 5515185029Spjd 5516219089Spjd if (last->vdev_unspare) { 5517219089Spjd oldvd = first; 5518219089Spjd newvd = last; 5519219089Spjd } else if (first->vdev_unspare) { 5520219089Spjd oldvd = last; 5521219089Spjd newvd = first; 5522219089Spjd } else { 5523219089Spjd oldvd = NULL; 5524219089Spjd } 5525219089Spjd 5526219089Spjd if (oldvd != NULL && 5527209962Smm vdev_dtl_empty(newvd, DTL_MISSING) && 5528219089Spjd vdev_dtl_empty(newvd, DTL_OUTAGE) && 5529219089Spjd !vdev_dtl_required(oldvd)) 5530185029Spjd return (oldvd); 5531219089Spjd 5532219089Spjd /* 5533219089Spjd * If there are more than two spares attached to a disk, 5534219089Spjd * and those spares are not required, then we want to 5535219089Spjd * attempt to free them up now so that they can be used 5536219089Spjd * by other pools. Once we're back down to a single 5537219089Spjd * disk+spare, we stop removing them. 
5538219089Spjd */ 5539219089Spjd if (vd->vdev_children > 2) { 5540219089Spjd newvd = vd->vdev_child[1]; 5541219089Spjd 5542219089Spjd if (newvd->vdev_isspare && last->vdev_isspare && 5543219089Spjd vdev_dtl_empty(last, DTL_MISSING) && 5544219089Spjd vdev_dtl_empty(last, DTL_OUTAGE) && 5545219089Spjd !vdev_dtl_required(newvd)) 5546219089Spjd return (newvd); 5547185029Spjd } 5548185029Spjd } 5549185029Spjd 5550168404Spjd return (NULL); 5551168404Spjd} 5552168404Spjd 5553168404Spjdstatic void 5554185029Spjdspa_vdev_resilver_done(spa_t *spa) 5555168404Spjd{ 5556209962Smm vdev_t *vd, *pvd, *ppvd; 5557209962Smm uint64_t guid, sguid, pguid, ppguid; 5558168404Spjd 5559209962Smm spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 5560168404Spjd 5561185029Spjd while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { 5562209962Smm pvd = vd->vdev_parent; 5563209962Smm ppvd = pvd->vdev_parent; 5564168404Spjd guid = vd->vdev_guid; 5565209962Smm pguid = pvd->vdev_guid; 5566209962Smm ppguid = ppvd->vdev_guid; 5567209962Smm sguid = 0; 5568168404Spjd /* 5569168404Spjd * If we have just finished replacing a hot spared device, then 5570168404Spjd * we need to detach the parent's first child (the original hot 5571168404Spjd * spare) as well. 
5572168404Spjd */ 5573219089Spjd if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 && 5574219089Spjd ppvd->vdev_children == 2) { 5575168404Spjd ASSERT(pvd->vdev_ops == &vdev_replacing_ops); 5576209962Smm sguid = ppvd->vdev_child[1]->vdev_guid; 5577168404Spjd } 5578209962Smm spa_config_exit(spa, SCL_ALL, FTAG); 5579209962Smm if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) 5580168404Spjd return; 5581209962Smm if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) 5582168404Spjd return; 5583209962Smm spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 5584168404Spjd } 5585168404Spjd 5586209962Smm spa_config_exit(spa, SCL_ALL, FTAG); 5587168404Spjd} 5588168404Spjd 5589168404Spjd/* 5590219089Spjd * Update the stored path or FRU for this vdev. 5591168404Spjd */ 5592168404Spjdint 5593209962Smmspa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, 5594209962Smm boolean_t ispath) 5595168404Spjd{ 5596185029Spjd vdev_t *vd; 5597219089Spjd boolean_t sync = B_FALSE; 5598168404Spjd 5599219089Spjd ASSERT(spa_writeable(spa)); 5600168404Spjd 5601219089Spjd spa_vdev_state_enter(spa, SCL_ALL); 5602219089Spjd 5603209962Smm if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) 5604219089Spjd return (spa_vdev_state_exit(spa, NULL, ENOENT)); 5605168404Spjd 5606168404Spjd if (!vd->vdev_ops->vdev_op_leaf) 5607219089Spjd return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); 5608168404Spjd 5609209962Smm if (ispath) { 5610219089Spjd if (strcmp(value, vd->vdev_path) != 0) { 5611219089Spjd spa_strfree(vd->vdev_path); 5612219089Spjd vd->vdev_path = spa_strdup(value); 5613219089Spjd sync = B_TRUE; 5614219089Spjd } 5615209962Smm } else { 5616219089Spjd if (vd->vdev_fru == NULL) { 5617219089Spjd vd->vdev_fru = spa_strdup(value); 5618219089Spjd sync = B_TRUE; 5619219089Spjd } else if (strcmp(value, vd->vdev_fru) != 0) { 5620209962Smm spa_strfree(vd->vdev_fru); 5621219089Spjd vd->vdev_fru = spa_strdup(value); 5622219089Spjd sync = B_TRUE; 5623219089Spjd } 5624209962Smm } 
5625168404Spjd 5626219089Spjd return (spa_vdev_state_exit(spa, sync ? vd : NULL, 0)); 5627168404Spjd} 5628168404Spjd 5629209962Smmint 5630209962Smmspa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) 5631209962Smm{ 5632209962Smm return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); 5633209962Smm} 5634209962Smm 5635209962Smmint 5636209962Smmspa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) 5637209962Smm{ 5638209962Smm return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); 5639209962Smm} 5640209962Smm 5641168404Spjd/* 5642168404Spjd * ========================================================================== 5643219089Spjd * SPA Scanning 5644168404Spjd * ========================================================================== 5645168404Spjd */ 5646168404Spjd 5647168404Spjdint 5648219089Spjdspa_scan_stop(spa_t *spa) 5649168404Spjd{ 5650185029Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 5651219089Spjd if (dsl_scan_resilvering(spa->spa_dsl_pool)) 5652219089Spjd return (EBUSY); 5653219089Spjd return (dsl_scan_cancel(spa->spa_dsl_pool)); 5654219089Spjd} 5655168404Spjd 5656219089Spjdint 5657219089Spjdspa_scan(spa_t *spa, pool_scan_func_t func) 5658219089Spjd{ 5659219089Spjd ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); 5660219089Spjd 5661219089Spjd if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) 5662168404Spjd return (ENOTSUP); 5663168404Spjd 5664168404Spjd /* 5665185029Spjd * If a resilver was requested, but there is no DTL on a 5666185029Spjd * writeable leaf device, we have nothing to do. 
5667168404Spjd */ 5668219089Spjd if (func == POOL_SCAN_RESILVER && 5669185029Spjd !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { 5670185029Spjd spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); 5671168404Spjd return (0); 5672168404Spjd } 5673168404Spjd 5674219089Spjd return (dsl_scan(spa->spa_dsl_pool, func)); 5675168404Spjd} 5676168404Spjd 5677168404Spjd/* 5678168404Spjd * ========================================================================== 5679168404Spjd * SPA async task processing 5680168404Spjd * ========================================================================== 5681168404Spjd */ 5682168404Spjd 5683168404Spjdstatic void 5684185029Spjdspa_async_remove(spa_t *spa, vdev_t *vd) 5685168404Spjd{ 5686185029Spjd if (vd->vdev_remove_wanted) { 5687219089Spjd vd->vdev_remove_wanted = B_FALSE; 5688219089Spjd vd->vdev_delayed_close = B_FALSE; 5689185029Spjd vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); 5690209962Smm 5691209962Smm /* 5692209962Smm * We want to clear the stats, but we don't want to do a full 5693209962Smm * vdev_clear() as that will cause us to throw away 5694209962Smm * degraded/faulted state as well as attempt to reopen the 5695209962Smm * device, all of which is a waste. 
5696209962Smm */ 5697209962Smm vd->vdev_stat.vs_read_errors = 0; 5698209962Smm vd->vdev_stat.vs_write_errors = 0; 5699209962Smm vd->vdev_stat.vs_checksum_errors = 0; 5700209962Smm 5701185029Spjd vdev_state_dirty(vd->vdev_top); 5702185029Spjd } 5703168404Spjd 5704185029Spjd for (int c = 0; c < vd->vdev_children; c++) 5705185029Spjd spa_async_remove(spa, vd->vdev_child[c]); 5706185029Spjd} 5707168404Spjd 5708185029Spjdstatic void 5709185029Spjdspa_async_probe(spa_t *spa, vdev_t *vd) 5710185029Spjd{ 5711185029Spjd if (vd->vdev_probe_wanted) { 5712219089Spjd vd->vdev_probe_wanted = B_FALSE; 5713185029Spjd vdev_reopen(vd); /* vdev_open() does the actual probe */ 5714168404Spjd } 5715168404Spjd 5716185029Spjd for (int c = 0; c < vd->vdev_children; c++) 5717185029Spjd spa_async_probe(spa, vd->vdev_child[c]); 5718168404Spjd} 5719168404Spjd 5720168404Spjdstatic void 5721219089Spjdspa_async_autoexpand(spa_t *spa, vdev_t *vd) 5722219089Spjd{ 5723219089Spjd sysevent_id_t eid; 5724219089Spjd nvlist_t *attr; 5725219089Spjd char *physpath; 5726219089Spjd 5727219089Spjd if (!spa->spa_autoexpand) 5728219089Spjd return; 5729219089Spjd 5730219089Spjd for (int c = 0; c < vd->vdev_children; c++) { 5731219089Spjd vdev_t *cvd = vd->vdev_child[c]; 5732219089Spjd spa_async_autoexpand(spa, cvd); 5733219089Spjd } 5734219089Spjd 5735219089Spjd if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) 5736219089Spjd return; 5737219089Spjd 5738219089Spjd physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 5739219089Spjd (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); 5740219089Spjd 5741219089Spjd VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 5742219089Spjd VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 5743219089Spjd 5744219089Spjd (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 5745219089Spjd ESC_ZFS_VDEV_AUTOEXPAND, attr, &eid, DDI_SLEEP); 5746219089Spjd 5747219089Spjd nvlist_free(attr); 5748219089Spjd kmem_free(physpath, 
MAXPATHLEN); 5749219089Spjd} 5750219089Spjd 5751219089Spjdstatic void 5752168404Spjdspa_async_thread(void *arg) 5753168404Spjd{ 5754168404Spjd spa_t *spa = arg; 5755168404Spjd int tasks; 5756168404Spjd 5757168404Spjd ASSERT(spa->spa_sync_on); 5758168404Spjd 5759168404Spjd mutex_enter(&spa->spa_async_lock); 5760168404Spjd tasks = spa->spa_async_tasks; 5761168404Spjd spa->spa_async_tasks = 0; 5762168404Spjd mutex_exit(&spa->spa_async_lock); 5763168404Spjd 5764168404Spjd /* 5765168404Spjd * See if the config needs to be updated. 5766168404Spjd */ 5767168404Spjd if (tasks & SPA_ASYNC_CONFIG_UPDATE) { 5768219089Spjd uint64_t old_space, new_space; 5769219089Spjd 5770168404Spjd mutex_enter(&spa_namespace_lock); 5771219089Spjd old_space = metaslab_class_get_space(spa_normal_class(spa)); 5772168404Spjd spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); 5773219089Spjd new_space = metaslab_class_get_space(spa_normal_class(spa)); 5774168404Spjd mutex_exit(&spa_namespace_lock); 5775219089Spjd 5776219089Spjd /* 5777219089Spjd * If the pool grew as a result of the config update, 5778219089Spjd * then log an internal history event. 5779219089Spjd */ 5780219089Spjd if (new_space != old_space) { 5781219089Spjd spa_history_log_internal(LOG_POOL_VDEV_ONLINE, 5782219089Spjd spa, NULL, 5783219089Spjd "pool '%s' size: %llu(+%llu)", 5784219089Spjd spa_name(spa), new_space, new_space - old_space); 5785219089Spjd } 5786168404Spjd } 5787168404Spjd 5788168404Spjd /* 5789185029Spjd * See if any devices need to be marked REMOVED. 
5790168404Spjd */ 5791185029Spjd if (tasks & SPA_ASYNC_REMOVE) { 5792219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5793185029Spjd spa_async_remove(spa, spa->spa_root_vdev); 5794185029Spjd for (int i = 0; i < spa->spa_l2cache.sav_count; i++) 5795185029Spjd spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); 5796185029Spjd for (int i = 0; i < spa->spa_spares.sav_count; i++) 5797185029Spjd spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); 5798185029Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5799185029Spjd } 5800168404Spjd 5801219089Spjd if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { 5802219089Spjd spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); 5803219089Spjd spa_async_autoexpand(spa, spa->spa_root_vdev); 5804219089Spjd spa_config_exit(spa, SCL_CONFIG, FTAG); 5805219089Spjd } 5806219089Spjd 5807168404Spjd /* 5808185029Spjd * See if any devices need to be probed. 5809168404Spjd */ 5810185029Spjd if (tasks & SPA_ASYNC_PROBE) { 5811219089Spjd spa_vdev_state_enter(spa, SCL_NONE); 5812185029Spjd spa_async_probe(spa, spa->spa_root_vdev); 5813185029Spjd (void) spa_vdev_state_exit(spa, NULL, 0); 5814185029Spjd } 5815168404Spjd 5816168404Spjd /* 5817185029Spjd * If any devices are done replacing, detach them. 5818168404Spjd */ 5819185029Spjd if (tasks & SPA_ASYNC_RESILVER_DONE) 5820185029Spjd spa_vdev_resilver_done(spa); 5821168404Spjd 5822168404Spjd /* 5823168404Spjd * Kick off a resilver. 5824168404Spjd */ 5825168404Spjd if (tasks & SPA_ASYNC_RESILVER) 5826219089Spjd dsl_resilver_restart(spa->spa_dsl_pool, 0); 5827168404Spjd 5828168404Spjd /* 5829168404Spjd * Let the world know that we're done. 
5830168404Spjd */ 5831168404Spjd mutex_enter(&spa->spa_async_lock); 5832168404Spjd spa->spa_async_thread = NULL; 5833168404Spjd cv_broadcast(&spa->spa_async_cv); 5834168404Spjd mutex_exit(&spa->spa_async_lock); 5835168404Spjd thread_exit(); 5836168404Spjd} 5837168404Spjd 5838168404Spjdvoid 5839168404Spjdspa_async_suspend(spa_t *spa) 5840168404Spjd{ 5841168404Spjd mutex_enter(&spa->spa_async_lock); 5842168404Spjd spa->spa_async_suspended++; 5843168404Spjd while (spa->spa_async_thread != NULL) 5844168404Spjd cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); 5845168404Spjd mutex_exit(&spa->spa_async_lock); 5846168404Spjd} 5847168404Spjd 5848168404Spjdvoid 5849168404Spjdspa_async_resume(spa_t *spa) 5850168404Spjd{ 5851168404Spjd mutex_enter(&spa->spa_async_lock); 5852168404Spjd ASSERT(spa->spa_async_suspended != 0); 5853168404Spjd spa->spa_async_suspended--; 5854168404Spjd mutex_exit(&spa->spa_async_lock); 5855168404Spjd} 5856168404Spjd 5857168404Spjdstatic void 5858168404Spjdspa_async_dispatch(spa_t *spa) 5859168404Spjd{ 5860168404Spjd mutex_enter(&spa->spa_async_lock); 5861168404Spjd if (spa->spa_async_tasks && !spa->spa_async_suspended && 5862168404Spjd spa->spa_async_thread == NULL && 5863168404Spjd rootdir != NULL && !vn_is_readonly(rootdir)) 5864168404Spjd spa->spa_async_thread = thread_create(NULL, 0, 5865168404Spjd spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); 5866168404Spjd mutex_exit(&spa->spa_async_lock); 5867168404Spjd} 5868168404Spjd 5869168404Spjdvoid 5870168404Spjdspa_async_request(spa_t *spa, int task) 5871168404Spjd{ 5872219089Spjd zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task); 5873168404Spjd mutex_enter(&spa->spa_async_lock); 5874168404Spjd spa->spa_async_tasks |= task; 5875168404Spjd mutex_exit(&spa->spa_async_lock); 5876168404Spjd} 5877168404Spjd 5878168404Spjd/* 5879168404Spjd * ========================================================================== 5880168404Spjd * SPA syncing routines 5881168404Spjd * 
========================================================================== 5882168404Spjd */ 5883168404Spjd 5884219089Spjdstatic int 5885219089Spjdbpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 5886168404Spjd{ 5887219089Spjd bpobj_t *bpo = arg; 5888219089Spjd bpobj_enqueue(bpo, bp, tx); 5889219089Spjd return (0); 5890219089Spjd} 5891168404Spjd 5892219089Spjdstatic int 5893219089Spjdspa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 5894219089Spjd{ 5895219089Spjd zio_t *zio = arg; 5896168404Spjd 5897219089Spjd zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, 5898240868Spjd BP_GET_PSIZE(bp), zio->io_flags)); 5899219089Spjd return (0); 5900168404Spjd} 5901168404Spjd 5902168404Spjdstatic void 5903168404Spjdspa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) 5904168404Spjd{ 5905168404Spjd char *packed = NULL; 5906185029Spjd size_t bufsize; 5907168404Spjd size_t nvsize = 0; 5908168404Spjd dmu_buf_t *db; 5909168404Spjd 5910168404Spjd VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); 5911168404Spjd 5912185029Spjd /* 5913185029Spjd * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration 5914185029Spjd * information. This avoids the dbuf_will_dirty() path and 5915185029Spjd * saves us a pre-read to get data we don't actually care about. 
5916185029Spjd */ 5917236884Smm bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE); 5918185029Spjd packed = kmem_alloc(bufsize, KM_SLEEP); 5919168404Spjd 5920168404Spjd VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, 5921168404Spjd KM_SLEEP) == 0); 5922185029Spjd bzero(packed + nvsize, bufsize - nvsize); 5923168404Spjd 5924185029Spjd dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); 5925168404Spjd 5926185029Spjd kmem_free(packed, bufsize); 5927168404Spjd 5928168404Spjd VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); 5929168404Spjd dmu_buf_will_dirty(db, tx); 5930168404Spjd *(uint64_t *)db->db_data = nvsize; 5931168404Spjd dmu_buf_rele(db, FTAG); 5932168404Spjd} 5933168404Spjd 5934168404Spjdstatic void 5935185029Spjdspa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, 5936185029Spjd const char *config, const char *entry) 5937168404Spjd{ 5938168404Spjd nvlist_t *nvroot; 5939185029Spjd nvlist_t **list; 5940168404Spjd int i; 5941168404Spjd 5942185029Spjd if (!sav->sav_sync) 5943168404Spjd return; 5944168404Spjd 5945168404Spjd /* 5946185029Spjd * Update the MOS nvlist describing the list of available devices. 5947185029Spjd * spa_validate_aux() will have already made sure this nvlist is 5948185029Spjd * valid and the vdevs are labeled appropriately. 
5949168404Spjd */ 5950185029Spjd if (sav->sav_object == 0) { 5951185029Spjd sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, 5952185029Spjd DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, 5953185029Spjd sizeof (uint64_t), tx); 5954168404Spjd VERIFY(zap_update(spa->spa_meta_objset, 5955185029Spjd DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, 5956185029Spjd &sav->sav_object, tx) == 0); 5957168404Spjd } 5958168404Spjd 5959168404Spjd VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); 5960185029Spjd if (sav->sav_count == 0) { 5961185029Spjd VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); 5962168404Spjd } else { 5963185029Spjd list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); 5964185029Spjd for (i = 0; i < sav->sav_count; i++) 5965185029Spjd list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], 5966219089Spjd B_FALSE, VDEV_CONFIG_L2CACHE); 5967185029Spjd VERIFY(nvlist_add_nvlist_array(nvroot, config, list, 5968185029Spjd sav->sav_count) == 0); 5969185029Spjd for (i = 0; i < sav->sav_count; i++) 5970185029Spjd nvlist_free(list[i]); 5971185029Spjd kmem_free(list, sav->sav_count * sizeof (void *)); 5972168404Spjd } 5973168404Spjd 5974185029Spjd spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); 5975168404Spjd nvlist_free(nvroot); 5976168404Spjd 5977185029Spjd sav->sav_sync = B_FALSE; 5978168404Spjd} 5979168404Spjd 5980168404Spjdstatic void 5981168404Spjdspa_sync_config_object(spa_t *spa, dmu_tx_t *tx) 5982168404Spjd{ 5983168404Spjd nvlist_t *config; 5984168404Spjd 5985185029Spjd if (list_is_empty(&spa->spa_config_dirty_list)) 5986168404Spjd return; 5987168404Spjd 5988185029Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 5989168404Spjd 5990185029Spjd config = spa_config_generate(spa, spa->spa_root_vdev, 5991185029Spjd dmu_tx_get_txg(tx), B_FALSE); 5992185029Spjd 5993243505Smm /* 5994243505Smm * If we're upgrading the spa version then make sure that 5995243505Smm * the config object gets updated with 
the correct version. 5996243505Smm */ 5997243505Smm if (spa->spa_ubsync.ub_version < spa->spa_uberblock.ub_version) 5998243505Smm fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, 5999243505Smm spa->spa_uberblock.ub_version); 6000243505Smm 6001185029Spjd spa_config_exit(spa, SCL_STATE, FTAG); 6002185029Spjd 6003168404Spjd if (spa->spa_config_syncing) 6004168404Spjd nvlist_free(spa->spa_config_syncing); 6005168404Spjd spa->spa_config_syncing = config; 6006168404Spjd 6007168404Spjd spa_sync_nvlist(spa, spa->spa_config_object, config, tx); 6008168404Spjd} 6009168404Spjd 6010236884Smmstatic void 6011236884Smmspa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) 6012236884Smm{ 6013236884Smm spa_t *spa = arg1; 6014236884Smm uint64_t version = *(uint64_t *)arg2; 6015236884Smm 6016236884Smm /* 6017236884Smm * Setting the version is special cased when first creating the pool. 6018236884Smm */ 6019236884Smm ASSERT(tx->tx_txg != TXG_INITIAL); 6020236884Smm 6021247592Sdelphij ASSERT(SPA_VERSION_IS_SUPPORTED(version)); 6022236884Smm ASSERT(version >= spa_version(spa)); 6023236884Smm 6024236884Smm spa->spa_uberblock.ub_version = version; 6025236884Smm vdev_config_dirty(spa->spa_root_vdev); 6026236884Smm} 6027236884Smm 6028185029Spjd/* 6029185029Spjd * Set zpool properties. 
6030185029Spjd */ 6031168404Spjdstatic void 6032219089Spjdspa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx) 6033168404Spjd{ 6034168404Spjd spa_t *spa = arg1; 6035185029Spjd objset_t *mos = spa->spa_meta_objset; 6036168404Spjd nvlist_t *nvp = arg2; 6037236884Smm nvpair_t *elem = NULL; 6038168404Spjd 6039168404Spjd mutex_enter(&spa->spa_props_lock); 6040168404Spjd 6041185029Spjd while ((elem = nvlist_next_nvpair(nvp, elem))) { 6042236884Smm uint64_t intval; 6043236884Smm char *strval, *fname; 6044236884Smm zpool_prop_t prop; 6045236884Smm const char *propname; 6046236884Smm zprop_type_t proptype; 6047236884Smm zfeature_info_t *feature; 6048236884Smm 6049185029Spjd switch (prop = zpool_name_to_prop(nvpair_name(elem))) { 6050236884Smm case ZPROP_INVAL: 6051236884Smm /* 6052236884Smm * We checked this earlier in spa_prop_validate(). 6053236884Smm */ 6054236884Smm ASSERT(zpool_prop_feature(nvpair_name(elem))); 6055236884Smm 6056236884Smm fname = strchr(nvpair_name(elem), '@') + 1; 6057236884Smm VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature)); 6058236884Smm 6059236884Smm spa_feature_enable(spa, feature, tx); 6060236884Smm break; 6061236884Smm 6062185029Spjd case ZPOOL_PROP_VERSION: 6063236884Smm VERIFY(nvpair_value_uint64(elem, &intval) == 0); 6064185029Spjd /* 6065236884Smm * The version is synced seperatly before other 6066236884Smm * properties and should be correct by now. 6067185029Spjd */ 6068236884Smm ASSERT3U(spa_version(spa), >=, intval); 6069185029Spjd break; 6070168404Spjd 6071185029Spjd case ZPOOL_PROP_ALTROOT: 6072185029Spjd /* 6073185029Spjd * 'altroot' is a non-persistent property. It should 6074185029Spjd * have been set temporarily at creation or import time. 
6075185029Spjd */ 6076185029Spjd ASSERT(spa->spa_root != NULL); 6077185029Spjd break; 6078168404Spjd 6079219089Spjd case ZPOOL_PROP_READONLY: 6080185029Spjd case ZPOOL_PROP_CACHEFILE: 6081185029Spjd /* 6082219089Spjd * 'readonly' and 'cachefile' are also non-persisitent 6083219089Spjd * properties. 6084185029Spjd */ 6085168404Spjd break; 6086228103Smm case ZPOOL_PROP_COMMENT: 6087228103Smm VERIFY(nvpair_value_string(elem, &strval) == 0); 6088228103Smm if (spa->spa_comment != NULL) 6089228103Smm spa_strfree(spa->spa_comment); 6090228103Smm spa->spa_comment = spa_strdup(strval); 6091228103Smm /* 6092228103Smm * We need to dirty the configuration on all the vdevs 6093228103Smm * so that their labels get updated. It's unnecessary 6094228103Smm * to do this for pool creation since the vdev's 6095228103Smm * configuratoin has already been dirtied. 6096228103Smm */ 6097228103Smm if (tx->tx_txg != TXG_INITIAL) 6098228103Smm vdev_config_dirty(spa->spa_root_vdev); 6099228103Smm break; 6100185029Spjd default: 6101185029Spjd /* 6102185029Spjd * Set pool property values in the poolprops mos object. 
6103185029Spjd */ 6104185029Spjd if (spa->spa_pool_props_object == 0) { 6105236884Smm spa->spa_pool_props_object = 6106236884Smm zap_create_link(mos, DMU_OT_POOL_PROPS, 6107185029Spjd DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS, 6108236884Smm tx); 6109185029Spjd } 6110185029Spjd 6111185029Spjd /* normalize the property name */ 6112185029Spjd propname = zpool_prop_to_name(prop); 6113185029Spjd proptype = zpool_prop_get_type(prop); 6114185029Spjd 6115185029Spjd if (nvpair_type(elem) == DATA_TYPE_STRING) { 6116185029Spjd ASSERT(proptype == PROP_TYPE_STRING); 6117185029Spjd VERIFY(nvpair_value_string(elem, &strval) == 0); 6118185029Spjd VERIFY(zap_update(mos, 6119185029Spjd spa->spa_pool_props_object, propname, 6120185029Spjd 1, strlen(strval) + 1, strval, tx) == 0); 6121185029Spjd 6122185029Spjd } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { 6123185029Spjd VERIFY(nvpair_value_uint64(elem, &intval) == 0); 6124185029Spjd 6125185029Spjd if (proptype == PROP_TYPE_INDEX) { 6126185029Spjd const char *unused; 6127185029Spjd VERIFY(zpool_prop_index_to_string( 6128185029Spjd prop, intval, &unused) == 0); 6129185029Spjd } 6130185029Spjd VERIFY(zap_update(mos, 6131185029Spjd spa->spa_pool_props_object, propname, 6132185029Spjd 8, 1, &intval, tx) == 0); 6133185029Spjd } else { 6134185029Spjd ASSERT(0); /* not allowed */ 6135185029Spjd } 6136185029Spjd 6137185029Spjd switch (prop) { 6138185029Spjd case ZPOOL_PROP_DELEGATION: 6139185029Spjd spa->spa_delegation = intval; 6140185029Spjd break; 6141185029Spjd case ZPOOL_PROP_BOOTFS: 6142185029Spjd spa->spa_bootfs = intval; 6143185029Spjd break; 6144185029Spjd case ZPOOL_PROP_FAILUREMODE: 6145185029Spjd spa->spa_failmode = intval; 6146185029Spjd break; 6147219089Spjd case ZPOOL_PROP_AUTOEXPAND: 6148219089Spjd spa->spa_autoexpand = intval; 6149219089Spjd if (tx->tx_txg != TXG_INITIAL) 6150219089Spjd spa_async_request(spa, 6151219089Spjd SPA_ASYNC_AUTOEXPAND); 6152219089Spjd break; 6153219089Spjd case ZPOOL_PROP_DEDUPDITTO: 
6154219089Spjd spa->spa_dedup_ditto = intval; 6155219089Spjd break; 6156185029Spjd default: 6157185029Spjd break; 6158185029Spjd } 6159168404Spjd } 6160185029Spjd 6161185029Spjd /* log internal history if this is not a zpool create */ 6162185029Spjd if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY && 6163185029Spjd tx->tx_txg != TXG_INITIAL) { 6164219089Spjd spa_history_log_internal(LOG_POOL_PROPSET, 6165219089Spjd spa, tx, "%s %lld %s", 6166185029Spjd nvpair_name(elem), intval, spa_name(spa)); 6167185029Spjd } 6168168404Spjd } 6169185029Spjd 6170185029Spjd mutex_exit(&spa->spa_props_lock); 6171168404Spjd} 6172168404Spjd 6173168404Spjd/* 6174219089Spjd * Perform one-time upgrade on-disk changes. spa_version() does not 6175219089Spjd * reflect the new version this txg, so there must be no changes this 6176219089Spjd * txg to anything that the upgrade code depends on after it executes. 6177219089Spjd * Therefore this must be called after dsl_pool_sync() does the sync 6178219089Spjd * tasks. 
6179219089Spjd */ 6180219089Spjdstatic void 6181219089Spjdspa_sync_upgrades(spa_t *spa, dmu_tx_t *tx) 6182219089Spjd{ 6183219089Spjd dsl_pool_t *dp = spa->spa_dsl_pool; 6184219089Spjd 6185219089Spjd ASSERT(spa->spa_sync_pass == 1); 6186219089Spjd 6187219089Spjd if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN && 6188219089Spjd spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) { 6189219089Spjd dsl_pool_create_origin(dp, tx); 6190219089Spjd 6191219089Spjd /* Keeping the origin open increases spa_minref */ 6192219089Spjd spa->spa_minref += 3; 6193219089Spjd } 6194219089Spjd 6195219089Spjd if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES && 6196219089Spjd spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) { 6197219089Spjd dsl_pool_upgrade_clones(dp, tx); 6198219089Spjd } 6199219089Spjd 6200219089Spjd if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES && 6201219089Spjd spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) { 6202219089Spjd dsl_pool_upgrade_dir_clones(dp, tx); 6203219089Spjd 6204219089Spjd /* Keeping the freedir open increases spa_minref */ 6205219089Spjd spa->spa_minref += 3; 6206219089Spjd } 6207236884Smm 6208236884Smm if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES && 6209236884Smm spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) { 6210236884Smm spa_feature_create_zap_objects(spa, tx); 6211236884Smm } 6212219089Spjd} 6213219089Spjd 6214219089Spjd/* 6215168404Spjd * Sync the specified transaction group. New blocks may be dirtied as 6216168404Spjd * part of the process, so we iterate until it converges. 
 */
/*
 * Outline of the steps below, in order:
 *   1. Take SCL_CONFIG as reader and turn pending vdev state changes
 *      into dirty config.
 *   2. Create the tx for 'txg' and arm the deadman timer.
 *   3. Push last txg's deferred frees if this txg has work to do.
 *   4. Run sync passes until the MOS is no longer dirty in this txg.
 *   5. Write the vdev labels/uberblock, retrying until it succeeds.
 *   6. Commit the tx, disarm the deadman, publish the new config and
 *      do the post-sync cleanup.
 *   7. Drop SCL_CONFIG and dispatch any requested async tasks.
 *
 * The pool must be writeable (VERIFY'd on entry).
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bpobj_t *defer_bpo = &spa->spa_deferred_bpobj;
	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int error;

	VERIFY(spa_writeable(spa));

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		/*
		 * Go back to reader.  The locks are dropped in between, so
		 * the outer loop re-checks the state dirty list.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Record when this sync started and arm the deadman to fire
	 * spa_deadman_synctime later.  On FreeBSD that value is converted
	 * from nanoseconds to ticks for callout_reset().
	 */
	spa->spa_sync_starttime = gethrtime();
#ifdef illumos
	VERIFY(cyclic_reprogram(spa->spa_deadman_cycid,
	    spa->spa_sync_starttime + spa->spa_deadman_synctime));
#else	/* FreeBSD */
#ifdef _KERNEL
	callout_reset(&spa->spa_deadman_cycid,
	    hz * spa->spa_deadman_synctime / NANOSEC, spa_deadman, spa);
#endif
#endif

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		/* Stop at the first top-level vdev with a different ratio. */
		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		/* The loop ran to completion: every child passed the check. */
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	/*
	 * If anything has changed in this txg, or if someone is waiting
	 * for this txg to sync (eg, spa_vdev_remove()), push the
	 * deferred frees from the previous txg.  If not, leave them
	 * alone so that we don't generate work on an otherwise idle
	 * system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg) ||
	    ((dsl_scan_active(dp->dp_scan) ||
	    txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
		zio_t *zio = zio_root(spa, NULL, NULL, 0);
		VERIFY3U(bpobj_iterate(defer_bpo,
		    spa_free_sync_cb, zio, tx), ==, 0);
		VERIFY0(zio_wait(zio));
	}

	/*
	 * Iterate to convergence.
	 */
	do {
		/* spa_sync_pass is 1 on the first trip through the loop. */
		int pass = ++spa->spa_sync_pass;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		/*
		 * In early passes, free this txg's blocks now and wait for
		 * the frees.  From pass zfs_sync_pass_deferred_free on,
		 * queue them on the deferred bpobj instead.
		 */
		if (pass < zfs_sync_pass_deferred_free) {
			zio_t *zio = zio_root(spa, NULL, NULL, 0);
			bplist_iterate(free_bpl, spa_free_sync_cb,
			    zio, tx);
			VERIFY(zio_wait(zio) == 0);
		} else {
			bplist_iterate(free_bpl, bpobj_enqueue_cb,
			    defer_bpo, tx);
		}

		ddt_sync(spa, txg);
		dsl_scan_sync(dp, tx);

		/* Sync every vdev queued on this txg's list. */
		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
			vdev_sync(vd, txg);

		/* One-time upgrades run on the first pass only. */
		if (pass == 1)
			spa_sync_upgrades(spa, tx);

	} while (dmu_objset_is_dirty(mos, txg));

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);

			/*
			 * Starting at a random child and wrapping around,
			 * pick up to SPA_DVAS_PER_BP top-level vdevs,
			 * skipping log devices and vdevs whose
			 * vdev_ms_array is 0.
			 */
			for (int c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			/* On failure, try once more with the last arg B_TRUE. */
			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(svd, svdcount, txg,
				    B_TRUE);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(rvd->vdev_child,
				    rvd->vdev_children, txg, B_TRUE);
		}

		if (error == 0)
			spa->spa_last_synced_guid = rvd->vdev_guid;

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/* Both attempts failed: suspend I/O, wait, then retry. */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/* The sync has finished writing; disarm the deadman timer. */
#ifdef illumos
	VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY));
#else	/* FreeBSD */
#ifdef _KERNEL
	callout_drain(&spa->spa_deadman_cycid);
#endif
#endif

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	/* The in-core uberblock becomes the last-synced uberblock. */
	spa->spa_ubsync = spa->spa_uberblock;

	dsl_pool_sync_done(dp, txg);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	spa_update_dspace(spa);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));

	spa->spa_sync_pass = 0;

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	spa_handle_ignored_writes(spa);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}

/*
 * Sync all pools.  We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
6465168404Spjd */ 6466168404Spjdvoid 6467168404Spjdspa_sync_allpools(void) 6468168404Spjd{ 6469168404Spjd spa_t *spa = NULL; 6470168404Spjd mutex_enter(&spa_namespace_lock); 6471168404Spjd while ((spa = spa_next(spa)) != NULL) { 6472219089Spjd if (spa_state(spa) != POOL_STATE_ACTIVE || 6473219089Spjd !spa_writeable(spa) || spa_suspended(spa)) 6474168404Spjd continue; 6475168404Spjd spa_open_ref(spa, FTAG); 6476168404Spjd mutex_exit(&spa_namespace_lock); 6477168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 6478168404Spjd mutex_enter(&spa_namespace_lock); 6479168404Spjd spa_close(spa, FTAG); 6480168404Spjd } 6481168404Spjd mutex_exit(&spa_namespace_lock); 6482168404Spjd} 6483168404Spjd 6484168404Spjd/* 6485168404Spjd * ========================================================================== 6486168404Spjd * Miscellaneous routines 6487168404Spjd * ========================================================================== 6488168404Spjd */ 6489168404Spjd 6490168404Spjd/* 6491168404Spjd * Remove all pools in the system. 6492168404Spjd */ 6493168404Spjdvoid 6494168404Spjdspa_evict_all(void) 6495168404Spjd{ 6496168404Spjd spa_t *spa; 6497168404Spjd 6498168404Spjd /* 6499168404Spjd * Remove all cached state. All pools should be closed now, 6500168404Spjd * so every spa in the AVL tree should be unreferenced. 6501168404Spjd */ 6502168404Spjd mutex_enter(&spa_namespace_lock); 6503168404Spjd while ((spa = spa_next(NULL)) != NULL) { 6504168404Spjd /* 6505168404Spjd * Stop async tasks. The async thread may need to detach 6506168404Spjd * a device that's been replaced, which requires grabbing 6507168404Spjd * spa_namespace_lock, so we must drop it here. 
6508168404Spjd */ 6509168404Spjd spa_open_ref(spa, FTAG); 6510168404Spjd mutex_exit(&spa_namespace_lock); 6511168404Spjd spa_async_suspend(spa); 6512168404Spjd mutex_enter(&spa_namespace_lock); 6513168404Spjd spa_close(spa, FTAG); 6514168404Spjd 6515168404Spjd if (spa->spa_state != POOL_STATE_UNINITIALIZED) { 6516168404Spjd spa_unload(spa); 6517168404Spjd spa_deactivate(spa); 6518168404Spjd } 6519168404Spjd spa_remove(spa); 6520168404Spjd } 6521168404Spjd mutex_exit(&spa_namespace_lock); 6522168404Spjd} 6523168404Spjd 6524168404Spjdvdev_t * 6525209962Smmspa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) 6526168404Spjd{ 6527185029Spjd vdev_t *vd; 6528185029Spjd int i; 6529185029Spjd 6530185029Spjd if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) 6531185029Spjd return (vd); 6532185029Spjd 6533209962Smm if (aux) { 6534185029Spjd for (i = 0; i < spa->spa_l2cache.sav_count; i++) { 6535185029Spjd vd = spa->spa_l2cache.sav_vdevs[i]; 6536185029Spjd if (vd->vdev_guid == guid) 6537185029Spjd return (vd); 6538185029Spjd } 6539209962Smm 6540209962Smm for (i = 0; i < spa->spa_spares.sav_count; i++) { 6541209962Smm vd = spa->spa_spares.sav_vdevs[i]; 6542209962Smm if (vd->vdev_guid == guid) 6543209962Smm return (vd); 6544209962Smm } 6545185029Spjd } 6546185029Spjd 6547185029Spjd return (NULL); 6548168404Spjd} 6549168404Spjd 6550168404Spjdvoid 6551185029Spjdspa_upgrade(spa_t *spa, uint64_t version) 6552168404Spjd{ 6553219089Spjd ASSERT(spa_writeable(spa)); 6554219089Spjd 6555185029Spjd spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); 6556168404Spjd 6557168404Spjd /* 6558168404Spjd * This should only be called for a non-faulted pool, and since a 6559168404Spjd * future version would result in an unopenable pool, this shouldn't be 6560168404Spjd * possible. 
6561168404Spjd */ 6562247592Sdelphij ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version)); 6563185029Spjd ASSERT(version >= spa->spa_uberblock.ub_version); 6564168404Spjd 6565185029Spjd spa->spa_uberblock.ub_version = version; 6566168404Spjd vdev_config_dirty(spa->spa_root_vdev); 6567168404Spjd 6568185029Spjd spa_config_exit(spa, SCL_ALL, FTAG); 6569168404Spjd 6570168404Spjd txg_wait_synced(spa_get_dsl(spa), 0); 6571168404Spjd} 6572168404Spjd 6573168404Spjdboolean_t 6574168404Spjdspa_has_spare(spa_t *spa, uint64_t guid) 6575168404Spjd{ 6576168404Spjd int i; 6577168404Spjd uint64_t spareguid; 6578185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 6579168404Spjd 6580185029Spjd for (i = 0; i < sav->sav_count; i++) 6581185029Spjd if (sav->sav_vdevs[i]->vdev_guid == guid) 6582168404Spjd return (B_TRUE); 6583168404Spjd 6584185029Spjd for (i = 0; i < sav->sav_npending; i++) { 6585185029Spjd if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, 6586185029Spjd &spareguid) == 0 && spareguid == guid) 6587168404Spjd return (B_TRUE); 6588168404Spjd } 6589168404Spjd 6590168404Spjd return (B_FALSE); 6591168404Spjd} 6592168404Spjd 6593185029Spjd/* 6594185029Spjd * Check if a pool has an active shared spare device. 
6595185029Spjd * Note: reference count of an active spare is 2, as a spare and as a replace 6596185029Spjd */ 6597185029Spjdstatic boolean_t 6598185029Spjdspa_has_active_shared_spare(spa_t *spa) 6599168404Spjd{ 6600185029Spjd int i, refcnt; 6601185029Spjd uint64_t pool; 6602185029Spjd spa_aux_vdev_t *sav = &spa->spa_spares; 6603185029Spjd 6604185029Spjd for (i = 0; i < sav->sav_count; i++) { 6605185029Spjd if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, 6606185029Spjd &refcnt) && pool != 0ULL && pool == spa_guid(spa) && 6607185029Spjd refcnt > 2) 6608185029Spjd return (B_TRUE); 6609185029Spjd } 6610185029Spjd 6611185029Spjd return (B_FALSE); 6612168404Spjd} 6613168404Spjd 6614185029Spjd/* 6615185029Spjd * Post a sysevent corresponding to the given event. The 'name' must be one of 6616185029Spjd * the event definitions in sys/sysevent/eventdefs.h. The payload will be 6617185029Spjd * filled in from the spa and (optionally) the vdev. This doesn't do anything 6618185029Spjd * in the userland libzpool, as we don't want consumers to misinterpret ztest 6619185029Spjd * or zdb as real changes. 
6620185029Spjd */ 6621185029Spjdvoid 6622185029Spjdspa_event_notify(spa_t *spa, vdev_t *vd, const char *name) 6623168404Spjd{ 6624185029Spjd#ifdef _KERNEL 6625185029Spjd sysevent_t *ev; 6626185029Spjd sysevent_attr_list_t *attr = NULL; 6627185029Spjd sysevent_value_t value; 6628185029Spjd sysevent_id_t eid; 6629168404Spjd 6630185029Spjd ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", 6631185029Spjd SE_SLEEP); 6632168404Spjd 6633185029Spjd value.value_type = SE_DATA_TYPE_STRING; 6634185029Spjd value.value.sv_string = spa_name(spa); 6635185029Spjd if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) 6636185029Spjd goto done; 6637168404Spjd 6638185029Spjd value.value_type = SE_DATA_TYPE_UINT64; 6639185029Spjd value.value.sv_uint64 = spa_guid(spa); 6640185029Spjd if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) 6641185029Spjd goto done; 6642168404Spjd 6643185029Spjd if (vd) { 6644185029Spjd value.value_type = SE_DATA_TYPE_UINT64; 6645185029Spjd value.value.sv_uint64 = vd->vdev_guid; 6646185029Spjd if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, 6647185029Spjd SE_SLEEP) != 0) 6648185029Spjd goto done; 6649168404Spjd 6650185029Spjd if (vd->vdev_path) { 6651185029Spjd value.value_type = SE_DATA_TYPE_STRING; 6652185029Spjd value.value.sv_string = vd->vdev_path; 6653185029Spjd if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, 6654185029Spjd &value, SE_SLEEP) != 0) 6655185029Spjd goto done; 6656168404Spjd } 6657168404Spjd } 6658168404Spjd 6659185029Spjd if (sysevent_attach_attributes(ev, attr) != 0) 6660185029Spjd goto done; 6661185029Spjd attr = NULL; 6662168404Spjd 6663185029Spjd (void) log_sysevent(ev, SE_SLEEP, &eid); 6664185029Spjd 6665185029Spjddone: 6666185029Spjd if (attr) 6667185029Spjd sysevent_free_attr(attr); 6668185029Spjd sysevent_free(ev); 6669185029Spjd#endif 6670168404Spjd} 6671