1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23219089Spjd * 24219089Spjd * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org> 25168404Spjd * All rights reserved. 26249195Smm * Copyright (c) 2013 by Delphix. All rights reserved. 27255750Sdelphij * Copyright (c) 2013, Joyent, Inc. All rights reserved. 28168404Spjd */ 29168404Spjd 30219089Spjd/* Portions Copyright 2010 Robert Milkowski */ 31226724Smm/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 32219089Spjd 33168404Spjd/* 34168404Spjd * ZFS volume emulation driver. 35168404Spjd * 36168404Spjd * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. 
37168404Spjd * Volumes are accessed through the symbolic links named: 38168404Spjd * 39168404Spjd * /dev/zvol/dsk/<pool_name>/<dataset_name> 40168404Spjd * /dev/zvol/rdsk/<pool_name>/<dataset_name> 41168404Spjd * 42219089Spjd * These links are created by the /dev filesystem (sdev_zvolops.c). 43168404Spjd * Volumes are persistent through reboot. No user command needs to be 44168404Spjd * run before opening and using a device. 45219089Spjd * 46219089Spjd * FreeBSD notes. 47219089Spjd * On FreeBSD ZVOLs are simply GEOM providers like any other storage device 48219089Spjd * in the system. 49168404Spjd */ 50168404Spjd 51168962Spjd#include <sys/types.h> 52168404Spjd#include <sys/param.h> 53168404Spjd#include <sys/kernel.h> 54168404Spjd#include <sys/errno.h> 55168404Spjd#include <sys/uio.h> 56168404Spjd#include <sys/bio.h> 57168962Spjd#include <sys/buf.h> 58168404Spjd#include <sys/kmem.h> 59168404Spjd#include <sys/conf.h> 60168404Spjd#include <sys/cmn_err.h> 61168404Spjd#include <sys/stat.h> 62168404Spjd#include <sys/zap.h> 63168404Spjd#include <sys/spa.h> 64255750Sdelphij#include <sys/spa_impl.h> 65168404Spjd#include <sys/zio.h> 66185029Spjd#include <sys/dmu_traverse.h> 67185029Spjd#include <sys/dnode.h> 68185029Spjd#include <sys/dsl_dataset.h> 69168404Spjd#include <sys/dsl_prop.h> 70168962Spjd#include <sys/dkio.h> 71168404Spjd#include <sys/byteorder.h> 72168962Spjd#include <sys/sunddi.h> 73168404Spjd#include <sys/dirent.h> 74168962Spjd#include <sys/policy.h> 75168404Spjd#include <sys/fs/zfs.h> 76168404Spjd#include <sys/zfs_ioctl.h> 77168404Spjd#include <sys/zil.h> 78168404Spjd#include <sys/refcount.h> 79168404Spjd#include <sys/zfs_znode.h> 80168404Spjd#include <sys/zfs_rlock.h> 81185029Spjd#include <sys/vdev_impl.h> 82255750Sdelphij#include <sys/vdev_raidz.h> 83185029Spjd#include <sys/zvol.h> 84209962Smm#include <sys/zil_impl.h> 85243524Smm#include <sys/dbuf.h> 86255750Sdelphij#include <sys/dmu_tx.h> 87255750Sdelphij#include <sys/zfeature.h> 88255750Sdelphij#include 
<sys/zio_checksum.h> 89255750Sdelphij 90168404Spjd#include <geom/geom.h> 91168404Spjd 92168404Spjd#include "zfs_namecheck.h" 93168404Spjd 94168404Spjdstruct g_class zfs_zvol_class = { 95168404Spjd .name = "ZFS::ZVOL", 96168404Spjd .version = G_VERSION, 97168404Spjd}; 98168404Spjd 99168404SpjdDECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol); 100168404Spjd 101219089Spjdvoid *zfsdev_state; 102219089Spjdstatic char *zvol_tag = "zvol_tag"; 103219089Spjd 104219089Spjd#define ZVOL_DUMPSIZE "dumpsize" 105219089Spjd 106185029Spjd/* 107224791Spjd * The spa_namespace_lock protects the zfsdev_state structure from being 108224791Spjd * modified while it's being used, e.g. an open that comes in before a 109224791Spjd * create finishes. It also protects temporary opens of the dataset so that, 110185029Spjd * e.g., an open doesn't get a spurious EBUSY. 111185029Spjd */ 112168404Spjdstatic uint32_t zvol_minors; 113168404Spjd 114185029Spjdtypedef struct zvol_extent { 115208047Smm list_node_t ze_node; 116185029Spjd dva_t ze_dva; /* dva associated with this extent */ 117208047Smm uint64_t ze_nblks; /* number of blocks in extent */ 118185029Spjd} zvol_extent_t; 119185029Spjd 120168404Spjd/* 121168404Spjd * The in-core state of each volume. 122168404Spjd */ 123168404Spjdtypedef struct zvol_state { 124168404Spjd char zv_name[MAXPATHLEN]; /* pool/dd name */ 125168404Spjd uint64_t zv_volsize; /* amount of space we advertise */ 126168404Spjd uint64_t zv_volblocksize; /* volume block size */ 127168404Spjd struct g_provider *zv_provider; /* GEOM provider */ 128168404Spjd uint8_t zv_min_bs; /* minimum addressable block shift */ 129219089Spjd uint8_t zv_flags; /* readonly, dumpified, etc. 
*/ 130168404Spjd objset_t *zv_objset; /* objset handle */ 131168404Spjd uint32_t zv_total_opens; /* total open count */ 132168404Spjd zilog_t *zv_zilog; /* ZIL handle */ 133208047Smm list_t zv_extents; /* List of extents for dump */ 134168404Spjd znode_t zv_znode; /* for range locking */ 135219089Spjd dmu_buf_t *zv_dbuf; /* bonus handle */ 136168404Spjd int zv_state; 137168404Spjd struct bio_queue_head zv_queue; 138168404Spjd struct mtx zv_queue_mtx; /* zv_queue mutex */ 139168404Spjd} zvol_state_t; 140168404Spjd 141168404Spjd/* 142185029Spjd * zvol specific flags 143185029Spjd */ 144185029Spjd#define ZVOL_RDONLY 0x1 145185029Spjd#define ZVOL_DUMPIFIED 0x2 146185029Spjd#define ZVOL_EXCL 0x4 147219089Spjd#define ZVOL_WCE 0x8 148185029Spjd 149185029Spjd/* 150168404Spjd * zvol maximum transfer in one DMU tx. 151168404Spjd */ 152168404Spjdint zvol_maxphys = DMU_MAX_ACCESS/2; 153168404Spjd 154219089Spjdextern int zfs_set_prop_nvlist(const char *, zprop_source_t, 155248571Smm nvlist_t *, nvlist_t *); 156219089Spjdstatic int zvol_remove_zv(zvol_state_t *); 157168404Spjdstatic int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); 158185029Spjdstatic int zvol_dumpify(zvol_state_t *zv); 159185029Spjdstatic int zvol_dump_fini(zvol_state_t *zv); 160185029Spjdstatic int zvol_dump_init(zvol_state_t *zv, boolean_t resize); 161168404Spjd 162219089Spjdstatic zvol_state_t *zvol_geom_create(const char *name); 163219089Spjdstatic void zvol_geom_run(zvol_state_t *zv); 164219089Spjdstatic void zvol_geom_destroy(zvol_state_t *zv); 165219089Spjdstatic int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace); 166219089Spjdstatic void zvol_geom_start(struct bio *bp); 167219089Spjdstatic void zvol_geom_worker(void *arg); 168219089Spjd 169185029Spjdstatic void 170219089Spjdzvol_size_changed(zvol_state_t *zv) 171185029Spjd{ 172219089Spjd#ifdef sun 173219089Spjd dev_t dev = makedevice(maj, min); 174219089Spjd 175219089Spjd VERIFY(ddi_prop_update_int64(dev, zfs_dip, 
176219089Spjd "Size", volsize) == DDI_SUCCESS); 177219089Spjd VERIFY(ddi_prop_update_int64(dev, zfs_dip, 178219089Spjd "Nblocks", lbtodb(volsize)) == DDI_SUCCESS); 179219089Spjd 180219089Spjd /* Notify specfs to invalidate the cached size */ 181219089Spjd spec_size_invalidate(dev, VBLK); 182219089Spjd spec_size_invalidate(dev, VCHR); 183219089Spjd#else /* !sun */ 184196927Spjd struct g_provider *pp; 185185029Spjd 186196927Spjd pp = zv->zv_provider; 187196927Spjd if (pp == NULL) 188196927Spjd return; 189238656Strasz g_topology_lock(); 190238656Strasz g_resize_provider(pp, zv->zv_volsize); 191238656Strasz g_topology_unlock(); 192219089Spjd#endif /* !sun */ 193185029Spjd} 194185029Spjd 195168404Spjdint 196168404Spjdzvol_check_volsize(uint64_t volsize, uint64_t blocksize) 197168404Spjd{ 198168404Spjd if (volsize == 0) 199249195Smm return (SET_ERROR(EINVAL)); 200168404Spjd 201168404Spjd if (volsize % blocksize != 0) 202249195Smm return (SET_ERROR(EINVAL)); 203168404Spjd 204168404Spjd#ifdef _ILP32 205168404Spjd if (volsize - 1 > SPEC_MAXOFFSET_T) 206249195Smm return (SET_ERROR(EOVERFLOW)); 207168404Spjd#endif 208168404Spjd return (0); 209168404Spjd} 210168404Spjd 211168404Spjdint 212168404Spjdzvol_check_volblocksize(uint64_t volblocksize) 213168404Spjd{ 214168404Spjd if (volblocksize < SPA_MINBLOCKSIZE || 215168404Spjd volblocksize > SPA_MAXBLOCKSIZE || 216168404Spjd !ISP2(volblocksize)) 217249195Smm return (SET_ERROR(EDOM)); 218168404Spjd 219168404Spjd return (0); 220168404Spjd} 221168404Spjd 222168404Spjdint 223168404Spjdzvol_get_stats(objset_t *os, nvlist_t *nv) 224168404Spjd{ 225168404Spjd int error; 226168404Spjd dmu_object_info_t doi; 227168404Spjd uint64_t val; 228168404Spjd 229168404Spjd error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); 230168404Spjd if (error) 231168404Spjd return (error); 232168404Spjd 233168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); 234168404Spjd 235168404Spjd error = dmu_object_info(os, ZVOL_OBJ, &doi); 
236168404Spjd 237168404Spjd if (error == 0) { 238168404Spjd dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, 239168404Spjd doi.doi_data_block_size); 240168404Spjd } 241168404Spjd 242168404Spjd return (error); 243168404Spjd} 244168404Spjd 245168404Spjdstatic zvol_state_t * 246168404Spjdzvol_minor_lookup(const char *name) 247168404Spjd{ 248168404Spjd struct g_provider *pp; 249168404Spjd struct g_geom *gp; 250219089Spjd zvol_state_t *zv = NULL; 251168404Spjd 252224791Spjd ASSERT(MUTEX_HELD(&spa_namespace_lock)); 253168404Spjd 254219089Spjd g_topology_lock(); 255168404Spjd LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) { 256219089Spjd pp = LIST_FIRST(&gp->provider); 257219089Spjd if (pp == NULL) 258219089Spjd continue; 259219089Spjd zv = pp->private; 260219089Spjd if (zv == NULL) 261219089Spjd continue; 262219089Spjd if (strcmp(zv->zv_name, name) == 0) 263200126Spjd break; 264168404Spjd } 265219089Spjd g_topology_unlock(); 266168404Spjd 267219089Spjd return (gp != NULL ? zv : NULL); 268168404Spjd} 269168404Spjd 270185029Spjd/* extent mapping arg */ 271185029Spjdstruct maparg { 272208047Smm zvol_state_t *ma_zv; 273208047Smm uint64_t ma_blks; 274185029Spjd}; 275185029Spjd 276185029Spjd/*ARGSUSED*/ 277185029Spjdstatic int 278246666Smmzvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 279219089Spjd const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 280185029Spjd{ 281208047Smm struct maparg *ma = arg; 282208047Smm zvol_extent_t *ze; 283208047Smm int bs = ma->ma_zv->zv_volblocksize; 284185029Spjd 285208047Smm if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) 286208047Smm return (0); 287185029Spjd 288208047Smm VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); 289208047Smm ma->ma_blks++; 290185029Spjd 291208047Smm /* Abort immediately if we have encountered gang blocks */ 292208047Smm if (BP_IS_GANG(bp)) 293249195Smm return (SET_ERROR(EFRAGS)); 294185029Spjd 295208047Smm /* 296208047Smm * See if the block is at the end of the previous extent. 
297208047Smm */ 298208047Smm ze = list_tail(&ma->ma_zv->zv_extents); 299208047Smm if (ze && 300208047Smm DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && 301208047Smm DVA_GET_OFFSET(BP_IDENTITY(bp)) == 302208047Smm DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { 303208047Smm ze->ze_nblks++; 304208047Smm return (0); 305185029Spjd } 306185029Spjd 307208047Smm dprintf_bp(bp, "%s", "next blkptr:"); 308185029Spjd 309208047Smm /* start a new extent */ 310208047Smm ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); 311208047Smm ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ 312208047Smm ze->ze_nblks = 1; 313208047Smm list_insert_tail(&ma->ma_zv->zv_extents, ze); 314208047Smm return (0); 315208047Smm} 316185029Spjd 317208047Smmstatic void 318208047Smmzvol_free_extents(zvol_state_t *zv) 319208047Smm{ 320208047Smm zvol_extent_t *ze; 321185029Spjd 322208047Smm while (ze = list_head(&zv->zv_extents)) { 323208047Smm list_remove(&zv->zv_extents, ze); 324208047Smm kmem_free(ze, sizeof (zvol_extent_t)); 325185029Spjd } 326208047Smm} 327185029Spjd 328208047Smmstatic int 329208047Smmzvol_get_lbas(zvol_state_t *zv) 330208047Smm{ 331219089Spjd objset_t *os = zv->zv_objset; 332208047Smm struct maparg ma; 333208047Smm int err; 334185029Spjd 335208047Smm ma.ma_zv = zv; 336208047Smm ma.ma_blks = 0; 337208047Smm zvol_free_extents(zv); 338208047Smm 339219089Spjd /* commit any in-flight changes before traversing the dataset */ 340219089Spjd txg_wait_synced(dmu_objset_pool(os), 0); 341219089Spjd err = traverse_dataset(dmu_objset_ds(os), 0, 342208047Smm TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); 343208047Smm if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { 344208047Smm zvol_free_extents(zv); 345208047Smm return (err ? 
err : EIO); 346185029Spjd } 347185029Spjd 348185029Spjd return (0); 349185029Spjd} 350185029Spjd 351185029Spjd/* ARGSUSED */ 352185029Spjdvoid 353185029Spjdzvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 354185029Spjd{ 355185029Spjd zfs_creat_t *zct = arg; 356185029Spjd nvlist_t *nvprops = zct->zct_props; 357168404Spjd int error; 358168404Spjd uint64_t volblocksize, volsize; 359168404Spjd 360185029Spjd VERIFY(nvlist_lookup_uint64(nvprops, 361168404Spjd zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); 362185029Spjd if (nvlist_lookup_uint64(nvprops, 363168404Spjd zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) 364168404Spjd volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); 365168404Spjd 366168404Spjd /* 367185029Spjd * These properties must be removed from the list so the generic 368168404Spjd * property setting step won't apply to them. 369168404Spjd */ 370185029Spjd VERIFY(nvlist_remove_all(nvprops, 371168404Spjd zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); 372185029Spjd (void) nvlist_remove_all(nvprops, 373168404Spjd zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); 374168404Spjd 375168404Spjd error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, 376168404Spjd DMU_OT_NONE, 0, tx); 377168404Spjd ASSERT(error == 0); 378168404Spjd 379168404Spjd error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, 380168404Spjd DMU_OT_NONE, 0, tx); 381168404Spjd ASSERT(error == 0); 382168404Spjd 383168404Spjd error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 384168404Spjd ASSERT(error == 0); 385168404Spjd} 386168404Spjd 387168404Spjd/* 388168404Spjd * Replay a TX_WRITE ZIL transaction that didn't get committed 389168404Spjd * after a system failure 390168404Spjd */ 391168404Spjdstatic int 392168404Spjdzvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) 393168404Spjd{ 394168404Spjd objset_t *os = zv->zv_objset; 395168404Spjd char *data = (char *)(lr + 1); /* data follows lr_write_t */ 
396219089Spjd uint64_t offset, length; 397168404Spjd dmu_tx_t *tx; 398168404Spjd int error; 399168404Spjd 400168404Spjd if (byteswap) 401168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 402168404Spjd 403219089Spjd offset = lr->lr_offset; 404219089Spjd length = lr->lr_length; 405209962Smm 406219089Spjd /* If it's a dmu_sync() block, write the whole block */ 407219089Spjd if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 408219089Spjd uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 409219089Spjd if (length < blocksize) { 410219089Spjd offset -= offset % blocksize; 411219089Spjd length = blocksize; 412219089Spjd } 413219089Spjd } 414219089Spjd 415168404Spjd tx = dmu_tx_create(os); 416219089Spjd dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length); 417209962Smm error = dmu_tx_assign(tx, TXG_WAIT); 418168404Spjd if (error) { 419168404Spjd dmu_tx_abort(tx); 420168404Spjd } else { 421219089Spjd dmu_write(os, ZVOL_OBJ, offset, length, data, tx); 422168404Spjd dmu_tx_commit(tx); 423168404Spjd } 424168404Spjd 425168404Spjd return (error); 426168404Spjd} 427168404Spjd 428168404Spjd/* ARGSUSED */ 429168404Spjdstatic int 430168404Spjdzvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) 431168404Spjd{ 432249195Smm return (SET_ERROR(ENOTSUP)); 433168404Spjd} 434168404Spjd 435168404Spjd/* 436168404Spjd * Callback vectors for replaying records. 437168404Spjd * Only TX_WRITE is needed for zvol. 
438168404Spjd */ 439168404Spjdzil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { 440168404Spjd zvol_replay_err, /* 0 no such transaction type */ 441168404Spjd zvol_replay_err, /* TX_CREATE */ 442168404Spjd zvol_replay_err, /* TX_MKDIR */ 443168404Spjd zvol_replay_err, /* TX_MKXATTR */ 444168404Spjd zvol_replay_err, /* TX_SYMLINK */ 445168404Spjd zvol_replay_err, /* TX_REMOVE */ 446168404Spjd zvol_replay_err, /* TX_RMDIR */ 447168404Spjd zvol_replay_err, /* TX_LINK */ 448168404Spjd zvol_replay_err, /* TX_RENAME */ 449168404Spjd zvol_replay_write, /* TX_WRITE */ 450168404Spjd zvol_replay_err, /* TX_TRUNCATE */ 451168404Spjd zvol_replay_err, /* TX_SETATTR */ 452168404Spjd zvol_replay_err, /* TX_ACL */ 453209962Smm zvol_replay_err, /* TX_CREATE_ACL */ 454209962Smm zvol_replay_err, /* TX_CREATE_ATTR */ 455209962Smm zvol_replay_err, /* TX_CREATE_ACL_ATTR */ 456209962Smm zvol_replay_err, /* TX_MKDIR_ACL */ 457209962Smm zvol_replay_err, /* TX_MKDIR_ATTR */ 458209962Smm zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ 459209962Smm zvol_replay_err, /* TX_WRITE2 */ 460168404Spjd}; 461168404Spjd 462219089Spjd#ifdef sun 463219089Spjdint 464219089Spjdzvol_name2minor(const char *name, minor_t *minor) 465219089Spjd{ 466219089Spjd zvol_state_t *zv; 467219089Spjd 468224791Spjd mutex_enter(&spa_namespace_lock); 469219089Spjd zv = zvol_minor_lookup(name); 470219089Spjd if (minor && zv) 471219089Spjd *minor = zv->zv_minor; 472224791Spjd mutex_exit(&spa_namespace_lock); 473219089Spjd return (zv ? 0 : -1); 474219089Spjd} 475219089Spjd#endif /* sun */ 476219089Spjd 477168404Spjd/* 478185029Spjd * Create a minor node (plus a whole lot more) for the specified volume. 
479185029Spjd */ 480185029Spjdint 481219089Spjdzvol_create_minor(const char *name) 482185029Spjd{ 483219089Spjd zfs_soft_state_t *zs; 484168404Spjd zvol_state_t *zv; 485168404Spjd objset_t *os; 486168404Spjd dmu_object_info_t doi; 487241297Savg uint64_t volsize; 488168404Spjd int error; 489168404Spjd 490219089Spjd ZFS_LOG(1, "Creating ZVOL %s...", name); 491168404Spjd 492224791Spjd mutex_enter(&spa_namespace_lock); 493219089Spjd 494219089Spjd if (zvol_minor_lookup(name) != NULL) { 495224791Spjd mutex_exit(&spa_namespace_lock); 496249195Smm return (SET_ERROR(EEXIST)); 497168404Spjd } 498168404Spjd 499219089Spjd /* lie and say we're read-only */ 500219089Spjd error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); 501168404Spjd 502168404Spjd if (error) { 503224791Spjd mutex_exit(&spa_namespace_lock); 504219089Spjd return (error); 505168404Spjd } 506168404Spjd 507219089Spjd#ifdef sun 508219089Spjd if ((minor = zfsdev_minor_alloc()) == 0) { 509219089Spjd dmu_objset_disown(os, FTAG); 510224791Spjd mutex_exit(&spa_namespace_lock); 511249195Smm return (SET_ERROR(ENXIO)); 512219089Spjd } 513168404Spjd 514219089Spjd if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { 515219089Spjd dmu_objset_disown(os, FTAG); 516224791Spjd mutex_exit(&spa_namespace_lock); 517249195Smm return (SET_ERROR(EAGAIN)); 518219089Spjd } 519219089Spjd (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, 520219089Spjd (char *)name); 521219089Spjd 522219089Spjd (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor); 523219089Spjd 524219089Spjd if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, 525219089Spjd minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 526219089Spjd ddi_soft_state_free(zfsdev_state, minor); 527219089Spjd dmu_objset_disown(os, FTAG); 528224791Spjd mutex_exit(&spa_namespace_lock); 529249195Smm return (SET_ERROR(EAGAIN)); 530219089Spjd } 531219089Spjd 532219089Spjd (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); 533219089Spjd 534219089Spjd if 
(ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, 535219089Spjd minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 536219089Spjd ddi_remove_minor_node(zfs_dip, chrbuf); 537219089Spjd ddi_soft_state_free(zfsdev_state, minor); 538219089Spjd dmu_objset_disown(os, FTAG); 539224791Spjd mutex_exit(&spa_namespace_lock); 540249195Smm return (SET_ERROR(EAGAIN)); 541219089Spjd } 542219089Spjd 543219089Spjd zs = ddi_get_soft_state(zfsdev_state, minor); 544219089Spjd zs->zss_type = ZSST_ZVOL; 545219089Spjd zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); 546219089Spjd#else /* !sun */ 547219089Spjd 548241297Savg error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 549241297Savg if (error) { 550241297Savg ASSERT(error == 0); 551241297Savg dmu_objset_disown(os, zvol_tag); 552241297Savg mutex_exit(&spa_namespace_lock); 553241297Savg return (error); 554241297Savg } 555241297Savg 556219089Spjd DROP_GIANT(); 557219089Spjd g_topology_lock(); 558219089Spjd zv = zvol_geom_create(name); 559241297Savg zv->zv_volsize = volsize; 560241297Savg zv->zv_provider->mediasize = zv->zv_volsize; 561241297Savg 562219089Spjd#endif /* !sun */ 563219089Spjd 564219089Spjd (void) strlcpy(zv->zv_name, name, MAXPATHLEN); 565168404Spjd zv->zv_min_bs = DEV_BSHIFT; 566168404Spjd zv->zv_objset = os; 567219089Spjd if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) 568219089Spjd zv->zv_flags |= ZVOL_RDONLY; 569168404Spjd mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); 570168404Spjd avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, 571168404Spjd sizeof (rl_t), offsetof(rl_t, r_node)); 572208047Smm list_create(&zv->zv_extents, sizeof (zvol_extent_t), 573208047Smm offsetof(zvol_extent_t, ze_node)); 574168404Spjd /* get and cache the blocksize */ 575168404Spjd error = dmu_object_info(os, ZVOL_OBJ, &doi); 576168404Spjd ASSERT(error == 0); 577168404Spjd zv->zv_volblocksize = doi.doi_data_block_size; 578168404Spjd 579219089Spjd if 
(spa_writeable(dmu_objset_spa(os))) { 580219089Spjd if (zil_replay_disable) 581219089Spjd zil_destroy(dmu_objset_zil(os), B_FALSE); 582219089Spjd else 583219089Spjd zil_replay(os, zv, zvol_replay_vector); 584219089Spjd } 585219089Spjd dmu_objset_disown(os, FTAG); 586219089Spjd zv->zv_objset = NULL; 587168404Spjd 588219089Spjd zvol_minors++; 589168404Spjd 590224791Spjd mutex_exit(&spa_namespace_lock); 591168404Spjd 592219089Spjd zvol_geom_run(zv); 593168404Spjd 594168404Spjd g_topology_unlock(); 595168404Spjd PICKUP_GIANT(); 596168404Spjd 597219089Spjd ZFS_LOG(1, "ZVOL %s created.", name); 598219089Spjd 599219089Spjd return (0); 600168404Spjd} 601168404Spjd 602168404Spjd/* 603168404Spjd * Remove minor node for the specified volume. 604168404Spjd */ 605219089Spjdstatic int 606219089Spjdzvol_remove_zv(zvol_state_t *zv) 607219089Spjd{ 608219089Spjd#ifdef sun 609219089Spjd minor_t minor = zv->zv_minor; 610219089Spjd#endif 611219089Spjd 612224791Spjd ASSERT(MUTEX_HELD(&spa_namespace_lock)); 613219089Spjd if (zv->zv_total_opens != 0) 614249195Smm return (SET_ERROR(EBUSY)); 615219089Spjd 616219089Spjd ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name); 617219089Spjd 618219089Spjd#ifdef sun 619219089Spjd (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor); 620219089Spjd ddi_remove_minor_node(zfs_dip, nmbuf); 621219089Spjd#endif /* sun */ 622219089Spjd 623219089Spjd avl_destroy(&zv->zv_znode.z_range_avl); 624219089Spjd mutex_destroy(&zv->zv_znode.z_range_lock); 625219089Spjd 626219089Spjd zvol_geom_destroy(zv); 627219089Spjd 628219089Spjd zvol_minors--; 629219089Spjd return (0); 630219089Spjd} 631219089Spjd 632168404Spjdint 633168404Spjdzvol_remove_minor(const char *name) 634168404Spjd{ 635168404Spjd zvol_state_t *zv; 636219089Spjd int rc; 637168404Spjd 638224791Spjd mutex_enter(&spa_namespace_lock); 639168404Spjd if ((zv = zvol_minor_lookup(name)) == NULL) { 640224791Spjd mutex_exit(&spa_namespace_lock); 641249195Smm return (SET_ERROR(ENXIO)); 642168404Spjd } 643219089Spjd 
g_topology_lock(); 644219089Spjd rc = zvol_remove_zv(zv); 645219089Spjd g_topology_unlock(); 646224791Spjd mutex_exit(&spa_namespace_lock); 647219089Spjd return (rc); 648219089Spjd} 649168404Spjd 650219089Spjdint 651219089Spjdzvol_first_open(zvol_state_t *zv) 652219089Spjd{ 653219089Spjd objset_t *os; 654219089Spjd uint64_t volsize; 655219089Spjd int error; 656219089Spjd uint64_t readonly; 657168404Spjd 658219089Spjd /* lie and say we're read-only */ 659219089Spjd error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, 660219089Spjd zvol_tag, &os); 661219089Spjd if (error) 662219089Spjd return (error); 663168404Spjd 664219089Spjd error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 665219089Spjd if (error) { 666219089Spjd ASSERT(error == 0); 667219089Spjd dmu_objset_disown(os, zvol_tag); 668219089Spjd return (error); 669219089Spjd } 670219089Spjd zv->zv_objset = os; 671219089Spjd error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); 672219089Spjd if (error) { 673219089Spjd dmu_objset_disown(os, zvol_tag); 674219089Spjd return (error); 675219089Spjd } 676219089Spjd zv->zv_volsize = volsize; 677219089Spjd zv->zv_zilog = zil_open(os, zvol_get_data); 678219089Spjd zvol_size_changed(zv); 679168404Spjd 680219089Spjd VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly, 681219089Spjd NULL) == 0); 682219089Spjd if (readonly || dmu_objset_is_snapshot(os) || 683219089Spjd !spa_writeable(dmu_objset_spa(os))) 684219089Spjd zv->zv_flags |= ZVOL_RDONLY; 685219089Spjd else 686219089Spjd zv->zv_flags &= ~ZVOL_RDONLY; 687219089Spjd return (error); 688219089Spjd} 689168404Spjd 690219089Spjdvoid 691219089Spjdzvol_last_close(zvol_state_t *zv) 692219089Spjd{ 693168404Spjd zil_close(zv->zv_zilog); 694168404Spjd zv->zv_zilog = NULL; 695239774Smm 696219089Spjd dmu_buf_rele(zv->zv_dbuf, zvol_tag); 697219089Spjd zv->zv_dbuf = NULL; 698239774Smm 699239774Smm /* 700239774Smm * Evict cached data 701239774Smm */ 702239774Smm if 
(dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && 703239774Smm !(zv->zv_flags & ZVOL_RDONLY)) 704239774Smm txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 705248571Smm dmu_objset_evict_dbufs(zv->zv_objset); 706239774Smm 707219089Spjd dmu_objset_disown(zv->zv_objset, zvol_tag); 708168404Spjd zv->zv_objset = NULL; 709168404Spjd} 710168404Spjd 711219089Spjd#ifdef sun 712168404Spjdint 713185029Spjdzvol_prealloc(zvol_state_t *zv) 714168404Spjd{ 715185029Spjd objset_t *os = zv->zv_objset; 716168404Spjd dmu_tx_t *tx; 717185029Spjd uint64_t refd, avail, usedobjs, availobjs; 718185029Spjd uint64_t resid = zv->zv_volsize; 719185029Spjd uint64_t off = 0; 720185029Spjd 721185029Spjd /* Check the space usage before attempting to allocate the space */ 722185029Spjd dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); 723185029Spjd if (avail < zv->zv_volsize) 724249195Smm return (SET_ERROR(ENOSPC)); 725185029Spjd 726185029Spjd /* Free old extents if they exist */ 727185029Spjd zvol_free_extents(zv); 728185029Spjd 729185029Spjd while (resid != 0) { 730185029Spjd int error; 731185029Spjd uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); 732185029Spjd 733185029Spjd tx = dmu_tx_create(os); 734185029Spjd dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 735185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 736185029Spjd if (error) { 737185029Spjd dmu_tx_abort(tx); 738185029Spjd (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); 739185029Spjd return (error); 740185029Spjd } 741219089Spjd dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx); 742185029Spjd dmu_tx_commit(tx); 743185029Spjd off += bytes; 744185029Spjd resid -= bytes; 745185029Spjd } 746185029Spjd txg_wait_synced(dmu_objset_pool(os), 0); 747185029Spjd 748185029Spjd return (0); 749185029Spjd} 750219089Spjd#endif /* sun */ 751185029Spjd 752248571Smmstatic int 753219089Spjdzvol_update_volsize(objset_t *os, uint64_t volsize) 754185029Spjd{ 755185029Spjd dmu_tx_t *tx; 756168404Spjd int error; 757185029Spjd 758224791Spjd 
ASSERT(MUTEX_HELD(&spa_namespace_lock)); 759185029Spjd 760219089Spjd tx = dmu_tx_create(os); 761185029Spjd dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 762185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 763185029Spjd if (error) { 764185029Spjd dmu_tx_abort(tx); 765185029Spjd return (error); 766185029Spjd } 767185029Spjd 768219089Spjd error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, 769185029Spjd &volsize, tx); 770185029Spjd dmu_tx_commit(tx); 771185029Spjd 772185029Spjd if (error == 0) 773219089Spjd error = dmu_free_long_range(os, 774185029Spjd ZVOL_OBJ, volsize, DMU_OBJECT_END); 775219089Spjd return (error); 776219089Spjd} 777185029Spjd 778219089Spjdvoid 779219089Spjdzvol_remove_minors(const char *name) 780219089Spjd{ 781219089Spjd struct g_geom *gp, *gptmp; 782219316Spjd struct g_provider *pp; 783219089Spjd zvol_state_t *zv; 784219316Spjd size_t namelen; 785219089Spjd 786219316Spjd namelen = strlen(name); 787219316Spjd 788219089Spjd DROP_GIANT(); 789224791Spjd mutex_enter(&spa_namespace_lock); 790219089Spjd g_topology_lock(); 791219089Spjd 792219089Spjd LIST_FOREACH_SAFE(gp, &zfs_zvol_class.geom, geom, gptmp) { 793219089Spjd pp = LIST_FIRST(&gp->provider); 794219089Spjd if (pp == NULL) 795219089Spjd continue; 796219089Spjd zv = pp->private; 797219089Spjd if (zv == NULL) 798219089Spjd continue; 799219316Spjd if (strcmp(zv->zv_name, name) == 0 || 800219316Spjd (strncmp(zv->zv_name, name, namelen) == 0 && 801219316Spjd zv->zv_name[namelen] == '/')) { 802219089Spjd (void) zvol_remove_zv(zv); 803219316Spjd } 804185029Spjd } 805219089Spjd 806219089Spjd g_topology_unlock(); 807224791Spjd mutex_exit(&spa_namespace_lock); 808219089Spjd PICKUP_GIANT(); 809185029Spjd} 810185029Spjd 811185029Spjdint 812185029Spjdzvol_set_volsize(const char *name, major_t maj, uint64_t volsize) 813185029Spjd{ 814219089Spjd zvol_state_t *zv = NULL; 815219089Spjd objset_t *os; 816185029Spjd int error; 817168404Spjd dmu_object_info_t doi; 818185029Spjd uint64_t old_volsize = 0ULL; 
819219089Spjd uint64_t readonly; 820168404Spjd 821224791Spjd mutex_enter(&spa_namespace_lock); 822219089Spjd zv = zvol_minor_lookup(name); 823219089Spjd if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 824224791Spjd mutex_exit(&spa_namespace_lock); 825219089Spjd return (error); 826168404Spjd } 827168404Spjd 828219089Spjd if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || 829168404Spjd (error = zvol_check_volsize(volsize, 830185029Spjd doi.doi_data_block_size)) != 0) 831185029Spjd goto out; 832168404Spjd 833219089Spjd VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, 834219089Spjd NULL) == 0); 835219089Spjd if (readonly) { 836168404Spjd error = EROFS; 837185029Spjd goto out; 838168404Spjd } 839168404Spjd 840219089Spjd error = zvol_update_volsize(os, volsize); 841185029Spjd /* 842185029Spjd * Reinitialize the dump area to the new size. If we 843219089Spjd * failed to resize the dump area then restore it back to 844219089Spjd * its original size. 845185029Spjd */ 846219089Spjd if (zv && error == 0) { 847219089Spjd#ifdef ZVOL_DUMP 848219089Spjd if (zv->zv_flags & ZVOL_DUMPIFIED) { 849219089Spjd old_volsize = zv->zv_volsize; 850219089Spjd zv->zv_volsize = volsize; 851219089Spjd if ((error = zvol_dumpify(zv)) != 0 || 852219089Spjd (error = dumpvp_resize()) != 0) { 853219089Spjd (void) zvol_update_volsize(os, old_volsize); 854219089Spjd zv->zv_volsize = old_volsize; 855219089Spjd error = zvol_dumpify(zv); 856219089Spjd } 857185029Spjd } 858219089Spjd#endif /* ZVOL_DUMP */ 859219089Spjd if (error == 0) { 860219089Spjd zv->zv_volsize = volsize; 861219089Spjd zvol_size_changed(zv); 862219089Spjd } 863168404Spjd } 864168404Spjd 865219089Spjd#ifdef sun 866219089Spjd /* 867219089Spjd * Generate a LUN expansion event. 
868219089Spjd */ 869219089Spjd if (zv && error == 0) { 870219089Spjd sysevent_id_t eid; 871219089Spjd nvlist_t *attr; 872219089Spjd char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 873219089Spjd 874219089Spjd (void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV, 875219089Spjd zv->zv_minor); 876219089Spjd 877219089Spjd VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 878219089Spjd VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 879219089Spjd 880219089Spjd (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 881219089Spjd ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 882219089Spjd 883219089Spjd nvlist_free(attr); 884219089Spjd kmem_free(physpath, MAXPATHLEN); 885219089Spjd } 886219089Spjd#endif /* sun */ 887219089Spjd 888185029Spjdout: 889219089Spjd dmu_objset_rele(os, FTAG); 890168404Spjd 891224791Spjd mutex_exit(&spa_namespace_lock); 892168404Spjd 893168404Spjd return (error); 894168404Spjd} 895168404Spjd 896219089Spjd/*ARGSUSED*/ 897219089Spjdstatic int 898219089Spjdzvol_open(struct g_provider *pp, int flag, int count) 899168404Spjd{ 900168404Spjd zvol_state_t *zv; 901219089Spjd int err = 0; 902240831Savg boolean_t locked = B_FALSE; 903168404Spjd 904240831Savg /* 905240831Savg * Protect against recursively entering spa_namespace_lock 906240831Savg * when spa_open() is used for a pool on a (local) ZVOL(s). 907240831Savg * This is needed since we replaced upstream zfsdev_state_lock 908240831Savg * with spa_namespace_lock in the ZVOL code. 909240831Savg * We are using the same trick as spa_open(). 910240831Savg * Note that calls in zvol_first_open which need to resolve 911240831Savg * pool name to a spa object will enter spa_open() 912240831Savg * recursively, but that function already has all the 913240831Savg * necessary protection. 
914240831Savg */ 915240831Savg if (!MUTEX_HELD(&spa_namespace_lock)) { 916240831Savg mutex_enter(&spa_namespace_lock); 917240831Savg locked = B_TRUE; 918227110Spjd } 919227110Spjd 920219089Spjd zv = pp->private; 921219089Spjd if (zv == NULL) { 922240831Savg if (locked) 923240831Savg mutex_exit(&spa_namespace_lock); 924249195Smm return (SET_ERROR(ENXIO)); 925168404Spjd } 926219089Spjd 927219089Spjd if (zv->zv_total_opens == 0) 928219089Spjd err = zvol_first_open(zv); 929219089Spjd if (err) { 930240831Savg if (locked) 931240831Savg mutex_exit(&spa_namespace_lock); 932219089Spjd return (err); 933168404Spjd } 934219089Spjd if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 935249195Smm err = SET_ERROR(EROFS); 936219089Spjd goto out; 937219089Spjd } 938219089Spjd if (zv->zv_flags & ZVOL_EXCL) { 939249195Smm err = SET_ERROR(EBUSY); 940219089Spjd goto out; 941219089Spjd } 942219089Spjd#ifdef FEXCL 943219089Spjd if (flag & FEXCL) { 944219089Spjd if (zv->zv_total_opens != 0) { 945249195Smm err = SET_ERROR(EBUSY); 946219089Spjd goto out; 947219089Spjd } 948219089Spjd zv->zv_flags |= ZVOL_EXCL; 949219089Spjd } 950219089Spjd#endif 951168404Spjd 952219089Spjd zv->zv_total_opens += count; 953240831Savg if (locked) 954240831Savg mutex_exit(&spa_namespace_lock); 955219089Spjd 956219089Spjd return (err); 957219089Spjdout: 958219089Spjd if (zv->zv_total_opens == 0) 959219089Spjd zvol_last_close(zv); 960240831Savg if (locked) 961240831Savg mutex_exit(&spa_namespace_lock); 962219089Spjd return (err); 963219089Spjd} 964219089Spjd 965219089Spjd/*ARGSUSED*/ 966219089Spjdstatic int 967219089Spjdzvol_close(struct g_provider *pp, int flag, int count) 968219089Spjd{ 969219089Spjd zvol_state_t *zv; 970219089Spjd int error = 0; 971240831Savg boolean_t locked = B_FALSE; 972219089Spjd 973240831Savg /* See comment in zvol_open(). 
*/ 974240831Savg if (!MUTEX_HELD(&spa_namespace_lock)) { 975240831Savg mutex_enter(&spa_namespace_lock); 976240831Savg locked = B_TRUE; 977240831Savg } 978219089Spjd 979219089Spjd zv = pp->private; 980219089Spjd if (zv == NULL) { 981240831Savg if (locked) 982240831Savg mutex_exit(&spa_namespace_lock); 983249195Smm return (SET_ERROR(ENXIO)); 984168404Spjd } 985168404Spjd 986219089Spjd if (zv->zv_flags & ZVOL_EXCL) { 987219089Spjd ASSERT(zv->zv_total_opens == 1); 988219089Spjd zv->zv_flags &= ~ZVOL_EXCL; 989219089Spjd } 990219089Spjd 991219089Spjd /* 992219089Spjd * If the open count is zero, this is a spurious close. 993219089Spjd * That indicates a bug in the kernel / DDI framework. 994219089Spjd */ 995219089Spjd ASSERT(zv->zv_total_opens != 0); 996219089Spjd 997219089Spjd /* 998219089Spjd * You may get multiple opens, but only one close. 999219089Spjd */ 1000219089Spjd zv->zv_total_opens -= count; 1001219089Spjd 1002219089Spjd if (zv->zv_total_opens == 0) 1003219089Spjd zvol_last_close(zv); 1004219089Spjd 1005240831Savg if (locked) 1006240831Savg mutex_exit(&spa_namespace_lock); 1007168404Spjd return (error); 1008168404Spjd} 1009168404Spjd 1010219089Spjdstatic void 1011219089Spjdzvol_get_done(zgd_t *zgd, int error) 1012168404Spjd{ 1013219089Spjd if (zgd->zgd_db) 1014219089Spjd dmu_buf_rele(zgd->zgd_db, zgd); 1015168404Spjd 1016219089Spjd zfs_range_unlock(zgd->zgd_rl); 1017219089Spjd 1018219089Spjd if (error == 0 && zgd->zgd_bp) 1019219089Spjd zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1020219089Spjd 1021168404Spjd kmem_free(zgd, sizeof (zgd_t)); 1022168404Spjd} 1023168404Spjd 1024168404Spjd/* 1025168404Spjd * Get data to generate a TX_WRITE intent log record. 
1026168404Spjd */ 1027168404Spjdstatic int 1028168404Spjdzvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1029168404Spjd{ 1030168404Spjd zvol_state_t *zv = arg; 1031168404Spjd objset_t *os = zv->zv_objset; 1032219089Spjd uint64_t object = ZVOL_OBJ; 1033219089Spjd uint64_t offset = lr->lr_offset; 1034219089Spjd uint64_t size = lr->lr_length; /* length of user data */ 1035219089Spjd blkptr_t *bp = &lr->lr_blkptr; 1036168404Spjd dmu_buf_t *db; 1037168404Spjd zgd_t *zgd; 1038168404Spjd int error; 1039168404Spjd 1040219089Spjd ASSERT(zio != NULL); 1041219089Spjd ASSERT(size != 0); 1042168404Spjd 1043219089Spjd zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1044219089Spjd zgd->zgd_zilog = zv->zv_zilog; 1045219089Spjd zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); 1046219089Spjd 1047168404Spjd /* 1048168404Spjd * Write records come in two flavors: immediate and indirect. 1049168404Spjd * For small writes it's cheaper to store the data with the 1050168404Spjd * log record (immediate); for large writes it's cheaper to 1051168404Spjd * sync the data and get a pointer to it (indirect) so that 1052168404Spjd * we don't have to write the data twice. 
1053168404Spjd */ 1054219089Spjd if (buf != NULL) { /* immediate write */ 1055219089Spjd error = dmu_read(os, object, offset, size, buf, 1056219089Spjd DMU_READ_NO_PREFETCH); 1057219089Spjd } else { 1058219089Spjd size = zv->zv_volblocksize; 1059219089Spjd offset = P2ALIGN(offset, size); 1060219089Spjd error = dmu_buf_hold(os, object, offset, zgd, &db, 1061219089Spjd DMU_READ_NO_PREFETCH); 1062219089Spjd if (error == 0) { 1063243524Smm blkptr_t *obp = dmu_buf_get_blkptr(db); 1064243524Smm if (obp) { 1065243524Smm ASSERT(BP_IS_HOLE(bp)); 1066243524Smm *bp = *obp; 1067243524Smm } 1068243524Smm 1069219089Spjd zgd->zgd_db = db; 1070219089Spjd zgd->zgd_bp = bp; 1071168404Spjd 1072219089Spjd ASSERT(db->db_offset == offset); 1073219089Spjd ASSERT(db->db_size == size); 1074168404Spjd 1075219089Spjd error = dmu_sync(zio, lr->lr_common.lrc_txg, 1076219089Spjd zvol_get_done, zgd); 1077168404Spjd 1078219089Spjd if (error == 0) 1079219089Spjd return (0); 1080219089Spjd } 1081219089Spjd } 1082209962Smm 1083219089Spjd zvol_get_done(zgd, error); 1084219089Spjd 1085219089Spjd return (error); 1086219089Spjd} 1087219089Spjd 1088219089Spjd/* 1089219089Spjd * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. 1090219089Spjd * 1091219089Spjd * We store data in the log buffers if it's small enough. 1092219089Spjd * Otherwise we will later flush the data out via dmu_sync(). 
1093219089Spjd */ 1094219089Spjdssize_t zvol_immediate_write_sz = 32768; 1095219089Spjd 1096219089Spjdstatic void 1097219089Spjdzvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, 1098219089Spjd boolean_t sync) 1099219089Spjd{ 1100219089Spjd uint32_t blocksize = zv->zv_volblocksize; 1101219089Spjd zilog_t *zilog = zv->zv_zilog; 1102219089Spjd boolean_t slogging; 1103219089Spjd ssize_t immediate_write_sz; 1104219089Spjd 1105219089Spjd if (zil_replaying(zilog, tx)) 1106219089Spjd return; 1107219089Spjd 1108219089Spjd immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) 1109219089Spjd ? 0 : zvol_immediate_write_sz; 1110219089Spjd 1111219089Spjd slogging = spa_has_slogs(zilog->zl_spa) && 1112219089Spjd (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); 1113219089Spjd 1114219089Spjd while (resid) { 1115219089Spjd itx_t *itx; 1116219089Spjd lr_write_t *lr; 1117219089Spjd ssize_t len; 1118219089Spjd itx_wr_state_t write_state; 1119219089Spjd 1120209962Smm /* 1121219089Spjd * Unlike zfs_log_write() we can be called with 1122219089Spjd * upto DMU_MAX_ACCESS/2 (5MB) writes. 1123209962Smm */ 1124219089Spjd if (blocksize > immediate_write_sz && !slogging && 1125219089Spjd resid >= blocksize && off % blocksize == 0) { 1126219089Spjd write_state = WR_INDIRECT; /* uses dmu_sync */ 1127219089Spjd len = blocksize; 1128219089Spjd } else if (sync) { 1129219089Spjd write_state = WR_COPIED; 1130219089Spjd len = MIN(ZIL_MAX_LOG_DATA, resid); 1131219089Spjd } else { 1132219089Spjd write_state = WR_NEED_COPY; 1133219089Spjd len = MIN(ZIL_MAX_LOG_DATA, resid); 1134219089Spjd } 1135219089Spjd 1136219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr) + 1137219089Spjd (write_state == WR_COPIED ? 
len : 0)); 1138219089Spjd lr = (lr_write_t *)&itx->itx_lr; 1139219089Spjd if (write_state == WR_COPIED && dmu_read(zv->zv_objset, 1140219089Spjd ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { 1141219089Spjd zil_itx_destroy(itx); 1142219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1143219089Spjd lr = (lr_write_t *)&itx->itx_lr; 1144219089Spjd write_state = WR_NEED_COPY; 1145219089Spjd } 1146219089Spjd 1147219089Spjd itx->itx_wr_state = write_state; 1148219089Spjd if (write_state == WR_NEED_COPY) 1149219089Spjd itx->itx_sod += len; 1150219089Spjd lr->lr_foid = ZVOL_OBJ; 1151219089Spjd lr->lr_offset = off; 1152219089Spjd lr->lr_length = len; 1153219089Spjd lr->lr_blkoff = 0; 1154219089Spjd BP_ZERO(&lr->lr_blkptr); 1155219089Spjd 1156219089Spjd itx->itx_private = zv; 1157219089Spjd itx->itx_sync = sync; 1158219089Spjd 1159219089Spjd zil_itx_assign(zilog, itx, tx); 1160219089Spjd 1161219089Spjd off += len; 1162219089Spjd resid -= len; 1163209962Smm } 1164219089Spjd} 1165209962Smm 1166219089Spjd#ifdef sun 1167219089Spjdstatic int 1168255750Sdelphijzvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, 1169255750Sdelphij uint64_t size, boolean_t doread, boolean_t isdump) 1170219089Spjd{ 1171219089Spjd vdev_disk_t *dvd; 1172219089Spjd int c; 1173219089Spjd int numerrors = 0; 1174219089Spjd 1175255750Sdelphij if (vd->vdev_ops == &vdev_mirror_ops || 1176255750Sdelphij vd->vdev_ops == &vdev_replacing_ops || 1177255750Sdelphij vd->vdev_ops == &vdev_spare_ops) { 1178255750Sdelphij for (c = 0; c < vd->vdev_children; c++) { 1179255750Sdelphij int err = zvol_dumpio_vdev(vd->vdev_child[c], 1180255750Sdelphij addr, offset, origoffset, size, doread, isdump); 1181255750Sdelphij if (err != 0) { 1182255750Sdelphij numerrors++; 1183255750Sdelphij } else if (doread) { 1184255750Sdelphij break; 1185255750Sdelphij } 1186219089Spjd } 1187219089Spjd } 1188219089Spjd 1189255750Sdelphij if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != 
&vdev_raidz_ops) 1190219089Spjd return (numerrors < vd->vdev_children ? 0 : EIO); 1191219089Spjd 1192219089Spjd if (doread && !vdev_readable(vd)) 1193249195Smm return (SET_ERROR(EIO)); 1194219089Spjd else if (!doread && !vdev_writeable(vd)) 1195249195Smm return (SET_ERROR(EIO)); 1196219089Spjd 1197255750Sdelphij if (vd->vdev_ops == &vdev_raidz_ops) { 1198255750Sdelphij return (vdev_raidz_physio(vd, 1199255750Sdelphij addr, size, offset, origoffset, doread, isdump)); 1200255750Sdelphij } 1201255750Sdelphij 1202219089Spjd offset += VDEV_LABEL_START_SIZE; 1203219089Spjd 1204219089Spjd if (ddi_in_panic() || isdump) { 1205219089Spjd ASSERT(!doread); 1206219089Spjd if (doread) 1207249195Smm return (SET_ERROR(EIO)); 1208255750Sdelphij dvd = vd->vdev_tsd; 1209255750Sdelphij ASSERT3P(dvd, !=, NULL); 1210219089Spjd return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), 1211219089Spjd lbtodb(size))); 1212219089Spjd } else { 1213255750Sdelphij dvd = vd->vdev_tsd; 1214255750Sdelphij ASSERT3P(dvd, !=, NULL); 1215255750Sdelphij return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size, 1216255750Sdelphij offset, doread ? B_READ : B_WRITE)); 1217219089Spjd } 1218219089Spjd} 1219219089Spjd 1220219089Spjdstatic int 1221219089Spjdzvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, 1222219089Spjd boolean_t doread, boolean_t isdump) 1223219089Spjd{ 1224219089Spjd vdev_t *vd; 1225219089Spjd int error; 1226219089Spjd zvol_extent_t *ze; 1227219089Spjd spa_t *spa = dmu_objset_spa(zv->zv_objset); 1228219089Spjd 1229219089Spjd /* Must be sector aligned, and not stradle a block boundary. 
*/ 1230219089Spjd if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || 1231219089Spjd P2BOUNDARY(offset, size, zv->zv_volblocksize)) { 1232249195Smm return (SET_ERROR(EINVAL)); 1233219089Spjd } 1234219089Spjd ASSERT(size <= zv->zv_volblocksize); 1235219089Spjd 1236219089Spjd /* Locate the extent this belongs to */ 1237219089Spjd ze = list_head(&zv->zv_extents); 1238219089Spjd while (offset >= ze->ze_nblks * zv->zv_volblocksize) { 1239219089Spjd offset -= ze->ze_nblks * zv->zv_volblocksize; 1240219089Spjd ze = list_next(&zv->zv_extents, ze); 1241219089Spjd } 1242219089Spjd 1243248571Smm if (ze == NULL) 1244249195Smm return (SET_ERROR(EINVAL)); 1245248571Smm 1246219089Spjd if (!ddi_in_panic()) 1247219089Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 1248219089Spjd 1249219089Spjd vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); 1250219089Spjd offset += DVA_GET_OFFSET(&ze->ze_dva); 1251255750Sdelphij error = zvol_dumpio_vdev(vd, addr, offset, DVA_GET_OFFSET(&ze->ze_dva), 1252255750Sdelphij size, doread, isdump); 1253219089Spjd 1254219089Spjd if (!ddi_in_panic()) 1255219089Spjd spa_config_exit(spa, SCL_STATE, FTAG); 1256219089Spjd 1257219089Spjd return (error); 1258219089Spjd} 1259219089Spjd#endif /* sun */ 1260219089Spjd 1261219089Spjdint 1262219089Spjdzvol_strategy(struct bio *bp) 1263219089Spjd{ 1264219089Spjd zvol_state_t *zv = bp->bio_to->private; 1265219089Spjd uint64_t off, volsize; 1266219089Spjd size_t resid; 1267219089Spjd char *addr; 1268219089Spjd objset_t *os; 1269219089Spjd rl_t *rl; 1270219089Spjd int error = 0; 1271219089Spjd boolean_t doread = (bp->bio_cmd == BIO_READ); 1272255750Sdelphij boolean_t is_dumpified; 1273219089Spjd boolean_t sync; 1274219089Spjd 1275219089Spjd if (zv == NULL) { 1276219089Spjd g_io_deliver(bp, ENXIO); 1277219089Spjd return (0); 1278219089Spjd } 1279219089Spjd 1280219089Spjd if (bp->bio_cmd != BIO_READ && (zv->zv_flags & ZVOL_RDONLY)) { 1281219089Spjd g_io_deliver(bp, EROFS); 1282219089Spjd return (0); 
1283219089Spjd } 1284219089Spjd 1285219089Spjd off = bp->bio_offset; 1286219089Spjd volsize = zv->zv_volsize; 1287219089Spjd 1288219089Spjd os = zv->zv_objset; 1289219089Spjd ASSERT(os != NULL); 1290219089Spjd 1291219089Spjd addr = bp->bio_data; 1292219089Spjd resid = bp->bio_length; 1293219089Spjd 1294219089Spjd if (resid > 0 && (off < 0 || off >= volsize)) { 1295219089Spjd g_io_deliver(bp, EIO); 1296219089Spjd return (0); 1297219089Spjd } 1298219089Spjd 1299255750Sdelphij#ifdef illumos 1300255750Sdelphij is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED; 1301255750Sdelphij#else 1302255750Sdelphij is_dumpified = B_FALSE; 1303255750Sdelphij#endif 1304255750Sdelphij sync = !doread && !is_dumpified && 1305255750Sdelphij zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; 1306219089Spjd 1307168404Spjd /* 1308219089Spjd * There must be no buffer changes when doing a dmu_sync() because 1309219089Spjd * we can't change the data whilst calculating the checksum. 1310168404Spjd */ 1311219089Spjd rl = zfs_range_lock(&zv->zv_znode, off, resid, 1312219089Spjd doread ? 
RL_READER : RL_WRITER); 1313219089Spjd 1314219089Spjd while (resid != 0 && off < volsize) { 1315219089Spjd size_t size = MIN(resid, zvol_maxphys); 1316255750Sdelphij#ifdef illumos 1317255750Sdelphij if (is_dumpified) { 1318255750Sdelphij size = MIN(size, P2END(off, zv->zv_volblocksize) - off); 1319255750Sdelphij error = zvol_dumpio(zv, addr, off, size, 1320255750Sdelphij doread, B_FALSE); 1321255750Sdelphij } else if (doread) { 1322255750Sdelphij#else 1323219089Spjd if (doread) { 1324255750Sdelphij#endif 1325219089Spjd error = dmu_read(os, ZVOL_OBJ, off, size, addr, 1326219089Spjd DMU_READ_PREFETCH); 1327219089Spjd } else { 1328219089Spjd dmu_tx_t *tx = dmu_tx_create(os); 1329219089Spjd dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 1330219089Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1331219089Spjd if (error) { 1332219089Spjd dmu_tx_abort(tx); 1333219089Spjd } else { 1334219089Spjd dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 1335219089Spjd zvol_log_write(zv, tx, off, size, sync); 1336219089Spjd dmu_tx_commit(tx); 1337219089Spjd } 1338219089Spjd } 1339219089Spjd if (error) { 1340219089Spjd /* convert checksum errors into IO errors */ 1341219089Spjd if (error == ECKSUM) 1342249195Smm error = SET_ERROR(EIO); 1343219089Spjd break; 1344219089Spjd } 1345219089Spjd off += size; 1346219089Spjd addr += size; 1347219089Spjd resid -= size; 1348219089Spjd } 1349168404Spjd zfs_range_unlock(rl); 1350219089Spjd 1351219089Spjd bp->bio_completed = bp->bio_length - resid; 1352219089Spjd if (bp->bio_completed < bp->bio_length) 1353219089Spjd bp->bio_error = (off > volsize ? EINVAL : error); 1354219089Spjd 1355219089Spjd if (sync) 1356219089Spjd zil_commit(zv->zv_zilog, ZVOL_OBJ); 1357219089Spjd g_io_deliver(bp, 0); 1358219089Spjd 1359219089Spjd return (0); 1360219089Spjd} 1361219089Spjd 1362219089Spjd#ifdef sun 1363219089Spjd/* 1364219089Spjd * Set the buffer count to the zvol maximum transfer. 
1365219089Spjd * Using our own routine instead of the default minphys() 1366219089Spjd * means that for larger writes we write bigger buffers on X86 1367219089Spjd * (128K instead of 56K) and flush the disk write cache less often 1368219089Spjd * (every zvol_maxphys - currently 1MB) instead of minphys (currently 1369219089Spjd * 56K on X86 and 128K on sparc). 1370219089Spjd */ 1371219089Spjdvoid 1372219089Spjdzvol_minphys(struct buf *bp) 1373219089Spjd{ 1374219089Spjd if (bp->b_bcount > zvol_maxphys) 1375219089Spjd bp->b_bcount = zvol_maxphys; 1376219089Spjd} 1377219089Spjd 1378219089Spjdint 1379219089Spjdzvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) 1380219089Spjd{ 1381219089Spjd minor_t minor = getminor(dev); 1382219089Spjd zvol_state_t *zv; 1383219089Spjd int error = 0; 1384219089Spjd uint64_t size; 1385219089Spjd uint64_t boff; 1386219089Spjd uint64_t resid; 1387219089Spjd 1388219089Spjd zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1389219089Spjd if (zv == NULL) 1390249195Smm return (SET_ERROR(ENXIO)); 1391219089Spjd 1392248571Smm if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0) 1393249195Smm return (SET_ERROR(EINVAL)); 1394248571Smm 1395219089Spjd boff = ldbtob(blkno); 1396219089Spjd resid = ldbtob(nblocks); 1397219089Spjd 1398219089Spjd VERIFY3U(boff + resid, <=, zv->zv_volsize); 1399219089Spjd 1400219089Spjd while (resid) { 1401219089Spjd size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); 1402219089Spjd error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE); 1403219089Spjd if (error) 1404219089Spjd break; 1405219089Spjd boff += size; 1406219089Spjd addr += size; 1407219089Spjd resid -= size; 1408219089Spjd } 1409219089Spjd 1410168404Spjd return (error); 1411168404Spjd} 1412168404Spjd 1413219089Spjd/*ARGSUSED*/ 1414168404Spjdint 1415219089Spjdzvol_read(dev_t dev, uio_t *uio, cred_t *cr) 1416219089Spjd{ 1417219089Spjd minor_t minor = getminor(dev); 1418219089Spjd zvol_state_t *zv; 1419219089Spjd uint64_t volsize; 1420219089Spjd rl_t 
*rl; 1421219089Spjd int error = 0; 1422219089Spjd 1423219089Spjd zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1424219089Spjd if (zv == NULL) 1425249195Smm return (SET_ERROR(ENXIO)); 1426219089Spjd 1427219089Spjd volsize = zv->zv_volsize; 1428219089Spjd if (uio->uio_resid > 0 && 1429219089Spjd (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) 1430249195Smm return (SET_ERROR(EIO)); 1431219089Spjd 1432219089Spjd if (zv->zv_flags & ZVOL_DUMPIFIED) { 1433219089Spjd error = physio(zvol_strategy, NULL, dev, B_READ, 1434219089Spjd zvol_minphys, uio); 1435219089Spjd return (error); 1436219089Spjd } 1437219089Spjd 1438219089Spjd rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1439219089Spjd RL_READER); 1440219089Spjd while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1441219089Spjd uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1442219089Spjd 1443219089Spjd /* don't read past the end */ 1444219089Spjd if (bytes > volsize - uio->uio_loffset) 1445219089Spjd bytes = volsize - uio->uio_loffset; 1446219089Spjd 1447219089Spjd error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes); 1448219089Spjd if (error) { 1449219089Spjd /* convert checksum errors into IO errors */ 1450219089Spjd if (error == ECKSUM) 1451249195Smm error = SET_ERROR(EIO); 1452219089Spjd break; 1453219089Spjd } 1454219089Spjd } 1455219089Spjd zfs_range_unlock(rl); 1456219089Spjd return (error); 1457219089Spjd} 1458219089Spjd 1459219089Spjd/*ARGSUSED*/ 1460219089Spjdint 1461219089Spjdzvol_write(dev_t dev, uio_t *uio, cred_t *cr) 1462219089Spjd{ 1463219089Spjd minor_t minor = getminor(dev); 1464219089Spjd zvol_state_t *zv; 1465219089Spjd uint64_t volsize; 1466219089Spjd rl_t *rl; 1467219089Spjd int error = 0; 1468219089Spjd boolean_t sync; 1469219089Spjd 1470219089Spjd zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1471219089Spjd if (zv == NULL) 1472249195Smm return (SET_ERROR(ENXIO)); 1473219089Spjd 1474219089Spjd volsize = zv->zv_volsize; 1475219089Spjd if 
(uio->uio_resid > 0 && 1476219089Spjd (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) 1477249195Smm return (SET_ERROR(EIO)); 1478219089Spjd 1479219089Spjd if (zv->zv_flags & ZVOL_DUMPIFIED) { 1480219089Spjd error = physio(zvol_strategy, NULL, dev, B_WRITE, 1481219089Spjd zvol_minphys, uio); 1482219089Spjd return (error); 1483219089Spjd } 1484219089Spjd 1485219089Spjd sync = !(zv->zv_flags & ZVOL_WCE) || 1486219089Spjd (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); 1487219089Spjd 1488219089Spjd rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1489219089Spjd RL_WRITER); 1490219089Spjd while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1491219089Spjd uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1492219089Spjd uint64_t off = uio->uio_loffset; 1493219089Spjd dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 1494219089Spjd 1495219089Spjd if (bytes > volsize - off) /* don't write past the end */ 1496219089Spjd bytes = volsize - off; 1497219089Spjd 1498219089Spjd dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 1499219089Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1500219089Spjd if (error) { 1501219089Spjd dmu_tx_abort(tx); 1502219089Spjd break; 1503219089Spjd } 1504219089Spjd error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx); 1505219089Spjd if (error == 0) 1506219089Spjd zvol_log_write(zv, tx, off, bytes, sync); 1507219089Spjd dmu_tx_commit(tx); 1508219089Spjd 1509219089Spjd if (error) 1510219089Spjd break; 1511219089Spjd } 1512219089Spjd zfs_range_unlock(rl); 1513219089Spjd if (sync) 1514219089Spjd zil_commit(zv->zv_zilog, ZVOL_OBJ); 1515219089Spjd return (error); 1516219089Spjd} 1517219089Spjd 1518219089Spjdint 1519219089Spjdzvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) 1520219089Spjd{ 1521219089Spjd struct uuid uuid = EFI_RESERVED; 1522219089Spjd efi_gpe_t gpe = { 0 }; 1523219089Spjd uint32_t crc; 1524219089Spjd dk_efi_t efi; 1525219089Spjd int length; 1526219089Spjd char *ptr; 1527219089Spjd 1528219089Spjd if 
(ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag)) 1529249195Smm return (SET_ERROR(EFAULT)); 1530219089Spjd ptr = (char *)(uintptr_t)efi.dki_data_64; 1531219089Spjd length = efi.dki_length; 1532219089Spjd /* 1533219089Spjd * Some clients may attempt to request a PMBR for the 1534219089Spjd * zvol. Currently this interface will return EINVAL to 1535219089Spjd * such requests. These requests could be supported by 1536219089Spjd * adding a check for lba == 0 and consing up an appropriate 1537219089Spjd * PMBR. 1538219089Spjd */ 1539219089Spjd if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0) 1540249195Smm return (SET_ERROR(EINVAL)); 1541219089Spjd 1542219089Spjd gpe.efi_gpe_StartingLBA = LE_64(34ULL); 1543219089Spjd gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1); 1544219089Spjd UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); 1545219089Spjd 1546219089Spjd if (efi.dki_lba == 1) { 1547219089Spjd efi_gpt_t gpt = { 0 }; 1548219089Spjd 1549219089Spjd gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); 1550219089Spjd gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 1551219089Spjd gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); 1552219089Spjd gpt.efi_gpt_MyLBA = LE_64(1ULL); 1553219089Spjd gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL); 1554219089Spjd gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1); 1555219089Spjd gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL); 1556219089Spjd gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); 1557219089Spjd gpt.efi_gpt_SizeOfPartitionEntry = 1558219089Spjd LE_32(sizeof (efi_gpe_t)); 1559219089Spjd CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); 1560219089Spjd gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 1561219089Spjd CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); 1562219089Spjd gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); 1563219089Spjd if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length), 1564219089Spjd flag)) 1565249195Smm return (SET_ERROR(EFAULT)); 1566219089Spjd ptr += sizeof (gpt); 1567219089Spjd length -= sizeof (gpt); 1568219089Spjd 
} 1569219089Spjd if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe), 1570219089Spjd length), flag)) 1571249195Smm return (SET_ERROR(EFAULT)); 1572219089Spjd return (0); 1573219089Spjd} 1574219089Spjd 1575219089Spjd/* 1576219089Spjd * BEGIN entry points to allow external callers access to the volume. 1577219089Spjd */ 1578219089Spjd/* 1579219089Spjd * Return the volume parameters needed for access from an external caller. 1580219089Spjd * These values are invariant as long as the volume is held open. 1581219089Spjd */ 1582219089Spjdint 1583219089Spjdzvol_get_volume_params(minor_t minor, uint64_t *blksize, 1584219089Spjd uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl, 1585219089Spjd void **rl_hdl, void **bonus_hdl) 1586219089Spjd{ 1587219089Spjd zvol_state_t *zv; 1588219089Spjd 1589219089Spjd zv = zfsdev_get_soft_state(minor, ZSST_ZVOL); 1590219089Spjd if (zv == NULL) 1591249195Smm return (SET_ERROR(ENXIO)); 1592219089Spjd if (zv->zv_flags & ZVOL_DUMPIFIED) 1593249195Smm return (SET_ERROR(ENXIO)); 1594219089Spjd 1595219089Spjd ASSERT(blksize && max_xfer_len && minor_hdl && 1596219089Spjd objset_hdl && zil_hdl && rl_hdl && bonus_hdl); 1597219089Spjd 1598219089Spjd *blksize = zv->zv_volblocksize; 1599219089Spjd *max_xfer_len = (uint64_t)zvol_maxphys; 1600219089Spjd *minor_hdl = zv; 1601219089Spjd *objset_hdl = zv->zv_objset; 1602219089Spjd *zil_hdl = zv->zv_zilog; 1603219089Spjd *rl_hdl = &zv->zv_znode; 1604219089Spjd *bonus_hdl = zv->zv_dbuf; 1605219089Spjd return (0); 1606219089Spjd} 1607219089Spjd 1608219089Spjd/* 1609219089Spjd * Return the current volume size to an external caller. 1610219089Spjd * The size can change while the volume is open. 
1611219089Spjd */ 1612219089Spjduint64_t 1613219089Spjdzvol_get_volume_size(void *minor_hdl) 1614219089Spjd{ 1615219089Spjd zvol_state_t *zv = minor_hdl; 1616219089Spjd 1617219089Spjd return (zv->zv_volsize); 1618219089Spjd} 1619219089Spjd 1620219089Spjd/* 1621219089Spjd * Return the current WCE setting to an external caller. 1622219089Spjd * The WCE setting can change while the volume is open. 1623219089Spjd */ 1624219089Spjdint 1625219089Spjdzvol_get_volume_wce(void *minor_hdl) 1626219089Spjd{ 1627219089Spjd zvol_state_t *zv = minor_hdl; 1628219089Spjd 1629219089Spjd return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0); 1630219089Spjd} 1631219089Spjd 1632219089Spjd/* 1633219089Spjd * Entry point for external callers to zvol_log_write 1634219089Spjd */ 1635219089Spjdvoid 1636219089Spjdzvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid, 1637219089Spjd boolean_t sync) 1638219089Spjd{ 1639219089Spjd zvol_state_t *zv = minor_hdl; 1640219089Spjd 1641219089Spjd zvol_log_write(zv, tx, off, resid, sync); 1642219089Spjd} 1643219089Spjd/* 1644219089Spjd * END entry points to allow external callers access to the volume. 1645219089Spjd */ 1646219089Spjd 1647219089Spjd/* 1648219089Spjd * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). 
1649219089Spjd */ 1650219089Spjd/*ARGSUSED*/ 1651219089Spjdint 1652219089Spjdzvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) 1653219089Spjd{ 1654219089Spjd zvol_state_t *zv; 1655219089Spjd struct dk_cinfo dki; 1656219089Spjd struct dk_minfo dkm; 1657219089Spjd struct dk_callback *dkc; 1658219089Spjd int error = 0; 1659219089Spjd rl_t *rl; 1660219089Spjd 1661224791Spjd mutex_enter(&spa_namespace_lock); 1662219089Spjd 1663219089Spjd zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL); 1664219089Spjd 1665219089Spjd if (zv == NULL) { 1666224791Spjd mutex_exit(&spa_namespace_lock); 1667249195Smm return (SET_ERROR(ENXIO)); 1668219089Spjd } 1669219089Spjd ASSERT(zv->zv_total_opens > 0); 1670219089Spjd 1671219089Spjd switch (cmd) { 1672219089Spjd 1673219089Spjd case DKIOCINFO: 1674219089Spjd bzero(&dki, sizeof (dki)); 1675219089Spjd (void) strcpy(dki.dki_cname, "zvol"); 1676219089Spjd (void) strcpy(dki.dki_dname, "zvol"); 1677219089Spjd dki.dki_ctype = DKC_UNKNOWN; 1678219089Spjd dki.dki_unit = getminor(dev); 1679219089Spjd dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); 1680224791Spjd mutex_exit(&spa_namespace_lock); 1681219089Spjd if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) 1682249195Smm error = SET_ERROR(EFAULT); 1683219089Spjd return (error); 1684219089Spjd 1685219089Spjd case DKIOCGMEDIAINFO: 1686219089Spjd bzero(&dkm, sizeof (dkm)); 1687219089Spjd dkm.dki_lbsize = 1U << zv->zv_min_bs; 1688219089Spjd dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; 1689219089Spjd dkm.dki_media_type = DK_UNKNOWN; 1690224791Spjd mutex_exit(&spa_namespace_lock); 1691219089Spjd if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) 1692249195Smm error = SET_ERROR(EFAULT); 1693219089Spjd return (error); 1694219089Spjd 1695219089Spjd case DKIOCGETEFI: 1696219089Spjd { 1697219089Spjd uint64_t vs = zv->zv_volsize; 1698219089Spjd uint8_t bs = zv->zv_min_bs; 1699219089Spjd 1700224791Spjd mutex_exit(&spa_namespace_lock); 
1701219089Spjd error = zvol_getefi((void *)arg, flag, vs, bs); 1702219089Spjd return (error); 1703219089Spjd } 1704219089Spjd 1705219089Spjd case DKIOCFLUSHWRITECACHE: 1706219089Spjd dkc = (struct dk_callback *)arg; 1707224791Spjd mutex_exit(&spa_namespace_lock); 1708219089Spjd zil_commit(zv->zv_zilog, ZVOL_OBJ); 1709219089Spjd if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { 1710219089Spjd (*dkc->dkc_callback)(dkc->dkc_cookie, error); 1711219089Spjd error = 0; 1712219089Spjd } 1713219089Spjd return (error); 1714219089Spjd 1715219089Spjd case DKIOCGETWCE: 1716219089Spjd { 1717219089Spjd int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0; 1718219089Spjd if (ddi_copyout(&wce, (void *)arg, sizeof (int), 1719219089Spjd flag)) 1720249195Smm error = SET_ERROR(EFAULT); 1721219089Spjd break; 1722219089Spjd } 1723219089Spjd case DKIOCSETWCE: 1724219089Spjd { 1725219089Spjd int wce; 1726219089Spjd if (ddi_copyin((void *)arg, &wce, sizeof (int), 1727219089Spjd flag)) { 1728249195Smm error = SET_ERROR(EFAULT); 1729219089Spjd break; 1730219089Spjd } 1731219089Spjd if (wce) { 1732219089Spjd zv->zv_flags |= ZVOL_WCE; 1733224791Spjd mutex_exit(&spa_namespace_lock); 1734219089Spjd } else { 1735219089Spjd zv->zv_flags &= ~ZVOL_WCE; 1736224791Spjd mutex_exit(&spa_namespace_lock); 1737219089Spjd zil_commit(zv->zv_zilog, ZVOL_OBJ); 1738219089Spjd } 1739219089Spjd return (0); 1740219089Spjd } 1741219089Spjd 1742219089Spjd case DKIOCGGEOM: 1743219089Spjd case DKIOCGVTOC: 1744219089Spjd /* 1745219089Spjd * commands using these (like prtvtoc) expect ENOTSUP 1746219089Spjd * since we're emulating an EFI label 1747219089Spjd */ 1748249195Smm error = SET_ERROR(ENOTSUP); 1749219089Spjd break; 1750219089Spjd 1751219089Spjd case DKIOCDUMPINIT: 1752219089Spjd rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1753219089Spjd RL_WRITER); 1754219089Spjd error = zvol_dumpify(zv); 1755219089Spjd zfs_range_unlock(rl); 1756219089Spjd break; 1757219089Spjd 1758219089Spjd case DKIOCDUMPFINI: 
1759219089Spjd if (!(zv->zv_flags & ZVOL_DUMPIFIED)) 1760219089Spjd break; 1761219089Spjd rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1762219089Spjd RL_WRITER); 1763219089Spjd error = zvol_dump_fini(zv); 1764219089Spjd zfs_range_unlock(rl); 1765219089Spjd break; 1766219089Spjd 1767249195Smm case DKIOCFREE: 1768249195Smm { 1769249195Smm dkioc_free_t df; 1770249195Smm dmu_tx_t *tx; 1771249195Smm 1772249195Smm if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) { 1773249195Smm error = SET_ERROR(EFAULT); 1774249195Smm break; 1775249195Smm } 1776249195Smm 1777249195Smm /* 1778249195Smm * Apply Postel's Law to length-checking. If they overshoot, 1779249195Smm * just blank out until the end, if there's a need to blank 1780249195Smm * out anything. 1781249195Smm */ 1782249195Smm if (df.df_start >= zv->zv_volsize) 1783249195Smm break; /* No need to do anything... */ 1784249195Smm if (df.df_start + df.df_length > zv->zv_volsize) 1785249195Smm df.df_length = DMU_OBJECT_END; 1786249195Smm 1787249195Smm rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length, 1788249195Smm RL_WRITER); 1789249195Smm tx = dmu_tx_create(zv->zv_objset); 1790249195Smm error = dmu_tx_assign(tx, TXG_WAIT); 1791249195Smm if (error != 0) { 1792249195Smm dmu_tx_abort(tx); 1793249195Smm } else { 1794249195Smm zvol_log_truncate(zv, tx, df.df_start, 1795249195Smm df.df_length, B_TRUE); 1796249195Smm dmu_tx_commit(tx); 1797249195Smm error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 1798249195Smm df.df_start, df.df_length); 1799249195Smm } 1800249195Smm 1801249195Smm zfs_range_unlock(rl); 1802249195Smm 1803249195Smm if (error == 0) { 1804249195Smm /* 1805249195Smm * If the write-cache is disabled or 'sync' property 1806249195Smm * is set to 'always' then treat this as a synchronous 1807249195Smm * operation (i.e. commit to zil). 
1808249195Smm */ 1809249195Smm if (!(zv->zv_flags & ZVOL_WCE) || 1810249195Smm (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) 1811249195Smm zil_commit(zv->zv_zilog, ZVOL_OBJ); 1812249195Smm 1813249195Smm /* 1814249195Smm * If the caller really wants synchronous writes, and 1815249195Smm * can't wait for them, don't return until the write 1816249195Smm * is done. 1817249195Smm */ 1818249195Smm if (df.df_flags & DF_WAIT_SYNC) { 1819249195Smm txg_wait_synced( 1820249195Smm dmu_objset_pool(zv->zv_objset), 0); 1821249195Smm } 1822249195Smm } 1823249195Smm break; 1824249195Smm } 1825249195Smm 1826219089Spjd default: 1827249195Smm error = SET_ERROR(ENOTTY); 1828219089Spjd break; 1829219089Spjd 1830219089Spjd } 1831224791Spjd mutex_exit(&spa_namespace_lock); 1832219089Spjd return (error); 1833219089Spjd} 1834219089Spjd#endif /* sun */ 1835219089Spjd 1836219089Spjdint 1837168404Spjdzvol_busy(void) 1838168404Spjd{ 1839168404Spjd return (zvol_minors != 0); 1840168404Spjd} 1841168404Spjd 1842168404Spjdvoid 1843168404Spjdzvol_init(void) 1844168404Spjd{ 1845219089Spjd VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t), 1846219089Spjd 1) == 0); 1847168404Spjd ZFS_LOG(1, "ZVOL Initialized."); 1848168404Spjd} 1849168404Spjd 1850168404Spjdvoid 1851168404Spjdzvol_fini(void) 1852168404Spjd{ 1853219089Spjd ddi_soft_state_fini(&zfsdev_state); 1854168404Spjd ZFS_LOG(1, "ZVOL Deinitialized."); 1855168404Spjd} 1856185029Spjd 1857219089Spjd#ifdef sun 1858255750Sdelphij/*ARGSUSED*/ 1859185029Spjdstatic int 1860255750Sdelphijzfs_mvdev_dump_feature_check(void *arg, dmu_tx_t *tx) 1861255750Sdelphij{ 1862255750Sdelphij spa_t *spa = dmu_tx_pool(tx)->dp_spa; 1863255750Sdelphij 1864255750Sdelphij if (spa_feature_is_active(spa, 1865255750Sdelphij &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP])) 1866255750Sdelphij return (1); 1867255750Sdelphij return (0); 1868255750Sdelphij} 1869255750Sdelphij 1870255750Sdelphij/*ARGSUSED*/ 1871255750Sdelphijstatic void 
zfs_mvdev_dump_activate_feature_sync(void *arg, dmu_tx_t *tx)
{
	/*
	 * Sync-task callback: mark MULTI_VDEV_CRASH_DUMP active by
	 * bumping the feature's reference count in this txg.
	 */
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	spa_feature_incr(spa,
	    &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP], tx);
}

/*
 * Prepare the zvol's backing DMU object for use as a dump device:
 * free any existing blocks, activate the MULTI_VDEV_CRASH_DUMP feature
 * if the pool layout requires it, and record the dump-related properties
 * in the zvol's ZAP object.  When 'resize' is set, only the
 * refreservation entry is refreshed; otherwise the pre-dump property
 * values are saved so zvol_dump_fini() can restore them later.
 *
 * Caller must hold spa_namespace_lock (asserted below).
 * Returns 0 on success or an errno-style error.
 */
static int
zvol_dump_init(zvol_state_t *zv, boolean_t resize)
{
	dmu_tx_t *tx;
	int error;
	objset_t *os = zv->zv_objset;
	spa_t *spa = dmu_objset_spa(os);
	vdev_t *vd = spa->spa_root_vdev;
	nvlist_t *nv = NULL;
	uint64_t version = spa_version(spa);
	enum zio_checksum checksum;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(vd->vdev_ops == &vdev_root_ops);

	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
	    DMU_OBJECT_END);
	/* wait for dmu_free_long_range to actually free the blocks */
	txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);

	/*
	 * If the pool on which the dump device is being initialized has more
	 * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is
	 * enabled.  If so, bump that feature's counter to indicate that the
	 * feature is active. We also check the vdev type to handle the
	 * following case:
	 *   # zpool create test raidz disk1 disk2 disk3
	 *   Now have spa_root_vdev->vdev_children == 1 (the raidz vdev),
	 *   the raidz vdev itself has 3 children.
	 */
	if (vd->vdev_children > 1 || vd->vdev_ops == &vdev_raidz_ops) {
		if (!spa_feature_is_enabled(spa,
		    &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP]))
			return (SET_ERROR(ENOTSUP));
		(void) dsl_sync_task(spa_name(spa),
		    zfs_mvdev_dump_feature_check,
		    zfs_mvdev_dump_activate_feature_sync, NULL, 2);
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	dmu_tx_hold_bonus(tx, ZVOL_OBJ);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	/*
	 * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum
	 * function.  Otherwise, use the old default -- OFF.
	 */
	checksum = spa_feature_is_active(spa,
	    &spa_feature_table[SPA_FEATURE_MULTI_VDEV_CRASH_DUMP]) ?
	    ZIO_CHECKSUM_NOPARITY : ZIO_CHECKSUM_OFF;

	/*
	 * If we are resizing the dump device then we only need to
	 * update the refreservation to match the newly updated
	 * zvolsize. Otherwise, we save off the original state of the
	 * zvol so that we can restore them if the zvol is ever undumpified.
	 */
	if (resize) {
		error = zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
		    &zv->zv_volsize, tx);
	} else {
		/*
		 * NOTE(review): this 'checksum' intentionally shadows the
		 * enum zio_checksum declared at function scope; the outer
		 * value is the one used later when setting properties on
		 * the !resize path below.
		 */
		uint64_t checksum, compress, refresrv, vbs, dedup;

		error = dsl_prop_get_integer(zv->zv_name,
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL);
		error = error ? error : dsl_prop_get_integer(zv->zv_name,
		    zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL);
		error = error ? error : dsl_prop_get_integer(zv->zv_name,
		    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL);
		error = error ? error : dsl_prop_get_integer(zv->zv_name,
		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL);
		if (version >= SPA_VERSION_DEDUP) {
			error = error ? error :
			    dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL);
		}

		/* Stash the pre-dump values in the ZAP for later restore. */
		error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1,
		    &compress, tx);
		error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx);
		error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
		    &refresrv, tx);
		error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1,
		    &vbs, tx);
		error = error ? error : dmu_object_set_blocksize(
		    os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx);
		if (version >= SPA_VERSION_DEDUP) {
			error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1,
			    &dedup, tx);
		}
		if (error == 0)
			zv->zv_volblocksize = SPA_MAXBLOCKSIZE;
	}
	dmu_tx_commit(tx);

	/*
	 * We only need update the zvol's property if we are initializing
	 * the dump area for the first time.
	 */
	if (!resize) {
		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0);
		VERIFY(nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
		    ZIO_COMPRESS_OFF) == 0);
		VERIFY(nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
		    checksum) == 0);
		if (version >= SPA_VERSION_DEDUP) {
			VERIFY(nvlist_add_uint64(nv,
			    zfs_prop_to_name(ZFS_PROP_DEDUP),
			    ZIO_CHECKSUM_OFF) == 0);
		}

		error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
		    nv, NULL);
		nvlist_free(nv);

		if (error)
			return (error);
	}

	/* Allocate the space for the dump */
	error = zvol_prealloc(zv);
	return (error);
}

/*
 * Turn the zvol into a dump device.  (Re)runs zvol_dump_init() when the
 * recorded ZVOL_DUMPSIZE is missing or does not match the current volume
 * size, rebuilds the LBA mapping, then persists the new dump size in the
 * ZAP.  Any failure rolls state back via zvol_dump_fini().
 * Returns 0 on success, EROFS for read-only zvols, or another errno.
 */
static int
zvol_dumpify(zvol_state_t *zv)
{
	int error = 0;
	uint64_t dumpsize = 0;
	dmu_tx_t *tx;
	objset_t *os = zv->zv_objset;

	if (zv->zv_flags & ZVOL_RDONLY)
		return (SET_ERROR(EROFS));

	if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,
	    8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) {
		/* A previous (smaller/larger) dump area exists: resize it. */
		boolean_t resize = (dumpsize > 0);

		if ((error = zvol_dump_init(zv, resize)) != 0) {
			(void) zvol_dump_fini(zv);
			return (error);
		}
	}

	/*
	 * Build up our lba mapping.
	 */
	error = zvol_get_lbas(zv);
	if (error) {
		(void) zvol_dump_fini(zv);
		return (error);
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		(void) zvol_dump_fini(zv);
		return (error);
	}

	zv->zv_flags |= ZVOL_DUMPIFIED;
	error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1,
	    &zv->zv_volsize, tx);
	dmu_tx_commit(tx);

	if (error) {
		(void) zvol_dump_fini(zv);
		return (error);
	}

	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}

/*
 * Undo zvol_dumpify(): remove the ZVOL_DUMPSIZE ZAP entry, restore the
 * saved checksum/compression/refreservation/volblocksize (and dedup, if
 * the pool version supports it) properties, free the extent map and all
 * blocks, and reset the object's blocksize.
 */
static int
zvol_dump_fini(zvol_state_t *zv)
{
	dmu_tx_t *tx;
	objset_t *os = zv->zv_objset;
	nvlist_t *nv;
	int error = 0;
	uint64_t checksum, compress, refresrv, vbs, dedup;
	uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));

	/*
	 * Attempt to restore the zvol back to its pre-dumpified state.
	 * This is a best-effort attempt as it's possible that not all
	 * of these properties were initialized during the dumpify process
	 * (i.e. error during zvol_dump_init).
	 */

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}
	(void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx);
	dmu_tx_commit(tx);

	/* Best-effort lookups: entries may be absent (see comment above). */
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs);

	VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv);
	if (version >= SPA_VERSION_DEDUP &&
	    zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) {
		(void) nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_DEDUP), dedup);
	}
	(void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
	    nv, NULL);
	nvlist_free(nv);

	zvol_free_extents(zv);
	zv->zv_flags &= ~ZVOL_DUMPIFIED;
	(void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END);
	/* wait for dmu_free_long_range to actually free the blocks */
	txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, ZVOL_OBJ);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}
	if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0)
		zv->zv_volblocksize = vbs;
	dmu_tx_commit(tx);

	return (0);
}
#endif	/* sun */

/*
 * Allocate a zvol_state_t plus its GEOM geom and provider for 'name'.
 * The provider is created but not yet announced -- zvol_geom_run() does
 * that after the rest of the state is ready.
 */
static zvol_state_t *
zvol_geom_create(const char *name)
{
	struct g_provider *pp;
	struct g_geom *gp;
	zvol_state_t *zv;

	gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
	gp->start = zvol_geom_start;
	gp->access = zvol_geom_access;
	pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
	pp->sectorsize = DEV_BSIZE;

	zv = kmem_zalloc(sizeof(*zv), KM_SLEEP);
	zv->zv_provider = pp;
	zv->zv_state = 0;
	bioq_init(&zv->zv_queue);
	mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF);

	pp->private = zv;

	return (zv);
}

/*
 * Announce the provider (clear its error) and start the per-volume
 * worker thread that services the bio queue.
 */
static void
zvol_geom_run(zvol_state_t *zv)
{
	struct g_provider *pp;

	pp = zv->zv_provider;
	g_error_provider(pp, 0);

	kproc_kthread_add(zvol_geom_worker, zv, &zfsproc, NULL, 0, 0,
	    "zfskern", "zvol %s", pp->name + sizeof(ZVOL_DRIVER));
}

/*
 * Tear down a zvol's GEOM state: ask the worker to exit (zv_state = 1),
 * wait until it acknowledges (zv_state == 2), then wither the geom and
 * free the state.  Must be called with the topology lock held (asserted).
 */
static void
zvol_geom_destroy(zvol_state_t *zv)
{
	struct g_provider *pp;

	g_topology_assert();

	mtx_lock(&zv->zv_queue_mtx);
	zv->zv_state = 1;
	wakeup_one(&zv->zv_queue);
	while (zv->zv_state != 2)
		msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0);
	mtx_destroy(&zv->zv_queue_mtx);

	pp = zv->zv_provider;
	zv->zv_provider = NULL;
	pp->private = NULL;
	g_wither_geom(pp->geom, ENXIO);

	kmem_free(zv, sizeof(*zv));
}

/*
 * GEOM access method: translate GEOM's (acr, acw, ace) reference-count
 * deltas into zvol_open()/zvol_close() calls with FREAD/FWRITE flags.
 */
static int
zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
{
	int count, error, flags;

	g_topology_assert();

	/*
	 * To make it easier we expect either open or close, but not both
	 * at the same time.
	 */
	KASSERT((acr >= 0 && acw >= 0 && ace >= 0) ||
	    (acr <= 0 && acw <= 0 && ace <= 0),
	    ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).",
	    pp->name, acr, acw, ace));

	if (pp->private == NULL) {
		if (acr <= 0 && acw <= 0 && ace <= 0)
			return (0);
		return (pp->error);
	}

	/*
	 * We don't pass FEXCL flag to zvol_open()/zvol_close() if ace != 0,
	 * because GEOM already handles that and handles it a bit differently.
	 * GEOM allows for multiple read/exclusive consumers and ZFS allows
	 * only one exclusive consumer, no matter if it is reader or writer.
	 * I like better the way GEOM works so I'll leave it for GEOM to
	 * decide what to do.
	 */

	count = acr + acw + ace;
	if (count == 0)
		return (0);

	flags = 0;
	if (acr != 0 || ace != 0)
		flags |= FREAD;
	if (acw != 0)
		flags |= FWRITE;

	g_topology_unlock();
	if (count > 0)
		error = zvol_open(pp, flags, count);
	else
		error = zvol_close(pp, flags, -count);
	g_topology_lock();
	return (error);
}

/*
 * GEOM start method: queue READ/WRITE/FLUSH bios for the worker thread,
 * waking it only when the queue transitions from empty; everything else
 * (including BIO_DELETE/BIO_GETATTR) is rejected with EOPNOTSUPP.
 */
static void
zvol_geom_start(struct bio *bp)
{
	zvol_state_t *zv;
	boolean_t first;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_FLUSH:
		zv = bp->bio_to->private;
		ASSERT(zv != NULL);
		mtx_lock(&zv->zv_queue_mtx);
		first = (bioq_first(&zv->zv_queue) == NULL);
		bioq_insert_tail(&zv->zv_queue, bp);
		mtx_unlock(&zv->zv_queue_mtx);
		if (first)
			wakeup_one(&zv->zv_queue);
		break;
	case BIO_GETATTR:
	case BIO_DELETE:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		break;
	}
}

/*
 * Per-volume worker thread: drains the bio queue at PRIBIO priority.
 * FLUSH commits the ZIL; READ/WRITE go to zvol_strategy().  Exits when
 * zvol_geom_destroy() sets zv_state to 1, acknowledging with state 2.
 */
static void
zvol_geom_worker(void *arg)
{
	zvol_state_t *zv;
	struct bio *bp;

	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	zv = arg;
	for (;;) {
		mtx_lock(&zv->zv_queue_mtx);
		bp = bioq_takefirst(&zv->zv_queue);
		if (bp == NULL) {
			if (zv->zv_state == 1) {
				zv->zv_state = 2;
				wakeup(&zv->zv_state);
				mtx_unlock(&zv->zv_queue_mtx);
				kthread_exit();
			}
			msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP,
			    "zvol:io", 0);
			continue;
		}
		mtx_unlock(&zv->zv_queue_mtx);
		switch (bp->bio_cmd) {
		case BIO_FLUSH:
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
			g_io_deliver(bp, 0);
			break;
		case BIO_READ:
		case BIO_WRITE:
			zvol_strategy(bp);
			break;
		}
	}
}

extern boolean_t dataset_name_hidden(const char *name);

/*
 * Create a minor for every snapshot of dataset 'name', iterating with
 * dmu_snapshot_list_next() under the pool config lock.  ENOENT from the
 * iterator means "no more snapshots" and is mapped to success.
 *
 * NOTE(review): on the ENAMETOOLONG path this releases 'os' via
 * dmu_objset_rele() even though the caller (zvol_create_minors) also
 * releases its hold afterwards -- verify this does not double-release.
 */
static int
zvol_create_snapshots(objset_t *os, const char *name)
{
	uint64_t cookie, obj;
	char *sname;
	int error, len;

	cookie = obj = 0;
	sname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

#if 0
	(void) dmu_objset_find(name, dmu_objset_prefetch, NULL,
	    DS_FIND_SNAPSHOTS);
#endif

	for (;;) {
		len = snprintf(sname, MAXPATHLEN, "%s@", name);
		if (len >= MAXPATHLEN) {
			dmu_objset_rele(os, FTAG);
			error = ENAMETOOLONG;
			break;
		}

		dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
		error = dmu_snapshot_list_next(os, MAXPATHLEN - len,
		    sname + len, &obj, &cookie, NULL);
		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
		if (error != 0) {
			if (error == ENOENT)
				error = 0;
			break;
		}

		if ((error = zvol_create_minor(sname)) != 0) {
			printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n",
			    sname, error);
			break;
		}
	}

	kmem_free(sname, MAXPATHLEN);
	return (error);
}

/*
 * Recursively create minors for dataset 'name': for a zvol, create its
 * minor and those of its snapshots; for a ZFS filesystem, recurse into
 * each child dataset (dropping and re-taking the objset hold around each
 * recursion so the hold is not kept across the nested work); anything
 * else is ignored.  Hidden datasets are skipped.
 */
int
zvol_create_minors(const char *name)
{
	uint64_t cookie;
	objset_t *os;
	char *osname, *p;
	int error, len;

	if (dataset_name_hidden(name))
		return (0);

	if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
		printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n",
		    name, error);
		return (error);
	}
	if (dmu_objset_type(os) == DMU_OST_ZVOL) {
		/* Keep the dataset alive while minors are created. */
		dsl_dataset_long_hold(os->os_dsl_dataset, FTAG);
		dsl_pool_rele(dmu_objset_pool(os), FTAG);
		if ((error = zvol_create_minor(name)) == 0)
			error = zvol_create_snapshots(os, name);
		else {
			printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n",
			    name, error);
		}
		dsl_dataset_long_rele(os->os_dsl_dataset, FTAG);
		dsl_dataset_rele(os->os_dsl_dataset, FTAG);
		return (error);
	}
	if (dmu_objset_type(os) != DMU_OST_ZFS) {
		dmu_objset_rele(os, FTAG);
		return (0);
	}

	osname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	if (snprintf(osname, MAXPATHLEN, "%s/", name) >= MAXPATHLEN) {
		dmu_objset_rele(os, FTAG);
		kmem_free(osname, MAXPATHLEN);
		return (ENOENT);
	}
	p = osname + strlen(osname);
	len = MAXPATHLEN - (p - osname);

#if 0
	/* Prefetch the datasets. */
	cookie = 0;
	while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
		if (!dataset_name_hidden(osname))
			(void) dmu_objset_prefetch(osname, NULL);
	}
#endif

	cookie = 0;
	while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL,
	    &cookie) == 0) {
		dmu_objset_rele(os, FTAG);
		(void)zvol_create_minors(osname);
		if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
			printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n",
			    name, error);
			return (error);
		}
	}

	dmu_objset_rele(os, FTAG);
	kmem_free(osname, MAXPATHLEN);
	return (0);
}

/*
 * Rename a single zvol's provider: wither the old provider and create a
 * fresh one under 'newname' on the same geom, carrying over the zvol
 * state.  Requires spa_namespace_lock and the topology lock (asserted).
 */
static void
zvol_rename_minor(struct g_geom *gp, const char *newname)
{
	struct g_provider *pp;
	zvol_state_t *zv;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	g_topology_assert();

	pp = LIST_FIRST(&gp->provider);
	ASSERT(pp != NULL);
	zv = pp->private;
	ASSERT(zv != NULL);

	zv->zv_provider = NULL;
	g_wither_provider(pp, ENXIO);

	pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
	pp->sectorsize = DEV_BSIZE;
	pp->mediasize = zv->zv_volsize;
	pp->private = zv;
	zv->zv_provider = pp;
	strlcpy(zv->zv_name, newname, sizeof(zv->zv_name));
	g_error_provider(pp, 0);
}

/*
 * Rename every zvol whose name is 'oldname' itself, or a descendant or
 * snapshot of it ("oldname/..." or "oldname@..."), substituting
 * 'newname' for the prefix.  Walks all geoms of zfs_zvol_class under
 * spa_namespace_lock and the topology lock.
 */
void
zvol_rename_minors(const char *oldname, const char *newname)
{
	char name[MAXPATHLEN];
	struct g_provider *pp;
	struct g_geom *gp;
	size_t oldnamelen, newnamelen;
	zvol_state_t *zv;
	char *namebuf;	/* NOTE(review): appears unused in this function */

	oldnamelen = strlen(oldname);
	newnamelen = strlen(newname);

	DROP_GIANT();
	mutex_enter(&spa_namespace_lock);
	g_topology_lock();

	LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) {
		pp = LIST_FIRST(&gp->provider);
		if (pp == NULL)
			continue;
		zv = pp->private;
		if (zv == NULL)
			continue;
		if (strcmp(zv->zv_name, oldname) == 0) {
			zvol_rename_minor(gp, newname);
		} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
		    (zv->zv_name[oldnamelen] == '/' ||
		    zv->zv_name[oldnamelen] == '@')) {
			snprintf(name, sizeof(name), "%s%c%s", newname,
			    zv->zv_name[oldnamelen],
			    zv->zv_name + oldnamelen + 1);
			zvol_rename_minor(gp, name);
		}
	}

	g_topology_unlock();
	mutex_exit(&spa_namespace_lock);
	PICKUP_GIANT();
}