1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23219089Spjd * 24219089Spjd * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org> 25168404Spjd * All rights reserved. 26264732Smav * 27264732Smav * Portions Copyright 2010 Robert Milkowski 28264732Smav * 29264732Smav * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 30268657Sdelphij * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 31268657Sdelphij * Copyright (c) 2013, Joyent, Inc. All rights reserved. 32297112Smav * Copyright (c) 2014 Integros [integros.com] 33168404Spjd */ 34168404Spjd 35226724Smm/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 36219089Spjd 37168404Spjd/* 38168404Spjd * ZFS volume emulation driver. 39168404Spjd * 40168404Spjd * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. 
41168404Spjd * Volumes are accessed through the symbolic links named: 42168404Spjd * 43168404Spjd * /dev/zvol/dsk/<pool_name>/<dataset_name> 44168404Spjd * /dev/zvol/rdsk/<pool_name>/<dataset_name> 45168404Spjd * 46219089Spjd * These links are created by the /dev filesystem (sdev_zvolops.c). 47168404Spjd * Volumes are persistent through reboot. No user command needs to be 48168404Spjd * run before opening and using a device. 49219089Spjd * 50219089Spjd * FreeBSD notes. 51219089Spjd * On FreeBSD ZVOLs are simply GEOM providers like any other storage device 52219089Spjd * in the system. 53168404Spjd */ 54168404Spjd 55168962Spjd#include <sys/types.h> 56168404Spjd#include <sys/param.h> 57168404Spjd#include <sys/kernel.h> 58168404Spjd#include <sys/errno.h> 59168404Spjd#include <sys/uio.h> 60168404Spjd#include <sys/bio.h> 61168962Spjd#include <sys/buf.h> 62168404Spjd#include <sys/kmem.h> 63168404Spjd#include <sys/conf.h> 64168404Spjd#include <sys/cmn_err.h> 65168404Spjd#include <sys/stat.h> 66168404Spjd#include <sys/zap.h> 67168404Spjd#include <sys/spa.h> 68255750Sdelphij#include <sys/spa_impl.h> 69168404Spjd#include <sys/zio.h> 70265678Smav#include <sys/disk.h> 71185029Spjd#include <sys/dmu_traverse.h> 72185029Spjd#include <sys/dnode.h> 73185029Spjd#include <sys/dsl_dataset.h> 74168404Spjd#include <sys/dsl_prop.h> 75168962Spjd#include <sys/dkio.h> 76168404Spjd#include <sys/byteorder.h> 77168962Spjd#include <sys/sunddi.h> 78168404Spjd#include <sys/dirent.h> 79168962Spjd#include <sys/policy.h> 80265678Smav#include <sys/queue.h> 81168404Spjd#include <sys/fs/zfs.h> 82168404Spjd#include <sys/zfs_ioctl.h> 83168404Spjd#include <sys/zil.h> 84168404Spjd#include <sys/refcount.h> 85168404Spjd#include <sys/zfs_znode.h> 86168404Spjd#include <sys/zfs_rlock.h> 87185029Spjd#include <sys/vdev_impl.h> 88255750Sdelphij#include <sys/vdev_raidz.h> 89185029Spjd#include <sys/zvol.h> 90209962Smm#include <sys/zil_impl.h> 91243524Smm#include <sys/dbuf.h> 92255750Sdelphij#include <sys/dmu_tx.h> 
93255750Sdelphij#include <sys/zfeature.h> 94255750Sdelphij#include <sys/zio_checksum.h> 95275892Smav#include <sys/filio.h> 96255750Sdelphij 97168404Spjd#include <geom/geom.h> 98168404Spjd 99168404Spjd#include "zfs_namecheck.h" 100168404Spjd 101277483Ssmh#ifndef illumos 102168404Spjdstruct g_class zfs_zvol_class = { 103168404Spjd .name = "ZFS::ZVOL", 104168404Spjd .version = G_VERSION, 105168404Spjd}; 106168404Spjd 107168404SpjdDECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol); 108168404Spjd 109277483Ssmh#endif 110219089Spjdvoid *zfsdev_state; 111219089Spjdstatic char *zvol_tag = "zvol_tag"; 112219089Spjd 113219089Spjd#define ZVOL_DUMPSIZE "dumpsize" 114219089Spjd 115185029Spjd/* 116277483Ssmh * This lock protects the zfsdev_state structure from being modified 117277483Ssmh * while it's being used, e.g. an open that comes in before a create 118277483Ssmh * finishes. It also protects temporary opens of the dataset so that, 119185029Spjd * e.g., an open doesn't get a spurious EBUSY. 120185029Spjd */ 121277483Ssmh#ifdef illumos 122277483Ssmhkmutex_t zfsdev_state_lock; 123277483Ssmh#else 124277483Ssmh/* 125277483Ssmh * In FreeBSD we've replaced the upstream zfsdev_state_lock with the 126277483Ssmh * spa_namespace_lock in the ZVOL code. 
127277483Ssmh */ 128277483Ssmh#define zfsdev_state_lock spa_namespace_lock 129277483Ssmh#endif 130168404Spjdstatic uint32_t zvol_minors; 131168404Spjd 132277483Ssmh#ifndef illumos 133265678SmavSYSCTL_DECL(_vfs_zfs); 134265678SmavSYSCTL_NODE(_vfs_zfs, OID_AUTO, vol, CTLFLAG_RW, 0, "ZFS VOLUME"); 135265678Smavstatic int volmode = ZFS_VOLMODE_GEOM; 136265678SmavTUNABLE_INT("vfs.zfs.vol.mode", &volmode); 137265678SmavSYSCTL_INT(_vfs_zfs_vol, OID_AUTO, mode, CTLFLAG_RWTUN, &volmode, 0, 138265678Smav "Expose as GEOM providers (1), device files (2) or neither"); 139265678Smav 140277483Ssmh#endif 141185029Spjdtypedef struct zvol_extent { 142208047Smm list_node_t ze_node; 143185029Spjd dva_t ze_dva; /* dva associated with this extent */ 144208047Smm uint64_t ze_nblks; /* number of blocks in extent */ 145185029Spjd} zvol_extent_t; 146185029Spjd 147168404Spjd/* 148168404Spjd * The in-core state of each volume. 149168404Spjd */ 150168404Spjdtypedef struct zvol_state { 151277483Ssmh#ifndef illumos 152265678Smav LIST_ENTRY(zvol_state) zv_links; 153277483Ssmh#endif 154168404Spjd char zv_name[MAXPATHLEN]; /* pool/dd name */ 155168404Spjd uint64_t zv_volsize; /* amount of space we advertise */ 156168404Spjd uint64_t zv_volblocksize; /* volume block size */ 157277483Ssmh#ifdef illumos 158277483Ssmh minor_t zv_minor; /* minor number */ 159277483Ssmh#else 160265678Smav struct cdev *zv_dev; /* non-GEOM device */ 161168404Spjd struct g_provider *zv_provider; /* GEOM provider */ 162277483Ssmh#endif 163168404Spjd uint8_t zv_min_bs; /* minimum addressable block shift */ 164219089Spjd uint8_t zv_flags; /* readonly, dumpified, etc. 
*/ 165168404Spjd objset_t *zv_objset; /* objset handle */ 166277483Ssmh#ifdef illumos 167277483Ssmh uint32_t zv_open_count[OTYPCNT]; /* open counts */ 168277483Ssmh#endif 169168404Spjd uint32_t zv_total_opens; /* total open count */ 170308596Smav uint32_t zv_sync_cnt; /* synchronous open count */ 171168404Spjd zilog_t *zv_zilog; /* ZIL handle */ 172208047Smm list_t zv_extents; /* List of extents for dump */ 173168404Spjd znode_t zv_znode; /* for range locking */ 174219089Spjd dmu_buf_t *zv_dbuf; /* bonus handle */ 175277483Ssmh#ifndef illumos 176168404Spjd int zv_state; 177265678Smav int zv_volmode; /* Provide GEOM or cdev */ 178168404Spjd struct bio_queue_head zv_queue; 179168404Spjd struct mtx zv_queue_mtx; /* zv_queue mutex */ 180277483Ssmh#endif 181168404Spjd} zvol_state_t; 182168404Spjd 183277483Ssmh#ifndef illumos 184265678Smavstatic LIST_HEAD(, zvol_state) all_zvols; 185277483Ssmh#endif 186168404Spjd/* 187185029Spjd * zvol specific flags 188185029Spjd */ 189185029Spjd#define ZVOL_RDONLY 0x1 190185029Spjd#define ZVOL_DUMPIFIED 0x2 191185029Spjd#define ZVOL_EXCL 0x4 192219089Spjd#define ZVOL_WCE 0x8 193185029Spjd 194185029Spjd/* 195168404Spjd * zvol maximum transfer in one DMU tx. 196168404Spjd */ 197168404Spjdint zvol_maxphys = DMU_MAX_ACCESS/2; 198168404Spjd 199273345Sdelphij/* 200273345Sdelphij * Toggle unmap functionality. 
201273345Sdelphij */ 202273345Sdelphijboolean_t zvol_unmap_enabled = B_TRUE; 203277483Ssmh#ifndef illumos 204273345SdelphijSYSCTL_INT(_vfs_zfs_vol, OID_AUTO, unmap_enabled, CTLFLAG_RWTUN, 205273345Sdelphij &zvol_unmap_enabled, 0, 206273345Sdelphij "Enable UNMAP functionality"); 207273345Sdelphij 208265678Smavstatic d_open_t zvol_d_open; 209265678Smavstatic d_close_t zvol_d_close; 210265678Smavstatic d_read_t zvol_read; 211265678Smavstatic d_write_t zvol_write; 212265678Smavstatic d_ioctl_t zvol_d_ioctl; 213265678Smavstatic d_strategy_t zvol_strategy; 214265678Smav 215265678Smavstatic struct cdevsw zvol_cdevsw = { 216265678Smav .d_version = D_VERSION, 217265678Smav .d_open = zvol_d_open, 218265678Smav .d_close = zvol_d_close, 219265678Smav .d_read = zvol_read, 220265678Smav .d_write = zvol_write, 221265678Smav .d_ioctl = zvol_d_ioctl, 222265678Smav .d_strategy = zvol_strategy, 223265678Smav .d_name = "zvol", 224265678Smav .d_flags = D_DISK | D_TRACKCLOSE, 225265678Smav}; 226265678Smav 227277483Ssmhstatic void zvol_geom_run(zvol_state_t *zv); 228277483Ssmhstatic void zvol_geom_destroy(zvol_state_t *zv); 229277483Ssmhstatic int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace); 230277483Ssmhstatic void zvol_geom_start(struct bio *bp); 231277483Ssmhstatic void zvol_geom_worker(void *arg); 232277483Ssmhstatic void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, 233277483Ssmh uint64_t len, boolean_t sync); 234277483Ssmh#endif /* !illumos */ 235277483Ssmh 236219089Spjdextern int zfs_set_prop_nvlist(const char *, zprop_source_t, 237248571Smm nvlist_t *, nvlist_t *); 238219089Spjdstatic int zvol_remove_zv(zvol_state_t *); 239168404Spjdstatic int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); 240185029Spjdstatic int zvol_dumpify(zvol_state_t *zv); 241185029Spjdstatic int zvol_dump_fini(zvol_state_t *zv); 242185029Spjdstatic int zvol_dump_init(zvol_state_t *zv, boolean_t resize); 243168404Spjd 244185029Spjdstatic void 
/*
 * Push a new volume size out to the OS-visible device.  Caller holds
 * zfsdev_state_lock (spa_namespace_lock on FreeBSD).
 */
245277483Ssmhzvol_size_changed(zvol_state_t *zv, uint64_t volsize)
246185029Spjd{
247277483Ssmh#ifdef illumos
	/* Refresh the devinfo size properties and flush specfs' cache. */
248277483Ssmh	dev_t dev = makedevice(ddi_driver_major(zfs_dip), zv->zv_minor);
249219089Spjd
250277483Ssmh	zv->zv_volsize = volsize;
251219089Spjd	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
252219089Spjd	    "Size", volsize) == DDI_SUCCESS);
253219089Spjd	VERIFY(ddi_prop_update_int64(dev, zfs_dip,
254219089Spjd	    "Nblocks", lbtodb(volsize)) == DDI_SUCCESS);
255219089Spjd
256219089Spjd	/* Notify specfs to invalidate the cached size */
257219089Spjd	spec_size_invalidate(dev, VBLK);
258219089Spjd	spec_size_invalidate(dev, VCHR);
259277483Ssmh#else	/* !illumos */
260277483Ssmh	zv->zv_volsize = volsize;
	/* GEOM mode: resize the provider.  cdev mode needs no update here. */
261265678Smav	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
262265678Smav		struct g_provider *pp;
263185029Spjd
264265678Smav		pp = zv->zv_provider;
		/* The provider may not exist yet during minor creation. */
265265678Smav		if (pp == NULL)
266265678Smav			return;
267265678Smav		g_topology_lock();
268265678Smav		g_resize_provider(pp, zv->zv_volsize);
269265678Smav		g_topology_unlock();
270265678Smav	}
271277483Ssmh#endif	/* illumos */
272185029Spjd}
273185029Spjd
/*
 * Validate a proposed volume size: non-zero, a multiple of the block
 * size, and (on 32-bit kernels) within the maximum specfs offset.
 * Returns 0, EINVAL, or EOVERFLOW.
 */
274168404Spjdint
275168404Spjdzvol_check_volsize(uint64_t volsize, uint64_t blocksize)
276168404Spjd{
277168404Spjd	if (volsize == 0)
278249195Smm		return (SET_ERROR(EINVAL));
279168404Spjd
280168404Spjd	if (volsize % blocksize != 0)
281249195Smm		return (SET_ERROR(EINVAL));
282168404Spjd
283168404Spjd#ifdef _ILP32
284168404Spjd	if (volsize - 1 > SPEC_MAXOFFSET_T)
285249195Smm		return (SET_ERROR(EOVERFLOW));
286168404Spjd#endif
287168404Spjd	return (0);
288168404Spjd}
289168404Spjd
/*
 * Validate a proposed volume block size: a power of two between
 * SPA_MINBLOCKSIZE and SPA_OLD_MAXBLOCKSIZE.  Returns 0 or EDOM.
 */
290168404Spjdint
291168404Spjdzvol_check_volblocksize(uint64_t volblocksize)
292168404Spjd{
293168404Spjd	if (volblocksize < SPA_MINBLOCKSIZE ||
294276081Sdelphij	    volblocksize > SPA_OLD_MAXBLOCKSIZE ||
295168404Spjd	    !ISP2(volblocksize))
296249195Smm		return (SET_ERROR(EDOM));
297168404Spjd
298168404Spjd	return (0);
299168404Spjd}
300168404Spjd
/*
 * Add the volsize and volblocksize properties of the given objset to nv.
 */
301168404Spjdint
302168404Spjdzvol_get_stats(objset_t *os,
    nvlist_t *nv)
303168404Spjd{
304168404Spjd	int error;
305168404Spjd	dmu_object_info_t doi;
306168404Spjd	uint64_t val;
307168404Spjd
	/* The volume size lives in the zvol's private ZAP object. */
308168404Spjd	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
309168404Spjd	if (error)
310168404Spjd		return (error);
311168404Spjd
312168404Spjd	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
313168404Spjd
	/* The block size is taken from the data object itself. */
314168404Spjd	error = dmu_object_info(os, ZVOL_OBJ, &doi);
315168404Spjd
316168404Spjd	if (error == 0) {
317168404Spjd		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
318168404Spjd		    doi.doi_data_block_size);
319168404Spjd	}
320168404Spjd
321168404Spjd	return (error);
322168404Spjd}
323168404Spjd
/*
 * Find the in-core state for the named volume, or NULL if no minor
 * exists for it.  Caller must hold zfsdev_state_lock.
 */
324168404Spjdstatic zvol_state_t *
325168404Spjdzvol_minor_lookup(const char *name)
326168404Spjd{
327277483Ssmh#ifdef illumos
328277483Ssmh	minor_t minor;
329277483Ssmh#endif
330265678Smav	zvol_state_t *zv;
331168404Spjd
332277483Ssmh	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
333168404Spjd
334277483Ssmh#ifdef illumos
	/* illumos: scan every allocated minor's soft state for a match. */
335277483Ssmh	for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) {
336277483Ssmh		zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
337277483Ssmh		if (zv == NULL)
338277483Ssmh			continue;
339277483Ssmh#else
	/* FreeBSD: all zvols are kept on a single global list. */
340265678Smav	LIST_FOREACH(zv, &all_zvols, zv_links) {
341277483Ssmh#endif
342219089Spjd		if (strcmp(zv->zv_name, name) == 0)
343277483Ssmh			return (zv);
344168404Spjd	}
345168404Spjd
346277483Ssmh	return (NULL);
347168404Spjd}
348168404Spjd
349185029Spjd/* extent mapping arg */
350185029Spjdstruct maparg {
351208047Smm	zvol_state_t	*ma_zv;
352208047Smm	uint64_t	ma_blks;
353185029Spjd};
354185029Spjd
/*
 * traverse_dataset() callback: fold each level-0 data block of the
 * volume into the zv_extents list of physically contiguous runs (used
 * to build the dump-device extent map).  Returns 0 or EFRAGS if a gang
 * block makes the mapping impossible.
 */
355185029Spjd/*ARGSUSED*/
356185029Spjdstatic int
357246666Smmzvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
358268657Sdelphij    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
359185029Spjd{
360208047Smm	struct maparg *ma = arg;
361208047Smm	zvol_extent_t *ze;
362208047Smm	int bs = ma->ma_zv->zv_volblocksize;
363185029Spjd
	/* Only level-0 data blocks of the volume object are of interest. */
364288571Smav	if (bp == NULL || BP_IS_HOLE(bp) ||
365263397Sdelphij zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) 366208047Smm return (0); 367185029Spjd 368268649Sdelphij VERIFY(!BP_IS_EMBEDDED(bp)); 369268649Sdelphij 370208047Smm VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); 371208047Smm ma->ma_blks++; 372185029Spjd 373208047Smm /* Abort immediately if we have encountered gang blocks */ 374208047Smm if (BP_IS_GANG(bp)) 375249195Smm return (SET_ERROR(EFRAGS)); 376185029Spjd 377208047Smm /* 378208047Smm * See if the block is at the end of the previous extent. 379208047Smm */ 380208047Smm ze = list_tail(&ma->ma_zv->zv_extents); 381208047Smm if (ze && 382208047Smm DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && 383208047Smm DVA_GET_OFFSET(BP_IDENTITY(bp)) == 384208047Smm DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { 385208047Smm ze->ze_nblks++; 386208047Smm return (0); 387185029Spjd } 388185029Spjd 389208047Smm dprintf_bp(bp, "%s", "next blkptr:"); 390185029Spjd 391208047Smm /* start a new extent */ 392208047Smm ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); 393208047Smm ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ 394208047Smm ze->ze_nblks = 1; 395208047Smm list_insert_tail(&ma->ma_zv->zv_extents, ze); 396208047Smm return (0); 397208047Smm} 398185029Spjd 399208047Smmstatic void 400208047Smmzvol_free_extents(zvol_state_t *zv) 401208047Smm{ 402208047Smm zvol_extent_t *ze; 403185029Spjd 404208047Smm while (ze = list_head(&zv->zv_extents)) { 405208047Smm list_remove(&zv->zv_extents, ze); 406208047Smm kmem_free(ze, sizeof (zvol_extent_t)); 407185029Spjd } 408208047Smm} 409185029Spjd 410208047Smmstatic int 411208047Smmzvol_get_lbas(zvol_state_t *zv) 412208047Smm{ 413219089Spjd objset_t *os = zv->zv_objset; 414208047Smm struct maparg ma; 415208047Smm int err; 416185029Spjd 417208047Smm ma.ma_zv = zv; 418208047Smm ma.ma_blks = 0; 419208047Smm zvol_free_extents(zv); 420208047Smm 421219089Spjd /* commit any in-flight changes before traversing the dataset */ 422219089Spjd 
txg_wait_synced(dmu_objset_pool(os), 0); 423219089Spjd err = traverse_dataset(dmu_objset_ds(os), 0, 424208047Smm TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); 425208047Smm if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { 426208047Smm zvol_free_extents(zv); 427208047Smm return (err ? err : EIO); 428185029Spjd } 429185029Spjd 430185029Spjd return (0); 431185029Spjd} 432185029Spjd 433185029Spjd/* ARGSUSED */ 434185029Spjdvoid 435185029Spjdzvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 436185029Spjd{ 437185029Spjd zfs_creat_t *zct = arg; 438185029Spjd nvlist_t *nvprops = zct->zct_props; 439168404Spjd int error; 440168404Spjd uint64_t volblocksize, volsize; 441168404Spjd 442185029Spjd VERIFY(nvlist_lookup_uint64(nvprops, 443168404Spjd zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); 444185029Spjd if (nvlist_lookup_uint64(nvprops, 445168404Spjd zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) 446168404Spjd volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); 447168404Spjd 448168404Spjd /* 449185029Spjd * These properties must be removed from the list so the generic 450168404Spjd * property setting step won't apply to them. 
451168404Spjd */ 452185029Spjd VERIFY(nvlist_remove_all(nvprops, 453168404Spjd zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); 454185029Spjd (void) nvlist_remove_all(nvprops, 455168404Spjd zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); 456168404Spjd 457168404Spjd error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, 458168404Spjd DMU_OT_NONE, 0, tx); 459168404Spjd ASSERT(error == 0); 460168404Spjd 461168404Spjd error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, 462168404Spjd DMU_OT_NONE, 0, tx); 463168404Spjd ASSERT(error == 0); 464168404Spjd 465168404Spjd error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 466168404Spjd ASSERT(error == 0); 467168404Spjd} 468168404Spjd 469168404Spjd/* 470264732Smav * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we 471264732Smav * implement DKIOCFREE/free-long-range. 472264732Smav */ 473264732Smavstatic int 474264732Smavzvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap) 475264732Smav{ 476264732Smav uint64_t offset, length; 477264732Smav 478264732Smav if (byteswap) 479264732Smav byteswap_uint64_array(lr, sizeof (*lr)); 480264732Smav 481264732Smav offset = lr->lr_offset; 482264732Smav length = lr->lr_length; 483264732Smav 484264732Smav return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length)); 485264732Smav} 486264732Smav 487264732Smav/* 488168404Spjd * Replay a TX_WRITE ZIL transaction that didn't get committed 489168404Spjd * after a system failure 490168404Spjd */ 491168404Spjdstatic int 492168404Spjdzvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) 493168404Spjd{ 494168404Spjd objset_t *os = zv->zv_objset; 495168404Spjd char *data = (char *)(lr + 1); /* data follows lr_write_t */ 496219089Spjd uint64_t offset, length; 497168404Spjd dmu_tx_t *tx; 498168404Spjd int error; 499168404Spjd 500168404Spjd if (byteswap) 501168404Spjd byteswap_uint64_array(lr, sizeof (*lr)); 502168404Spjd 503219089Spjd offset = lr->lr_offset; 
504219089Spjd length = lr->lr_length; 505209962Smm 506219089Spjd /* If it's a dmu_sync() block, write the whole block */ 507219089Spjd if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 508219089Spjd uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 509219089Spjd if (length < blocksize) { 510219089Spjd offset -= offset % blocksize; 511219089Spjd length = blocksize; 512219089Spjd } 513219089Spjd } 514219089Spjd 515168404Spjd tx = dmu_tx_create(os); 516219089Spjd dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length); 517209962Smm error = dmu_tx_assign(tx, TXG_WAIT); 518168404Spjd if (error) { 519168404Spjd dmu_tx_abort(tx); 520168404Spjd } else { 521219089Spjd dmu_write(os, ZVOL_OBJ, offset, length, data, tx); 522168404Spjd dmu_tx_commit(tx); 523168404Spjd } 524168404Spjd 525168404Spjd return (error); 526168404Spjd} 527168404Spjd 528168404Spjd/* ARGSUSED */ 529168404Spjdstatic int 530168404Spjdzvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) 531168404Spjd{ 532249195Smm return (SET_ERROR(ENOTSUP)); 533168404Spjd} 534168404Spjd 535168404Spjd/* 536168404Spjd * Callback vectors for replaying records. 537264732Smav * Only TX_WRITE and TX_TRUNCATE are needed for zvol. 
538168404Spjd */ 539168404Spjdzil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { 540168404Spjd zvol_replay_err, /* 0 no such transaction type */ 541168404Spjd zvol_replay_err, /* TX_CREATE */ 542168404Spjd zvol_replay_err, /* TX_MKDIR */ 543168404Spjd zvol_replay_err, /* TX_MKXATTR */ 544168404Spjd zvol_replay_err, /* TX_SYMLINK */ 545168404Spjd zvol_replay_err, /* TX_REMOVE */ 546168404Spjd zvol_replay_err, /* TX_RMDIR */ 547168404Spjd zvol_replay_err, /* TX_LINK */ 548168404Spjd zvol_replay_err, /* TX_RENAME */ 549168404Spjd zvol_replay_write, /* TX_WRITE */ 550264732Smav zvol_replay_truncate, /* TX_TRUNCATE */ 551168404Spjd zvol_replay_err, /* TX_SETATTR */ 552168404Spjd zvol_replay_err, /* TX_ACL */ 553209962Smm zvol_replay_err, /* TX_CREATE_ACL */ 554209962Smm zvol_replay_err, /* TX_CREATE_ATTR */ 555209962Smm zvol_replay_err, /* TX_CREATE_ACL_ATTR */ 556209962Smm zvol_replay_err, /* TX_MKDIR_ACL */ 557209962Smm zvol_replay_err, /* TX_MKDIR_ATTR */ 558209962Smm zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ 559209962Smm zvol_replay_err, /* TX_WRITE2 */ 560168404Spjd}; 561168404Spjd 562277483Ssmh#ifdef illumos 563219089Spjdint 564219089Spjdzvol_name2minor(const char *name, minor_t *minor) 565219089Spjd{ 566219089Spjd zvol_state_t *zv; 567219089Spjd 568277483Ssmh mutex_enter(&zfsdev_state_lock); 569219089Spjd zv = zvol_minor_lookup(name); 570219089Spjd if (minor && zv) 571219089Spjd *minor = zv->zv_minor; 572277483Ssmh mutex_exit(&zfsdev_state_lock); 573219089Spjd return (zv ? 0 : -1); 574219089Spjd} 575277483Ssmh#endif /* illumos */ 576219089Spjd 577168404Spjd/* 578185029Spjd * Create a minor node (plus a whole lot more) for the specified volume. 
579185029Spjd */ 580185029Spjdint 581219089Spjdzvol_create_minor(const char *name) 582185029Spjd{ 583219089Spjd zfs_soft_state_t *zs; 584168404Spjd zvol_state_t *zv; 585168404Spjd objset_t *os; 586277483Ssmh dmu_object_info_t doi; 587277483Ssmh#ifdef illumos 588277483Ssmh minor_t minor = 0; 589277483Ssmh char chrbuf[30], blkbuf[30]; 590277483Ssmh#else 591265678Smav struct g_provider *pp; 592265678Smav struct g_geom *gp; 593265678Smav uint64_t volsize, mode; 594277483Ssmh#endif 595168404Spjd int error; 596168404Spjd 597277483Ssmh#ifndef illumos 598219089Spjd ZFS_LOG(1, "Creating ZVOL %s...", name); 599277483Ssmh#endif 600168404Spjd 601277483Ssmh mutex_enter(&zfsdev_state_lock); 602219089Spjd 603219089Spjd if (zvol_minor_lookup(name) != NULL) { 604277483Ssmh mutex_exit(&zfsdev_state_lock); 605249195Smm return (SET_ERROR(EEXIST)); 606168404Spjd } 607168404Spjd 608219089Spjd /* lie and say we're read-only */ 609219089Spjd error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); 610168404Spjd 611168404Spjd if (error) { 612277483Ssmh mutex_exit(&zfsdev_state_lock); 613219089Spjd return (error); 614168404Spjd } 615168404Spjd 616277483Ssmh#ifdef illumos 617219089Spjd if ((minor = zfsdev_minor_alloc()) == 0) { 618219089Spjd dmu_objset_disown(os, FTAG); 619277483Ssmh mutex_exit(&zfsdev_state_lock); 620249195Smm return (SET_ERROR(ENXIO)); 621219089Spjd } 622168404Spjd 623219089Spjd if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { 624219089Spjd dmu_objset_disown(os, FTAG); 625277483Ssmh mutex_exit(&zfsdev_state_lock); 626249195Smm return (SET_ERROR(EAGAIN)); 627219089Spjd } 628219089Spjd (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, 629219089Spjd (char *)name); 630219089Spjd 631219089Spjd (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor); 632219089Spjd 633219089Spjd if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, 634219089Spjd minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 635219089Spjd ddi_soft_state_free(zfsdev_state, minor); 
636219089Spjd dmu_objset_disown(os, FTAG); 637277483Ssmh mutex_exit(&zfsdev_state_lock); 638249195Smm return (SET_ERROR(EAGAIN)); 639219089Spjd } 640219089Spjd 641219089Spjd (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); 642219089Spjd 643219089Spjd if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, 644219089Spjd minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 645219089Spjd ddi_remove_minor_node(zfs_dip, chrbuf); 646219089Spjd ddi_soft_state_free(zfsdev_state, minor); 647219089Spjd dmu_objset_disown(os, FTAG); 648277483Ssmh mutex_exit(&zfsdev_state_lock); 649249195Smm return (SET_ERROR(EAGAIN)); 650219089Spjd } 651219089Spjd 652219089Spjd zs = ddi_get_soft_state(zfsdev_state, minor); 653219089Spjd zs->zss_type = ZSST_ZVOL; 654219089Spjd zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); 655277483Ssmh#else /* !illumos */ 656219089Spjd 657265678Smav zv = kmem_zalloc(sizeof(*zv), KM_SLEEP); 658265678Smav zv->zv_state = 0; 659241297Savg error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 660241297Savg if (error) { 661265678Smav kmem_free(zv, sizeof(*zv)); 662241297Savg dmu_objset_disown(os, zvol_tag); 663277483Ssmh mutex_exit(&zfsdev_state_lock); 664241297Savg return (error); 665241297Savg } 666265678Smav error = dsl_prop_get_integer(name, 667265678Smav zfs_prop_to_name(ZFS_PROP_VOLMODE), &mode, NULL); 668265678Smav if (error != 0 || mode == ZFS_VOLMODE_DEFAULT) 669265678Smav mode = volmode; 670241297Savg 671219089Spjd DROP_GIANT(); 672241297Savg zv->zv_volsize = volsize; 673265678Smav zv->zv_volmode = mode; 674265678Smav if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 675265678Smav g_topology_lock(); 676265678Smav gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name); 677265678Smav gp->start = zvol_geom_start; 678265678Smav gp->access = zvol_geom_access; 679265678Smav pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name); 680265678Smav pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; 681265678Smav pp->sectorsize = DEV_BSIZE; 682265678Smav 
pp->mediasize = zv->zv_volsize; 683265678Smav pp->private = zv; 684241297Savg 685265678Smav zv->zv_provider = pp; 686265678Smav bioq_init(&zv->zv_queue); 687265678Smav mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF); 688265678Smav } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 689297548Smav struct make_dev_args args; 690297548Smav 691297548Smav make_dev_args_init(&args); 692297548Smav args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 693297548Smav args.mda_devsw = &zvol_cdevsw; 694297548Smav args.mda_cr = NULL; 695297548Smav args.mda_uid = UID_ROOT; 696297548Smav args.mda_gid = GID_OPERATOR; 697297548Smav args.mda_mode = 0640; 698297548Smav args.mda_si_drv2 = zv; 699297548Smav error = make_dev_s(&args, &zv->zv_dev, 700297548Smav "%s/%s", ZVOL_DRIVER, name); 701297547Smav if (error != 0) { 702265678Smav kmem_free(zv, sizeof(*zv)); 703265678Smav dmu_objset_disown(os, FTAG); 704277483Ssmh mutex_exit(&zfsdev_state_lock); 705297547Smav return (error); 706265678Smav } 707297548Smav zv->zv_dev->si_iosize_max = MAXPHYS; 708265678Smav } 709265678Smav LIST_INSERT_HEAD(&all_zvols, zv, zv_links); 710277483Ssmh#endif /* illumos */ 711219089Spjd 712219089Spjd (void) strlcpy(zv->zv_name, name, MAXPATHLEN); 713168404Spjd zv->zv_min_bs = DEV_BSHIFT; 714277483Ssmh#ifdef illumos 715277483Ssmh zv->zv_minor = minor; 716277483Ssmh#endif 717168404Spjd zv->zv_objset = os; 718219089Spjd if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os))) 719219089Spjd zv->zv_flags |= ZVOL_RDONLY; 720168404Spjd mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); 721168404Spjd avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, 722168404Spjd sizeof (rl_t), offsetof(rl_t, r_node)); 723208047Smm list_create(&zv->zv_extents, sizeof (zvol_extent_t), 724208047Smm offsetof(zvol_extent_t, ze_node)); 725168404Spjd /* get and cache the blocksize */ 726168404Spjd error = dmu_object_info(os, ZVOL_OBJ, &doi); 727168404Spjd ASSERT(error == 0); 728168404Spjd zv->zv_volblocksize 
= doi.doi_data_block_size;

	/*
	 * Replay any pending intent-log records for a writable pool; a
	 * read-only pool must not be modified by ZIL replay.
	 */
	if (spa_writeable(dmu_objset_spa(os))) {
		if (zil_replay_disable)
			zil_destroy(dmu_objset_zil(os), B_FALSE);
		else
			zil_replay(os, zv, zvol_replay_vector);
	}
	dmu_objset_disown(os, FTAG);
	zv->zv_objset = NULL;

	zvol_minors++;

	mutex_exit(&zfsdev_state_lock);
#ifndef illumos
	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
		/* Start the GEOM worker for this provider, then drop the
		 * topology lock taken earlier in minor creation. */
		zvol_geom_run(zv);
		g_topology_unlock();
	}
	PICKUP_GIANT();

	ZFS_LOG(1, "ZVOL %s created.", name);
#endif

	return (0);
}

/*
 * Remove minor node for the specified volume.
 *
 * Caller must hold zfsdev_state_lock.  Fails with EBUSY while the
 * volume still has open references; otherwise frees the zvol_state_t.
 */
static int
zvol_remove_zv(zvol_state_t *zv)
{
#ifdef illumos
	char nmbuf[20];
	minor_t minor = zv->zv_minor;
#endif

	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
	if (zv->zv_total_opens != 0)
		return (SET_ERROR(EBUSY));

#ifdef illumos
	/* Remove both the raw ("%u,raw") and block ("%u") minor nodes. */
	(void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor);
	ddi_remove_minor_node(zfs_dip, nmbuf);

	(void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor);
	ddi_remove_minor_node(zfs_dip, nmbuf);
#else
	ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);

	LIST_REMOVE(zv, zv_links);
	if (zv->zv_volmode == ZFS_VOLMODE_GEOM) {
		g_topology_lock();
		zvol_geom_destroy(zv);
		g_topology_unlock();
	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
		if (zv->zv_dev != NULL)
			destroy_dev(zv->zv_dev);
	}
#endif

	avl_destroy(&zv->zv_znode.z_range_avl);
	mutex_destroy(&zv->zv_znode.z_range_lock);

	kmem_free(zv, sizeof (zvol_state_t));
#ifdef illumos
	ddi_soft_state_free(zfsdev_state, minor);
#endif
	zvol_minors--;
	return (0);
}

/*
 * Look up the named volume and remove its minor node.
 * Returns ENXIO if no such minor exists, EBUSY if it is still open.
 */
int
zvol_remove_minor(const char *name)
{
	zvol_state_t *zv;
	int rc;

	mutex_enter(&zfsdev_state_lock);
	if ((zv = zvol_minor_lookup(name)) == NULL) {
		mutex_exit(&zfsdev_state_lock);
		return (SET_ERROR(ENXIO));
	}
	rc = zvol_remove_zv(zv);
	mutex_exit(&zfsdev_state_lock);
	return (rc);
}

/*
 * First open of a zvol: take a hold on the objset, cache the volume
 * size and the bonus dbuf, open the ZIL, and compute the read-only
 * flag.  Undone by zvol_last_close() when the final reference drops.
 */
int
zvol_first_open(zvol_state_t *zv)
{
	objset_t *os;
	uint64_t volsize;
	int error;
	uint64_t readonly;

	/* lie and say we're read-only */
	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE,
	    zvol_tag, &os);
	if (error)
		return (error);

	zv->zv_objset = os;
	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error) {
		/*
		 * NOTE(review): this assertion fires on DEBUG kernels for
		 * any lookup failure — a missing "size" ZAP entry is
		 * treated as "can't happen"; non-DEBUG builds fall through
		 * to the graceful error return below.
		 */
		ASSERT(error == 0);
		dmu_objset_disown(os, zvol_tag);
		return (error);
	}

	error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
	if (error) {
		dmu_objset_disown(os, zvol_tag);
		return (error);
	}

	zvol_size_changed(zv, volsize);
	zv->zv_zilog = zil_open(os, zvol_get_data);

	/* readonly also covers snapshots and non-writable pools. */
	VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly,
	    NULL) == 0);
	if (readonly || dmu_objset_is_snapshot(os) ||
	    !spa_writeable(dmu_objset_spa(os)))
		zv->zv_flags |= ZVOL_RDONLY;
	else
		zv->zv_flags &= ~ZVOL_RDONLY;
	return (error);
}

/*
 * Tear down the per-open state set up by zvol_first_open(): close the
 * ZIL, release the bonus dbuf, flush dirty data, evict cached dbufs
 * and drop the objset hold.
 */
void
zvol_last_close(zvol_state_t *zv)
{
	zil_close(zv->zv_zilog);
	zv->zv_zilog = NULL;

	dmu_buf_rele(zv->zv_dbuf, zvol_tag);
	zv->zv_dbuf = NULL;

	/*
	 * Evict cached data
	 */
	if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
	    !(zv->zv_flags & ZVOL_RDONLY))
		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
	dmu_objset_evict_dbufs(zv->zv_objset);

	dmu_objset_disown(zv->zv_objset, zvol_tag);
	zv->zv_objset = NULL;
}

#ifdef illumos
/*
 * Preallocate (thick-provision) the whole volume, e.g. before use as
 * a dump device.  On failure, all blocks written so far are freed.
 */
int
zvol_prealloc(zvol_state_t *zv)
{
	objset_t *os = zv->zv_objset;
	dmu_tx_t *tx;
	uint64_t refd, avail, usedobjs, availobjs;
	uint64_t resid = zv->zv_volsize;
	uint64_t off = 0;

	/* Check the space usage before attempting to allocate the space */
	dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs);
	if (avail < zv->zv_volsize)
		return (SET_ERROR(ENOSPC));

	/* Free old extents if they exist */
	zvol_free_extents(zv);

	while (resid != 0) {
		int error;
		uint64_t bytes = MIN(resid, SPA_OLD_MAXBLOCKSIZE);

		tx = dmu_tx_create(os);
		dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			/* Roll back everything preallocated so far. */
			(void) dmu_free_long_range(os, ZVOL_OBJ, 0, off);
			return (error);
		}
		dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx);
		dmu_tx_commit(tx);
		off += bytes;
		resid -= bytes;
	}
	txg_wait_synced(dmu_objset_pool(os), 0);

	return (0);
}
#endif	/* illumos */

/*
 * Persist a new volume size in the "size" ZAP entry and free any
 * blocks beyond the (possibly shrunken) new end of the volume.
 */
static int
zvol_update_volsize(objset_t *os, uint64_t volsize)
{
	dmu_tx_t *tx;
	int error;

	ASSERT(MUTEX_HELD(&zfsdev_state_lock));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	/* Mark net-free so shrinking succeeds even on a full pool. */
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1,
	    &volsize, tx);
	dmu_tx_commit(tx);

	if (error == 0)
		error = dmu_free_long_range(os,
		    ZVOL_OBJ, volsize, DMU_OBJECT_END);
	return (error);
}

/*
 * Remove the minor nodes of every volume under dataset 'name'
 * (and, on FreeBSD, of 'name' itself and its snapshots).
 */
void
zvol_remove_minors(const char *name)
{
#ifdef illumos
	zvol_state_t *zv;
	char *namebuf;
	minor_t minor;

	/* Match children only: compare against "name/" as a prefix. */
	namebuf = kmem_zalloc(strlen(name) + 2, KM_SLEEP);
	(void) strncpy(namebuf, name, strlen(name));
	(void) strcat(namebuf, "/");
	mutex_enter(&zfsdev_state_lock);
	for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) {

		zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
		if (zv == NULL)
			continue;
		if (strncmp(namebuf, zv->zv_name, strlen(namebuf)) == 0)
			(void) zvol_remove_zv(zv);
	}
	kmem_free(namebuf, strlen(name) + 2);

	mutex_exit(&zfsdev_state_lock);
#else	/* !illumos */
	zvol_state_t *zv, *tzv;
	size_t namelen;

	namelen = strlen(name);

	DROP_GIANT();
	mutex_enter(&zfsdev_state_lock);

	/* Match the dataset itself, its descendants ('/') and snapshots ('@'). */
	LIST_FOREACH_SAFE(zv, &all_zvols, zv_links, tzv) {
		if (strcmp(zv->zv_name, name) == 0 ||
		    (strncmp(zv->zv_name, name, namelen) == 0 &&
		    strlen(zv->zv_name) > namelen && (zv->zv_name[namelen] == '/' ||
		    zv->zv_name[namelen] == '@'))) {
			(void) zvol_remove_zv(zv);
		}
	}

	mutex_exit(&zfsdev_state_lock);
	PICKUP_GIANT();
#endif	/* illumos */
}

/*
 * Apply a new volume size to the live (open) zvol state, resizing the
 * dump area and emitting a LUN-expansion sysevent where applicable.
 */
static int
zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize)
{
	uint64_t old_volsize = 0ULL;
	int error = 0;

	ASSERT(MUTEX_HELD(&zfsdev_state_lock));

	/*
	 * Reinitialize the dump area to the new size. If we
	 * failed to resize the dump area then restore it back to
	 * its original size.  We must set the new volsize prior
	 * to calling dumpvp_resize() to ensure that the devices'
	 * size(9P) is not visible by the dump subsystem.
	 */
	old_volsize = zv->zv_volsize;
	zvol_size_changed(zv, volsize);

#ifdef ZVOL_DUMP
	if (zv->zv_flags & ZVOL_DUMPIFIED) {
		if ((error = zvol_dumpify(zv)) != 0 ||
		    (error = dumpvp_resize()) != 0) {
			int dumpify_error;

			(void) zvol_update_volsize(zv->zv_objset, old_volsize);
			zvol_size_changed(zv, old_volsize);
			dumpify_error = zvol_dumpify(zv);
			/* Prefer reporting the rollback failure if any. */
			error = dumpify_error ? dumpify_error : error;
		}
	}
#endif	/* ZVOL_DUMP */

#ifdef illumos
	/*
	 * Generate a LUN expansion event.
	 */
	if (error == 0) {
		sysevent_id_t eid;
		nvlist_t *attr;
		char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

		(void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV,
		    zv->zv_minor);

		VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);

		(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
		    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);

		nvlist_free(attr);
		kmem_free(physpath, MAXPATHLEN);
	}
#endif	/* illumos */
	return (error);
}

/*
 * Set the volume size both on disk and, when the volume has live
 * state, in memory.  Fails with EROFS on read-only datasets.  If no
 * minor is open we take a temporary own of the objset ('owned').
 */
int
zvol_set_volsize(const char *name, uint64_t volsize)
{
	zvol_state_t *zv = NULL;
	objset_t *os;
	int error;
	dmu_object_info_t doi;
	uint64_t readonly;
	boolean_t owned = B_FALSE;

	error = dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
	if (error != 0)
		return (error);
	if (readonly)
		return (SET_ERROR(EROFS));

	mutex_enter(&zfsdev_state_lock);
	zv = zvol_minor_lookup(name);

	if (zv == NULL || zv->zv_objset == NULL) {
		/* Not open: take our own temporary objset hold. */
		if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE,
		    FTAG, &os)) != 0) {
			mutex_exit(&zfsdev_state_lock);
			return (error);
		}
		owned = B_TRUE;
		if (zv != NULL)
			zv->zv_objset = os;
	} else {
		os = zv->zv_objset;
	}

	if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 ||
	    (error = zvol_check_volsize(volsize, doi.doi_data_block_size)) != 0)
		goto out;

	error = zvol_update_volsize(os, volsize);

	if (error == 0 && zv != NULL)
		error = zvol_update_live_volsize(zv, volsize);
out:
	if (owned) {
		dmu_objset_disown(os, FTAG);
		if (zv != NULL)
			zv->zv_objset = NULL;
	}
	mutex_exit(&zfsdev_state_lock);
	return (error);
}

/*
 * Open entry point.  On illumos this is the cb_ops open(9E) routine;
 * on FreeBSD it serves both the GEOM and cdev access paths.
 */
/*ARGSUSED*/
#ifdef illumos
int
zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr)
#else
static int
zvol_open(struct g_provider *pp, int flag, int count)
#endif
{
	zvol_state_t *zv;
	int err = 0;
#ifdef illumos

	mutex_enter(&zfsdev_state_lock);

	zv = zfsdev_get_soft_state(getminor(*devp), ZSST_ZVOL);
	if (zv == NULL) {
		mutex_exit(&zfsdev_state_lock);
		return (SET_ERROR(ENXIO));
	}

	if (zv->zv_total_opens == 0)
		err = zvol_first_open(zv);
	if (err) {
		mutex_exit(&zfsdev_state_lock);
		return (err);
	}
#else	/* !illumos */
	if (tsd_get(zfs_geom_probe_vdev_key) != NULL) {
		/*
		 * if zfs_geom_probe_vdev_key is set, that means that zfs is
		 * attempting to probe geom providers while looking for a
		 * replacement for a missing VDEV. In this case, the
		 * spa_namespace_lock will not be held, but it is still illegal
		 * to use a zvol as a vdev. Deadlocks can result if another
		 * thread has spa_namespace_lock
		 */
		return (EOPNOTSUPP);
	}

	mutex_enter(&zfsdev_state_lock);

	zv = pp->private;
	if (zv == NULL) {
		mutex_exit(&zfsdev_state_lock);
		return (SET_ERROR(ENXIO));
	}

	if (zv->zv_total_opens == 0) {
		err = zvol_first_open(zv);
		if (err) {
			mutex_exit(&zfsdev_state_lock);
			return (err);
		}
		/* Publish the volume geometry to GEOM on first open. */
		pp->mediasize = zv->zv_volsize;
		pp->stripeoffset = 0;
		pp->stripesize = zv->zv_volblocksize;
	}
#endif	/* illumos */
	if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
		err = SET_ERROR(EROFS);
		goto out;
	}
	if (zv->zv_flags & ZVOL_EXCL) {
		err = SET_ERROR(EBUSY);
		goto out;
	}
#ifdef FEXCL
	if (flag & FEXCL) {
		/* Exclusive open only succeeds with no other opens. */
		if (zv->zv_total_opens != 0) {
			err = SET_ERROR(EBUSY);
			goto out;
		}
		zv->zv_flags |= ZVOL_EXCL;
	}
#endif

#ifdef illumos
	if (zv->zv_open_count[otyp] == 0 || otyp == OTYP_LYR) {
		zv->zv_open_count[otyp]++;
		zv->zv_total_opens++;
	}
	mutex_exit(&zfsdev_state_lock);
#else
	zv->zv_total_opens += count;
	mutex_exit(&zfsdev_state_lock);
#endif

	return (err);
out:
	/* Failed open: if we were the first opener, undo first-open state. */
	if (zv->zv_total_opens == 0)
		zvol_last_close(zv);
#ifdef illumos
	mutex_exit(&zfsdev_state_lock);
#else
	mutex_exit(&zfsdev_state_lock);
#endif
	return (err);
}

/*
 * Close entry point, mirroring zvol_open(); tears down per-open state
 * via zvol_last_close() when the final reference is dropped.
 */
/*ARGSUSED*/
#ifdef illumos
int
zvol_close(dev_t dev, int flag, int otyp, cred_t *cr)
{
	minor_t minor = getminor(dev);
	zvol_state_t *zv;
	int error = 0;

	mutex_enter(&zfsdev_state_lock);

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL) {
		mutex_exit(&zfsdev_state_lock);
#else	/* !illumos */
static int
zvol_close(struct g_provider *pp, int flag, int count)
{
	zvol_state_t *zv;
	int error = 0;
	boolean_t locked = B_FALSE;

	/* See comment in zvol_open(). */
	if (!MUTEX_HELD(&zfsdev_state_lock)) {
		mutex_enter(&zfsdev_state_lock);
		locked = B_TRUE;
	}

	zv = pp->private;
	if (zv == NULL) {
		if (locked)
			mutex_exit(&zfsdev_state_lock);
#endif	/* illumos */
		return (SET_ERROR(ENXIO));
	}

	if (zv->zv_flags & ZVOL_EXCL) {
		ASSERT(zv->zv_total_opens == 1);
		zv->zv_flags &= ~ZVOL_EXCL;
	}

	/*
	 * If the open count is zero, this is a spurious close.
	 * That indicates a bug in the kernel / DDI framework.
	 */
#ifdef illumos
	ASSERT(zv->zv_open_count[otyp] != 0);
#endif
	ASSERT(zv->zv_total_opens != 0);

	/*
	 * You may get multiple opens, but only one close.
	 */
#ifdef illumos
	zv->zv_open_count[otyp]--;
	zv->zv_total_opens--;
#else
	zv->zv_total_opens -= count;
#endif

	if (zv->zv_total_opens == 0)
		zvol_last_close(zv);

#ifdef illumos
	mutex_exit(&zfsdev_state_lock);
#else
	if (locked)
		mutex_exit(&zfsdev_state_lock);
#endif
	return (error);
}

/*
 * Completion callback for zvol_get_data(): release the held dbuf and
 * the range lock, and on success record the block in the ZIL.
 */
static void
zvol_get_done(zgd_t *zgd, int error)
{
	if (zgd->zgd_db)
		dmu_buf_rele(zgd->zgd_db, zgd);

	zfs_range_unlock(zgd->zgd_rl);

	if (error == 0 && zgd->zgd_bp)
		zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);

	kmem_free(zgd, sizeof (zgd_t));
}

/*
 * Get data to generate a TX_WRITE intent log record.
 */
static int
zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zvol_state_t *zv = arg;
	objset_t *os = zv->zv_objset;
	uint64_t object = ZVOL_OBJ;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;	/* length of user data */
	blkptr_t *bp = &lr->lr_blkptr;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error;

	ASSERT(zio != NULL);
	ASSERT(size != 0);

	zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_zilog = zv->zv_zilog;

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) {	/* immediate write */
		zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size,
		    RL_READER);
		error = dmu_read(os, object, offset, size, buf,
		    DMU_READ_NO_PREFETCH);
	} else {	/* indirect write */
		/*
		 * Have to lock the whole block to ensure when it's written out
		 * and its checksum is being calculated that no one can change
		 * the data. Contrarily to zfs_get_data we need not re-check
		 * blocksize after we get the lock because it cannot be changed.
		 */
		size = zv->zv_volblocksize;
		offset = P2ALIGN(offset, size);
		zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size,
		    RL_READER);
		error = dmu_buf_hold(os, object, offset, zgd, &db,
		    DMU_READ_NO_PREFETCH);
		if (error == 0) {
			blkptr_t *obp = dmu_buf_get_blkptr(db);
			if (obp) {
				ASSERT(BP_IS_HOLE(bp));
				*bp = *obp;
			}

			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			/* On success, zvol_get_done runs from dmu_sync. */
			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zvol_get_done, zgd);

			if (error == 0)
				return (0);
		}
	}

	zvol_get_done(zgd, error);

	return (error);
}

/*
* zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. 1362219089Spjd * 1363219089Spjd * We store data in the log buffers if it's small enough. 1364219089Spjd * Otherwise we will later flush the data out via dmu_sync(). 1365219089Spjd */ 1366219089Spjdssize_t zvol_immediate_write_sz = 32768; 1367219089Spjd 1368219089Spjdstatic void 1369219089Spjdzvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, 1370219089Spjd boolean_t sync) 1371219089Spjd{ 1372219089Spjd uint32_t blocksize = zv->zv_volblocksize; 1373219089Spjd zilog_t *zilog = zv->zv_zilog; 1374320496Savg itx_wr_state_t write_state; 1375219089Spjd 1376219089Spjd if (zil_replaying(zilog, tx)) 1377219089Spjd return; 1378219089Spjd 1379320496Savg if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) 1380320496Savg write_state = WR_INDIRECT; 1381320496Savg else if (!spa_has_slogs(zilog->zl_spa) && 1382320496Savg resid >= blocksize && blocksize > zvol_immediate_write_sz) 1383320496Savg write_state = WR_INDIRECT; 1384320496Savg else if (sync) 1385320496Savg write_state = WR_COPIED; 1386320496Savg else 1387320496Savg write_state = WR_NEED_COPY; 1388219089Spjd 1389219089Spjd while (resid) { 1390219089Spjd itx_t *itx; 1391219089Spjd lr_write_t *lr; 1392320496Savg itx_wr_state_t wr_state = write_state; 1393320496Savg ssize_t len = resid; 1394219089Spjd 1395320496Savg if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA) 1396320496Savg wr_state = WR_NEED_COPY; 1397320496Savg else if (wr_state == WR_INDIRECT) 1398320496Savg len = MIN(blocksize - P2PHASE(off, blocksize), resid); 1399219089Spjd 1400219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr) + 1401320496Savg (wr_state == WR_COPIED ? 
len : 0)); 1402219089Spjd lr = (lr_write_t *)&itx->itx_lr; 1403320496Savg if (wr_state == WR_COPIED && dmu_read(zv->zv_objset, 1404219089Spjd ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { 1405219089Spjd zil_itx_destroy(itx); 1406219089Spjd itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1407219089Spjd lr = (lr_write_t *)&itx->itx_lr; 1408320496Savg wr_state = WR_NEED_COPY; 1409219089Spjd } 1410219089Spjd 1411320496Savg itx->itx_wr_state = wr_state; 1412219089Spjd lr->lr_foid = ZVOL_OBJ; 1413219089Spjd lr->lr_offset = off; 1414219089Spjd lr->lr_length = len; 1415219089Spjd lr->lr_blkoff = 0; 1416219089Spjd BP_ZERO(&lr->lr_blkptr); 1417219089Spjd 1418219089Spjd itx->itx_private = zv; 1419219089Spjd 1420308596Smav if (!sync && (zv->zv_sync_cnt == 0)) 1421308596Smav itx->itx_sync = B_FALSE; 1422308596Smav 1423219089Spjd zil_itx_assign(zilog, itx, tx); 1424219089Spjd 1425219089Spjd off += len; 1426219089Spjd resid -= len; 1427209962Smm } 1428219089Spjd} 1429209962Smm 1430277483Ssmh#ifdef illumos 1431219089Spjdstatic int 1432255750Sdelphijzvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t origoffset, 1433255750Sdelphij uint64_t size, boolean_t doread, boolean_t isdump) 1434219089Spjd{ 1435219089Spjd vdev_disk_t *dvd; 1436219089Spjd int c; 1437219089Spjd int numerrors = 0; 1438219089Spjd 1439255750Sdelphij if (vd->vdev_ops == &vdev_mirror_ops || 1440255750Sdelphij vd->vdev_ops == &vdev_replacing_ops || 1441255750Sdelphij vd->vdev_ops == &vdev_spare_ops) { 1442255750Sdelphij for (c = 0; c < vd->vdev_children; c++) { 1443255750Sdelphij int err = zvol_dumpio_vdev(vd->vdev_child[c], 1444255750Sdelphij addr, offset, origoffset, size, doread, isdump); 1445255750Sdelphij if (err != 0) { 1446255750Sdelphij numerrors++; 1447255750Sdelphij } else if (doread) { 1448255750Sdelphij break; 1449255750Sdelphij } 1450219089Spjd } 1451219089Spjd } 1452219089Spjd 1453255750Sdelphij if (!vd->vdev_ops->vdev_op_leaf && vd->vdev_ops != &vdev_raidz_ops) 1454219089Spjd 
return (numerrors < vd->vdev_children ? 0 : EIO); 1455219089Spjd 1456219089Spjd if (doread && !vdev_readable(vd)) 1457249195Smm return (SET_ERROR(EIO)); 1458219089Spjd else if (!doread && !vdev_writeable(vd)) 1459249195Smm return (SET_ERROR(EIO)); 1460219089Spjd 1461255750Sdelphij if (vd->vdev_ops == &vdev_raidz_ops) { 1462255750Sdelphij return (vdev_raidz_physio(vd, 1463255750Sdelphij addr, size, offset, origoffset, doread, isdump)); 1464255750Sdelphij } 1465255750Sdelphij 1466219089Spjd offset += VDEV_LABEL_START_SIZE; 1467219089Spjd 1468219089Spjd if (ddi_in_panic() || isdump) { 1469219089Spjd ASSERT(!doread); 1470219089Spjd if (doread) 1471249195Smm return (SET_ERROR(EIO)); 1472255750Sdelphij dvd = vd->vdev_tsd; 1473255750Sdelphij ASSERT3P(dvd, !=, NULL); 1474219089Spjd return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), 1475219089Spjd lbtodb(size))); 1476219089Spjd } else { 1477255750Sdelphij dvd = vd->vdev_tsd; 1478255750Sdelphij ASSERT3P(dvd, !=, NULL); 1479255750Sdelphij return (vdev_disk_ldi_physio(dvd->vd_lh, addr, size, 1480255750Sdelphij offset, doread ? B_READ : B_WRITE)); 1481219089Spjd } 1482219089Spjd} 1483219089Spjd 1484219089Spjdstatic int 1485219089Spjdzvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, 1486219089Spjd boolean_t doread, boolean_t isdump) 1487219089Spjd{ 1488219089Spjd vdev_t *vd; 1489219089Spjd int error; 1490219089Spjd zvol_extent_t *ze; 1491219089Spjd spa_t *spa = dmu_objset_spa(zv->zv_objset); 1492219089Spjd 1493219089Spjd /* Must be sector aligned, and not stradle a block boundary. 
*/ 1494219089Spjd if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || 1495219089Spjd P2BOUNDARY(offset, size, zv->zv_volblocksize)) { 1496249195Smm return (SET_ERROR(EINVAL)); 1497219089Spjd } 1498219089Spjd ASSERT(size <= zv->zv_volblocksize); 1499219089Spjd 1500219089Spjd /* Locate the extent this belongs to */ 1501219089Spjd ze = list_head(&zv->zv_extents); 1502219089Spjd while (offset >= ze->ze_nblks * zv->zv_volblocksize) { 1503219089Spjd offset -= ze->ze_nblks * zv->zv_volblocksize; 1504219089Spjd ze = list_next(&zv->zv_extents, ze); 1505219089Spjd } 1506219089Spjd 1507248571Smm if (ze == NULL) 1508249195Smm return (SET_ERROR(EINVAL)); 1509248571Smm 1510219089Spjd if (!ddi_in_panic()) 1511219089Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 1512219089Spjd 1513219089Spjd vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); 1514219089Spjd offset += DVA_GET_OFFSET(&ze->ze_dva); 1515255750Sdelphij error = zvol_dumpio_vdev(vd, addr, offset, DVA_GET_OFFSET(&ze->ze_dva), 1516255750Sdelphij size, doread, isdump); 1517219089Spjd 1518219089Spjd if (!ddi_in_panic()) 1519219089Spjd spa_config_exit(spa, SCL_STATE, FTAG); 1520219089Spjd 1521219089Spjd return (error); 1522219089Spjd} 1523219089Spjd 1524277483Ssmhint 1525277483Ssmhzvol_strategy(buf_t *bp) 1526277483Ssmh{ 1527277483Ssmh zfs_soft_state_t *zs = NULL; 1528277483Ssmh#else /* !illumos */ 1529265678Smavvoid 1530219089Spjdzvol_strategy(struct bio *bp) 1531219089Spjd{ 1532277483Ssmh#endif /* illumos */ 1533265678Smav zvol_state_t *zv; 1534219089Spjd uint64_t off, volsize; 1535219089Spjd size_t resid; 1536219089Spjd char *addr; 1537219089Spjd objset_t *os; 1538219089Spjd rl_t *rl; 1539219089Spjd int error = 0; 1540277483Ssmh#ifdef illumos 1541277483Ssmh boolean_t doread = bp->b_flags & B_READ; 1542277483Ssmh#else 1543265678Smav boolean_t doread = 0; 1544277483Ssmh#endif 1545255750Sdelphij boolean_t is_dumpified; 1546219089Spjd boolean_t sync; 1547219089Spjd 1548277483Ssmh#ifdef illumos 
1549277483Ssmh if (getminor(bp->b_edev) == 0) { 1550277483Ssmh error = SET_ERROR(EINVAL); 1551277483Ssmh } else { 1552277483Ssmh zs = ddi_get_soft_state(zfsdev_state, getminor(bp->b_edev)); 1553277483Ssmh if (zs == NULL) 1554277483Ssmh error = SET_ERROR(ENXIO); 1555277483Ssmh else if (zs->zss_type != ZSST_ZVOL) 1556277483Ssmh error = SET_ERROR(EINVAL); 1557277483Ssmh } 1558277483Ssmh 1559277483Ssmh if (error) { 1560277483Ssmh bioerror(bp, error); 1561277483Ssmh biodone(bp); 1562277483Ssmh return (0); 1563277483Ssmh } 1564277483Ssmh 1565277483Ssmh zv = zs->zss_data; 1566277483Ssmh 1567277483Ssmh if (!(bp->b_flags & B_READ) && (zv->zv_flags & ZVOL_RDONLY)) { 1568277483Ssmh bioerror(bp, EROFS); 1569277483Ssmh biodone(bp); 1570277483Ssmh return (0); 1571277483Ssmh } 1572277483Ssmh 1573277483Ssmh off = ldbtob(bp->b_blkno); 1574277483Ssmh#else /* !illumos */ 1575265678Smav if (bp->bio_to) 1576265678Smav zv = bp->bio_to->private; 1577265678Smav else 1578265678Smav zv = bp->bio_dev->si_drv2; 1579265678Smav 1580219089Spjd if (zv == NULL) { 1581277483Ssmh error = SET_ERROR(ENXIO); 1582265678Smav goto out; 1583219089Spjd } 1584219089Spjd 1585219089Spjd if (bp->bio_cmd != BIO_READ && (zv->zv_flags & ZVOL_RDONLY)) { 1586277483Ssmh error = SET_ERROR(EROFS); 1587265678Smav goto out; 1588219089Spjd } 1589219089Spjd 1590265678Smav switch (bp->bio_cmd) { 1591265678Smav case BIO_FLUSH: 1592265678Smav goto sync; 1593265678Smav case BIO_READ: 1594265678Smav doread = 1; 1595265678Smav case BIO_WRITE: 1596265678Smav case BIO_DELETE: 1597265678Smav break; 1598265678Smav default: 1599265678Smav error = EOPNOTSUPP; 1600265678Smav goto out; 1601265678Smav } 1602265678Smav 1603219089Spjd off = bp->bio_offset; 1604277483Ssmh#endif /* illumos */ 1605219089Spjd volsize = zv->zv_volsize; 1606219089Spjd 1607219089Spjd os = zv->zv_objset; 1608219089Spjd ASSERT(os != NULL); 1609219089Spjd 1610277483Ssmh#ifdef illumos 1611277483Ssmh bp_mapin(bp); 1612277483Ssmh addr = bp->b_un.b_addr; 1613277483Ssmh 
resid = bp->b_bcount; 1614277483Ssmh 1615277483Ssmh if (resid > 0 && (off < 0 || off >= volsize)) { 1616277483Ssmh bioerror(bp, EIO); 1617277483Ssmh biodone(bp); 1618277483Ssmh return (0); 1619277483Ssmh } 1620277483Ssmh 1621277483Ssmh is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED; 1622277483Ssmh sync = ((!(bp->b_flags & B_ASYNC) && 1623277483Ssmh !(zv->zv_flags & ZVOL_WCE)) || 1624277483Ssmh (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) && 1625277483Ssmh !doread && !is_dumpified; 1626277483Ssmh#else /* !illumos */ 1627219089Spjd addr = bp->bio_data; 1628219089Spjd resid = bp->bio_length; 1629219089Spjd 1630219089Spjd if (resid > 0 && (off < 0 || off >= volsize)) { 1631277483Ssmh error = SET_ERROR(EIO); 1632265678Smav goto out; 1633219089Spjd } 1634219089Spjd 1635255750Sdelphij is_dumpified = B_FALSE; 1636277483Ssmh sync = !doread && !is_dumpified && 1637255750Sdelphij zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; 1638277483Ssmh#endif /* illumos */ 1639219089Spjd 1640168404Spjd /* 1641219089Spjd * There must be no buffer changes when doing a dmu_sync() because 1642219089Spjd * we can't change the data whilst calculating the checksum. 1643168404Spjd */ 1644219089Spjd rl = zfs_range_lock(&zv->zv_znode, off, resid, 1645219089Spjd doread ? 
RL_READER : RL_WRITER); 1646219089Spjd 1647277483Ssmh#ifndef illumos 1648264732Smav if (bp->bio_cmd == BIO_DELETE) { 1649264732Smav dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 1650264732Smav error = dmu_tx_assign(tx, TXG_WAIT); 1651264732Smav if (error != 0) { 1652264732Smav dmu_tx_abort(tx); 1653264732Smav } else { 1654308594Smav zvol_log_truncate(zv, tx, off, resid, sync); 1655264732Smav dmu_tx_commit(tx); 1656264732Smav error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 1657264732Smav off, resid); 1658264732Smav resid = 0; 1659264732Smav } 1660264732Smav goto unlock; 1661264732Smav } 1662277483Ssmh#endif 1663219089Spjd while (resid != 0 && off < volsize) { 1664219089Spjd size_t size = MIN(resid, zvol_maxphys); 1665255750Sdelphij#ifdef illumos 1666255750Sdelphij if (is_dumpified) { 1667255750Sdelphij size = MIN(size, P2END(off, zv->zv_volblocksize) - off); 1668255750Sdelphij error = zvol_dumpio(zv, addr, off, size, 1669255750Sdelphij doread, B_FALSE); 1670255750Sdelphij } else if (doread) { 1671255750Sdelphij#else 1672219089Spjd if (doread) { 1673255750Sdelphij#endif 1674219089Spjd error = dmu_read(os, ZVOL_OBJ, off, size, addr, 1675219089Spjd DMU_READ_PREFETCH); 1676219089Spjd } else { 1677219089Spjd dmu_tx_t *tx = dmu_tx_create(os); 1678219089Spjd dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 1679219089Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1680219089Spjd if (error) { 1681219089Spjd dmu_tx_abort(tx); 1682219089Spjd } else { 1683219089Spjd dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 1684219089Spjd zvol_log_write(zv, tx, off, size, sync); 1685219089Spjd dmu_tx_commit(tx); 1686219089Spjd } 1687219089Spjd } 1688219089Spjd if (error) { 1689219089Spjd /* convert checksum errors into IO errors */ 1690219089Spjd if (error == ECKSUM) 1691249195Smm error = SET_ERROR(EIO); 1692219089Spjd break; 1693219089Spjd } 1694219089Spjd off += size; 1695219089Spjd addr += size; 1696219089Spjd resid -= size; 1697219089Spjd } 1698277483Ssmh#ifndef illumos 1699264732Smavunlock: 
1700277483Ssmh#endif 1701168404Spjd zfs_range_unlock(rl); 1702219089Spjd 1703277483Ssmh#ifdef illumos 1704277483Ssmh if ((bp->b_resid = resid) == bp->b_bcount) 1705277483Ssmh bioerror(bp, off > volsize ? EINVAL : error); 1706277483Ssmh 1707277483Ssmh if (sync) 1708277483Ssmh zil_commit(zv->zv_zilog, ZVOL_OBJ); 1709277483Ssmh biodone(bp); 1710277483Ssmh 1711277483Ssmh return (0); 1712277483Ssmh#else /* !illumos */ 1713219089Spjd bp->bio_completed = bp->bio_length - resid; 1714265678Smav if (bp->bio_completed < bp->bio_length && off > volsize) 1715265678Smav error = EINVAL; 1716219089Spjd 1717265678Smav if (sync) { 1718265678Smavsync: 1719219089Spjd zil_commit(zv->zv_zilog, ZVOL_OBJ); 1720265678Smav } 1721265678Smavout: 1722265678Smav if (bp->bio_to) 1723265678Smav g_io_deliver(bp, error); 1724265678Smav else 1725265678Smav biofinish(bp, NULL, error); 1726277483Ssmh#endif /* illumos */ 1727219089Spjd} 1728219089Spjd 1729277483Ssmh#ifdef illumos 1730219089Spjd/* 1731219089Spjd * Set the buffer count to the zvol maximum transfer. 1732219089Spjd * Using our own routine instead of the default minphys() 1733219089Spjd * means that for larger writes we write bigger buffers on X86 1734219089Spjd * (128K instead of 56K) and flush the disk write cache less often 1735219089Spjd * (every zvol_maxphys - currently 1MB) instead of minphys (currently 1736219089Spjd * 56K on X86 and 128K on sparc). 
 */
void
zvol_minphys(struct buf *bp)
{
	/* Clamp this buf's transfer count to the zvol maximum. */
	if (bp->b_bcount > zvol_maxphys)
		bp->b_bcount = zvol_maxphys;
}

/*
 * Crash-dump entry point: write 'nblocks' 512-byte disk blocks starting
 * at 'blkno' directly to a dumpified zvol via zvol_dumpio().  Each chunk
 * is clamped so it never crosses a volblocksize boundary.
 */
int
zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
{
	minor_t minor = getminor(dev);
	zvol_state_t *zv;
	int error = 0;
	uint64_t size;
	uint64_t boff;
	uint64_t resid;

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));

	/* Only a volume prepared with DKIOCDUMPINIT may be dumped to. */
	if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0)
		return (SET_ERROR(EINVAL));

	boff = ldbtob(blkno);
	resid = ldbtob(nblocks);

	VERIFY3U(boff + resid, <=, zv->zv_volsize);

	while (resid) {
		/* Stop each I/O at the next volblocksize boundary. */
		size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff);
		error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE);
		if (error)
			break;
		boff += size;
		addr += size;
		resid -= size;
	}

	return (error);
}

/*
 * Character-device read entry point.  Reads are bounds-checked against
 * the volume size and performed under a reader range lock in chunks of
 * at most DMU_MAX_ACCESS/2 bytes.
 */
/*ARGSUSED*/
int
zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	minor_t minor = getminor(dev);
#else	/* !illumos */
int
zvol_read(struct cdev *dev, struct uio *uio, int ioflag)
{
#endif	/* illumos */
	zvol_state_t *zv;
	uint64_t volsize;
	rl_t *rl;
	int error = 0;

#ifdef illumos
	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));
#else
	zv = dev->si_drv2;
#endif

	volsize = zv->zv_volsize;
	/*
	 * uio_loffset == volsize isn't an error as
	 * it's required for EOF processing.
	 */
	if (uio->uio_resid > 0 &&
	    (uio->uio_loffset < 0 || uio->uio_loffset > volsize))
		return (SET_ERROR(EIO));

#ifdef illumos
	/* Dumpified zvols bypass the DMU and go through zvol_strategy(). */
	if (zv->zv_flags & ZVOL_DUMPIFIED) {
		error = physio(zvol_strategy, NULL, dev, B_READ,
		    zvol_minphys, uio);
		return (error);
	}
#endif

	rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
	    RL_READER);
	while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
		uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);

		/* don't read past the end */
		if (bytes > volsize - uio->uio_loffset)
			bytes = volsize - uio->uio_loffset;

		error = dmu_read_uio_dbuf(zv->zv_dbuf, uio, bytes);
		if (error) {
			/* convert checksum errors into IO errors */
			if (error == ECKSUM)
				error = SET_ERROR(EIO);
			break;
		}
	}
	zfs_range_unlock(rl);
	return (error);
}

/*
 * Character-device write entry point.  Each chunk is written in its own
 * DMU transaction under a writer range lock; if the write is synchronous
 * (write-cache disabled / IO_SYNC / sync=always) the ZIL is committed
 * after the loop.
 */
#ifdef illumos
/*ARGSUSED*/
int
zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
{
	minor_t minor = getminor(dev);
#else	/* !illumos */
int
zvol_write(struct cdev *dev, struct uio *uio, int ioflag)
{
#endif	/* illumos */
	zvol_state_t *zv;
	uint64_t volsize;
	rl_t *rl;
	int error = 0;
	boolean_t sync;

#ifdef illumos
	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));
#else
	zv = dev->si_drv2;
#endif

	volsize = zv->zv_volsize;
	/*
	 * uio_loffset == volsize isn't an error as
	 * it's required for EOF processing.
	 */
	if (uio->uio_resid > 0 &&
	    (uio->uio_loffset < 0 || uio->uio_loffset > volsize))
		return (SET_ERROR(EIO));

#ifdef illumos
	/* Dumpified zvols bypass the DMU and go through zvol_strategy(). */
	if (zv->zv_flags & ZVOL_DUMPIFIED) {
		error = physio(zvol_strategy, NULL, dev, B_WRITE,
		    zvol_minphys, uio);
		return (error);
	}

	/* Synchronous when the write cache is disabled, or sync=always. */
	sync = !(zv->zv_flags & ZVOL_WCE) ||
#else
	/* Synchronous when the caller asked for IO_SYNC, or sync=always. */
	sync = (ioflag & IO_SYNC) ||
#endif
	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);

	rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
	    RL_WRITER);
	while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
		uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
		uint64_t off = uio->uio_loffset;
		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);

		if (bytes > volsize - off)	/* don't write past the end */
			bytes = volsize - off;

		dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			break;
		}
		error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
		if (error == 0)
			zvol_log_write(zv, tx, off, bytes, sync);
		dmu_tx_commit(tx);

		if (error)
			break;
	}
	zfs_range_unlock(rl);
	if (sync)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
	return (error);
}

/*
 * Fabricate a minimal EFI label for the zvol — a GPT header (when LBA 1
 * is requested) plus a single reserved partition entry — and copy the
 * requested piece(s) out to the caller's dk_efi_t buffer.
 */
#ifdef illumos
int
zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
{
	struct uuid uuid = EFI_RESERVED;
	efi_gpe_t gpe = { 0 };
	uint32_t crc;
	dk_efi_t efi;
	int length;
	char *ptr;

	if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag))
		return (SET_ERROR(EFAULT));
	ptr = (char *)(uintptr_t)efi.dki_data_64;
	length = efi.dki_length;
	/*
	 * Some clients may attempt to request a PMBR for the
	 * zvol.  Currently this interface will return EINVAL to
	 * such requests.  These requests could be supported by
	 * adding a check for lba == 0 and consing up an appropriate
	 * PMBR.
	 */
	if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0)
		return (SET_ERROR(EINVAL));

	/* One reserved partition entry spanning LBA 34 .. last usable LBA. */
	gpe.efi_gpe_StartingLBA = LE_64(34ULL);
	gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1);
	UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid);

	if (efi.dki_lba == 1) {
		efi_gpt_t gpt = { 0 };

		gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE);
		gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT);
		gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt));
		gpt.efi_gpt_MyLBA = LE_64(1ULL);
		gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL);
		gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1);
		gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL);
		gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1);
		gpt.efi_gpt_SizeOfPartitionEntry =
		    LE_32(sizeof (efi_gpe_t));
		/* CRC of the entry array first; the header CRC covers it. */
		CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table);
		gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
		CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table);
		gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
		if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length),
		    flag))
			return (SET_ERROR(EFAULT));
		ptr += sizeof (gpt);
		length -= sizeof (gpt);
	}
	if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe),
	    length), flag))
		return (SET_ERROR(EFAULT));
	return (0);
}

/*
 * BEGIN entry points to allow external callers access to the volume.
 */
/*
 * Return the volume parameters needed for access from an external caller.
 * These values are invariant as long as the volume is held open.
 */
int
zvol_get_volume_params(minor_t minor, uint64_t *blksize,
    uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
    void **rl_hdl, void **bonus_hdl)
{
	zvol_state_t *zv;

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));
	/* Dumpified volumes are not exported through this interface. */
	if (zv->zv_flags & ZVOL_DUMPIFIED)
		return (SET_ERROR(ENXIO));

	ASSERT(blksize && max_xfer_len && minor_hdl &&
	    objset_hdl && zil_hdl && rl_hdl && bonus_hdl);

	*blksize = zv->zv_volblocksize;
	*max_xfer_len = (uint64_t)zvol_maxphys;
	*minor_hdl = zv;
	*objset_hdl = zv->zv_objset;
	*zil_hdl = zv->zv_zilog;
	*rl_hdl = &zv->zv_znode;
	*bonus_hdl = zv->zv_dbuf;
	return (0);
}

/*
 * Return the current volume size to an external caller.
 * The size can change while the volume is open.
 */
uint64_t
zvol_get_volume_size(void *minor_hdl)
{
	zvol_state_t *zv = minor_hdl;

	return (zv->zv_volsize);
}

/*
 * Return the current WCE (write-cache-enable) setting to an external
 * caller.  The WCE setting can change while the volume is open.
 */
int
zvol_get_volume_wce(void *minor_hdl)
{
	zvol_state_t *zv = minor_hdl;

	return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0);
}

/*
 * Entry point for external callers to zvol_log_write
 */
void
zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
    boolean_t sync)
{
	zvol_state_t *zv = minor_hdl;

	zvol_log_write(zv, tx, off, resid, sync);
}
/*
 * END entry points to allow external callers access to the volume.
 */
#endif	/* illumos */

/*
 * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
 */
static void
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
    boolean_t sync)
{
	itx_t *itx;
	lr_truncate_t *lr;
	zilog_t *zilog = zv->zv_zilog;

	/* No logging needed while replaying the ZIL itself. */
	if (zil_replaying(zilog, tx))
		return;

	itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
	lr = (lr_truncate_t *)&itx->itx_lr;
	lr->lr_foid = ZVOL_OBJ;
	lr->lr_offset = off;
	lr->lr_length = len;

	/* Force sync when any open of this zvol requested synchronous I/O. */
	itx->itx_sync = (sync || zv->zv_sync_cnt != 0);
	zil_itx_assign(zilog, itx, tx);
}

#ifdef illumos
/*
 * Dirtbag ioctls to support mkfs(1M) for UFS filesystems.  See dkio(7I).
 * Also a dirtbag dkio ioctl for unmap/free-block functionality.
 */
/*ARGSUSED*/
int
zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
{
	zvol_state_t *zv;
	struct dk_callback *dkc;
	int error = 0;
	rl_t *rl;

	/*
	 * Entered holding zfsdev_state_lock; each case below is responsible
	 * for dropping it before any copyout/long-running work, or falling
	 * through to the release at the bottom.
	 */
	mutex_enter(&zfsdev_state_lock);

	zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL);

	if (zv == NULL) {
		mutex_exit(&zfsdev_state_lock);
		return (SET_ERROR(ENXIO));
	}
	ASSERT(zv->zv_total_opens > 0);

	switch (cmd) {

	case DKIOCINFO:
	{
		struct dk_cinfo dki;

		bzero(&dki, sizeof (dki));
		(void) strcpy(dki.dki_cname, "zvol");
		(void) strcpy(dki.dki_dname, "zvol");
		dki.dki_ctype = DKC_UNKNOWN;
		dki.dki_unit = getminor(dev);
		dki.dki_maxtransfer =
		    1 << (SPA_OLD_MAXBLOCKSHIFT - zv->zv_min_bs);
		mutex_exit(&zfsdev_state_lock);
		if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag))
			error = SET_ERROR(EFAULT);
		return (error);
	}

	case DKIOCGMEDIAINFO:
	{
		struct dk_minfo dkm;

		bzero(&dkm, sizeof (dkm));
		dkm.dki_lbsize = 1U << zv->zv_min_bs;
		dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs;
		dkm.dki_media_type = DK_UNKNOWN;
		mutex_exit(&zfsdev_state_lock);
		if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag))
			error = SET_ERROR(EFAULT);
		return (error);
	}

	case DKIOCGMEDIAINFOEXT:
	{
		struct dk_minfo_ext dkmext;

		bzero(&dkmext, sizeof (dkmext));
		dkmext.dki_lbsize = 1U << zv->zv_min_bs;
		dkmext.dki_pbsize = zv->zv_volblocksize;
		dkmext.dki_capacity = zv->zv_volsize >> zv->zv_min_bs;
		dkmext.dki_media_type = DK_UNKNOWN;
		mutex_exit(&zfsdev_state_lock);
		if (ddi_copyout(&dkmext, (void *)arg, sizeof (dkmext), flag))
			error = SET_ERROR(EFAULT);
		return (error);
	}

	case DKIOCGETEFI:
	{
		/* Snapshot the values before dropping the lock. */
		uint64_t vs = zv->zv_volsize;
		uint8_t bs = zv->zv_min_bs;

		mutex_exit(&zfsdev_state_lock);
		error = zvol_getefi((void *)arg, flag, vs, bs);
		return (error);
	}

	case DKIOCFLUSHWRITECACHE:
		dkc = (struct dk_callback *)arg;
		mutex_exit(&zfsdev_state_lock);
		zil_commit(zv->zv_zilog, ZVOL_OBJ);
		if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
			(*dkc->dkc_callback)(dkc->dkc_cookie, error);
			error = 0;
		}
		return (error);

	case DKIOCGETWCE:
	{
		int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
		if (ddi_copyout(&wce, (void *)arg, sizeof (int),
		    flag))
			error = SET_ERROR(EFAULT);
		break;
	}
	case DKIOCSETWCE:
	{
		int wce;
		if (ddi_copyin((void *)arg, &wce, sizeof (int),
		    flag)) {
			error = SET_ERROR(EFAULT);
			break;
		}
		if (wce) {
			zv->zv_flags |= ZVOL_WCE;
			mutex_exit(&zfsdev_state_lock);
		} else {
			/* Disabling the cache flushes pending writes. */
			zv->zv_flags &= ~ZVOL_WCE;
			mutex_exit(&zfsdev_state_lock);
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
		}
		return (0);
	}

	case DKIOCGGEOM:
	case DKIOCGVTOC:
		/*
		 * commands using these (like prtvtoc) expect ENOTSUP
		 * since we're emulating an EFI label
		 */
		error = SET_ERROR(ENOTSUP);
		break;

	case DKIOCDUMPINIT:
		rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
		    RL_WRITER);
		error = zvol_dumpify(zv);
		zfs_range_unlock(rl);
		break;

	case DKIOCDUMPFINI:
		if (!(zv->zv_flags & ZVOL_DUMPIFIED))
			break;
		rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
		    RL_WRITER);
		error = zvol_dump_fini(zv);
		zfs_range_unlock(rl);
		break;

	case DKIOCFREE:
	{
		dkioc_free_t df;
		dmu_tx_t *tx;

		if (!zvol_unmap_enabled)
			break;

		if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
			error = SET_ERROR(EFAULT);
			break;
		}

		/*
		 * Apply Postel's Law to length-checking.  If they overshoot,
		 * just blank out until the end, if there's a need to blank
		 * out anything.
		 */
		if (df.df_start >= zv->zv_volsize)
			break;	/* No need to do anything... */

		mutex_exit(&zfsdev_state_lock);

		rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
		    RL_WRITER);
		tx = dmu_tx_create(zv->zv_objset);
		dmu_tx_mark_netfree(tx);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error != 0) {
			dmu_tx_abort(tx);
		} else {
			/* Log the truncate first, then free the range. */
			zvol_log_truncate(zv, tx, df.df_start,
			    df.df_length, B_TRUE);
			dmu_tx_commit(tx);
			error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
			    df.df_start, df.df_length);
		}

		zfs_range_unlock(rl);

		if (error == 0) {
			/*
			 * If the write-cache is disabled or 'sync' property
			 * is set to 'always' then treat this as a synchronous
			 * operation (i.e. commit to zil).
			 */
			if (!(zv->zv_flags & ZVOL_WCE) ||
			    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS))
				zil_commit(zv->zv_zilog, ZVOL_OBJ);

			/*
			 * If the caller really wants synchronous writes, and
			 * can't wait for them, don't return until the write
			 * is done.
			 */
			if (df.df_flags & DF_WAIT_SYNC) {
				txg_wait_synced(
				    dmu_objset_pool(zv->zv_objset), 0);
			}
		}
		return (error);
	}

	default:
		error = SET_ERROR(ENOTTY);
		break;

	}
	mutex_exit(&zfsdev_state_lock);
	return (error);
}
#endif	/* illumos */

/*
 * Report whether any zvol minors currently exist (nonzero = busy).
 */
int
zvol_busy(void)
{
	return (zvol_minors != 0);
}

/*
 * Module initialization: set up the soft-state table (and, on illumos,
 * the global zfsdev state lock).
 */
void
zvol_init(void)
{
	VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t),
	    1) == 0);
#ifdef illumos
	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
#else
	ZFS_LOG(1, "ZVOL Initialized.");
#endif
}

/*
 * Module teardown: release the resources acquired in zvol_init().
 */
void
zvol_fini(void)
{
#ifdef illumos
	mutex_destroy(&zfsdev_state_lock);
#endif
	ddi_soft_state_fini(&zfsdev_state);
	ZFS_LOG(1, "ZVOL Deinitialized.");
}

#ifdef illumos
/*
 * dsl_sync_task check function: succeed (return nonzero) only while the
 * MULTI_VDEV_CRASH_DUMP feature is not yet active, so the sync func runs
 * at most once.
 */
/*ARGSUSED*/
static int
zfs_mvdev_dump_feature_check(void *arg, dmu_tx_t *tx)
{
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	if (spa_feature_is_active(spa, SPA_FEATURE_MULTI_VDEV_CRASH_DUMP))
		return (1);
	return (0);
}

/*
 * dsl_sync_task sync function: mark MULTI_VDEV_CRASH_DUMP active by
 * bumping its feature refcount.
 */
/*ARGSUSED*/
static void
zfs_mvdev_dump_activate_feature_sync(void *arg, dmu_tx_t *tx)
{
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	spa_feature_incr(spa, SPA_FEATURE_MULTI_VDEV_CRASH_DUMP, tx);
}

/*
 * Prepare a zvol for use as a dump device.  Frees the existing contents,
 * saves the original checksum/compression/refreservation/volblocksize
 * (and dedup) properties in the zvol's ZAP so they can be restored by
 * zvol_dump_fini(), then forces dump-safe property values and
 * preallocates the volume.  With resize == B_TRUE only the saved
 * refreservation is refreshed.  Returns 0 or an errno.
 */
static int
zvol_dump_init(zvol_state_t *zv, boolean_t resize)
{
	dmu_tx_t *tx;
	int error;
	objset_t *os = zv->zv_objset;
	spa_t *spa = dmu_objset_spa(os);
	vdev_t *vd = spa->spa_root_vdev;
	nvlist_t *nv = NULL;
	uint64_t version = spa_version(spa);
	uint64_t checksum, compress, refresrv, vbs, dedup;

	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
	ASSERT(vd->vdev_ops == &vdev_root_ops);

	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
	    DMU_OBJECT_END);
	if (error != 0)
		return (error);
	/* wait for dmu_free_long_range to actually free the blocks */
	txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);

	/*
	 * If the pool on which the dump device is being initialized has more
	 * than one child vdev, check that the MULTI_VDEV_CRASH_DUMP feature is
	 * enabled.  If so, bump that feature's counter to indicate that the
	 * feature is active. We also check the vdev type to handle the
	 * following case:
	 *   # zpool create test raidz disk1 disk2 disk3
	 *   Now have spa_root_vdev->vdev_children == 1 (the raidz vdev),
	 *   the raidz vdev itself has 3 children.
	 */
	if (vd->vdev_children > 1 || vd->vdev_ops == &vdev_raidz_ops) {
		if (!spa_feature_is_enabled(spa,
		    SPA_FEATURE_MULTI_VDEV_CRASH_DUMP))
			return (SET_ERROR(ENOTSUP));
		(void) dsl_sync_task(spa_name(spa),
		    zfs_mvdev_dump_feature_check,
		    zfs_mvdev_dump_activate_feature_sync, NULL,
		    2, ZFS_SPACE_CHECK_RESERVED);
	}

	/* Fetch the current property values so they can be saved below. */
	if (!resize) {
		error = dsl_prop_get_integer(zv->zv_name,
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL);
		if (error == 0) {
			error = dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum,
			    NULL);
		}
		if (error == 0) {
			error = dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
			    &refresrv, NULL);
		}
		if (error == 0) {
			error = dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs,
			    NULL);
		}
		if (version >= SPA_VERSION_DEDUP && error == 0) {
			error = dsl_prop_get_integer(zv->zv_name,
			    zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL);
		}
	}
	if (error != 0)
		return (error);

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	dmu_tx_hold_bonus(tx, ZVOL_OBJ);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error != 0) {
		dmu_tx_abort(tx);
		return (error);
	}

	/*
	 * If we are resizing the dump device then we only need to
	 * update the refreservation to match the newly updated
	 * zvolsize. Otherwise, we save off the original state of the
	 * zvol so that we can restore them if the zvol is ever undumpified.
	 */
	if (resize) {
		error = zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
		    &zv->zv_volsize, tx);
	} else {
		error = zap_update(os, ZVOL_ZAP_OBJ,
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1,
		    &compress, tx);
		if (error == 0) {
			error = zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1,
			    &checksum, tx);
		}
		if (error == 0) {
			error = zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
			    &refresrv, tx);
		}
		if (error == 0) {
			error = zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1,
			    &vbs, tx);
		}
		if (error == 0) {
			error = dmu_object_set_blocksize(
			    os, ZVOL_OBJ, SPA_OLD_MAXBLOCKSIZE, 0, tx);
		}
		if (version >= SPA_VERSION_DEDUP && error == 0) {
			error = zap_update(os, ZVOL_ZAP_OBJ,
			    zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1,
			    &dedup, tx);
		}
		if (error == 0)
			zv->zv_volblocksize = SPA_OLD_MAXBLOCKSIZE;
	}
	dmu_tx_commit(tx);

	/*
	 * We only need update the zvol's property if we are initializing
	 * the dump area for the first time.
	 */
	if (error == 0 && !resize) {
		/*
		 * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum
		 * function.  Otherwise, use the old default -- OFF.
		 */
		checksum = spa_feature_is_active(spa,
		    SPA_FEATURE_MULTI_VDEV_CRASH_DUMP) ? ZIO_CHECKSUM_NOPARITY :
		    ZIO_CHECKSUM_OFF;

		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0);
		VERIFY(nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
		    ZIO_COMPRESS_OFF) == 0);
		VERIFY(nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
		    checksum) == 0);
		if (version >= SPA_VERSION_DEDUP) {
			VERIFY(nvlist_add_uint64(nv,
			    zfs_prop_to_name(ZFS_PROP_DEDUP),
			    ZIO_CHECKSUM_OFF) == 0);
		}

		error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
		    nv, NULL);
		nvlist_free(nv);
	}

	/* Allocate the space for the dump */
	if (error == 0)
		error = zvol_prealloc(zv);
	return (error);
}

/*
 * Make (or refresh) this zvol as a dump device: run zvol_dump_init() if
 * the recorded dump size is absent or stale, build the LBA map, and
 * record ZVOL_DUMPSIZE + the ZVOL_DUMPIFIED flag.  Any failure is rolled
 * back via zvol_dump_fini().
 */
static int
zvol_dumpify(zvol_state_t *zv)
{
	int error = 0;
	uint64_t dumpsize = 0;
	dmu_tx_t *tx;
	objset_t *os = zv->zv_objset;

	if (zv->zv_flags & ZVOL_RDONLY)
		return (SET_ERROR(EROFS));

	if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,
	    8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) {
		/* A previous dump size means this is a resize, not init. */
		boolean_t resize = (dumpsize > 0);

		if ((error = zvol_dump_init(zv, resize)) != 0) {
			(void) zvol_dump_fini(zv);
			return (error);
		}
	}

	/*
	 * Build up our lba mapping.
	 */
	error = zvol_get_lbas(zv);
	if (error) {
		(void) zvol_dump_fini(zv);
		return (error);
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		(void) zvol_dump_fini(zv);
		return (error);
	}

	/* Persist the dump size so a future dumpify can detect staleness. */
	zv->zv_flags |= ZVOL_DUMPIFIED;
	error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1,
	    &zv->zv_volsize, tx);
	dmu_tx_commit(tx);

	if (error) {
		(void) zvol_dump_fini(zv);
		return (error);
	}

	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}

/*
 * Undo zvol_dumpify(): remove the ZVOL_DUMPSIZE ZAP entry, restore the
 * properties saved by zvol_dump_init(), free the preallocated contents,
 * and restore the original volume block size.
 */
static int
zvol_dump_fini(zvol_state_t *zv)
{
	dmu_tx_t *tx;
	objset_t *os = zv->zv_objset;
	nvlist_t *nv;
	int error = 0;
	uint64_t checksum, compress, refresrv, vbs, dedup;
	uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));

	/*
	 * Attempt to restore the zvol back to its pre-dumpified state.
	 * This is a best-effort attempt as it's possible that not all
	 * of these properties were initialized during the dumpify process
	 * (i.e. error during zvol_dump_init).
	 */

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}
	(void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx);
	dmu_tx_commit(tx);

	/* Best-effort lookups of the saved property values (see above). */
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs);

	VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv);
	if (version >= SPA_VERSION_DEDUP &&
	    zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) {
		(void) nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_DEDUP), dedup);
	}
	(void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
	    nv, NULL);
	nvlist_free(nv);

	zvol_free_extents(zv);
	zv->zv_flags &= ~ZVOL_DUMPIFIED;
	(void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END);
	/* wait for dmu_free_long_range to actually free the blocks */
	txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, ZVOL_OBJ);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}
	if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0)
		zv->zv_volblocksize = vbs;
	dmu_tx_commit(tx);

	return (0);
}
#else	/* !illumos */

/*
 * Bring the GEOM provider online and start the per-zvol worker thread
 * that services its bio queue.
 */
static void
zvol_geom_run(zvol_state_t *zv)
{
	struct g_provider *pp;

	pp = zv->zv_provider;
	g_error_provider(pp, 0);

	kproc_kthread_add(zvol_geom_worker, zv, &zfsproc, NULL, 0, 0,
	    "zfskern", "zvol %s", pp->name + sizeof(ZVOL_DRIVER));
}

/*
 * Tear down the GEOM provider: signal the worker thread (state 1), wait
 * for it to acknowledge exit (state 2), then detach and wither the geom.
 */
static void
zvol_geom_destroy(zvol_state_t *zv)
{
	struct g_provider *pp;

	g_topology_assert();

	mtx_lock(&zv->zv_queue_mtx);
	zv->zv_state = 1;
	wakeup_one(&zv->zv_queue);
	while (zv->zv_state != 2)
		msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0);
	mtx_destroy(&zv->zv_queue_mtx);

	pp = zv->zv_provider;
	zv->zv_provider = NULL;
	pp->private = NULL;
	g_wither_geom(pp->geom, ENXIO);
}

/*
 * GEOM access method: translate provider access-count deltas into
 * zvol_open()/zvol_close() calls.
 */
static int
zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
{
	int count, error, flags;

	g_topology_assert();

	/*
	 * To make it easier
we expect either open or close, but not both 2659219089Spjd * at the same time. 2660219089Spjd */ 2661219089Spjd KASSERT((acr >= 0 && acw >= 0 && ace >= 0) || 2662219089Spjd (acr <= 0 && acw <= 0 && ace <= 0), 2663219089Spjd ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).", 2664219089Spjd pp->name, acr, acw, ace)); 2665219089Spjd 2666219089Spjd if (pp->private == NULL) { 2667219089Spjd if (acr <= 0 && acw <= 0 && ace <= 0) 2668219089Spjd return (0); 2669219089Spjd return (pp->error); 2670219089Spjd } 2671219089Spjd 2672219089Spjd /* 2673219089Spjd * We don't pass FEXCL flag to zvol_open()/zvol_close() if ace != 0, 2674219089Spjd * because GEOM already handles that and handles it a bit differently. 2675219089Spjd * GEOM allows for multiple read/exclusive consumers and ZFS allows 2676219089Spjd * only one exclusive consumer, no matter if it is reader or writer. 2677219089Spjd * I like better the way GEOM works so I'll leave it for GEOM to 2678219089Spjd * decide what to do. 2679219089Spjd */ 2680219089Spjd 2681219089Spjd count = acr + acw + ace; 2682219089Spjd if (count == 0) 2683219089Spjd return (0); 2684219089Spjd 2685219089Spjd flags = 0; 2686219089Spjd if (acr != 0 || ace != 0) 2687219089Spjd flags |= FREAD; 2688219089Spjd if (acw != 0) 2689219089Spjd flags |= FWRITE; 2690219089Spjd 2691219089Spjd g_topology_unlock(); 2692219089Spjd if (count > 0) 2693219089Spjd error = zvol_open(pp, flags, count); 2694219089Spjd else 2695219089Spjd error = zvol_close(pp, flags, -count); 2696219089Spjd g_topology_lock(); 2697219089Spjd return (error); 2698219089Spjd} 2699219089Spjd 2700219089Spjdstatic void 2701219089Spjdzvol_geom_start(struct bio *bp) 2702219089Spjd{ 2703219089Spjd zvol_state_t *zv; 2704219089Spjd boolean_t first; 2705219089Spjd 2706260385Sscottl zv = bp->bio_to->private; 2707260385Sscottl ASSERT(zv != NULL); 2708219089Spjd switch (bp->bio_cmd) { 2709260385Sscottl case BIO_FLUSH: 2710260385Sscottl if (!THREAD_CAN_SLEEP()) 2711260385Sscottl goto 
enqueue; 2712260385Sscottl zil_commit(zv->zv_zilog, ZVOL_OBJ); 2713260385Sscottl g_io_deliver(bp, 0); 2714260385Sscottl break; 2715219089Spjd case BIO_READ: 2716219089Spjd case BIO_WRITE: 2717264732Smav case BIO_DELETE: 2718260385Sscottl if (!THREAD_CAN_SLEEP()) 2719260385Sscottl goto enqueue; 2720260385Sscottl zvol_strategy(bp); 2721219089Spjd break; 2722274732Smav case BIO_GETATTR: { 2723274732Smav spa_t *spa = dmu_objset_spa(zv->zv_objset); 2724274732Smav uint64_t refd, avail, usedobjs, availobjs, val; 2725274732Smav 2726264733Smav if (g_handleattr_int(bp, "GEOM::candelete", 1)) 2727264733Smav return; 2728274732Smav if (strcmp(bp->bio_attribute, "blocksavail") == 0) { 2729274732Smav dmu_objset_space(zv->zv_objset, &refd, &avail, 2730274732Smav &usedobjs, &availobjs); 2731274732Smav if (g_handleattr_off_t(bp, "blocksavail", 2732274732Smav avail / DEV_BSIZE)) 2733274732Smav return; 2734274732Smav } else if (strcmp(bp->bio_attribute, "blocksused") == 0) { 2735274732Smav dmu_objset_space(zv->zv_objset, &refd, &avail, 2736274732Smav &usedobjs, &availobjs); 2737274732Smav if (g_handleattr_off_t(bp, "blocksused", 2738274732Smav refd / DEV_BSIZE)) 2739274732Smav return; 2740274732Smav } else if (strcmp(bp->bio_attribute, "poolblocksavail") == 0) { 2741274732Smav avail = metaslab_class_get_space(spa_normal_class(spa)); 2742274732Smav avail -= metaslab_class_get_alloc(spa_normal_class(spa)); 2743274732Smav if (g_handleattr_off_t(bp, "poolblocksavail", 2744274732Smav avail / DEV_BSIZE)) 2745274732Smav return; 2746274732Smav } else if (strcmp(bp->bio_attribute, "poolblocksused") == 0) { 2747274732Smav refd = metaslab_class_get_alloc(spa_normal_class(spa)); 2748274732Smav if (g_handleattr_off_t(bp, "poolblocksused", 2749274732Smav refd / DEV_BSIZE)) 2750274732Smav return; 2751274732Smav } 2752264733Smav /* FALLTHROUGH */ 2753274732Smav } 2754219089Spjd default: 2755219089Spjd g_io_deliver(bp, EOPNOTSUPP); 2756219089Spjd break; 2757219089Spjd } 2758260385Sscottl return; 
2759260385Sscottl 2760260385Sscottlenqueue: 2761260385Sscottl mtx_lock(&zv->zv_queue_mtx); 2762260385Sscottl first = (bioq_first(&zv->zv_queue) == NULL); 2763260385Sscottl bioq_insert_tail(&zv->zv_queue, bp); 2764260385Sscottl mtx_unlock(&zv->zv_queue_mtx); 2765260385Sscottl if (first) 2766260385Sscottl wakeup_one(&zv->zv_queue); 2767219089Spjd} 2768219089Spjd 2769219089Spjdstatic void 2770219089Spjdzvol_geom_worker(void *arg) 2771219089Spjd{ 2772219089Spjd zvol_state_t *zv; 2773219089Spjd struct bio *bp; 2774219089Spjd 2775219089Spjd thread_lock(curthread); 2776219089Spjd sched_prio(curthread, PRIBIO); 2777219089Spjd thread_unlock(curthread); 2778219089Spjd 2779219089Spjd zv = arg; 2780219089Spjd for (;;) { 2781219089Spjd mtx_lock(&zv->zv_queue_mtx); 2782219089Spjd bp = bioq_takefirst(&zv->zv_queue); 2783219089Spjd if (bp == NULL) { 2784219089Spjd if (zv->zv_state == 1) { 2785219089Spjd zv->zv_state = 2; 2786219089Spjd wakeup(&zv->zv_state); 2787219089Spjd mtx_unlock(&zv->zv_queue_mtx); 2788219089Spjd kthread_exit(); 2789219089Spjd } 2790219089Spjd msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP, 2791219089Spjd "zvol:io", 0); 2792219089Spjd continue; 2793219089Spjd } 2794219089Spjd mtx_unlock(&zv->zv_queue_mtx); 2795219089Spjd switch (bp->bio_cmd) { 2796219089Spjd case BIO_FLUSH: 2797219089Spjd zil_commit(zv->zv_zilog, ZVOL_OBJ); 2798219089Spjd g_io_deliver(bp, 0); 2799219089Spjd break; 2800219089Spjd case BIO_READ: 2801219089Spjd case BIO_WRITE: 2802288520Smav case BIO_DELETE: 2803219089Spjd zvol_strategy(bp); 2804219089Spjd break; 2805288520Smav default: 2806288520Smav g_io_deliver(bp, EOPNOTSUPP); 2807288520Smav break; 2808219089Spjd } 2809219089Spjd } 2810219089Spjd} 2811219089Spjd 2812219089Spjdextern boolean_t dataset_name_hidden(const char *name); 2813219089Spjd 2814219089Spjdstatic int 2815219089Spjdzvol_create_snapshots(objset_t *os, const char *name) 2816219089Spjd{ 2817219089Spjd uint64_t cookie, obj; 2818219089Spjd char *sname; 2819219089Spjd 
int error, len; 2820219089Spjd 2821219089Spjd cookie = obj = 0; 2822219089Spjd sname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2823219089Spjd 2824248571Smm#if 0 2825219089Spjd (void) dmu_objset_find(name, dmu_objset_prefetch, NULL, 2826219089Spjd DS_FIND_SNAPSHOTS); 2827248571Smm#endif 2828219089Spjd 2829219089Spjd for (;;) { 2830219089Spjd len = snprintf(sname, MAXPATHLEN, "%s@", name); 2831219089Spjd if (len >= MAXPATHLEN) { 2832219089Spjd dmu_objset_rele(os, FTAG); 2833219089Spjd error = ENAMETOOLONG; 2834219089Spjd break; 2835219089Spjd } 2836219089Spjd 2837248976Smm dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 2838219089Spjd error = dmu_snapshot_list_next(os, MAXPATHLEN - len, 2839219089Spjd sname + len, &obj, &cookie, NULL); 2840248976Smm dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 2841219089Spjd if (error != 0) { 2842219089Spjd if (error == ENOENT) 2843219089Spjd error = 0; 2844219089Spjd break; 2845219089Spjd } 2846219089Spjd 2847297546Smav error = zvol_create_minor(sname); 2848297546Smav if (error != 0 && error != EEXIST) { 2849219089Spjd printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", 2850219089Spjd sname, error); 2851219089Spjd break; 2852219089Spjd } 2853219089Spjd } 2854219089Spjd 2855219089Spjd kmem_free(sname, MAXPATHLEN); 2856219089Spjd return (error); 2857219089Spjd} 2858219089Spjd 2859219089Spjdint 2860219089Spjdzvol_create_minors(const char *name) 2861219089Spjd{ 2862219089Spjd uint64_t cookie; 2863219089Spjd objset_t *os; 2864219089Spjd char *osname, *p; 2865219089Spjd int error, len; 2866219089Spjd 2867219089Spjd if (dataset_name_hidden(name)) 2868219089Spjd return (0); 2869219089Spjd 2870219089Spjd if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 2871219089Spjd printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", 2872219089Spjd name, error); 2873219089Spjd return (error); 2874219089Spjd } 2875219089Spjd if (dmu_objset_type(os) == DMU_OST_ZVOL) { 2876248571Smm dsl_dataset_long_hold(os->os_dsl_dataset, FTAG); 
2877248571Smm dsl_pool_rele(dmu_objset_pool(os), FTAG); 2878272883Ssmh error = zvol_create_minor(name); 2879272883Ssmh if (error == 0 || error == EEXIST) { 2880219089Spjd error = zvol_create_snapshots(os, name); 2881272883Ssmh } else { 2882219089Spjd printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n", 2883219089Spjd name, error); 2884219089Spjd } 2885248571Smm dsl_dataset_long_rele(os->os_dsl_dataset, FTAG); 2886248571Smm dsl_dataset_rele(os->os_dsl_dataset, FTAG); 2887219089Spjd return (error); 2888219089Spjd } 2889219089Spjd if (dmu_objset_type(os) != DMU_OST_ZFS) { 2890219089Spjd dmu_objset_rele(os, FTAG); 2891219089Spjd return (0); 2892219089Spjd } 2893219089Spjd 2894219089Spjd osname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2895219089Spjd if (snprintf(osname, MAXPATHLEN, "%s/", name) >= MAXPATHLEN) { 2896219089Spjd dmu_objset_rele(os, FTAG); 2897219089Spjd kmem_free(osname, MAXPATHLEN); 2898219089Spjd return (ENOENT); 2899219089Spjd } 2900219089Spjd p = osname + strlen(osname); 2901219089Spjd len = MAXPATHLEN - (p - osname); 2902219089Spjd 2903248571Smm#if 0 2904224855Smm /* Prefetch the datasets. 
*/ 2905224855Smm cookie = 0; 2906224855Smm while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { 2907224855Smm if (!dataset_name_hidden(osname)) 2908224855Smm (void) dmu_objset_prefetch(osname, NULL); 2909219089Spjd } 2910248571Smm#endif 2911219089Spjd 2912219089Spjd cookie = 0; 2913219089Spjd while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL, 2914219089Spjd &cookie) == 0) { 2915219089Spjd dmu_objset_rele(os, FTAG); 2916219089Spjd (void)zvol_create_minors(osname); 2917219089Spjd if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 2918219089Spjd printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", 2919219089Spjd name, error); 2920219089Spjd return (error); 2921219089Spjd } 2922219089Spjd } 2923219089Spjd 2924219089Spjd dmu_objset_rele(os, FTAG); 2925219089Spjd kmem_free(osname, MAXPATHLEN); 2926219089Spjd return (0); 2927219089Spjd} 2928219317Spjd 2929219317Spjdstatic void 2930265678Smavzvol_rename_minor(zvol_state_t *zv, const char *newname) 2931219317Spjd{ 2932265678Smav struct g_geom *gp; 2933219317Spjd struct g_provider *pp; 2934265678Smav struct cdev *dev; 2935219317Spjd 2936277483Ssmh ASSERT(MUTEX_HELD(&zfsdev_state_lock)); 2937219317Spjd 2938265678Smav if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { 2939265678Smav g_topology_lock(); 2940265678Smav pp = zv->zv_provider; 2941265678Smav ASSERT(pp != NULL); 2942265678Smav gp = pp->geom; 2943265678Smav ASSERT(gp != NULL); 2944219317Spjd 2945265678Smav zv->zv_provider = NULL; 2946265678Smav g_wither_provider(pp, ENXIO); 2947219317Spjd 2948265678Smav pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname); 2949265678Smav pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; 2950265678Smav pp->sectorsize = DEV_BSIZE; 2951265678Smav pp->mediasize = zv->zv_volsize; 2952265678Smav pp->private = zv; 2953265678Smav zv->zv_provider = pp; 2954265678Smav g_error_provider(pp, 0); 2955265678Smav g_topology_unlock(); 2956265678Smav } else if (zv->zv_volmode == ZFS_VOLMODE_DEV) { 2957297548Smav struct 
make_dev_args args; 2958297548Smav 2959308448Smav if ((dev = zv->zv_dev) != NULL) { 2960308448Smav zv->zv_dev = NULL; 2961308448Smav destroy_dev(dev); 2962308448Smav if (zv->zv_total_opens > 0) { 2963308448Smav zv->zv_flags &= ~ZVOL_EXCL; 2964308448Smav zv->zv_total_opens = 0; 2965308448Smav zvol_last_close(zv); 2966308448Smav } 2967297549Smav } 2968265678Smav 2969297548Smav make_dev_args_init(&args); 2970297548Smav args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; 2971297548Smav args.mda_devsw = &zvol_cdevsw; 2972297548Smav args.mda_cr = NULL; 2973297548Smav args.mda_uid = UID_ROOT; 2974297548Smav args.mda_gid = GID_OPERATOR; 2975297548Smav args.mda_mode = 0640; 2976297548Smav args.mda_si_drv2 = zv; 2977297548Smav if (make_dev_s(&args, &zv->zv_dev, 2978297548Smav "%s/%s", ZVOL_DRIVER, newname) == 0) 2979297548Smav zv->zv_dev->si_iosize_max = MAXPHYS; 2980265678Smav } 2981219317Spjd strlcpy(zv->zv_name, newname, sizeof(zv->zv_name)); 2982219317Spjd} 2983219317Spjd 2984219317Spjdvoid 2985219317Spjdzvol_rename_minors(const char *oldname, const char *newname) 2986219317Spjd{ 2987219317Spjd char name[MAXPATHLEN]; 2988219317Spjd struct g_provider *pp; 2989219317Spjd struct g_geom *gp; 2990219317Spjd size_t oldnamelen, newnamelen; 2991219317Spjd zvol_state_t *zv; 2992219317Spjd char *namebuf; 2993272883Ssmh boolean_t locked = B_FALSE; 2994219317Spjd 2995219317Spjd oldnamelen = strlen(oldname); 2996219317Spjd newnamelen = strlen(newname); 2997219317Spjd 2998219317Spjd DROP_GIANT(); 2999272883Ssmh /* See comment in zvol_open(). 
*/ 3000277483Ssmh if (!MUTEX_HELD(&zfsdev_state_lock)) { 3001277483Ssmh mutex_enter(&zfsdev_state_lock); 3002272883Ssmh locked = B_TRUE; 3003272883Ssmh } 3004219317Spjd 3005265678Smav LIST_FOREACH(zv, &all_zvols, zv_links) { 3006219317Spjd if (strcmp(zv->zv_name, oldname) == 0) { 3007265678Smav zvol_rename_minor(zv, newname); 3008219317Spjd } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 && 3009219317Spjd (zv->zv_name[oldnamelen] == '/' || 3010219317Spjd zv->zv_name[oldnamelen] == '@')) { 3011219317Spjd snprintf(name, sizeof(name), "%s%c%s", newname, 3012219317Spjd zv->zv_name[oldnamelen], 3013219317Spjd zv->zv_name + oldnamelen + 1); 3014265678Smav zvol_rename_minor(zv, name); 3015219317Spjd } 3016219317Spjd } 3017219317Spjd 3018272883Ssmh if (locked) 3019277483Ssmh mutex_exit(&zfsdev_state_lock); 3020219317Spjd PICKUP_GIANT(); 3021219317Spjd} 3022265678Smav 3023265678Smavstatic int 3024265678Smavzvol_d_open(struct cdev *dev, int flags, int fmt, struct thread *td) 3025265678Smav{ 3026297548Smav zvol_state_t *zv = dev->si_drv2; 3027265678Smav int err = 0; 3028265678Smav 3029277483Ssmh mutex_enter(&zfsdev_state_lock); 3030265678Smav if (zv->zv_total_opens == 0) 3031265678Smav err = zvol_first_open(zv); 3032265678Smav if (err) { 3033277483Ssmh mutex_exit(&zfsdev_state_lock); 3034265678Smav return (err); 3035265678Smav } 3036265678Smav if ((flags & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 3037265678Smav err = SET_ERROR(EROFS); 3038265678Smav goto out; 3039265678Smav } 3040265678Smav if (zv->zv_flags & ZVOL_EXCL) { 3041265678Smav err = SET_ERROR(EBUSY); 3042265678Smav goto out; 3043265678Smav } 3044265678Smav#ifdef FEXCL 3045265678Smav if (flags & FEXCL) { 3046265678Smav if (zv->zv_total_opens != 0) { 3047265678Smav err = SET_ERROR(EBUSY); 3048265678Smav goto out; 3049265678Smav } 3050265678Smav zv->zv_flags |= ZVOL_EXCL; 3051265678Smav } 3052265678Smav#endif 3053265678Smav 3054265678Smav zv->zv_total_opens++; 3055308596Smav if (flags & (FSYNC | FDSYNC)) { 
3056308596Smav zv->zv_sync_cnt++; 3057308596Smav if (zv->zv_sync_cnt == 1) 3058308596Smav zil_async_to_sync(zv->zv_zilog, ZVOL_OBJ); 3059308596Smav } 3060277483Ssmh mutex_exit(&zfsdev_state_lock); 3061265678Smav return (err); 3062265678Smavout: 3063265678Smav if (zv->zv_total_opens == 0) 3064265678Smav zvol_last_close(zv); 3065277483Ssmh mutex_exit(&zfsdev_state_lock); 3066265678Smav return (err); 3067265678Smav} 3068265678Smav 3069265678Smavstatic int 3070265678Smavzvol_d_close(struct cdev *dev, int flags, int fmt, struct thread *td) 3071265678Smav{ 3072297548Smav zvol_state_t *zv = dev->si_drv2; 3073265678Smav 3074277483Ssmh mutex_enter(&zfsdev_state_lock); 3075265678Smav if (zv->zv_flags & ZVOL_EXCL) { 3076265678Smav ASSERT(zv->zv_total_opens == 1); 3077265678Smav zv->zv_flags &= ~ZVOL_EXCL; 3078265678Smav } 3079265678Smav 3080265678Smav /* 3081265678Smav * If the open count is zero, this is a spurious close. 3082265678Smav * That indicates a bug in the kernel / DDI framework. 3083265678Smav */ 3084265678Smav ASSERT(zv->zv_total_opens != 0); 3085265678Smav 3086265678Smav /* 3087265678Smav * You may get multiple opens, but only one close. 
3088265678Smav */ 3089265678Smav zv->zv_total_opens--; 3090308596Smav if (flags & (FSYNC | FDSYNC)) 3091308596Smav zv->zv_sync_cnt--; 3092265678Smav 3093265678Smav if (zv->zv_total_opens == 0) 3094265678Smav zvol_last_close(zv); 3095265678Smav 3096277483Ssmh mutex_exit(&zfsdev_state_lock); 3097265678Smav return (0); 3098265678Smav} 3099265678Smav 3100265678Smavstatic int 3101265678Smavzvol_d_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) 3102265678Smav{ 3103265678Smav zvol_state_t *zv; 3104265678Smav rl_t *rl; 3105308594Smav off_t offset, length; 3106265678Smav int i, error; 3107308594Smav boolean_t sync; 3108265678Smav 3109265678Smav zv = dev->si_drv2; 3110265678Smav 3111265678Smav error = 0; 3112265678Smav KASSERT(zv->zv_total_opens > 0, 3113265678Smav ("Device with zero access count in zvol_d_ioctl")); 3114265678Smav 3115265678Smav i = IOCPARM_LEN(cmd); 3116265678Smav switch (cmd) { 3117265678Smav case DIOCGSECTORSIZE: 3118265678Smav *(u_int *)data = DEV_BSIZE; 3119265678Smav break; 3120265678Smav case DIOCGMEDIASIZE: 3121265678Smav *(off_t *)data = zv->zv_volsize; 3122265678Smav break; 3123265678Smav case DIOCGFLUSH: 3124265678Smav zil_commit(zv->zv_zilog, ZVOL_OBJ); 3125265678Smav break; 3126265678Smav case DIOCGDELETE: 3127273345Sdelphij if (!zvol_unmap_enabled) 3128273345Sdelphij break; 3129273345Sdelphij 3130265678Smav offset = ((off_t *)data)[0]; 3131265678Smav length = ((off_t *)data)[1]; 3132265678Smav if ((offset % DEV_BSIZE) != 0 || (length % DEV_BSIZE) != 0 || 3133265678Smav offset < 0 || offset >= zv->zv_volsize || 3134265678Smav length <= 0) { 3135265678Smav printf("%s: offset=%jd length=%jd\n", __func__, offset, 3136265678Smav length); 3137265678Smav error = EINVAL; 3138265678Smav break; 3139265678Smav } 3140265678Smav 3141265678Smav rl = zfs_range_lock(&zv->zv_znode, offset, length, RL_WRITER); 3142265678Smav dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 3143265678Smav error = dmu_tx_assign(tx, TXG_WAIT); 
3144265678Smav if (error != 0) { 3145308594Smav sync = FALSE; 3146265678Smav dmu_tx_abort(tx); 3147265678Smav } else { 3148308594Smav sync = (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); 3149308594Smav zvol_log_truncate(zv, tx, offset, length, sync); 3150265678Smav dmu_tx_commit(tx); 3151265678Smav error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 3152265678Smav offset, length); 3153265678Smav } 3154265678Smav zfs_range_unlock(rl); 3155308594Smav if (sync) 3156265678Smav zil_commit(zv->zv_zilog, ZVOL_OBJ); 3157265678Smav break; 3158265678Smav case DIOCGSTRIPESIZE: 3159265678Smav *(off_t *)data = zv->zv_volblocksize; 3160265678Smav break; 3161265678Smav case DIOCGSTRIPEOFFSET: 3162265678Smav *(off_t *)data = 0; 3163265678Smav break; 3164274732Smav case DIOCGATTR: { 3165274732Smav spa_t *spa = dmu_objset_spa(zv->zv_objset); 3166274732Smav struct diocgattr_arg *arg = (struct diocgattr_arg *)data; 3167274732Smav uint64_t refd, avail, usedobjs, availobjs; 3168274732Smav 3169280753Smav if (strcmp(arg->name, "GEOM::candelete") == 0) 3170280753Smav arg->value.i = 1; 3171280753Smav else if (strcmp(arg->name, "blocksavail") == 0) { 3172274732Smav dmu_objset_space(zv->zv_objset, &refd, &avail, 3173274732Smav &usedobjs, &availobjs); 3174274732Smav arg->value.off = avail / DEV_BSIZE; 3175274732Smav } else if (strcmp(arg->name, "blocksused") == 0) { 3176274732Smav dmu_objset_space(zv->zv_objset, &refd, &avail, 3177274732Smav &usedobjs, &availobjs); 3178274732Smav arg->value.off = refd / DEV_BSIZE; 3179274732Smav } else if (strcmp(arg->name, "poolblocksavail") == 0) { 3180274732Smav avail = metaslab_class_get_space(spa_normal_class(spa)); 3181274732Smav avail -= metaslab_class_get_alloc(spa_normal_class(spa)); 3182274732Smav arg->value.off = avail / DEV_BSIZE; 3183274732Smav } else if (strcmp(arg->name, "poolblocksused") == 0) { 3184274732Smav refd = metaslab_class_get_alloc(spa_normal_class(spa)); 3185274732Smav arg->value.off = refd / DEV_BSIZE; 3186274732Smav } else 
3187274732Smav error = ENOIOCTL; 3188274732Smav break; 3189274732Smav } 3190275892Smav case FIOSEEKHOLE: 3191275892Smav case FIOSEEKDATA: { 3192275892Smav off_t *off = (off_t *)data; 3193275892Smav uint64_t noff; 3194275892Smav boolean_t hole; 3195275892Smav 3196275892Smav hole = (cmd == FIOSEEKHOLE); 3197275892Smav noff = *off; 3198275892Smav error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); 3199275892Smav *off = noff; 3200275892Smav break; 3201275892Smav } 3202265678Smav default: 3203265678Smav error = ENOIOCTL; 3204265678Smav } 3205265678Smav 3206265678Smav return (error); 3207265678Smav} 3208277483Ssmh#endif /* illumos */ 3209