zvol.c (241769 -> 243674)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 *
24 * Copyright (c) 2006-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
25 * All rights reserved.
26 */
27
28/* Portions Copyright 2010 Robert Milkowski */
29/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
30
31/*
32 * ZFS volume emulation driver.
33 *
34 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
35 * Volumes are accessed through the symbolic links named:
36 *
37 * /dev/zvol/dsk/<pool_name>/<dataset_name>
38 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
39 *
40 * These links are created by the /dev filesystem (sdev_zvolops.c).
41 * Volumes are persistent through reboot. No user command needs to be
42 * run before opening and using a device.
43 *
44 * FreeBSD notes.
45 * On FreeBSD ZVOLs are simply GEOM providers like any other storage device
46 * in the system.
47 */
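/*
 * For example, a hypothetical volume created with
 *	# zfs create -V 10g tank/vol
 * appears on FreeBSD as the GEOM provider /dev/zvol/tank/vol and can
 * be partitioned, newfs'ed, or exported like any other disk.
 */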
48
49#include <sys/types.h>
50#include <sys/param.h>
51#include <sys/kernel.h>
52#include <sys/errno.h>
53#include <sys/uio.h>
54#include <sys/bio.h>
55#include <sys/buf.h>
56#include <sys/kmem.h>
57#include <sys/conf.h>
58#include <sys/cmn_err.h>
59#include <sys/stat.h>
60#include <sys/zap.h>
61#include <sys/spa.h>
62#include <sys/zio.h>
63#include <sys/dmu_traverse.h>
64#include <sys/dnode.h>
65#include <sys/dsl_dataset.h>
66#include <sys/dsl_prop.h>
67#include <sys/dkio.h>
68#include <sys/byteorder.h>
69#include <sys/sunddi.h>
70#include <sys/dirent.h>
71#include <sys/policy.h>
72#include <sys/fs/zfs.h>
73#include <sys/zfs_ioctl.h>
74#include <sys/zil.h>
75#include <sys/refcount.h>
76#include <sys/zfs_znode.h>
77#include <sys/zfs_rlock.h>
78#include <sys/vdev_impl.h>
79#include <sys/zvol.h>
80#include <sys/zil_impl.h>
81#include <geom/geom.h>
82
83#include "zfs_namecheck.h"
84
85struct g_class zfs_zvol_class = {
86 .name = "ZFS::ZVOL",
87 .version = G_VERSION,
88};
89
90DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
91
92void *zfsdev_state;
93static char *zvol_tag = "zvol_tag";
94
95#define ZVOL_DUMPSIZE "dumpsize"
96
97/*
98 * The spa_namespace_lock protects the zfsdev_state structure from being
99 * modified while it's being used, e.g. an open that comes in before a
100 * create finishes. It also protects temporary opens of the dataset so that,
101 * e.g., an open doesn't get a spurious EBUSY.
102 */
103static uint32_t zvol_minors;
104
105typedef struct zvol_extent {
106 list_node_t ze_node;
107 dva_t ze_dva; /* dva associated with this extent */
108 uint64_t ze_nblks; /* number of blocks in extent */
109} zvol_extent_t;
110
111/*
112 * The in-core state of each volume.
113 */
114typedef struct zvol_state {
115 char zv_name[MAXPATHLEN]; /* pool/dd name */
116 uint64_t zv_volsize; /* amount of space we advertise */
117 uint64_t zv_volblocksize; /* volume block size */
118 struct g_provider *zv_provider; /* GEOM provider */
119 uint8_t zv_min_bs; /* minimum addressable block shift */
120 uint8_t zv_flags; /* readonly, dumpified, etc. */
121 objset_t *zv_objset; /* objset handle */
122 uint32_t zv_total_opens; /* total open count */
123 zilog_t *zv_zilog; /* ZIL handle */
124 list_t zv_extents; /* List of extents for dump */
125 znode_t zv_znode; /* for range locking */
126 dmu_buf_t *zv_dbuf; /* bonus handle */
127 int zv_state;
128 struct bio_queue_head zv_queue;
129 struct mtx zv_queue_mtx; /* zv_queue mutex */
130} zvol_state_t;
131
132/*
133 * zvol specific flags
134 */
135#define ZVOL_RDONLY 0x1
136#define ZVOL_DUMPIFIED 0x2
137#define ZVOL_EXCL 0x4
138#define ZVOL_WCE 0x8
139
140/*
141 * zvol maximum transfer in one DMU tx.
142 */
143int zvol_maxphys = DMU_MAX_ACCESS/2;
144
145extern int zfs_set_prop_nvlist(const char *, zprop_source_t,
146 nvlist_t *, nvlist_t **);
147static int zvol_remove_zv(zvol_state_t *);
148static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);
149static int zvol_dumpify(zvol_state_t *zv);
150static int zvol_dump_fini(zvol_state_t *zv);
151static int zvol_dump_init(zvol_state_t *zv, boolean_t resize);
152
153static zvol_state_t *zvol_geom_create(const char *name);
154static void zvol_geom_run(zvol_state_t *zv);
155static void zvol_geom_destroy(zvol_state_t *zv);
156static int zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace);
157static void zvol_geom_start(struct bio *bp);
158static void zvol_geom_worker(void *arg);
159
160static void
161zvol_size_changed(zvol_state_t *zv)
162{
163#ifdef sun
164 dev_t dev = makedevice(maj, min);
165
166 VERIFY(ddi_prop_update_int64(dev, zfs_dip,
167 "Size", volsize) == DDI_SUCCESS);
168 VERIFY(ddi_prop_update_int64(dev, zfs_dip,
169 "Nblocks", lbtodb(volsize)) == DDI_SUCCESS);
170
171 /* Notify specfs to invalidate the cached size */
172 spec_size_invalidate(dev, VBLK);
173 spec_size_invalidate(dev, VCHR);
174#else /* !sun */
175 struct g_provider *pp;
176
177 pp = zv->zv_provider;
178 if (pp == NULL)
179 return;
180 if (zv->zv_volsize == pp->mediasize)
181 return;
182 /*
183 * Changing provider size is not really supported by GEOM, but it
184 * should be safe when provider is closed.
185 */
186 if (zv->zv_total_opens > 0)
187 return;
188 pp->mediasize = zv->zv_volsize;
189#endif /* !sun */
190}
191
192int
193zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
194{
195 if (volsize == 0)
196 return (EINVAL);
197
198 if (volsize % blocksize != 0)
199 return (EINVAL);
200
201#ifdef _ILP32
202 if (volsize - 1 > SPEC_MAXOFFSET_T)
203 return (EOVERFLOW);
204#endif
205 return (0);
206}
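/*
 * Worked example: a 10 GiB volume with an 8 KiB block size passes
 * (10737418240 % 8192 == 0), while 10 GiB plus one byte fails with
 * EINVAL because it is not a whole number of blocks.
 */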
207
208int
209zvol_check_volblocksize(uint64_t volblocksize)
210{
211 if (volblocksize < SPA_MINBLOCKSIZE ||
212 volblocksize > SPA_MAXBLOCKSIZE ||
213 !ISP2(volblocksize))
214 return (EDOM);
215
216 return (0);
217}
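/*
 * ISP2() accepts only powers of two, equivalent to the classic
 * (x & (x - 1)) == 0 test: 8192 passes, while 12288 (8K + 4K) is
 * rejected with EDOM even though it lies within the size limits.
 */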
218
219int
220zvol_get_stats(objset_t *os, nvlist_t *nv)
221{
222 int error;
223 dmu_object_info_t doi;
224 uint64_t val;
225
226 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
227 if (error)
228 return (error);
229
230 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
231
232 error = dmu_object_info(os, ZVOL_OBJ, &doi);
233
234 if (error == 0) {
235 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
236 doi.doi_data_block_size);
237 }
238
239 return (error);
240}
241
242static zvol_state_t *
243zvol_minor_lookup(const char *name)
244{
245 struct g_provider *pp;
246 struct g_geom *gp;
247 zvol_state_t *zv = NULL;
248
249 ASSERT(MUTEX_HELD(&spa_namespace_lock));
250
251 g_topology_lock();
252 LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) {
253 pp = LIST_FIRST(&gp->provider);
254 if (pp == NULL)
255 continue;
256 zv = pp->private;
257 if (zv == NULL)
258 continue;
259 if (strcmp(zv->zv_name, name) == 0)
260 break;
261 }
262 g_topology_unlock();
263
264 return (gp != NULL ? zv : NULL);
265}
266
267/* extent mapping arg */
268struct maparg {
269 zvol_state_t *ma_zv;
270 uint64_t ma_blks;
271};
272
273/*ARGSUSED*/
274static int
275zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
276 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
277{
278 struct maparg *ma = arg;
279 zvol_extent_t *ze;
280 int bs = ma->ma_zv->zv_volblocksize;
281
282 if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
283 return (0);
284
285 VERIFY3U(ma->ma_blks, ==, zb->zb_blkid);
286 ma->ma_blks++;
287
288 /* Abort immediately if we have encountered gang blocks */
289 if (BP_IS_GANG(bp))
290 return (EFRAGS);
291
292 /*
293 * See if the block is at the end of the previous extent.
294 */
295 ze = list_tail(&ma->ma_zv->zv_extents);
296 if (ze &&
297 DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) &&
298 DVA_GET_OFFSET(BP_IDENTITY(bp)) ==
299 DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) {
300 ze->ze_nblks++;
301 return (0);
302 }
303
304 dprintf_bp(bp, "%s", "next blkptr:");
305
306 /* start a new extent */
307 ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP);
308 ze->ze_dva = bp->blk_dva[0]; /* structure assignment */
309 ze->ze_nblks = 1;
310 list_insert_tail(&ma->ma_zv->zv_extents, ze);
311 return (0);
312}
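/*
 * Example of the merge above: with an 8K volume block size, a block
 * at (vdev 0, offset 0x10000) followed by one at (vdev 0, offset
 * 0x12000) grows the extent (ze_nblks 1 -> 2); a block on another
 * vdev or at a non-adjacent offset starts a new extent instead.
 */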
313
314static void
315zvol_free_extents(zvol_state_t *zv)
316{
317 zvol_extent_t *ze;
318
319	while ((ze = list_head(&zv->zv_extents)) != NULL) {
320 list_remove(&zv->zv_extents, ze);
321 kmem_free(ze, sizeof (zvol_extent_t));
322 }
323}
324
325static int
326zvol_get_lbas(zvol_state_t *zv)
327{
328 objset_t *os = zv->zv_objset;
329 struct maparg ma;
330 int err;
331
332 ma.ma_zv = zv;
333 ma.ma_blks = 0;
334 zvol_free_extents(zv);
335
336 /* commit any in-flight changes before traversing the dataset */
337 txg_wait_synced(dmu_objset_pool(os), 0);
338 err = traverse_dataset(dmu_objset_ds(os), 0,
339 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma);
340 if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) {
341 zvol_free_extents(zv);
342 return (err ? err : EIO);
343 }
344
345 return (0);
346}
347
348/* ARGSUSED */
349void
350zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
351{
352 zfs_creat_t *zct = arg;
353 nvlist_t *nvprops = zct->zct_props;
354 int error;
355 uint64_t volblocksize, volsize;
356
357 VERIFY(nvlist_lookup_uint64(nvprops,
358 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
359 if (nvlist_lookup_uint64(nvprops,
360 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
361 volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
362
363 /*
364 * These properties must be removed from the list so the generic
365 * property setting step won't apply to them.
366 */
367 VERIFY(nvlist_remove_all(nvprops,
368 zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
369 (void) nvlist_remove_all(nvprops,
370 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
371
372 error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
373 DMU_OT_NONE, 0, tx);
374 ASSERT(error == 0);
375
376 error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
377 DMU_OT_NONE, 0, tx);
378 ASSERT(error == 0);
379
380 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
381 ASSERT(error == 0);
382}
383
384/*
385 * Replay a TX_WRITE ZIL transaction that didn't get committed
386 * after a system failure.
387 */
388static int
389zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
390{
391 objset_t *os = zv->zv_objset;
392 char *data = (char *)(lr + 1); /* data follows lr_write_t */
393 uint64_t offset, length;
394 dmu_tx_t *tx;
395 int error;
396
397 if (byteswap)
398 byteswap_uint64_array(lr, sizeof (*lr));
399
400 offset = lr->lr_offset;
401 length = lr->lr_length;
402
403 /* If it's a dmu_sync() block, write the whole block */
404 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
405 uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
406 if (length < blocksize) {
407 offset -= offset % blocksize;
408 length = blocksize;
409 }
410 }
411
412 tx = dmu_tx_create(os);
413 dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length);
414 error = dmu_tx_assign(tx, TXG_WAIT);
415 if (error) {
416 dmu_tx_abort(tx);
417 } else {
418 dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
419 dmu_tx_commit(tx);
420 }
421
422 return (error);
423}
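/*
 * Note on the rounding above: for a dmu_sync (indirect) record the
 * logged block pointer covers a whole block, so e.g. a 512-byte
 * write at offset 5120 within an 8K block is replayed as a write of
 * the entire block [0, 8192).
 */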
424
425/* ARGSUSED */
426static int
427zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
428{
429 return (ENOTSUP);
430}
431
432/*
433 * Callback vectors for replaying records.
434 * Only TX_WRITE is needed for zvol.
435 */
436zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
437 zvol_replay_err, /* 0 no such transaction type */
438 zvol_replay_err, /* TX_CREATE */
439 zvol_replay_err, /* TX_MKDIR */
440 zvol_replay_err, /* TX_MKXATTR */
441 zvol_replay_err, /* TX_SYMLINK */
442 zvol_replay_err, /* TX_REMOVE */
443 zvol_replay_err, /* TX_RMDIR */
444 zvol_replay_err, /* TX_LINK */
445 zvol_replay_err, /* TX_RENAME */
446 zvol_replay_write, /* TX_WRITE */
447 zvol_replay_err, /* TX_TRUNCATE */
448 zvol_replay_err, /* TX_SETATTR */
449 zvol_replay_err, /* TX_ACL */
450 zvol_replay_err, /* TX_CREATE_ACL */
451 zvol_replay_err, /* TX_CREATE_ATTR */
452 zvol_replay_err, /* TX_CREATE_ACL_ATTR */
453 zvol_replay_err, /* TX_MKDIR_ACL */
454 zvol_replay_err, /* TX_MKDIR_ATTR */
455 zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
456 zvol_replay_err, /* TX_WRITE2 */
457};
458
459#ifdef sun
460int
461zvol_name2minor(const char *name, minor_t *minor)
462{
463 zvol_state_t *zv;
464
465 mutex_enter(&spa_namespace_lock);
466 zv = zvol_minor_lookup(name);
467 if (minor && zv)
468 *minor = zv->zv_minor;
469 mutex_exit(&spa_namespace_lock);
470 return (zv ? 0 : -1);
471}
472#endif /* sun */
473
474/*
475 * Create a minor node (plus a whole lot more) for the specified volume.
476 */
477int
478zvol_create_minor(const char *name)
479{
480 zfs_soft_state_t *zs;
481 zvol_state_t *zv;
482 objset_t *os;
483 dmu_object_info_t doi;
484 uint64_t volsize;
485 int error;
486
487 ZFS_LOG(1, "Creating ZVOL %s...", name);
488
489 mutex_enter(&spa_namespace_lock);
490
491 if (zvol_minor_lookup(name) != NULL) {
492 mutex_exit(&spa_namespace_lock);
493 return (EEXIST);
494 }
495
496 /* lie and say we're read-only */
497 error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os);
498
499 if (error) {
500 mutex_exit(&spa_namespace_lock);
501 return (error);
502 }
503
504#ifdef sun
505 if ((minor = zfsdev_minor_alloc()) == 0) {
506 dmu_objset_disown(os, FTAG);
507 mutex_exit(&spa_namespace_lock);
508 return (ENXIO);
509 }
510
511 if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) {
512 dmu_objset_disown(os, FTAG);
513 mutex_exit(&spa_namespace_lock);
514 return (EAGAIN);
515 }
516 (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME,
517 (char *)name);
518
519 (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor);
520
521 if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
522 minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
523 ddi_soft_state_free(zfsdev_state, minor);
524 dmu_objset_disown(os, FTAG);
525 mutex_exit(&spa_namespace_lock);
526 return (EAGAIN);
527 }
528
529 (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor);
530
531 if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK,
532 minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
533 ddi_remove_minor_node(zfs_dip, chrbuf);
534 ddi_soft_state_free(zfsdev_state, minor);
535 dmu_objset_disown(os, FTAG);
536 mutex_exit(&spa_namespace_lock);
537 return (EAGAIN);
538 }
539
540 zs = ddi_get_soft_state(zfsdev_state, minor);
541 zs->zss_type = ZSST_ZVOL;
542 zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
543#else /* !sun */
544
545 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
546 if (error) {
547 ASSERT(error == 0);
548 dmu_objset_disown(os, FTAG);
549 mutex_exit(&spa_namespace_lock);
550 return (error);
551 }
552
553 DROP_GIANT();
554 g_topology_lock();
555 zv = zvol_geom_create(name);
556 zv->zv_volsize = volsize;
557 zv->zv_provider->mediasize = zv->zv_volsize;
558
559#endif /* !sun */
560
561 (void) strlcpy(zv->zv_name, name, MAXPATHLEN);
562 zv->zv_min_bs = DEV_BSHIFT;
563 zv->zv_objset = os;
564 if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
565 zv->zv_flags |= ZVOL_RDONLY;
566 mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
567 avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
568 sizeof (rl_t), offsetof(rl_t, r_node));
569 list_create(&zv->zv_extents, sizeof (zvol_extent_t),
570 offsetof(zvol_extent_t, ze_node));
571 /* get and cache the blocksize */
572 error = dmu_object_info(os, ZVOL_OBJ, &doi);
573 ASSERT(error == 0);
574 zv->zv_volblocksize = doi.doi_data_block_size;
575
576 if (spa_writeable(dmu_objset_spa(os))) {
577 if (zil_replay_disable)
578 zil_destroy(dmu_objset_zil(os), B_FALSE);
579 else
580 zil_replay(os, zv, zvol_replay_vector);
581 }
582 dmu_objset_disown(os, FTAG);
583 zv->zv_objset = NULL;
584
585 zvol_minors++;
586
587 mutex_exit(&spa_namespace_lock);
588
589 zvol_geom_run(zv);
590
591 g_topology_unlock();
592 PICKUP_GIANT();
593
594 ZFS_LOG(1, "ZVOL %s created.", name);
595
596 return (0);
597}
598
599/*
600 * Remove minor node for the specified volume.
601 */
602static int
603zvol_remove_zv(zvol_state_t *zv)
604{
605#ifdef sun
606 minor_t minor = zv->zv_minor;
607#endif
608
609 ASSERT(MUTEX_HELD(&spa_namespace_lock));
610 if (zv->zv_total_opens != 0)
611 return (EBUSY);
612
613 ZFS_LOG(1, "ZVOL %s destroyed.", zv->zv_name);
614
615#ifdef sun
616 (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor);
617 ddi_remove_minor_node(zfs_dip, nmbuf);
618#endif /* sun */
619
620 avl_destroy(&zv->zv_znode.z_range_avl);
621 mutex_destroy(&zv->zv_znode.z_range_lock);
622
623 zvol_geom_destroy(zv);
624
625 zvol_minors--;
626 return (0);
627}
628
629int
630zvol_remove_minor(const char *name)
631{
632 zvol_state_t *zv;
633 int rc;
634
635 mutex_enter(&spa_namespace_lock);
636 if ((zv = zvol_minor_lookup(name)) == NULL) {
637 mutex_exit(&spa_namespace_lock);
638 return (ENXIO);
639 }
640 g_topology_lock();
641 rc = zvol_remove_zv(zv);
642 g_topology_unlock();
643 mutex_exit(&spa_namespace_lock);
644 return (rc);
645}
646
647int
648zvol_first_open(zvol_state_t *zv)
649{
650 objset_t *os;
651 uint64_t volsize;
652 int error;
653 uint64_t readonly;
654
655 /* lie and say we're read-only */
656 error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE,
657 zvol_tag, &os);
658 if (error)
659 return (error);
660
661 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
662 if (error) {
663 ASSERT(error == 0);
664 dmu_objset_disown(os, zvol_tag);
665 return (error);
666 }
667 zv->zv_objset = os;
668 error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
669 if (error) {
670 dmu_objset_disown(os, zvol_tag);
671 return (error);
672 }
673 zv->zv_volsize = volsize;
674 zv->zv_zilog = zil_open(os, zvol_get_data);
675 zvol_size_changed(zv);
676
677 VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly,
678 NULL) == 0);
679 if (readonly || dmu_objset_is_snapshot(os) ||
680 !spa_writeable(dmu_objset_spa(os)))
681 zv->zv_flags |= ZVOL_RDONLY;
682 else
683 zv->zv_flags &= ~ZVOL_RDONLY;
684 return (error);
685}
686
687void
688zvol_last_close(zvol_state_t *zv)
689{
690 zil_close(zv->zv_zilog);
691 zv->zv_zilog = NULL;
692
693 dmu_buf_rele(zv->zv_dbuf, zvol_tag);
694 zv->zv_dbuf = NULL;
695
696 /*
697 * Evict cached data
698 */
699 if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
700 !(zv->zv_flags & ZVOL_RDONLY))
701 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
702 (void) dmu_objset_evict_dbufs(zv->zv_objset);
703
694 dmu_objset_disown(zv->zv_objset, zvol_tag);
695 zv->zv_objset = NULL;
696}
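/*
 * The txg_wait_synced() above flushes dirty data for a writable
 * volume before dmu_objset_evict_dbufs() runs, since dirty dbufs
 * cannot be evicted; the next first open then starts with no stale
 * cached data.
 */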
697
698#ifdef sun
699int
700zvol_prealloc(zvol_state_t *zv)
701{
702 objset_t *os = zv->zv_objset;
703 dmu_tx_t *tx;
704 uint64_t refd, avail, usedobjs, availobjs;
705 uint64_t resid = zv->zv_volsize;
706 uint64_t off = 0;
707
708 /* Check the space usage before attempting to allocate the space */
709 dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs);
710 if (avail < zv->zv_volsize)
711 return (ENOSPC);
712
713 /* Free old extents if they exist */
714 zvol_free_extents(zv);
715
716 while (resid != 0) {
717 int error;
718 uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE);
719
720 tx = dmu_tx_create(os);
721 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
722 error = dmu_tx_assign(tx, TXG_WAIT);
723 if (error) {
724 dmu_tx_abort(tx);
725 (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off);
726 return (error);
727 }
728 dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx);
729 dmu_tx_commit(tx);
730 off += bytes;
731 resid -= bytes;
732 }
733 txg_wait_synced(dmu_objset_pool(os), 0);
734
735 return (0);
736}
737#endif /* sun */
738
739int
740zvol_update_volsize(objset_t *os, uint64_t volsize)
741{
742 dmu_tx_t *tx;
743 int error;
744
745 ASSERT(MUTEX_HELD(&spa_namespace_lock));
746
747 tx = dmu_tx_create(os);
748 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
749 error = dmu_tx_assign(tx, TXG_WAIT);
750 if (error) {
751 dmu_tx_abort(tx);
752 return (error);
753 }
754
755 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1,
756 &volsize, tx);
757 dmu_tx_commit(tx);
758
759 if (error == 0)
760 error = dmu_free_long_range(os,
761 ZVOL_OBJ, volsize, DMU_OBJECT_END);
762 return (error);
763}
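/*
 * When the volume shrinks, the dmu_free_long_range() call releases
 * everything from the new end of the volume (offset == volsize) up
 * to DMU_OBJECT_END, returning the freed blocks to the pool.
 */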
764
765void
766zvol_remove_minors(const char *name)
767{
768 struct g_geom *gp, *gptmp;
769 struct g_provider *pp;
770 zvol_state_t *zv;
771 size_t namelen;
772
773 namelen = strlen(name);
774
775 DROP_GIANT();
776 mutex_enter(&spa_namespace_lock);
777 g_topology_lock();
778
779 LIST_FOREACH_SAFE(gp, &zfs_zvol_class.geom, geom, gptmp) {
780 pp = LIST_FIRST(&gp->provider);
781 if (pp == NULL)
782 continue;
783 zv = pp->private;
784 if (zv == NULL)
785 continue;
786 if (strcmp(zv->zv_name, name) == 0 ||
787 (strncmp(zv->zv_name, name, namelen) == 0 &&
788 zv->zv_name[namelen] == '/')) {
789 (void) zvol_remove_zv(zv);
790 }
791 }
792
793 g_topology_unlock();
794 mutex_exit(&spa_namespace_lock);
795 PICKUP_GIANT();
796}
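/*
 * The match above removes the named dataset and everything below it:
 * for a hypothetical "tank/vol" it matches "tank/vol" and
 * "tank/vol/child", but not the sibling "tank/volume", because the
 * character after the prefix must be '/'.
 */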
797
798int
799zvol_set_volsize(const char *name, major_t maj, uint64_t volsize)
800{
801 zvol_state_t *zv = NULL;
802 objset_t *os;
803 int error;
804 dmu_object_info_t doi;
805 uint64_t old_volsize = 0ULL;
806 uint64_t readonly;
807
808 mutex_enter(&spa_namespace_lock);
809 zv = zvol_minor_lookup(name);
810 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
811 mutex_exit(&spa_namespace_lock);
812 return (error);
813 }
814
815 if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 ||
816 (error = zvol_check_volsize(volsize,
817 doi.doi_data_block_size)) != 0)
818 goto out;
819
820 VERIFY(dsl_prop_get_integer(name, "readonly", &readonly,
821 NULL) == 0);
822 if (readonly) {
823 error = EROFS;
824 goto out;
825 }
826
827 error = zvol_update_volsize(os, volsize);
828 /*
829 * Reinitialize the dump area to the new size. If we
830 * failed to resize the dump area then restore it back to
831 * its original size.
832 */
833 if (zv && error == 0) {
834#ifdef ZVOL_DUMP
835 if (zv->zv_flags & ZVOL_DUMPIFIED) {
836 old_volsize = zv->zv_volsize;
837 zv->zv_volsize = volsize;
838 if ((error = zvol_dumpify(zv)) != 0 ||
839 (error = dumpvp_resize()) != 0) {
840 (void) zvol_update_volsize(os, old_volsize);
841 zv->zv_volsize = old_volsize;
842 error = zvol_dumpify(zv);
843 }
844 }
845#endif /* ZVOL_DUMP */
846 if (error == 0) {
847 zv->zv_volsize = volsize;
848 zvol_size_changed(zv);
849 }
850 }
851
852#ifdef sun
853 /*
854 * Generate a LUN expansion event.
855 */
856 if (zv && error == 0) {
857 sysevent_id_t eid;
858 nvlist_t *attr;
859 char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
860
861 (void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV,
862 zv->zv_minor);
863
864 VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
865 VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
866
867 (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
868 ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
869
870 nvlist_free(attr);
871 kmem_free(physpath, MAXPATHLEN);
872 }
873#endif /* sun */
874
875out:
876 dmu_objset_rele(os, FTAG);
877
878 mutex_exit(&spa_namespace_lock);
879
880 return (error);
881}
882
883/*ARGSUSED*/
884static int
885zvol_open(struct g_provider *pp, int flag, int count)
886{
887 zvol_state_t *zv;
888 int err = 0;
889 boolean_t locked = B_FALSE;
890
891 /*
892 * Protect against recursively entering spa_namespace_lock
893 * when spa_open() is used for a pool backed by (local) ZVOLs.
894 * This is needed since we replaced upstream zfsdev_state_lock
895 * with spa_namespace_lock in the ZVOL code.
896 * We are using the same trick as spa_open().
897 * Note that calls in zvol_first_open which need to resolve
898 * the pool name to a spa object will enter spa_open()
899 * recursively, but that function already has all the
900 * necessary protection.
901 */
902 if (!MUTEX_HELD(&spa_namespace_lock)) {
903 mutex_enter(&spa_namespace_lock);
904 locked = B_TRUE;
905 }
906
907 zv = pp->private;
908 if (zv == NULL) {
909 if (locked)
910 mutex_exit(&spa_namespace_lock);
911 return (ENXIO);
912 }
913
914 if (zv->zv_total_opens == 0)
915 err = zvol_first_open(zv);
916 if (err) {
917 if (locked)
918 mutex_exit(&spa_namespace_lock);
919 return (err);
920 }
921 if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
922 err = EROFS;
923 goto out;
924 }
925 if (zv->zv_flags & ZVOL_EXCL) {
926 err = EBUSY;
927 goto out;
928 }
929#ifdef FEXCL
930 if (flag & FEXCL) {
931 if (zv->zv_total_opens != 0) {
932 err = EBUSY;
933 goto out;
934 }
935 zv->zv_flags |= ZVOL_EXCL;
936 }
937#endif
938
939 zv->zv_total_opens += count;
940 if (locked)
941 mutex_exit(&spa_namespace_lock);
942
943 return (err);
944out:
945 if (zv->zv_total_opens == 0)
946 zvol_last_close(zv);
947 if (locked)
948 mutex_exit(&spa_namespace_lock);
949 return (err);
950}
951
952/*ARGSUSED*/
953static int
954zvol_close(struct g_provider *pp, int flag, int count)
955{
956 zvol_state_t *zv;
957 int error = 0;
958 boolean_t locked = B_FALSE;
959
960 /* See comment in zvol_open(). */
961 if (!MUTEX_HELD(&spa_namespace_lock)) {
962 mutex_enter(&spa_namespace_lock);
963 locked = B_TRUE;
964 }
965
966 zv = pp->private;
967 if (zv == NULL) {
968 if (locked)
969 mutex_exit(&spa_namespace_lock);
970 return (ENXIO);
971 }
972
973 if (zv->zv_flags & ZVOL_EXCL) {
974 ASSERT(zv->zv_total_opens == 1);
975 zv->zv_flags &= ~ZVOL_EXCL;
976 }
977
978 /*
979 * If the open count is zero, this is a spurious close.
980 * That indicates a bug in the kernel / DDI framework.
981 */
982 ASSERT(zv->zv_total_opens != 0);
983
984 /*
985 * You may get multiple opens, but only one close.
986 */
987 zv->zv_total_opens -= count;
988
989 if (zv->zv_total_opens == 0)
990 zvol_last_close(zv);
991
992 if (locked)
993 mutex_exit(&spa_namespace_lock);
994 return (error);
995}
996
997static void
998zvol_get_done(zgd_t *zgd, int error)
999{
1000 if (zgd->zgd_db)
1001 dmu_buf_rele(zgd->zgd_db, zgd);
1002
1003 zfs_range_unlock(zgd->zgd_rl);
1004
1005 if (error == 0 && zgd->zgd_bp)
1006 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
1007
1008 kmem_free(zgd, sizeof (zgd_t));
1009}
1010
1011/*
1012 * Get data to generate a TX_WRITE intent log record.
1013 */
1014static int
1015zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
1016{
1017 zvol_state_t *zv = arg;
1018 objset_t *os = zv->zv_objset;
1019 uint64_t object = ZVOL_OBJ;
1020 uint64_t offset = lr->lr_offset;
1021 uint64_t size = lr->lr_length; /* length of user data */
1022 blkptr_t *bp = &lr->lr_blkptr;
1023 dmu_buf_t *db;
1024 zgd_t *zgd;
1025 int error;
1026
1027 ASSERT(zio != NULL);
1028 ASSERT(size != 0);
1029
1030 zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
1031 zgd->zgd_zilog = zv->zv_zilog;
1032 zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
1033
1034 /*
1035 * Write records come in two flavors: immediate and indirect.
1036 * For small writes it's cheaper to store the data with the
1037 * log record (immediate); for large writes it's cheaper to
1038 * sync the data and get a pointer to it (indirect) so that
1039 * we don't have to write the data twice.
1040 */
1041 if (buf != NULL) { /* immediate write */
1042 error = dmu_read(os, object, offset, size, buf,
1043 DMU_READ_NO_PREFETCH);
1044 } else {
1045 size = zv->zv_volblocksize;
1046 offset = P2ALIGN(offset, size);
1047 error = dmu_buf_hold(os, object, offset, zgd, &db,
1048 DMU_READ_NO_PREFETCH);
1049 if (error == 0) {
1050 zgd->zgd_db = db;
1051 zgd->zgd_bp = bp;
1052
1053 ASSERT(db->db_offset == offset);
1054 ASSERT(db->db_size == size);
1055
1056 error = dmu_sync(zio, lr->lr_common.lrc_txg,
1057 zvol_get_done, zgd);
1058
1059 if (error == 0)
1060 return (0);
1061 }
1062 }
1063
1064 zvol_get_done(zgd, error);
1065
1066 return (error);
1067}
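/*
 * In the indirect case above, dmu_sync() completes asynchronously:
 * on success zvol_get_done() runs from its callback, releasing the
 * dbuf and the range lock and adding the block to the ZIL, which is
 * why this function returns 0 without calling zvol_get_done() itself.
 */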
1068
1069/*
1070 * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
1071 *
1072 * We store data in the log buffers if it's small enough.
1073 * Otherwise we will later flush the data out via dmu_sync().
1074 */
1075ssize_t zvol_immediate_write_sz = 32768;
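/*
 * With the default above: a synchronous 4K write is copied into the
 * log record (WR_COPIED); an aligned, full-block write on a volume
 * whose block size exceeds 32K is logged as WR_INDIRECT and synced
 * in place via dmu_sync(); logbias=throughput drops the threshold
 * to 0 so that aligned block-sized writes always go indirect.
 */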
1076
1077static void
1078zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
1079 boolean_t sync)
1080{
1081 uint32_t blocksize = zv->zv_volblocksize;
1082 zilog_t *zilog = zv->zv_zilog;
1083 boolean_t slogging;
1084 ssize_t immediate_write_sz;
1085
1086 if (zil_replaying(zilog, tx))
1087 return;
1088
1089 immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
1090 ? 0 : zvol_immediate_write_sz;
1091
1092 slogging = spa_has_slogs(zilog->zl_spa) &&
1093 (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
1094
1095 while (resid) {
1096 itx_t *itx;
1097 lr_write_t *lr;
1098 ssize_t len;
1099 itx_wr_state_t write_state;
1100
1101 /*
1102 * Unlike zfs_log_write() we can be called with
1103 * up to DMU_MAX_ACCESS/2 (5MB) writes.
1104 */
1105 if (blocksize > immediate_write_sz && !slogging &&
1106 resid >= blocksize && off % blocksize == 0) {
1107 write_state = WR_INDIRECT; /* uses dmu_sync */
1108 len = blocksize;
1109 } else if (sync) {
1110 write_state = WR_COPIED;
1111 len = MIN(ZIL_MAX_LOG_DATA, resid);
1112 } else {
1113 write_state = WR_NEED_COPY;
1114 len = MIN(ZIL_MAX_LOG_DATA, resid);
1115 }
1116
1117 itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
1118 (write_state == WR_COPIED ? len : 0));
1119 lr = (lr_write_t *)&itx->itx_lr;
1120 if (write_state == WR_COPIED && dmu_read(zv->zv_objset,
1121 ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
1122 zil_itx_destroy(itx);
1123 itx = zil_itx_create(TX_WRITE, sizeof (*lr));
1124 lr = (lr_write_t *)&itx->itx_lr;
1125 write_state = WR_NEED_COPY;
1126 }
1127
1128 itx->itx_wr_state = write_state;
1129 if (write_state == WR_NEED_COPY)
1130 itx->itx_sod += len;
1131 lr->lr_foid = ZVOL_OBJ;
1132 lr->lr_offset = off;
1133 lr->lr_length = len;
1134 lr->lr_blkoff = 0;
1135 BP_ZERO(&lr->lr_blkptr);
1136
1137 itx->itx_private = zv;
1138 itx->itx_sync = sync;
1139
1140 zil_itx_assign(zilog, itx, tx);
1141
1142 off += len;
1143 resid -= len;
1144 }
1145}
1146
1147#ifdef sun
1148static int
1149zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size,
1150 boolean_t doread, boolean_t isdump)
1151{
1152 vdev_disk_t *dvd;
1153 int c;
1154 int numerrors = 0;
1155
1156 for (c = 0; c < vd->vdev_children; c++) {
1157 ASSERT(vd->vdev_ops == &vdev_mirror_ops ||
1158 vd->vdev_ops == &vdev_replacing_ops ||
1159 vd->vdev_ops == &vdev_spare_ops);
1160 int err = zvol_dumpio_vdev(vd->vdev_child[c],
1161 addr, offset, size, doread, isdump);
1162 if (err != 0) {
1163 numerrors++;
1164 } else if (doread) {
1165 break;
1166 }
1167 }
1168
1169 if (!vd->vdev_ops->vdev_op_leaf)
1170 return (numerrors < vd->vdev_children ? 0 : EIO);
1171
1172 if (doread && !vdev_readable(vd))
1173 return (EIO);
1174 else if (!doread && !vdev_writeable(vd))
1175 return (EIO);
1176
1177 dvd = vd->vdev_tsd;
1178 ASSERT3P(dvd, !=, NULL);
1179 offset += VDEV_LABEL_START_SIZE;
1180
1181 if (ddi_in_panic() || isdump) {
1182 ASSERT(!doread);
1183 if (doread)
1184 return (EIO);
1185 return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset),
1186 lbtodb(size)));
1187 } else {
1188 return (vdev_disk_physio(dvd->vd_lh, addr, size, offset,
1189 doread ? B_READ : B_WRITE));
1190 }
1191}
1192
1193static int
1194zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
1195 boolean_t doread, boolean_t isdump)
1196{
1197 vdev_t *vd;
1198 int error;
1199 zvol_extent_t *ze;
1200 spa_t *spa = dmu_objset_spa(zv->zv_objset);
1201
1202 /* Must be sector aligned, and not straddle a block boundary. */
1203 if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) ||
1204 P2BOUNDARY(offset, size, zv->zv_volblocksize)) {
1205 return (EINVAL);
1206 }
1207 ASSERT(size <= zv->zv_volblocksize);
1208
1209 /* Locate the extent this belongs to */
1210 ze = list_head(&zv->zv_extents);
1211 while (offset >= ze->ze_nblks * zv->zv_volblocksize) {
1212 offset -= ze->ze_nblks * zv->zv_volblocksize;
1213 ze = list_next(&zv->zv_extents, ze);
1214 }
1215
1216 if (!ddi_in_panic())
1217 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
1218
1219 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva));
1220 offset += DVA_GET_OFFSET(&ze->ze_dva);
1221 error = zvol_dumpio_vdev(vd, addr, offset, size, doread, isdump);
1222
1223 if (!ddi_in_panic())
1224 spa_config_exit(spa, SCL_STATE, FTAG);
1225
1226 return (error);
1227}
1228#endif /* sun */
1229
1230int
1231zvol_strategy(struct bio *bp)
1232{
1233 zvol_state_t *zv = bp->bio_to->private;
1234 uint64_t off, volsize;
1235 size_t resid;
1236 char *addr;
1237 objset_t *os;
1238 rl_t *rl;
1239 int error = 0;
1240 boolean_t doread = (bp->bio_cmd == BIO_READ);
1241 boolean_t sync;
1242
1243 if (zv == NULL) {
1244 g_io_deliver(bp, ENXIO);
1245 return (0);
1246 }
1247
1248 if (bp->bio_cmd != BIO_READ && (zv->zv_flags & ZVOL_RDONLY)) {
1249 g_io_deliver(bp, EROFS);
1250 return (0);
1251 }
1252
1253 off = bp->bio_offset;
1254 volsize = zv->zv_volsize;
1255
1256 os = zv->zv_objset;
1257 ASSERT(os != NULL);
1258
1259 addr = bp->bio_data;
1260 resid = bp->bio_length;
1261
1262 if (resid > 0 && (off < 0 || off >= volsize)) {
1263 g_io_deliver(bp, EIO);
1264 return (0);
1265 }
1266
1267 sync = !doread && zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
1268
1269 /*
1270 * There must be no buffer changes when doing a dmu_sync() because
1271 * we can't change the data whilst calculating the checksum.
1272 */
1273 rl = zfs_range_lock(&zv->zv_znode, off, resid,
1274 doread ? RL_READER : RL_WRITER);
1275
1276 while (resid != 0 && off < volsize) {
1277 size_t size = MIN(resid, zvol_maxphys);
1278 if (doread) {
1279 error = dmu_read(os, ZVOL_OBJ, off, size, addr,
1280 DMU_READ_PREFETCH);
1281 } else {
1282 dmu_tx_t *tx = dmu_tx_create(os);
1283 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
1284 error = dmu_tx_assign(tx, TXG_WAIT);
1285 if (error) {
1286 dmu_tx_abort(tx);
1287 } else {
1288 dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
1289 zvol_log_write(zv, tx, off, size, sync);
1290 dmu_tx_commit(tx);
1291 }
1292 }
1293 if (error) {
1294 /* convert checksum errors into IO errors */
1295 if (error == ECKSUM)
1296 error = EIO;
1297 break;
1298 }
1299 off += size;
1300 addr += size;
1301 resid -= size;
1302 }
1303 zfs_range_unlock(rl);
1304
1305 bp->bio_completed = bp->bio_length - resid;
1306 if (bp->bio_completed < bp->bio_length)
1307 bp->bio_error = (off > volsize ? EINVAL : error);
1308
1309 if (sync)
1310 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1311 g_io_deliver(bp, 0);
1312
1313 return (0);
1314}
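/*
 * Each pass through the loop above moves at most zvol_maxphys bytes
 * (DMU_MAX_ACCESS / 2), so one large bio may span several DMU
 * transactions; on a short transfer the bio reports the error, or
 * EINVAL if the request ran past the end of the volume.
 */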
1315
1316#ifdef sun
1317/*
1318 * Set the buffer count to the zvol maximum transfer.
1319 * Using our own routine instead of the default minphys()
1320 * means that for larger writes we write bigger buffers on X86
1321 * (128K instead of 56K) and flush the disk write cache less often
1322 * (every zvol_maxphys - currently 1MB) instead of minphys (currently
1323 * 56K on X86 and 128K on sparc).
1324 */
1325void
1326zvol_minphys(struct buf *bp)
1327{
1328 if (bp->b_bcount > zvol_maxphys)
1329 bp->b_bcount = zvol_maxphys;
1330}
1331
1332int
1333zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
1334{
1335 minor_t minor = getminor(dev);
1336 zvol_state_t *zv;
1337 int error = 0;
1338 uint64_t size;
1339 uint64_t boff;
1340 uint64_t resid;
1341
1342 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1343 if (zv == NULL)
1344 return (ENXIO);
1345
1346 boff = ldbtob(blkno);
1347 resid = ldbtob(nblocks);
1348
1349 VERIFY3U(boff + resid, <=, zv->zv_volsize);
1350
1351 while (resid) {
1352 size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff);
1353 error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE);
1354 if (error)
1355 break;
1356 boff += size;
1357 addr += size;
1358 resid -= size;
1359 }
1360
1361 return (error);
1362}
1363
1364/*ARGSUSED*/
1365int
1366zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
1367{
1368 minor_t minor = getminor(dev);
1369 zvol_state_t *zv;
1370 uint64_t volsize;
1371 rl_t *rl;
1372 int error = 0;
1373
1374 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1375 if (zv == NULL)
1376 return (ENXIO);
1377
1378 volsize = zv->zv_volsize;
1379 if (uio->uio_resid > 0 &&
1380 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1381 return (EIO);
1382
1383 if (zv->zv_flags & ZVOL_DUMPIFIED) {
1384 error = physio(zvol_strategy, NULL, dev, B_READ,
1385 zvol_minphys, uio);
1386 return (error);
1387 }
1388
1389 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
1390 RL_READER);
1391 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1392 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1393
1394 /* don't read past the end */
1395 if (bytes > volsize - uio->uio_loffset)
1396 bytes = volsize - uio->uio_loffset;
1397
1398 error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
1399 if (error) {
1400 /* convert checksum errors into IO errors */
1401 if (error == ECKSUM)
1402 error = EIO;
1403 break;
1404 }
1405 }
1406 zfs_range_unlock(rl);
1407 return (error);
1408}
1409
1410/*ARGSUSED*/
1411int
1412zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
1413{
1414 minor_t minor = getminor(dev);
1415 zvol_state_t *zv;
1416 uint64_t volsize;
1417 rl_t *rl;
1418 int error = 0;
1419 boolean_t sync;
1420
1421 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1422 if (zv == NULL)
1423 return (ENXIO);
1424
1425 volsize = zv->zv_volsize;
1426 if (uio->uio_resid > 0 &&
1427 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1428 return (EIO);
1429
1430 if (zv->zv_flags & ZVOL_DUMPIFIED) {
1431 error = physio(zvol_strategy, NULL, dev, B_WRITE,
1432 zvol_minphys, uio);
1433 return (error);
1434 }
1435
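	/*
	 * A write must be committed to the ZIL before returning unless
	 * the volume's write cache is enabled (ZVOL_WCE) and the dataset
	 * does not force synchronous semantics (sync=always).
	 */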
1436 sync = !(zv->zv_flags & ZVOL_WCE) ||
1437 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
1438
1439 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
1440 RL_WRITER);
1441 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1442 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1443 uint64_t off = uio->uio_loffset;
1444 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
1445
1446 if (bytes > volsize - off) /* don't write past the end */
1447 bytes = volsize - off;
1448
1449 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
1450 error = dmu_tx_assign(tx, TXG_WAIT);
1451 if (error) {
1452 dmu_tx_abort(tx);
1453 break;
1454 }
1455 error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
1456 if (error == 0)
1457 zvol_log_write(zv, tx, off, bytes, sync);
1458 dmu_tx_commit(tx);
1459
1460 if (error)
1461 break;
1462 }
1463 zfs_range_unlock(rl);
1464 if (sync)
1465 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1466 return (error);
1467}
1468
1469int
1470zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
1471{
1472 struct uuid uuid = EFI_RESERVED;
1473 efi_gpe_t gpe = { 0 };
1474 uint32_t crc;
1475 dk_efi_t efi;
1476 int length;
1477 char *ptr;
1478
1479 if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag))
1480 return (EFAULT);
1481 ptr = (char *)(uintptr_t)efi.dki_data_64;
1482 length = efi.dki_length;
1483 /*
1484 * Some clients may attempt to request a PMBR for the
1485 * zvol. Currently this interface will return EINVAL to
1486 * such requests. These requests could be supported by
1487 * adding a check for lba == 0 and consing up an appropriate
1488 * PMBR.
1489 */
1490 if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0)
1491 return (EINVAL);
1492
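	/*
	 * Fabricate a minimal EFI label: a GPT header at LBA 1 and a
	 * single reserved partition entry covering LBA 34 through the
	 * last usable block of the volume.
	 */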
1493 gpe.efi_gpe_StartingLBA = LE_64(34ULL);
1494 gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1);
1495 UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid);
1496
1497 if (efi.dki_lba == 1) {
1498 efi_gpt_t gpt = { 0 };
1499
1500 gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE);
1501 gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT);
1502 gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt));
1503 gpt.efi_gpt_MyLBA = LE_64(1ULL);
1504 gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL);
1505 gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1);
1506 gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL);
1507 gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1);
1508 gpt.efi_gpt_SizeOfPartitionEntry =
1509 LE_32(sizeof (efi_gpe_t));
1510 CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table);
1511 gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
1512 CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table);
1513 gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
1514 if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length),
1515 flag))
1516 return (EFAULT);
1517 ptr += sizeof (gpt);
1518 length -= sizeof (gpt);
1519 }
1520 if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe),
1521 length), flag))
1522 return (EFAULT);
1523 return (0);
1524}
1525
1526/*
1527 * BEGIN entry points to allow external callers access to the volume.
1528 */
1529/*
1530 * Return the volume parameters needed for access from an external caller.
1531 * These values are invariant as long as the volume is held open.
1532 */
1533int
1534zvol_get_volume_params(minor_t minor, uint64_t *blksize,
1535 uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
1536 void **rl_hdl, void **bonus_hdl)
1537{
1538 zvol_state_t *zv;
1539
1540 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1541 if (zv == NULL)
1542 return (ENXIO);
1543 if (zv->zv_flags & ZVOL_DUMPIFIED)
1544 return (ENXIO);
1545
1546 ASSERT(blksize && max_xfer_len && minor_hdl &&
1547 objset_hdl && zil_hdl && rl_hdl && bonus_hdl);
1548
1549 *blksize = zv->zv_volblocksize;
1550 *max_xfer_len = (uint64_t)zvol_maxphys;
1551 *minor_hdl = zv;
1552 *objset_hdl = zv->zv_objset;
1553 *zil_hdl = zv->zv_zilog;
1554 *rl_hdl = &zv->zv_znode;
1555 *bonus_hdl = zv->zv_dbuf;
1556 return (0);
1557}
1558
1559/*
1560 * Return the current volume size to an external caller.
1561 * The size can change while the volume is open.
1562 */
1563uint64_t
1564zvol_get_volume_size(void *minor_hdl)
1565{
1566 zvol_state_t *zv = minor_hdl;
1567
1568 return (zv->zv_volsize);
1569}
1570
1571/*
1572 * Return the current WCE setting to an external caller.
1573 * The WCE setting can change while the volume is open.
1574 */
1575int
1576zvol_get_volume_wce(void *minor_hdl)
1577{
1578 zvol_state_t *zv = minor_hdl;
1579
1580 return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0);
1581}
1582
1583/*
1584 * Entry point for external callers to zvol_log_write
1585 */
1586void
1587zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
1588 boolean_t sync)
1589{
1590 zvol_state_t *zv = minor_hdl;
1591
1592 zvol_log_write(zv, tx, off, resid, sync);
1593}
1594/*
1595 * END entry points to allow external callers access to the volume.
1596 */
1597
1598/*
1599 * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
1600 */
1601/*ARGSUSED*/
1602int
1603zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
1604{
1605 zvol_state_t *zv;
1606 struct dk_cinfo dki;
1607 struct dk_minfo dkm;
1608 struct dk_callback *dkc;
1609 int error = 0;
1610 rl_t *rl;
1611
1612 mutex_enter(&spa_namespace_lock);
1613
1614 zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL);
1615
1616 if (zv == NULL) {
1617 mutex_exit(&spa_namespace_lock);
1618 return (ENXIO);
1619 }
1620 ASSERT(zv->zv_total_opens > 0);
1621
1622 switch (cmd) {
1623
1624 case DKIOCINFO:
1625 bzero(&dki, sizeof (dki));
1626 (void) strcpy(dki.dki_cname, "zvol");
1627 (void) strcpy(dki.dki_dname, "zvol");
1628 dki.dki_ctype = DKC_UNKNOWN;
1629 dki.dki_unit = getminor(dev);
1630 dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs);
1631 mutex_exit(&spa_namespace_lock);
1632 if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag))
1633 error = EFAULT;
1634 return (error);
1635
1636 case DKIOCGMEDIAINFO:
1637 bzero(&dkm, sizeof (dkm));
1638 dkm.dki_lbsize = 1U << zv->zv_min_bs;
1639 dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs;
1640 dkm.dki_media_type = DK_UNKNOWN;
1641 mutex_exit(&spa_namespace_lock);
1642 if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag))
1643 error = EFAULT;
1644 return (error);
1645
1646 case DKIOCGETEFI:
1647 {
1648 uint64_t vs = zv->zv_volsize;
1649 uint8_t bs = zv->zv_min_bs;
1650
1651 mutex_exit(&spa_namespace_lock);
1652 error = zvol_getefi((void *)arg, flag, vs, bs);
1653 return (error);
1654 }
1655
1656 case DKIOCFLUSHWRITECACHE:
1657 dkc = (struct dk_callback *)arg;
1658 mutex_exit(&spa_namespace_lock);
1659 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1660 if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
1661 (*dkc->dkc_callback)(dkc->dkc_cookie, error);
1662 error = 0;
1663 }
1664 return (error);
1665
1666 case DKIOCGETWCE:
1667 {
1668 int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
1669 if (ddi_copyout(&wce, (void *)arg, sizeof (int),
1670 flag))
1671 error = EFAULT;
1672 break;
1673 }
1674 case DKIOCSETWCE:
1675 {
1676 int wce;
1677 if (ddi_copyin((void *)arg, &wce, sizeof (int),
1678 flag)) {
1679 error = EFAULT;
1680 break;
1681 }
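		/*
		 * When caching is switched off, commit the ZIL so that
		 * writes acknowledged while the cache was on reach
		 * stable storage.
		 */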
1682 if (wce) {
1683 zv->zv_flags |= ZVOL_WCE;
1684 mutex_exit(&spa_namespace_lock);
1685 } else {
1686 zv->zv_flags &= ~ZVOL_WCE;
1687 mutex_exit(&spa_namespace_lock);
1688 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1689 }
1690 return (0);
1691 }
1692
1693 case DKIOCGGEOM:
1694 case DKIOCGVTOC:
1695 /*
1696 * commands using these (like prtvtoc) expect ENOTSUP
1697 * since we're emulating an EFI label
1698 */
1699 error = ENOTSUP;
1700 break;
1701
1702 case DKIOCDUMPINIT:
1703 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1704 RL_WRITER);
1705 error = zvol_dumpify(zv);
1706 zfs_range_unlock(rl);
1707 break;
1708
1709 case DKIOCDUMPFINI:
1710 if (!(zv->zv_flags & ZVOL_DUMPIFIED))
1711 break;
1712 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1713 RL_WRITER);
1714 error = zvol_dump_fini(zv);
1715 zfs_range_unlock(rl);
1716 break;
1717
1718 default:
1719 error = ENOTTY;
1720 break;
1721
1722 }
1723 mutex_exit(&spa_namespace_lock);
1724 return (error);
1725}
1726#endif /* sun */
1727
1728int
1729zvol_busy(void)
1730{
1731 return (zvol_minors != 0);
1732}
1733
1734void
1735zvol_init(void)
1736{
1737 VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t),
1738 1) == 0);
1739 ZFS_LOG(1, "ZVOL Initialized.");
1740}
1741
1742void
1743zvol_fini(void)
1744{
1745 ddi_soft_state_fini(&zfsdev_state);
1746 ZFS_LOG(1, "ZVOL Deinitialized.");
1747}
1748
1749#ifdef sun
1750static int
1751zvol_dump_init(zvol_state_t *zv, boolean_t resize)
1752{
1753 dmu_tx_t *tx;
1754 int error = 0;
1755 objset_t *os = zv->zv_objset;
1756 nvlist_t *nv = NULL;
1757 uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));
1758
1759 ASSERT(MUTEX_HELD(&spa_namespace_lock));
1760 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
1761 DMU_OBJECT_END);
1762 /* wait for dmu_free_long_range to actually free the blocks */
1763 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
1764
1765 tx = dmu_tx_create(os);
1766 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
1767 dmu_tx_hold_bonus(tx, ZVOL_OBJ);
1768 error = dmu_tx_assign(tx, TXG_WAIT);
1769 if (error) {
1770 dmu_tx_abort(tx);
1771 return (error);
1772 }
1773
1774 /*
1775 * If we are resizing the dump device then we only need to
1776 * update the refreservation to match the newly updated
1777 * zvol size. Otherwise, we save off the zvol's original properties
1778 * so that we can restore them if the zvol is ever undumpified.
1779 */
1780 if (resize) {
1781 error = zap_update(os, ZVOL_ZAP_OBJ,
1782 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
1783 &zv->zv_volsize, tx);
1784 } else {
1785 uint64_t checksum, compress, refresrv, vbs, dedup;
1786
1787 error = dsl_prop_get_integer(zv->zv_name,
1788 zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL);
1789 error = error ? error : dsl_prop_get_integer(zv->zv_name,
1790 zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL);
1791 error = error ? error : dsl_prop_get_integer(zv->zv_name,
1792 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL);
1793 error = error ? error : dsl_prop_get_integer(zv->zv_name,
1794 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL);
1795 if (version >= SPA_VERSION_DEDUP) {
1796 error = error ? error :
1797 dsl_prop_get_integer(zv->zv_name,
1798 zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL);
1799 }
1800
1801 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1802 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1,
1803 &compress, tx);
1804 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1805 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx);
1806 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1807 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
1808 &refresrv, tx);
1809 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1810 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1,
1811 &vbs, tx);
1812 error = error ? error : dmu_object_set_blocksize(
1813 os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx);
1814 if (version >= SPA_VERSION_DEDUP) {
1815 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1816 zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1,
1817 &dedup, tx);
1818 }
1819 if (error == 0)
1820 zv->zv_volblocksize = SPA_MAXBLOCKSIZE;
1821 }
1822 dmu_tx_commit(tx);
1823
1824 /*
1825 * We only need to update the zvol's properties if we are initializing
1826 * the dump area for the first time.
1827 */
1828 if (!resize) {
1829 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1830 VERIFY(nvlist_add_uint64(nv,
1831 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0);
1832 VERIFY(nvlist_add_uint64(nv,
1833 zfs_prop_to_name(ZFS_PROP_COMPRESSION),
1834 ZIO_COMPRESS_OFF) == 0);
1835 VERIFY(nvlist_add_uint64(nv,
1836 zfs_prop_to_name(ZFS_PROP_CHECKSUM),
1837 ZIO_CHECKSUM_OFF) == 0);
1838 if (version >= SPA_VERSION_DEDUP) {
1839 VERIFY(nvlist_add_uint64(nv,
1840 zfs_prop_to_name(ZFS_PROP_DEDUP),
1841 ZIO_CHECKSUM_OFF) == 0);
1842 }
1843
1844 error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
1845 nv, NULL);
1846 nvlist_free(nv);
1847
1848 if (error)
1849 return (error);
1850 }
1851
1852 /* Allocate the space for the dump */
1853 error = zvol_prealloc(zv);
1854 return (error);
1855}
1856
1857static int
1858zvol_dumpify(zvol_state_t *zv)
1859{
1860 int error = 0;
1861 uint64_t dumpsize = 0;
1862 dmu_tx_t *tx;
1863 objset_t *os = zv->zv_objset;
1864
1865 if (zv->zv_flags & ZVOL_RDONLY)
1866 return (EROFS);
1867
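	/*
	 * (Re)initialize the dump area if it was never set up, or resize
	 * it if the recorded dump size no longer matches the volume size.
	 */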
1868 if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,
1869 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) {
1870 boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE;
1871
1872 if ((error = zvol_dump_init(zv, resize)) != 0) {
1873 (void) zvol_dump_fini(zv);
1874 return (error);
1875 }
1876 }
1877
1878 /*
1879 * Build up our LBA mapping.
1880 */
1881 error = zvol_get_lbas(zv);
1882 if (error) {
1883 (void) zvol_dump_fini(zv);
1884 return (error);
1885 }
1886
1887 tx = dmu_tx_create(os);
1888 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
1889 error = dmu_tx_assign(tx, TXG_WAIT);
1890 if (error) {
1891 dmu_tx_abort(tx);
1892 (void) zvol_dump_fini(zv);
1893 return (error);
1894 }
1895
1896 zv->zv_flags |= ZVOL_DUMPIFIED;
1897 error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1,
1898 &zv->zv_volsize, tx);
1899 dmu_tx_commit(tx);
1900
1901 if (error) {
1902 (void) zvol_dump_fini(zv);
1903 return (error);
1904 }
1905
1906 txg_wait_synced(dmu_objset_pool(os), 0);
1907 return (0);
1908}
1909
1910static int
1911zvol_dump_fini(zvol_state_t *zv)
1912{
1913 dmu_tx_t *tx;
1914 objset_t *os = zv->zv_objset;
1915 nvlist_t *nv;
1916 int error = 0;
1917 uint64_t checksum, compress, refresrv, vbs, dedup;
1918 uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));
1919
1920 /*
1921 * Attempt to restore the zvol back to its pre-dumpified state.
1922 * This is a best-effort attempt as it's possible that not all
1923 * of these properties were initialized during the dumpify process
1924 * (i.e. error during zvol_dump_init).
1925 */
1926
1927 tx = dmu_tx_create(os);
1928 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
1929 error = dmu_tx_assign(tx, TXG_WAIT);
1930 if (error) {
1931 dmu_tx_abort(tx);
1932 return (error);
1933 }
1934 (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx);
1935 dmu_tx_commit(tx);
1936
1937 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
1938 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum);
1939 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
1940 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress);
1941 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
1942 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv);
1943 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
1944 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs);
1945
1946 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1947 (void) nvlist_add_uint64(nv,
1948 zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum);
1949 (void) nvlist_add_uint64(nv,
1950 zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress);
1951 (void) nvlist_add_uint64(nv,
1952 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv);
1953 if (version >= SPA_VERSION_DEDUP &&
1954 zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
1955 zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) {
1956 (void) nvlist_add_uint64(nv,
1957 zfs_prop_to_name(ZFS_PROP_DEDUP), dedup);
1958 }
1959 (void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
1960 nv, NULL);
1961 nvlist_free(nv);
1962
1963 zvol_free_extents(zv);
1964 zv->zv_flags &= ~ZVOL_DUMPIFIED;
1965 (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END);
1966 /* wait for dmu_free_long_range to actually free the blocks */
1967 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
1968 tx = dmu_tx_create(os);
1969 dmu_tx_hold_bonus(tx, ZVOL_OBJ);
1970 error = dmu_tx_assign(tx, TXG_WAIT);
1971 if (error) {
1972 dmu_tx_abort(tx);
1973 return (error);
1974 }
1975 if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0)
1976 zv->zv_volblocksize = vbs;
1977 dmu_tx_commit(tx);
1978
1979 return (0);
1980}
1981#endif /* sun */
1982
1983static zvol_state_t *
1984zvol_geom_create(const char *name)
1985{
1986 struct g_provider *pp;
1987 struct g_geom *gp;
1988 zvol_state_t *zv;
1989
1990 gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
1991 gp->start = zvol_geom_start;
1992 gp->access = zvol_geom_access;
1993 pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
1994 pp->sectorsize = DEV_BSIZE;
1995
1996 zv = kmem_zalloc(sizeof(*zv), KM_SLEEP);
1997 zv->zv_provider = pp;
1998 zv->zv_state = 0;
1999 bioq_init(&zv->zv_queue);
2000 mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF);
2001
2002 pp->private = zv;
2003
2004 return (zv);
2005}
2006
2007static void
2008zvol_geom_run(zvol_state_t *zv)
2009{
2010 struct g_provider *pp;
2011
2012 pp = zv->zv_provider;
2013 g_error_provider(pp, 0);
2014
2015 kproc_kthread_add(zvol_geom_worker, zv, &zfsproc, NULL, 0, 0,
2016 "zfskern", "zvol %s", pp->name + sizeof(ZVOL_DRIVER));
2017}
2018
2019static void
2020zvol_geom_destroy(zvol_state_t *zv)
2021{
2022 struct g_provider *pp;
2023
2024 g_topology_assert();
2025
2026 mtx_lock(&zv->zv_queue_mtx);
2027 zv->zv_state = 1;
2028 wakeup_one(&zv->zv_queue);
2029 while (zv->zv_state != 2)
2030 msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0);
2031 mtx_destroy(&zv->zv_queue_mtx);
2032
2033 pp = zv->zv_provider;
2034 zv->zv_provider = NULL;
2035 pp->private = NULL;
2036 g_wither_geom(pp->geom, ENXIO);
2037
2038 kmem_free(zv, sizeof(*zv));
2039}
2040
2041static int
2042zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
2043{
2044 int count, error, flags;
2045
2046 g_topology_assert();
2047
2048 /*
2049 * To keep things simple we expect either an open or a close, but not both
2050 * at the same time.
2051 */
2052 KASSERT((acr >= 0 && acw >= 0 && ace >= 0) ||
2053 (acr <= 0 && acw <= 0 && ace <= 0),
2054 ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).",
2055 pp->name, acr, acw, ace));
2056
2057 if (pp->private == NULL) {
2058 if (acr <= 0 && acw <= 0 && ace <= 0)
2059 return (0);
2060 return (pp->error);
2061 }
2062
2063 /*
2064 * We don't pass the FEXCL flag to zvol_open()/zvol_close() if ace != 0,
2065 * because GEOM already handles that and handles it a bit differently.
2066 * GEOM allows for multiple read/exclusive consumers and ZFS allows
2067 * only one exclusive consumer, no matter if it is a reader or a writer.
2068 * I like the way GEOM works better, so I'll leave it to GEOM to
2069 * decide what to do.
2070 */
2071
2072 count = acr + acw + ace;
2073 if (count == 0)
2074 return (0);
2075
2076 flags = 0;
2077 if (acr != 0 || ace != 0)
2078 flags |= FREAD;
2079 if (acw != 0)
2080 flags |= FWRITE;
2081
2082 g_topology_unlock();
2083 if (count > 0)
2084 error = zvol_open(pp, flags, count);
2085 else
2086 error = zvol_close(pp, flags, -count);
2087 g_topology_lock();
2088 return (error);
2089}
2090
2091static void
2092zvol_geom_start(struct bio *bp)
2093{
2094 zvol_state_t *zv;
2095 boolean_t first;
2096
2097 switch (bp->bio_cmd) {
2098 case BIO_READ:
2099 case BIO_WRITE:
2100 case BIO_FLUSH:
2101 zv = bp->bio_to->private;
2102 ASSERT(zv != NULL);
2103 mtx_lock(&zv->zv_queue_mtx);
2104 first = (bioq_first(&zv->zv_queue) == NULL);
2105 bioq_insert_tail(&zv->zv_queue, bp);
2106 mtx_unlock(&zv->zv_queue_mtx);
2107 if (first)
2108 wakeup_one(&zv->zv_queue);
2109 break;
2110 case BIO_GETATTR:
2111 case BIO_DELETE:
2112 default:
2113 g_io_deliver(bp, EOPNOTSUPP);
2114 break;
2115 }
2116}
2117
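/*
 * Each zvol has a dedicated worker thread that drains its bio queue:
 * BIO_FLUSH becomes a ZIL commit, while BIO_READ and BIO_WRITE go
 * through zvol_strategy().  The thread exits once zvol_geom_destroy()
 * sets zv_state and the queue has been emptied.
 */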
2118static void
2119zvol_geom_worker(void *arg)
2120{
2121 zvol_state_t *zv;
2122 struct bio *bp;
2123
2124 thread_lock(curthread);
2125 sched_prio(curthread, PRIBIO);
2126 thread_unlock(curthread);
2127
2128 zv = arg;
2129 for (;;) {
2130 mtx_lock(&zv->zv_queue_mtx);
2131 bp = bioq_takefirst(&zv->zv_queue);
2132 if (bp == NULL) {
2133 if (zv->zv_state == 1) {
2134 zv->zv_state = 2;
2135 wakeup(&zv->zv_state);
2136 mtx_unlock(&zv->zv_queue_mtx);
2137 kthread_exit();
2138 }
2139 msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP,
2140 "zvol:io", 0);
2141 continue;
2142 }
2143 mtx_unlock(&zv->zv_queue_mtx);
2144 switch (bp->bio_cmd) {
2145 case BIO_FLUSH:
2146 zil_commit(zv->zv_zilog, ZVOL_OBJ);
2147 g_io_deliver(bp, 0);
2148 break;
2149 case BIO_READ:
2150 case BIO_WRITE:
2151 zvol_strategy(bp);
2152 break;
2153 }
2154 }
2155}
2156
2157extern boolean_t dataset_name_hidden(const char *name);
2158
2159static int
2160zvol_create_snapshots(objset_t *os, const char *name)
2161{
2162 uint64_t cookie, obj;
2163 char *sname;
2164 int error, len;
2165
2166 cookie = obj = 0;
2167 sname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2168
2169 (void) dmu_objset_find(name, dmu_objset_prefetch, NULL,
2170 DS_FIND_SNAPSHOTS);
2171
2172 for (;;) {
2173 len = snprintf(sname, MAXPATHLEN, "%s@", name);
2174 if (len >= MAXPATHLEN) {
2175 dmu_objset_rele(os, FTAG);
2176 error = ENAMETOOLONG;
2177 break;
2178 }
2179
2180 error = dmu_snapshot_list_next(os, MAXPATHLEN - len,
2181 sname + len, &obj, &cookie, NULL);
2182 if (error != 0) {
2183 if (error == ENOENT)
2184 error = 0;
2185 break;
2186 }
2187
2188 if ((error = zvol_create_minor(sname)) != 0) {
2189 printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n",
2190 sname, error);
2191 break;
2192 }
2193 }
2194
2195 kmem_free(sname, MAXPATHLEN);
2196 return (error);
2197}
2198
2199int
2200zvol_create_minors(const char *name)
2201{
2202 uint64_t cookie;
2203 objset_t *os;
2204 char *osname, *p;
2205 int error, len;
2206
2207 if (dataset_name_hidden(name))
2208 return (0);
2209
2210 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
2211 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n",
2212 name, error);
2213 return (error);
2214 }
2215 if (dmu_objset_type(os) == DMU_OST_ZVOL) {
2216 if ((error = zvol_create_minor(name)) == 0)
2217 error = zvol_create_snapshots(os, name);
2218 else {
2219 printf("ZFS WARNING: Unable to create ZVOL %s (error=%d).\n",
2220 name, error);
2221 }
2222 dmu_objset_rele(os, FTAG);
2223 return (error);
2224 }
2225 if (dmu_objset_type(os) != DMU_OST_ZFS) {
2226 dmu_objset_rele(os, FTAG);
2227 return (0);
2228 }
2229
2230 osname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2231 if (snprintf(osname, MAXPATHLEN, "%s/", name) >= MAXPATHLEN) {
2232 dmu_objset_rele(os, FTAG);
2233 kmem_free(osname, MAXPATHLEN);
2234 return (ENOENT);
2235 }
2236 p = osname + strlen(osname);
2237 len = MAXPATHLEN - (p - osname);
2238
2239 /* Prefetch the datasets. */
2240 cookie = 0;
2241 while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
2242 if (!dataset_name_hidden(osname))
2243 (void) dmu_objset_prefetch(osname, NULL);
2244 }
2245
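	/*
	 * Recurse into each child dataset.  The hold on the parent
	 * objset is dropped across the recursive call (which takes its
	 * own holds) and re-acquired before continuing the iteration.
	 */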
2246 cookie = 0;
2247 while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL,
2248 &cookie) == 0) {
2249 dmu_objset_rele(os, FTAG);
2250 (void)zvol_create_minors(osname);
2251 if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
2252 printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n",
2253 name, error);
2254 return (error);
2255 }
2256 }
2257
2258 dmu_objset_rele(os, FTAG);
2259 kmem_free(osname, MAXPATHLEN);
2260 return (0);
2261}
2262
2263static void
2264zvol_rename_minor(struct g_geom *gp, const char *newname)
2265{
2266 struct g_provider *pp;
2267 zvol_state_t *zv;
2268
2269 ASSERT(MUTEX_HELD(&spa_namespace_lock));
2270 g_topology_assert();
2271
2272 pp = LIST_FIRST(&gp->provider);
2273 ASSERT(pp != NULL);
2274 zv = pp->private;
2275 ASSERT(zv != NULL);
2276
2277 zv->zv_provider = NULL;
2278 g_wither_provider(pp, ENXIO);
2279
2280 pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
2281 pp->sectorsize = DEV_BSIZE;
2282 pp->mediasize = zv->zv_volsize;
2283 pp->private = zv;
2284 zv->zv_provider = pp;
2285 strlcpy(zv->zv_name, newname, sizeof(zv->zv_name));
2286 g_error_provider(pp, 0);
2287}
2288
2289void
2290zvol_rename_minors(const char *oldname, const char *newname)
2291{
2292 char name[MAXPATHLEN];
2293 struct g_provider *pp;
2294 struct g_geom *gp;
2295 size_t oldnamelen, newnamelen;
2296 zvol_state_t *zv;
2297 char *namebuf;
2298
2299 oldnamelen = strlen(oldname);
2300 newnamelen = strlen(newname);
2301
2302 DROP_GIANT();
2303 mutex_enter(&spa_namespace_lock);
2304 g_topology_lock();
2305
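	/*
	 * Rename the provider for the dataset itself as well as for any
	 * child ("/") or snapshot ("@") zvols beneath the old name.
	 */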
2306 LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) {
2307 pp = LIST_FIRST(&gp->provider);
2308 if (pp == NULL)
2309 continue;
2310 zv = pp->private;
2311 if (zv == NULL)
2312 continue;
2313 if (strcmp(zv->zv_name, oldname) == 0) {
2314 zvol_rename_minor(gp, newname);
2315 } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
2316 (zv->zv_name[oldnamelen] == '/' ||
2317 zv->zv_name[oldnamelen] == '@')) {
2318 snprintf(name, sizeof(name), "%s%c%s", newname,
2319 zv->zv_name[oldnamelen],
2320 zv->zv_name + oldnamelen + 1);
2321 zvol_rename_minor(gp, name);
2322 }
2323 }
2324
2325 g_topology_unlock();
2326 mutex_exit(&spa_namespace_lock);
2327 PICKUP_GIANT();
2328}
1240int
1241zvol_strategy(struct bio *bp)
1242{
1243 zvol_state_t *zv = bp->bio_to->private;
1244 uint64_t off, volsize;
1245 size_t resid;
1246 char *addr;
1247 objset_t *os;
1248 rl_t *rl;
1249 int error = 0;
1250 boolean_t doread = (bp->bio_cmd == BIO_READ);
1251 boolean_t sync;
1252
1253 if (zv == NULL) {
1254 g_io_deliver(bp, ENXIO);
1255 return (0);
1256 }
1257
1258 if (bp->bio_cmd != BIO_READ && (zv->zv_flags & ZVOL_RDONLY)) {
1259 g_io_deliver(bp, EROFS);
1260 return (0);
1261 }
1262
1263 off = bp->bio_offset;
1264 volsize = zv->zv_volsize;
1265
1266 os = zv->zv_objset;
1267 ASSERT(os != NULL);
1268
1269 addr = bp->bio_data;
1270 resid = bp->bio_length;
1271
1272 if (resid > 0 && (off < 0 || off >= volsize)) {
1273 g_io_deliver(bp, EIO);
1274 return (0);
1275 }
1276
1277 sync = !doread && zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
1278
1279 /*
1280 * There must be no buffer changes when doing a dmu_sync() because
1281 * we can't change the data whilst calculating the checksum.
1282 */
1283 rl = zfs_range_lock(&zv->zv_znode, off, resid,
1284 doread ? RL_READER : RL_WRITER);
1285
1286 while (resid != 0 && off < volsize) {
1287 size_t size = MIN(resid, zvol_maxphys);
1288 if (doread) {
1289 error = dmu_read(os, ZVOL_OBJ, off, size, addr,
1290 DMU_READ_PREFETCH);
1291 } else {
1292 dmu_tx_t *tx = dmu_tx_create(os);
1293 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
1294 error = dmu_tx_assign(tx, TXG_WAIT);
1295 if (error) {
1296 dmu_tx_abort(tx);
1297 } else {
1298 dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
1299 zvol_log_write(zv, tx, off, size, sync);
1300 dmu_tx_commit(tx);
1301 }
1302 }
1303 if (error) {
1304 /* convert checksum errors into IO errors */
1305 if (error == ECKSUM)
1306 error = EIO;
1307 break;
1308 }
1309 off += size;
1310 addr += size;
1311 resid -= size;
1312 }
1313 zfs_range_unlock(rl);
1314
1315 bp->bio_completed = bp->bio_length - resid;
1316 if (bp->bio_completed < bp->bio_length)
1317 bp->bio_error = (off > volsize ? EINVAL : error);
1318
1319 if (sync)
1320 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1321 g_io_deliver(bp, 0);
1322
1323 return (0);
1324}
1325
1326#ifdef sun
1327/*
1328 * Set the buffer count to the zvol maximum transfer.
1329 * Using our own routine instead of the default minphys()
1330 * means that for larger writes we write bigger buffers on X86
1331 * (128K instead of 56K) and flush the disk write cache less often
1332 * (every zvol_maxphys - currently 1MB) instead of minphys (currently
1333 * 56K on X86 and 128K on sparc).
1334 */
1335void
1336zvol_minphys(struct buf *bp)
1337{
1338 if (bp->b_bcount > zvol_maxphys)
1339 bp->b_bcount = zvol_maxphys;
1340}
1341
1342int
1343zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
1344{
1345 minor_t minor = getminor(dev);
1346 zvol_state_t *zv;
1347 int error = 0;
1348 uint64_t size;
1349 uint64_t boff;
1350 uint64_t resid;
1351
1352 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1353 if (zv == NULL)
1354 return (ENXIO);
1355
1356 boff = ldbtob(blkno);
1357 resid = ldbtob(nblocks);
1358
1359 VERIFY3U(boff + resid, <=, zv->zv_volsize);
1360
1361 while (resid) {
1362 size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff);
1363 error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE);
1364 if (error)
1365 break;
1366 boff += size;
1367 addr += size;
1368 resid -= size;
1369 }
1370
1371 return (error);
1372}
1373
1374/*ARGSUSED*/
1375int
1376zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
1377{
1378 minor_t minor = getminor(dev);
1379 zvol_state_t *zv;
1380 uint64_t volsize;
1381 rl_t *rl;
1382 int error = 0;
1383
1384 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1385 if (zv == NULL)
1386 return (ENXIO);
1387
1388 volsize = zv->zv_volsize;
1389 if (uio->uio_resid > 0 &&
1390 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1391 return (EIO);
1392
1393 if (zv->zv_flags & ZVOL_DUMPIFIED) {
1394 error = physio(zvol_strategy, NULL, dev, B_READ,
1395 zvol_minphys, uio);
1396 return (error);
1397 }
1398
1399 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
1400 RL_READER);
1401 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1402 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1403
1404 /* don't read past the end */
1405 if (bytes > volsize - uio->uio_loffset)
1406 bytes = volsize - uio->uio_loffset;
1407
1408 error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
1409 if (error) {
1410 /* convert checksum errors into IO errors */
1411 if (error == ECKSUM)
1412 error = EIO;
1413 break;
1414 }
1415 }
1416 zfs_range_unlock(rl);
1417 return (error);
1418}
1419
1420/*ARGSUSED*/
1421int
1422zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
1423{
1424 minor_t minor = getminor(dev);
1425 zvol_state_t *zv;
1426 uint64_t volsize;
1427 rl_t *rl;
1428 int error = 0;
1429 boolean_t sync;
1430
1431 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1432 if (zv == NULL)
1433 return (ENXIO);
1434
1435 volsize = zv->zv_volsize;
1436 if (uio->uio_resid > 0 &&
1437 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1438 return (EIO);
1439
1440 if (zv->zv_flags & ZVOL_DUMPIFIED) {
1441 error = physio(zvol_strategy, NULL, dev, B_WRITE,
1442 zvol_minphys, uio);
1443 return (error);
1444 }
1445
1446 sync = !(zv->zv_flags & ZVOL_WCE) ||
1447 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
1448
1449 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
1450 RL_WRITER);
1451 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1452 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1453 uint64_t off = uio->uio_loffset;
1454 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
1455
1456 if (bytes > volsize - off) /* don't write past the end */
1457 bytes = volsize - off;
1458
1459 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
1460 error = dmu_tx_assign(tx, TXG_WAIT);
1461 if (error) {
1462 dmu_tx_abort(tx);
1463 break;
1464 }
1465 error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
1466 if (error == 0)
1467 zvol_log_write(zv, tx, off, bytes, sync);
1468 dmu_tx_commit(tx);
1469
1470 if (error)
1471 break;
1472 }
1473 zfs_range_unlock(rl);
1474 if (sync)
1475 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1476 return (error);
1477}
1478
1479int
1480zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
1481{
1482 struct uuid uuid = EFI_RESERVED;
1483 efi_gpe_t gpe = { 0 };
1484 uint32_t crc;
1485 dk_efi_t efi;
1486 int length;
1487 char *ptr;
1488
1489 if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag))
1490 return (EFAULT);
1491 ptr = (char *)(uintptr_t)efi.dki_data_64;
1492 length = efi.dki_length;
1493 /*
1494 * Some clients may attempt to request a PMBR for the
1495 * zvol. Currently this interface will return EINVAL to
1496 * such requests. These requests could be supported by
1497 * adding a check for lba == 0 and consing up an appropriate
1498 * PMBR.
1499 */
1500 if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0)
1501 return (EINVAL);
1502
1503 gpe.efi_gpe_StartingLBA = LE_64(34ULL);
1504 gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1);
1505 UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid);
1506
1507 if (efi.dki_lba == 1) {
1508 efi_gpt_t gpt = { 0 };
1509
1510 gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE);
1511 gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT);
1512 gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt));
1513 gpt.efi_gpt_MyLBA = LE_64(1ULL);
1514 gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL);
1515 gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1);
1516 gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL);
1517 gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1);
1518 gpt.efi_gpt_SizeOfPartitionEntry =
1519 LE_32(sizeof (efi_gpe_t));
1520 CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table);
1521 gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
1522 CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table);
1523 gpt.efi_gpt_HeaderCRC32 = LE_32(~crc);
1524 if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length),
1525 flag))
1526 return (EFAULT);
1527 ptr += sizeof (gpt);
1528 length -= sizeof (gpt);
1529 }
1530 if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe),
1531 length), flag))
1532 return (EFAULT);
1533 return (0);
1534}
1535
1536/*
1537 * BEGIN entry points to allow external callers access to the volume.
1538 */
1539/*
1540 * Return the volume parameters needed for access from an external caller.
1541 * These values are invariant as long as the volume is held open.
1542 */
1543int
1544zvol_get_volume_params(minor_t minor, uint64_t *blksize,
1545 uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
1546 void **rl_hdl, void **bonus_hdl)
1547{
1548 zvol_state_t *zv;
1549
1550 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1551 if (zv == NULL)
1552 return (ENXIO);
1553 if (zv->zv_flags & ZVOL_DUMPIFIED)
1554 return (ENXIO);
1555
1556 ASSERT(blksize && max_xfer_len && minor_hdl &&
1557 objset_hdl && zil_hdl && rl_hdl && bonus_hdl);
1558
1559 *blksize = zv->zv_volblocksize;
1560 *max_xfer_len = (uint64_t)zvol_maxphys;
1561 *minor_hdl = zv;
1562 *objset_hdl = zv->zv_objset;
1563 *zil_hdl = zv->zv_zilog;
1564 *rl_hdl = &zv->zv_znode;
1565 *bonus_hdl = zv->zv_dbuf;
1566 return (0);
1567}
1568
1569/*
1570 * Return the current volume size to an external caller.
1571 * The size can change while the volume is open.
1572 */
1573uint64_t
1574zvol_get_volume_size(void *minor_hdl)
1575{
1576 zvol_state_t *zv = minor_hdl;
1577
1578 return (zv->zv_volsize);
1579}
1580
1581/*
1582 * Return the current WCE setting to an external caller.
1583 * The WCE setting can change while the volume is open.
1584 */
1585int
1586zvol_get_volume_wce(void *minor_hdl)
1587{
1588 zvol_state_t *zv = minor_hdl;
1589
1590 return ((zv->zv_flags & ZVOL_WCE) ? 1 : 0);
1591}
1592
1593/*
1594 * Entry point for external callers to zvol_log_write
1595 */
1596void
1597zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
1598 boolean_t sync)
1599{
1600 zvol_state_t *zv = minor_hdl;
1601
1602 zvol_log_write(zv, tx, off, resid, sync);
1603}
1604/*
1605 * END entry points to allow external callers access to the volume.
1606 */
1607
1608/*
1609 * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
1610 */
1611/*ARGSUSED*/
1612int
1613zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
1614{
1615 zvol_state_t *zv;
1616 struct dk_cinfo dki;
1617 struct dk_minfo dkm;
1618 struct dk_callback *dkc;
1619 int error = 0;
1620 rl_t *rl;
1621
1622 mutex_enter(&spa_namespace_lock);
1623
1624 zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL);
1625
1626 if (zv == NULL) {
1627 mutex_exit(&spa_namespace_lock);
1628 return (ENXIO);
1629 }
1630 ASSERT(zv->zv_total_opens > 0);
1631
1632 switch (cmd) {
1633
1634 case DKIOCINFO:
1635 bzero(&dki, sizeof (dki));
1636 (void) strcpy(dki.dki_cname, "zvol");
1637 (void) strcpy(dki.dki_dname, "zvol");
1638 dki.dki_ctype = DKC_UNKNOWN;
1639 dki.dki_unit = getminor(dev);
1640 dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs);
1641 mutex_exit(&spa_namespace_lock);
1642 if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag))
1643 error = EFAULT;
1644 return (error);
1645
1646 case DKIOCGMEDIAINFO:
1647 bzero(&dkm, sizeof (dkm));
1648 dkm.dki_lbsize = 1U << zv->zv_min_bs;
1649 dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs;
1650 dkm.dki_media_type = DK_UNKNOWN;
1651 mutex_exit(&spa_namespace_lock);
1652 if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag))
1653 error = EFAULT;
1654 return (error);
1655
1656 case DKIOCGETEFI:
1657 {
1658 uint64_t vs = zv->zv_volsize;
1659 uint8_t bs = zv->zv_min_bs;
1660
1661 mutex_exit(&spa_namespace_lock);
1662 error = zvol_getefi((void *)arg, flag, vs, bs);
1663 return (error);
1664 }
1665
1666 case DKIOCFLUSHWRITECACHE:
1667 dkc = (struct dk_callback *)arg;
1668 mutex_exit(&spa_namespace_lock);
1669 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1670 if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
1671 (*dkc->dkc_callback)(dkc->dkc_cookie, error);
1672 error = 0;
1673 }
1674 return (error);
1675
1676 case DKIOCGETWCE:
1677 {
1678 int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
1679 if (ddi_copyout(&wce, (void *)arg, sizeof (int),
1680 flag))
1681 error = EFAULT;
1682 break;
1683 }
1684 case DKIOCSETWCE:
1685 {
1686 int wce;
1687 if (ddi_copyin((void *)arg, &wce, sizeof (int),
1688 flag)) {
1689 error = EFAULT;
1690 break;
1691 }
1692 if (wce) {
1693 zv->zv_flags |= ZVOL_WCE;
1694 mutex_exit(&spa_namespace_lock);
1695 } else {
1696 zv->zv_flags &= ~ZVOL_WCE;
1697 mutex_exit(&spa_namespace_lock);
1698 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1699 }
1700 return (0);
1701 }
1702
1703 case DKIOCGGEOM:
1704 case DKIOCGVTOC:
1705 /*
1706 * commands using these (like prtvtoc) expect ENOTSUP
1707 * since we're emulating an EFI label
1708 */
1709 error = ENOTSUP;
1710 break;
1711
1712 case DKIOCDUMPINIT:
1713 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1714 RL_WRITER);
1715 error = zvol_dumpify(zv);
1716 zfs_range_unlock(rl);
1717 break;
1718
1719 case DKIOCDUMPFINI:
1720 if (!(zv->zv_flags & ZVOL_DUMPIFIED))
1721 break;
1722 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
1723 RL_WRITER);
1724 error = zvol_dump_fini(zv);
1725 zfs_range_unlock(rl);
1726 break;
1727
1728 default:
1729 error = ENOTTY;
1730 break;
1731
1732 }
1733 mutex_exit(&spa_namespace_lock);
1734 return (error);
1735}
1736#endif /* sun */
1737
1738int
1739zvol_busy(void)
1740{
1741 return (zvol_minors != 0);
1742}
1743
1744void
1745zvol_init(void)
1746{
1747 VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t),
1748 1) == 0);
1749 ZFS_LOG(1, "ZVOL Initialized.");
1750}
1751
1752void
1753zvol_fini(void)
1754{
1755 ddi_soft_state_fini(&zfsdev_state);
1756 ZFS_LOG(1, "ZVOL Deinitialized.");
1757}
1758
1759#ifdef sun
1760static int
1761zvol_dump_init(zvol_state_t *zv, boolean_t resize)
1762{
1763 dmu_tx_t *tx;
1764 int error = 0;
1765 objset_t *os = zv->zv_objset;
1766 nvlist_t *nv = NULL;
1767 uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));
1768
1769 ASSERT(MUTEX_HELD(&spa_namespace_lock));
1770 error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
1771 DMU_OBJECT_END);
1772 /* wait for dmu_free_long_range to actually free the blocks */
1773 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
1774
1775 tx = dmu_tx_create(os);
1776 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
1777 dmu_tx_hold_bonus(tx, ZVOL_OBJ);
1778 error = dmu_tx_assign(tx, TXG_WAIT);
1779 if (error) {
1780 dmu_tx_abort(tx);
1781 return (error);
1782 }
1783
1784 /*
1785 * If we are resizing the dump device then we only need to
1786 * update the refreservation to match the newly updated
1787 * zvolsize. Otherwise, we save off the original state of the
1788 * zvol so that we can restore them if the zvol is ever undumpified.
1789 */
1790 if (resize) {
1791 error = zap_update(os, ZVOL_ZAP_OBJ,
1792 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
1793 &zv->zv_volsize, tx);
1794 } else {
1795 uint64_t checksum, compress, refresrv, vbs, dedup;
1796
1797 error = dsl_prop_get_integer(zv->zv_name,
1798 zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL);
1799 error = error ? error : dsl_prop_get_integer(zv->zv_name,
1800 zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL);
1801 error = error ? error : dsl_prop_get_integer(zv->zv_name,
1802 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL);
1803 error = error ? error : dsl_prop_get_integer(zv->zv_name,
1804 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL);
1805 if (version >= SPA_VERSION_DEDUP) {
1806 error = error ? error :
1807 dsl_prop_get_integer(zv->zv_name,
1808 zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL);
1809 }
1810
1811 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1812 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1,
1813 &compress, tx);
1814 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1815 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx);
1816 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1817 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
1818 &refresrv, tx);
1819 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1820 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1,
1821 &vbs, tx);
1822 error = error ? error : dmu_object_set_blocksize(
1823 os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx);
1824 if (version >= SPA_VERSION_DEDUP) {
1825 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
1826 zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1,
1827 &dedup, tx);
1828 }
1829 if (error == 0)
1830 zv->zv_volblocksize = SPA_MAXBLOCKSIZE;
1831 }
1832 dmu_tx_commit(tx);
1833
1834 /*
1835 * We only need update the zvol's property if we are initializing
1836 * the dump area for the first time.
1837 */
1838 if (!resize) {
1839 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1840 VERIFY(nvlist_add_uint64(nv,
1841 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0);
1842 VERIFY(nvlist_add_uint64(nv,
1843 zfs_prop_to_name(ZFS_PROP_COMPRESSION),
1844 ZIO_COMPRESS_OFF) == 0);
1845 VERIFY(nvlist_add_uint64(nv,
1846 zfs_prop_to_name(ZFS_PROP_CHECKSUM),
1847 ZIO_CHECKSUM_OFF) == 0);
1848 if (version >= SPA_VERSION_DEDUP) {
1849 VERIFY(nvlist_add_uint64(nv,
1850 zfs_prop_to_name(ZFS_PROP_DEDUP),
1851 ZIO_CHECKSUM_OFF) == 0);
1852 }
1853
1854 error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
1855 nv, NULL);
1856 nvlist_free(nv);
1857
1858 if (error)
1859 return (error);
1860 }
1861
1862 /* Allocate the space for the dump */
1863 error = zvol_prealloc(zv);
1864 return (error);
1865}
1866
static int
zvol_dumpify(zvol_state_t *zv)
{
	int error = 0;
	uint64_t dumpsize = 0;
	dmu_tx_t *tx;
	objset_t *os = zv->zv_objset;

	if (zv->zv_flags & ZVOL_RDONLY)
		return (EROFS);

	if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,
	    8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) {
		boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE;

		if ((error = zvol_dump_init(zv, resize)) != 0) {
			(void) zvol_dump_fini(zv);
			return (error);
		}
	}

	/*
	 * Build up our LBA mapping.
	 */
	error = zvol_get_lbas(zv);
	if (error) {
		(void) zvol_dump_fini(zv);
		return (error);
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		(void) zvol_dump_fini(zv);
		return (error);
	}

	zv->zv_flags |= ZVOL_DUMPIFIED;
	error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1,
	    &zv->zv_volsize, tx);
	dmu_tx_commit(tx);

	if (error) {
		(void) zvol_dump_fini(zv);
		return (error);
	}

	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}

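/*
 * Undo zvol_dumpify(): restore the properties saved in the ZAP, free
 * the preallocated blocks and restore the original volume block size.
 */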
static int
zvol_dump_fini(zvol_state_t *zv)
{
	dmu_tx_t *tx;
	objset_t *os = zv->zv_objset;
	nvlist_t *nv;
	int error = 0;
	uint64_t checksum, compress, refresrv, vbs, dedup;
	uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));

	/*
	 * Attempt to restore the zvol back to its pre-dumpified state.
	 * This is a best-effort attempt, as it is possible that not all
	 * of these properties were initialized during the dumpify process
	 * (e.g., an error occurred during zvol_dump_init).
	 */

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}
	(void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx);
	dmu_tx_commit(tx);

	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv);
	(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs);

	VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress);
	(void) nvlist_add_uint64(nv,
	    zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv);
	if (version >= SPA_VERSION_DEDUP &&
	    zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
	    zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup) == 0) {
		(void) nvlist_add_uint64(nv,
		    zfs_prop_to_name(ZFS_PROP_DEDUP), dedup);
	}
	(void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL,
	    nv, NULL);
	nvlist_free(nv);

	zvol_free_extents(zv);
	zv->zv_flags &= ~ZVOL_DUMPIFIED;
	(void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END);
	/* Wait for dmu_free_long_range() to actually free the blocks. */
	txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, ZVOL_OBJ);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}
	if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0)
		zv->zv_volblocksize = vbs;
	dmu_tx_commit(tx);

	return (0);
}
#endif	/* sun */

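/*
 * Allocate the in-core state for a volume and attach it to a freshly
 * created GEOM geom/provider pair.  The provider is not announced to
 * consumers here; that happens in zvol_geom_run().
 */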
static zvol_state_t *
zvol_geom_create(const char *name)
{
	struct g_provider *pp;
	struct g_geom *gp;
	zvol_state_t *zv;

	gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
	gp->start = zvol_geom_start;
	gp->access = zvol_geom_access;
	pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
	pp->sectorsize = DEV_BSIZE;

	zv = kmem_zalloc(sizeof(*zv), KM_SLEEP);
	zv->zv_provider = pp;
	zv->zv_state = 0;
	bioq_init(&zv->zv_queue);
	mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF);

	pp->private = zv;

	return (zv);
}

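/*
 * Announce the provider to GEOM and start the per-volume worker thread
 * that services the bio queue.
 */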
static void
zvol_geom_run(zvol_state_t *zv)
{
	struct g_provider *pp;

	pp = zv->zv_provider;
	g_error_provider(pp, 0);

	kproc_kthread_add(zvol_geom_worker, zv, &zfsproc, NULL, 0, 0,
	    "zfskern", "zvol %s", pp->name + sizeof(ZVOL_DRIVER));
}

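/*
 * Tear down a volume: ask the worker thread to exit, wait for it to
 * acknowledge, then wither the geom and free the in-core state.
 */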
static void
zvol_geom_destroy(zvol_state_t *zv)
{
	struct g_provider *pp;

	g_topology_assert();

	mtx_lock(&zv->zv_queue_mtx);
	zv->zv_state = 1;
	wakeup_one(&zv->zv_queue);
	while (zv->zv_state != 2)
		msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0);
	mtx_destroy(&zv->zv_queue_mtx);

	pp = zv->zv_provider;
	zv->zv_provider = NULL;
	pp->private = NULL;
	g_wither_geom(pp->geom, ENXIO);

	kmem_free(zv, sizeof(*zv));
}

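/*
 * GEOM ->access method.  GEOM passes deltas of the read, write and
 * exclusive reference counts; for example, a first read-only open
 * arrives as (acr=1, acw=0, ace=0) and the matching close as
 * (acr=-1, acw=0, ace=0).  We translate the deltas into zvol_open()
 * and zvol_close() calls.
 */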
static int
zvol_geom_access(struct g_provider *pp, int acr, int acw, int ace)
{
	int count, error, flags;

	g_topology_assert();

	/*
	 * To keep things simple, we expect either an open or a close, but
	 * not both at the same time (i.e. all three deltas must have the
	 * same sign).
	 */
	KASSERT((acr >= 0 && acw >= 0 && ace >= 0) ||
	    (acr <= 0 && acw <= 0 && ace <= 0),
	    ("Unsupported access request to %s (acr=%d, acw=%d, ace=%d).",
	    pp->name, acr, acw, ace));

	if (pp->private == NULL) {
		if (acr <= 0 && acw <= 0 && ace <= 0)
			return (0);
		return (pp->error);
	}

	/*
	 * We don't pass the FEXCL flag to zvol_open()/zvol_close() if
	 * ace != 0, because GEOM already handles exclusive access and
	 * handles it a bit differently: GEOM allows multiple read and
	 * exclusive consumers, while ZFS allows only one exclusive
	 * consumer, no matter whether it is a reader or a writer.  The
	 * GEOM semantics work better here, so the decision is left to
	 * GEOM.
	 */

	count = acr + acw + ace;
	if (count == 0)
		return (0);

	flags = 0;
	if (acr != 0 || ace != 0)
		flags |= FREAD;
	if (acw != 0)
		flags |= FWRITE;

	g_topology_unlock();
	if (count > 0)
		error = zvol_open(pp, flags, count);
	else
		error = zvol_close(pp, flags, -count);
	g_topology_lock();
	return (error);
}

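/*
 * GEOM ->start method.  Reads, writes and flushes are queued for the
 * worker thread; all other requests are rejected with EOPNOTSUPP.
 */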
static void
zvol_geom_start(struct bio *bp)
{
	zvol_state_t *zv;
	boolean_t first;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_FLUSH:
		zv = bp->bio_to->private;
		ASSERT(zv != NULL);
		mtx_lock(&zv->zv_queue_mtx);
		first = (bioq_first(&zv->zv_queue) == NULL);
		bioq_insert_tail(&zv->zv_queue, bp);
		mtx_unlock(&zv->zv_queue_mtx);
		if (first)
			wakeup_one(&zv->zv_queue);
		break;
	case BIO_GETATTR:
	case BIO_DELETE:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		break;
	}
}

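/*
 * Per-volume worker thread.  It sleeps until zvol_geom_start() queues
 * bios, services them (committing the ZIL on BIO_FLUSH), and exits once
 * zvol_geom_destroy() sets zv_state to 1.
 */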
static void
zvol_geom_worker(void *arg)
{
	zvol_state_t *zv;
	struct bio *bp;

	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	zv = arg;
	for (;;) {
		mtx_lock(&zv->zv_queue_mtx);
		bp = bioq_takefirst(&zv->zv_queue);
		if (bp == NULL) {
			if (zv->zv_state == 1) {
				zv->zv_state = 2;
				wakeup(&zv->zv_state);
				mtx_unlock(&zv->zv_queue_mtx);
				kthread_exit();
			}
			msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP,
			    "zvol:io", 0);
			continue;
		}
		mtx_unlock(&zv->zv_queue_mtx);
		switch (bp->bio_cmd) {
		case BIO_FLUSH:
			zil_commit(zv->zv_zilog, ZVOL_OBJ);
			g_io_deliver(bp, 0);
			break;
		case BIO_READ:
		case BIO_WRITE:
			zvol_strategy(bp);
			break;
		}
	}
}

extern boolean_t dataset_name_hidden(const char *name);

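/*
 * Create device minors for every snapshot of the given volume.  The
 * caller holds the objset and is responsible for releasing it.
 */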
static int
zvol_create_snapshots(objset_t *os, const char *name)
{
	uint64_t cookie, obj;
	char *sname;
	int error, len;

	cookie = obj = 0;
	sname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

	(void) dmu_objset_find(name, dmu_objset_prefetch, NULL,
	    DS_FIND_SNAPSHOTS);

	for (;;) {
		len = snprintf(sname, MAXPATHLEN, "%s@", name);
		if (len >= MAXPATHLEN) {
			/*
			 * Don't release the objset here; the caller holds
			 * it and releases it after we return.
			 */
			error = ENAMETOOLONG;
			break;
		}

		error = dmu_snapshot_list_next(os, MAXPATHLEN - len,
		    sname + len, &obj, &cookie, NULL);
		if (error != 0) {
			if (error == ENOENT)
				error = 0;
			break;
		}

		if ((error = zvol_create_minor(sname)) != 0) {
			printf("ZFS WARNING: Unable to create ZVOL %s "
			    "(error=%d).\n", sname, error);
			break;
		}
	}

	kmem_free(sname, MAXPATHLEN);
	return (error);
}

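/*
 * Recursively create device minors for the given dataset: volumes get a
 * minor (plus minors for their snapshots), filesystems are descended
 * into, and hidden datasets are skipped.
 */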
int
zvol_create_minors(const char *name)
{
	uint64_t cookie;
	objset_t *os;
	char *osname, *p;
	int error, len;

	if (dataset_name_hidden(name))
		return (0);

	if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
		printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n",
		    name, error);
		return (error);
	}
	if (dmu_objset_type(os) == DMU_OST_ZVOL) {
		if ((error = zvol_create_minor(name)) == 0) {
			error = zvol_create_snapshots(os, name);
		} else {
			printf("ZFS WARNING: Unable to create ZVOL %s "
			    "(error=%d).\n", name, error);
		}
		dmu_objset_rele(os, FTAG);
		return (error);
	}
	if (dmu_objset_type(os) != DMU_OST_ZFS) {
		dmu_objset_rele(os, FTAG);
		return (0);
	}

	osname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	if (snprintf(osname, MAXPATHLEN, "%s/", name) >= MAXPATHLEN) {
		dmu_objset_rele(os, FTAG);
		kmem_free(osname, MAXPATHLEN);
		return (ENOENT);
	}
	p = osname + strlen(osname);
	len = MAXPATHLEN - (p - osname);

	/* Prefetch the datasets. */
	cookie = 0;
	while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
		if (!dataset_name_hidden(osname))
			(void) dmu_objset_prefetch(osname, NULL);
	}

	cookie = 0;
	while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL,
	    &cookie) == 0) {
		dmu_objset_rele(os, FTAG);
		(void) zvol_create_minors(osname);
		if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
			printf("ZFS WARNING: Unable to put hold on %s "
			    "(error=%d).\n", name, error);
			/* Don't leak the name buffer on the error path. */
			kmem_free(osname, MAXPATHLEN);
			return (error);
		}
	}

	dmu_objset_rele(os, FTAG);
	kmem_free(osname, MAXPATHLEN);
	return (0);
}

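/*
 * Rename a single minor by withering the old provider and creating a
 * replacement with the same zvol state under the new name.
 */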
static void
zvol_rename_minor(struct g_geom *gp, const char *newname)
{
	struct g_provider *pp;
	zvol_state_t *zv;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	g_topology_assert();

	pp = LIST_FIRST(&gp->provider);
	ASSERT(pp != NULL);
	zv = pp->private;
	ASSERT(zv != NULL);

	zv->zv_provider = NULL;
	g_wither_provider(pp, ENXIO);

	pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
	pp->sectorsize = DEV_BSIZE;
	pp->mediasize = zv->zv_volsize;
	pp->private = zv;
	zv->zv_provider = pp;
	strlcpy(zv->zv_name, newname, sizeof(zv->zv_name));
	g_error_provider(pp, 0);
}

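/*
 * Walk all ZVOL providers and rename those that belong to the renamed
 * dataset: an exact match, or a child/snapshot prefixed by the old name.
 */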
void
zvol_rename_minors(const char *oldname, const char *newname)
{
	char name[MAXPATHLEN];
	struct g_provider *pp;
	struct g_geom *gp;
	size_t oldnamelen;
	zvol_state_t *zv;

	oldnamelen = strlen(oldname);

	DROP_GIANT();
	mutex_enter(&spa_namespace_lock);
	g_topology_lock();

	LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) {
		pp = LIST_FIRST(&gp->provider);
		if (pp == NULL)
			continue;
		zv = pp->private;
		if (zv == NULL)
			continue;
		if (strcmp(zv->zv_name, oldname) == 0) {
			zvol_rename_minor(gp, newname);
		} else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
		    (zv->zv_name[oldnamelen] == '/' ||
		    zv->zv_name[oldnamelen] == '@')) {
			snprintf(name, sizeof(name), "%s%c%s", newname,
			    zv->zv_name[oldnamelen],
			    zv->zv_name + oldnamelen + 1);
			zvol_rename_minor(gp, name);
		}
	}

	g_topology_unlock();
	mutex_exit(&spa_namespace_lock);
	PICKUP_GIANT();
}