/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Soft partitioning metadevice driver (md_sp), administrative routines.
 *
 * This file contains the administrative routines for the soft partitioning
 * metadevice driver.  All administration is done through the use of ioctl's.
 *
 * The primary ioctl's supported by soft partitions are as follows:
 *
 *	MD_IOCSET	- set up a new soft partition.
 *	MD_IOCGET	- get the unit structure of a soft partition.
 *	MD_IOCRESET	- delete a soft partition.
 *	MD_IOCGROW	- add space to a soft partition.
 *	MD_IOCGET_DEVS	- get the device the soft partition is built on.
 *	MD_IOC_SPSTATUS	- set the status (un_status field in the soft
 *			  partition unit structure) for one or more soft
 *			  partitions.
 *
 * Note that, as with other metadevices, the majority of the work for
 * building/growing/deleting soft partitions is performed in userland
 * (specifically in libmeta, see meta_sp.c).  The driver's main administrative
 * function is to maintain the in-core & metadb entries associated with a soft
 * partition.
 *
 * In addition, a few other ioctl's are supported via helper routines in
 * the md driver.  These are:
 *
 *	DKIOCINFO	- get "disk" information.
 *	DKIOCGGEOM	- get geometry information.
 *	DKIOCGVTOC	- get vtoc information.
 */
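/*
 * For illustration only (this sketch is not part of the driver): userland
 * consumers normally reach these ioctls through libmeta rather than by
 * issuing them directly.  Stripped of libmeta's set handling and error
 * packaging, an MD_IOC_SPSTATUS request would look roughly as follows;
 * the admin device path, the soft partition minor number and the new
 * sp_status_t value used here are assumptions:
 *
 *	md_sp_statusset_t	sps;
 *	minor_t			mnums[1] = { sp_minor };
 *	int			fd = open("/dev/md/admin", O_RDWR);
 *
 *	(void) memset(&sps, 0, sizeof (sps));
 *	sps.num_units = 1;
 *	sps.size = sizeof (mnums);
 *	sps.new_status = new_status;
 *	sps.minors = (uintptr_t)mnums;
 *	(void) ioctl(fd, MD_IOC_SPSTATUS, &sps);
 */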
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_sp.h>
#include <sys/lvm/md_notify.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/model.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>

extern int		md_status;

extern unit_t		md_nunits;
extern set_t		md_nsets;
extern md_set_t		md_set[];

extern md_ops_t		sp_md_ops;
extern md_krwlock_t	md_unit_array_rw;
extern major_t		md_major;

/*
 * FUNCTION:	sp_getun()
 * INPUT:	mnum	- minor number of soft partition to get.
 * OUTPUT:	mde	- return error pointer.
 * RETURNS:	mp_unit_t *	- ptr to unit structure requested
 *		NULL		- error
 * PURPOSE:	Returns a reference to the soft partition unit structure
 *		indicated by the passed-in minor number.
 */
static mp_unit_t *
sp_getun(minor_t mnum, md_error_t *mde)
{
	mp_unit_t	*un;
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);

	/* check set */
	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
		return (NULL);
	}

	if (md_get_setstatus(setno) & MD_SET_STALE) {
		(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
		return (NULL);
	}

	ui = MDI_UNIT(mnum);

	if (ui == NULL) {
		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
		return (NULL);
	}

	un = (mp_unit_t *)MD_UNIT(mnum);

	if (un->c.un_type != MD_METASP) {
		(void) mdmderror(mde, MDE_NOT_SP, mnum);
		return (NULL);
	}

	return (un);
}
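/*
 * Note that sp_getun() takes no locks of its own; callers that go on to
 * modify the returned unit (sp_setstatus(), for example) are responsible
 * for acquiring the appropriate unit lock first.
 */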


/*
 * FUNCTION:	sp_setstatus()
 * INPUT:	d	- data ptr passed in from ioctl.
 *		mode	- pass-through to ddi_copyin.
 *		lockp	- lock ptr.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Set the status of one or more soft partitions atomically.
 *		This implements the MD_IOC_SPSTATUS ioctl.  Soft partitions
 *		are passed in as an array of minor numbers.  The un_status
 *		field in the unit structure of each soft partition is set to
 *		the status passed in and all unit structures are recommitted
 *		to the metadb at once.
 */
static int
sp_setstatus(void *d, int mode, IOLOCK *lockp)
{
	minor_t		*minors;
	mp_unit_t	*un;
	mddb_recid_t	*recids;
	int		i, nunits, sz;
	int		err = 0;
	sp_status_t	status;
	md_error_t	*mdep;

	md_sp_statusset_t	*msp = (md_sp_statusset_t *)d;

	nunits = msp->num_units;
	sz = msp->size;
	status = msp->new_status;
	mdep = &msp->mde;

	mdclrerror(mdep);
	/* allocate minor number and recids arrays */
	minors = kmem_alloc(sz, KM_SLEEP);
	recids = kmem_alloc((nunits + 1) * sizeof (mddb_recid_t), KM_SLEEP);

	/* copyin minor number array */
	if (err = ddi_copyin((void *)(uintptr_t)msp->minors, minors, sz, mode))
		goto out;

	/* check to make sure all units are valid first */
	for (i = 0; i < nunits; i++) {
		if ((un = sp_getun(minors[i], mdep)) == NULL) {
			err = mdmderror(mdep, MDE_INVAL_UNIT, minors[i]);
			goto out;
		}
	}

	/* update state for all units */
	for (i = 0; i < nunits; i++) {
		un = sp_getun(minors[i], mdep);
		(void) md_ioctl_writerlock(lockp, MDI_UNIT(minors[i]));
		un->un_status = status;
		recids[i] = un->c.un_record_id;
		md_ioctl_writerexit(lockp);
	}

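	/*
	 * Terminate the recid list with a 0 entry; mddb_commitrecs_wrapper()
	 * takes a zero-terminated array, which is why nunits + 1 entries
	 * were allocated above.
	 */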
	recids[i] = 0;
	mddb_commitrecs_wrapper(recids);

out:
	kmem_free(minors, sz);
	kmem_free(recids, ((nunits + 1) * sizeof (mddb_recid_t)));
	return (err);
}


/*
 * FUNCTION:	sp_update_watermarks()
 * INPUT:	d	- data ptr passed in from ioctl.
 *		mode	- pass-through to ddi_copyin.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	This implements the MD_IOC_SPUPDATEWM ioctl.
 *		Watermarks are passed in an array.
 */
static int
sp_update_watermarks(void *d, int mode)
{
	minor_t			mnum;
	set_t			setno;
	md_error_t		*mdep;
	mp_unit_t		*un;
	int			err = 0;
	size_t			wsz;
	size_t			osz;
	mp_watermark_t		*watermarks;
	sp_ext_offset_t		*offsets;
	md_dev64_t		device;
	buf_t			*bp;
	int			i;
	md_sp_update_wm_t	*mup = (md_sp_update_wm_t *)d;
	side_t			side;

	mnum = mup->mnum;
	setno = MD_MIN2SET(mnum);
	side = mddb_getsidenum(setno);
	un = MD_UNIT(mnum);

	if (un == NULL)
		return (EFAULT);

	mdep = &mup->mde;

	mdclrerror(mdep);

	/* Validate the set */
	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));

	wsz = mup->count * sizeof (mp_watermark_t);
	watermarks = kmem_alloc(wsz, KM_SLEEP);

	osz = mup->count * sizeof (sp_ext_offset_t);
	offsets = kmem_alloc(osz, KM_SLEEP);

	/*
	 * Once we're here, we are no longer stateless: we cannot
	 * return without first freeing the watermarks and offset
	 * arrays we just allocated.  So use the "out" label instead
	 * of "return."
	 */

	/* Retrieve the watermark and offset arrays from user land */

	if (ddi_copyin((void *)(uintptr_t)mup->wmp, watermarks, wsz, mode)) {
		err = EFAULT;
		goto out;
	}

	if (ddi_copyin((void *)(uintptr_t)mup->osp, offsets, osz, mode)) {
		err = EFAULT;
		goto out;
	}

	/*
	 * NOTE: For multi-node sets we only commit the watermarks if we are
	 * the master node. This avoids an ioctl-within-ioctl deadlock if the
	 * underlying device is a mirror.
	 */
	if (MD_MNSET_SETNO(setno) && !md_set[setno].s_am_i_master) {
		goto out;
	}

	device = un->un_dev;
	if ((md_getmajor(device) != md_major) &&
	    (md_devid_found(setno, side, un->un_key) == 1)) {
		device = md_resolve_bydevid(mnum, device, un->un_key);
	}
	/*
	 * Flag the fact that we're coming from an ioctl handler to the
	 * underlying device so that it can take appropriate action if needed.
	 * This is necessary for multi-owner mirrors as they may need to
	 * update the metadevice state as a result of the layered open.
	 */
	if (md_layered_open(mnum, &device, MD_OFLG_FROMIOCTL)) {
		err = mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR,
		    mnum, device);
		goto out;
	}

	bp = kmem_alloc(biosize(), KM_SLEEP);
	bioinit(bp);

	for (i = 0; i < mup->count; i++) {

		/*
		 * Even the "constant" fields should be initialized
		 * here, since bioreset() below will clear them.
		 */
		bp->b_flags = B_WRITE;
		bp->b_bcount = sizeof (mp_watermark_t);
		bp->b_bufsize = sizeof (mp_watermark_t);
		bp->b_un.b_addr = (caddr_t)&watermarks[i];
		bp->b_lblkno = offsets[i];
		bp->b_edev = md_dev64_to_dev(device);
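
		/*
		 * Each watermark is written synchronously with its own
		 * watermark-sized I/O: b_lblkno is the block offset of
		 * watermark i on the underlying device and biowait()
		 * below blocks until that write has completed.
		 */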

		/*
		 * For MN sets only:
		 * Use a special flag MD_STR_WMUPDATE, for the following case:
		 * If the watermarks reside on a mirror disk and a switch
		 * of ownership is triggered by this IO,
		 * the message that is generated by that request must be
		 * processed even if the commd subsystem is currently suspended.
		 *
		 * For non-MN sets or non-mirror metadevices,
		 * this flag has no meaning and is not checked.
		 */

		md_call_strategy(bp, MD_NOBLOCK | MD_STR_WMUPDATE, NULL);

		if (biowait(bp)) {
			err = mdmderror(mdep,
			    MDE_SP_BADWMWRITE, mnum);
			break;
		}

		/* Get the buf_t ready for the next iteration */
		bioreset(bp);
	}
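	/*
	 * Note that a failed watermark write terminates the loop early; we
	 * still fall through to release the buf and the layered open before
	 * returning the error.
	 */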

	biofini(bp);
	kmem_free(bp, biosize());

	md_layered_close(device, MD_OFLG_NULL);

out:
	kmem_free(watermarks, wsz);
	kmem_free(offsets, osz);

	return (err);
}


/*
 * FUNCTION:	sp_read_watermark()
 * INPUT:	d	- data ptr passed in from ioctl.
 *		mode	- pass-through to ddi_copyin.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	This implements the MD_IOC_SPREADWM ioctl.
 */
static int
sp_read_watermark(void *d, int mode)
{
	md_error_t		*mdep;
	mp_watermark_t		watermark;
	md_dev64_t		device;
	buf_t			*bp;
	md_sp_read_wm_t		*mrp = (md_sp_read_wm_t *)d;

	mdep = &mrp->mde;

	mdclrerror(mdep);

	device = mrp->rdev;

	/*
	 * Flag the fact that we are being called from ioctl context so that
	 * the underlying device can take any necessary extra steps to handle
	 * this scenario.
	 */
	if (md_layered_open((minor_t)-1, &device, MD_OFLG_FROMIOCTL)) {
		return (mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR,
		    (minor_t)NODEV, device));
	}

	bp = kmem_alloc(biosize(), KM_SLEEP);
	bioinit(bp);

	bp->b_flags = B_READ;
	bp->b_bcount = sizeof (mp_watermark_t);
	bp->b_bufsize = sizeof (mp_watermark_t);
	bp->b_un.b_addr = (caddr_t)&watermark;
	bp->b_lblkno = mrp->offset;
	bp->b_edev = md_dev64_to_dev(device);

	md_call_strategy(bp, MD_NOBLOCK, NULL);

	if (biowait(bp)) {
		/*
		 * Taking advantage of the knowledge that mdmderror()
		 * returns 0, so we don't really need to keep track of
		 * an error code other than in the error struct.
		 */
		(void) mdmderror(mdep, MDE_SP_BADWMREAD,
		    getminor(device));
	}

	biofini(bp);
	kmem_free(bp, biosize());

	md_layered_close(device, MD_OFLG_NULL);

	if (ddi_copyout(&watermark, (void *)(uintptr_t)mrp->wmp,
	    sizeof (mp_watermark_t), mode)) {
		return (EFAULT);
	}

	return (0);
}


/*
 * FUNCTION:	sp_set()
 * INPUT:	d	- data ptr passed in from ioctl.
 *		mode	- pass-through to ddi_copyin.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Create a soft partition.  The unit structure representing
 *		the soft partition is passed down from userland.  We allocate
 *		a metadb entry, copy in the unit structure, handle any
 *		metadevice parenting issues, then commit the record to the
 *		metadb.  Once the record is in the metadb, we must also
 *		build the associated in-core structures.  This is done via
 *		sp_build_incore() (see sp.c).
 */
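/*
 * A note on the recids[] array used below: recids[0] holds the record id
 * of the new soft partition, recids[1] optionally holds the record id of
 * a child metadevice being parented, and the list is always terminated
 * with a 0 entry before being handed to mddb_commitrecs_wrapper().
 */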
static int
sp_set(void *d, int mode)
{
	minor_t		mnum;
	mp_unit_t	*un;
	void		*rec_addr;
	mddb_recid_t	recids[3];
	mddb_type_t	rec_type;
	int		err;
	set_t		setno;
	md_error_t	*mdep;
	md_unit_t	*child_un;
	md_set_params_t *msp = (md_set_params_t *)d;

	mnum = msp->mnum;
	setno = MD_MIN2SET(mnum);
	mdep = &msp->mde;

	mdclrerror(mdep);

	/* validate set */

	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));

	/* get the record type */
	rec_type = (mddb_type_t)md_getshared_key(setno,
	    sp_md_ops.md_driver.md_drivername);

	/* check if there is already a device with this minor number */
	un = MD_UNIT(mnum);
	if (un != NULL)
		return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum));

	/* create the db record for this soft partition */

	if (msp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
		return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum));
#else
		recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0,
		    MD_CRO_64BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno);
#endif
	} else {
		recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0,
		    MD_CRO_32BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno);
	}
	/* set initial value for possible child record */
	recids[1] = 0;
	if (recids[0] < 0)
		return (mddbstatus2error(mdep, recids[0], mnum, setno));

	/* get the address of the soft partition db record */
	rec_addr = (void *) mddb_getrecaddr(recids[0]);

	/*
	 * at this point we can happily mess with the soft partition
	 * db record since we haven't committed it to the metadb yet.
	 * if we crash before we commit, the uncommitted record will be
	 * automatically purged.
	 */

	/* copy in the user's soft partition unit struct */
	if (err = ddi_copyin((void *)(uintptr_t)msp->mdp,
	    rec_addr, (size_t)msp->size, mode)) {
		mddb_deleterec_wrapper(recids[0]);
		return (EFAULT);
	}

	/* fill in common unit structure fields which aren't set in userland */
	un = (mp_unit_t *)rec_addr;

	/* All 64 bit metadevices only support EFI labels. */
	if (msp->options & MD_CRO_64BIT) {
		un->c.un_flag |= MD_EFILABEL;
	}

	MD_SID(un) = mnum;
	MD_RECID(un) = recids[0];
	MD_PARENT(un) = MD_NO_PARENT;
	un->c.un_revision |= MD_FN_META_DEV;

	/* if we are parenting a metadevice, set our child's parent field */
	if (md_getmajor(un->un_dev) == md_major) {
		/* it's a metadevice, need to parent it */
		child_un = MD_UNIT(md_getminor(un->un_dev));
		if (child_un == NULL) {
			mddb_deleterec_wrapper(recids[0]);
			return (mdmderror(mdep, MDE_INVAL_UNIT,
			    md_getminor(un->un_dev)));
		}
		md_set_parent(un->un_dev, MD_SID(un));

		/* set child recid and recids end marker */
		recids[1] = MD_RECID(child_un);
		recids[2] = 0;
	}

	/*
	 * build the incore structures.
	 */
	if (err = sp_build_incore(rec_addr, 0)) {
		md_nblocks_set(mnum, -1ULL);
		MD_UNIT(mnum) = NULL;

		mddb_deleterec_wrapper(recids[0]);
		return (err);
	}

	/*
	 * Update unit availability
	 */
	md_set[setno].s_un_avail--;

	/*
	 * commit the record.
	 * if we had to update a child record, it will get committed
	 * as well.
	 */
	mddb_commitrecs_wrapper(recids);

	/* create the mdi_unit struct for this soft partition */
	md_create_unit_incore(mnum, &sp_md_ops, 0);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, TAG_METADEVICE, MD_UN2SET(un),
	    MD_SID(un));
	return (0);
}


/*
 * FUNCTION:	sp_get()
 * INPUT:	d	- data ptr.
 *		mode	- pass-through to ddi_copyout.
 *		lock	- lock ptr.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Get the soft partition unit structure specified by the
 *		minor number.  The in-core unit structure is obtained
 *		and copied into the md_i_get structure passed down from
 *		userland.
 */
static int
sp_get(void *d, int mode, IOLOCK *lock)
{
	minor_t		mnum;
	mdi_unit_t	*ui;
	mp_unit_t	*un;
	md_error_t	*mdep;
	md_i_get_t	*migp = d;


	mnum = migp->id;
	mdep = &migp->mde;

	mdclrerror(mdep);

	/* make sure this is a valid unit structure */
	if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));

	/* get the mdi_unit */
	if ((ui = MDI_UNIT(mnum)) == NULL) {
		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
	}

	/*
	 * md_ioctl_readerlock returns a reference to the in-core
	 * unit structure.  this lock will be dropped by
	 * md_ioctl_lock_exit() before the ioctl returns.
	 */
	un = (mp_unit_t *)md_ioctl_readerlock(lock, ui);

	/* verify the md_i_get structure */
	if (migp->size == 0) {
		migp->size = un->c.un_size;
		return (0);
	}
	if (migp->size < un->c.un_size) {
		return (EFAULT);
	}
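	/*
	 * Callers may probe for the required buffer size by passing in
	 * size == 0: the unit's size is returned and no data is copied
	 * out.  A second MD_IOCGET with size >= un->c.un_size then
	 * retrieves the unit structure itself.
	 */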

	/* copyout unit */
	if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp,
	    un->c.un_size, mode))
		return (EFAULT);
	return (0);
}


/*
 * FUNCTION:	sp_reset()
 * INPUT:	reset_params	- soft partitioning reset parameters.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Do the setup work needed to delete a soft partition.
 *		Note that the actual removal of both in-core and metadb
 *		structures is done in the reset_sp() routine (see sp.c).
 *		In addition, since multiple soft partitions may exist
 *		on top of a single metadevice, the soft partition reset
 *		parameters (md_sp_reset_t) contain information about
 *		how the soft partition should deparent/reparent the
 *		underlying metadevice.  If the underlying metadevice is
 *		to be deparented, the new_parent field will be MD_NO_PARENT,
 *		otherwise it will contain the minor number of another
 *		soft partition built on top of the underlying metadevice.
 */
static int
sp_reset(md_sp_reset_t *softp)
{
	minor_t		mnum = softp->mnum;
	mdi_unit_t	*ui;
	mp_unit_t	*un;
	md_unit_t	*child_un;
	set_t		setno = MD_MIN2SET(mnum);

	mdclrerror(&softp->mde);

	/* get the unit structure */
	if ((un = sp_getun(mnum, &softp->mde)) == NULL) {
		return (mdmderror(&softp->mde, MDE_INVAL_UNIT, mnum));
	}

	/* don't delete if we have a parent */
	if (MD_HAS_PARENT(un->c.un_parent)) {
		return (mdmderror(&softp->mde, MDE_IN_USE, mnum));
	}

	rw_enter(&md_unit_array_rw.lock, RW_WRITER);

	ui = MDI_UNIT(mnum);
	(void) md_unit_openclose_enter(ui);

	/* don't delete if we are currently open */
	if (md_unit_isopen(ui)) {
		md_unit_openclose_exit(ui);
		rw_exit(&md_unit_array_rw.lock);
		return (mdmderror(&softp->mde, MDE_IS_OPEN, mnum));
	}

	md_unit_openclose_exit(ui);

	/*
	 * if we are built on a metadevice, we need to deparent
	 * or reparent that metadevice.
	 */
	if (md_getmajor(un->un_dev) == md_major) {
		child_un = MD_UNIT(md_getminor(un->un_dev));
		md_set_parent(un->un_dev, softp->new_parent);
		mddb_commitrec_wrapper(MD_RECID(child_un));
	}
	/* remove the soft partition */
	reset_sp(un, mnum, 1);

	/*
	 * Update unit availability
	 */
	md_set[setno].s_un_avail++;

	/*
	 * If MN set, reset s_un_next so all nodes can have
	 * the same view of the next available slot when
	 * nodes are -w and -j
	 */
	if (MD_MNSET_SETNO(setno)) {
		md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
	}

	/* release locks and return */
out:
	rw_exit(&md_unit_array_rw.lock);
	return (0);
}


/*
 * FUNCTION:	sp_grow()
 * INPUT:	d	- data ptr.
 *		mode	- pass-through to ddi_copyin.
 *		lockp	- lock ptr.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Attach more space to a soft partition.  We are passed in
 *		a new unit structure with the new extents and other updated
 *		information.  The new unit structure essentially replaces
 *		the old unit for this soft partition.  We place the new
 *		unit into the metadb, delete the old metadb record, and
 *		then update the in-core unit structure array to point to
 *		the new unit.
 */
static int
sp_grow(void *d, int mode, IOLOCK *lockp)
{
	minor_t		mnum;
	mp_unit_t	*un, *new_un;
	mdi_unit_t	*ui;
	minor_t		*par = NULL;
	IOLOCK		*plock = NULL;
	int		i;
	mddb_recid_t	recid;
	mddb_type_t	rec_type;
	mddb_recid_t	old_vtoc = 0;
	md_create_rec_option_t options;
	int		err;
	int		rval = 0;
	set_t		setno;
	md_error_t	*mdep;
	int		npar;
	md_grow_params_t *mgp = (md_grow_params_t *)d;

	mnum = mgp->mnum;
	mdep = &mgp->mde;
	setno = MD_MIN2SET(mnum);
	npar = mgp->npar;

	mdclrerror(mdep);

	/* validate set */
	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));

	/* make sure this soft partition already exists */
	ui = MDI_UNIT(mnum);
	if (ui == NULL)
		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));

	/* handle any parents */
	if (npar >= 1) {
		ASSERT((minor_t *)(uintptr_t)mgp->par != NULL);
		par = kmem_alloc(npar * sizeof (*par), KM_SLEEP);
		plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP);
		if (ddi_copyin((void *)(uintptr_t)mgp->par, par,
		    (npar * sizeof (*par)), mode) != 0) {
			kmem_free(par, npar * sizeof (*par));
			kmem_free(plock, npar * sizeof (*plock));
			return (EFAULT);
		}
	}

	/*
	 * handle parent locking.  grab the unit writer lock,
	 * then all parent ioctl locks, and then finally our own.
	 * parents should be sorted to avoid deadlock.
	 */
	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
	for (i = 0; i < npar; ++i) {
		(void) md_ioctl_writerlock(&plock[i],
		    MDI_UNIT(par[i]));
	}
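
	/*
	 * The parent ioctl locks taken above are released in reverse order
	 * at the "out" label below, before the unit array lock is dropped.
	 */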
	un = (mp_unit_t *)md_ioctl_writerlock(lockp, ui);

	rec_type = (mddb_type_t)md_getshared_key(setno,
	    sp_md_ops.md_driver.md_drivername);

	/*
	 * Preserve the friendly name nature of the unit that is growing.
	 */
	options = MD_CRO_SOFTPART;
	if (un->c.un_revision & MD_FN_META_DEV)
		options |= MD_CRO_FN;
	if (mgp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
		rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum);
		goto out;
#else
		recid = mddb_createrec((size_t)mgp->size, rec_type, 0,
		    MD_CRO_64BIT | options, setno);
#endif
	} else {
		recid = mddb_createrec((size_t)mgp->size, rec_type, 0,
		    MD_CRO_32BIT | options, setno);
	}
	if (recid < 0) {
		rval = mddbstatus2error(mdep, (int)recid, mnum, setno);
		goto out;
	}

	/* get the address of the new unit */
	new_un = (mp_unit_t *)mddb_getrecaddr(recid);

	/* copy in the user's unit struct */
	err = ddi_copyin((void *)(uintptr_t)mgp->mdp, new_un,
	    (size_t)mgp->size, mode);
	if (err) {
		mddb_deleterec_wrapper(recid);
		rval = EFAULT;
		goto out;
	}
	if (options & MD_CRO_FN)
		new_un->c.un_revision |= MD_FN_META_DEV;

	/* All 64 bit metadevices only support EFI labels. */
	if (mgp->options & MD_CRO_64BIT) {
		new_un->c.un_flag |= MD_EFILABEL;
		/*
		 * If the device was previously smaller than a terabyte,
		 * and had a vtoc record attached to it, we remove the
		 * vtoc record, because the layout has changed completely.
		 */
		if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
		    (un->c.un_vtoc_id != 0)) {
			old_vtoc = un->c.un_vtoc_id;
			new_un->c.un_vtoc_id =
			    md_vtoc_to_efi_record(old_vtoc, setno);
		}
	}

	/* commit new unit struct */
	MD_RECID(new_un) = recid;
	mddb_commitrec_wrapper(recid);

	/*
	 * delete old unit struct.
	 */
	mddb_deleterec_wrapper(MD_RECID(un));

	/* place new unit in in-core array */
	md_nblocks_set(mnum, new_un->c.un_total_blocks);
	MD_UNIT(mnum) = new_un;

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, TAG_METADEVICE,
	    MD_UN2SET(new_un), MD_SID(new_un));

	/*
	 * If old_vtoc has a non-zero value, we know:
	 * - this unit has grown across the one terabyte boundary,
	 * - there was a vtoc record for the unit, and
	 * - that vtoc record is no longer needed, because a new efi
	 *   record has been created for this unit.
	 */
	if (old_vtoc != 0) {
		mddb_deleterec_wrapper(old_vtoc);
	}

	/* release locks, return success */
out:
	for (i = npar - 1; (i >= 0); --i)
		md_ioctl_writerexit(&plock[i]);
	rw_exit(&md_unit_array_rw.lock);
	if (plock != NULL)
		kmem_free(plock, npar * sizeof (*plock));
	if (par != NULL)
		kmem_free(par, npar * sizeof (*par));
	return (rval);
}

/*
 * FUNCTION:	sp_getdevs()
 * INPUT:	d	- data ptr.
 *		mode	- pass-through to ddi_copyout.
 *		lockp	- lock ptr.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Get the device on which the soft partition is built.
 *		This is simply a matter of copying out the md_dev64_t stored
 *		in the soft partition unit structure.
 */
static int
sp_getdevs(
	void			*d,
	int			mode,
	IOLOCK			*lockp
)
{
	minor_t			mnum;
	mdi_unit_t		*ui;
	mp_unit_t		*un;
	md_error_t		*mdep;
	md_dev64_t		*devsp;
	md_dev64_t		unit_dev;
	md_getdevs_params_t	*mgdp = (md_getdevs_params_t *)d;


	mnum = mgdp->mnum;
	mdep = &(mgdp->mde);

	mdclrerror(mdep);

	/* check set */
	if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
	/* check unit */
	if ((ui = MDI_UNIT(mnum)) == NULL) {
		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
	}
	/* get unit */
	un = (mp_unit_t *)md_ioctl_readerlock(lockp, ui);
	devsp = (md_dev64_t *)(uintptr_t)mgdp->devs;
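
	/*
	 * A caller may pass cnt == 0 to query the number of underlying
	 * devices without any copyout; either way cnt is set to 1 on
	 * return, since a soft partition has exactly one underlying device.
	 */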

	/* only ever 1 device for a soft partition */
	if (mgdp->cnt != 0) {
		/* do miniroot->target device translation */
		unit_dev = un->un_dev;
		if (md_getmajor(unit_dev) != md_major) {
			if ((unit_dev = md_xlate_mini_2_targ(unit_dev))
			    == NODEV64)
				return (ENODEV);
		}
		/* copyout dev information */
		if (ddi_copyout(&unit_dev, devsp, sizeof (*devsp), mode) != 0)
			return (EFAULT);
	}
	mgdp->cnt = 1;

	return (0);
}

/*
 * sp_set_capability:
 * ------------------
 * Called to set or clear a capability for a soft partition;
 * invoked by the MD_MN_SET_CAP ioctl.
 */
static int
sp_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp)
{
	set_t		setno;
	mdi_unit_t	*ui;
	mp_unit_t	*un;
	int		err = 0;

	if ((un = sp_getun(p->mnum, &p->mde)) == NULL)
		return (EINVAL);

	/* This function is only valid for a multi-node set */
	setno = MD_MIN2SET(p->mnum);
	if (!MD_MNSET_SETNO(setno)) {
		return (EINVAL);
	}
	ui = MDI_UNIT(p->mnum);
	(void) md_ioctl_readerlock(lockp, ui);

	if (p->sc_set & DKV_ABR_CAP) {
		void (*inc_abr_count)();

		ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */
		/* Increment abr count in underlying metadevice */
		inc_abr_count = (void(*)())md_get_named_service(un->un_dev,
		    0, MD_INC_ABR_COUNT, 0);
		if (inc_abr_count != NULL)
			(void) (*inc_abr_count)(un->un_dev);
	} else {
		void (*dec_abr_count)();

		ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */
		/* Decrement abr count in underlying metadevice */
		dec_abr_count = (void(*)())md_get_named_service(un->un_dev,
		    0, MD_DEC_ABR_COUNT, 0);
		if (dec_abr_count != NULL)
			(void) (*dec_abr_count)(un->un_dev);
	}
	if (p->sc_set & DKV_DMR_CAP) {
		ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */
	} else {
		ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */
	}
	md_ioctl_readerexit(lockp);
	return (err);
}


/*
 * FUNCTION:	sp_admin_ioctl()
 * INPUT:	cmd	- ioctl to be handled.
 *		data	- data ptr.
 *		mode	- pass-through to copyin/copyout routines.
 *		lockp	- lock ptr.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Handle administrative ioctl's.  Essentially a large
 *		switch statement to dispatch the ioctl's to their
 *		handlers.  See comment at beginning of file for specifics
 *		on which ioctl's are handled.
 */
static int
sp_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
{
	size_t	sz = 0;
	void	*d = NULL;
	int	err = 0;

	/* We can only handle 32-bit clients for internal commands */
	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
		return (EINVAL);
	}

	/* handle ioctl */
	switch (cmd) {

	case MD_IOCSET:
	{
		/* create new soft partition */
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_set_params_t);

		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_set(d, mode);
		break;
	}

	case MD_IOCGET:
	{
		/* get soft partition unit structure */
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_i_get_t);

		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_get(d, mode, lockp);
		break;
	}
	case MD_IOCRESET:
	{
		/* delete soft partition */
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_sp_reset_t);
		d = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_reset((md_sp_reset_t *)d);
		break;
	}

	case MD_IOCGROW:
	{
		/* grow soft partition */
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_grow_params_t);
		d  = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_grow(d, mode, lockp);
		break;
	}

	case MD_IOCGET_DEVS:
	{
		/* get underlying device */
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_getdevs_params_t);
		d  = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_getdevs(d, mode, lockp);
		break;
	}

	case MD_IOC_SPSTATUS:
	{
		/* set the status field of one or more soft partitions */
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_sp_statusset_t);
		d  = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_setstatus(d, mode, lockp);
		break;
	}

	case MD_IOC_SPUPDATEWM:
	case MD_MN_IOC_SPUPDATEWM:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_sp_update_wm_t);
		d  = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_update_watermarks(d, mode);
		break;
	}

	case MD_IOC_SPREADWM:
	{
		if (! (mode & FREAD))
			return (EACCES);

		sz = sizeof (md_sp_read_wm_t);
		d  = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_read_watermark(d, mode);
		break;
	}

	case MD_MN_SET_CAP:
	{
		if (! (mode & FWRITE))
			return (EACCES);

		sz = sizeof (md_mn_setcap_params_t);
		d  = kmem_alloc(sz, KM_SLEEP);

		if (ddi_copyin(data, d, sz, mode)) {
			err = EFAULT;
			break;
		}

		err = sp_set_capability((md_mn_setcap_params_t *)d, lockp);
		break;
	}

	default:
		return (ENOTTY);
	}

	/*
	 * copyout and free any args
	 */
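	/*
	 * Note that most of the handlers above report metadevice errors by
	 * packaging them into the mde field of the copied-in structure
	 * (mdmderror(), for example, returns 0), so the structure is copied
	 * back out on success to make those packaged errors visible to
	 * userland.
	 */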
	if (sz != 0) {
		if (err == 0) {
			if (ddi_copyout(d, data, sz, mode) != 0) {
				err = EFAULT;
			}
		}
		kmem_free(d, sz);
	}
	return (err);
}


/*
 * FUNCTION:	md_sp_ioctl()
 * INPUT:	dev	- device we are operating on.
 *		cmd	- ioctl to be handled.
 *		data	- data ptr.
 *		mode	- pass-through to copyin/copyout routines.
 *		lockp	- lock ptr.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Dispatch ioctl's.  Administrative ioctl's are handled
 *		by sp_admin_ioctl.  All others (see comment at beginning
 *		of this file) are handled in-line here.
 */
int
md_sp_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
{
	minor_t		mnum = getminor(dev);
	mp_unit_t	*un;
	mdi_unit_t	*ui;
	int		err = 0;

	/* handle admin ioctls */
	if (mnum == MD_ADM_MINOR)
		return (sp_admin_ioctl(cmd, data, mode, lockp));

	/* check unit */
	if ((MD_MIN2SET(mnum) >= md_nsets) ||
	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
	    ((ui = MDI_UNIT(mnum)) == NULL) ||
	    ((un = MD_UNIT(mnum)) == NULL))
		return (ENXIO);

	/* is this a supported ioctl? */
	err = md_check_ioctl_against_unit(cmd, un->c);
	if (err != 0) {
		return (err);
	}


	/* handle ioctl */
	switch (cmd) {

	case DKIOCINFO:
	{
		/* "disk" info */
		struct dk_cinfo		*p;

		if (! (mode & FREAD))
			return (EACCES);

		p = kmem_alloc(sizeof (*p), KM_SLEEP);

		get_info(p, mnum);
		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
			err = EFAULT;

		kmem_free(p, sizeof (*p));
		return (err);
	}

	case DKIOCGMEDIAINFO:
	{
		struct dk_minfo	p;

		if (! (mode & FREAD))
			return (EACCES);

		get_minfo(&p, mnum);
		if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
			err = EFAULT;

		return (err);
	}

	case DKIOCGGEOM:
	{
		/* geometry information */
		struct dk_geom		*p;

		if (! (mode & FREAD))
			return (EACCES);

		p = kmem_alloc(sizeof (*p), KM_SLEEP);

		md_get_geom((md_unit_t *)un, p);
		if (ddi_copyout((caddr_t)p, data, sizeof (*p),
		    mode) != 0)
			err = EFAULT;

		kmem_free(p, sizeof (*p));
		return (err);
	}
	case DKIOCGAPART:
	{
		struct dk_map	dmp;

		err = 0;
		md_get_cgapart((md_unit_t *)un, &dmp);

		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
			    mode) != 0)
				err = EFAULT;
		}
#ifdef _SYSCALL32
		else {
			struct dk_map32 dmp32;

			dmp32.dkl_cylno = dmp.dkl_cylno;
			dmp32.dkl_nblk = dmp.dkl_nblk;

			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
			    mode) != 0)
				err = EFAULT;
		}
#endif /* _SYSCALL32 */

		return (err);
	}
	case DKIOCGVTOC:
	{
		/* vtoc information */
		struct vtoc	*vtoc;

		if (! (mode & FREAD))
			return (EACCES);

		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
		md_get_vtoc((md_unit_t *)un, vtoc);

		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
			if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
				err = EFAULT;
		}
#ifdef _SYSCALL32
		else {
			struct vtoc32	*vtoc32;

			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);

			vtoctovtoc32((*vtoc), (*vtoc32));
			if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
				err = EFAULT;
			kmem_free(vtoc32, sizeof (*vtoc32));
		}
#endif /* _SYSCALL32 */

		kmem_free(vtoc, sizeof (*vtoc));
		return (err);
	}

	case DKIOCSVTOC:
	{
		struct vtoc	*vtoc;

		if (! (mode & FWRITE))
			return (EACCES);

		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
			if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
				err = EFAULT;
			}
		}
#ifdef _SYSCALL32
		else {
			struct vtoc32	*vtoc32;

			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);

			if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
				err = EFAULT;
			} else {
				vtoc32tovtoc((*vtoc32), (*vtoc));
			}
			kmem_free(vtoc32, sizeof (*vtoc32));
		}
#endif /* _SYSCALL32 */

		if (err == 0)
			err = md_set_vtoc((md_unit_t *)un, vtoc);

		kmem_free(vtoc, sizeof (*vtoc));
		return (err);
	}

	case DKIOCGEXTVTOC:
	{
		/* extended vtoc information */
		struct extvtoc	*extvtoc;

		if (! (mode & FREAD))
			return (EACCES);

		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
		md_get_extvtoc((md_unit_t *)un, extvtoc);

		if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
			err = EFAULT;

		kmem_free(extvtoc, sizeof (*extvtoc));
		return (err);
	}

	case DKIOCSEXTVTOC:
	{
		struct extvtoc	*extvtoc;

		if (! (mode & FWRITE))
			return (EACCES);

		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
		if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
			err = EFAULT;
		}

		if (err == 0)
			err = md_set_extvtoc((md_unit_t *)un, extvtoc);

		kmem_free(extvtoc, sizeof (*extvtoc));
		return (err);
	}

	case DKIOCGETEFI:
	{
		/*
		 * This can be handled centrally; there is no need to
		 * duplicate the same code for every metadevice type.
		 */
		return (md_dkiocgetefi(mnum, data, mode));
	}
	case DKIOCSETEFI:
	{
		/*
		 * This can be handled centrally; there is no need to
		 * duplicate the same code for every metadevice type.
		 */
		return (md_dkiocsetefi(mnum, data, mode));
	}

	case DKIOCPARTITION:
	{
		return (md_dkiocpartition(mnum, data, mode));
	}

	case DKIOCGETVOLCAP:
	{
		/*
		 * Return the supported capabilities for the soft-partition.
		 * We can only support those caps that are provided by the
		 * underlying device.
		 */

		volcap_t	vc;

		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
			return (EINVAL);

		if (! (mode & FREAD))
			return (EACCES);

		bzero(&vc, sizeof (vc));

		/* Send ioctl to underlying driver */

		err = md_call_ioctl(un->un_dev, cmd, &vc, (mode | FKIOCTL),
		    lockp);

		if (err == 0)
			ui->ui_capab = vc.vc_info;

		if (ddi_copyout(&vc, data, sizeof (vc), mode))
			err = EFAULT;

		return (err);
	}

	case DKIOCSETVOLCAP:
	{
		/*
		 * Enable a supported capability (as returned by DKIOCGETVOLCAP)
		 * Do not pass the request down as we're the top-level device
		 * handler for the application.
		 * If the requested capability is supported (set in ui_capab),
		 * set the corresponding bit in ui_tstate so that we can pass
		 * the appropriate flag when performing i/o.
		 * This request is propagated to all nodes.
		 */
		volcap_t	vc, vc1;
		volcapset_t	volcap = 0;
		void		(*check_offline)();
		int		offline_status = 0;

		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
			return (EINVAL);

		if (! (mode & FWRITE))
			return (EACCES);

		if (ddi_copyin(data, &vc, sizeof (vc), mode))
			return (EFAULT);

		/*
		 * Send DKIOCGETVOLCAP to underlying driver to see if
		 * capability supported
		 */

		vc1.vc_info = 0;
		err = md_call_ioctl(un->un_dev, DKIOCGETVOLCAP, &vc1,
		    (mode | FKIOCTL), lockp);
		if (err != 0)
			return (err);

		/* Save capabilities */
		ui->ui_capab = vc1.vc_info;
		/*
		 * Error if required capability not supported by underlying
		 * driver
		 */
		if ((vc1.vc_info & vc.vc_set) == 0)
			return (ENOTSUP);


		/*
		 * Check if the underlying mirror has an offline submirror
		 * and fail if it does.
		 */
		check_offline = (void(*)())md_get_named_service(un->un_dev,
		    0, MD_CHECK_OFFLINE, 0);
		if (check_offline != NULL)
			(void) (*check_offline)(un->un_dev, &offline_status);
		if (offline_status)
			return (EINVAL);

		if (ui->ui_tstate & MD_ABR_CAP)
			volcap |= DKV_ABR_CAP;

		/* Only send capability message if there is a change */
		if ((vc.vc_set & (DKV_ABR_CAP)) != volcap)
			err = mdmn_send_capability_message(mnum, vc, lockp);
		return (err);
	}

	case DKIOCDMR:
	{
		/*
		 * Only valid for MN sets. We need to pass it down to the
		 * underlying driver if it's a metadevice, after we've modified
		 * the offsets to pick up the correct lower-level device
		 * position.
		 */
		vol_directed_rd_t	*vdr;
#ifdef _MULTI_DATAMODEL
		vol_directed_rd32_t	*vdr32;
#endif	/* _MULTI_DATAMODEL */

		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
			return (EINVAL);

		if (! (ui->ui_capab & DKV_DMR_CAP))
			return (EINVAL);

		vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP);
		if (vdr == NULL)
			return (ENOMEM);

		/*
		 * Underlying device supports directed mirror read, so update
		 * the user-supplied offset to pick the correct block from the
		 * partitioned metadevice.
		 */
#ifdef _MULTI_DATAMODEL
		vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP);
		if (vdr32 == NULL) {
			kmem_free(vdr, sizeof (vol_directed_rd_t));
			return (ENOMEM);
		}

		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32:
			if (ddi_copyin(data, vdr32, sizeof (*vdr32), mode)) {
				kmem_free(vdr32, sizeof (*vdr32));
				kmem_free(vdr, sizeof (*vdr));
				return (EFAULT);
			}
			vdr->vdr_flags = vdr32->vdr_flags;
			vdr->vdr_offset = vdr32->vdr_offset;
			vdr->vdr_nbytes = vdr32->vdr_nbytes;
			vdr->vdr_data = (void *)(uintptr_t)vdr32->vdr_data;
			vdr->vdr_side = vdr32->vdr_side;
			break;

		case DDI_MODEL_NONE:
			if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
				kmem_free(vdr32, sizeof (*vdr32));
				kmem_free(vdr, sizeof (*vdr));
				return (EFAULT);
			}
			break;

		default:
			kmem_free(vdr32, sizeof (*vdr32));
			kmem_free(vdr, sizeof (*vdr));
			return (EFAULT);
		}
#else	/* ! _MULTI_DATAMODEL */
		if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
			kmem_free(vdr, sizeof (*vdr));
			return (EFAULT);
		}
#endif	/* _MULTI_DATA_MODEL */

		err = sp_directed_read(mnum, vdr, mode);


#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32:
			vdr32->vdr_flags = vdr->vdr_flags;
			vdr32->vdr_offset = vdr->vdr_offset;
			vdr32->vdr_side = vdr->vdr_side;
			vdr32->vdr_bytesread = vdr->vdr_bytesread;
			bcopy(vdr->vdr_side_name, vdr32->vdr_side_name,
			    sizeof (vdr32->vdr_side_name));

			if (ddi_copyout(vdr32, data, sizeof (*vdr32), mode))
				err = EFAULT;
			break;

		case DDI_MODEL_NONE:
			if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
				err = EFAULT;
			break;
		}
#else	/* ! _MULTI_DATA_MODEL */
		if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
			err = EFAULT;
#endif	/* _MULTI_DATA_MODEL */

#ifdef _MULTI_DATAMODEL
		kmem_free(vdr32, sizeof (*vdr32));
#endif	/* _MULTI_DATAMODEL */
		kmem_free(vdr, sizeof (*vdr));

		return (err);
	}

	}

	/* Option not handled */
	return (ENOTTY);
}