md_ioctl.c revision 8452:89d32dfdae6e
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Driver for Virtual Disk.
29 */
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/buf.h>
33#include <sys/conf.h>
34#include <sys/user.h>
35#include <sys/uio.h>
36#include <sys/proc.h>
37#include <sys/t_lock.h>
38#include <sys/dkio.h>
39#include <sys/kmem.h>
40#include <sys/utsname.h>
41#include <sys/debug.h>
42#include <sys/sysmacros.h>
43#include <sys/types.h>
44#include <sys/mkdev.h>
45#include <sys/vtoc.h>
46#include <sys/efi_partition.h>
47#include <sys/open.h>
48#include <sys/file.h>
49#include <sys/ddi.h>
50#include <sys/sunddi.h>
51#include <sys/lvm/mdmn_commd.h>
52
53#include <sys/lvm/mdvar.h>
54#include <sys/lvm/md_rename.h>
55#include <sys/lvm/md_names.h>
56#include <sys/lvm/md_hotspares.h>
57
58extern md_ops_t		**md_ops;
59extern unit_t		md_nunits;
60extern set_t		md_nsets;
61extern int		md_nmedh;
62extern md_set_t		md_set[];
63extern md_set_io_t	md_set_io[];
64extern int		md_status;
65extern int		md_ioctl_cnt;
66extern int		md_in_upgrade;
67extern major_t		md_major;
68
69/* md.c */
70extern kmutex_t		md_mx;
71extern kcondvar_t	md_cv;
72
73/* md_hotspares.c */
74extern	hot_spare_pool_t *find_hot_spare_pool(set_t setno, int hsp_id);
75
76/* md_med.c */
77extern int		med_addr_tab_nents;
78extern int		med_get_t_size_ioctl(mddb_med_t_parm_t *tpp, int mode);
79extern int		med_get_t_ioctl(mddb_med_t_parm_t *tpp, int mode);
80extern int		med_set_t_ioctl(mddb_med_t_parm_t *tpp, int mode);
81extern unit_t		md_get_nextunit(set_t setno);
82
83/* md_mddb.c */
84extern mddb_set_t	*mddb_setenter(set_t setno, int flag, int *errorcodep);
85extern void		mddb_setexit(mddb_set_t *s);
86extern md_krwlock_t	nm_lock;
87
88#define	MD_MN_COMMD_CMD "rpc.mdcommd"
89static pid_t		md_mn_commd_pid;
90
91/*
92 * md_mn_is_commd_present:
93 * ----------------------
94 * Determine if commd is running on this node.
95 *
96 * If md_mn_commd_pid is 0, trust it.  Otherwise, do some in-depth checking
97 * to make sure it's still the one we originally set up by checking the
98 * provided PID's u_comm for the right program name in u_comm.
99 *
100 * This one's intended for the "something went awry" cases, and not for
101 * general use, due to its higher cost for the good/normal case.
102 */
103int
104md_mn_is_commd_present(void)
105{
106	proc_t  *commd_procp;
107
108	if (md_mn_commd_pid == (pid_t)0) {
109		return (0);
110	}
111
112	/* some in-depth checking */
113	mutex_enter(&pidlock);
114	if ((commd_procp = prfind(md_mn_commd_pid)) != NULL &&
115	    strncmp(commd_procp->p_user.u_comm,
116	    MD_MN_COMMD_CMD, strlen(MD_MN_COMMD_CMD)) == 0) {
117		mutex_exit(&pidlock);
118		/*
119		 * returns a little more info than asked for, but it will
120		 * never be PID 0 when valid.
121		 */
122		return ((int)md_mn_commd_pid);
123	}
124	/* if it's not there, make sure we only do these contortions once */
125	md_mn_commd_pid = (pid_t)0;
126	mutex_exit(&pidlock);
127
128	cmn_err(CE_WARN, "!rpc.mdcommd exited abnormally");
129	return (0);
130}
131
132/*
133 * This version merely checks the PID value that was set via an ioctl.
134 * It's intended to be used in the main code flow, where performance is
135 * critical, and accuracy can be sacrificed a little.  If something is
136 * already known to be wrong, don't use this, but use
137 * md_mn_is_commd_present() instead.
138 */
139int
140md_mn_is_commd_present_lite(void)
141{
142	return ((int)md_mn_commd_pid);
143}
144
145/*
146 * md_mn_clear_commd_present:
147 * -------------------------
148 * Clear the md_mn_commd_pid. Called only from a CPR request to suspend /
149 * terminate a resync thread. We clear the md_mn_commd_pid so that
150 * any RPC request that was in transit can complete with a failure and _not_
151 * result in an unexpected system panic.
152 */
153void
154md_mn_clear_commd_present()
155{
156	md_mn_commd_pid = (pid_t)0;
157}
158
159/*
160 * It is possible to pass in a minor number via the ioctl interface
161 * and this minor number is used to reference elements in arrays.
162 * Therefore we need to make sure that the value passed in is
163 * correct within the array sizes, and array dereference. Not
164 * doing so allows for incorrect values which may result in panics.
165 */
166static int
167verify_minor(minor_t mnum)
168{
169	set_t	setno = MD_MIN2SET(mnum);
170
171	/*
172	 * Check the bounds.
173	 */
174	if (setno >= md_nsets || (MD_MIN2UNIT(mnum) >= md_nunits)) {
175		return (EINVAL);
176	}
177
178	/* has the set been initialised ? */
179	if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0)
180		return (ENODEV);
181
182	return (0);
183}
184
185static int
186get_lb_inittime_ioctl(
187	mddb_config_t	*cp
188)
189{
190	set_t		setno = cp->c_setno;
191	int		err;
192	mddb_set_t	*s;
193
194	if (setno >= md_nsets)
195		return (-1);
196
197	if ((s = mddb_setenter(setno, MDDB_MUSTEXIST, &err)) == NULL)
198		return (-1);
199
200	if (s->s_lbp == NULL) {
201		mddb_setexit(s);
202		return (-1);
203	}
204
205	cp->c_timestamp = s->s_lbp->lb_inittime;
206
207	mddb_setexit(s);
208	return (0);
209}
210
211static int
212setnm_ioctl(mdnm_params_t *nm, int mode)
213{
214	char 	*name, *minorname = NULL;
215	side_t	side;
216	int	err = 0;
217	void	*devid = NULL;
218	int	devid_sz;
219
220	/*
221	 * Don't allow addition of new names to namespace during upgrade.
222	 */
223	if (MD_UPGRADE)  {
224		return (EAGAIN);
225	}
226
227	mdclrerror(&nm->mde);
228
229	if ((mode & FWRITE) == 0)
230		return (EACCES);
231
232	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
233		return (0);
234
235	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
236		return (ENODEV);
237
238	if (md_get_setstatus(nm->setno) & MD_SET_STALE)
239		return (mdmddberror(&nm->mde, MDE_DB_STALE, NODEV32,
240		    nm->setno));
241
242	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
243
244	err = ddi_copyin((caddr_t)(uintptr_t)nm->devname, name,
245	    (size_t)nm->devname_len, mode);
246	if (err) {
247		err = EFAULT;
248		goto out;
249	}
250
251	if (nm->imp_flag) {
252		if ((nm->devid == NULL) || (nm->minorname == NULL)) {
253			err = EINVAL;
254			goto out;
255		}
256		if (nm->devid) {
257			devid_sz = nm->devid_size;
258			devid = kmem_zalloc(devid_sz, KM_SLEEP);
259			err = ddi_copyin((caddr_t)(uintptr_t)nm->devid,
260			    devid, devid_sz, mode);
261			if (err) {
262				err = EFAULT;
263				goto out;
264			}
265		}
266		if (nm->minorname) {
267			if (nm->minorname_len > MAXPATHLEN) {
268				err = EINVAL;
269				goto out;
270			}
271			minorname = kmem_zalloc(nm->minorname_len, KM_SLEEP);
272			err = ddi_copyin((caddr_t)(uintptr_t)nm->minorname,
273			    minorname, (size_t)nm->minorname_len, mode);
274			if (err) {
275				err = EFAULT;
276				goto out;
277			}
278		}
279	}
280
281	if (nm->side == -1)
282		side = mddb_getsidenum(nm->setno);
283	else
284		side = nm->side;
285
286	if (strcmp(nm->drvnm, "") == 0) {
287		char *drvnm;
288		drvnm = ddi_major_to_name(nm->major);
289		(void) strncpy(nm->drvnm, drvnm, sizeof (nm->drvnm));
290	}
291
292	nm->key = md_setdevname(nm->setno, side, nm->key, nm->drvnm,
293	    nm->mnum, name, nm->imp_flag, (ddi_devid_t)devid, minorname,
294	    0, &nm->mde);
295	/*
296	 * If we got an error from md_setdevname & md_setdevname did not
297	 * set the error code, we'll default to MDE_DB_NOSPACE.
298	 */
299	if ((((int)nm->key) < 0) && mdisok(&nm->mde)) {
300		err = mdmddberror(&nm->mde, MDE_DB_NOSPACE, NODEV32, nm->setno);
301		goto out;
302	}
303
304out:
305	kmem_free(name, MAXPATHLEN);
306	if (devid) {
307		kmem_free(devid, devid_sz);
308	}
309	if (minorname)
310		kmem_free(minorname, nm->minorname_len);
311	return (err);
312}
313
314static int
315getnm_ioctl(
316	mdnm_params_t	*nm,
317	int		mode
318)
319{
320	char		*name;
321	side_t		side;
322	md_dev64_t	dev = NODEV64;
323	mdc_unit_t	*un;
324	uint_t		id;
325	char		*setname;
326	int		err = 0;
327
328	mdclrerror(&nm->mde);
329
330	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
331		return (0);
332
333	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
334		return (ENODEV);
335
336
337	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
338
339	if (nm->side == -1)
340		side = mddb_getsidenum(nm->setno);
341	else
342		side = nm->side;
343
344	if (nm->drvnm[0] == '\0') {
345		char *drvnm;
346
347		if (MD_UPGRADE)
348			drvnm = md_targ_major_to_name(nm->major);
349		else
350			drvnm = ddi_major_to_name(nm->major);
351		if (drvnm != NULL)
352			(void) strncpy(nm->drvnm, drvnm, sizeof (nm->drvnm));
353	}
354
355	if (nm->drvnm[0] != '\0') {
356		if (MD_UPGRADE)
357			dev = md_makedevice(md_targ_name_to_major(nm->drvnm),
358			    nm->mnum);
359		else
360			dev = md_makedevice(ddi_name_to_major(nm->drvnm),
361			    nm->mnum);
362	}
363
364	/*
365	 * With the introduction of friendly names, all friendly named
366	 * metadevices will have an entry in the name space. However,
367	 * systems upgraded from pre-friendly name to a friendly name
368	 * release won't have name space entries for pre-friendly name
369	 * top level metadevices.
370	 *
371	 * So we search the name space for the our entry with either the
372	 * given dev_t or key. If we can't find the entry, we'll try the
373	 * un array to get information for our target metadevice. Note
374	 * we only use the un array when searching by dev_t since a
375	 * key implies an existing device which should have been
376	 * found in the name space with the call md_getdevname.
377	 */
378	if (md_getdevname(nm->setno, side, nm->key, dev, name,
379	    MAXPATHLEN) == 0) {
380		err = md_getnment(nm->setno, side, nm->key, dev, nm->drvnm,
381		    sizeof (nm->drvnm), &nm->major, &nm->mnum, &nm->retkey);
382		if (err) {
383			if (err < 0)
384				err = EINVAL;
385			goto out;
386		}
387	} else {
388		if ((nm->key != MD_KEYWILD) ||
389		    (md_set[MD_MIN2SET(nm->mnum)].s_un == NULL) ||
390		    (MD_UNIT(nm->mnum) == NULL)) {
391			err = ENOENT;
392			goto out;
393		}
394
395		/*
396		 * We're here because the mnum is of a pre-friendly
397		 * name device. Make sure the major value is for
398		 * metadevices.
399		 */
400		if (nm->major != md_major) {
401			err = ENOENT;
402			goto out;
403		}
404
405		/*
406		 * get the unit number and setname to construct the
407		 * fully qualified name for the metadevice.
408		 */
409		un = MD_UNIT(nm->mnum);
410		id =  MD_MIN2UNIT(un->un_self_id);
411		if (nm->setno != MD_LOCAL_SET) {
412			setname = mddb_getsetname(nm->setno);
413			(void) snprintf(name, MAXPATHLEN,
414			    "/dev/md/%s/dsk/d%u", setname, id);
415		} else {
416			(void) snprintf(name, MAXPATHLEN,
417			    "/dev/md/dsk/d%u", id);
418		}
419	}
420
421	err = ddi_copyout(name, (caddr_t)(uintptr_t)nm->devname,
422	    strlen(name) + 1, mode);
423	if (err) {
424		err = EFAULT;
425		goto out;
426	}
427
428out:
429	kmem_free(name, MAXPATHLEN);
430	return (err);
431}
432
433static int
434gethspnm_ioctl(
435	mdhspnm_params_t	*nm,
436	int			mode
437)
438{
439	char			*name;
440	char			*tmpname;
441	char			*setname = NULL;
442	side_t			side;
443	hot_spare_pool_t	*hsp = NULL;
444	mdkey_t			key = MD_KEYWILD;
445	int			err = 0;
446
447	mdclrerror(&nm->mde);
448
449	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
450		return (0);
451
452	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
453		return (ENODEV);
454
455	name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
456
457	if (nm->side == -1)
458		side = mddb_getsidenum(nm->setno);
459	else
460		side = nm->side;
461
462	/*
463	 * Get the key from input hspid, use different macros
464	 * since the hspid could be either a FN or pre-FN hspid.
465	 */
466	if (nm->hspid != MD_HSPID_WILD) {
467		if (HSP_ID_IS_FN(nm->hspid))
468			key = HSP_ID_TO_KEY(nm->hspid);
469		else
470			key = HSP_ID(nm->hspid);
471	}
472
473	/*
474	 * Get the input name if we're searching by hsp name. Check
475	 * that the input name length is less than MAXPATHLEN.
476	 */
477	if ((nm->hspid == MD_HSPID_WILD) &&
478	    (nm->hspname_len <= MAXPATHLEN)) {
479		err = ddi_copyin((caddr_t)(uintptr_t)nm->hspname,
480		    name, (sizeof (char)) * nm->hspname_len, mode);
481
482		/* Stop if ddi_copyin failed. */
483		if (err) {
484			err = EFAULT;
485			goto out;
486		}
487	}
488
489	/* Must have either a valid hspid or a name to continue */
490	if ((nm->hspid == MD_HSPID_WILD) && (name[0] == '\0')) {
491		err = EINVAL;
492		goto out;
493	}
494
495	/*
496	 * Try to find the hsp namespace entry corresponds to either
497	 * the given hspid or name. If we can't find it, the hsp maybe
498	 * a pre-friendly name hsp so we'll try to find it in the
499	 * s_hsp array.
500	 */
501	if ((nm->hspid == MD_HSPID_WILD) || (HSP_ID_IS_FN(nm->hspid))) {
502
503		if (md_gethspinfo(nm->setno, side, key, nm->drvnm,
504		    &nm->ret_hspid, name) != 0) {
505			/*
506			 * If we were given a key for a FN hsp and
507			 * couldn't find its entry, simply errored
508			 * out.
509			 */
510			if (HSP_ID_IS_FN(nm->hspid)) {
511				err = ENOENT;
512				goto out;
513			}
514
515			/*
516			 * Since md_gethspinfo failed and the hspid is
517			 * not a FN hspid,  we must have a name for a
518			 * pre-FN hotspare pool
519			 */
520			if (name[0] == '\0') {
521				err = EINVAL;
522				goto out;
523			}
524
525			tmpname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
526			if (nm->setno != MD_LOCAL_SET)
527				setname = mddb_getsetname(nm->setno);
528
529			hsp = (hot_spare_pool_t *)md_set[nm->setno].s_hsp;
530			while (hsp != NULL) {
531				/* Only use the pre-friendly name hsp */
532				if (!(hsp->hsp_revision & MD_FN_META_DEV)) {
533
534					if (setname != NULL) {
535						(void) snprintf(tmpname,
536						    MAXPATHLEN,
537						    "%s/hsp%03u", setname,
538						    HSP_ID(hsp->hsp_self_id));
539					} else {
540						(void) snprintf(tmpname,
541						    MAXPATHLEN, "hsp%03u",
542						    HSP_ID(hsp->hsp_self_id));
543					}
544
545					if (strcmp(name, tmpname) == 0)
546						break;
547				}
548
549				hsp = hsp->hsp_next;
550			}
551			kmem_free(tmpname, MAXPATHLEN);
552
553			if (hsp == NULL) {
554				err = ENOENT;
555				goto out;
556			}
557
558			/* Return hsp_self_id */
559			nm->ret_hspid = hsp->hsp_self_id;
560		}
561
562	} else {
563		/*
564		 * We have a hspid for a pre-FN hotspare pool. Let's
565		 * try to find the matching hsp using the given
566		 * hspid.
567		 */
568		if (nm->hspid == MD_HSPID_WILD) {
569			err = ENOENT;
570			goto out;
571		}
572
573		hsp = (hot_spare_pool_t *)md_set[nm->setno].s_hsp;
574		while (hsp != NULL) {
575			if (hsp->hsp_self_id == nm->hspid)
576				break;
577			hsp = hsp->hsp_next;
578		}
579
580		if (hsp == NULL) {
581			err = ENOENT;
582			goto out;
583		}
584
585		/* Prepare a name to return */
586		if (nm->setno != MD_LOCAL_SET)
587			setname = mddb_getsetname(nm->setno);
588
589		if (setname != NULL) {
590			(void) snprintf(name, MAXPATHLEN, "%s/hsp%03u",
591			    setname, HSP_ID(hsp->hsp_self_id));
592		} else {
593			(void) snprintf(name, MAXPATHLEN, "hsp%03u",
594			    HSP_ID(hsp->hsp_self_id));
595		}
596
597		nm->ret_hspid = hsp->hsp_self_id;
598	}
599
600	if (nm->hspid != MD_HSPID_WILD) {
601		if ((strlen(name) + 1) > nm->hspname_len) {
602			err = EINVAL;
603			goto out;
604		}
605		err = ddi_copyout(name, (caddr_t)
606		    (uintptr_t)nm->hspname, strlen(name)+1, mode);
607	}
608
609	if (err) {
610		if (err < 0)
611			err = EINVAL;
612	}
613
614out:
615	kmem_free(name, MAXPATHLEN);
616	return (err);
617}
618
619
620/*ARGSUSED*/
621static int
622update_loc_namespace_ioctl(
623	mdnm_params_t	*nm,
624	char		*dname,
625	char		*pname,
626	int		mode
627)
628{
629
630	side_t		side;
631
632	mdclrerror(&nm->mde);
633
634	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
635		return (0);
636
637	if (MD_MNSET_SETNO(nm->setno))
638		return (0);
639
640	if ((md_get_setstatus(nm->setno) & MD_SET_STALE))
641		return (0);
642
643	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
644		return (ENODEV);
645
646	if (nm->side == -1)
647		side = mddb_getsidenum(nm->setno);
648	else
649		side = nm->side;
650
651	return (md_update_locator_namespace(nm->setno, side, dname,
652	    pname, nm->devt));
653}
654
655/*ARGSUSED*/
656static int
657update_namespace_did_ioctl(
658	mdnm_params_t	*nm,
659	int		mode
660)
661{
662	side_t		side;
663
664	mdclrerror(&nm->mde);
665
666	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
667		return (0);
668
669	if (MD_MNSET_SETNO(nm->setno))
670		return (0);
671
672	if ((md_get_setstatus(nm->setno) & MD_SET_STALE))
673		return (0);
674
675	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
676		return (ENODEV);
677
678	if (nm->side == -1)
679		side = mddb_getsidenum(nm->setno);
680	else
681		side = nm->side;
682
683	return (md_update_namespace_did(nm->setno, side, nm->key, &nm->mde));
684}
685
686/*ARGSUSED*/
687static int
688update_namespace_ioctl(
689	mdnm_params_t	*nm,
690	char		*dname,
691	char		*pname,
692	int		mode
693)
694{
695	side_t		side;
696
697	mdclrerror(&nm->mde);
698
699	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
700		return (0);
701
702	if (MD_MNSET_SETNO(nm->setno))
703		return (0);
704
705	if ((md_get_setstatus(nm->setno) & MD_SET_STALE))
706		return (0);
707
708	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
709		return (ENODEV);
710
711	if (nm->side == -1)
712		side = mddb_getsidenum(nm->setno);
713	else
714		side = nm->side;
715
716	return (md_update_namespace(nm->setno, side, nm->key,
717	    dname, pname, nm->mnum));
718
719}
720
721/*ARGSUSED*/
722static int
723getnextkey_ioctl(
724	mdnm_params_t	*nm,
725	int		mode
726)
727{
728	side_t		side;
729
730	mdclrerror(&nm->mde);
731
732	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
733		return (0);
734
735	if (nm->setno >= md_nsets)
736		return (EINVAL);
737
738	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
739		return (ENODEV);
740
741	if (nm->side == -1)
742		side = mddb_getsidenum(nm->setno);
743	else
744		side = nm->side;
745
746	nm->key = md_getnextkey(nm->setno, side, nm->key, &nm->ref_count);
747	return (0);
748}
749
750/*ARGSUSED*/
751static int
752remnm_ioctl(mdnm_params_t *nm, int mode)
753{
754	side_t	side;
755
756	mdclrerror(&nm->mde);
757
758	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
759		return (0);
760
761	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
762		return (ENODEV);
763
764	if (nm->side == -1)
765		side = mddb_getsidenum(nm->setno);
766	else
767		side = nm->side;
768
769	return (md_remdevname(nm->setno, side, nm->key));
770}
771
772
773/*ARGSUSED*/
774static int
775getdrvnm_ioctl(md_dev64_t dev, md_i_driverinfo_t *di, int mode)
776{
777	mdi_unit_t 	*ui;
778	minor_t		mnum = di->mnum;
779	set_t		setno = MD_MIN2SET(mnum);
780
781	mdclrerror(&di->mde);
782
783	if (md_snarf_db_set(MD_LOCAL_SET, &di->mde) != 0)
784		return (0);
785
786	ui = MDI_UNIT(mnum);
787	if (ui == NULL) {
788		return (mdmderror(&di->mde, MDE_UNIT_NOT_SETUP, mnum));
789	}
790
791	MD_SETDRIVERNAME(di, md_ops[ui->ui_opsindex]->md_driver.md_drivername,
792	    setno);
793
794	return (0);
795}
796
797/*ARGSUSED*/
798static int
799getnext_ioctl(md_i_getnext_t *gn, int mode)
800{
801	int		modindex;
802	md_link_t	*next;
803	uint_t		id;
804	int		found = 0;
805	set_t		setno = gn->md_driver.md_setno;
806
807	mdclrerror(&gn->mde);
808
809	if (md_snarf_db_set(MD_LOCAL_SET, &gn->mde) != 0)
810		return (0);
811
812	if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) {
813		if (md_get_setstatus(setno) & MD_SET_TAGDATA)
814			return (mdmddberror(&gn->mde, MDE_DB_TAGDATA,
815			    NODEV32, setno));
816		else
817			return (mderror(&gn->mde, MDE_UNIT_NOT_FOUND));
818	}
819
820	modindex = md_getmodindex((md_driver_t *)gn, 1, 0);
821	if (modindex == -1) {
822		return (mderror(&gn->mde, MDE_UNIT_NOT_FOUND));
823	}
824
825	rw_enter(&md_ops[modindex]->md_link_rw.lock, RW_READER);
826	id = gn->id;
827	next = md_ops[modindex]->md_head;
828	while (next) {
829		if ((next->ln_setno == setno) && (next->ln_id == id)) {
830			gn->id = id;
831			found = 1;
832			break;
833		}
834
835		if ((next->ln_setno == setno) &&(next->ln_id > id) &&
836		    (! found || (next->ln_id < gn->id))) {
837			gn->id = next->ln_id;
838			found = 1;
839			/* continue looking for smallest */
840		}
841		next = next->ln_next;
842	}
843	rw_exit(&md_ops[modindex]->md_link_rw.lock);
844
845	if (! found)
846		return (mderror(&gn->mde, MDE_UNIT_NOT_FOUND));
847
848	return (0);
849}
850
851/*ARGSUSED*/
852static int
853getnum_ioctl(void *d, int mode)
854{
855	int		modindex;
856	md_link_t	*next;
857	int		sz;
858	minor_t		*minors;
859	minor_t		*m_ptr;
860	set_t		setno;
861	int		err = 0;
862	md_error_t	*mdep;
863	int		minor_array_length;
864	md_driver_t	*driver;
865	int		count = 0;
866	struct md_i_getnum	*gn = d;
867
868
869	/* number of specified devices in specified set - if 0 return count */
870	minor_array_length = gn->size;
871	if (minor_array_length > md_nunits)
872		return (EINVAL);
873
874	mdep = &gn->mde;
875	driver = &gn->md_driver;
876	setno = driver->md_setno;
877
878	mdclrerror(mdep);
879
880	if (md_snarf_db_set(MD_LOCAL_SET, mdep) != 0)
881		return (0);
882
883	if ((md_get_setstatus(setno) & MD_SET_SNARFED) == 0) {
884		if (md_get_setstatus(setno) & MD_SET_TAGDATA) {
885			return (mdmddberror(mdep, MDE_DB_TAGDATA,
886			    NODEV32, setno));
887		} else {
888			return (mderror(mdep, MDE_UNIT_NOT_FOUND));
889		}
890	}
891
892	modindex = md_getmodindex(driver, 0, 0);
893	if (modindex == -1) {
894
895		return (mderror(mdep, MDE_UNIT_NOT_FOUND));
896	}
897
898	/* if array length is not 0 then allocate the output buffers */
899	if (minor_array_length != 0) {
900		sz = minor_array_length * ((int)sizeof (minor_t));
901		minors = kmem_zalloc(sz, KM_SLEEP);
902		m_ptr = minors;
903	}
904
905	rw_enter(&md_ops[modindex]->md_link_rw.lock, RW_READER);
906	next = md_ops[modindex]->md_head;
907	count = 0;
908	while (next) {
909		if (next->ln_setno == setno) {
910			if ((minor_array_length > 0) &&
911			    (count < minor_array_length)) {
912				*m_ptr = next->ln_id;
913				m_ptr++;
914			}
915			count++;
916		}
917		next = next->ln_next;
918	}
919	rw_exit(&md_ops[modindex]->md_link_rw.lock);
920
921	gn->size = count;
922	/* now copy the array back */
923	if (minor_array_length > 0) {
924		err = ddi_copyout(minors,
925		    (caddr_t)(uintptr_t)gn->minors, sz, mode);
926		kmem_free(minors, sz);
927	}
928
929	return (err);
930}
931
932/*ARGSUSED*/
933static int
934didstat_ioctl(
935	md_i_didstat_t	*ds
936)
937{
938	int		cnt = 0;
939	int		err = 0;
940
941	mdclrerror(&ds->mde);
942
943	if (md_snarf_db_set(MD_LOCAL_SET, &ds->mde) != 0)
944		return (0);
945
946	if (ds->setno >= md_nsets) {
947		return (EINVAL);
948	}
949
950	if ((md_get_setstatus(ds->setno) & MD_SET_SNARFED) == 0)
951		return (ENODEV);
952
953	if (ds->mode == MD_FIND_INVDID) {
954		cnt = md_validate_devid(ds->setno, ds->side, &ds->maxsz);
955		if (cnt == -1)
956			err = -1;
957		ds->cnt = cnt;
958	} else if (ds->mode == MD_GET_INVDID) {
959		if (md_get_invdid(ds->setno, ds->side, ds->cnt, ds->maxsz,
960		    (caddr_t)(uintptr_t)ds->ctdp) == -1) {
961			err = -1;
962		}
963	} else {
964		/* invalid mode */
965		err = EINVAL;
966	}
967
968	return (err);
969}
970
971/*ARGSUSED*/
972static int
973getdid_ioctl(
974	mdnm_params_t	*nm,
975	int		mode
976)
977{
978	int		err = 0;
979	ddi_devid_t	did = NULL;
980
981	mdclrerror(&nm->mde);
982
983	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
984		return (0);
985
986	if (nm->setno >= md_nsets) {
987		return (EINVAL);
988	}
989
990	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
991		return (ENODEV);
992
993	/*
994	 * Tell user that replica is not in devid mode
995	 */
996	if (!(((mddb_set_t *)md_set[nm->setno].s_db)->s_lbp->lb_flags
997	    & MDDB_DEVID_STYLE) && md_keep_repl_state) {
998		return (mdsyserror(&nm->mde, MDDB_F_NODEVID));
999	}
1000
1001	/*
1002	 * If user is prepared to receive the devid allocate a kernel buffer.
1003	 */
1004	if (nm->devid_size != 0) {
1005		/* check for bogus value of devid_size */
1006		if (nm->devid_size > MAXPATHLEN) {
1007			return (EINVAL);
1008		}
1009		did = kmem_alloc(nm->devid_size, KM_SLEEP);
1010	}
1011
1012	err = md_getdevid(nm->setno, nm->side, nm->key, did, &nm->devid_size);
1013
1014	if (err) {
1015		if (err < 0)
1016			err = EINVAL;
1017		goto out;
1018	}
1019
1020	/*
1021	 * If devid size was already known to user then give them the devid.
1022	 */
1023	if (did != NULL)
1024		err = ddi_copyout(did,
1025		    (caddr_t)(uintptr_t)nm->devid, nm->devid_size, mode);
1026
1027out:
1028	if (did != NULL)
1029		kmem_free(did, nm->devid_size);
1030	return (err);
1031}
1032
1033int
1034mddb_setmaster_ioctl(mddb_setmaster_config_t *info)
1035{
1036	/* Verify that setno is in valid range */
1037	if (info->c_setno >= md_nsets)
1038		return (EINVAL);
1039
1040	/*
1041	 * When adding the first disk to a MN diskset, the master
1042	 * needs to be set (in order to write out the mddb)
1043	 * before the set is snarfed or even before the set
1044	 * is marked as a MNset in the md_set structure.
1045	 * So, don't check for MNset or SNARFED and don't call
1046	 * mddb_setenter. In order to discourage bad ioctl calls,
1047	 * verify that magic field in structure is set correctly.
1048	 */
1049	if (info->c_magic != MDDB_SETMASTER_MAGIC)
1050		return (EINVAL);
1051
1052	if (info->c_current_host_master)
1053		md_set[info->c_setno].s_am_i_master = 1;
1054	else
1055		md_set[info->c_setno].s_am_i_master = 0;
1056
1057	return (0);
1058}
1059
1060/*
1061 * Set the devid for the namespace record identified by the tuple
1062 * [setno, sideno, key]. The key is the namespace key. The md_getdevnum()
1063 * function is used to actually regenerate the devid.
1064 */
1065/*ARGSUSED*/
1066static int
1067setdid_ioctl(
1068	mdnm_params_t	*nm,
1069	int		mode
1070)
1071{
1072	dev_t		devt;
1073
1074	/*
1075	 * If upgrading do not allow modification of the namespace.
1076	 */
1077	if (MD_UPGRADE)
1078		return (EAGAIN);
1079
1080	mdclrerror(&nm->mde);
1081
1082	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
1083		return (0);
1084
1085	if (nm->setno >= md_nsets)
1086		return (EINVAL);
1087
1088	if (MD_MNSET_SETNO(nm->setno))
1089		return (0);
1090
1091	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
1092		return (ENODEV);
1093
1094	devt = md_dev64_to_dev(
1095	    md_getdevnum(nm->setno, nm->side, nm->key, MD_TRUST_DEVT));
1096
1097	if (devt == NODEV)
1098		return (ENODEV);
1099
1100	return (0);
1101}
1102
1103/*ARGSUSED*/
1104static int
1105getdidmin_ioctl(
1106	mdnm_params_t   *nm,
1107	int		mode
1108)
1109{
1110	int	err = 0;
1111	char	*minorname = NULL;
1112
1113	mdclrerror(&nm->mde);
1114
1115	if (md_snarf_db_set(MD_LOCAL_SET, &nm->mde) != 0)
1116		return (0);
1117
1118	if (nm->setno >= md_nsets)
1119		return (EINVAL);
1120
1121	if (MD_MNSET_SETNO(nm->setno))
1122		return (0);
1123
1124	if ((md_get_setstatus(nm->setno) & MD_SET_SNARFED) == 0)
1125		return (ENODEV);
1126
1127	minorname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1128
1129	if (nm->side == -1) {
1130		err = EINVAL;
1131		goto out;
1132	}
1133
1134	err = md_getdevidminor(nm->setno, nm->side, nm->key, minorname,
1135	    MAXPATHLEN);
1136
1137	if (err) {
1138		if (err < 0)
1139			err = EINVAL;
1140		goto out;
1141	}
1142
1143	err = ddi_copyout(minorname, (caddr_t)(uintptr_t)nm->minorname,
1144	    strlen(minorname) + 1, mode);
1145
1146out:
1147
1148	kmem_free(minorname, MAXPATHLEN);
1149	return (err);
1150}
1151
1152static int
1153mddb_userreq_ioctl(mddb_userreq_t *ur, int mode)
1154{
1155	void			*data;
1156	int			status;
1157	mddb_recid_t		*recids;
1158	int			flags;
1159
1160	if (ur->ur_setno >= md_nsets)
1161		return (EINVAL);
1162
1163	mdclrerror(&ur->ur_mde);
1164
1165	if (md_snarf_db_set(MD_LOCAL_SET, &ur->ur_mde) != 0)
1166		return (0);
1167
1168	if ((md_get_setstatus(ur->ur_setno) & MD_SET_SNARFED) == 0)
1169		return (ENODEV);
1170
1171	switch (ur->ur_cmd) {
1172	case MD_DB_GETNEXTREC:
1173		if (ur->ur_recid == 0)
1174			ur->ur_recid = mddb_makerecid(ur->ur_setno, 0);
1175		/*
1176		 * Is ur_recid a valid one ?
1177		 */
1178		if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets)
1179			return (EINVAL);
1180
1181		ur->ur_recid = mddb_getnextrec(ur->ur_recid, ur->ur_type,
1182		    ur->ur_type2);
1183		if (ur->ur_recid > 0) {
1184			ur->ur_type = mddb_getrectype1(ur->ur_recid);
1185			ur->ur_type2 = mddb_getrectype2(ur->ur_recid);
1186			ur->ur_recstat = mddb_getrecstatus(ur->ur_recid);
1187		}
1188		break;
1189
1190	case MD_DB_COMMIT_ONE:
1191		/*
1192		 * Is ur_recid a valid one?
1193		 */
1194		if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets)
1195			return (EINVAL);
1196
1197		ur->ur_recstat = mddb_getrecstatus(ur->ur_recid);
1198		if (ur->ur_recstat == MDDB_NORECORD)
1199			return (ENXIO);
1200		status = mddb_commitrec(ur->ur_recid);
1201		/*
1202		 * For MN sets we panic if there are too few database replicas
1203		 * and we're attempting to add entries to the log.
1204		 */
1205		if (status != 0) {
1206			if ((MD_MNSET_SETNO(ur->ur_setno) &&
1207			    (ur->ur_type2 == MDDB_UR_LR)) &&
1208			    (md_get_setstatus(ur->ur_setno) & MD_SET_TOOFEW)) {
1209				cmn_err(CE_PANIC,
1210				    "md: Panic due to lack of DiskSuite state\n"
1211				    " database replicas. Fewer than 50%% of "
1212				    "the total were available,\n so panic to "
1213				    "ensure data integrity.");
1214			}
1215			return (mddbstatus2error(&ur->ur_mde, status, NODEV32,
1216			    ur->ur_setno));
1217		}
1218		break;
1219
1220	case MD_DB_COMMIT_MANY:
1221		if (ur->ur_size <= 0)
1222			return (EINVAL);
1223
1224		data = kmem_alloc(ur->ur_size, KM_SLEEP);
1225
1226		if (ddi_copyin((caddr_t)(uintptr_t)ur->ur_data, data,
1227		    (size_t)ur->ur_size, mode)) {
1228			kmem_free(data, ur->ur_size);
1229			return (EFAULT);
1230		}
1231
1232		recids = (mddb_recid_t *)data;
1233		while (*recids != 0) {
1234			/*
1235			 * Is recid a valid ?
1236			 */
1237			if (DBSET(*recids) < 0 || DBSET(*recids) >= md_nsets) {
1238				kmem_free(data, ur->ur_size);
1239				return (EINVAL);
1240			}
1241			ur->ur_recstat = mddb_getrecstatus(*recids++);
1242			if (ur->ur_recstat == MDDB_NORECORD) {
1243				kmem_free(data, ur->ur_size);
1244				return (ENXIO);
1245			}
1246		}
1247		status = mddb_commitrecs(data);
1248		kmem_free(data, ur->ur_size);
1249		/*
1250		 * For MN sets we panic if there are too few database replicas
1251		 * and we're attempting to add entries to the log.
1252		 */
1253		if (status != 0) {
1254			if ((MD_MNSET_SETNO(ur->ur_setno) &&
1255			    (ur->ur_type2 == MDDB_UR_LR)) &&
1256			    (md_get_setstatus(ur->ur_setno) & MD_SET_TOOFEW)) {
1257				cmn_err(CE_PANIC,
1258				    "md: Panic due to lack of DiskSuite state\n"
1259				    " database replicas. Fewer than 50%% of "
1260				    "the total were available,\n so panic to "
1261				    "ensure data integrity.");
1262			}
1263			return (mddbstatus2error(&ur->ur_mde, status, NODEV32,
1264			    ur->ur_setno));
1265		}
1266		break;
1267
1268	case MD_DB_GETDATA:
1269		/*
1270		 * Check ur_recid
1271		 */
1272		if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets)
1273			return (EINVAL);
1274
1275		ur->ur_recstat = mddb_getrecstatus(ur->ur_recid);
1276		if (ur->ur_recstat == MDDB_NORECORD ||
1277		    ur->ur_recstat == MDDB_NODATA)
1278			return (ENXIO);
1279
1280		if (ur->ur_size > mddb_getrecsize(ur->ur_recid))
1281			return (EINVAL);
1282
1283		data = mddb_getrecaddr(ur->ur_recid);
1284		if (ddi_copyout(data, (caddr_t)(uintptr_t)ur->ur_data,
1285		    (size_t)ur->ur_size, mode)) {
1286			return (EFAULT);
1287		}
1288		break;
1289
1290	case MD_DB_SETDATA:
1291		if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets)
1292			return (EINVAL);
1293
1294		ur->ur_recstat = mddb_getrecstatus(ur->ur_recid);
1295		if (ur->ur_recstat == MDDB_NORECORD)
1296			return (ENXIO);
1297
1298		if (ur->ur_size > mddb_getrecsize(ur->ur_recid))
1299			return (EINVAL);
1300
1301		data = mddb_getrecaddr(ur->ur_recid);
1302		if (ddi_copyin((caddr_t)(uintptr_t)ur->ur_data, data,
1303		    (size_t)ur->ur_size, mode)) {
1304			return (EFAULT);
1305		}
1306		break;
1307
1308	case MD_DB_DELETE:
1309		if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets)
1310			return (EINVAL);
1311
1312		ur->ur_recstat = mddb_getrecstatus(ur->ur_recid);
1313		if (ur->ur_recstat == MDDB_NORECORD)
1314			return (ENXIO);
1315		status = mddb_deleterec(ur->ur_recid);
1316		if (status < 0)
1317			return (mddbstatus2error(&ur->ur_mde, status, NODEV32,
1318			    ur->ur_setno));
1319		break;
1320
1321	case MD_DB_CREATE:
1322	{
1323		int	mn_set = 0;
1324
1325		if (md_get_setstatus(ur->ur_setno) & MD_SET_MNSET)
1326			mn_set = 1;
1327
1328		if (ur->ur_setno >= md_nsets)
1329			return (EINVAL);
1330		if ((mn_set) && (ur->ur_type2 == MDDB_UR_LR))
1331			flags = MD_CRO_32BIT | MD_CRO_CHANGELOG;
1332		else
1333			flags = MD_CRO_32BIT;
1334		ur->ur_recid = mddb_createrec(ur->ur_size, ur->ur_type,
1335		    ur->ur_type2, flags, ur->ur_setno);
1336		if (ur->ur_recid < 0)
1337			return (mddbstatus2error(&ur->ur_mde, ur->ur_recid,
1338			    NODEV32, ur->ur_setno));
1339		break;
1340	}
1341
1342	case MD_DB_GETSTATUS:
1343		if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets)
1344			return (EINVAL);
1345		ur->ur_recstat = mddb_getrecstatus(ur->ur_recid);
1346		break;
1347
1348	case MD_DB_GETSIZE:
1349		if (DBSET(ur->ur_recid) < 0 || DBSET(ur->ur_recid) >= md_nsets)
1350			return (EINVAL);
1351		ur->ur_size = mddb_getrecsize(ur->ur_recid);
1352		break;
1353
1354	case MD_DB_MAKEID:
1355		if (ur->ur_setno >= md_nsets)
1356			return (EINVAL);
1357		ur->ur_recid = mddb_makerecid(ur->ur_setno, ur->ur_recid);
1358		break;
1359
1360	default:
1361		return (EINVAL);
1362	}
1363	return (0);
1364}
1365
1366static int
1367setuserflags(
1368	md_set_userflags_t	*msu,
1369	IOLOCK			*lock
1370)
1371{
1372	minor_t			mnum = msu->mnum;
1373	set_t			setno = MD_MIN2SET(mnum);
1374	md_unit_t		*un;
1375	mdi_unit_t		*ui;
1376
1377	mdclrerror(&msu->mde);
1378
1379	if (md_get_setstatus(setno) & MD_SET_STALE)
1380		return (mdmddberror(&msu->mde, MDE_DB_STALE, mnum, setno));
1381
1382	if ((ui = MDI_UNIT(mnum)) == NULL) {
1383		return (mdmderror(&msu->mde, MDE_UNIT_NOT_SETUP, mnum));
1384	}
1385
1386	un = (md_unit_t *)md_ioctl_writerlock(lock, ui);
1387
1388	un->c.un_user_flags = msu->userflags;
1389	mddb_commitrec_wrapper(un->c.un_record_id);
1390
1391	return (0);
1392}
1393
1394/*
1395 * mddb_didstat_from_user -- called for DIDSTAT ioctl. 2 different calling
1396 * 	scenarios.
1397 * 	1) data->mode == MD_FIND_INVDID
1398 *	   when user is inquiring about the existence of invalid device id's.
1399 *	   Upon return to the user d->cnt may have a value in it.
1400 *	2) data->mode == MD_GET_INVDID
1401 *	   when the user wants a list of the invalid device id's.
1402 *	   In this case d->ctdp is non Null and cnt has	a value in it.
1403 *
1404 * Basically this routine along with mddb_didstat_to_user can be eliminated
1405 * by pushing ddi_copyout down to lower level interfaces.  To minimize impact
1406 * just keep the current implementation intact.
1407 */
1408static int
1409mddb_didstat_from_user(
1410	void		**d,
1411	caddr_t		data,
1412	int		mode,
1413	caddr_t		*ds_ctd_addr
1414)
1415{
1416	size_t		sz1 = 0, sz2 = 0;
1417	md_i_didstat_t	*d1;
1418	void		*d2;
1419	*ds_ctd_addr	= 0;
1420
1421	sz1 = sizeof (md_i_didstat_t);
1422	d1 = (md_i_didstat_t *)kmem_zalloc(sz1, KM_SLEEP);
1423
1424	if (ddi_copyin(data, (void *)d1, sz1, mode) != 0) {
1425		kmem_free((void *)d1, sz1);
1426		return (EFAULT);
1427	}
1428
1429	/*
1430	 * ds_ctd_addr has actual user ctdp
1431	 */
1432	*ds_ctd_addr = (caddr_t)(uintptr_t)d1->ctdp;
1433	if (d1->mode == MD_GET_INVDID) {
1434		sz2 = (d1->cnt * d1->maxsz) + 1;
1435		if (sz2 <= 0) {
1436			kmem_free(d1, sz1);
1437			return (EINVAL);
1438		}
1439		d2 = kmem_zalloc(sz2, KM_SLEEP);
1440		d1->ctdp = (uint64_t)(uintptr_t)d2;
1441	} else if (d1->mode != MD_FIND_INVDID) {
1442		kmem_free(d1, sz1);
1443		return (EINVAL);
1444	}
1445	*d = (void *)d1;
1446	return (0);
1447}
1448
1449/*
1450 * mddb_didstat_to_user -- see comment for mddb_didstat_from_user. In this
1451 * 		case d->cnt could have a value in it for either usage of
1452 *		the ioctl.
1453 */
1454/*ARGSUSED*/
1455static int
1456mddb_didstat_to_user(
1457	void 		*d,
1458	caddr_t		data,
1459	int		mode,
1460	caddr_t		ds_ctd_addr
1461)
1462{
1463	size_t		sz1 = 0, sz2 = 0;
1464	md_i_didstat_t		*d1;
1465	void			*d2;
1466
1467
1468	d1 = (md_i_didstat_t *)d;
1469	sz1 = sizeof (md_i_didstat_t);
1470
1471	sz2 = (d1->cnt * d1->maxsz) + 1;
1472	d2 = (caddr_t)(uintptr_t)d1->ctdp;
1473	if (d2 && sz2) {
1474		/*
1475		 * Copy out from kernel ctdp to user ctdp area
1476		 */
1477		if (ddi_copyout(d2, (caddr_t)ds_ctd_addr, sz2, mode) != 0) {
1478			kmem_free(d1, sz1);
1479			kmem_free(d2, sz2);
1480			return (EFAULT);
1481		}
1482		d1->ctdp = (uint64_t)(uintptr_t)ds_ctd_addr;
1483	}
1484	if (ddi_copyout(d1, data, sz1, mode) != 0) {
1485		kmem_free(d1, sz1);
1486		if (sz2 && d2)
1487			kmem_free(d2, sz2);
1488		return (EFAULT);
1489	}
1490	kmem_free(d1, sz1);
1491	if (sz2 && d2)
1492		kmem_free(d2, sz2);
1493	return (0);
1494}
1495
1496
1497static int
1498mddb_config_from_user(
1499	void 		**d,
1500	caddr_t 	data,
1501	int 		mode,
1502	caddr_t 	*c_devid_addr,
1503	caddr_t		*c_old_devid_addr
1504)
1505{
1506	size_t		sz1 = 0, sz2 = 0, sz3 = 0;
1507	mddb_config_t	*d1;
1508	void		*d2;
1509	void 		*d3;
1510
1511	*c_devid_addr = 0;
1512
1513	sz1 = sizeof (mddb_config_t);
1514	d1 = (mddb_config_t *)kmem_zalloc(sz1, KM_SLEEP);
1515
1516	if (ddi_copyin(data, (void *)d1, sz1, mode) != 0) {
1517		kmem_free((void *)d1, sz1);
1518		return (EFAULT);
1519	}
1520	*c_devid_addr = (caddr_t)(uintptr_t)d1->c_locator.l_devid;
1521
1522	if (d1->c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
1523		sz2 = d1->c_locator.l_devid_sz;
1524		if (d1->c_locator.l_devid_sz <= 0 ||
1525		    d1->c_locator.l_devid_sz > MAXPATHLEN) {
1526			kmem_free((void *)d1, sz1);
1527			return (EINVAL);
1528		}
1529		d2 = kmem_zalloc(sz2, KM_SLEEP);
1530		if (ddi_copyin((caddr_t)(uintptr_t)d1->c_locator.l_devid,
1531		    d2, sz2, mode) != 0) {
1532			kmem_free(d1, sz1);
1533			kmem_free(d2, sz2);
1534			return (EFAULT);
1535		}
1536		d1->c_locator.l_devid = (uint64_t)(uintptr_t)d2;
1537
1538		if ((caddr_t)(uintptr_t)d1->c_locator.l_old_devid) {
1539			*c_old_devid_addr = (caddr_t)(uintptr_t)
1540			    d1->c_locator.l_old_devid;
1541
1542			sz3 = d1->c_locator.l_old_devid_sz;
1543			if (d1->c_locator.l_old_devid_sz <= 0 ||
1544			    d1->c_locator.l_old_devid_sz > MAXPATHLEN) {
1545				kmem_free((void *)d1, sz1);
1546				kmem_free(d2, sz2);
1547				return (EINVAL);
1548			}
1549			d3 = kmem_zalloc(sz3, KM_SLEEP);
1550			if (ddi_copyin(
1551			    (caddr_t)(uintptr_t)d1->c_locator.l_old_devid,
1552			    d3, sz3, mode) != 0) {
1553				kmem_free((void *)d1, sz1);
1554				kmem_free(d2, sz2);
1555				kmem_free(d3, sz3);
1556				return (EFAULT);
1557			}
1558			d1->c_locator.l_old_devid = (uintptr_t)d3;
1559		}
1560	} else {
1561		d1->c_locator.l_devid = (uint64_t)0;
1562		d1->c_locator.l_old_devid = (uint64_t)0;
1563	}
1564
1565	*d = (void *)d1;
1566	return (0);
1567}
1568
1569/*ARGSUSED*/
1570static int
1571mddb_config_to_user(
1572	void 		*d,
1573	caddr_t 	data,
1574	int 		mode,
1575	caddr_t 	c_devid_addr,
1576	caddr_t		c_old_devid_addr
1577)
1578{
1579	size_t		sz1 = 0, sz2 = 0, sz3 = 0;
1580	mddb_config_t		*d1;
1581	void			*d2;
1582	void			*d3;
1583
1584	d1 = (mddb_config_t *)d;
1585	sz1 = sizeof (mddb_config_t);
1586
1587	if (d1->c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
1588		sz2 = d1->c_locator.l_devid_sz;
1589		d2 = (caddr_t)(uintptr_t)d1->c_locator.l_devid;
1590		/* Only copyout devid if valid */
1591		if (d1->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
1592			if (ddi_copyout(d2, (caddr_t)c_devid_addr,
1593			    sz2, mode) != 0) {
1594				kmem_free(d1, sz1);
1595				kmem_free(d2, sz2);
1596				return (EFAULT);
1597			}
1598		}
1599	}
1600
1601	d1->c_locator.l_devid = (uint64_t)(uintptr_t)c_devid_addr;
1602
1603	if (d1->c_locator.l_old_devid) {
1604		sz3 = d1->c_locator.l_old_devid_sz;
1605		d3 = (caddr_t)(uintptr_t)d1->c_locator.l_old_devid;
1606		if (ddi_copyout(d3, (caddr_t)c_old_devid_addr,
1607		    sz3, mode) != 0) {
1608			kmem_free(d1, sz1);
1609			kmem_free(d2, sz2);
1610			kmem_free(d3, sz3);
1611		}
1612	}
1613	d1->c_locator.l_old_devid = (uintptr_t)c_old_devid_addr;
1614
1615	if (ddi_copyout(d1, data, sz1, mode) != 0) {
1616		kmem_free(d1, sz1);
1617		if (sz2)
1618			kmem_free(d2, sz2);
1619		if (sz3)
1620			kmem_free(d3, sz3);
1621		return (EFAULT);
1622	}
1623
1624	if (d1)
1625		kmem_free(d1, sz1);
1626	if (sz2)
1627		kmem_free(d2, sz2);
1628	if (sz3)
1629		kmem_free(d3, sz3);
1630
1631	return (0);
1632}
1633
1634/*
1635 * NAME:	get_tstate
1636 * PURPOSE:	Return unit's transient error state to user.
1637 * INPUT:	device node (set + metadevice number)
1638 * OUTPUT:	gu->tstate
1639 * RETURNS:	0 on success
1640 *		EINVAL on failure
1641 */
1642static int
1643get_tstate(md_i_get_tstate_t *gu, IOLOCK *lock)
1644{
1645	mdi_unit_t	*ui;
1646
1647	ui = MDI_UNIT(gu->id);
1648	if (ui == (mdi_unit_t *)NULL) {
1649		(void) mdmderror(&gu->mde, MDE_UNIT_NOT_SETUP, gu->id);
1650		return (EINVAL);
1651	}
1652
1653	(void) md_ioctl_readerlock(lock, ui);
1654	gu->tstate = ui->ui_tstate;
1655	md_ioctl_readerexit(lock);
1656
1657	return (0);
1658}
1659
1660/*
1661 * NAME:	md_clu_ioctl
1662 * PURPOSE:	depending on clu_cmd:
1663 *		- Check open state,
1664 *		- lock opens and check open state
1665 *		- unlock opens again
1666 * INPUT:	metadevice and clu_cmd
1667 * OUTPUT:	open state (for MD_MN_LCU_UNLOCK always 0)
1668 * RETURNS:	0 on success
1669 *		EINVAL on failure
1670 */
1671int
1672md_clu_ioctl(md_clu_open_t *clu)
1673{
1674	mdi_unit_t	*ui;
1675	minor_t		mnum;
1676
1677	if ((clu->clu_dev <= 0) ||
1678	    (md_getmajor(clu->clu_dev)) != md_major) {
1679		return (EINVAL);
1680	}
1681
1682	mnum = md_getminor(clu->clu_dev);
1683	if ((ui = MDI_UNIT(mnum)) == NULL) {
1684		return (mdmderror(&clu->clu_mde, MDE_UNIT_NOT_SETUP, mnum));
1685	}
1686
1687	switch (clu->clu_cmd) {
1688	case MD_MN_LCU_CHECK:
1689		/* No lock here, just checking */
1690		clu->clu_isopen = md_unit_isopen(ui);
1691		break;
1692	case MD_MN_LCU_LOCK:
1693		/* This inhibits later opens to succeed */
1694		ui->ui_tstate |= MD_OPENLOCKED;
1695		clu->clu_isopen = md_unit_isopen(ui);
1696		/* In case the md is opened, reset the lock immediately */
1697		if (clu->clu_isopen != 0) {
1698			ui->ui_tstate &= ~MD_OPENLOCKED;
1699		}
1700		break;
1701	case MD_MN_LCU_UNLOCK:
1702		ui->ui_tstate &= ~MD_OPENLOCKED;
1703		clu->clu_isopen = 0;	/* always sucess */
1704		break;
1705	}
1706	return (0);
1707}
1708
1709/*
1710 * NAME:	mkdev_ioctl
1711 * PURPOSE:	Create device node for specified set / metadevice tuple
1712 * INPUT:	device tuple (set number + metadevice number)
1713 * OUTPUT:	None
1714 * RETURNS:	0 on success
1715 *		EINVAL on failure
1716 */
1717static int
1718mkdev_ioctl(md_mkdev_params_t *p)
1719{
1720	set_t	setno = p->md_driver.md_setno;
1721	unit_t	un;
1722
1723	mdclrerror(&p->mde);
1724
1725	/* Validate arguments passed in to ioctl */
1726	if (setno >= MD_MAXSETS) {
1727		(void) mderror(&p->mde, MDE_NO_SET);
1728		return (EINVAL);
1729	}
1730
1731	/*
1732	 * Get the next available unit number in this set
1733	 */
1734	un = md_get_nextunit(setno);
1735	if (un == MD_UNITBAD) {
1736		(void) mdmderror(&p->mde, MDE_UNIT_NOT_SETUP, un);
1737		return (ENODEV);
1738	}
1739
1740	/* Create the device node */
1741	if (md_create_minor_node(setno, un)) {
1742		(void) mdmderror(&p->mde, MDE_UNIT_NOT_SETUP, un);
1743		return (ENODEV);
1744	}
1745
1746	/* Return the minor number */
1747	p->un = un;
1748
1749	return (0);
1750}
1751
1752/*
1753 * admin device ioctls
1754 */
1755static int
1756md_base_ioctl(md_dev64_t dev, int cmd, caddr_t data, int mode, IOLOCK *lockp)
1757{
1758	size_t		sz = 0;
1759	void		*d = NULL;
1760	mddb_config_t	*cp;
1761	set_t		setno;
1762	int		err = 0;
1763	int		err_to_user = 0;
1764	int		mddb_config_case = 0;
1765	int		mddb_didstat_case = 0;
1766	caddr_t		c_devid_addr = 0;
1767	caddr_t		c_old_devid_addr = 0;
1768	caddr_t		ds_ctd_addr = 0;
1769	mddb_set_node_params_t	*snp;
1770
1771	/* For now we can only handle 32-bit clients for internal commands */
1772	if ((cmd != DKIOCINFO) &&
1773	    ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32)) {
1774		return (EINVAL);
1775	}
1776
1777	switch (cmd) {
1778
1779	case DKIOCINFO:
1780	{
1781		if (! (mode & FREAD))
1782			return (EACCES);
1783
1784		sz = sizeof (struct dk_cinfo);
1785		d = kmem_alloc(sz, KM_SLEEP);
1786
1787		get_info((struct dk_cinfo *)d, md_getminor(dev));
1788		break;
1789	}
1790
1791	case MD_DB_USEDEV:
1792	{
1793		if (! (mode & FWRITE))
1794			return (EACCES);
1795
1796		mddb_config_case = 1;
1797
1798		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1799		    &c_old_devid_addr);
1800
1801		if (err)
1802			return (err);
1803
1804		err = mddb_configure(MDDB_USEDEV, (mddb_config_t *)d);
1805		break;
1806	}
1807
1808	case MD_DB_GETDEV:
1809	{
1810		if (! (mode & FREAD))
1811			return (EACCES);
1812
1813		mddb_config_case = 1;
1814
1815		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1816		    &c_old_devid_addr);
1817
1818		if (err)
1819			return (err);
1820
1821		err = mddb_configure(MDDB_GETDEV, (mddb_config_t *)d);
1822		break;
1823	}
1824
1825	case MD_DB_GETDRVNM:
1826	{
1827		if (! (mode & FREAD))
1828			return (EACCES);
1829
1830		mddb_config_case = 1;
1831
1832		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1833		    &c_old_devid_addr);
1834
1835		if (err)
1836			return (err);
1837
1838		err = mddb_configure(MDDB_GETDRVRNAME, (mddb_config_t *)d);
1839		break;
1840	}
1841
1842	case MD_DB_ENDDEV:
1843	{
1844		if (! (mode & FREAD))
1845			return (EACCES);
1846
1847		mddb_config_case = 1;
1848
1849		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1850		    &c_old_devid_addr);
1851
1852		if (err)
1853			return (err);
1854
1855		err = mddb_configure(MDDB_ENDDEV, (mddb_config_t *)d);
1856		break;
1857	}
1858
1859	case MD_DB_DELDEV:
1860	{
1861		if (! (mode & FWRITE))
1862			return (EACCES);
1863
1864		mddb_config_case = 1;
1865
1866		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1867		    &c_old_devid_addr);
1868
1869		if (err)
1870			return (err);
1871
1872		cp = (mddb_config_t *)d;
1873		setno = cp->c_setno;
1874		err = mddb_configure(MDDB_DELDEV, cp);
1875		if (! mdisok(&cp->c_mde))
1876			break;
1877
1878		if (setno == MD_LOCAL_SET)
1879			break;
1880
1881		if (cp->c_dbcnt != 0)
1882			break;
1883
1884		/*
1885		 * if the last db replica of a diskset is deleted
1886		 * unload everything.
1887		 */
1888
1889		/* Requesting a release, clean up everything */
1890		md_clr_setstatus(setno, MD_SET_KEEPTAG);
1891
1892		err = release_set(cp, mode);
1893
1894		break;
1895	}
1896
1897	case MD_DB_NEWDEV:
1898	{
1899		if (! (mode & FWRITE))
1900			return (EACCES);
1901
1902		mddb_config_case = 1;
1903
1904		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1905		    &c_old_devid_addr);
1906
1907		if (err)
1908			return (err);
1909
1910		cp = (mddb_config_t *)d;
1911		setno = cp->c_setno;
1912		err = mddb_configure(MDDB_NEWDEV, cp);
1913		if (! err && mdisok(&cp->c_mde))
1914			(void) md_snarf_db_set(setno, &cp->c_mde);
1915		break;
1916	}
1917
1918	case MD_DB_NEWSIDE:
1919	{
1920		if (! (mode & FWRITE))
1921			return (EACCES);
1922
1923		mddb_config_case = 1;
1924
1925		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1926		    &c_old_devid_addr);
1927
1928		if (err)
1929			return (err);
1930
1931		err = mddb_configure(MDDB_NEWSIDE, (mddb_config_t *)d);
1932		break;
1933	}
1934
1935	case MD_DB_DELSIDE:
1936	{
1937		if (! (mode & FWRITE))
1938			return (EACCES);
1939
1940		mddb_config_case = 1;
1941
1942		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1943		    &c_old_devid_addr);
1944
1945		if (err)
1946			return (err);
1947
1948		err = mddb_configure(MDDB_DELSIDE, (mddb_config_t *)d);
1949		break;
1950	}
1951
1952	case MD_DB_SETDID:
1953	{
1954		if (!(mode & FWRITE)) {
1955			return (EACCES);
1956		}
1957
1958		mddb_config_case = 1;
1959
1960		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1961		    &c_old_devid_addr);
1962
1963		if (err) {
1964			return (err);
1965		}
1966
1967		err = mddb_configure(MDDB_SETDID, (mddb_config_t *)d);
1968
1969		break;
1970	}
1971
1972	case MD_GRAB_SET:
1973	{
1974		if (! (mode & FWRITE))
1975			return (EACCES);
1976
1977		mddb_config_case = 1;
1978
1979		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
1980		    &c_old_devid_addr);
1981
1982		if (err)
1983			return (err);
1984
1985		cp = (mddb_config_t *)d;
1986		setno = cp->c_setno;
1987
1988		err = take_set(cp, mode);
1989
1990		if (err || ! mdisok(&cp->c_mde))
1991			break;
1992
1993		if (md_get_setstatus(setno) & MD_SET_ACCOK)
1994			err = mdmddberror(&cp->c_mde, MDE_DB_ACCOK, NODEV32,
1995			    setno);
1996
1997		md_unblock_setio(setno);
1998		break;
1999	}
2000
2001	case MD_RELEASE_SET:
2002	{
2003		if (! (mode & FWRITE))
2004			return (EACCES);
2005
2006		mddb_config_case = 1;
2007
2008		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
2009		    &c_old_devid_addr);
2010
2011		if (err)
2012			return (err);
2013
2014		/* shorthand */
2015		cp = (mddb_config_t *)d;
2016		setno = cp->c_setno;
2017
2018		/* If the user requests a release, clean up everything */
2019		md_clr_setstatus(setno, MD_SET_KEEPTAG);
2020
2021		/* Block incoming I/Os during release_set operation */
2022		if (MD_MNSET_SETNO(setno)) {
2023			/*
2024			 * md_tas_block_setio will block the set if
2025			 * there are no outstanding I/O requests,
2026			 * otherwise it returns -1.
2027			 */
2028			if (md_tas_block_setio(setno) != 1) {
2029				err = EBUSY;
2030				break;
2031			}
2032		} else {
2033			/*
2034			 * Should not return something other than 1
2035			 */
2036			if (md_block_setio(setno) != 1) {
2037				md_clearblock_setio(setno);
2038				err = EACCES;
2039				break;
2040			}
2041		}
2042
2043		err = release_set(cp, mode);
2044
2045		/* Always unblock I/O even if release_set fails */
2046		md_clearblock_setio(setno);
2047
2048		break;
2049	}
2050
2051	case MD_DB_GETOPTLOC:
2052	{
2053		if (! (mode & FREAD))
2054			return (EACCES);
2055
2056		sz = sizeof (mddb_optloc_t);
2057		d = kmem_alloc(sz, KM_SLEEP);
2058
2059		if (ddi_copyin(data, d, sz, mode) != 0) {
2060			err = EFAULT;
2061			break;
2062		}
2063
2064		err = mddb_getoptloc((mddb_optloc_t *)d);
2065		break;
2066	}
2067
2068	case MD_HALT:
2069	{
2070		if (! (mode & FWRITE))
2071			return (EACCES);
2072
2073		/* already have the ioctl lock */
2074		return (md_halt(MD_GBL_IOCTL_LOCK));
2075	}
2076
2077	case MD_IOCSET_NM:
2078	{
2079		if (! (mode & FREAD))
2080			return (EACCES);
2081
2082		sz = sizeof (mdnm_params_t);
2083		d = kmem_alloc(sz, KM_SLEEP);
2084
2085		if (ddi_copyin(data, d, sz, mode) != 0) {
2086			err = EFAULT;
2087			break;
2088		}
2089
2090		/* check data integrity */
2091		if (((mdnm_params_t *)d)->setno >= md_nsets) {
2092			err = EINVAL;
2093			break;
2094		}
2095
2096		if ((((mdnm_params_t *)d)->devname_len == 0) ||
2097		    (((mdnm_params_t *)d)->devname_len > MAXPATHLEN)) {
2098			err = EINVAL;
2099			break;
2100		}
2101
2102		if (((mdnm_params_t *)d)->devname == NULL) {
2103			err = EINVAL;
2104			break;
2105		}
2106
2107		err = setnm_ioctl((mdnm_params_t *)d, mode);
2108		break;
2109	}
2110
2111	case MD_IOCGET_NM:
2112	{
2113		if (! (mode & FREAD))
2114			return (EACCES);
2115
2116		sz = sizeof (mdnm_params_t);
2117		d = kmem_alloc(sz, KM_SLEEP);
2118
2119		if (ddi_copyin(data, d, sz, mode) != 0) {
2120			err = EFAULT;
2121			break;
2122		}
2123
2124		/* check data integrity */
2125		if (((mdnm_params_t *)d)->setno >= md_nsets) {
2126			err = EINVAL;
2127			break;
2128		}
2129		if (((mdnm_params_t *)d)->devname == NULL) {
2130			err = EINVAL;
2131			break;
2132		}
2133
2134		err = getnm_ioctl((mdnm_params_t *)d, mode);
2135		break;
2136	}
2137
2138	case MD_IOCGET_HSP_NM:
2139	{
2140		if (! (mode & FREAD))
2141			return (EACCES);
2142
2143		sz = sizeof (mdhspnm_params_t);
2144		d = kmem_alloc(sz, KM_SLEEP);
2145
2146		if (ddi_copyin(data, d, sz, mode) != 0) {
2147			err = EFAULT;
2148			break;
2149		}
2150
2151		/* check data integrity */
2152		if (((mdhspnm_params_t *)d)->setno >= md_nsets) {
2153			err = EINVAL;
2154			break;
2155		}
2156		if (((mdhspnm_params_t *)d)->hspname == NULL) {
2157			err = EINVAL;
2158			break;
2159		}
2160
2161		err = gethspnm_ioctl((mdhspnm_params_t *)d, mode);
2162		break;
2163	}
2164
2165	case MD_IOCNXTKEY_NM:
2166	{
2167		if (! (mode & FREAD))
2168			return (EACCES);
2169
2170		sz = sizeof (mdnm_params_t);
2171		d = kmem_alloc(sz, KM_SLEEP);
2172
2173		if (ddi_copyin(data, d, sz, mode) != 0) {
2174			err = EFAULT;
2175			break;
2176		}
2177
2178		err = getnextkey_ioctl((mdnm_params_t *)d, mode);
2179		break;
2180	}
2181
2182	case MD_IOCREM_NM:
2183	{
2184		if (! (mode & FREAD))
2185			return (EACCES);
2186
2187		sz = sizeof (mdnm_params_t);
2188		d = kmem_alloc(sz, KM_SLEEP);
2189
2190		if (ddi_copyin(data, d, sz, mode) != 0) {
2191			err = EFAULT;
2192			break;
2193		}
2194
2195		/* check data integrity */
2196		if (((mdnm_params_t *)d)->setno >= md_nsets) {
2197			err = EINVAL;
2198			break;
2199		}
2200
2201		err = remnm_ioctl((mdnm_params_t *)d, mode);
2202		break;
2203	}
2204
2205	case MD_IOCGET_TSTATE:
2206	{
2207		md_i_get_tstate_t	*p;
2208
2209		if (! (mode & FREAD))
2210			return (EACCES);
2211
2212		sz = sizeof (md_i_get_tstate_t);
2213		d = kmem_alloc(sz, KM_SLEEP);
2214
2215		if (ddi_copyin(data, d, sz, mode) != 0) {
2216			err = EFAULT;
2217			break;
2218		}
2219
2220		p = (md_i_get_tstate_t *)d;
2221
2222		if ((err = verify_minor(p->id)) != 0) {
2223			if (err == EINVAL)
2224				(void) mdmderror(&p->mde, MDE_INVAL_UNIT,
2225				    p->id);
2226			break;
2227		}
2228
2229		err = get_tstate(p, lockp);
2230		break;
2231	}
2232
2233	case MD_IOCGET_DRVNM:
2234	{
2235		md_i_driverinfo_t	*p;
2236
2237		if (! (mode & FREAD))
2238			return (EACCES);
2239
2240		sz = sizeof (md_i_driverinfo_t);
2241		d = kmem_alloc(sz, KM_SLEEP);
2242
2243		if (ddi_copyin(data, d, sz, mode) != 0) {
2244			err = EFAULT;
2245			break;
2246		}
2247
2248		p = (md_i_driverinfo_t *)d;
2249
2250		/* check data integrity */
2251		if (p->md_driver.md_drivername == NULL) {
2252			err = EINVAL;
2253			break;
2254		}
2255
2256		if ((err = verify_minor(p->mnum)) != 0) {
2257			if (err == EINVAL)
2258				(void) mdmderror(&p->mde, MDE_INVAL_UNIT,
2259				    p->mnum);
2260			break;
2261		}
2262
2263		err = getdrvnm_ioctl(dev, p, mode);
2264		break;
2265	}
2266
2267	case MD_IOCGET_NEXT:
2268	{
2269		if (! (mode & FREAD))
2270			return (EACCES);
2271
2272		sz = sizeof (md_i_getnext_t);
2273		d = kmem_alloc(sz, KM_SLEEP);
2274
2275		if (ddi_copyin(data, d, sz, mode) != 0) {
2276			err = EFAULT;
2277			break;
2278		}
2279
2280		/* check data integrity */
2281		if (((md_i_getnext_t *)d)->md_driver.md_setno >= md_nsets) {
2282			err = EINVAL;
2283			break;
2284		}
2285
2286		err = getnext_ioctl((md_i_getnext_t *)d, mode);
2287		break;
2288	}
2289
2290	case MD_DB_USERREQ:
2291	case MD_MN_DB_USERREQ:
2292	{
2293		if (! (mode & FREAD))
2294			return (EACCES);
2295
2296		sz = sizeof (mddb_userreq_t);
2297		d = kmem_alloc(sz, KM_SLEEP);
2298
2299		if (ddi_copyin(data, d, sz, mode) != 0) {
2300			err = EFAULT;
2301			break;
2302		}
2303		err = mddb_userreq_ioctl((mddb_userreq_t *)d, mode);
2304		break;
2305	}
2306
2307	case MD_IOCGET_NUM:
2308	{
2309		if (! (mode & FREAD))
2310			return (EACCES);
2311
2312		sz = sizeof (md_i_getnum_t);
2313		d = kmem_alloc(sz, KM_SLEEP);
2314
2315		if (ddi_copyin(data, d, sz, mode) != 0) {
2316			err = EFAULT;
2317			break;
2318		}
2319
2320		err = getnum_ioctl(d, mode);
2321		break;
2322	}
2323
2324	case MD_DB_OWNSET:
2325	{
2326		if (! (mode & FREAD))
2327			return (EACCES);
2328
2329		sz = sizeof (mddb_ownset_t);
2330		d = kmem_alloc(sz, KM_SLEEP);
2331
2332		if (ddi_copyin(data, d, sz, mode) != 0) {
2333			err = EFAULT;
2334			break;
2335		}
2336
2337		if (((mddb_ownset_t *)d)->setno >= md_nsets) {
2338			err = EINVAL;
2339			break;
2340		}
2341
2342		((mddb_ownset_t *)d)->owns_set =
2343		    mddb_ownset(((mddb_ownset_t *)d)->setno);
2344
2345		break;
2346	}
2347
2348	case MD_IOCGETNSET:
2349	{
2350		if (! (mode & FREAD))
2351			return (EACCES);
2352
2353		if (ddi_copyout((caddr_t)&md_nsets, data,
2354		    sizeof (set_t), mode) != 0) {
2355			err = EFAULT;
2356			break;
2357		}
2358		break;
2359	}
2360
2361	case MD_IOCGETNUNITS:
2362	{
2363		if (! (mode & FREAD))
2364			return (EACCES);
2365
2366		if (ddi_copyout((caddr_t)&md_nunits, data,
2367		    sizeof (set_t), mode) != 0) {
2368			err = EFAULT;
2369			break;
2370		}
2371		break;
2372	}
2373
2374	case MD_IOCGVERSION:
2375	{
2376		uint_t	dversion = MD_DVERSION;
2377
2378		if (! (mode & FREAD))
2379			return (EACCES);
2380
2381		if (ddi_copyout((caddr_t)&dversion, data,
2382		    sizeof (dversion), mode) != 0) {
2383			err = EFAULT;
2384			break;
2385		}
2386		break;
2387	}
2388
2389	case MD_IOCSET_FLAGS:
2390	{
2391		md_set_userflags_t	*p;
2392
2393		if (! (mode & FWRITE))
2394			return (EACCES);
2395
2396		sz = sizeof (md_set_userflags_t);
2397		d = kmem_alloc(sz, KM_SLEEP);
2398
2399		if (ddi_copyin(data, d, sz, mode)) {
2400			err = EFAULT;
2401			break;
2402		}
2403
2404		p = (md_set_userflags_t *)d;
2405
2406		if ((err = verify_minor(p->mnum)) != 0) {
2407			if (err == EINVAL)
2408				(void) mdmderror(&p->mde, MDE_INVAL_UNIT,
2409				    p->mnum);
2410			break;
2411		}
2412
2413		err = setuserflags(p, lockp);
2414		break;
2415	}
2416
2417	case MD_IOCRENAME:
2418	{
2419		md_rename_t	*p;
2420
2421		if (! (mode & FWRITE)) {
2422			return (EACCES);
2423		}
2424
2425		sz = sizeof (md_rename_t);
2426		d = kmem_alloc(sz, KM_SLEEP);
2427
2428		if (ddi_copyin(data, d, sz, mode)) {
2429			err = EFAULT;
2430			break;
2431		}
2432
2433		p = (md_rename_t *)d;
2434
2435		if ((err = verify_minor(p->to.mnum)) != 0) {
2436			if (err == EINVAL)
2437				(void) mdmderror(&p->mde, MDE_INVAL_UNIT,
2438				    p->to.mnum);
2439			break;
2440		}
2441
2442		if ((err = verify_minor(p->from.mnum)) != 0) {
2443			if (err == EINVAL)
2444				(void) mdmderror(&p->mde, MDE_INVAL_UNIT,
2445				    p->from.mnum);
2446			break;
2447		}
2448
2449		err = md_rename(p, lockp);
2450		break;
2451	}
2452
2453	case MD_IOCISOPEN:
2454	{
2455		md_isopen_t	*p;
2456		mdi_unit_t	*ui;
2457		minor_t		mnum;
2458
2459		if (! (mode & FREAD))
2460			return (EACCES);
2461
2462		sz = sizeof (md_isopen_t);
2463		d = kmem_alloc(sz, KM_SLEEP);
2464
2465		if (ddi_copyin(data, d, sz, mode)) {
2466			err = EFAULT;
2467			break;
2468		}
2469
2470		p = (md_isopen_t *)d;
2471		if ((p->dev <= 0) || (md_getmajor(p->dev)) != md_major) {
2472			err = EINVAL;
2473			break;
2474		}
2475
2476		mnum = md_getminor(p->dev);
2477
2478		if ((err = verify_minor(mnum)) != 0) {
2479			if (err == EINVAL)
2480				(void) mdmderror(&p->mde, MDE_INVAL_UNIT, mnum);
2481			break;
2482		}
2483
2484		if ((ui = MDI_UNIT(mnum)) == NULL) {
2485			/*
2486			 * If the incore unit does not exist then rather
2487			 * than set err we need to set it to 0 because the
2488			 * multi-node code is expecting a return of
2489			 * 0 (from mdmderror() but with the mde structure
2490			 * filled with particular information
2491			 * (MDE_UNIT_NOT_SETUP).
2492			 */
2493			err = mdmderror(&p->mde, MDE_UNIT_NOT_SETUP, mnum);
2494			break;
2495		}
2496
2497		p->isopen = md_unit_isopen(ui);
2498		break;
2499	}
2500
2501	case MD_MED_GET_LST:
2502	{
2503		mddb_med_parm_t		*medpp;
2504
2505		if (! (mode & FREAD))
2506			return (EACCES);
2507
2508		sz = sizeof (mddb_med_parm_t);
2509		d = kmem_alloc(sz, KM_SLEEP);
2510
2511		if (ddi_copyin(data, d, sz, mode) != 0) {
2512			err = EFAULT;
2513			break;
2514		}
2515
2516		medpp = (mddb_med_parm_t *)d;
2517
2518		err = getmed_ioctl(medpp, mode);
2519		break;
2520	}
2521
2522	case MD_MED_SET_LST:
2523	{
2524		mddb_med_parm_t		*medpp;
2525
2526		if (! (mode & FWRITE))
2527			return (EACCES);
2528
2529		sz = sizeof (mddb_med_parm_t);
2530		d = kmem_alloc(sz, KM_SLEEP);
2531
2532		if (ddi_copyin(data, d, sz, mode) != 0) {
2533			err = EFAULT;
2534			break;
2535		}
2536
2537		medpp = (mddb_med_parm_t *)d;
2538
2539		err = setmed_ioctl(medpp, mode);
2540
2541		break;
2542	}
2543
2544	case MD_MED_UPD_MED:
2545	{
2546		if (! (mode & FWRITE))
2547			return (EACCES);
2548
2549		sz = sizeof (mddb_med_upd_parm_t);
2550		d = kmem_alloc(sz, KM_SLEEP);
2551
2552		if (ddi_copyin(data, d, sz, mode) != 0) {
2553			err = EFAULT;
2554			break;
2555		}
2556
2557		err = updmed_ioctl((mddb_med_upd_parm_t *)d, mode);
2558
2559		break;
2560	}
2561
2562	case MD_MED_GET_NMED:
2563	{
2564		if (! (mode & FREAD))
2565			return (EACCES);
2566
2567		if (ddi_copyout((caddr_t)&md_nmedh, data,
2568		    sizeof (int), mode) != 0) {
2569			err = EFAULT;
2570			break;
2571		}
2572		break;
2573	}
2574
2575	case MD_MED_GET_TAG:
2576	{
2577		if (! (mode & FREAD))
2578			return (EACCES);
2579
2580		sz = sizeof (mddb_dtag_get_parm_t);
2581		d = kmem_alloc(sz, KM_SLEEP);
2582
2583		if (ddi_copyin(data, d, sz, mode) != 0) {
2584			err = EFAULT;
2585			break;
2586		}
2587
2588		err = gettag_ioctl((mddb_dtag_get_parm_t *)d, mode);
2589
2590		break;
2591	}
2592
2593	case MD_MED_USE_TAG:
2594	{
2595		if (! (mode & FWRITE))
2596			return (EACCES);
2597
2598		sz = sizeof (mddb_dtag_use_parm_t);
2599		d = kmem_alloc(sz, KM_SLEEP);
2600
2601		if (ddi_copyin(data, d, sz, mode) != 0) {
2602			err = EFAULT;
2603			break;
2604		}
2605
2606		err = usetag_ioctl((mddb_dtag_use_parm_t *)d, mode);
2607
2608		break;
2609	}
2610
2611	case MD_MED_ACCEPT:
2612	{
2613		if (! (mode & FWRITE))
2614			return (EACCES);
2615
2616		sz = sizeof (mddb_accept_parm_t);
2617		d = kmem_alloc(sz, KM_SLEEP);
2618
2619		if (ddi_copyin(data, d, sz, mode) != 0) {
2620			err = EFAULT;
2621			break;
2622		}
2623
2624		err = accept_ioctl((mddb_accept_parm_t *)d, mode);
2625
2626		break;
2627	}
2628
2629	case MD_MED_GET_TLEN:
2630	{
2631		if (! (mode & FREAD))
2632			return (EACCES);
2633
2634		sz = sizeof (mddb_med_t_parm_t);
2635		d = kmem_alloc(sz, KM_SLEEP);
2636
2637		if (ddi_copyin(data, d, sz, mode) != 0) {
2638			err = EFAULT;
2639			break;
2640		}
2641
2642		err = med_get_t_size_ioctl((mddb_med_t_parm_t *)d, mode);
2643
2644		break;
2645	}
2646
2647	case MD_MED_GET_T:
2648	{
2649		if (! (mode & FREAD))
2650			return (EACCES);
2651
2652		sz = (sizeof (mddb_med_t_parm_t) - sizeof (mddb_med_t_ent_t)) +
2653		    (sizeof (mddb_med_t_ent_t) * med_addr_tab_nents);
2654		d = kmem_alloc(sz, KM_SLEEP);
2655
2656		if (ddi_copyin(data, d, sz, mode) != 0) {
2657			err = EFAULT;
2658			break;
2659		}
2660
2661		err = med_get_t_ioctl((mddb_med_t_parm_t *)d, mode);
2662
2663		break;
2664	}
2665
2666	case MD_MED_SET_T:
2667	{
2668		if (! (mode & FWRITE))
2669			return (EACCES);
2670
2671		sz = (sizeof (mddb_med_t_parm_t) - sizeof (mddb_med_t_ent_t)) +
2672		    (sizeof (mddb_med_t_ent_t) * med_addr_tab_nents);
2673		d = kmem_alloc(sz, KM_SLEEP);
2674
2675		if (ddi_copyin(data, d, sz, mode) != 0) {
2676			err = EFAULT;
2677			break;
2678		}
2679
2680		err = med_set_t_ioctl((mddb_med_t_parm_t *)d, mode);
2681
2682		break;
2683	}
2684
2685	case  MD_GET_SETSTAT:
2686	{
2687		md_gs_stat_parm_t	*gsp;
2688
2689		if (! (mode & FREAD))
2690			return (EACCES);
2691
2692		sz = sizeof (md_gs_stat_parm_t);
2693		d = kmem_alloc(sz, KM_SLEEP);
2694
2695		if (ddi_copyin(data, d, sz, mode) != 0) {
2696			err = EFAULT;
2697			break;
2698		}
2699
2700		gsp = (md_gs_stat_parm_t *)d;
2701
2702		if (gsp->gs_setno > (md_nsets - 1)) {
2703			err = EINVAL;
2704			break;
2705		}
2706
2707		gsp->gs_status = md_set[gsp->gs_setno].s_status;
2708
2709		break;
2710	}
2711
2712	case  MD_SETNMDID:
2713	{
2714		if (!(mode & FREAD))
2715			return (EACCES);
2716
2717		sz = sizeof (mdnm_params_t);
2718		d = kmem_alloc(sz, KM_SLEEP);
2719
2720		if (ddi_copyin(data, d, sz, mode) != 0) {
2721			err = EFAULT;
2722			break;
2723		}
2724
2725		err = update_namespace_did_ioctl((mdnm_params_t *)d, mode);
2726		break;
2727
2728	}
2729	case  MD_IOCUPD_NM:
2730	{
2731		char *dname;
2732		char *pname;
2733		uint_t	devnamelen, pathnamelen;
2734
2735		if (!(mode & FREAD))
2736			return (EACCES);
2737
2738		sz = sizeof (mdnm_params_t);
2739		d = kmem_alloc(sz, KM_SLEEP);
2740
2741		if (ddi_copyin(data, d, sz, mode) != 0) {
2742			err = EFAULT;
2743			break;
2744		}
2745
2746		devnamelen = ((mdnm_params_t *)d)->devname_len;
2747		pathnamelen = ((mdnm_params_t *)d)->pathname_len;
2748
2749		if ((devnamelen > MAXPATHLEN) || (pathnamelen > MAXPATHLEN) ||
2750		    (devnamelen == 0) || (pathnamelen == 0)) {
2751			kmem_free(d, sz);
2752			return (EINVAL);
2753		}
2754
2755		/* alloc memory for devname */
2756		dname = kmem_alloc(devnamelen + 1, KM_SLEEP);
2757
2758		if (ddi_copyin(
2759		    (void *)(uintptr_t)((mdnm_params_t *)d)->devname,
2760		    (void *)dname, devnamelen + 1, mode) != 0) {
2761			err = EFAULT;
2762			kmem_free(dname, devnamelen + 1);
2763			break;
2764		}
2765
2766		pname = kmem_alloc(pathnamelen + 1, KM_SLEEP);
2767
2768		if (ddi_copyin(
2769		    (void *)(uintptr_t)((mdnm_params_t *)d)->pathname,
2770		    (void *)pname, pathnamelen + 1, mode) != 0) {
2771			err = EFAULT;
2772			kmem_free(dname, devnamelen + 1);
2773			kmem_free(pname, pathnamelen + 1);
2774			break;
2775		}
2776
2777		err = update_namespace_ioctl((mdnm_params_t *)d, dname, pname,
2778		    mode);
2779
2780		kmem_free(dname, devnamelen + 1);
2781		kmem_free(pname, pathnamelen + 1);
2782		break;
2783	}
2784
2785	case	MD_IOCUPD_LOCNM:
2786	{
2787		char *dname;
2788		char *pname;
2789		uint_t	devnamelen, pathnamelen;
2790
2791		if (!(mode & FREAD))
2792			return (EACCES);
2793
2794		sz = sizeof (mdnm_params_t);
2795		d = kmem_alloc(sz, KM_SLEEP);
2796
2797		if (ddi_copyin(data, d, sz, mode) != 0) {
2798			err = EFAULT;
2799			break;
2800		}
2801
2802		devnamelen = ((mdnm_params_t *)d)->devname_len;
2803		pathnamelen = ((mdnm_params_t *)d)->pathname_len;
2804
2805		if ((devnamelen > MAXPATHLEN) || (pathnamelen > MAXPATHLEN) ||
2806		    (devnamelen == 0) || (pathnamelen == 0)) {
2807			kmem_free(d, sz);
2808			return (EINVAL);
2809		}
2810
2811		/* alloc memory for devname */
2812		dname = kmem_alloc(devnamelen + 1, KM_SLEEP);
2813
2814		if (ddi_copyin(
2815		    (void *)(uintptr_t)((mdnm_params_t *)d)->devname,
2816		    (void *)dname, devnamelen + 1, mode) != 0) {
2817			err = EFAULT;
2818			kmem_free(dname, devnamelen + 1);
2819			break;
2820		}
2821
2822		pname = kmem_alloc(pathnamelen + 1, KM_SLEEP);
2823
2824		if (ddi_copyin(
2825		    (void *)(uintptr_t)((mdnm_params_t *)d)->pathname,
2826		    (void *)pname, pathnamelen + 1, mode) != 0) {
2827			err = EFAULT;
2828			kmem_free(dname, devnamelen + 1);
2829			kmem_free(pname, pathnamelen + 1);
2830			break;
2831		}
2832
2833		err = update_loc_namespace_ioctl((mdnm_params_t *)d, dname,
2834		    pname, mode);
2835
2836		kmem_free(dname, devnamelen + 1);
2837		kmem_free(pname, pathnamelen + 1);
2838		break;
2839	}
2840
2841	case  MD_SET_SETSTAT:
2842	{
2843#ifdef DEBUG
2844		/* Can be used to set the s_status flags from user code */
2845		md_gs_stat_parm_t	*gsp;
2846
2847		if (! (mode & FWRITE))
2848			return (EACCES);
2849
2850		sz = sizeof (md_gs_stat_parm_t);
2851		d = kmem_alloc(sz, KM_SLEEP);
2852
2853		if (ddi_copyin(data, d, sz, mode) != 0) {
2854			err = EFAULT;
2855			break;
2856		}
2857
2858		gsp = (md_gs_stat_parm_t *)d;
2859
2860		if (gsp->gs_setno > (md_nsets - 1)) {
2861			err = EINVAL;
2862			break;
2863		}
2864
2865		md_set[gsp->gs_setno].s_status = gsp->gs_status;
2866
2867#endif	/* DEBUG */
2868		break;
2869	}
2870
2871	case MD_IOCGET_DID:
2872	{
2873		if (! (mode & FREAD))
2874			return (EACCES);
2875
2876		sz = sizeof (mdnm_params_t);
2877		d = kmem_alloc(sz, KM_SLEEP);
2878
2879		if (ddi_copyin(data, d, sz, mode) != 0) {
2880			err = EFAULT;
2881			break;
2882		}
2883
2884		err = getdid_ioctl((mdnm_params_t *)d, mode);
2885		break;
2886	}
2887
2888	case MD_IOCSET_DID:
2889	{
2890		if (! (mode & FWRITE))
2891			return (EACCES);
2892
2893		sz = sizeof (mdnm_params_t);
2894		d = kmem_alloc(sz, KM_SLEEP);
2895
2896		if (ddi_copyin(data, d, sz, mode) != 0) {
2897			err = EFAULT;
2898			break;
2899		}
2900
2901		err = setdid_ioctl((mdnm_params_t *)d, mode);
2902		break;
2903	}
2904
2905	case MD_IOCGET_DIDMIN:
2906	{
2907		if (! (mode & FREAD))
2908			return (EACCES);
2909
2910		sz = sizeof (mdnm_params_t);
2911		d = kmem_alloc(sz, KM_SLEEP);
2912
2913		if (ddi_copyin(data, d, sz, mode) != 0) {
2914			err = EFAULT;
2915			break;
2916		}
2917
2918		if (((mdnm_params_t *)d)->setno >= md_nsets) {
2919			err = EINVAL;
2920			break;
2921		}
2922
2923		err = getdidmin_ioctl((mdnm_params_t *)d, mode);
2924		break;
2925	}
2926
2927	case MD_IOCDID_STAT:
2928	{
2929		if (!(mode & FREAD))
2930			return (EACCES);
2931
2932		mddb_didstat_case = 1;
2933
2934		err = mddb_didstat_from_user(&d, data, mode, &ds_ctd_addr);
2935
2936		if (err) {
2937			return (err);
2938		}
2939
2940		err = didstat_ioctl((md_i_didstat_t *)d);
2941		break;
2942	}
2943
2944	case MD_UPGRADE_STAT:
2945	{
2946		if (! (mode & FREAD))
2947			return (EACCES);
2948
2949		if (ddi_copyout((caddr_t)&md_in_upgrade, data,
2950		    sizeof (int), mode) != 0) {
2951			err = EFAULT;
2952			break;
2953		}
2954		break;
2955	}
2956
2957	case MD_SETMASTER:
2958	{
2959		if (! (mode & FREAD))
2960			return (EACCES);
2961
2962		sz = sizeof (mddb_setmaster_config_t);
2963		d = kmem_alloc(sz, KM_SLEEP);
2964
2965		if (ddi_copyin(data, d, sz, mode) != 0) {
2966			err = EFAULT;
2967			break;
2968		}
2969
2970		err = mddb_setmaster_ioctl((mddb_setmaster_config_t *)d);
2971		break;
2972	}
2973
2974	case MD_MN_SET_DOORH:
2975	{
2976	/* This ioctl sets the global kernel variable mdmn_door_handle */
2977		if (ddi_copyin(data, &mdmn_door_did, sizeof (int), mode) != 0) {
2978			err = EFAULT;
2979		} else {
2980			err = 0;
2981		}
2982		mdmn_door_handle = door_ki_lookup(mdmn_door_did);
2983
2984		break;
2985	}
2986
2987#ifdef DEBUG
2988	case MD_MN_CHECK_DOOR1:
2989	{
2990	/* This ioctl sends a message through a previously opened door */
2991		int		ret;
2992		int		msg_test = 11111111;
2993		int		nloops = 0;
2994		set_t		setno;
2995		md_mn_kresult_t	*result;
2996		uint_t		flags = MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST;
2997
2998		result = kmem_zalloc(sizeof (md_mn_kresult_t), KM_SLEEP);
2999		if (ddi_copyin(data, &nloops, sizeof (int), mode) != 0) {
3000			err = EFAULT;
3001		} else {
3002			err = 0;
3003		}
3004
3005		/*
3006		 * This is a way to tell ksend_message() to use different sets.
3007		 * Odd numbers go to set 1 even numbers go to set 2
3008		 */
3009		if (nloops & 0x1) {
3010			setno = 1;
3011		} else {
3012			setno = 2;
3013		}
3014		while (nloops--)  {
3015			ret = mdmn_ksend_message(
3016			    setno,
3017			    MD_MN_MSG_TEST1,
3018			    flags,
3019			    0,
3020			    (char *)&msg_test,
3021			    sizeof (msg_test),
3022			    result);
3023
3024			if (ret != 0) {
3025				printf("mdmn_ksend_message failed (%d)\n", ret);
3026			}
3027		}
3028		kmem_free(result, sizeof (md_mn_kresult_t));
3029
3030		break;
3031	}
3032
3033	case MD_MN_CHECK_DOOR2:
3034	{
3035	/* This ioctl sends a message through a previously opened door */
3036		int		ret;
3037		int		msg_test = 22222222;
3038		int		nloops = 0;
3039		md_mn_kresult_t	*result;
3040		set_t		setno;
3041		uint_t		flags = MD_MSGF_NO_LOG;
3042
3043		result = kmem_zalloc(sizeof (md_mn_kresult_t), KM_SLEEP);
3044		if (ddi_copyin(data, &nloops, sizeof (int), mode) != 0) {
3045			err = EFAULT;
3046		} else {
3047			err = 0;
3048		}
3049		/*
3050		 * This is a way to tell ksend_message() to use different sets.
3051		 * Odd numbers go to set 1 even numbers go to set 2
3052		 */
3053		if (nloops & 0x1) {
3054			setno = 1;
3055		} else {
3056			setno = 2;
3057		}
3058		while (nloops--)  {
3059			ret = mdmn_ksend_message(
3060			    setno,
3061			    MD_MN_MSG_TEST2,
3062			    flags,
3063			    0,
3064			    (char *)&msg_test,
3065			    sizeof (msg_test),
3066			    result);
3067
3068			if (ret != 0) {
3069				printf("mdmn_ksend_message failed (%d)\n", ret);
3070			}
3071		}
3072		kmem_free(result, sizeof (md_mn_kresult_t));
3073
3074		break;
3075	}
3076#endif
3077
3078	case MD_MN_OPEN_TEST:
3079	{
3080		md_clu_open_t	*p;
3081		minor_t		mnum;
3082
3083		sz = sizeof (md_clu_open_t);
3084		d = kmem_alloc(sz, KM_SLEEP);
3085
3086		if (ddi_copyin(data, d, sizeof (md_clu_open_t), mode) != 0) {
3087			err = EFAULT;
3088			break;
3089		}
3090
3091		p = (md_clu_open_t *)d;
3092		mnum = md_getminor(p->clu_dev);
3093
3094		if ((err = verify_minor(mnum)) != 0) {
3095			if (err == EINVAL)
3096				(void) mdmderror(&p->clu_mde, MDE_INVAL_UNIT,
3097				    mnum);
3098			break;
3099		}
3100		err = md_clu_ioctl(p);
3101		break;
3102	}
3103
3104	case MD_MN_SET_NODEID:
3105	{
3106		if (! (mode & FWRITE))
3107			return (EACCES);
3108
3109		sz = sizeof (mddb_set_node_params_t);
3110		d = kmem_alloc(sz, KM_SLEEP);
3111
3112		if (ddi_copyin(data, d, sz, mode) != 0) {
3113			err = EFAULT;
3114			break;
3115		}
3116		snp = (mddb_set_node_params_t *)d;
3117
3118		if (snp->sn_setno >= md_nsets) {
3119			err = EINVAL;
3120			break;
3121		}
3122
3123		md_set[snp->sn_setno].s_nodeid = snp->sn_nodeid;
3124
3125		if (md_mn_mynode_id == MD_MN_INVALID_NID)
3126			md_mn_mynode_id = snp->sn_nodeid;
3127#ifdef DEBUG
3128		else if (md_mn_mynode_id != snp->sn_nodeid)
3129			cmn_err(CE_WARN, "Previously set nodeid 0x%x for this"
3130			    "node doesn't match nodeid being set 0x%x\n",
3131			    md_mn_mynode_id, snp->sn_nodeid);
3132#endif /* DEBUG */
3133		err = 0;
3134		break;
3135	}
3136	case MD_IOCGUNIQMSGID:
3137	{
3138		md_mn_msgid_t msgid;
3139		struct timeval32 tv;
3140
3141		if (! (mode & FREAD))
3142			return (EACCES);
3143
3144		uniqtime32(&tv);
3145
3146		/* high 32 bits are the seconds */
3147		msgid.mid_time = (u_longlong_t)tv.tv_sec << 32;
3148		/* low 32 bits are the micro secs */
3149		msgid.mid_time |= tv.tv_usec;
3150
3151		msgid.mid_nid = md_mn_mynode_id;
3152		/*
3153		 * This is never called for submessages, so we better
3154		 * null out the submessage ID
3155		 */
3156		msgid.mid_smid = 0;
3157
3158		if (ddi_copyout((caddr_t)&msgid, data, sizeof (msgid), mode)
3159		    != 0) {
3160			err = EFAULT;
3161			break;
3162		}
3163		break;
3164	}
3165
3166	/*
3167	 * suspend the IO's for a given set number.
3168	 *
3169	 * If setno = 0 is specified, try operation on all snarfed MN disksets.
3170	 * If there are no snarfed MN disksets, then return success.
3171	 *
3172	 * If a specific set number is given, then return EINVAL if unable
3173	 * to perform operation.
3174	 */
3175	case MD_MN_SUSPEND_SET:
3176	{
3177		set_t	setno;
3178		int	rval = 0;
3179		int	i;
3180
3181		if (! (mode & FWRITE))
3182			return (EACCES);
3183
3184		if (ddi_copyin(data, &setno, sizeof (set_t), mode) != 0) {
3185			return (EFAULT);
3186		}
3187		if (setno >= MD_MAXSETS) {
3188			return (EINVAL);
3189		}
3190
3191		mutex_enter(&md_mx);
3192		if (setno == 0) {
3193			/* if set number is 0, we walk all sets */
3194			for (i = 1; i <= (MD_MAXSETS - 1); i++) {
3195				if ((md_set[i].s_status &
3196				    (MD_SET_SNARFED|MD_SET_MNSET)) ==
3197				    (MD_SET_SNARFED|MD_SET_MNSET)) {
3198					md_set[i].s_status |= MD_SET_HALTED;
3199				}
3200			}
3201		} else {
3202			/* If unable to halt specified set, set EINVAL */
3203			if ((md_set[setno].s_status &
3204			    (MD_SET_SNARFED|MD_SET_MNSET)) ==
3205			    (MD_SET_SNARFED|MD_SET_MNSET)) {
3206				md_set[setno].s_status |= MD_SET_HALTED;
3207			} else {
3208				rval = EINVAL;
3209			}
3210		}
3211		mutex_exit(&md_mx);
3212		return (rval);
3213	}
3214
3215	/*
3216	 * resume the IO's for a given set number.
3217	 *
3218	 * If setno = 0 is specified, try operation on all snarfed MN disksets.
3219	 * If there are no snarfed MN disksets, then return success.
3220	 *
3221	 * If a specific set number is given, then return EINVAL if unable
3222	 * to perform operation.
3223	 */
3224	case MD_MN_RESUME_SET:
3225	{
3226		set_t	setno;
3227		int	resumed_set = 0;
3228		int	rval = 0;
3229		int	i;
3230
3231		if (! (mode & FWRITE))
3232			return (EACCES);
3233
3234		if (ddi_copyin(data, &setno, sizeof (set_t), mode) != 0) {
3235			return (EFAULT);
3236		}
3237		if (setno >= MD_MAXSETS) {
3238			return (EINVAL);
3239		}
3240
3241		/* if 0 is specified as the set number, we walk all sets */
3242		mutex_enter(&md_mx);
3243		if (setno == 0) {
3244			/* if set number is 0, we walk all sets */
3245			for (i = 1; i <= (MD_MAXSETS - 1); i++) {
3246				if ((md_set[i].s_status &
3247				    (MD_SET_SNARFED|MD_SET_MNSET)) ==
3248				    (MD_SET_SNARFED|MD_SET_MNSET)) {
3249					md_set[i].s_status &= ~MD_SET_HALTED;
3250					resumed_set = 1;
3251				}
3252			}
3253		} else {
3254			/* If unable to resume specified set, set EINVAL */
3255			if ((md_set[setno].s_status &
3256			    (MD_SET_SNARFED|MD_SET_MNSET)) ==
3257			    (MD_SET_SNARFED|MD_SET_MNSET)) {
3258				md_set[setno].s_status &= ~MD_SET_HALTED;
3259				resumed_set = 1;
3260			} else {
3261				rval = EINVAL;
3262			}
3263		}
3264
3265		/*
3266		 * In case we actually resumed at least one set,
3267		 * Inform all threads waiting for this change
3268		 */
3269		if (resumed_set == 1) {
3270			cv_broadcast(&md_cv);
3271		}
3272
3273		mutex_exit(&md_mx);
3274		return (rval);
3275	}
3276
3277	case MD_MN_MDDB_PARSE:
3278	{
3279		if (! (mode & FWRITE))
3280			return (EACCES);
3281
3282		sz = sizeof (mddb_parse_parm_t);
3283		d = kmem_alloc(sz, KM_SLEEP);
3284
3285		if (ddi_copyin(data, d, sz, mode) != 0) {
3286			err = EFAULT;
3287			break;
3288		}
3289		err = mddb_parse((mddb_parse_parm_t *)d);
3290		break;
3291
3292	}
3293
3294	case MD_MN_MDDB_BLOCK:
3295	{
3296		if (! (mode & FWRITE))
3297			return (EACCES);
3298
3299		sz = sizeof (mddb_block_parm_t);
3300		d = kmem_alloc(sz, KM_SLEEP);
3301
3302		if (ddi_copyin(data, d, sz, mode) != 0) {
3303			err = EFAULT;
3304			break;
3305		}
3306		err = mddb_block((mddb_block_parm_t *)d);
3307		break;
3308
3309	}
3310
3311	case MD_MN_MDDB_OPTRECFIX:
3312	{
3313		if (! (mode & FWRITE))
3314			return (EACCES);
3315
3316		sz = sizeof (mddb_optrec_parm_t);
3317		d = kmem_alloc(sz, KM_SLEEP);
3318
3319		if (ddi_copyin(data, d, sz, mode) != 0) {
3320			err = EFAULT;
3321			break;
3322		}
3323		err = mddb_optrecfix((mddb_optrec_parm_t *)d);
3324		break;
3325
3326	}
3327
3328	case MD_MN_CHK_WRT_MDDB:
3329	{
3330		if (! (mode & FWRITE))
3331			return (EACCES);
3332
3333		sz = sizeof (mddb_config_t);
3334		d = kmem_alloc(sz, KM_SLEEP);
3335
3336		if (ddi_copyin(data, d, sz, mode) != 0) {
3337			err = EFAULT;
3338			break;
3339		}
3340
3341		err = mddb_check_write_ioctl((mddb_config_t *)d);
3342		break;
3343	}
3344
3345	case MD_MN_SET_SETFLAGS:
3346	case MD_MN_GET_SETFLAGS:
3347	{
3348		if (! (mode & FREAD))
3349			return (EACCES);
3350
3351		sz = sizeof (mddb_setflags_config_t);
3352		d = kmem_alloc(sz, KM_SLEEP);
3353
3354		if (ddi_copyin(data, d, sz, mode) != 0) {
3355			err = EFAULT;
3356			break;
3357		}
3358
3359		err = mddb_setflags_ioctl((mddb_setflags_config_t *)d);
3360		break;
3361	}
3362
3363	case MD_MN_COMMD_ERR:
3364	{
3365		md_mn_commd_err_t *cmp;
3366		char *msg;
3367
3368		sz = sizeof (md_mn_commd_err_t);
3369		d = kmem_zalloc(sz, KM_SLEEP);
3370
3371		if (ddi_copyin(data, d, sz, mode) != 0) {
3372			err = EFAULT;
3373			break;
3374		}
3375
3376		cmp = (md_mn_commd_err_t *)d;
3377		if (cmp->size > MAXPATHLEN) {
3378			err = EINVAL;
3379			break;
3380		}
3381
3382		msg = (char *)kmem_zalloc(cmp->size + 1, KM_SLEEP);
3383		if (ddi_copyin((caddr_t)(uintptr_t)cmp->md_message, msg,
3384		    cmp->size, mode) != 0) {
3385			kmem_free(msg, cmp->size + 1);
3386			err = EFAULT;
3387			break;
3388		}
3389		cmn_err(CE_WARN, "%s\n", msg);
3390		kmem_free(msg, cmp->size + 1);
3391		break;
3392	}
3393
3394	case MD_IOCMAKE_DEV:
3395	{
3396		if (! (mode & FWRITE))
3397			return (EACCES);
3398
3399		sz = sizeof (md_mkdev_params_t);
3400
3401		if ((d = kmem_alloc(sz, KM_NOSLEEP)) == NULL)
3402			return (ENOMEM);
3403
3404		if (ddi_copyin(data, d, sz, mode) != 0) {
3405			err = EFAULT;
3406			break;
3407		}
3408
3409		err = mkdev_ioctl((md_mkdev_params_t *)d);
3410		break;
3411	}
3412
3413	case MD_IOCREM_DEV:
3414	{
3415		set_t	setno;
3416
3417		if (! (mode & FWRITE))
3418			return (EACCES);
3419
3420		sz = sizeof (minor_t);
3421
3422		d = kmem_zalloc(sz, KM_SLEEP);
3423
3424		if (ddi_copyin(data, d, sz, mode) != 0) {
3425			err = EFAULT;
3426			break;
3427		}
3428
3429		/*
3430		 * This ioctl is called to cleanup the device name
3431		 * space when metainit fails or -n is invoked
3432		 * In this case, reclaim the dispatched un slot
3433		 */
3434		setno = MD_MIN2SET(*(minor_t *)d);
3435		if (setno >= md_nsets) {
3436			err = EINVAL;
3437			break;
3438		} else if (md_set[setno].s_un_next <= 0) {
3439			err = EFAULT;
3440			break;
3441		} else {
3442			md_set[setno].s_un_next--;
3443		}
3444
3445		/*
3446		 * Attempt to remove the assocated device node
3447		 */
3448		md_remove_minor_node(*(minor_t *)d);
3449		break;
3450	}
3451
3452	/*
3453	 * Update md_mn_commd_pid global to reflect presence or absence of
3454	 * /usr/sbin/rpc.mdcommd. This allows us to determine if an RPC failure
3455	 * is expected during a mdmn_ksend_message() handshake. If the commd is
3456	 * not present then an RPC failure is acceptable. If the commd _is_
3457	 * present then an RPC failure means we have an inconsistent view across
3458	 * the cluster.
3459	 */
3460	case MD_MN_SET_COMMD_RUNNING:
3461	{
3462		if (! (mode & FWRITE))
3463			return (EACCES);
3464
3465		md_mn_commd_pid = (pid_t)(intptr_t)data;
3466		err = 0;
3467		break;
3468	}
3469
3470	case MD_IOCIMP_LOAD:
3471	{
3472		if (! (mode & FWRITE))
3473			return (EACCES);
3474
3475		mddb_config_case = 1;
3476
3477		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
3478		    &c_old_devid_addr);
3479
3480		if (err) {
3481			return (err);
3482		}
3483
3484		err = md_imp_snarf_set((mddb_config_t *)d);
3485		break;
3486
3487	}
3488
3489	case MD_DB_LBINITTIME:
3490	{
3491		if (! (mode & FWRITE))
3492			return (EACCES);
3493
3494		mddb_config_case = 1;
3495
3496		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
3497		    &c_old_devid_addr);
3498
3499		if (err)
3500			return (err);
3501
3502		err = get_lb_inittime_ioctl((mddb_config_t *)d);
3503		break;
3504	}
3505	case MD_IOCUPDATE_NM_RR_DID:
3506	{
3507		if (! (mode & FWRITE))
3508			return (EACCES);
3509
3510		mddb_config_case = 1;
3511
3512		err = mddb_config_from_user(&d, data, mode, &c_devid_addr,
3513		    &c_old_devid_addr);
3514
3515		if (err)
3516			return (err);
3517
3518		err = md_update_nm_rr_did_ioctl((mddb_config_t *)d);
3519		break;
3520	}
3521	default:
3522		return (ENOTTY);	/* used by next level up */
3523	}
3524
3525	/*
3526	 * copyout and free any args
3527	 */
3528	if (mddb_config_case) {
3529		err_to_user = mddb_config_to_user(d, data, mode, c_devid_addr,
3530		    c_old_devid_addr);
3531	} else if (mddb_didstat_case) {
3532		err_to_user = mddb_didstat_to_user(d, data, mode, ds_ctd_addr);
3533	} else if (sz != 0) {
3534		if (ddi_copyout(d, data, sz, mode) != 0) {
3535			err = EFAULT;
3536		}
3537		kmem_free(d, sz);
3538	}
3539
3540	if (err)
3541		return (err);
3542	return (err_to_user);
3543}
3544
3545int
3546md_admin_ioctl(md_dev64_t dev, int cmd, caddr_t data, int mode, IOLOCK *lockp)
3547{
3548	md_driver_t	drv;
3549	int		modindex;
3550	int		err;
3551
3552	/*
3553	 * see if we can do this without involving the subdriver
3554	 */
3555	if ((err = md_base_ioctl(dev, cmd, data, mode, lockp)) != ENOTTY)
3556		return (err);
3557
3558	/*
3559	 * see what subdriver we need
3560	 */
3561	if (! ISMDIOC(cmd))
3562		return (ENOTTY);
3563
3564	if ((!NODBNEEDED(cmd)) && md_snarf_db_set(MD_LOCAL_SET, NULL) != 0)
3565		return (ENODEV);
3566
3567	if (ddi_copyin(data, (caddr_t)&drv, sizeof (drv), mode) != 0)
3568		return (EFAULT);
3569
3570	/*
3571	 * load subdriver if not already loaded
3572	 */
3573	if (((modindex = md_getmodindex(&drv, 0, NODBNEEDED(cmd))) == -1) ||
3574	    (md_ops[modindex]->md_ioctl == NULL))
3575		return (ENOTTY);
3576
3577	/*
3578	 * dispatch to subdriver
3579	 */
3580	return ((*md_ops[modindex]->md_ioctl)(md_dev64_to_dev(dev), cmd, data,
3581	    mode, lockp));
3582}
3583
3584void
3585md_get_geom(
3586	md_unit_t	*un,
3587	struct dk_geom	*gp
3588)
3589{
3590	diskaddr_t		tb = un->c.un_total_blocks;
3591	uint_t			cylsize = un->c.un_nhead * un->c.un_nsect;
3592
3593	bzero((caddr_t)gp, sizeof (*gp));
3594	gp->dkg_nhead = un->c.un_nhead;
3595	gp->dkg_nsect = un->c.un_nsect;
3596	gp->dkg_rpm = un->c.un_rpm;
3597	gp->dkg_write_reinstruct = un->c.un_wr_reinstruct;
3598	gp->dkg_read_reinstruct = un->c.un_rd_reinstruct;
3599	gp->dkg_ncyl = (ushort_t)(tb / cylsize);
3600	if (! (un->c.un_flag & MD_LABELED))	/* skip first cyl */
3601		gp->dkg_ncyl += 1;
3602	gp->dkg_pcyl = gp->dkg_ncyl;
3603}
3604
3605void
3606md_get_vtoc(md_unit_t *un, struct vtoc *vtoc)
3607{
3608	caddr_t			v;
3609	mddb_recstatus_t	status;
3610	struct vtoc32		*vt32;
3611
3612	/*
3613	 * Return vtoc structure fields in the provided VTOC area, addressed
3614	 * by *vtoc.
3615	 *
3616	 */
3617
3618	if (un->c.un_vtoc_id) {
3619		status = mddb_getrecstatus(un->c.un_vtoc_id);
3620		if (status == MDDB_OK) {
3621			v = mddb_getrecaddr(un->c.un_vtoc_id);
3622			/* if this seems to be a sane vtoc, just copy it ... */
3623			if (((struct vtoc *)v)->v_sanity == VTOC_SANE) {
3624				bcopy(v, (caddr_t)vtoc, sizeof (struct vtoc));
3625			} else {
3626				/* ... else assume a vtoc32 was stored here */
3627				vt32 = (struct vtoc32 *)v;
3628				vtoc32tovtoc((*vt32), (*vtoc));
3629			}
3630			if (un->c.un_flag & MD_LABELED)
3631				vtoc->v_part[0].p_start = 0ULL;
3632			else
3633				vtoc->v_part[0].p_start = (diskaddr_t)
3634				    (un->c.un_nhead * un->c.un_nsect);
3635			vtoc->v_part[0].p_size = un->c.un_total_blocks;
3636			vtoc->v_version = V_VERSION;
3637			vtoc->v_sectorsz = DEV_BSIZE;
3638			return;
3639		}
3640
3641		un->c.un_vtoc_id = 0;
3642		mddb_commitrec_wrapper(un->c.un_record_id);
3643	}
3644
3645	bzero((caddr_t)vtoc, sizeof (struct vtoc));
3646	vtoc->v_sanity = VTOC_SANE;
3647	vtoc->v_nparts = 1;
3648	vtoc->v_version = V_VERSION;
3649	vtoc->v_sectorsz = DEV_BSIZE;
3650	if (un->c.un_flag & MD_LABELED)
3651		vtoc->v_part[0].p_start = 0ULL;
3652	else
3653		vtoc->v_part[0].p_start = (diskaddr_t)(un->c.un_nhead *
3654		    un->c.un_nsect);
3655	vtoc->v_part[0].p_size = un->c.un_total_blocks;
3656}
3657
3658int
3659md_set_vtoc(md_unit_t *un, struct vtoc *vtoc)
3660{
3661
3662	struct partition	*vpart;
3663	int			i;
3664	mddb_recid_t		recid;
3665	mddb_recid_t		recids[3];
3666	mddb_recstatus_t	status;
3667	caddr_t			v;
3668	diskaddr_t		sb;
3669
3670	/*
3671	 * Sanity-check the vtoc
3672	 */
3673	if (vtoc->v_sanity != VTOC_SANE || vtoc->v_nparts != 1)
3674		return (EINVAL);
3675
3676	/* don't allow to create a vtoc for a big metadevice */
3677	if (un->c.un_revision & MD_64BIT_META_DEV)
3678		return (ENOTSUP);
3679	/*
3680	 * Validate the partition table
3681	 */
3682	vpart = vtoc->v_part;
3683	for (i = 0; i < V_NUMPAR; i++, vpart++) {
3684		if (i == 0) {
3685			if (un->c.un_flag & MD_LABELED)
3686				sb = 0ULL;
3687			else
3688				sb = (diskaddr_t)(un->c.un_nhead *
3689				    un->c.un_nsect);
3690			if (vpart->p_start != sb)
3691				return (EINVAL);
3692			if (vpart->p_size != un->c.un_total_blocks)
3693				return (EINVAL);
3694			continue;
3695		}
3696		/* all other partitions must be zero */
3697		if (vpart->p_start != 0ULL)
3698			return (EINVAL);
3699		if (vpart->p_size != 0ULL)
3700			return (EINVAL);
3701	}
3702
3703	if (un->c.un_vtoc_id) {
3704		recid = un->c.un_vtoc_id;
3705		status = mddb_getrecstatus(recid);
3706		if (status == MDDB_OK) {
3707			/*
3708			 * If there's enough space in the record, and the
3709			 * existing record is a vtoc record (not EFI),
3710			 * we just can use the existing space.
3711			 * Otherwise, we create a new MDDB_VTOC record for
3712			 * this unit.
3713			 */
3714			if ((mddb_getrecsize(recid) >= sizeof (struct vtoc)) &&
3715			    ((un->c.un_flag & MD_EFILABEL) == 0)) {
3716				v = mddb_getrecaddr(recid);
3717				bcopy((caddr_t)vtoc, v, sizeof (struct vtoc));
3718				mddb_commitrec_wrapper(recid);
3719				recids[0] = recid;
3720				recids[1] = un->c.un_record_id;
3721				recids[2] = 0;
3722				un->c.un_flag &= ~MD_EFILABEL;
3723				mddb_commitrecs_wrapper(recids);
3724				return (0);
3725			}
3726
3727			un->c.un_vtoc_id = 0;
3728			mddb_commitrec_wrapper(un->c.un_record_id);
3729			mddb_deleterec_wrapper(recid);
3730		}
3731	}
3732
3733	recid = mddb_createrec(sizeof (struct vtoc), MDDB_VTOC, 0,
3734	    MD_CRO_32BIT, MD_UN2SET(un));
3735
3736	if (recid < 0) {
3737		return (ENOSPC);
3738	}
3739
3740	recids[0] = recid;
3741	recids[1] = un->c.un_record_id;
3742	recids[2] = 0;
3743	v = mddb_getrecaddr(recid);
3744	bcopy((caddr_t)vtoc, v, sizeof (struct vtoc));
3745
3746	un->c.un_vtoc_id = recid;
3747	un->c.un_flag &= ~MD_EFILABEL;
3748	mddb_commitrecs_wrapper(recids);
3749	return (0);
3750}
3751
3752void
3753md_get_extvtoc(md_unit_t *un, struct extvtoc *extvtoc)
3754{
3755	caddr_t			v;
3756	mddb_recstatus_t	status;
3757	struct vtoc32		*vt32;
3758	struct vtoc		*vtoc;
3759
3760	/*
3761	 * Return extvtoc structure fields in the provided VTOC area, addressed
3762	 * by *extvtoc.
3763	 *
3764	 */
3765
3766	bzero((caddr_t)extvtoc, sizeof (struct extvtoc));
3767	if (un->c.un_vtoc_id) {
3768		status = mddb_getrecstatus(un->c.un_vtoc_id);
3769		if (status == MDDB_OK) {
3770			v = mddb_getrecaddr(un->c.un_vtoc_id);
3771			if (un->c.un_flag & MD_EFILABEL) {
3772				bcopy(v, (caddr_t)&(extvtoc->v_volume),
3773				    LEN_DKL_VVOL);
3774			} else {
3775				/*
3776				 * if this seems to be a sane vtoc,
3777				 * just copy it ...
3778				 */
3779				if (((struct vtoc *)v)->v_sanity == VTOC_SANE) {
3780					vtoc = (struct vtoc *)v;
3781					vtoctoextvtoc((*vtoc), (*extvtoc));
3782				} else {
3783					/* assume a vtoc32 was stored here */
3784					vt32 = (struct vtoc32 *)v;
3785					vtoc32toextvtoc((*vt32), (*extvtoc));
3786				}
3787			}
3788		} else {
3789			un->c.un_vtoc_id = 0;
3790			mddb_commitrec_wrapper(un->c.un_record_id);
3791		}
3792	}
3793
3794	extvtoc->v_sanity = VTOC_SANE;
3795	extvtoc->v_nparts = 1;
3796	extvtoc->v_version = V_VERSION;
3797	extvtoc->v_sectorsz = DEV_BSIZE;
3798	if (un->c.un_flag & MD_LABELED)
3799		extvtoc->v_part[0].p_start = 0ULL;
3800	else
3801		extvtoc->v_part[0].p_start = (diskaddr_t)(un->c.un_nhead *
3802		    un->c.un_nsect);
3803	extvtoc->v_part[0].p_size = un->c.un_total_blocks;
3804}
3805
3806int
3807md_set_extvtoc(md_unit_t *un, struct extvtoc *extvtoc)
3808{
3809
3810	struct extpartition	*vpart;
3811	int			i;
3812	mddb_recid_t		recid;
3813	mddb_recid_t		recids[3];
3814	mddb_recstatus_t	status;
3815	caddr_t			v;
3816	diskaddr_t		sb;
3817	struct vtoc		vtoc;
3818
3819	/*
3820	 * Sanity-check the vtoc
3821	 */
3822	if (extvtoc->v_sanity != VTOC_SANE || extvtoc->v_nparts != 1)
3823		return (EINVAL);
3824
3825	/*
3826	 * Validate the partition table
3827	 */
3828	vpart = extvtoc->v_part;
3829	for (i = 0; i < V_NUMPAR; i++, vpart++) {
3830		if (i == 0) {
3831			if (un->c.un_flag & MD_LABELED)
3832				sb = 0ULL;
3833			else
3834				sb = (diskaddr_t)(un->c.un_nhead *
3835				    un->c.un_nsect);
3836			if (vpart->p_start != sb)
3837				return (EINVAL);
3838			if (vpart->p_size != un->c.un_total_blocks)
3839				return (EINVAL);
3840			continue;
3841		}
3842		/* all other partitions must be zero */
3843		if (vpart->p_start != 0ULL)
3844			return (EINVAL);
3845		if (vpart->p_size != 0)
3846			return (EINVAL);
3847	}
3848
3849	if (!(un->c.un_revision & MD_64BIT_META_DEV)) {
3850		extvtoctovtoc((*extvtoc), (vtoc));
3851		return (md_set_vtoc(un, &vtoc));
3852	}
3853
3854	/*
3855	 * Since the size is greater than 1 TB the information can either
3856	 * be stored as a VTOC or EFI.  Since EFI uses less space just use
3857	 * it.  md_get_extvtoc can reconstruct the label information from
3858	 * either format.
3859	 */
3860	if (un->c.un_vtoc_id) {
3861		recid = un->c.un_vtoc_id;
3862		status = mddb_getrecstatus(recid);
3863		if (status == MDDB_OK) {
3864			/*
3865			 * If there's enough space in the record, and the
3866			 * existing record is an EFI record (not vtoc),
3867			 * we just can use the existing space.
3868			 * Otherwise, we create a new MDDB_EFILABEL record for
3869			 * this unit.
3870			 */
3871			if ((mddb_getrecsize(recid) >= MD_EFI_PARTNAME_BYTES) &&
3872			    (un->c.un_flag & MD_EFILABEL))  {
3873				v = mddb_getrecaddr(recid);
3874				bzero((caddr_t)v, MD_EFI_PARTNAME_BYTES);
3875				bcopy((caddr_t)&(extvtoc->v_volume),
3876				    v, LEN_DKL_VVOL);
3877				mddb_commitrec_wrapper(recid);
3878				return (0);
3879			}
3880
3881			un->c.un_vtoc_id = 0;
3882			mddb_commitrec_wrapper(un->c.un_record_id);
3883			mddb_deleterec_wrapper(recid);
3884		}
3885	}
3886
3887	recid = mddb_createrec(MD_EFI_PARTNAME_BYTES, MDDB_EFILABEL, 0,
3888	    MD_CRO_32BIT, MD_UN2SET(un));
3889
3890	if (recid < 0) {
3891		return (ENOSPC);
3892	}
3893
3894	recids[0] = recid;
3895	recids[1] = un->c.un_record_id;
3896	recids[2] = 0;
3897	v = mddb_getrecaddr(recid);
3898	bzero((caddr_t)v, MD_EFI_PARTNAME_BYTES);
3899	bcopy((caddr_t)&(extvtoc->v_volume), v, LEN_DKL_VVOL);
3900
3901	un->c.un_vtoc_id = recid;
3902	un->c.un_flag |= MD_EFILABEL;
3903	mddb_commitrecs_wrapper(recids);
3904	return (0);
3905}
3906
3907
3908void
3909md_get_cgapart(md_unit_t *un, struct dk_map *dkmapp)
3910{
3911
3912	/* skip the first cyl */
3913	dkmapp->dkl_cylno = 1;
3914
3915	dkmapp->dkl_nblk = (daddr_t)un->c.un_total_blocks;
3916}
3917
3918static struct uuid md_efi_reserved = EFI_RESERVED;
3919
3920/*
3921 * md_get_efi
3922 * INPUT:
3923 *	un; the md_unit
3924 *	buf; the buffer that is preallocated by the calling routine and
3925 *		capable of taking the EFI label for this unit
3926 * OUTPUT:
3927 *	A filled buffer, containing one struct efi_gpt followed by one
3928 *		struct efi_gpe, because a md efi only has one valid partition
3929 *		We fetch that date either from the mddb (like vtoc)
3930 *		or we a fake an EFI label.
3931 *
3932 * NOTES:
3933 *	We do not provide for any global unique identifiers,
3934 *	We also use the field c.un_vtoc_id, as the semantic is very similar
3935 *	When we are called, it's already checked, that this unit has an EFI
3936 *		label and not a vtoc
3937 */
3938
3939void
3940md_get_efi(md_unit_t *un, char *buf)
3941{
3942	caddr_t		v;
3943	efi_gpt_t	*efi_header = (efi_gpt_t *)buf;
3944	efi_gpe_t	*efi_part = (efi_gpe_t *)(buf + sizeof (efi_gpt_t));
3945	mddb_recstatus_t	status;
3946
3947	/* first comes the header */
3948	efi_header->efi_gpt_Signature = LE_64(EFI_SIGNATURE);
3949	efi_header->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t));
3950	efi_header->efi_gpt_NumberOfPartitionEntries = LE_32(1);
3951	efi_header->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t));
3952	efi_header->efi_gpt_LastUsableLBA = LE_64(un->c.un_total_blocks - 1);
3953	efi_header->efi_gpt_FirstUsableLBA = 0;
3954	efi_header->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT);
3955
3956	/*
3957	 * We don't fill out any of these:
3958	 *
3959	 * efi_header->efi_gpt_HeaderCRC32;
3960	 * efi_header->efi_gpt_DiskGUID;
3961	 * efi_header->efi_gpt_PartitionEntryArrayCRC32;
3962	 * efi_header->efi_gpt_Reserved1;
3963	 * efi_header->efi_gpt_MyLBA;
3964	 * efi_header->efi_gpt_AlternateLBA;
3965	 * efi_header->efi_gpt_Reserved2[LEN_EFI_PAD];
3966	 * efi_header->efi_gpt_PartitionEntryLBA;
3967	 */
3968
3969	/*
3970	 * We copy back one partition, of type reserved,
3971	 * which may contain the name of the metadevice
3972	 * (this is what was used to be v_volume for a vtoc device)
3973	 * if no name is stored in the vtoc record, we hand an empty name
3974	 * to the user
3975	 */
3976
3977	UUID_LE_CONVERT(efi_part->efi_gpe_PartitionTypeGUID, md_efi_reserved);
3978	if (un->c.un_flag & MD_LABELED)
3979		efi_part->efi_gpe_StartingLBA = LE_64(1ULL);
3980	else
3981		efi_part->efi_gpe_StartingLBA = 0;
3982
3983	efi_part->efi_gpe_EndingLBA = LE_64(un->c.un_total_blocks - 1);
3984
3985	if (un->c.un_vtoc_id) {
3986		status = mddb_getrecstatus(un->c.un_vtoc_id);
3987		if (status == MDDB_OK) {
3988			v = mddb_getrecaddr(un->c.un_vtoc_id);
3989			bcopy(v, (caddr_t)&(efi_part->efi_gpe_PartitionName),
3990			    MD_EFI_PARTNAME_BYTES);
3991			return;
3992		}
3993		un->c.un_vtoc_id = 0;
3994		mddb_commitrec_wrapper(un->c.un_record_id);
3995	}
3996
3997	/*
3998	 * We don't fill out any of these
3999	 * efi_part->efi_gpe_UniquePartitionGUID
4000	 * efi_part->efi_gpe_Attributes
4001	 */
4002}
4003
4004
4005/*
4006 * md_set_efi
4007 * INPUT:
4008 *	un; a md_unit
4009 *	buf; a buffer that is holding an EFI label for this unit
4010 *
4011 * PURPOSE:
4012 *	Perform some sanity checks on the EFI label provided,
4013 *	Then store efi_gpe_PartitionName in the mddb
4014 *	and link the unit's c.un_vtoc_id field to it.
4015 *
4016 * RETURN:
4017 *	EINVAL if any of the sanity checks fail
4018 *	0 on succes
4019 *
4020 * NOTES:
4021 *	We do not provide for any global unique identifiers,
4022 *	We also use the field c.un_vtoc_id, as the semantic is very similar
4023 *	When we are called, it's already checked, that this unit has an EFI
4024 *		label and not a vtoc
4025 */
4026
4027
4028int
4029md_set_efi(md_unit_t *un, char *buf)
4030{
4031
4032	mddb_recid_t		recid;
4033	mddb_recid_t		recids[3];
4034	mddb_recstatus_t	status;
4035	caddr_t			v;
4036	efi_gpt_t	*efi_header = (efi_gpt_t *)buf;
4037	efi_gpe_t	*efi_part = (efi_gpe_t *)(buf + sizeof (efi_gpt_t));
4038	struct uuid	md_efi_reserved_le;
4039
4040	/*
4041	 * Sanity-check the EFI label
4042	 */
4043	if ((efi_header->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) ||
4044	    (efi_header->efi_gpt_NumberOfPartitionEntries != LE_32(1)))
4045		return (EINVAL);
4046
4047	UUID_LE_CONVERT(md_efi_reserved_le, md_efi_reserved);
4048
4049	/*
4050	 * Validate the partition
4051	 */
4052	if (efi_part->efi_gpe_StartingLBA != 0 ||
4053	    efi_part->efi_gpe_EndingLBA != LE_64(un->c.un_total_blocks - 1) ||
4054	    bcmp(&efi_part->efi_gpe_PartitionTypeGUID, &md_efi_reserved_le,
4055	    sizeof (struct uuid))) {
4056		return (EINVAL);
4057	}
4058	/*
4059	 * If no name is specified, we have nothing to do and return success.
4060	 * because efi_gpe_PartitionName is in unicode form, we have to
4061	 * check the first two bytes of efi_gpe_PartitionName.
4062	 */
4063	if (((char *)(uintptr_t)efi_part->efi_gpe_PartitionName[0] == NULL) &&
4064	    ((char *)(uintptr_t)efi_part->efi_gpe_PartitionName[1] == NULL)) {
4065		return (0);
4066	}
4067
4068	if (un->c.un_vtoc_id) {
4069		recid = un->c.un_vtoc_id;
4070		status = mddb_getrecstatus(recid);
4071		if (status == MDDB_OK) {
4072			/*
4073			 * If there's enough space in the record, and the
4074			 * existing record is an EFI record (not vtoc),
4075			 * we just can use the existing space.
4076			 * Otherwise, we create a new MDDB_EFILABEL record for
4077			 * this unit.
4078			 */
4079			if ((mddb_getrecsize(recid) >= MD_EFI_PARTNAME_BYTES) &&
4080			    (un->c.un_flag & MD_EFILABEL))  {
4081				v = mddb_getrecaddr(recid);
4082				bcopy((caddr_t)&efi_part->efi_gpe_PartitionName,
4083				    v, MD_EFI_PARTNAME_BYTES);
4084				mddb_commitrec_wrapper(recid);
4085				return (0);
4086			}
4087
4088			un->c.un_vtoc_id = 0;
4089			mddb_commitrec_wrapper(un->c.un_record_id);
4090			mddb_deleterec_wrapper(recid);
4091		}
4092	}
4093
4094	recid = mddb_createrec(MD_EFI_PARTNAME_BYTES, MDDB_EFILABEL, 0,
4095	    MD_CRO_32BIT, MD_UN2SET(un));
4096
4097	if (recid < 0) {
4098		return (ENOSPC);
4099	}
4100
4101	recids[0] = recid;
4102	recids[1] = un->c.un_record_id;
4103	recids[2] = 0;
4104	v = mddb_getrecaddr(recid);
4105	bcopy((caddr_t)&efi_part->efi_gpe_PartitionName, v,
4106	    MD_EFI_PARTNAME_BYTES);
4107
4108	un->c.un_vtoc_id = recid;
4109	un->c.un_flag |= MD_EFILABEL;
4110	mddb_commitrecs_wrapper(recids);
4111	return (0);
4112}
4113
4114int
4115md_dkiocgetefi(minor_t mnum, void *data, int mode)
4116{
4117	dk_efi_t	efi;
4118	caddr_t		*buf;
4119	int		rval = 0;
4120	mdi_unit_t	*ui;
4121	md_unit_t	*mdun;
4122
4123	if (!(mode & FREAD))
4124		return (EACCES);
4125
4126	if (ddi_copyin(data, &efi, sizeof (dk_efi_t), mode))
4127		return (EFAULT);
4128
4129	efi.dki_data = (void *)(uintptr_t)efi.dki_data_64;
4130
4131	/*
4132	 * If the user specified a zero length or a null pointer, we give them
4133	 * the number of bytes to alloc in user land.
4134	 */
4135	if (efi.dki_length == 0 || efi.dki_data == NULL) {
4136		efi.dki_length = MD_EFI_LABEL_SIZE;
4137		if (ddi_copyout(&efi, data, sizeof (dk_efi_t), mode))
4138			return (EFAULT);
4139		return (0);
4140	}
4141	/* Bad size specified, better not answer to that query */
4142	if (efi.dki_length < MD_EFI_LABEL_SIZE)
4143		return (EINVAL);
4144
4145	if ((ui = MDI_UNIT(mnum)) == NULL)
4146		return (ENXIO);
4147
4148	/*
4149	 * We don't want to allocate as much bytes as we are told,
4150	 * because we know the good size is MD_EFI_LABEL_SIZE
4151	 */
4152	efi.dki_length = MD_EFI_LABEL_SIZE;
4153	buf = kmem_zalloc(MD_EFI_LABEL_SIZE, KM_SLEEP);
4154
4155	mdun = (md_unit_t *)md_unit_readerlock(ui);
4156	md_get_efi(mdun, (char *)buf);
4157	md_unit_readerexit(ui);
4158
4159	if (ddi_copyout(buf, efi.dki_data, efi.dki_length, mode))
4160		rval = EFAULT;
4161
4162	kmem_free(buf, MD_EFI_LABEL_SIZE);
4163	return (rval);
4164}
4165
4166int
4167md_dkiocsetefi(minor_t mnum, void *data, int mode)
4168{
4169	dk_efi_t	efi;
4170	caddr_t		*buf;
4171	int		rval = 0;
4172	mdi_unit_t	*ui;
4173	md_unit_t	*mdun;
4174
4175	if (!(mode & FREAD))
4176		return (EACCES);
4177
4178	if ((ui = MDI_UNIT(mnum)) == NULL)
4179		return (ENXIO);
4180
4181	if (ddi_copyin(data, &efi, sizeof (dk_efi_t), mode))
4182		return (EFAULT);
4183
4184	efi.dki_data = (void *)(uintptr_t)efi.dki_data_64;
4185
4186	/* Sanity check of the skeleton */
4187	if ((efi.dki_length > sizeof (efi_gpt_t) + EFI_MIN_ARRAY_SIZE) ||
4188	    (efi.dki_length < sizeof (efi_gpt_t) + sizeof (efi_gpe_t)) ||
4189	    (efi.dki_data == NULL))
4190		return (EINVAL);
4191
4192	/*
4193	 * It's only a real EFI label if the location is 1
4194	 * in all other cases, we do nothing but say we did.
4195	 */
4196	if (efi.dki_lba != 1)
4197		return (0);	/* success */
4198
4199	buf = kmem_alloc(efi.dki_length, KM_SLEEP);
4200	/* And here we copy in the real data */
4201	if (ddi_copyin(efi.dki_data, buf, efi.dki_length, mode)) {
4202		rval = EFAULT;
4203	} else {
4204		mdun = (md_unit_t *)md_unit_readerlock(ui);
4205		rval = md_set_efi(mdun, (char *)buf);
4206		md_unit_readerexit(ui);
4207	}
4208
4209	kmem_free(buf, efi.dki_length);
4210	return (rval);
4211}
4212
4213/*
4214 * md_dkiocpartition()
4215 * Return the appropriate partition64 structure for a given metadevice.
4216 *
4217 * Actually the only real information being returned is the number of blocks
4218 * of the specified metadevice.
4219 * The starting block is always 0, and so is the partition number, because
4220 * metadevices don't have slices.
4221 *
4222 * This function is generic for all types of metadevices.
4223 */
4224int
4225md_dkiocpartition(minor_t mnum, void *data, int mode)
4226{
4227	struct partition64	p64;
4228	mdi_unit_t		*ui;
4229	md_unit_t		*un;
4230	int			rval = 0;
4231
4232	if (!(mode & FREAD))
4233		return (EACCES);
4234
4235
4236	if ((ui = MDI_UNIT(mnum)) == NULL)
4237		return (ENXIO);
4238
4239	if (ddi_copyin(data, &p64, sizeof (struct partition64), mode))
4240		return (EFAULT);
4241
4242	if (p64.p_partno != 0)
4243		return (ESRCH);
4244
4245	un = (md_unit_t *)md_unit_readerlock(ui);
4246	/* All metadevices share the same PartitionTypeGUID (see md_get_efi) */
4247	UUID_LE_CONVERT(p64.p_type, md_efi_reserved);
4248
4249	p64.p_partno = 0;
4250	p64.p_start = 0;
4251	p64.p_size = un->c.un_total_blocks;
4252	md_unit_readerexit(ui);
4253
4254	if (ddi_copyout(&p64, data, sizeof (struct partition64), mode)) {
4255		rval = EFAULT;
4256	}
4257
4258	return (rval);
4259}
4260
4261
4262/*
4263 * Remove device node
4264 */
4265void
4266md_remove_minor_node(minor_t mnum)
4267{
4268	char			name[16];
4269	extern dev_info_t	*md_devinfo;
4270
4271	/*
4272	 * Attempt release of its minor node
4273	 */
4274	(void) snprintf(name, sizeof (name), "%d,%d,blk", MD_MIN2SET(mnum),
4275	    MD_MIN2UNIT(mnum));
4276	ddi_remove_minor_node(md_devinfo, name);
4277
4278	(void) snprintf(name, sizeof (name), "%d,%d,raw", MD_MIN2SET(mnum),
4279	    MD_MIN2UNIT(mnum));
4280	ddi_remove_minor_node(md_devinfo, name);
4281}
4282