1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/*
26 * Metadevice diskset utility.
27 */
28
29#include <meta.h>
30#include <sys/lvm/md_mddb.h>
31#include <sdssc.h>
32
/*
 * Top-level operations known to metaset.  The numeric values are spelled
 * out; they are the same values the compiler assigns implicitly when the
 * enumerators are listed in this order.
 */
enum metaset_cmd {
	notspecified	= 0,
	add		= 1,
	balance		= 2,
	delete		= 3,
	cluster		= 4,
	isowner		= 5,
	purge		= 6,
	query		= 7,
	release		= 8,
	take		= 9,
	join		= 10,	/* Join a multinode diskset */
	withdraw	= 11	/* Withdraw from a multinode diskset */
};
47
/*
 * Sub-commands of the cluster interface.  Values are written explicitly
 * and match the implicit numbering of the original declaration order.
 */
enum cluster_cmd {
	ccnotspecified	= 0,
	clusterversion	= 1,	/* Return the version of the cluster I/F */
	clusterdisksin	= 2,	/* List disks in a given diskset */
	clustertake	= 3,	/* back door for Cluster take */
	clusterrelease	= 4,	/* back door for Cluster release */
	clusterpurge	= 5,	/* back door for Cluster purge */
	clusterproxy	= 6	/* proxy the args after '--' to primary */
};
57
58static void
59usage(
60	mdsetname_t	*sp,
61	char		*string)
62{
63	if ((string != NULL) && (*string != '\0'))
64		md_eprintf("%s\n", string);
65	(void) fprintf(stderr, gettext(
66	    "usage:\t%s -s setname -a [-A enable | disable] -h hostname ...\n"
67	    "	%s -s setname -a [-M] -h hostname ...\n"
68	    "	%s -s setname -a [-M] [-l length] [-L] drivename ...\n"
69	    "	%s -s setname -d [-M] -h hostname ...\n"
70	    "	%s -s setname -d [-M] -f -h all-hostnames\n"
71	    "	%s -s setname -d [-M] [-f] drivename ...\n"
72	    "	%s -s setname -d [-M] [-f] hostname ...\n"
73	    "	%s -s setname -A enable | disable\n"
74	    "	%s -s setname -t [-f]\n"
75	    "	%s -s setname -r\n"
76	    "	%s [-s setname] -j [-M]\n"
77	    "	%s [-s setname] -w [-M]\n"
78	    "	%s -s setname -P [-M]\n"
79	    "	%s -s setname -b [-M]\n"
80	    "	%s -s setname -o [-M] [-h hostname]\n"
81	    "	%s [-s setname]\n"
82	    "\n"
83	    "		hostname = contents of /etc/nodename\n"
84	    "		drivename = cNtNdN no slice\n"
85	    "		[-M] for multi-owner set is optional except"
86	    " on set creation\n"),
87	    myname, myname, myname, myname, myname, myname, myname, myname,
88	    myname, myname, myname, myname, myname, myname, myname, myname);
89	md_exit(sp, (string == NULL) ? 0 : 1);
90}
91
92/*
93 * The svm.sync rc script relies heavily on the metaset output.
94 * Any changes to the metaset output MUST verify that the rc script
95 * does not break. Not doing so may potentially leave the system
96 * unusable. You have been WARNED.
97 */
98static int
99printset(mdsetname_t *sp, md_error_t *ep)
100{
101	int			i, j;
102	md_set_desc		*sd;
103	md_drive_desc		*dd, *p;
104	int			max_meds;
105	md_mnnode_desc		*nd;
106
107	if ((sd = metaget_setdesc(sp, ep)) == NULL)
108		return (-1);
109
110	/*
111	 * Only get set owner information for traditional diskset.
112	 * This set owner information is stored in the node records
113	 * for a MN diskset.
114	 */
115	if (!(MD_MNSET_DESC(sd))) {
116		if (metaget_setownership(sp, ep) == -1)
117			return (-1);
118	}
119
120	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
121	    ep)) == NULL) && !mdisok(ep))
122		return (-1);
123
124	if (MD_MNSET_DESC(sd)) {
125		(void) printf(gettext(
126		"\nMulti-owner Set name = %s, Set number = %d, Master = %s\n"),
127		    sp->setname, sp->setno, sd->sd_mn_master_nodenm);
128		if ((sd->sd_mn_master_nodeid == MD_MN_INVALID_NID) &&
129		    (dd != NULL)) {
130			(void) printf(gettext(
131			    "Master and owner information unavailable "
132			    "until joined (metaset -j)\n"));
133		}
134	} else {
135		(void) printf(gettext(
136		    "\nSet name = %s, Set number = %d\n"),
137		    sp->setname, sp->setno);
138	}
139
140	if (MD_MNSET_DESC(sd)) {
141		(void) printf(gettext("\n%-19.19s %-14.14s %-6.6s\n"),
142		    gettext("Host"), gettext("Owner"), gettext("Member"));
143		nd = sd->sd_nodelist;
144		while (nd) {
145			/*
146			 * Don't print nodes that aren't ok since they may be
147			 * removed from config during a reconfig cycle.  If a
148			 * node was being added to a diskset and the entire
149			 * cluster went down but the node being added was unable
150			 * to reboot, there's no way to know if that node had
151			 * its own node record set to OK or not.  So, node
152			 * record is left in ADD state during reconfig cycle.
153			 * When that node reboots and returns to the cluster,
154			 * the reconfig cycle will either remove the node
155			 * record (if not marked OK on that node) or will mark
156			 * it OK on all nodes.
157			 * It is very important to only remove a node record
158			 * from the other nodes when that node record is not
159			 * marked OK on its own node - otherwise, different
160			 * nodes would have different nodelists possibly
161			 * causing different nodes to to choose different
162			 * masters.
163			 *
164			 * Standard hostname field is 17 bytes but metaset
165			 * will display up to MD_MAX_NODENAME, defined in
166			 * meta_basic.h
167			 */
168			if (!(nd->nd_flags & MD_MN_NODE_OK)) {
169				nd = nd->nd_next;
170				continue;
171			}
172			if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
173			    (nd->nd_flags & MD_MN_NODE_OWN)) {
174				(void) printf(
175				    gettext("  %-17.*s  %-12.12s  %-4.4s\n"),
176				    MD_MAX_NODENAME,
177				    nd->nd_nodename, gettext("multi-owner"),
178				    gettext("Yes"));
179			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
180			    (nd->nd_flags & MD_MN_NODE_OWN)) {
181				/* Should never be able to happen */
182				(void) printf(
183				    gettext("  %-17.*s  %-12.12s  %-4.4s\n"),
184				    MD_MAX_NODENAME,
185				    nd->nd_nodename, gettext("multi-owner"),
186				    gettext("No"));
187			} else if ((nd->nd_flags & MD_MN_NODE_ALIVE) &&
188			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
189				(void) printf(
190				    gettext("  %-17.*s  %-12.12s  %-4.4s\n"),
191				    MD_MAX_NODENAME,
192				    nd->nd_nodename, gettext(""),
193				    gettext("Yes"));
194			} else if ((!(nd->nd_flags & MD_MN_NODE_ALIVE)) &&
195			    (!(nd->nd_flags & MD_MN_NODE_OWN))) {
196				(void) printf(
197				    gettext("  %-17.*s  %-12.12s  %-4.4s\n"),
198				    MD_MAX_NODENAME,
199				    nd->nd_nodename, gettext(""),
200				    gettext("No"));
201			}
202			nd = nd->nd_next;
203		}
204	} else {
205		(void) printf("\n%-19.19s %-5.5s\n",
206		    gettext("Host"), gettext("Owner"));
207		for (i = 0; i < MD_MAXSIDES; i++) {
208			/* Skip empty slots */
209			if (sd->sd_nodes[i][0] == '\0')
210				continue;
211
212			/*
213			 * Standard hostname field is 17 bytes but metaset will
214			 * display up to MD_MAX_NODENAME, def in meta_basic.h
215			 */
216			(void) printf("  %-17.*s  %s\n", MD_MAX_NODENAME,
217			    sd->sd_nodes[i], (sd->sd_flags & MD_SR_AUTO_TAKE ?
218			    (sd->sd_isown[i] ? gettext("Yes (auto)") :
219			    gettext("No (auto)"))
220			    : (sd->sd_isown[i] ? gettext("Yes") : "")));
221		}
222	}
223
224	if (sd->sd_med.n_cnt > 0)
225		(void) printf("\n%-19.19s %-7.7s\n",
226		    gettext("Mediator Host(s)"), gettext("Aliases"));
227
228	if ((max_meds = get_max_meds(ep)) == 0)
229		return (-1);
230
231	for (i = 0; i < max_meds; i++) {
232		if (sd->sd_med.n_lst[i].a_cnt == 0)
233			continue;
234		/*
235		 * Standard hostname field is 17 bytes but metaset will
236		 * display up to MD_MAX_NODENAME, def in meta_basic.h
237		 */
238		(void) printf("  %-17.*s   ", MD_MAX_NODENAME,
239		    sd->sd_med.n_lst[i].a_nm[0]);
240		for (j = 1; j < sd->sd_med.n_lst[i].a_cnt; j++) {
241			(void) printf("%s", sd->sd_med.n_lst[i].a_nm[j]);
242			if (sd->sd_med.n_lst[i].a_cnt - j > 1)
243				(void) printf(gettext(", "));
244		}
245		(void) printf("\n");
246	}
247
248	if (dd) {
249		int	len = 0;
250
251
252		/*
253		 * Building a format string on the fly that will
254		 * be used in (f)printf. This allows the length
255		 * of the ctd to vary from small to large without
256		 * looking horrible.
257		 */
258		for (p = dd; p != NULL; p = p->dd_next)
259			len = max(len, strlen(p->dd_dnp->cname));
260
261		len += 2;
262		(void) printf("\n%-*.*s %-5.5s\n", len, len,
263		    gettext("Drive"),
264		    gettext("Dbase"));
265		for (p = dd; p != NULL; p = p->dd_next) {
266			(void) printf("\n%-*.*s %-5.5s\n", len, len,
267			    p->dd_dnp->cname,
268			    (p->dd_dbcnt ? gettext("Yes") :
269			    gettext("No")));
270		}
271	}
272
273	return (0);
274}
275
276static int
277printsets(mdsetname_t *sp, md_error_t *ep)
278{
279	int			i;
280	mdsetname_t		*sp1;
281	set_t			max_sets;
282
283	/*
284	 * print setname given.
285	 */
286	if (! metaislocalset(sp)) {
287		if (printset(sp, ep))
288			return (-1);
289		return (0);
290	}
291
292	if ((max_sets = get_max_sets(ep)) == 0)
293		return (-1);
294
295	/*
296	 * Print all known sets
297	 */
298	for (i = 1; i < max_sets; i++) {
299		if ((sp1 = metasetnosetname(i, ep)) == NULL) {
300			if (! mdiserror(ep, MDE_NO_SET))
301				break;
302			mdclrerror(ep);
303			continue;
304		}
305
306		if (printset(sp1, ep))
307			break;
308	}
309	if (! mdisok(ep))
310		return (-1);
311
312	return (0);
313}
314
315/*
316 * Print the current versionn of the cluster contract private interface.
317 */
318static void
319printclusterversion()
320{
321	(void) printf("%s\n", METASETIFVERSION);
322}
323
324/*
325 * Print the disks that make up the given disk set. This is used
326 * exclusively by Sun Cluster and is contract private.
327 * Should never be called with sname of a Multinode diskset.
328 */
329static int
330printdisksin(char *sname, md_error_t *ep)
331{
332	mdsetname_t	*sp;
333	md_drive_desc	*dd, *p;
334
335	if ((sp = metasetname(sname, ep)) == NULL) {
336
337		/*
338		 * During a deletion of a set the associated service is
339		 * put offline. The SC3.0 reservation code calls disksuite
340		 * to find a list of disks associated with the set so that
341		 * it can release the reservation on those disks. In this
342		 * case there won't be any disks or even a set left. So just
343		 * return.
344		 */
345		return (0);
346	}
347
348	if (metaget_setownership(sp, ep) == -1)
349		return (-1);
350
351	if (((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST),
352	    ep)) == NULL) && !mdisok(ep))
353		return (-1);
354
355	for (p = dd; p != NULL; p = p->dd_next)
356		(void) printf("%s\n", p->dd_dnp->rname);
357
358	return (0);
359}
360
361static void
362parse_printset(int argc, char **argv)
363{
364	int		c;
365	mdsetname_t	*sp = NULL;
366	char		*sname = MD_LOCAL_NAME;
367	md_error_t	status = mdnullerror;
368	md_error_t	*ep = &status;
369
370	/* reset and parse args */
371	optind = 1;
372	opterr = 1;
373	while ((c = getopt(argc, argv, "s:")) != -1) {
374		switch (c) {
375		case 's':
376			sname = optarg;
377			break;
378		default:
379			usage(sp, gettext("unknown options"));
380		}
381	}
382
383	argc -= optind;
384	argv += optind;
385
386	if (argc != 0)
387		usage(sp, gettext("too many args"));
388
389	if ((sp = metasetname(sname, ep)) == NULL) {
390		mde_perror(ep, "");
391		md_exit(sp, 1);
392	}
393
394	if (printsets(sp, ep) && !mdiserror(ep, MDE_SMF_NO_SERVICE)) {
395		mde_perror(ep, "");
396		md_exit(sp, 1);
397	}
398
399	if (meta_smf_isonline(meta_smf_getmask(), ep) == 0) {
400		mde_perror(ep, "");
401		md_exit(sp, 1);
402	}
403
404	md_exit(sp, 0);
405}
406
/*
 * Handle the "add" form of the command line.  Depending on the options
 * this adds hosts (-h), mediators (-m) or, when neither is given, drives
 * to the set named by -s (default MD_LOCAL_NAME).
 *
 * Options accepted (see the getopt string below):
 *	-M	mark the operation as multi-node
 *	-A arg	auto-take; only the value "enable" has an effect here
 *	-a	accepted, nothing recorded
 *	-h / -m	operands are hosts / mediators; mutually exclusive, and
 *		both conflict with -l and -L
 *	-l n	replica size for drives, checked against the MDDB limits
 *	-L	force_label, passed to meta_set_adddrives()
 *	-s name	set name
 *
 * The operands left after option parsing (argc/argv past optind) are the
 * host, mediator or drive names.
 *
 * The local set lock is taken in every mode.  The lock on the target set
 * is taken only for non multi-node sets, and released again before
 * md_exit() on the paths that took it.
 *
 * Every path through the function ends in md_exit() or usage().
 */
static void
parse_add(int argc, char **argv)
{
	int			c, created_set;
	int			hosts = FALSE;
	int			meds = FALSE;
	int			auto_take = FALSE;
	int			force_label = FALSE;
	int			default_size = TRUE;
	mdsetname_t		*sp = NULL;
	char			*sname = MD_LOCAL_NAME;
	md_error_t		status = mdnullerror;
	md_error_t		 *ep = &status;
	mddrivenamelist_t	*dnlp = NULL;
	mddrivenamelist_t	*p;
	daddr_t			dbsize, nblks;
	mdsetname_t		*local_sp = NULL;
	int			multi_node = 0;
	md_set_desc		*sd;
	rval_e			sdssc_rval;

	/* reset and parse args */
	optind = 1;
	opterr = 1;
	while ((c = getopt(argc, argv, "MaA:hl:Lms:")) != -1) {
		switch (c) {
		case 'M':
			multi_node = 1;
			break;
		case 'A':
			/* verified sub-option in main */
			/* any value other than "enable" leaves auto_take FALSE */
			if (strcmp(optarg, "enable") == 0)
				auto_take = TRUE;
			break;
		case 'a':
			/* accepted; there is no state to record for it */
			break;
		case 'h':
		case 'm':
			/* -h and -m share all checks; only the flag set differs */
			if (meds == TRUE || hosts == TRUE)
				usage(sp, gettext(
				    "only one -m or -h option allowed"));

			/* an earlier -l or -L only makes sense for drives */
			if (default_size == FALSE || force_label == TRUE)
				usage(sp, gettext(
				    "conflicting options"));

			if (c == 'h')
				hosts = TRUE;
			else
				meds = TRUE;
			break;
		case 'l':
			if (hosts == TRUE || meds == TRUE)
				usage(sp, gettext(
				    "conflicting options"));
			if (sscanf(optarg, "%ld", &dbsize) != 1) {
				md_eprintf(gettext(
				    "%s: bad format\n"), optarg);
				usage(sp, "");
			}

			/* dbsize now holds a user value; range-checked later */
			default_size = FALSE;
			break;
		case 'L':
			/* Same criteria as -l */
			if (hosts == TRUE || meds == TRUE)
				usage(sp, gettext(
				    "conflicting options"));
			force_label = TRUE;
			break;
		case 's':
			sname = optarg;
			break;
		default:
			usage(sp, gettext(
			    "unknown options"));
		}
	}

	/* Can only use -A enable when creating the single-node set */
	if (auto_take && hosts != TRUE)
		usage(sp, gettext("conflicting options"));

	/* from here on argc/argv describe only the operands */
	argc -= optind;
	argv += optind;

	/*
	 * Add hosts
	 */
	if (hosts == TRUE) {

		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}

		if (meta_lock(local_sp, TRUE, ep) != 0) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}

		/*
		 * Keep track of Cluster set creation. Need to complete
		 * the transaction no matter if the set was created or not.
		 */
		created_set = 0;

		/*
		 * Have no set, cannot take the lock, so only take the
		 * local lock.
		 */
		if ((sp = metasetname(sname, ep)) == NULL) {
			sdssc_rval = 0;
			if (multi_node) {
				/*
				 * When running on a cluster system that
				 * does not support MN disksets, the routine
				 * sdssc_mo_create_begin will be bound
				 * to the SVM routine not_bound_error
				 * which returns SDSSC_NOT_BOUND_ERROR.
				 *
				 * When running on a cluster system that
				 * does support MN disksets, the routine
				 * sdssc_mo_create_begin will be bound to
				 * the sdssc_mo_create_begin routine in
				 * library libsdssc_so.  A call to
				 * sdssc_mo_create_begin will return with
				 * either SDSSC_ERROR or SDSSC_OKAY. If
				 * an SDSSC_OKAY is returned, then the
				 * cluster framework has allocated a
				 * set number for this new set that is unique
				 * across traditional and MN disksets.
				 * Libmeta will get this unique set number
				 * by calling sdssc_get_index.
				 *
				 * When running on a non-cluster system,
				 * the routine sdssc_mo_create_begin
				 * will be bound to the SVM routine
				 * not_bound which returns SDSSC_NOT_BOUND.
				 * In this case, all sdssc routines will
				 * return SDSSC_NOT_BOUND.  No need to check
				 * for return value of SDSSC_NOT_BOUND since
				 * the libmeta call to get the set number
				 * (sdssc_get_index) will also fail with
				 * SDSSC_NOT_BOUND causing libmeta to
				 * determine its own set number.
				 */
				sdssc_rval = sdssc_mo_create_begin(sname, argc,
				    argv, SDSSC_PICK_SETNO);
				if (sdssc_rval == SDSSC_NOT_BOUND_ERROR) {
					(void) mderror(ep, MDE_NOT_MN, NULL);
					mde_perror(ep,
					"Cluster node does not support "
					"multi-owner diskset operations");
					md_exit(local_sp, 1);
				} else if (sdssc_rval == SDSSC_ERROR) {
					mde_perror(ep, "");
					md_exit(local_sp, 1);
				}
			} else {
				sdssc_rval = sdssc_create_begin(sname, argc,
				    argv, SDSSC_PICK_SETNO);
				if (sdssc_rval == SDSSC_ERROR) {
					mde_perror(ep, "");
					md_exit(local_sp, 1);
				}
			}
			/*
			 * Created diskset (as opposed to adding a
			 * host to an existing diskset).
			 */
			created_set = 1;

			/*
			 * Build a minimal set handle by hand: only the name
			 * and an "unlocked" lockfd are filled in.  The error
			 * left in ep by the failed lookup is discarded.
			 */
			sp = Zalloc(sizeof (*sp));
			sp->setname = Strdup(sname);
			sp->lockfd = MD_NO_LOCK;
			mdclrerror(ep);
		} else {
			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
				mde_perror(ep, "");
				md_exit(local_sp, 1);
			}
			/* an existing MN set overrides a missing -M */
			if (MD_MNSET_DESC(sd)) {
				multi_node = 1;
			}

			/*
			 * can't add hosts to an existing set & enable
			 * auto-take
			 */
			if (auto_take)
				usage(sp, gettext("conflicting options"));

			/*
			 * Have a valid set, take the set lock also.
			 *
			 * A MN diskset does not use the set meta_lock but
			 * instead uses the clnt_lock of rpc.metad and the
			 * suspend/resume feature of the rpc.mdcommd.  Can't
			 * use set meta_lock since class 1 messages are
			 * grabbing this lock and if this thread is holding
			 * the set meta_lock then no rpc.mdcommd suspend
			 * can occur.
			 */
			if (!multi_node) {
				if (meta_lock(sp, TRUE, ep) != 0) {
					mde_perror(ep, "");
					md_exit(local_sp, 1);
				}
			}
		}

		if (meta_set_addhosts(sp, multi_node, argc, argv, auto_take,
		    ep)) {
			/* roll back the cluster transaction opened above */
			if (created_set)
				sdssc_create_end(sname, SDSSC_CLEANUP);
			/* &status is the same object that ep points to */
			mde_perror(&status, "");
			if (!multi_node)
				(void) meta_unlock(sp, ep);
			md_exit(local_sp, 1);
		}

		if (created_set)
			sdssc_create_end(sname, SDSSC_COMMIT);

		else {
			/*
			 * If adding hosts to existing diskset,
			 * call DCS svcs
			 */
			sdssc_add_hosts(sname, argc, argv);
		}
		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 0);
	}

	/*
	 * Add mediators
	 */
	if (meds == TRUE) {

		/* local_sp is still NULL if this lookup fails */
		if ((sp = metasetname(sname, ep)) == NULL) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}

		if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}

		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}
		if (MD_MNSET_DESC(sd)) {
			multi_node = 1;
		}

		if (meta_lock(local_sp, TRUE, ep) != 0) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}
		/*
		 * A MN diskset does not use the set meta_lock but
		 * instead uses the clnt_lock of rpc.metad and the
		 * suspend/resume feature of the rpc.mdcommd.  Can't
		 * use set meta_lock since class 1 messages are
		 * grabbing this lock and if this thread is holding
		 * the set meta_lock then no rpc.mdcommd suspend
		 * can occur.
		 */
		if (!multi_node) {
			if (meta_lock(sp, TRUE, ep) != 0) {
				mde_perror(ep, "");
				md_exit(local_sp, 1);
			}
		}

		if (meta_set_addmeds(sp, argc, argv, ep)) {
			mde_perror(&status, "");
			if (!multi_node)
				(void) meta_unlock(sp, ep);
			md_exit(local_sp, 1);
		}

		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 0);
	}

	/*
	 * Add drives
	 */
	if ((sp = metasetname(sname, ep)) == NULL) {
		mde_perror(ep, "");
		md_exit(local_sp, 1);
	}

	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
		mde_perror(ep, "");
		md_exit(local_sp, 1);
	}

	/* Determine if diskset is a MN diskset or not */
	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
		mde_perror(ep, "");
		md_exit(local_sp, 1);
	}
	if (MD_MNSET_DESC(sd)) {
		multi_node = 1;
	}

	if (meta_lock(local_sp, TRUE, ep) != 0) {
		mde_perror(ep, "");
		md_exit(local_sp, 1);
	}

	/*
	 * Make sure database size is within limits
	 *
	 * NOTE(review): usage() leaves through md_exit(sp, ...) while the
	 * local set lock taken just above is held; whether md_exit()
	 * releases it is not visible from here.
	 */
	if (default_size == FALSE) {
		if ((multi_node && dbsize < MDDB_MN_MINBLKS) ||
		    (!multi_node && dbsize < MDDB_MINBLKS))
			usage(sp, gettext(
			    "size (-l) is too small"));

		if ((multi_node && dbsize > MDDB_MN_MAXBLKS) ||
		    (!multi_node && dbsize > MDDB_MAXBLKS))
			usage(sp, gettext(
			    "size (-l) is too big"));
	}

	/*
	 * Have a valid set, take the set lock also.
	 *
	 * A MN diskset does not use the set meta_lock but
	 * instead uses the clnt_lock of rpc.metad and the
	 * suspend/resume feature of the rpc.mdcommd.  Can't
	 * use set meta_lock since class 1 messages are
	 * grabbing this lock and if this thread is holding
	 * the set meta_lock then no rpc.mdcommd suspend
	 * can occur.
	 */
	if (!multi_node) {
		if (meta_lock(sp, TRUE, ep) != 0) {
			mde_perror(ep, "");
			md_exit(local_sp, 1);
		}
	}


	/*
	 * If using the default size,
	 *   then let's adjust the default to the minimum
	 *   size currently in use.
	 */
	if (default_size) {
		dbsize = multi_node ? MD_MN_DBSIZE : MD_DBSIZE;
		/* a negative result is ignored and the default is kept */
		if ((nblks = meta_db_minreplica(sp, ep)) < 0)
			mdclrerror(ep);
		else
			dbsize = nblks;	/* adjust replica size */
	}

	/*
	 * c is reused for the return value of metadrivenamelist():
	 * negative is an error, zero means no drive operands were given.
	 */
	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
		mde_perror(ep, "");
		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 1);
	}

	if (c == 0) {
		md_perror(gettext(
		    "No drives specified to add.\n"));
		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 1);
	}

	if (meta_set_adddrives(sp, dnlp, dbsize, force_label, ep)) {
		metafreedrivenamelist(dnlp);
		mde_perror(ep, "");
		if (!multi_node)
			(void) meta_unlock(sp, ep);
		md_exit(local_sp, 1);
	}

	/*
	 * MN disksets don't have a device id in the master block
	 * For traditional disksets, check for the drive device
	 * id not fitting in the master block
	 *
	 * NOTE(review): a failed check only records
	 * MDE_DS_NOTSELFIDENTIFY in ep; nothing later in this function
	 * prints ep, and the exit status below is still 0.
	 */
	if (!multi_node) {
		for (p = dnlp; p != NULL; p = p->next) {
			int 		fd;
			ddi_devid_t	devid;
			mdname_t	*np;

			/* drives whose slice 0 cannot be named are skipped */
			np = metaslicename(p->drivenamep, 0, ep);
			if (np == NULL)
				continue;

			/* drives that cannot be opened are skipped as well */
			if ((fd = open(np->rname, O_RDONLY | O_NDELAY)) < 0)
				continue;

			if (devid_get(fd, &devid) == 0) {
				size_t len;

				/* room left in one block after mddb_mb_t */
				len = devid_sizeof(devid);
				if (len > (DEV_BSIZE - sizeof (mddb_mb_t)))
					(void) mddserror(ep,
					    MDE_DS_NOTSELFIDENTIFY, NULL, NULL,
					    np->rname, NULL);
				devid_free(devid);
			} else {
				/* no devid at all is reported the same way */
				(void) mddserror(ep, MDE_DS_NOTSELFIDENTIFY,
				    NULL, NULL, np->rname, NULL);
			}
			(void) close(fd);
		}
	}

	/*
	 * MN disksets don't use DCS clustering services.
	 * For traditional disksets:
	 * There's not really much we can do here if this call fails.
	 * The drives have been added to the set and DiskSuite believes
	 * it owns the drives.
	 * Release the set and hope for the best.
	 */
	if ((!multi_node) &&
	    (sdssc_notify_service(sname, Make_Primary) == SDSSC_ERROR)) {
		(void) meta_set_release(sp, ep);
		(void) printf(gettext(
		    "Sun Clustering failed to make set primary\n"));
	}

	metafreedrivenamelist(dnlp);
	if (!multi_node)
		(void) meta_unlock(sp, ep);
	md_exit(local_sp, 0);
}
849
850static void
851parse_balance(int argc, char **argv)
852{
853	int		c;
854	mdsetname_t	*sp = NULL;
855	char		*sname = MD_LOCAL_NAME;
856	md_error_t	status = mdnullerror;
857	md_set_desc	*sd;
858	int		multi_node = 0;
859
860	/* reset and parse args */
861	optind = 1;
862	opterr = 1;
863	while ((c = getopt(argc, argv, "Mbs:")) != -1) {
864		switch (c) {
865		case 'M':
866			break;
867		case 'b':
868			break;
869		case 's':
870			sname = optarg;
871			break;
872		default:
873			usage(sp, gettext("unknown options"));
874		}
875	}
876
877	argc -= optind;
878	argv += optind;
879
880	if (argc != 0)
881		usage(sp, gettext("too many args"));
882
883	if ((sp = metasetname(sname, &status)) == NULL) {
884		mde_perror(&status, "");
885		md_exit(sp, 1);
886	}
887	if ((sd = metaget_setdesc(sp, &status)) == NULL) {
888		mde_perror(&status, "");
889		md_exit(sp, 1);
890	}
891	if (MD_MNSET_DESC(sd)) {
892		multi_node = 1;
893	}
894	/*
895	 * Have a valid set, take the set lock also.
896	 *
897	 * A MN diskset does not use the set meta_lock but
898	 * instead uses the clnt_lock of rpc.metad and the
899	 * suspend/resume feature of the rpc.mdcommd.  Can't
900	 * use set meta_lock since class 1 messages are
901	 * grabbing this lock and if this thread is holding
902	 * the set meta_lock then no rpc.mdcommd suspend
903	 * can occur.
904	 */
905	if (!multi_node) {
906		if (meta_lock(sp, TRUE, &status) != 0) {
907			mde_perror(&status, "");
908			md_exit(sp, 1);
909		}
910	}
911
912	if (meta_set_balance(sp, &status) != 0) {
913		mde_perror(&status, "");
914		md_exit(sp, 1);
915	}
916	md_exit(sp, 0);
917}
918
919static void
920parse_autotake(int argc, char **argv)
921{
922	int			c;
923	int			enable = 0;
924	mdsetname_t		*sp = NULL;
925	char			*sname = MD_LOCAL_NAME;
926	md_error_t		status = mdnullerror;
927	md_error_t		*ep = &status;
928
929	/* reset and parse args */
930	optind = 1;
931	opterr = 1;
932	while ((c = getopt(argc, argv, "A:s:")) != -1) {
933		switch (c) {
934		case 'A':
935			/* verified sub-option in main */
936			if (strcmp(optarg, "enable") == 0)
937				enable = 1;
938			break;
939		case 's':
940			/* verified presence of setname in main */
941			sname = optarg;
942			break;
943		default:
944			usage(sp, gettext("unknown options"));
945		}
946	}
947
948	if ((sp = metasetname(sname, ep)) == NULL) {
949		mde_perror(ep, "");
950		md_exit(sp, 1);
951	}
952
953	if (meta_lock(sp, TRUE, ep) != 0) {
954		mde_perror(ep, "");
955		md_exit(sp, 1);
956	}
957
958	if (meta_check_ownership(sp, ep) != 0) {
959		mde_perror(ep, "");
960		md_exit(sp, 1);
961	}
962
963	if (meta_set_auto_take(sp, enable, ep) != 0) {
964		mde_perror(ep, "");
965		md_exit(sp, 1);
966	}
967
968	md_exit(sp, 0);
969}
970
971static void
972parse_del(int argc, char **argv)
973{
974	int			c;
975	mdsetname_t		*sp = NULL;
976	char			*sname = MD_LOCAL_NAME;
977	int			hosts = FALSE;
978	int			meds = FALSE;
979	int			forceflg = FALSE;
980	md_error_t		status = mdnullerror;
981	md_error_t		*ep = &status;
982	mddrivenamelist_t	*dnlp = NULL;
983	mdsetname_t		*local_sp = NULL;
984	md_set_desc		*sd;
985	int			multi_node = 0;
986
987	/* reset and parse args */
988	optind = 1;
989	opterr = 1;
990	while ((c = getopt(argc, argv, "Mdfhms:")) != -1) {
991		switch (c) {
992		case 'M':
993			break;
994		case 'd':
995			break;
996		case 'f':
997			forceflg = TRUE;
998			break;
999		case 'h':
1000		case 'm':
1001			if (meds == TRUE || hosts == TRUE)
1002				usage(sp, gettext(
1003				    "only one -m or -h option allowed"));
1004
1005			if (c == 'h')
1006				hosts = TRUE;
1007			else
1008				meds = TRUE;
1009			break;
1010		case 's':
1011			sname = optarg;
1012			break;
1013		default:
1014			usage(sp, gettext("unknown options"));
1015		}
1016	}
1017
1018	argc -= optind;
1019	argv += optind;
1020
1021	if ((sp = metasetname(sname, ep)) == NULL) {
1022		mde_perror(ep, "");
1023		md_exit(local_sp, 1);
1024	}
1025
1026	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1027		mde_perror(ep, "");
1028		md_exit(local_sp, 1);
1029	}
1030
1031	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1032		mde_perror(ep, "");
1033		md_exit(local_sp, 1);
1034	}
1035	if (MD_MNSET_DESC(sd))
1036		multi_node = 1;
1037
1038	if (meta_lock(local_sp, TRUE, ep) != 0) {
1039		mde_perror(ep, "");
1040		md_exit(local_sp, 1);
1041	}
1042
1043	/*
1044	 * Have a valid set, take the set lock also.
1045	 *
1046	 * A MN diskset does not use the set meta_lock but
1047	 * instead uses the clnt_lock of rpc.metad and the
1048	 * suspend/resume feature of the rpc.mdcommd.  Can't
1049	 * use set meta_lock since class 1 messages are
1050	 * grabbing this lock and if this thread is holding
1051	 * the set meta_lock then no rpc.mdcommd suspend
1052	 * can occur.
1053	 */
1054	if (!multi_node) {
1055		if (meta_lock(sp, TRUE, ep) != 0) {
1056			mde_perror(ep, "");
1057			md_exit(local_sp, 1);
1058		}
1059	}
1060
1061	/*
1062	 * Delete hosts
1063	 */
1064	if (hosts == TRUE) {
1065		if (meta_check_ownership(sp, ep) != 0) {
1066			/*
1067			 * If we don't own the set bail out here otherwise
1068			 * we could delete the node from the DCS service
1069			 * yet not delete the host from the set.
1070			 */
1071			mde_perror(ep, "");
1072			if (!multi_node)
1073				(void) meta_unlock(sp, ep);
1074			md_exit(local_sp, 1);
1075		}
1076		if (sdssc_delete_hosts(sname, argc, argv) == SDSSC_ERROR) {
1077			if (!metad_isautotakebyname(sname)) {
1078				/*
1079				 * SC could have been installed after the set
1080				 * was created. We still want to be able to
1081				 * delete these sets.
1082				 */
1083				md_perror(gettext(
1084				    "Failed to delete hosts from DCS service"));
1085				if (!multi_node)
1086					(void) meta_unlock(sp, ep);
1087				md_exit(local_sp, 1);
1088			}
1089		}
1090		if (meta_set_deletehosts(sp, argc, argv, forceflg, ep)) {
1091			if (sdssc_add_hosts(sname, argc, argv) == SDSSC_ERROR) {
1092				(void) printf(gettext(
1093				    "Failed to restore host(s) in DCS "
1094				    "database\n"));
1095			}
1096			mde_perror(ep, "");
1097			if (!multi_node)
1098				(void) meta_unlock(sp, ep);
1099			md_exit(local_sp, 1);
1100		}
1101		if (!multi_node)
1102			(void) meta_unlock(sp, ep);
1103		md_exit(local_sp, 0);
1104	}
1105
1106	/*
1107	 * Delete mediators
1108	 */
1109	if (meds == TRUE) {
1110		if (meta_set_deletemeds(sp, argc, argv, forceflg, ep)) {
1111			mde_perror(ep, "");
1112			if (!multi_node)
1113				(void) meta_unlock(sp, ep);
1114			md_exit(local_sp, 1);
1115		}
1116		if (!multi_node)
1117			(void) meta_unlock(sp, ep);
1118		md_exit(local_sp, 0);
1119	}
1120
1121	/*
1122	 * Delete drives
1123	 */
1124
1125	if ((c = metadrivenamelist(&sp, &dnlp, argc, argv, ep)) < 0) {
1126		mde_perror(ep, "");
1127		if (!multi_node)
1128			(void) meta_unlock(sp, ep);
1129		md_exit(local_sp, 1);
1130	}
1131
1132	if (c == 0) {
1133		md_perror(gettext(
1134		    "No drives specified to delete.\n"));
1135		if (!multi_node)
1136			(void) meta_unlock(sp, ep);
1137		md_exit(local_sp, 1);
1138	}
1139
1140	if (meta_set_deletedrives(sp, dnlp, forceflg, ep)) {
1141		metafreedrivenamelist(dnlp);
1142		mde_perror(ep, "");
1143		if (!multi_node)
1144			(void) meta_unlock(sp, ep);
1145		md_exit(local_sp, 1);
1146	}
1147
1148	metafreedrivenamelist(dnlp);
1149	if (!multi_node)
1150		(void) meta_unlock(sp, ep);
1151	md_exit(local_sp, 0);
1152}
1153
1154static void
1155parse_isowner(int argc, char **argv)
1156{
1157	int		c;
1158	mdsetname_t	*sp = NULL;
1159	char		*sname = MD_LOCAL_NAME;
1160	md_error_t	status = mdnullerror;
1161	md_error_t	*ep = &status;
1162	char		*host = NULL;
1163
1164	/* reset and parse args */
1165	optind = 1;
1166	opterr = 1;
1167	while ((c = getopt(argc, argv, "Moh:s:")) != -1) {
1168		switch (c) {
1169		case 'M':
1170			break;
1171		case 'o':
1172			break;
1173		case 'h':
1174			if (host != NULL) {
1175				usage(sp, gettext(
1176				    "only one -h option allowed"));
1177			}
1178			host = optarg;
1179			break;
1180		case 's':
1181			sname = optarg;
1182			break;
1183		default:
1184			usage(sp, gettext("unknown options"));
1185		}
1186	}
1187
1188	argc -= optind;
1189	argv += optind;
1190
1191	if (argc != 0)
1192		usage(sp, gettext("too many args"));
1193
1194	if ((sp = metasetname(sname, ep)) == NULL) {
1195		mde_perror(ep, "");
1196		md_exit(sp, 1);
1197	}
1198
1199	if (host == NULL) {
1200		if (meta_check_ownership(sp, ep) != 0) {
1201			mde_perror(ep, "");
1202			md_exit(sp, 1);
1203		}
1204	} else {
1205		if (meta_check_ownership_on_host(sp, host, ep) != 0) {
1206			mde_perror(ep, "");
1207			md_exit(sp, 1);
1208		}
1209	}
1210	md_exit(sp, 0);
1211}
1212
1213static void
1214parse_purge(int argc, char **argv)
1215{
1216	int		c;
1217	mdsetname_t	*sp = NULL;
1218	mdsetname_t	*local_sp = NULL;
1219	md_drive_desc	*dd;
1220	char		*sname = MD_LOCAL_NAME;
1221	char		*thishost = mynode();
1222	md_error_t	status = mdnullerror;
1223	md_error_t	*ep = &status;
1224	int		bypass_cluster_purge = 0;
1225	int		forceflg = FALSE;
1226	int		ret = 0;
1227	int		multi_node = 0;
1228	md_set_desc		*sd;
1229
1230	optind = 1;
1231	opterr = 1;
1232	while ((c = getopt(argc, argv, "C:fPs:")) != -1) {
1233		switch (c) {
1234		case 'M':
1235			break;
1236		case 'C':
1237			bypass_cluster_purge = 1;
1238			break;
1239		case 'f':
1240			forceflg = TRUE;
1241			break;
1242		case 'P':
1243			break;
1244		case 's':
1245			sname = optarg;
1246			break;
1247		default:
1248			usage(sp, gettext("unknown options"));
1249		}
1250	}
1251
1252	argc -= optind;
1253	argv += optind;
1254
1255	if (argc != 0)
1256		usage(sp, gettext("too many arguments"));
1257
1258	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1259		mde_perror(ep, "");
1260		md_exit(local_sp, 1);
1261	}
1262
1263	if (meta_lock(local_sp, TRUE, ep) != 0) {
1264		mde_perror(ep, "");
1265		md_exit(local_sp, 1);
1266	}
1267
1268	if ((sp = metasetname(sname, ep)) == NULL) {
1269		mde_perror(ep, "");
1270		md_exit(sp, 1);
1271	}
1272
1273	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1274		mde_perror(ep, "");
1275		md_exit(local_sp, 1);
1276	}
1277	if (MD_MNSET_DESC(sd))
1278		multi_node = 1;
1279
1280	if (!multi_node) {
1281		if (meta_lock(sp, TRUE, ep) != 0) {
1282			mde_perror(ep, "");
1283			md_exit(local_sp, 1);
1284		}
1285	}
1286
1287	/* Must not own the set if purging it from this host */
1288	if (meta_check_ownership(sp, ep) == 0) {
1289		/*
1290		 * Need to see if there are disks in the set, if not then
1291		 * there is no ownership but meta_check_ownership returns 0
1292		 */
1293		dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep);
1294		if (!mdisok(ep)) {
1295			mde_perror(ep, "");
1296			if (!multi_node)
1297				(void) meta_unlock(sp, ep);
1298			md_exit(local_sp, 1);
1299		}
1300		if (dd != NULL) {
1301			(void) printf(gettext
1302			    ("Must not be owner of the set when purging it\n"));
1303			if (!multi_node)
1304				(void) meta_unlock(sp, ep);
1305			md_exit(local_sp, 1);
1306		}
1307	}
1308	/*
1309	 * Remove the node from the DCS service
1310	 */
1311	if (!bypass_cluster_purge) {
1312		if (sdssc_delete_hosts(sname, 1, &thishost) == SDSSC_ERROR) {
1313			md_perror(gettext
1314			    ("Failed to purge hosts from DCS service"));
1315			if (!multi_node)
1316				(void) meta_unlock(sp, ep);
1317			md_exit(local_sp, 1);
1318		}
1319	}
1320
1321	if ((ret = meta_set_purge(sp, bypass_cluster_purge, forceflg,
1322	    ep)) != 0) {
1323		if (!bypass_cluster_purge) {
1324			if (sdssc_add_hosts(sname, 1, &thishost) ==
1325			    SDSSC_ERROR) {
1326				(void) printf(gettext(
1327				    "Failed to restore host in DCS "
1328				    "database\n"));
1329			}
1330		}
1331		mde_perror(ep, "");
1332		if (!multi_node)
1333			(void) meta_unlock(sp, ep);
1334		md_exit(local_sp, ret);
1335	}
1336
1337	if (!multi_node)
1338		(void) meta_unlock(sp, ep);
1339	md_exit(local_sp, 0);
1340}
1341
1342static void
1343parse_query(int argc, char **argv)
1344{
1345	int		c;
1346	mdsetname_t	*sp = NULL;
1347	mddb_dtag_lst_t	*dtlp = NULL;
1348	mddb_dtag_lst_t	*tdtlp;
1349	char		*sname = MD_LOCAL_NAME;
1350	md_error_t	status = mdnullerror;
1351
1352	/* reset and parse args */
1353	optind = 1;
1354	opterr = 1;
1355	while ((c = getopt(argc, argv, "Mqs:")) != -1) {
1356		switch (c) {
1357		case 'M':
1358			break;
1359		case 'q':
1360			break;
1361		case 's':
1362			sname = optarg;
1363			break;
1364		default:
1365			usage(sp, gettext("unknown options"));
1366		}
1367	}
1368
1369	argc -= optind;
1370	argv += optind;
1371
1372	if (argc != 0)
1373		usage(sp, gettext("too many args"));
1374
1375	if ((sp = metasetname(sname, &status)) == NULL) {
1376		mde_perror(&status, "");
1377		md_exit(sp, 1);
1378	}
1379
1380	if (meta_lock(sp, TRUE, &status) != 0) {
1381		mde_perror(&status, "");
1382		md_exit(sp, 1);
1383	}
1384
1385	if (meta_set_query(sp, &dtlp, &status) != 0) {
1386		mde_perror(&status, "");
1387		md_exit(sp, 1);
1388	}
1389
1390	if (dtlp != NULL)
1391		(void) printf("The following tag(s) were found:\n");
1392
1393	for (tdtlp = dtlp; tdtlp != NULL; tdtlp = dtlp) {
1394		dtlp = tdtlp->dtl_nx;
1395		(void) printf("%2d - %s - %s", tdtlp->dtl_dt.dt_id,
1396		    tdtlp->dtl_dt.dt_hn,
1397		    ctime((long *)&tdtlp->dtl_dt.dt_tv.tv_sec));
1398		Free(tdtlp);
1399	}
1400
1401	md_exit(sp, 0);
1402}
1403
1404/* Should never be called with sname of a Multinode diskset. */
1405static void
1406parse_releaseset(int argc, char **argv)
1407{
1408	int		c;
1409	mdsetname_t	*sp = NULL;
1410	md_error_t	status = mdnullerror;
1411	md_error_t	*ep = &status;
1412	char		*sname = MD_LOCAL_NAME;
1413	sdssc_boolean_e	cluster_release = SDSSC_False;
1414	sdssc_version_t	vers;
1415	rval_e		rval;
1416	md_set_desc	*sd;
1417
1418	/* reset and parse args */
1419	optind = 1;
1420	opterr = 1;
1421	while ((c = getopt(argc, argv, "C:s:r")) != -1) {
1422		switch (c) {
1423		case 'C':
1424			cluster_release = SDSSC_True;
1425			break;
1426		case 's':
1427			sname = optarg;
1428			break;
1429		case 'r':
1430			break;
1431		default:
1432			usage(sp, gettext("unknown options"));
1433		}
1434	}
1435
1436	argc -= optind;
1437	argv += optind;
1438
1439	if (argc > 0)
1440		usage(sp, gettext("too many args"));
1441
1442	(void) memset(&vers, 0, sizeof (vers));
1443
1444	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1445	    (vers.major == 3) &&
1446	    (cluster_release == SDSSC_False)) {
1447
1448		/*
1449		 * If the release is being done by the user via the CLI
1450		 * we need to notify the DCS to release this node as being
1451		 * the primary. The reason nothing else needs to be done
1452		 * is due to the fact that the reservation code will exec
1453		 * metaset -C release to complete the operation.
1454		 */
1455		rval = sdssc_notify_service(sname, Release_Primary);
1456		if (rval == SDSSC_ERROR) {
1457			(void) printf(gettext(
1458			    "metaset: failed to notify DCS of release\n"));
1459		}
1460		md_exit(NULL, rval == SDSSC_ERROR);
1461	}
1462
1463	if ((sp = metasetname(sname, ep)) == NULL) {
1464
1465		/*
1466		 * It's entirely possible for the SC3.0 reservation code
1467		 * to call for DiskSet to release a diskset and have that
1468		 * diskset not exist. During a diskset removal DiskSuite
1469		 * maybe able to remove all traces of the diskset before
1470		 * the reservation code execs metaset -C release in which
1471		 * case the metasetname will fail, but the overall command
1472		 * shouldn't.
1473		 */
1474		if (vers.major == 3)
1475			md_exit(sp, 0);
1476		else {
1477			mde_perror(ep, "");
1478			md_exit(sp, 1);
1479		}
1480	}
1481
1482	if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1483		mde_perror(ep, "");
1484		md_exit(sp, 1);
1485	}
1486
1487	if (sd->sd_flags & MD_SR_AUTO_TAKE) {
1488		md_eprintf(gettext("cannot release auto-take diskset\n"));
1489		md_exit(sp, 1);
1490	}
1491
1492	if (meta_lock_nowait(sp, ep) != 0) {
1493		mde_perror(ep, "");
1494		md_exit(sp, 10);	/* special errcode */
1495	}
1496
1497	if (meta_set_release(sp, ep)) {
1498		mde_perror(ep, "");
1499		md_exit(sp, 1);
1500	}
1501	md_exit(sp, 0);
1502}
1503
1504/* Should never be called with sname of a Multinode diskset. */
1505static void
1506parse_takeset(int argc, char **argv)
1507{
1508	int		c;
1509	mdsetname_t	*sp = NULL;
1510	int		flags = 0;
1511	char		*sname = MD_LOCAL_NAME;
1512	mhd_mhiargs_t	mhiargs;
1513	char 		*cp = NULL;
1514	int		pos = -1;	/* position of timeout value */
1515	int		usetag = 0;
1516	static char	*nullopts[] = { NULL };
1517	md_error_t	status = mdnullerror;
1518	md_error_t	*ep = &status;
1519	sdssc_boolean_e	cluster_take = SDSSC_False;
1520	sdssc_version_t	vers;
1521	rval_e		rval;
1522	int		set_take_rval;
1523
1524	/* reset and parse args */
1525	optind = 1;
1526	opterr = 1;
1527	while ((c = getopt(argc, argv, "C:fs:tu:y")) != -1) {
1528		switch (c) {
1529		case 'C':
1530			cluster_take = SDSSC_True;
1531			break;
1532		case 'f':
1533			flags |= TAKE_FORCE;
1534			break;
1535		case 's':
1536			sname = optarg;
1537			break;
1538		case 't':
1539			break;
1540		case 'u':
1541			usetag = atoi(optarg);
1542			flags |= TAKE_USETAG;
1543			break;
1544		case 'y':
1545			flags |= TAKE_USEIT;
1546			break;
1547		default:
1548			usage(sp, gettext("unknown options"));
1549		}
1550	}
1551
1552	mhiargs = defmhiargs;
1553
1554	argc -= optind;
1555	argv += optind;
1556
1557	if (argc > 1)
1558		usage(sp, gettext("too many args"));
1559
1560	/*
1561	 * If we have a list of timeout value overrides, handle it here
1562	 */
1563	while (argv[0] != NULL && *argv[0] != '\0') {
1564		/*
1565		 * The use of the nullopts[] "token list" here is to make
1566		 * getsubopts() simply parse a comma separated list
1567		 * returning either "" or the contents of the field, the
1568		 * end condition is exaustion of the initial string, which
1569		 * is modified in the process.
1570		 */
1571		(void) getsubopt(&argv[0], nullopts, &cp);
1572
1573		c = 0;			/* re-use c as temp value of timeout */
1574
1575		if (*cp != '-')		/* '-' uses default */
1576			c = atoi(cp);
1577
1578		if (c < 0) {
1579			usage(sp, gettext(
1580			    "time out values must be > 0"));
1581		}
1582
1583		if (++pos > 3) {
1584			usage(sp, gettext(
1585			    "too many timeout values specified."));
1586		}
1587
1588		if (c == 0)		/* 0 or "" field uses default */
1589			continue;
1590
1591		/*
1592		 * Assign temp value to appropriate structure member based on
1593		 * its position in the comma separated list.
1594		 */
1595		switch (pos) {
1596			case 0:
1597				mhiargs.mh_ff = c;
1598				break;
1599
1600			case 1:
1601				mhiargs.mh_tk.reinstate_resv_delay = c;
1602				break;
1603
1604			case 2:
1605				mhiargs.mh_tk.min_ownership_delay = c;
1606				break;
1607
1608			case 3:
1609				mhiargs.mh_tk.max_ownership_delay = c;
1610				break;
1611		}
1612	}
1613
1614	(void) memset(&vers, 0, sizeof (vers));
1615
1616	if ((sdssc_version(&vers) == SDSSC_OKAY) &&
1617	    (vers.major == 3) &&
1618	    (cluster_take == SDSSC_False)) {
1619
1620		/*
1621		 * If the take is beging done by the user via the CLI we need
1622		 * to notify the DCS to make this current node the primary.
1623		 * The SC3.0 reservation code will in turn exec metaset with
1624		 * the -C take arg to complete this operation.
1625		 */
1626		if ((rval = sdssc_notify_service(sname, Make_Primary)) ==
1627		    SDSSC_ERROR) {
1628			(void) printf(gettext(
1629			    "metaset: failed to notify DCS of take\n"));
1630		}
1631		md_exit(NULL, rval == SDSSC_ERROR);
1632	}
1633
1634	if ((sp = metasetname(sname, ep)) == NULL) {
1635		mde_perror(ep, "");
1636		md_exit(sp, 1);
1637	}
1638
1639	if ((vers.major == 3) && (meta_check_ownership(sp, ep) == 0)) {
1640
1641		/*
1642		 * If we're running in a cluster environment and this
1643		 * node already owns the set. Don't bother trying to
1644		 * take the set again. There's one case where an adminstrator
1645		 * is adding disks to a set for the first time. metaset
1646		 * will take the ownership of the set at that point. During
1647		 * that add operation SC3.0 notices activity on the device
1648		 * and also tries to perform a take operation. The SC3.0 take
1649		 * will fail because the adminstrative add has the set locked
1650		 */
1651		md_exit(sp, 0);
1652	}
1653
1654	if (meta_lock_nowait(sp, ep) != 0) {
1655		mde_perror(ep, "");
1656		md_exit(sp, 10);	/* special errcode */
1657	}
1658
1659	/*
1660	 * If a 2 is returned from meta_set_take, this take was able to resolve
1661	 * an unresolved replicated disk (i.e. a disk is now available that
1662	 * had been missing during the import of the replicated diskset).
1663	 * Need to release the diskset and re-take in order to have
1664	 * the subdrivers re-snarf using the newly resolved (or newly mapped)
1665	 * devids.  This also allows the namespace to be updated with the
1666	 * correct major names in the case where the disk being replicated
1667	 * was handled by a different driver than the replicated disk.
1668	 */
1669	set_take_rval = meta_set_take(sp, &mhiargs, flags, usetag, &status);
1670	if (set_take_rval == 2) {
1671		if (meta_set_release(sp, &status)) {
1672			mde_perror(&status,
1673			    "Need to release and take set to resolve names.");
1674			md_exit(sp, 1);
1675		}
1676		metaflushdrivenames();
1677		metaflushsetname(sp);
1678		set_take_rval = meta_set_take(sp, &mhiargs,
1679		    (flags | TAKE_RETAKE), usetag, &status);
1680	}
1681
1682	if (set_take_rval == -1) {
1683		mde_perror(&status, "");
1684		if (mdismddberror(&status, MDE_DB_TAGDATA))
1685			md_exit(sp, 2);
1686		if (mdismddberror(&status, MDE_DB_ACCOK))
1687			md_exit(sp, 3);
1688		if (mdismddberror(&status, MDE_DB_STALE))
1689			md_exit(sp, 66);
1690		md_exit(sp, 1);
1691	}
1692	md_exit(sp, 0);
1693}
1694
1695/*
1696 * Joins a node to a specific set or to all multinode disksets known
1697 * by this node.  If set is specified then caller should have verified
1698 * that the set is a multinode diskset.
1699 *
1700 * If an error occurs, metaset exits with a 1.
1701 * If there is no error, metaset exits with a 0.
1702 */
1703static void
1704parse_joinset(int argc, char **argv)
1705{
1706	int		c;
1707	mdsetname_t	*sp = NULL, *local_sp = NULL;
1708	char		*sname = MD_LOCAL_NAME;
1709	md_error_t	status = mdnullerror;
1710	md_error_t	*ep = &status;
1711	md_set_desc	*sd;
1712	char		buf[BUFSIZ];
1713	char		*p = buf;
1714	set_t		max_sets, setno;
1715	int		err, cumm_err = 0;
1716	size_t		bufsz;
1717
1718	bufsz = sizeof (buf);
1719	/* reset and parse args */
1720	optind = 1;
1721	opterr = 1;
1722	while ((c = getopt(argc, argv, "Ms:j")) != -1) {
1723		switch (c) {
1724		case 'M':
1725			break;
1726		case 'j':
1727			break;
1728		case 's':
1729			sname = optarg;
1730			break;
1731		default:
1732			usage(sp, gettext("unknown options"));
1733		}
1734	}
1735
1736	argc -= optind;
1737	argv += optind;
1738
1739	if (argc > 1)
1740		usage(sp, gettext("too many args"));
1741
1742	/*
1743	 * If no setname option was used, then join all disksets
1744	 * that this node knows about.   Attempt to join all
1745	 * disksets that this node knows about.
1746	 *
1747	 * Additional text is added to the error messages during
1748	 * this section of code in order to help the user understand
1749	 * why the 'join of all sets' failed and which set caused
1750	 * the failure.
1751	 */
1752
1753	/*
1754	 * Hold local set lock throughout this call to keep
1755	 * other actions from interfering (such as creating a new
1756	 * set, etc.).
1757	 */
1758	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1759		mde_perror(ep, "");
1760		md_exit(sp, 1);
1761	}
1762
1763	if (meta_lock(local_sp, TRUE, ep) != 0) {
1764		mde_perror(ep, "");
1765		md_exit(local_sp, 1);
1766	}
1767
1768	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1769		/*
1770		 * If no set name is given, then walk through all sets
1771		 * on this node which could include:
1772		 * 	- MN disksets
1773		 *	- traditional disksets
1774		 *	- non-existent disksets
1775		 * Attempt to join the MN disksets.
1776		 * If the join of one set fails, print out an error message
1777		 * about that set and continue the walk.
1778		 */
1779		if ((max_sets = get_max_sets(ep)) == 0) {
1780			mde_perror(ep, "");
1781			md_exit(local_sp, 1);
1782		}
1783
1784		/* Start walking through all possible disksets */
1785		for (setno = 1; setno < max_sets; setno++) {
1786			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1787				if (mdiserror(ep, MDE_NO_SET)) {
1788					/* No set for this setno - continue */
1789					mdclrerror(ep);
1790					continue;
1791				} else {
1792					(void) sprintf(p, gettext(
1793					"Unable to get set %d information"),
1794					    setno);
1795					mde_perror(ep, p);
1796					cumm_err = 1;
1797					mdclrerror(ep);
1798					continue;
1799				}
1800			}
1801
1802			/* If setname is there, set desc should exist. */
1803			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
1804				(void) snprintf(p, bufsz, gettext(
1805				    "Unable to get set %s desc information"),
1806				    sp->setname);
1807				mde_perror(ep, p);
1808				cumm_err = 1;
1809				mdclrerror(ep);
1810				continue;
1811			}
1812
1813			/* Only check MN disksets */
1814			if (!MD_MNSET_DESC(sd)) {
1815				continue;
1816			}
1817
1818			/*
1819			 * Return value of 0 is success.
1820			 * Return value of -1 means a failure.
1821			 * Return value of -2 means set could not be
1822			 * joined, but shouldn't cause an error.
1823			 * Reasons would be:
1824			 * 	- no drives in set
1825			 * 	- node already joined to set
1826			 * Return value of -3 means joined stale set.
1827			 * Can't check for all reasons here
1828			 * since set isn't locked yet across all
1829			 * nodes in the cluster.  The call
1830			 * to libmeta routine, meta_set_join, will
1831			 * lock across the cluster and perform
1832			 * the checks.
1833			 */
1834			if ((err = meta_set_join(sp, ep)) == -1) {
1835				/* Print error of diskset join failure */
1836				(void) snprintf(p, bufsz,
1837				    gettext("Join to diskset %s failed"),
1838				    sp->setname);
1839				mde_perror(ep, p);
1840				cumm_err = 1;
1841				mdclrerror(ep);
1842				continue;
1843			}
1844
1845			if (err == -3) {
1846				/* Print error of diskset join failure */
1847				(void) snprintf(p, bufsz,
1848				    gettext("Joined to stale diskset %s"),
1849				    sp->setname);
1850				mde_perror(ep, p);
1851				mdclrerror(ep);
1852			}
1853
1854			mdclrerror(ep);
1855		}
1856
1857		md_exit(local_sp, cumm_err);
1858	}
1859
1860	/*
1861	 * Code for a specific set is much simpler.
1862	 * Error messages don't need extra text since specific setname
1863	 * was used.
1864	 * Don't need to lock the local set, just the specific set given.
1865	 */
1866	if ((sp = metasetname(sname, ep)) == NULL) {
1867		mde_perror(ep, "");
1868		md_exit(local_sp, 1);
1869	}
1870
1871	/*
1872	 * Fail command if meta_set_join returns -1.
1873	 *
1874	 * Return of 0 means that node joined set.
1875	 *
1876	 * Return of -2 means that node was unable to
1877	 * join a set since that set had no drives
1878	 * or that had already joined the set.  No
1879	 * need to fail the command for these reasons.
1880	 *
1881	 * Return of -3 means that set is stale.
1882	 * Return a value of 66 to historically match traditional disksets.
1883	 */
1884	if ((err = meta_set_join(sp, ep)) == -1) {
1885		mde_perror(&status, "");
1886		md_exit(local_sp, 1);
1887	}
1888
1889	if (err == -3) {
1890		/* Print error of diskset join failure */
1891		(void) snprintf(p, bufsz,
1892		    gettext("Joined to stale diskset %s"),
1893		    sp->setname);
1894		mde_perror(&status, "");
1895		md_exit(local_sp, 66);
1896	}
1897
1898	md_exit(local_sp, 0);
1899}
1900
1901/*
1902 * Withdraws a node from a specific set or from all multinode disksets known
1903 * by this node.  If set is specified then caller should have verified
1904 * that the set is a multinode diskset.
1905 *
1906 * If an error occurs, metaset exits with a 1.
1907 * If there is no error, metaset exits with a 0.
1908 */
1909static void
1910parse_withdrawset(int argc, char **argv)
1911{
1912	int		c;
1913	mdsetname_t	*sp = NULL, *local_sp = NULL;
1914	char		*sname = MD_LOCAL_NAME;
1915	md_error_t	status = mdnullerror;
1916	md_error_t	*ep = &status;
1917	char		buf[BUFSIZ];
1918	char		*p = buf;
1919	md_set_desc	*sd;
1920	set_t		max_sets, setno;
1921	int		err, cumm_err = 0;
1922	size_t		bufsz;
1923
1924	bufsz = sizeof (buf);
1925	/* reset and parse args */
1926	optind = 1;
1927	opterr = 1;
1928	while ((c = getopt(argc, argv, "Ms:w")) != -1) {
1929		switch (c) {
1930		case 'M':
1931			break;
1932		case 'w':
1933			break;
1934		case 's':
1935			sname = optarg;
1936			break;
1937		default:
1938			usage(sp, gettext("unknown options"));
1939		}
1940	}
1941
1942	argc -= optind;
1943	argv += optind;
1944
1945	if (argc > 1)
1946		usage(sp, gettext("too many args"));
1947
1948	/*
1949	 * If no setname option was used, then withdraw from all disksets
1950	 * that this node knows about.
1951	 *
1952	 * Additional text is added to the error messages during
1953	 * this section of code in order to help the user understand
1954	 * why the 'withdraw from all sets' failed and which set caused
1955	 * the failure.
1956	 */
1957
1958	/*
1959	 * Hold local set lock throughout this call to keep
1960	 * other actions from interfering (such as creating a new
1961	 * set, etc.).
1962	 */
1963	if ((local_sp = metasetname(MD_LOCAL_NAME, ep)) == NULL) {
1964		mde_perror(ep, "");
1965		md_exit(sp, 1);
1966	}
1967
1968	if (meta_lock(local_sp, TRUE, ep) != 0) {
1969		mde_perror(ep, "");
1970		md_exit(local_sp, 1);
1971	}
1972
1973	if (strcmp(sname, MD_LOCAL_NAME) == 0) {
1974		/*
1975		 * If no set name is given, then walk through all sets
1976		 * on this node which could include:
1977		 * 	- MN disksets
1978		 *	- traditional disksets
1979		 *	- non-existent disksets
1980		 * Attempt to withdraw from the MN disksets.
1981		 * If the withdraw of one set fails, print out an error
1982		 * message about that set and continue the walk.
1983		 */
1984		if ((max_sets = get_max_sets(ep)) == 0) {
1985			mde_perror(ep, "");
1986			md_exit(local_sp, 1);
1987		}
1988
1989		/* Start walking through all possible disksets */
1990		for (setno = 1; setno < max_sets; setno++) {
1991			if ((sp = metasetnosetname(setno, ep)) == NULL) {
1992				if (mdiserror(ep, MDE_NO_SET)) {
1993					/* No set for this setno - continue */
1994					mdclrerror(ep);
1995					continue;
1996				} else {
1997					(void) sprintf(p, gettext(
1998					    "Unable to get set %d information"),
1999					    setno);
2000					mde_perror(ep, p);
2001					cumm_err = 1;
2002					mdclrerror(ep);
2003					continue;
2004				}
2005			}
2006
2007			/* If setname is there, set desc should exist. */
2008			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2009				(void) snprintf(p, bufsz, gettext(
2010				    "Unable to get set %s desc information"),
2011				    sp->setname);
2012				mde_perror(ep, p);
2013				cumm_err = 1;
2014				mdclrerror(ep);
2015				continue;
2016			}
2017
2018			/* Only check MN disksets */
2019			if (!MD_MNSET_DESC(sd)) {
2020				continue;
2021			}
2022
2023			/*
2024			 * Return value of 0 is success.
2025			 * Return value of -1 means a failure.
2026			 * Return value of -2 means set could not be
2027			 * withdrawn from, but this shouldn't cause
2028			 * an error.  Reasons would be:
2029			 * 	- no drives in set
2030			 * 	- node already withdrawn from set
2031			 * Can't check for all reasons here
2032			 * since set isn't locked yet across all
2033			 * nodes in the cluster.  The call
2034			 * to libmeta routine, meta_set_withdraw, will
2035			 * lock across the cluster and perform
2036			 * the checks.
2037			 */
2038			if ((err = meta_set_withdraw(sp, ep)) == -1) {
2039				/* Print error of diskset withdraw failure */
2040				(void) snprintf(p, bufsz,
2041				    gettext("Withdraw from diskset %s failed"),
2042				    sp->setname);
2043				mde_perror(ep, p);
2044				mdclrerror(ep);
2045				cumm_err = 1;
2046				continue;
2047			}
2048
2049			if (err == -2) {
2050				mdclrerror(ep);
2051				continue;
2052			}
2053
2054			mdclrerror(ep);
2055		}
2056		md_exit(local_sp, cumm_err);
2057	}
2058
2059
2060	/*
2061	 * Code for a specific set is much simpler.
2062	 * Error messages don't need extra text since specific setname
2063	 * was used.
2064	 * Don't need to lock the local set, just the specific set given.
2065	 */
2066	if ((sp = metasetname(sname, ep)) == NULL) {
2067		mde_perror(ep, "");
2068		md_exit(local_sp, 1);
2069	}
2070
2071	/*
2072	 * Fail command if meta_set_withdraw returns -1.
2073	 *
2074	 * Return of 0 means that node withdrew from set.
2075	 *
2076	 * Return of -2 means that node was unable to
2077	 * withdraw from a set since that set had no drives
2078	 * or node was not joined to set.  No
2079	 * need to fail the command for these reasons.
2080	 */
2081	if (meta_set_withdraw(sp, ep) == -1) {
2082		mde_perror(&status, "");
2083		md_exit(local_sp, 1);
2084	}
2085
2086	md_exit(local_sp, 0);
2087}
2088
2089static void
2090parse_cluster(int argc, char **argv, int multi_node)
2091{
2092	int			c, error, new_argc, x;
2093	enum cluster_cmd	cmd = ccnotspecified;
2094	char			*hostname = SDSSC_PROXY_PRIMARY;
2095	char			*argument = NULL;
2096	char			*sname = MD_LOCAL_NAME;
2097	char			primary_node[SDSSC_NODE_NAME_LEN];
2098	char			**new_argv = NULL;
2099	char			**np = NULL;
2100	mdsetname_t		*sp = NULL;
2101	md_error_t		status = mdnullerror;
2102	md_error_t		*ep = &status;
2103
2104	/* reset and parse args */
2105	optind = 1;
2106	opterr = 1;
2107	while ((c = getopt(argc, argv, "C:s:h:ftu:yr")) != -1) {
2108		switch (c) {
2109		case 'C':
2110			if (cmd != ccnotspecified) {
2111				md_exit(sp, -1);
2112			}
2113			argument = optarg;
2114
2115			if (strcmp(argument, "disksin") == 0) {
2116				cmd = clusterdisksin;
2117			} else if (strcmp(argument, "version") == 0) {
2118				cmd = clusterversion;
2119			} else if (strcmp(argument, "release") == 0) {
2120				cmd = clusterrelease;
2121			} else if (strcmp(argument, "take") == 0) {
2122				cmd = clustertake;
2123			} else if (strcmp(argument, "proxy") == 0) {
2124				cmd = clusterproxy;
2125			} else if (strcmp(argument, "purge") == 0) {
2126				cmd = clusterpurge;
2127			} else {
2128				md_exit(sp, -1);
2129			}
2130
2131			break;
2132
2133		case 'h':
2134			hostname = optarg;
2135			break;
2136
2137		case 's':
2138			sname = optarg;
2139			break;
2140
2141		case 'f':
2142		case 't':
2143		case 'u':
2144		case 'y':
2145		case 'r':
2146			break;
2147
2148		default:
2149			md_exit(sp, -1);
2150		}
2151	}
2152
2153	/* Now call the appropriate command function. */
2154	switch (cmd) {
2155	case clusterversion:
2156		printclusterversion();
2157		break;
2158
2159	case clusterdisksin:
2160		if (printdisksin(sname, ep)) {
2161			md_exit(sp, -1);
2162		}
2163		break;
2164
2165	case clusterrelease:
2166		if (multi_node) {
2167			usage(sp, gettext(
2168			    "-C release is not allowed on multi-owner"
2169			    " disksets"));
2170		}
2171		parse_releaseset(argc, argv);
2172		break;
2173
2174	case clustertake:
2175		if (multi_node) {
2176			usage(sp, gettext(
2177			    "-C take is not allowed on multi-owner disksets"));
2178		}
2179		parse_takeset(argc, argv);
2180		break;
2181
2182	case clusterproxy:
2183		if (multi_node) {
2184			usage(sp, gettext(
2185			    "-C proxy is not allowed on multi-owner disksets"));
2186		}
2187
2188		if ((new_argv = calloc(argc, sizeof (char *))) == NULL) {
2189			(void) printf(gettext("Out of memory\n"));
2190			md_exit(sp, 1);
2191		}
2192
2193		np = new_argv;
2194		new_argc = 0;
2195		(void) memset(primary_node, '\0', SDSSC_NODE_NAME_LEN);
2196
2197		for (x = 0; x < argc; x++) {
2198			if (strcmp(argv[x], "-C") == 0) {
2199
2200				/*
2201				 * Need to skip the '-C proxy' args so
2202				 * just increase x by one and the work is
2203				 * done.
2204				 */
2205				x++;
2206			} else {
2207				*np++ = strdup(argv[x]);
2208				new_argc++;
2209			}
2210		}
2211
2212		switch (sdssc_get_primary_host(sname, primary_node,
2213		    SDSSC_NODE_NAME_LEN)) {
2214		case SDSSC_ERROR:
2215			md_exit(sp, 1);
2216			break;
2217
2218		case SDSSC_NO_SERVICE:
2219			if (hostname != SDSSC_PROXY_PRIMARY) {
2220				(void) strlcpy(primary_node, hostname,
2221				    SDSSC_NODE_NAME_LEN);
2222			}
2223			break;
2224		}
2225
2226		if (sdssc_cmd_proxy(new_argc, new_argv,
2227		    primary_node[0] == '\0' ? SDSSC_PROXY_PRIMARY :
2228		    primary_node, &error) == SDSSC_PROXY_DONE) {
2229			md_exit(sp, error);
2230		} else {
2231			(void) printf(gettext(
2232			    "Couldn't proxy command\n"));
2233			md_exit(sp, 1);
2234		}
2235		break;
2236
2237	case clusterpurge:
2238		parse_purge(argc, argv);
2239		break;
2240
2241	default:
2242		break;
2243	}
2244
2245	md_exit(sp, 0);
2246}
2247
2248/*
2249 * parse args and do it
2250 */
2251int
2252main(int argc, char *argv[])
2253{
2254	enum metaset_cmd	cmd = notspecified;
2255	md_error_t		status = mdnullerror;
2256	md_error_t		*ep = &status;
2257	mdsetname_t		*sp = NULL;
2258	char			*hostname = SDSSC_PROXY_PRIMARY;
2259	char			*sname = MD_LOCAL_NAME;
2260	char			*auto_take_option = NULL;
2261	char			primary_node[SDSSC_NODE_NAME_LEN];
2262	int			error, c, stat;
2263	int			auto_take = FALSE;
2264	md_set_desc		*sd;
2265	int			mflag = 0;
2266	int			multi_node = 0;
2267	rval_e			sdssc_res;
2268
2269	/*
2270	 * Get the locale set up before calling any other routines
2271	 * with messages to ouput.  Just in case we're not in a build
2272	 * environment, make sure that TEXT_DOMAIN gets set to
2273	 * something.
2274	 */
2275#if !defined(TEXT_DOMAIN)
2276#define	TEXT_DOMAIN "SYS_TEST"
2277#endif
2278	(void) setlocale(LC_ALL, "");
2279	(void) textdomain(TEXT_DOMAIN);
2280
2281	sdssc_res = sdssc_bind_library();
2282	if (sdssc_res == SDSSC_ERROR) {
2283		(void) printf(gettext(
2284		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
2285		exit(1);
2286	}
2287
2288	/* initialize */
2289	if (md_init(argc, argv, 0, 1, ep) != 0) {
2290		mde_perror(ep, "");
2291		md_exit(sp, 1);
2292	}
2293
2294	optind = 1;
2295	opterr = 1;
2296
2297	/*
2298	 * NOTE: The "C" option is strictly for cluster use. it is not
2299	 * and should not be documented for the customer. - JST
2300	 */
2301	while ((c = getopt(argc, argv, "C:MaA:bdfh:jl:Lm:oPqrs:tu:wy?"))
2302	    != -1) {
2303		switch (c) {
2304		case 'M':
2305			mflag = 1;
2306			break;
2307		case 'A':
2308			auto_take = TRUE;
2309			if (optarg == NULL || !(strcmp(optarg, "enable") == 0 ||
2310			    strcmp(optarg, "disable") == 0))
2311				usage(sp, gettext(
2312				    "-A: enable or disable must be specified"));
2313			auto_take_option = optarg;
2314			break;
2315		case 'a':
2316			if (cmd != notspecified) {
2317				usage(sp, gettext(
2318				    "conflicting options"));
2319			}
2320			cmd = add;
2321			break;
2322		case 'b':
2323			if (cmd != notspecified) {
2324				usage(sp, gettext(
2325				    "conflicting options"));
2326			}
2327			cmd = balance;
2328			break;
2329		case 'd':
2330			if (cmd != notspecified) {
2331				usage(sp, gettext(
2332				    "conflicting options"));
2333			}
2334			cmd = delete;
2335			break;
2336		case 'C':	/* cluster commands */
2337			if (cmd != notspecified) {
2338				md_exit(sp, -1);    /* conflicting options */
2339			}
2340			cmd = cluster;
2341			break;
2342		case 'f':
2343			break;
2344		case 'h':
2345			hostname = optarg;
2346			break;
2347		case 'j':
2348			if (cmd != notspecified) {
2349				usage(sp, gettext(
2350				    "conflicting options"));
2351			}
2352			cmd = join;
2353			break;
2354		case 'l':
2355			break;
2356		case 'L':
2357			break;
2358		case 'm':
2359			break;
2360		case 'o':
2361			if (cmd != notspecified) {
2362				usage(sp, gettext(
2363				    "conflicting options"));
2364			}
2365			cmd = isowner;
2366			break;
2367		case 'P':
2368			if (cmd != notspecified) {
2369				usage(sp, gettext(
2370				    "conflicting options"));
2371			}
2372			cmd = purge;
2373			break;
2374		case 'q':
2375			if (cmd != notspecified) {
2376				usage(sp, gettext(
2377				    "conflicting options"));
2378			}
2379			cmd = query;
2380			break;
2381		case 'r':
2382			if (cmd != notspecified) {
2383				usage(sp, gettext(
2384				    "conflicting options"));
2385			}
2386			cmd = release;
2387			break;
2388		case 's':
2389			sname = optarg;
2390			break;
2391		case 't':
2392			if (cmd != notspecified) {
2393				usage(sp, gettext(
2394				    "conflicting options"));
2395			}
2396			cmd = take;
2397			break;
2398		case 'u':
2399			break;
2400		case 'w':
2401			if (cmd != notspecified) {
2402				usage(sp, gettext(
2403				    "conflicting options"));
2404			}
2405			cmd = withdraw;
2406			break;
2407		case 'y':
2408			break;
2409		case '?':
2410			if (optopt == '?')
2411				usage(sp, NULL);
2412			/*FALLTHROUGH*/
2413		default:
2414			if (cmd == cluster) {    /* cluster is silent */
2415				md_exit(sp, -1);
2416			} else {
2417				usage(sp, gettext(
2418				    "unknown command"));
2419			}
2420		}
2421	}
2422
2423	/* check if suncluster is installed and -A enable specified */
2424	if (auto_take && sdssc_res != SDSSC_NOT_BOUND &&
2425	    strcmp(auto_take_option, "enable") == 0) {
2426		md_eprintf(gettext(
2427		    "cannot enable auto-take when SunCluster is installed\n"));
2428		md_exit(sp, 1);
2429	}
2430
2431	/*
2432	 * At this point we know that if the -A enable option is specified
2433	 * for an auto-take diskset that SC is not installed on the machine, so
2434	 * all of the sdssc calls will just be no-ops.
2435	 */
2436
2437	/* list sets */
2438	if (cmd == notspecified && auto_take == FALSE) {
2439		parse_printset(argc, argv);
2440		/*NOTREACHED*/
2441	}
2442
2443	if (meta_check_root(ep) != 0) {
2444		mde_perror(ep, "");
2445		md_exit(sp, 1);
2446	}
2447
2448	/* snarf MDDB */
2449	if (meta_setup_db_locations(ep) != 0) {
2450		mde_perror(ep, "");
2451		md_exit(sp, 1);
2452	}
2453
2454	/*
2455	 * If sname is a diskset - check for multi_node.
2456	 * It is possible for sname to not exist.
2457	 */
2458	if (strcmp(sname, MD_LOCAL_NAME)) {
2459		if ((sp = metasetname(sname, ep)) != NULL) {
2460			/* Set exists - check for MN diskset */
2461			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
2462				mde_perror(ep, "");
2463				md_exit(sp, 1);
2464			}
2465			if (MD_MNSET_DESC(sd)) {
2466				/*
2467				 * If a MN diskset always set multi_node
2468				 * regardless of whether the -M option was
2469				 * used or not (mflag).
2470				 */
2471				multi_node = 1;
2472			} else {
2473				/*
2474				 * If a traditional diskset, mflag must
2475				 * not be set.
2476				 */
2477				if (mflag) {
2478					usage(sp, gettext(
2479					    "-M option only allowed "
2480					    "on multi-owner diskset"));
2481				}
2482			}
2483		} else {
2484			/*
2485			 * Set name does not exist, set multi_node
2486			 * based on -M option.
2487			 */
2488			if (mflag) {
2489				multi_node = 1;
2490			}
2491		}
2492	}
2493
2494	if (auto_take && multi_node) {
2495		/* Can't mix multinode and auto-take on a diskset */
2496		usage(sp,
2497		    gettext("-A option not allowed on multi-owner diskset"));
2498	}
2499
2500	/*
2501	 * MN disksets don't use DCS clustering services, so
2502	 * do not get primary_node for MN diskset since no command
2503	 * proxying is done to Primary cluster node.  Do not proxy
2504	 * MN diskset commands of join and withdraw when issued without
2505	 * a valid setname.
2506	 * For traditional disksets: proxy all commands except a take
2507	 * and release.  Use first host listed as the host to send the
2508	 * command to if there isn't already a primary
2509	 */
2510	if (strcmp(sname, MD_LOCAL_NAME) && (multi_node == 0) &&
2511	    (cmd != take) && (cmd != release) &&
2512	    (cmd != cluster) && (cmd != join) &&
2513	    (cmd != withdraw) && (cmd != purge)) {
2514		stat = sdssc_get_primary_host(sname, primary_node,
2515		    SDSSC_NODE_NAME_LEN);
2516		switch (stat) {
2517			case SDSSC_ERROR:
2518				return (0);
2519
2520			case SDSSC_NO_SERVICE:
2521				if (hostname != SDSSC_PROXY_PRIMARY) {
2522					(void) strlcpy(primary_node, hostname,
2523					    SDSSC_NODE_NAME_LEN);
2524				} else {
2525					(void) memset(primary_node, '\0',
2526					    SDSSC_NODE_NAME_LEN);
2527				}
2528				break;
2529		}
2530
2531		/*
2532		 * We've got a complicated decision here regarding
2533		 * the hostname. If we didn't get a primary host
2534		 * and a host name wasn't supplied on the command line
2535		 * then we need to revert to SDSSC_PROXY_PRIMARY. Otherwise
2536		 * use what's been found.
2537		 */
2538		if (sdssc_cmd_proxy(argc, argv,
2539		    primary_node[0] == '\0' ?
2540		    SDSSC_PROXY_PRIMARY : primary_node,
2541		    &error) == SDSSC_PROXY_DONE) {
2542			exit(error);
2543		}
2544	}
2545
2546	/* cluster-specific commands */
2547	if (cmd == cluster) {
2548		parse_cluster(argc, argv, multi_node);
2549		/*NOTREACHED*/
2550	}
2551
2552	/* join MultiNode diskset */
2553	if (cmd == join) {
2554		/*
2555		 * If diskset specified, verify that it exists
2556		 * and is a multinode diskset.
2557		 */
2558		if (strcmp(sname, MD_LOCAL_NAME)) {
2559			if ((sp = metasetname(sname, ep)) == NULL) {
2560				mde_perror(ep, "");
2561				md_exit(sp, 1);
2562			}
2563
2564			if (!multi_node) {
2565				usage(sp, gettext(
2566				    "-j option only allowed on "
2567				    "multi-owner diskset"));
2568			}
2569		}
2570		/*
2571		 * Start mddoors daemon here.
2572		 * mddoors itself ensures that only one instance is
2573		 * running, so starting it twice won't hurt.
2574		 */
2575		(void) pclose(popen("/usr/lib/lvm/mddoors", "w"));
2576		parse_joinset(argc, argv);
2577		/*NOTREACHED*/
2578	}
2579
2580	/* withdraw from MultiNode diskset */
2581	if (cmd == withdraw) {
2582		/*
2583		 * If diskset specified, verify that it exists
2584		 * and is a multinode diskset.
2585		 */
2586		if (strcmp(sname, MD_LOCAL_NAME)) {
2587			if ((sp = metasetname(sname, ep)) == NULL) {
2588				mde_perror(ep, "");
2589				md_exit(sp, 1);
2590			}
2591
2592			if (!multi_node) {
2593				usage(sp, gettext(
2594				    "-w option only allowed on "
2595				    "multi-owner diskset"));
2596			}
2597		}
2598		parse_withdrawset(argc, argv);
2599		/*NOTREACHED*/
2600	}
2601
2602	/* must have set for everything else */
2603	if (strcmp(sname, MD_LOCAL_NAME) == 0)
2604		usage(sp, gettext("setname must be specified"));
2605
2606	/* add hosts or drives */
2607	if (cmd == add) {
2608		/*
2609		 * In the multi node case start mddoors daemon.
2610		 * mddoors itself ensures that only one instance is
2611		 * running, so starting it twice won't hurt.
2612		 */
2613		if (multi_node) {
2614			(void) pclose(popen("/usr/lib/lvm/mddoors", "w"));
2615		}
2616
2617		parse_add(argc, argv);
2618		/*NOTREACHED*/
2619	}
2620
2621	/* re-balance the replicas */
2622	if (cmd == balance) {
2623		parse_balance(argc, argv);
2624		/*NOTREACHED*/
2625	}
2626
2627	/* delete hosts or drives */
2628	if (cmd == delete) {
2629		parse_del(argc, argv);
2630		/*NOTREACHED*/
2631	}
2632
2633	/* check ownership */
2634	if (cmd == isowner) {
2635		parse_isowner(argc, argv);
2636		/*NOTREACHED*/
2637	}
2638
2639	/* purge the diskset */
2640	if (cmd == purge) {
2641		parse_purge(argc, argv);
2642		/*NOTREACHED*/
2643	}
2644
2645	/* query for data marks */
2646	if (cmd == query) {
2647		parse_query(argc, argv);
2648		/*NOTREACHED*/
2649	}
2650
2651	/* release ownership */
2652	if (cmd == release) {
2653		if (multi_node) {
2654			/* Can't release multinode diskset */
2655			usage(sp, gettext(
2656			    "-r option not allowed on multi-owner diskset"));
2657		} else {
2658			parse_releaseset(argc, argv);
2659			/*NOTREACHED*/
2660		}
2661	}
2662
2663	/* take ownership */
2664	if (cmd == take) {
2665		if (multi_node) {
2666			/* Can't take multinode diskset */
2667			usage(sp, gettext(
2668			    "-t option not allowed on multi-owner diskset"));
2669		} else {
2670			parse_takeset(argc, argv);
2671			/*NOTREACHED*/
2672		}
2673	}
2674
2675	/* take ownership of auto-take sets */
2676	if (auto_take) {
2677		parse_autotake(argc, argv);
2678		/*NOTREACHED*/
2679	}
2680
2681	/*NOTREACHED*/
2682	return (0);
2683}
2684