meta_sp.c revision 8452:89d32dfdae6e
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Just in case we're not in a build environment, make sure that
29 * TEXT_DOMAIN gets set to something.
30 */
31#if !defined(TEXT_DOMAIN)
32#define	TEXT_DOMAIN "SYS_TEST"
33#endif
34
35/*
36 * soft partition operations
37 *
38 * Soft Partitions provide a virtual disk mechanism which is used to
39 * divide a large volume into many small pieces, each appearing as a
40 * separate device.  A soft partition consists of a series of extents,
41 * each having an offset and a length.  The extents are logically
42 * contiguous, so where the first extent leaves off the second extent
43 * picks up.  Which extent a given "virtual offset" belongs to is
44 * dependent on the size of all the previous extents in the soft
45 * partition.
46 *
47 * Soft partitions are represented in memory by an extent node
48 * (sp_ext_node_t) which contains all of the information necessary to
49 * create a unit structure and update the on-disk format, called
50 * "watermarks".  These extent nodes are typically kept in a doubly
51 * linked list and are manipulated by list manipulation routines.  A
52 * list of extents may represent all of the soft partitions on a volume,
53 * a single soft partition, or perhaps just a set of extents that need
 * to be updated.  Extent lists may be sorted by offset or by name/seq#,
55 * depending on which compare function is used.  Most of the routines
56 * require the list be sorted by offset to work, and that's the typical
57 * configuration.
58 *
59 * In order to do an allocation, knowledge of all soft partitions on the
60 * volume is required.  Then free space is determined from the space
61 * that is not allocated, and new allocations can be made from the free
62 * space.  Once the new allocations are made, a unit structure is created
63 * and the watermarks are updated.  The status is then changed to "okay"
64 * on the unit structure to commit the transaction.  If updating the
65 * watermarks fails, the unit structure is in an intermediate state and
66 * the driver will not allow access to the device.
67 *
68 * A typical sequence of events is:
69 *     1. Fetch the list of names for all soft partitions on a volume
70 *         meta_sp_get_by_component()
71 *     2. Construct an extent list from the name list
72 *         meta_sp_extlist_from_namelist()
73 *     3. Fill the gaps in the extent list with free extents
74 *         meta_sp_list_freefill()
75 *     4. Allocate from the free extents
76 *         meta_sp_alloc_by_len()
77 *         meta_sp_alloc_by_list()
78 *     5. Create the unit structure from the extent list
79 *         meta_sp_createunit()
80 *         meta_sp_updateunit()
81 *     6. Write out the watermarks
82 *         meta_sp_update_wm()
83 *     7. Set the status to "Okay"
84 *         meta_sp_setstatus()
85 *
86 */
87
88#include <stdio.h>
89#include <meta.h>
90#include "meta_repartition.h"
91#include <sys/lvm/md_sp.h>
92#include <sys/lvm/md_crc.h>
93#include <strings.h>
94#include <sys/lvm/md_mirror.h>
95#include <sys/bitmap.h>
96
97extern int	md_in_daemon;
98
99typedef struct sp_ext_node {
100	struct sp_ext_node	*ext_next;	/* next element */
101	struct sp_ext_node	*ext_prev;	/* previous element */
102	sp_ext_type_t		ext_type;	/* type of extent */
103	sp_ext_offset_t		ext_offset;	/* starting offset */
104	sp_ext_length_t		ext_length;	/* length of this node */
105	uint_t			ext_flags;	/* extent flags */
106	uint32_t		ext_seq;	/* watermark seq no */
107	mdname_t		*ext_namep;	/* name pointer */
108	mdsetname_t		*ext_setp;	/* set pointer */
109} sp_ext_node_t;
110
111/* extent flags */
112#define	EXTFLG_UPDATE	(1)
113
114/* Extent node compare function for list sorting */
115typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *);
116
117
118/* Function Prototypes */
119
120/* Debugging Functions */
121static void meta_sp_debug(char *format, ...);
122static void meta_sp_printunit(mp_unit_t *mp);
123
124/* Misc Support Functions */
125int meta_sp_parsesize(char *s, sp_ext_length_t *szp);
126static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp);
127static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp,
128	md_error_t *ep);
129static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp,
130    mdnamelist_t **nlpp, int force, md_error_t *ep);
131static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp,
132    mdname_t *compnp, md_error_t *ep);
133
134/* Extent List Manipulation Functions */
135static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2);
136static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2);
137static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np,
138    sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length,
139    sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare);
140static void meta_sp_list_free(sp_ext_node_t **head);
141static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext);
142static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head,
143    sp_ext_type_t exttype, int exclude_wm);
144static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head,
145    sp_ext_offset_t offset);
146static void meta_sp_list_freefill(sp_ext_node_t **extlist,
147    sp_ext_length_t size);
148static void meta_sp_list_dump(sp_ext_node_t *head);
149static int meta_sp_list_overlaps(sp_ext_node_t *head);
150
151/* Extent List Query Functions */
152static boolean_t meta_sp_enough_space(int desired_number_of_sps,
153	blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp,
154	sp_ext_length_t alignment);
155static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep,
156	mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp,
157	md_error_t *ep);
158static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep,
159	mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp);
160
161
162/* Extent Allocation Functions */
163static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np,
164    sp_ext_node_t **extlist, sp_ext_node_t *free_ext,
165    sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq);
166static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np,
167    sp_ext_node_t **extlist, sp_ext_length_t *lp,
168    sp_ext_offset_t last_off, sp_ext_length_t alignment);
169static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np,
170    sp_ext_node_t **extlist, sp_ext_node_t *oblist);
171
172/* Extent List Population Functions */
173static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp,
174    sp_ext_node_t **extlist, md_error_t *ep);
175static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp,
176    sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep);
177
178/* Print (metastat) Functions */
179static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp,
180    mdprtopts_t options, md_error_t *ep);
181static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate);
182static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp,
183    char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep);
184
185/* Watermark Manipulation Functions */
186static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp,
187    sp_ext_node_t *extlist, md_error_t *ep);
188static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep);
189static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp,
190    mp_watermark_t *wm, sp_ext_offset_t offset,  md_error_t *ep);
191static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp,
192    md_error_t *ep);
193
194/* Unit Structure Manipulation Functions */
195static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist);
196static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp,
197    sp_ext_node_t *extlist, int numexts, sp_ext_length_t len,
198    sp_status_t status, md_error_t *ep);
199static mp_unit_t *meta_sp_updateunit(mdname_t *np,  mp_unit_t *old_un,
200    sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts,
201    md_error_t *ep);
202static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist,
203    mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep);
204static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options,
205    int *repart_options, md_error_t *ep);
206
207/* Reset (metaclear) Functions */
208static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp,
209    md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep);
210
211/* Recovery (metarecover) Functions */
212static void meta_sp_display_exthdr(void);
213static void meta_sp_display_ext(sp_ext_node_t *ext);
214static int meta_sp_checkseq(sp_ext_node_t *extlist);
215static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *,
216    mdname_t **, md_error_t *);
217static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np,
218    mdcmdopts_t options, md_error_t *ep);
219static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp,
220    mdcmdopts_t options, md_error_t *ep);
221static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np,
222    mdcmdopts_t options, md_error_t *ep);
223static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext,
224    sp_ext_node_t *unitext, md_error_t *ep);
225static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp,
226    mdcmdopts_t options, md_error_t *ep);
227static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np,
228    mdcmdopts_t options, md_error_t *ep);
229
230/*
231 * Private Constants
232 */
233
234static const int FORCE_RELOAD_CACHE = 1;
235static const uint_t NO_FLAGS = 0;
236static const sp_ext_offset_t NO_OFFSET = 0ULL;
237static const uint_t NO_SEQUENCE_NUMBER = 0;
238static const int ONE_SOFT_PARTITION = 1;
239
240static unsigned long sp_parent_printed[BT_BITOUL(MD_MAXUNITS)];
241
242#define	TEST_SOFT_PARTITION_NAMEP NULL
243#define	TEST_SETNAMEP NULL
244
245#define	EXCLUDE_WM	(1)
246#define	INCLUDE_WM	(0)
247
248#define	SP_UNALIGNED	(0LL)
249
250/*
251 * **************************************************************************
252 *                          Debugging Functions                             *
253 * **************************************************************************
254 */
255
256/*PRINTFLIKE1*/
257static void
258meta_sp_debug(char *format, ...)
259{
260	static int debug;
261	static int debug_set = 0;
262	va_list ap;
263
264	if (!debug_set) {
265		debug = getenv(META_SP_DEBUG) ? 1 : 0;
266		debug_set = 1;
267	}
268
269	if (debug) {
270		va_start(ap, format);
271		(void) vfprintf(stderr, format, ap);
272		va_end(ap);
273	}
274}
275
276static void
277meta_sp_printunit(mp_unit_t *mp)
278{
279	int i;
280
281	if (mp == NULL)
282		return;
283
284	/* print the common fields we know about */
285	(void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type);
286	(void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size);
287	(void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp));
288
289	/* sp-specific fields */
290	(void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status);
291	(void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts);
292	(void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length);
293	(void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev);
294	(void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev);
295	(void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key);
296
297	/* print extent information */
298	(void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n");
299	for (i = 0; i < mp->un_numexts; i++) {
300		(void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i,
301		    mp->un_ext[i].un_voff, mp->un_ext[i].un_poff,
302		    mp->un_ext[i].un_len);
303	}
304}
305
306/*
307 * FUNCTION:    meta_sp_parsesize()
308 * INPUT:       s       - the string to parse
309 * OUTPUT:      *szp    - disk block count (0 for "all")
310 * RETURNS:     -1 for error, 0 for success
311 * PURPOSE:     parses the command line parameter that specifies the
312 *              requested size of a soft partition.  The input string
313 *              is either the literal "all" or a numeric value
314 *              followed by a single character, b for disk blocks, k
315 *              for kilobytes, m for megabytes, g for gigabytes, or t
316 *              for terabytes.  p for petabytes and e for exabytes
317 *              have been added as undocumented features for future
318 *              expansion.  For example, 100m is 100 megabytes, while
319 *              50g is 50 gigabytes.  All values are rounded up to the
320 *              nearest block size.
321 */
322int
323meta_sp_parsesize(char *s, sp_ext_length_t *szp)
324{
325	if (s == NULL || szp == NULL) {
326		return (-1);
327	}
328
329	/* Check for literal "all" */
330	if (strcasecmp(s, "all") == 0) {
331		*szp = 0;
332		return (0);
333	}
334
335	return (meta_sp_parsesizestring(s, szp));
336}
337
338/*
339 * FUNCTION:	meta_sp_parsesizestring()
340 * INPUT:	s	- the string to parse
341 * OUTPUT:	*szp	- disk block count
342 * RETURNS:	-1 for error, 0 for success
343 * PURPOSE:	parses a string that specifies size. The input string is a
344 *		numeric value followed by a single character, b for disk blocks,
345 *		k for kilobytes, m for megabytes, g for gigabytes, or t for
346 *		terabytes.  p for petabytes and e for exabytes have been added
347 *		as undocumented features for future expansion.  For example,
348 *		100m is 100 megabytes, while 50g is 50 gigabytes.  All values
349 *		are rounded up to the nearest block size.
350 */
351static int
352meta_sp_parsesizestring(char *s, sp_ext_length_t *szp)
353{
354	sp_ext_length_t	len = 0;
355	char		len_type[2];
356
357	if (s == NULL || szp == NULL) {
358		return (-1);
359	}
360
361	/*
362	 * make sure block offset does not overflow 2^64 bytes.
363	 */
364	if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) ||
365	    (len == 0LL) ||
366	    (len > (1LL << (64 - DEV_BSHIFT))))
367		return (-1);
368
369	switch (len_type[0]) {
370	case 'B':
371	case 'b':
372		len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE));
373		break;
374	case 'K':
375	case 'k':
376		len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE));
377		break;
378	case 'M':
379	case 'm':
380		len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE));
381		break;
382	case 'g':
383	case 'G':
384		len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE));
385		break;
386	case 't':
387	case 'T':
388		len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL,
389		    DEV_BSIZE));
390		break;
391	case 'p':
392	case 'P':
393		len = lbtodb(roundup(
394		    len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL,
395		    DEV_BSIZE));
396		break;
397	case 'e':
398	case 'E':
399		len = lbtodb(roundup(
400		    len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL,
401		    DEV_BSIZE));
402		break;
403	default:
404		/* error */
405		return (-1);
406	}
407
408	*szp = len;
409	return (0);
410}
411
412/*
413 * FUNCTION:	meta_sp_setgeom()
414 * INPUT:	np      - the underlying device to setup geometry for
415 *		compnp	- the underlying device to setup geometry for
416 *		mp	- the unit structure to set the geometry for
417 * OUTPUT:	ep	- return error pointer
418 * RETURNS:	int	- -1 if error, 0 otherwise
419 * PURPOSE:	establishes geometry information for a device
420 */
421static int
422meta_sp_setgeom(
423	mdname_t	*np,
424	mdname_t	*compnp,
425	mp_unit_t	*mp,
426	md_error_t	*ep
427)
428{
429	mdgeom_t	*geomp;
430	uint_t		round_cyl = 0;
431
432	if ((geomp = metagetgeom(compnp, ep)) == NULL)
433		return (-1);
434	if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct,
435	    geomp->read_reinstruct, round_cyl, ep) != 0)
436		return (-1);
437
438	return (0);
439}
440
441/*
442 * FUNCTION:	meta_sp_setstatus()
443 * INPUT:	sp	- the set name for the devices to set the status on
444 *		minors	- an array of minor numbers of devices to set status on
445 *		num_units - number of entries in the array
446 *		status	- status value to set all units to
447 * OUTPUT:	ep	- return error pointer
448 * RETURNS:	int	- -1 if error, 0 success
449 * PURPOSE:	sets the status of one or more soft partitions to the
450 *		requested value
451 */
452int
453meta_sp_setstatus(
454	mdsetname_t	*sp,
455	minor_t		*minors,
456	int		num_units,
457	sp_status_t	status,
458	md_error_t	*ep
459)
460{
461	md_sp_statusset_t	status_params;
462
463	assert(minors != NULL);
464
465	/* update status of all soft partitions to the status passed in */
466	(void) memset(&status_params, 0, sizeof (status_params));
467	status_params.num_units = num_units;
468	status_params.new_status = status;
469	status_params.size = num_units * sizeof (minor_t);
470	status_params.minors = (uintptr_t)minors;
471	MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno);
472	if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde,
473	    NULL) != 0) {
474		(void) mdstealerror(ep, &status_params.mde);
475		return (-1);
476	}
477	return (0);
478}
479
480/*
481 * FUNCTION:	meta_get_sp_names()
482 * INPUT:	sp	- the set name to get soft partitions from
483 *		options	- options from the command line
484 * OUTPUT:	nlpp	- list of all soft partition names
485 *		ep	- return error pointer
486 * RETURNS:	int	- -1 if error, 0 success
487 * PURPOSE:	returns a list of all soft partitions in the metadb
488 *		for all devices in the specified set
489 */
490int
491meta_get_sp_names(
492	mdsetname_t	*sp,
493	mdnamelist_t	**nlpp,
494	int		options,
495	md_error_t	*ep
496)
497{
498	return (meta_get_names(MD_SP, sp, nlpp, options, ep));
499}
500
501/*
502 * FUNCTION:	meta_get_by_component()
503 * INPUT:	sp	- the set name to get soft partitions from
504 *		compnp	- the name of the device containing the soft
505 *			  partitions that will be returned
506 *		force	- 0 - reads cached namelist if available,
507 *			  1 - reloads cached namelist, frees old namelist
508 * OUTPUT:	nlpp	- list of all soft partition names
509 *		ep	- return error pointer
510 * RETURNS:	int	- -1 error, otherwise the number of soft partitions
511 *			  found on the component (0 = none found).
512 * PURPOSE:	returns a list of all soft partitions on a given device
513 *		from the metadb information
514 */
515static int
516meta_sp_get_by_component(
517	mdsetname_t	*sp,
518	mdname_t	*compnp,
519	mdnamelist_t	**nlpp,
520	int		force,
521	md_error_t	*ep
522)
523{
524	static mdnamelist_t	*cached_list = NULL;	/* cached namelist */
525	static int		cached_count = 0;	/* cached count */
526	mdnamelist_t		*spnlp = NULL;		/* all sp names */
527	mdnamelist_t		*namep;			/* list iterator */
528	mdnamelist_t		**tailpp = nlpp;	/* namelist tail */
529	mdnamelist_t		**cachetailpp;		/* cache tail */
530	md_sp_t			*msp;			/* unit structure */
531	int			count = 0;		/* count of sp's */
532	int			err;
533	mdname_t		*curnp;
534
535	if ((cached_list != NULL) && (!force)) {
536		/* return a copy of the cached list */
537		for (namep = cached_list; namep != NULL; namep = namep->next)
538			tailpp = meta_namelist_append_wrapper(tailpp,
539			    namep->namep);
540		return (cached_count);
541	}
542
543	/* free the cache and reset values to zeros to prepare for a new list */
544	metafreenamelist(cached_list);
545	cached_count = 0;
546	cached_list = NULL;
547	cachetailpp = &cached_list;
548	*nlpp = NULL;
549
550	/* get all the softpartitions first of all */
551	if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0)
552		return (-1);
553
554	/*
555	 * Now for each sp, see if it resides on the component we
556	 * are interested in, if so then add it to our list
557	 */
558	for (namep = spnlp; namep != NULL; namep = namep->next) {
559		curnp = namep->namep;
560
561		/* get the unit structure */
562		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
563			continue;
564
565		/*
566		 * If the current soft partition is not on the same
567		 * component, continue the search.  If it is on the same
568		 * component, add it to our namelist.
569		 */
570		err = meta_check_samedrive(compnp, msp->compnamep, ep);
571		if (err <= 0) {
572			/* not on the same device, check the next one */
573			continue;
574		}
575
576		/* it's on the same drive */
577
578		/*
579		 * Check for overlapping partitions if the component is not
580		 * a metadevice.
581		 */
582		if (!metaismeta(msp->compnamep)) {
583			/*
584			 * if they're on the same drive, neither
585			 * should be a metadevice if one isn't
586			 */
587			assert(!metaismeta(compnp));
588
589			if (meta_check_overlap(msp->compnamep->cname,
590			    compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0)
591				continue;
592
593			/* in this case it's not an error for them to overlap */
594			mdclrerror(ep);
595		}
596
597		/* Component is on the same device, add to the used list */
598		tailpp = meta_namelist_append_wrapper(tailpp, curnp);
599		cachetailpp = meta_namelist_append_wrapper(cachetailpp,
600		    curnp);
601
602		++count;
603		++cached_count;
604	}
605
606	assert(count == cached_count);
607	return (count);
608
609out:
610	metafreenamelist(*nlpp);
611	*nlpp = NULL;
612	return (-1);
613}
614
615/*
616 * FUNCTION:    meta_sp_get_default_alignment()
617 * INPUT:       sp      - the pertinent set name
618 *              compnp  - the name of the underlying component
619 * OUTPUT:      ep      - return error pointer
620 * RETURNS:     sp_ext_length_t =0: no default alignment
621 *                              >0: default alignment
622 * PURPOSE:     returns the default alignment for soft partitions to
623 *              be built on top of the specified component or
624 *              metadevice
625 */
626static sp_ext_length_t
627meta_sp_get_default_alignment(
628	mdsetname_t	*sp,
629	mdname_t	*compnp,
630	md_error_t	*ep
631)
632{
633	sp_ext_length_t	a = SP_UNALIGNED;
634	char		*mname;
635
636	assert(compnp != NULL);
637
638	/*
639	 * We treat raw devices as opaque, and assume nothing about
640	 * their alignment requirements.
641	 */
642	if (!metaismeta(compnp))
643		return (SP_UNALIGNED);
644
645	/*
646	 * We already know it's a metadevice from the previous test;
647	 * metagetmiscname() will tell us which metadevice type we
648	 * have
649	 */
650	mname = metagetmiscname(compnp, ep);
651	if (mname == NULL)
652		goto out;
653
654	/*
655	 * For a mirror, we want to deal with the stripe that is the
656	 * primary side.  If it happens to be asymmetrically
657	 * configured, there is no simple way to fake a universal
658	 * alignment.  There's a chance that the least common
659	 * denominator of the set of interlaces from all stripes of
660	 * all submirrors would do it, but nobody that really cared
661	 * that much about this issue would create an asymmetric
662	 * config to start with.
663	 *
664	 * If the component underlying the soft partition is a mirror,
665	 * then at the exit of this loop, compnp will have been
666	 * updated to describe the first active submirror.
667	 */
668	if (strcmp(mname, MD_MIRROR) == 0) {
669		md_mirror_t	*mp;
670		int		smi;
671		md_submirror_t	*smp;
672
673		mp = meta_get_mirror(sp, compnp, ep);
674		if (mp == NULL)
675			goto out;
676
677		for (smi = 0; smi < NMIRROR; smi++) {
678
679			smp = &mp->submirrors[smi];
680			if (smp->state == SMS_UNUSED)
681				continue;
682
683			compnp = smp->submirnamep;
684			assert(compnp != NULL);
685
686			mname = metagetmiscname(compnp, ep);
687			if (mname == NULL)
688				goto out;
689
690			break;
691		}
692
693		if (smi == NMIRROR)
694			goto out;
695	}
696
697	/*
698	 * Handle stripes and submirrors identically; just return the
699	 * interlace of the first row.
700	 */
701	if (strcmp(mname, MD_STRIPE) == 0) {
702		md_stripe_t	*stp;
703
704		stp = meta_get_stripe(sp, compnp, ep);
705		if (stp == NULL)
706			goto out;
707
708		a = stp->rows.rows_val[0].interlace;
709		goto out;
710	}
711
712	/*
713	 * Raid is even more straightforward; the interlace applies to
714	 * the entire device.
715	 */
716	if (strcmp(mname, MD_RAID) == 0) {
717		md_raid_t	*rp;
718
719		rp = meta_get_raid(sp, compnp, ep);
720		if (rp == NULL)
721			goto out;
722
723		a = rp->interlace;
724		goto out;
725	}
726
727	/*
728	 * If we have arrived here with the alignment still not set,
729	 * then we expect the error to have been set by one of the
730	 * routines we called.  If neither is the case, something has
731	 * really gone wrong above.  (Probably the submirror walk
732	 * failed to produce a valid submirror, but that would be
733	 * really bad...)
734	 */
735out:
736	meta_sp_debug("meta_sp_get_default_alignment: miscname %s, "
737	    "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a);
738
739	if (getenv(META_SP_DEBUG) && !mdisok(ep)) {
740		mde_perror(ep, NULL);
741	}
742
743	assert((a > 0) || (!mdisok(ep)));
744
745	return (a);
746}
747
748
749
750/*
751 * FUNCTION:	meta_check_insp()
752 * INPUT:	sp	- the set name for the device to check
753 *		np	- the name of the device to check
754 *		slblk	- the starting offset of the device to check
755 *		nblks	- the number of blocks in the device to check
756 * OUTPUT:	ep	- return error pointer
757 * RETURNS:	int	-  0 - device contains soft partitions
758 *			  -1 - device does not contain soft partitions
759 * PURPOSE:	determines whether a device contains any soft partitions
760 */
761/* ARGSUSED */
762int
763meta_check_insp(
764	mdsetname_t	*sp,
765	mdname_t	*np,
766	diskaddr_t	slblk,
767	diskaddr_t	nblks,
768	md_error_t	*ep
769)
770{
771	mdnamelist_t	*spnlp = NULL;	/* soft partition name list */
772	int		count;
773	int		rval;
774
775	/* check set pointer */
776	assert(sp != NULL);
777
778	/*
779	 * Get a list of the soft partitions that currently reside on
780	 * the component.  We should ALWAYS force reload the cache,
781	 * because if we're using the md.tab, we must rebuild
782	 * the list because it won't contain the previous (if any)
783	 * soft partition.
784	 */
785	/* find all soft partitions on the component */
786	count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep);
787
788	if (count == -1) {
789		rval = -1;
790	} else if (count > 0) {
791		rval = mduseerror(ep, MDE_ALREADY, np->dev,
792		    spnlp->namep->cname, np->cname);
793	} else {
794		rval = 0;
795	}
796
797	metafreenamelist(spnlp);
798	return (rval);
799}
800
801/*
802 * **************************************************************************
803 *                    Extent List Manipulation Functions                    *
804 * **************************************************************************
805 */
806
807/*
808 * FUNCTION:	meta_sp_cmp_by_nameseq()
809 * INPUT:	e1	- first node to compare
810 *		e2	- second node to compare
811 * OUTPUT:	none
812 * RETURNS:	int	- =0 - nodes are equal
813 *			  <0 - e1 should go before e2
814 *			  >0 - e1 should go after e2
815 * PURPOSE:	used for sorted list inserts to build a list sorted by
816 *		name first and sequence number second.
817 */
818static int
819meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2)
820{
821	int rval;
822
823	if (e1->ext_namep == NULL)
824		return (1);
825	if (e2->ext_namep == NULL)
826		return (-1);
827	if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0)
828		return (rval);
829
830	/* the names are equal, compare sequence numbers */
831	if (e1->ext_seq > e2->ext_seq)
832		return (1);
833	if (e1->ext_seq < e2->ext_seq)
834		return (-1);
835	/* sequence numbers are also equal */
836	return (0);
837}
838
839/*
840 * FUNCTION:	meta_sp_cmp_by_offset()
841 * INPUT:	e1	- first node to compare
842 *		e2	- second node to compare
843 * OUTPUT:	none
844 * RETURNS:	int	- =0 - nodes are equal
845 *			  <0 - e1 should go before e2
846 *			  >0 - e1 should go after e2
847 * PURPOSE:	used for sorted list inserts to build a list sorted by offset
848 */
849static int
850meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2)
851{
852	if (e1->ext_offset > e2->ext_offset)
853		return (1);
854	if (e1->ext_offset < e2->ext_offset)
855		return (-1);
856	/* offsets are equal */
857	return (0);
858}
859
860/*
861 * FUNCTION:	meta_sp_list_insert()
862 * INPUT:	sp	- the set name for the device the node belongs to
863 *		np	- the name of the device the node belongs to
864 *		head	- the head of the list, must be NULL for empty list
865 *		offset	- the physical offset of this extent in sectors
866 *		length	- the length of this extent in sectors
867 *		type	- the type of the extent being inserted
868 *		seq	- the sequence number of the extent being inserted
869 *		flags	- extent flags (eg. whether it needs to be updated)
870 *		compare	- the compare function to use
871 * OUTPUT:	head	- points to the new head if a node was inserted
872 *			  at the beginning
873 * RETURNS:	void
874 * PURPOSE:	inserts an extent node into a sorted doubly linked list.
875 *		The sort order is determined by the compare function.
876 *		Memory is allocated for the node in this function and it
877 *		is up to the caller to free it, possibly using
878 *		meta_sp_list_free().  If a node is inserted at the
879 *		beginning of the list, the head pointer is updated to
880 *		point to the new first node.
881 */
882static void
883meta_sp_list_insert(
884	mdsetname_t	*sp,
885	mdname_t	*np,
886	sp_ext_node_t	**head,
887	sp_ext_offset_t	offset,
888	sp_ext_length_t	length,
889	sp_ext_type_t	type,
890	uint_t		seq,
891	uint_t		flags,
892	ext_cmpfunc_t	compare
893)
894{
895	sp_ext_node_t	*newext;
896	sp_ext_node_t	*curext;
897
898	assert(head != NULL);
899
900	/* Don't bother adding zero length nodes */
901	if (length == 0ULL)
902		return;
903
904	/* allocate and fill in new ext_node */
905	newext = Zalloc(sizeof (sp_ext_node_t));
906
907	newext->ext_offset = offset;
908	newext->ext_length = length;
909	newext->ext_flags = flags;
910	newext->ext_type = type;
911	newext->ext_seq = seq;
912	newext->ext_setp = sp;
913	newext->ext_namep = np;
914
915	/* first node in the list */
916	if (*head == NULL) {
917		newext->ext_next = newext->ext_prev = NULL;
918		*head = newext;
919	} else if ((*compare)(*head, newext) >= 0) {
920		/* the first node has a bigger offset, so insert before it */
921		assert((*head)->ext_prev == NULL);
922
923		newext->ext_prev = NULL;
924		newext->ext_next = *head;
925		(*head)->ext_prev = newext;
926		*head = newext;
927	} else {
928		/*
929		 * find the next node whose offset is greater than
930		 * the one we want to insert, or the end of the list.
931		 */
932		for (curext = *head;
933		    (curext->ext_next != NULL) &&
934		    ((*compare)(curext->ext_next, newext) < 0);
935		    (curext = curext->ext_next))
936			;
937
938		/* link the new node in after the current node */
939		newext->ext_next = curext->ext_next;
940		newext->ext_prev = curext;
941
942		if (curext->ext_next != NULL)
943			curext->ext_next->ext_prev = newext;
944
945		curext->ext_next = newext;
946	}
947}
948
949/*
950 * FUNCTION:	meta_sp_list_free()
951 * INPUT:	head	- the head of the list, must be NULL for empty list
952 * OUTPUT:	head	- points to NULL on return
953 * RETURNS:	void
954 * PURPOSE:	walks a double linked extent list and frees each node
955 */
956static void
957meta_sp_list_free(sp_ext_node_t **head)
958{
959	sp_ext_node_t	*ext;
960	sp_ext_node_t	*next;
961
962	assert(head != NULL);
963
964	ext = *head;
965	while (ext) {
966		next = ext->ext_next;
967		Free(ext);
968		ext = next;
969	}
970	*head = NULL;
971}
972
973/*
974 * FUNCTION:	meta_sp_list_remove()
975 * INPUT:	head	- the head of the list, must be NULL for empty list
976 *		ext	- the extent to remove, must be a member of the list
977 * OUTPUT:	head	- points to the new head of the list
978 * RETURNS:	void
979 * PURPOSE:	unlinks the node specified by ext from the list and
980 *		frees it, possibly moving the head pointer forward if
981 *		the head is the node being removed.
982 */
983static void
984meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext)
985{
986	assert(head != NULL);
987	assert(*head != NULL);
988
989	if (*head == ext)
990		*head = ext->ext_next;
991
992	if (ext->ext_prev != NULL)
993		ext->ext_prev->ext_next = ext->ext_next;
994	if (ext->ext_next != NULL)
995		ext->ext_next->ext_prev = ext->ext_prev;
996	Free(ext);
997}
998
999/*
1000 * FUNCTION:	meta_sp_list_size()
1001 * INPUT:	head	- the head of the list, must be NULL for empty list
1002 *		exttype	- the type of the extents to sum
1003 *		exclude_wm - subtract space for extent headers from total
1004 * OUTPUT:	none
1005 * RETURNS:	sp_ext_length_t	- the sum of all of the lengths
1006 * PURPOSE:	sums the lengths of all extents in the list matching the
1007 *		specified type.  This could be used for computing the
1008 *		amount of free or used space, for example.
1009 */
1010static sp_ext_length_t
1011meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm)
1012{
1013	sp_ext_node_t	*ext;
1014	sp_ext_length_t	size = 0LL;
1015
1016	for (ext = head; ext != NULL; ext = ext->ext_next)
1017		if (ext->ext_type == exttype)
1018			size += ext->ext_length -
1019			    ((exclude_wm) ? MD_SP_WMSIZE : 0);
1020
1021	return (size);
1022}
1023
1024/*
1025 * FUNCTION:	meta_sp_list_find()
1026 * INPUT:	head	- the head of the list, must be NULL for empty list
1027 *		offset	- the offset contained by the node to find
1028 * OUTPUT:	none
1029 * RETURNS:	sp_ext_node_t *	- the node containing the requested offset
1030 *				  or NULL if no such nodes were found.
1031 * PURPOSE:	finds a node in a list containing the requested offset
1032 *		(inclusive).  If multiple nodes contain this offset then
1033 *		only the first will be returned, though typically these
1034 *		lists are managed with non-overlapping nodes.
1035 *
1036 *		*The list MUST be sorted by offset for this function to work.*
1037 */
1038static sp_ext_node_t *
1039meta_sp_list_find(
1040	sp_ext_node_t	*head,
1041	sp_ext_offset_t	offset
1042)
1043{
1044	sp_ext_node_t	*ext;
1045
1046	for (ext = head; ext != NULL; ext = ext->ext_next) {
1047		/* check if the offset lies within this extent */
1048		if ((offset >= ext->ext_offset) &&
1049		    (offset < ext->ext_offset + ext->ext_length)) {
1050			/*
1051			 * the requested extent should always be a
1052			 * subset of an extent in the list.
1053			 */
1054			return (ext);
1055		}
1056	}
1057	return (NULL);
1058}
1059
1060/*
1061 * FUNCTION:	meta_sp_list_freefill()
1062 * INPUT:	head	- the head of the list, must be NULL for empty list
1063 *		size	- the size of the volume this extent list is
1064 *			  representing
1065 * OUTPUT:	head	- the new head of the list
1066 * RETURNS:	void
1067 * PURPOSE:	finds gaps in the extent list and fills them with a free
1068 *		node.  If there is a gap at the beginning the head
1069 *		pointer will be changed to point to the new free node.
1070 *		If there is free space at the end, the last free extent
1071 *		will extend all the way out to the size specified.
1072 *
1073 *		*The list MUST be sorted by offset for this function to work.*
1074 */
1075static void
1076meta_sp_list_freefill(
1077	sp_ext_node_t	**head,
1078	sp_ext_length_t	size
1079)
1080{
1081	sp_ext_node_t	*ext;
1082	sp_ext_offset_t	curoff = 0LL;
1083
1084	for (ext = *head; ext != NULL; ext = ext->ext_next) {
1085		if (curoff < ext->ext_offset)
1086			meta_sp_list_insert(NULL, NULL, head,
1087			    curoff, ext->ext_offset - curoff,
1088			    EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
1089		curoff = ext->ext_offset + ext->ext_length;
1090	}
1091
1092	/* pad inverse list out to the end */
1093	if (curoff < size)
1094		meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff,
1095		    EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
1096
1097	if (getenv(META_SP_DEBUG)) {
1098		meta_sp_debug("meta_sp_list_freefill: Extent list with "
1099		    "holes freefilled:\n");
1100		meta_sp_list_dump(*head);
1101	}
1102}
1103
1104/*
1105 * FUNCTION:	meta_sp_list_dump()
1106 * INPUT:	head	- the head of the list, must be NULL for empty list
1107 * OUTPUT:	none
1108 * RETURNS:	void
1109 * PURPOSE:	dumps the entire extent list to stdout for easy debugging
1110 */
1111static void
1112meta_sp_list_dump(sp_ext_node_t *head)
1113{
1114	sp_ext_node_t	*ext;
1115
1116	meta_sp_debug("meta_sp_list_dump: dumping extent list:\n");
1117	meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name",
1118	    "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev",
1119	    "Next");
1120	for (ext = head; ext != NULL; ext = ext->ext_next) {
1121		if (ext->ext_namep != NULL)
1122			meta_sp_debug("%5s", ext->ext_namep->cname);
1123		else
1124			meta_sp_debug("%5s", "NONE");
1125
1126		meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq);
1127		switch (ext->ext_type) {
1128		case EXTTYP_ALLOC:
1129			meta_sp_debug("%7s ", "ALLOC");
1130			break;
1131		case EXTTYP_FREE:
1132			meta_sp_debug("%7s ", "FREE");
1133			break;
1134		case EXTTYP_END:
1135			meta_sp_debug("%7s ", "END");
1136			break;
1137		case EXTTYP_RESERVED:
1138			meta_sp_debug("%7s ", "RESV");
1139			break;
1140		default:
1141			meta_sp_debug("%7s ", "INVLD");
1142			break;
1143		}
1144
1145		meta_sp_debug("%10llu %10llu %5u %10p %10p\n",
1146		    ext->ext_offset, ext->ext_length,
1147		    ext->ext_flags, (void *) ext->ext_prev,
1148		    (void *) ext->ext_next);
1149	}
1150	meta_sp_debug("\n");
1151}
1152
1153/*
1154 * FUNCTION:	meta_sp_list_overlaps()
1155 * INPUT:	head	- the head of the list, must be NULL for empty list
1156 * OUTPUT:	none
1157 * RETURNS:	int	- 1 if extents overlap, 0 if ok
1158 * PURPOSE:	checks a list for overlaps.  The list MUST be sorted by
1159 *		offset for this function to work properly.
1160 */
1161static int
1162meta_sp_list_overlaps(sp_ext_node_t *head)
1163{
1164	sp_ext_node_t	*ext;
1165
1166	for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) {
1167		if (ext->ext_offset + ext->ext_length >
1168		    ext->ext_next->ext_offset)
1169			return (1);
1170	}
1171	return (0);
1172}
1173
1174/*
1175 * **************************************************************************
1176 *                        Extent Allocation Functions                       *
1177 * **************************************************************************
1178 */
1179
1180/*
1181 * FUNCTION:	meta_sp_alloc_by_ext()
1182 * INPUT:	sp	- the set name for the device the node belongs to
1183 *		np	- the name of the device the node belongs to
1184 *		head	- the head of the list, must be NULL for empty list
1185 *		free_ext	- the free extent being allocated from
1186 *		alloc_offset	- the offset of the allocation
1187 *		alloc_len	- the length of the allocation
1188 *		seq		- the sequence number of the allocation
1189 * OUTPUT:	head	- the new head pointer
1190 * RETURNS:	void
1191 * PURPOSE:	allocates a portion of the free extent free_ext.  The
1192 *		allocated portion starts at alloc_offset and is
1193 *		alloc_length long.  Both (alloc_offset) and (alloc_offset +
1194 *		alloc_length) must be contained within the free extent.
1195 *
1196 *		The free extent is split into as many as 3 pieces - a
1197 *		free extent containing [ free_offset .. alloc_offset ), an
1198 *		allocated extent containing the range [ alloc_offset ..
1199 *		alloc_end ], and another free extent containing the
1200 *		range ( alloc_end .. free_end ].  If either of the two
1201 *		new free extents would be zero length, they are not created.
1202 *
1203 *		Finally, the original free extent is removed.  All newly
1204 *		created extents have the EXTFLG_UPDATE flag set.
1205 */
1206static void
1207meta_sp_alloc_by_ext(
1208	mdsetname_t	*sp,
1209	mdname_t	*np,
1210	sp_ext_node_t	**head,
1211	sp_ext_node_t	*free_ext,
1212	sp_ext_offset_t	alloc_offset,
1213	sp_ext_length_t	alloc_length,
1214	uint_t		seq
1215)
1216{
1217	sp_ext_offset_t	free_offset = free_ext->ext_offset;
1218	sp_ext_length_t	free_length = free_ext->ext_length;
1219
1220	sp_ext_offset_t	alloc_end = alloc_offset + alloc_length;
1221	sp_ext_offset_t	free_end  = free_offset  + free_length;
1222
1223	/* allocated extent must be a subset of the free extent */
1224	assert(free_offset <= alloc_offset);
1225	assert(free_end >= alloc_end);
1226
1227	meta_sp_list_remove(head, free_ext);
1228
1229	if (free_offset < alloc_offset) {
1230		meta_sp_list_insert(NULL, NULL, head, free_offset,
1231		    (alloc_offset - free_offset), EXTTYP_FREE, 0,
1232		    EXTFLG_UPDATE, meta_sp_cmp_by_offset);
1233	}
1234
1235	if (free_end > alloc_end) {
1236		meta_sp_list_insert(NULL, NULL, head, alloc_end,
1237		    (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE,
1238		    meta_sp_cmp_by_offset);
1239	}
1240
1241	meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length,
1242	    EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
1243
1244	if (getenv(META_SP_DEBUG)) {
1245		meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n");
1246		meta_sp_list_dump(*head);
1247	}
1248}
1249
1250/*
1251 * FUNCTION:	meta_sp_alloc_by_len()
1252 * INPUT:	sp	- the set name for the device the node belongs to
1253 *		np	- the name of the device the node belongs to
1254 *		head	- the head of the list, must be NULL for empty list
1255 *		*lp	- the requested length to allocate
1256 *		last_off	- the last offset already allocated.
1257 *		alignment	- the desired extent alignmeent
1258 * OUTPUT:	head	- the new head pointer
1259 *		*lp	- the length allocated
1260 * RETURNS:	int	- -1 if error, the number of new extents on success
1261 * PURPOSE:	allocates extents from free space to satisfy the requested
1262 *		length.  If requested length is zero, allocates all
1263 *		remaining free space.  This function provides the meat
1264 *		of the extent allocation algorithm.  Allocation is a
1265 *		three tier process:
1266 *
1267 *		1. If last_off is nonzero and there is free space following
1268 *		   that node, then it is extended to allocate as much of that
1269 *		   free space as possible.  This is useful for metattach.
1270 *		2. If a free extent can be found to satisfy the remaining
1271 *		   requested space, then satisfy the rest of the request
1272 *		   from that extent.
1273 *		3. Start allocating space from any remaining free extents until
1274 *		   the remainder of the request is satisified.
1275 *
1276 *              If alignment is non-zero, then every extent modified
1277 *              or newly allocated will be aligned modulo alignment,
1278 *              with a length that is an integer multiple of
1279 *              alignment.
1280 *
1281 *		The EXTFLG_UPDATE flag is set for all nodes (free and
1282 *		allocated) that require updated watermarks.
1283 *
1284 *		This algorithm may have a negative impact on fragmentation
1285 *		in pathological cases and may be improved if it turns out
1286 *		to be a problem.  This may be exacerbated by particularly
1287 *		large alignments.
1288 *
1289 * NOTE:	It's confusing, so it demands an explanation:
1290 *		- len is used to represent requested data space; it
1291 *		  does not include room for a watermark.  On each full
1292 *		  or partial allocation, len will be decremented by
1293 *		  alloc_len (see next paragraph) until it reaches
1294 *		  zero.
1295 *		- alloc_len is used to represent data space allocated
1296 *		  from a particular extent; it does not include space
1297 *		  for a watermark.  In the rare event that a_length
1298 *		  (see next paragraph) is equal to MD_SP_WMSIZE,
1299 *		  alloc_len will be zero and the resulting MD_SP_WMSIZE
1300 *		  fragment of space will be utterly unusable.
1301 *		- a_length is used to represent all space to be
1302 *		  allocated from a particular extent; it DOES include
1303 *		  space for a watermark.
1304 */
1305static int
1306meta_sp_alloc_by_len(
1307	mdsetname_t	*sp,
1308	mdname_t	*np,
1309	sp_ext_node_t	**head,
1310	sp_ext_length_t	*lp,
1311	sp_ext_offset_t	last_off,
1312	sp_ext_offset_t	alignment
1313)
1314{
1315	sp_ext_node_t	*free_ext;
1316	sp_ext_node_t	*alloc_ext;
1317	uint_t		last_seq = 0;
1318	uint_t		numexts = 0;
1319	sp_ext_length_t	freespace;
1320	sp_ext_length_t	alloc_len;
1321	sp_ext_length_t	len;
1322
1323	/* We're DOA if we can't read *lp */
1324	assert(lp != NULL);
1325	len = *lp;
1326
1327	/*
1328	 * Process the nominal case first: we've been given an actual
1329	 * size argument, rather than the literal "all"
1330	 */
1331
1332	if (len != 0) {
1333
1334		/*
1335		 * Short circuit the check for free space.  This may
1336		 * tell us we have enough space when we really don't
1337		 * because each extent loses space to a watermark, but
1338		 * it will always tell us there isn't enough space
1339		 * correctly.  Worst case we do some extra work.
1340		 */
1341		freespace = meta_sp_list_size(*head, EXTTYP_FREE,
1342		    INCLUDE_WM);
1343
1344		if (freespace < len)
1345			return (-1);
1346
1347		/*
1348		 * First see if we can extend the last extent for an
1349		 * attach.
1350		 */
1351		if (last_off != 0LL) {
1352			int align = 0;
1353
1354			alloc_ext =
1355			    meta_sp_list_find(*head, last_off);
1356			assert(alloc_ext != NULL);
1357
1358			/*
1359			 * The offset test reflects the
1360			 * inclusion of the watermark in the extent
1361			 */
1362			align = (alignment > 0) &&
1363			    (((alloc_ext->ext_offset + MD_SP_WMSIZE) %
1364			    alignment) == 0);
1365
1366			/*
1367			 * If we decided not to align here, we should
1368			 * also reset "alignment" so we don't bother
1369			 * later, either.
1370			 */
1371			if (!align) {
1372				alignment = 0;
1373			}
1374
1375			last_seq = alloc_ext->ext_seq;
1376
1377			free_ext = meta_sp_list_find(*head,
1378			    alloc_ext->ext_offset +
1379			    alloc_ext->ext_length);
1380
1381			/*
1382			 * If a free extent follows our last allocated
1383			 * extent, then remove the last allocated
1384			 * extent and increase the size of the free
1385			 * extent to overlap it, then allocate the
1386			 * total space from the new free extent.
1387			 */
1388			if (free_ext != NULL &&
1389			    free_ext->ext_type == EXTTYP_FREE) {
1390				assert(free_ext->ext_offset ==
1391				    alloc_ext->ext_offset +
1392				    alloc_ext->ext_length);
1393
1394				alloc_len =
1395				    MIN(len, free_ext->ext_length);
1396
1397				if (align && (alloc_len < len)) {
1398					/* No watermark space needed */
1399					alloc_len -= alloc_len % alignment;
1400				}
1401
1402				if (alloc_len > 0) {
1403					free_ext->ext_offset -=
1404					    alloc_ext->ext_length;
1405					free_ext->ext_length +=
1406					    alloc_ext->ext_length;
1407
1408					meta_sp_alloc_by_ext(sp, np, head,
1409					    free_ext, free_ext->ext_offset,
1410					    alloc_ext->ext_length + alloc_len,
1411					    last_seq);
1412
1413					/*
1414					 * now remove the original allocated
1415					 * node.  We may have overlapping
1416					 * extents for a short time before
1417					 * this node is removed.
1418					 */
1419					meta_sp_list_remove(head, alloc_ext);
1420					len -= alloc_len;
1421				}
1422			}
1423			last_seq++;
1424		}
1425
1426		if (len == 0LL)
1427			goto out;
1428
1429		/*
1430		 * Next, see if we can find a single allocation for
1431		 * the remainder.  This may make fragmentation worse
1432		 * in some cases, but there's no good way to allocate
1433		 * that doesn't have a highly fragmented corner case.
1434		 */
1435		for (free_ext = *head; free_ext != NULL;
1436		    free_ext = free_ext->ext_next) {
1437			sp_ext_offset_t	a_offset;
1438			sp_ext_offset_t	a_length;
1439
1440			if (free_ext->ext_type != EXTTYP_FREE)
1441				continue;
1442
1443			/*
1444			 * The length test should include space for
1445			 * the watermark
1446			 */
1447
1448			a_offset = free_ext->ext_offset;
1449			a_length = free_ext->ext_length;
1450
1451			if (alignment > 0) {
1452
1453				/*
1454				 * Shortcut for extents that have been
1455				 * previously added to pad out the
1456				 * data space
1457				 */
1458				if (a_length < alignment) {
1459					continue;
1460				}
1461
1462				/*
1463				 * Round up so the data space begins
1464				 * on a properly aligned boundary.
1465				 */
1466				a_offset += alignment -
1467				    (a_offset % alignment) - MD_SP_WMSIZE;
1468
1469				/*
1470				 * This is only necessary in case the
1471				 * watermark size is ever greater than
1472				 * one.  It'll never happen, of
1473				 * course; we'll get rid of watermarks
1474				 * before we make 'em bigger.
1475				 */
1476				if (a_offset < free_ext->ext_offset) {
1477					a_offset += alignment;
1478				}
1479
1480				/*
1481				 * Adjust the length to account for
1482				 * the space lost above (if any)
1483				 */
1484				a_length -=
1485				    (a_offset - free_ext->ext_offset);
1486			}
1487
1488			if (a_length >= len + MD_SP_WMSIZE) {
1489				meta_sp_alloc_by_ext(sp, np, head,
1490				    free_ext, a_offset,
1491				    len + MD_SP_WMSIZE, last_seq);
1492
1493				len = 0LL;
1494				numexts++;
1495				break;
1496			}
1497		}
1498
1499		if (len == 0LL)
1500			goto out;
1501
1502
1503		/*
1504		 * If the request could not be satisfied by extending
1505		 * the last extent or by a single extent, then put
1506		 * multiple smaller extents together until the request
1507		 * is satisfied.
1508		 */
1509		for (free_ext = *head; (free_ext != NULL) && (len > 0);
1510		    free_ext = free_ext->ext_next) {
1511			sp_ext_offset_t a_offset;
1512			sp_ext_length_t a_length;
1513
1514			if (free_ext->ext_type != EXTTYP_FREE)
1515				continue;
1516
1517			a_offset = free_ext->ext_offset;
1518			a_length = free_ext->ext_length;
1519
1520			if (alignment > 0) {
1521
1522				/*
1523				 * Shortcut for extents that have been
1524				 * previously added to pad out the
1525				 * data space
1526				 */
1527				if (a_length < alignment) {
1528					continue;
1529				}
1530
1531				/*
1532				 * Round up so the data space begins
1533				 * on a properly aligned boundary.
1534				 */
1535				a_offset += alignment -
1536				    (a_offset % alignment) - MD_SP_WMSIZE;
1537
1538				/*
1539				 * This is only necessary in case the
1540				 * watermark size is ever greater than
1541				 * one.  It'll never happen, of
1542				 * course; we'll get rid of watermarks
1543				 * before we make 'em bigger.
1544				 */
1545				if (a_offset < free_ext->ext_offset) {
1546					a_offset += alignment;
1547				}
1548
1549				/*
1550				 * Adjust the length to account for
1551				 * the space lost above (if any)
1552				 */
1553				a_length -=
1554				    (a_offset - free_ext->ext_offset);
1555
1556				/*
1557				 * Adjust the length to be properly
1558				 * aligned if it is NOT to be the
1559				 * last extent in the soft partition.
1560				 */
1561				if ((a_length - MD_SP_WMSIZE) < len)
1562					a_length -=
1563					    (a_length - MD_SP_WMSIZE)
1564					    % alignment;
1565			}
1566
1567			alloc_len = MIN(len, a_length - MD_SP_WMSIZE);
1568			if (alloc_len == 0)
1569				continue;
1570
1571			/*
1572			 * meta_sp_alloc_by_ext() expects the
1573			 * allocation length to include the watermark
1574			 * size, which is why we don't simply pass in
1575			 * alloc_len here.
1576			 */
1577			meta_sp_alloc_by_ext(sp, np, head, free_ext,
1578			    a_offset, MIN(len + MD_SP_WMSIZE, a_length),
1579			    last_seq);
1580
1581			len -= alloc_len;
1582			numexts++;
1583			last_seq++;
1584		}
1585
1586
1587		/*
1588		 * If there was not enough space we can throw it all
1589		 * away since no real work has been done yet.
1590		 */
1591		if (len != 0) {
1592			meta_sp_list_free(head);
1593			return (-1);
1594		}
1595	}
1596
1597	/*
1598	 * Otherwise, the literal "all" was specified: allocate all
1599	 * available free space.  Don't bother with alignment.
1600	 */
1601	else {
1602		/* First, extend the last extent if this is a grow */
1603		if (last_off != 0LL) {
1604			alloc_ext =
1605			    meta_sp_list_find(*head, last_off);
1606			assert(alloc_ext != NULL);
1607
1608			last_seq = alloc_ext->ext_seq;
1609
1610			free_ext = meta_sp_list_find(*head,
1611			    alloc_ext->ext_offset +
1612			    alloc_ext->ext_length);
1613
1614			/*
1615			 * If a free extent follows our last allocated
1616			 * extent, then remove the last allocated
1617			 * extent and increase the size of the free
1618			 * extent to overlap it, then allocate the
1619			 * total space from the new free extent.
1620			 */
1621			if (free_ext != NULL &&
1622			    free_ext->ext_type == EXTTYP_FREE) {
1623				assert(free_ext->ext_offset ==
1624				    alloc_ext->ext_offset +
1625				    alloc_ext->ext_length);
1626
1627				len = alloc_len =
1628				    free_ext->ext_length;
1629
1630				free_ext->ext_offset -=
1631				    alloc_ext->ext_length;
1632				free_ext->ext_length +=
1633				    alloc_ext->ext_length;
1634
1635				meta_sp_alloc_by_ext(sp, np, head,
1636				    free_ext, free_ext->ext_offset,
1637				    alloc_ext->ext_length + alloc_len,
1638				    last_seq);
1639
1640				/*
1641				 * now remove the original allocated
1642				 * node.  We may have overlapping
1643				 * extents for a short time before
1644				 * this node is removed.
1645				 */
1646				meta_sp_list_remove(head, alloc_ext);
1647			}
1648
1649			last_seq++;
1650		}
1651
1652		/* Next, grab all remaining free space */
1653		for (free_ext = *head; free_ext != NULL;
1654		    free_ext = free_ext->ext_next) {
1655
1656			if (free_ext->ext_type == EXTTYP_FREE) {
1657				alloc_len =
1658				    free_ext->ext_length - MD_SP_WMSIZE;
1659				if (alloc_len == 0)
1660					continue;
1661
1662				/*
1663				 * meta_sp_alloc_by_ext() expects the
1664				 * allocation length to include the
1665				 * watermark size, which is why we
1666				 * don't simply pass in alloc_len
1667				 * here.
1668				 */
1669				meta_sp_alloc_by_ext(sp, np, head,
1670				    free_ext, free_ext->ext_offset,
1671				    free_ext->ext_length,
1672				    last_seq);
1673
1674				len += alloc_len;
1675				numexts++;
1676				last_seq++;
1677			}
1678		}
1679	}
1680
1681out:
1682	if (getenv(META_SP_DEBUG)) {
1683		meta_sp_debug("meta_sp_alloc_by_len: Extent list after "
1684		    "allocation:\n");
1685		meta_sp_list_dump(*head);
1686	}
1687
1688	if (*lp == 0) {
1689		*lp = len;
1690
1691		/*
1692		 * Make sure the callers hit a no space error if we
1693		 * didn't actually find anything.
1694		 */
1695		if (len == 0) {
1696			return (-1);
1697		}
1698	}
1699
1700	return (numexts);
1701}
1702
1703/*
1704 * FUNCTION:	meta_sp_alloc_by_list()
1705 * INPUT:	sp	- the set name for the device the node belongs to
1706 *		np	- the name of the device the node belongs to
1707 *		head	- the head of the list, must be NULL for empty list
1708 *		oblist	- an extent list containing requested nodes to allocate
1709 * OUTPUT:	head	- the new head pointer
1710 * RETURNS:	int	- -1 if error, the number of new extents on success
1711 * PURPOSE:	allocates extents from free space to satisfy the requested
1712 *		extent list.  This is primarily used for the -o/-b options
1713 *		where the user may specifically request extents to allocate.
1714 *		Each extent in the oblist must be a subset (inclusive) of a
1715 *		free extent and may not overlap each other.  This
1716 *		function sets the EXTFLG_UPDATE flag for each node that
1717 *		requires a watermark update after allocating.
1718 */
1719static int
1720meta_sp_alloc_by_list(
1721	mdsetname_t	*sp,
1722	mdname_t	*np,
1723	sp_ext_node_t	**head,
1724	sp_ext_node_t	*oblist
1725)
1726{
1727	sp_ext_node_t	*ext;
1728	sp_ext_node_t	*free_ext;
1729	uint_t		numexts = 0;
1730
1731	for (ext = oblist; ext != NULL; ext = ext->ext_next) {
1732
1733		free_ext = meta_sp_list_find(*head,
1734		    ext->ext_offset - MD_SP_WMSIZE);
1735
1736		/* Make sure the allocation is within the free extent */
1737		if ((free_ext == NULL) ||
1738		    (ext->ext_offset + ext->ext_length >
1739		    free_ext->ext_offset + free_ext->ext_length) ||
1740		    (free_ext->ext_type != EXTTYP_FREE))
1741			return (-1);
1742
1743		meta_sp_alloc_by_ext(sp, np, head, free_ext,
1744		    ext->ext_offset - MD_SP_WMSIZE,
1745		    ext->ext_length + MD_SP_WMSIZE, ext->ext_seq);
1746
1747		numexts++;
1748	}
1749
1750	assert(meta_sp_list_overlaps(*head) == 0);
1751
1752	if (getenv(META_SP_DEBUG)) {
1753		meta_sp_debug("meta_sp_alloc_by_list: Extent list after "
1754		    "allocation:\n");
1755		meta_sp_list_dump(*head);
1756	}
1757
1758	return (numexts);
1759}
1760
1761/*
1762 * **************************************************************************
1763 *                     Extent List Population Functions                     *
1764 * **************************************************************************
1765 */
1766
1767/*
1768 * FUNCTION:	meta_sp_extlist_from_namelist()
1769 * INPUT:	sp	- the set name for the device the node belongs to
1770 *		spnplp	- the namelist of soft partitions to build a list from
1771 * OUTPUT:	extlist	- the extent list built from the SPs in the namelist
1772 *		ep	- return error pointer
1773 * RETURNS:	int	- -1 if error, 0 on success
1774 * PURPOSE:	builds an extent list representing the soft partitions
1775 *		specified in the namelist.  Each extent in each soft
1776 *		partition is added to the list with the type EXTTYP_ALLOC.
1777 *		The EXTFLG_UPDATE flag is not set on any nodes.  Each
1778 *		extent in the list includes the space occupied by the
1779 *		watermark, which is not included in the unit structures.
1780 */
1781static int
1782meta_sp_extlist_from_namelist(
1783	mdsetname_t	*sp,
1784	mdnamelist_t	*spnlp,
1785	sp_ext_node_t	**extlist,
1786	md_error_t	*ep
1787)
1788{
1789	int		extn;
1790	md_sp_t		*msp;		/* unit structure of the sp's */
1791	mdnamelist_t	*namep;
1792
1793	assert(sp != NULL);
1794
1795	/*
1796	 * Now go through the soft partitions and add a node to the used
1797	 * list for each allocated extent.
1798	 */
1799	for (namep = spnlp; namep != NULL; namep = namep->next) {
1800		mdname_t	*curnp = namep->namep;
1801
1802		/* get the unit structure */
1803		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
1804			return (-1);
1805
1806		for (extn = 0; (extn < msp->ext.ext_len); extn++) {
1807			md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
1808
1809			/*
1810			 * subtract from offset and add to the length
1811			 * to account for the watermark, which is not
1812			 * contained in the extents in the unit structure.
1813			 */
1814			meta_sp_list_insert(sp, curnp, extlist,
1815			    extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE,
1816			    EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset);
1817		}
1818	}
1819	return (0);
1820}
1821
1822/*
1823 * FUNCTION:	meta_sp_extlist_from_wm()
1824 * INPUT:	sp	- the set name for the device the node belongs to
1825 *		compnp	- the name of the device to scan watermarks on
1826 * OUTPUT:	extlist	- the extent list built from the SPs in the namelist
1827 *		ep	- return error pointer
1828 * RETURNS:	int	- -1 if error, 0 on success
1829 * PURPOSE:	builds an extent list representing the soft partitions
1830 *		specified in the namelist.  Each extent in each soft
1831 *		partition is added to the list with the type EXTTYP_ALLOC.
1832 *		The EXTFLG_UPDATE flag is not set on any nodes.  Each
1833 *		extent in the list includes the space occupied by the
1834 *		watermark, which is not included in the unit structures.
1835 */
1836static int
1837meta_sp_extlist_from_wm(
1838	mdsetname_t	*sp,
1839	mdname_t	*compnp,
1840	sp_ext_node_t	**extlist,
1841	ext_cmpfunc_t	compare,
1842	md_error_t	*ep
1843)
1844{
1845	mp_watermark_t	wm;
1846	mdname_t	*np = NULL;
1847	mdsetname_t	*spsetp = NULL;
1848	sp_ext_offset_t	cur_off;
1849	md_set_desc	*sd;
1850	int		init = 0;
1851	mdkey_t		key;
1852	minor_t		mnum;
1853
1854	if (!metaislocalset(sp)) {
1855		if ((sd = metaget_setdesc(sp, ep)) == NULL)
1856			return (-1);
1857	}
1858
1859	if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR)
1860		return (-1);
1861
1862	for (;;) {
1863		if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) {
1864			return (-1);
1865		}
1866
1867		/* get the set and name pointers */
1868		if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) {
1869			if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) {
1870				return (-1);
1871			}
1872		}
1873
1874		/*
1875		 * For the MN set, meta_init_make_device needs to
1876		 * be run on all the nodes so the entries for the
1877		 * softpart device name and its comp can be created
1878		 * in the same order in the replica namespace.  If
1879		 * we have it run on mdmn_do_iocset then the mddbs
1880		 * will be out of sync between master node and slave
1881		 * nodes.
1882		 */
1883		if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) {
1884
1885			if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) {
1886				md_mn_msg_addmdname_t	*send_params;
1887				int			result;
1888				md_mn_result_t		*resp = NULL;
1889				int			message_size;
1890
1891				message_size =  sizeof (*send_params) +
1892				    strlen(wm.wm_mdname) + 1;
1893				send_params = Zalloc(message_size);
1894				send_params->addmdname_setno = sp->setno;
1895				(void) strcpy(&send_params->addmdname_name[0],
1896				    wm.wm_mdname);
1897				result = mdmn_send_message(sp->setno,
1898				    MD_MN_MSG_ADDMDNAME,
1899				    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0,
1900				    (char *)send_params, message_size, &resp,
1901				    ep);
1902				Free(send_params);
1903				if (resp != NULL) {
1904					if (resp->mmr_exitval != 0) {
1905						free_result(resp);
1906						return (-1);
1907					}
1908					free_result(resp);
1909				}
1910				if (result != 0)
1911					return (-1);
1912			} else {
1913
1914				if (!is_existing_meta_hsp(sp, wm.wm_mdname)) {
1915					if ((key = meta_init_make_device(&sp,
1916					    wm.wm_mdname, ep)) <= 0) {
1917						return (-1);
1918					}
1919					init = 1;
1920				}
1921			}
1922
1923			np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep);
1924			if (np == NULL) {
1925				if (init) {
1926					if (meta_getnmentbykey(sp->setno,
1927					    MD_SIDEWILD, key, NULL, &mnum,
1928					    NULL, ep) != NULL) {
1929						(void) metaioctl(MD_IOCREM_DEV,
1930						    &mnum, ep, NULL);
1931					}
1932					(void) del_self_name(sp, key, ep);
1933				}
1934				return (-1);
1935			}
1936		}
1937
1938		/* insert watermark into extent list */
1939		meta_sp_list_insert(spsetp, np, extlist, cur_off,
1940		    wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq,
1941		    EXTFLG_UPDATE, compare);
1942
1943		/* if we see the end watermark, we're done */
1944		if (wm.wm_type == EXTTYP_END)
1945			break;
1946
1947		cur_off += wm.wm_length + 1;
1948
1949		/* clear out set and name pointers for next iteration */
1950		np = NULL;
1951		spsetp = NULL;
1952	}
1953
1954	return (0);
1955}
1956
1957/*
1958 * **************************************************************************
1959 *                        Print (metastat) Functions                        *
1960 * **************************************************************************
1961 */
1962
1963/*
1964 * FUNCTION:	meta_sp_short_print()
1965 * INPUT:	msp	- the unit structure to display
1966 *		fp	- the file pointer to send output to
1967 *		options	- print options from the command line processor
1968 * OUTPUT:	ep	- return error pointer
1969 * RETURNS:	int	- -1 if error, 0 on success
1970 * PURPOSE:	display a short report of the soft partition in md.tab
1971 *		form, primarily used for metastat -p.
1972 */
1973static int
1974meta_sp_short_print(
1975	md_sp_t		*msp,
1976	char		*fname,
1977	FILE		*fp,
1978	mdprtopts_t	options,
1979	md_error_t	*ep
1980)
1981{
1982	int	extn;
1983
1984	if (options & PRINT_LARGEDEVICES) {
1985		if ((msp->common.revision & MD_64BIT_META_DEV) == 0)
1986			return (0);
1987	}
1988
1989	if (options & PRINT_FN) {
1990		if ((msp->common.revision & MD_FN_META_DEV) == 0)
1991			return (0);
1992	}
1993
1994	/* print name and -p */
1995	if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF)
1996		return (mdsyserror(ep, errno, fname));
1997
1998	/* print the component */
1999	/*
2000	 * Always print the full path name
2001	 */
2002	if (fprintf(fp, " %s", msp->compnamep->rname) == EOF)
2003		return (mdsyserror(ep, errno, fname));
2004
2005	/* print out each extent */
2006	for (extn = 0; (extn < msp->ext.ext_len); extn++) {
2007		md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
2008		if (fprintf(fp, " -o %llu -b %llu ", extp->poff,
2009		    extp->len) == EOF)
2010			return (mdsyserror(ep, errno, fname));
2011	}
2012
2013	if (fprintf(fp, "\n") == EOF)
2014		return (mdsyserror(ep, errno, fname));
2015
2016	/* success */
2017	return (0);
2018}
2019
2020/*
2021 * FUNCTION:	meta_sp_status_to_name()
2022 * INPUT:	xsp_status	- the status value to convert to a string
2023 *		tstate		- transient errored device state. If set the
2024 *				  device is Unavailable
2025 * OUTPUT:	none
2026 * RETURNS:	char *	- a pointer to the string representing the status value
2027 * PURPOSE:	return an internationalized string representing the
2028 *		status value for a soft partition.  The strings are
2029 *		strdup'd and must be freed by the caller.
2030 */
2031static char *
2032meta_sp_status_to_name(
2033	xsp_status_t	xsp_status,
2034	uint_t		tstate
2035)
2036{
2037	char *rval = NULL;
2038
2039	/*
2040	 * Check to see if we have MD_INACCESSIBLE set. This is the only valid
2041	 * value for an 'Unavailable' return. tstate can be set because of
2042	 * other multi-node reasons (e.g. ABR being set)
2043	 */
2044	if (tstate & MD_INACCESSIBLE) {
2045		return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable")));
2046	}
2047
2048	switch (xsp_status) {
2049	case MD_SP_CREATEPEND:
2050		rval = Strdup(dgettext(TEXT_DOMAIN, "Creating"));
2051		break;
2052	case MD_SP_GROWPEND:
2053		rval = Strdup(dgettext(TEXT_DOMAIN, "Growing"));
2054		break;
2055	case MD_SP_DELPEND:
2056		rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting"));
2057		break;
2058	case MD_SP_OK:
2059		rval = Strdup(dgettext(TEXT_DOMAIN, "Okay"));
2060		break;
2061	case MD_SP_ERR:
2062		rval = Strdup(dgettext(TEXT_DOMAIN, "Errored"));
2063		break;
2064	case MD_SP_RECOVER:
2065		rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering"));
2066		break;
2067	}
2068
2069	if (rval == NULL)
2070		rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid"));
2071
2072	return (rval);
2073}
2074
2075/*
2076 * FUNCTION:	meta_sp_report()
2077 * INPUT:	sp	- the set name for the unit being displayed
2078 *		msp	- the unit structure to display
2079 *		nlpp	- pass back the large devs
2080 *		fp	- the file pointer to send output to
2081 *		options	- print options from the command line processor
2082 * OUTPUT:	ep	- return error pointer
2083 * RETURNS:	int	- -1 if error, 0 on success
2084 * PURPOSE:	print a full report of the device specified
2085 */
2086static int
2087meta_sp_report(
2088	mdsetname_t	*sp,
2089	md_sp_t		*msp,
2090	mdnamelist_t	**nlpp,
2091	char		*fname,
2092	FILE		*fp,
2093	mdprtopts_t	options,
2094	md_error_t	*ep
2095)
2096{
2097	uint_t		extn;
2098	char		*status;
2099	char		*devid = "";
2100	mdname_t	*didnp = NULL;
2101	ddi_devid_t	dtp;
2102	int		len;
2103	uint_t		tstate = 0;
2104
2105	if (options & PRINT_LARGEDEVICES) {
2106		if ((msp->common.revision & MD_64BIT_META_DEV) == 0) {
2107			return (0);
2108		} else {
2109			if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
2110				return (-1);
2111		}
2112	}
2113
2114	if (options & PRINT_FN) {
2115		if ((msp->common.revision & MD_FN_META_DEV) == 0) {
2116			return (0);
2117		} else {
2118			if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
2119				return (-1);
2120		}
2121	}
2122
2123	if (options & PRINT_HEADER) {
2124		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"),
2125		    msp->common.namep->cname) == EOF)
2126			return (mdsyserror(ep, errno, fname));
2127	}
2128
2129	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Device: %s\n"),
2130	    msp->compnamep->cname) == EOF)
2131		return (mdsyserror(ep, errno, fname));
2132
2133	/* Determine if device is available before displaying status */
2134	if (metaismeta(msp->common.namep)) {
2135		if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0)
2136			return (-1);
2137	}
2138	status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED);
2139
2140	/* print out "State" to be consistent with other metadevices */
2141	if (tstate & MD_ABR_CAP) {
2142		if (fprintf(fp, dgettext(TEXT_DOMAIN,
2143		    "    State: %s - Application Based Recovery (ABR)\n"),
2144		    status) == EOF) {
2145			Free(status);
2146			return (mdsyserror(ep, errno, fname));
2147		}
2148	} else {
2149		if (fprintf(fp, dgettext(TEXT_DOMAIN,
2150		    "    State: %s\n"), status) == EOF) {
2151			Free(status);
2152			return (mdsyserror(ep, errno, fname));
2153		}
2154	}
2155	free(status);
2156
2157	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %llu blocks (%s)\n"),
2158	    msp->common.size,
2159	    meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF)
2160		return (mdsyserror(ep, errno, fname));
2161
2162	/* print component details */
2163	if (! metaismeta(msp->compnamep)) {
2164		diskaddr_t	start_blk;
2165		int		has_mddb;
2166		char		*has_mddb_str;
2167
2168		/* print header */
2169		/*
2170		 * Building a format string on the fly that will
2171		 * be used in (f)printf. This allows the length
2172		 * of the ctd to vary from small to large without
2173		 * looking horrible.
2174		 */
2175		len = strlen(msp->compnamep->cname);
2176		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
2177		len += 2;
2178		if (fprintf(fp,
2179		    "\t%-*.*s %-12.12s %-5.5s %s\n",
2180		    len, len,
2181		    dgettext(TEXT_DOMAIN, "Device"),
2182		    dgettext(TEXT_DOMAIN, "Start Block"),
2183		    dgettext(TEXT_DOMAIN, "Dbase"),
2184		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
2185			return (mdsyserror(ep, errno, fname));
2186		}
2187
2188
2189		/* get info */
2190		if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) ==
2191		    MD_DISKADDR_ERROR)
2192			return (-1);
2193
2194		if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0)
2195			return (-1);
2196
2197		if (has_mddb)
2198			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
2199		else
2200			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
2201
2202		/* populate the key in the name_p structure */
2203		didnp = metadevname(&sp, msp->compnamep->dev, ep);
2204		if (didnp == NULL) {
2205			return (-1);
2206		}
2207
2208		/* determine if devid does NOT exist */
2209		if (options & PRINT_DEVID) {
2210			if ((dtp = meta_getdidbykey(sp->setno,
2211			    getmyside(sp, ep), didnp->key, ep)) == NULL)
2212				devid = dgettext(TEXT_DOMAIN, "No ");
2213			else {
2214				devid = dgettext(TEXT_DOMAIN, "Yes");
2215				free(dtp);
2216			}
2217		}
2218
2219		/* print info */
2220		/*
2221		 * This allows the length
2222		 * of the ctd to vary from small to large without
2223		 * looking horrible.
2224		 */
2225		if (fprintf(fp, "\t%-*s %8lld     %-5.5s %s\n",
2226		    len, msp->compnamep->cname,
2227		    start_blk, has_mddb_str, devid) == EOF) {
2228			return (mdsyserror(ep, errno, fname));
2229		}
2230		(void) fprintf(fp, "\n");
2231	}
2232
2233
2234	/* print the headers */
2235	if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n",
2236	    dgettext(TEXT_DOMAIN, "Extent"),
2237	    dgettext(TEXT_DOMAIN, "Start Block"),
2238	    dgettext(TEXT_DOMAIN, "Block count")) == EOF)
2239		return (mdsyserror(ep, errno, fname));
2240
2241	/* print out each extent */
2242	for (extn = 0; (extn < msp->ext.ext_len); extn++) {
2243		md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
2244
2245		/* If PRINT_TIMES option is ever supported, add output here */
2246		if (fprintf(fp, "\t%6u %24llu %24llu\n",
2247		    extn, extp->poff, extp->len) == EOF)
2248			return (mdsyserror(ep, errno, fname));
2249	}
2250
2251	/* separate records with a newline */
2252	(void) fprintf(fp, "\n");
2253	return (0);
2254}
2255
2256/*
2257 * FUNCTION:	meta_sp_print()
2258 * INPUT:	sp	- the set name for the unit being displayed
2259 *		np	- the name of the device to print
2260 *		fname	- ??? not used
2261 *		fp	- the file pointer to send output to
2262 *		options	- print options from the command line processor
2263 * OUTPUT:	ep	- return error pointer
2264 * RETURNS:	int	- -1 if error, 0 on success
2265 * PURPOSE:	print a full report of the device specified by metastat.
2266 *		This is the main entry point for printing.
2267 */
2268int
2269meta_sp_print(
2270	mdsetname_t	*sp,
2271	mdname_t	*np,
2272	mdnamelist_t	**nlpp,
2273	char		*fname,
2274	FILE		*fp,
2275	mdprtopts_t	options,
2276	md_error_t	*ep
2277)
2278{
2279	md_sp_t		*msp;
2280	md_unit_t	*mdp;
2281	int		rval = 0;
2282
2283	/* should always have the same set */
2284	assert(sp != NULL);
2285
2286	/* print all the soft partitions */
2287	if (np == NULL) {
2288		mdnamelist_t	*nlp = NULL;
2289		mdnamelist_t	*p;
2290		int		cnt;
2291
2292		if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0)
2293			return (-1);
2294		else if (cnt == 0)
2295			return (0);
2296
2297		/* recusively print them out */
2298		for (p = nlp; (p != NULL); p = p->next) {
2299			mdname_t	*curnp = p->namep;
2300
2301			/*
2302			 * one problem with the rval of -1 here is that
2303			 * the error gets "lost" when the next device is
2304			 * printed, but we want to print them all anyway.
2305			 */
2306			rval = meta_sp_print(sp, curnp, nlpp, fname, fp,
2307			    options, ep);
2308		}
2309
2310		/* clean up, return success */
2311		metafreenamelist(nlp);
2312		return (rval);
2313	}
2314
2315	/* get the unit structure */
2316	if ((msp = meta_get_sp_common(sp, np,
2317	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
2318		return (-1);
2319
2320	/* check for parented */
2321	if ((! (options & PRINT_SUBDEVS)) &&
2322	    (MD_HAS_PARENT(msp->common.parent))) {
2323		return (0);
2324	}
2325
2326	/* print appropriate detail */
2327	if (options & PRINT_SHORT) {
2328		if (meta_sp_short_print(msp, fname, fp, options, ep) != 0)
2329			return (-1);
2330	} else {
2331		if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0)
2332			return (-1);
2333	}
2334
2335	/*
2336	 * Print underlying metadevices if they are parented to us and
2337	 * if the info for the underlying metadevice has not been printed.
2338	 */
2339	if (metaismeta(msp->compnamep)) {
2340		/* get the unit structure for the subdevice */
2341		if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL)
2342			return (-1);
2343
2344		/* If info not already printed, recurse */
2345		if (!BT_TEST(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)))) {
2346			if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp,
2347			    (options | PRINT_HEADER | PRINT_SUBDEVS),
2348			    NULL, ep) != 0) {
2349				return (-1);
2350			}
2351			BT_SET(sp_parent_printed, MD_MIN2UNIT(MD_SID(mdp)));
2352		}
2353	}
2354	return (0);
2355}
2356
2357/*
2358 * **************************************************************************
2359 *                     Watermark Manipulation Functions                     *
2360 * **************************************************************************
2361 */
2362
2363/*
2364 * FUNCTION:	meta_sp_get_start()
2365 * INPUT:	sp	- the operating set
2366 *		np 	- device upon which the sp is being built
2367 * OUTPUT:	ep	- return error pointer
2368 * RETURNS:	daddr_t	- -1 if error, otherwise the start block
2369 * PURPOSE:	Encapsulate the determination of the start block of the
2370 *		device upon which the sp is built or being built.
2371 */
2372static diskaddr_t
2373meta_sp_get_start(
2374	mdsetname_t	*sp,
2375	mdname_t	*np,
2376	md_error_t	*ep
2377)
2378{
2379	daddr_t		start_block;
2380
2381	if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR)
2382		start_block += MD_SP_START;
2383
2384	return (start_block);
2385}
2386
2387/*
2388 * FUNCTION:	meta_sp_update_wm_common()
2389 * INPUT:	sp	- the operating set
2390 *		msp	- a pointer to the XDR unit structure
2391 *		extlist	- the extent list specifying watermarks to update
2392 *		iocval	- either MD_IOC_SPUPDATEWM or MD_MN_IOC_SPUPDATEWM
2393 * OUTPUT:	ep	- return error pointer
2394 * RETURNS:	int	- -1 if error, 0 on success
2395 * PURPOSE:	steps backwards through the extent list updating
2396 *		watermarks for all extents with the EXTFLG_UPDATE flag
2397 *		set.  Writing the watermarks guarantees consistency when
2398 *		extents must be broken into pieces since the original
2399 *		watermark will be the last to be updated, and will be
2400 *		changed to point to a new watermark that is already
2401 *		known to be consistent.  If one of the writes fails, the
2402 *		original watermark stays intact and none of the changes
2403 *		are realized.
2404 */
2405static int
2406meta_sp_update_wm_common(
2407	mdsetname_t	*sp,
2408	md_sp_t		*msp,
2409	sp_ext_node_t	*extlist,
2410	int		iocval,
2411	md_error_t	*ep
2412)
2413{
2414	sp_ext_node_t	*ext;
2415	sp_ext_node_t	*tail;
2416	mp_watermark_t	*wmp, *watermarks;
2417	xsp_offset_t	*osp, *offsets;
2418	int		update_count = 0;
2419	int		rval = 0;
2420	md_unit_t	*mdp;
2421	md_sp_update_wm_t	update_params;
2422
2423	if (getenv(META_SP_DEBUG)) {
2424		meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n");
2425		meta_sp_list_dump(extlist);
2426	}
2427
2428	/*
2429	 * find the last node so we can write the watermarks backwards
2430	 * and count watermarks to update so we can allocate space
2431	 */
2432	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
2433		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
2434			update_count++;
2435		}
2436
2437		if (ext->ext_next == NULL) {
2438			tail = ext;
2439		}
2440	}
2441	ext = tail;
2442
2443	wmp = watermarks =
2444	    Zalloc(update_count * sizeof (mp_watermark_t));
2445	osp = offsets =
2446	    Zalloc(update_count * sizeof (sp_ext_offset_t));
2447
2448	while (ext != NULL) {
2449		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
2450			/* update watermark */
2451			wmp->wm_magic = MD_SP_MAGIC;
2452			wmp->wm_version = MD_SP_VERSION;
2453			wmp->wm_type = ext->ext_type;
2454			wmp->wm_seq = ext->ext_seq;
2455			wmp->wm_length = ext->ext_length - MD_SP_WMSIZE;
2456
2457			/* fill in the volume name and set name */
2458			if (ext->ext_namep != NULL)
2459				(void) strcpy(wmp->wm_mdname,
2460				    ext->ext_namep->cname);
2461			else
2462				(void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME);
2463			if (ext->ext_setp != NULL &&
2464			    ext->ext_setp->setno != MD_LOCAL_SET)
2465				(void) strcpy(wmp->wm_setname,
2466				    ext->ext_setp->setname);
2467			else
2468				(void) strcpy(wmp->wm_setname,
2469				    MD_SP_LOCALSETNAME);
2470
2471			/* Generate the checksum */
2472			wmp->wm_checksum = 0;
2473			crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum,
2474			    sizeof (*wmp), NULL);
2475
2476			/* record the extent offset */
2477			*osp = ext->ext_offset;
2478
2479			/* Advance the placeholders */
2480			osp++; wmp++;
2481		}
2482		ext = ext->ext_prev;
2483	}
2484
2485	mdp = meta_get_mdunit(sp, msp->common.namep, ep);
2486	if (mdp == NULL) {
2487		rval = -1;
2488		goto out;
2489	}
2490
2491	(void) memset(&update_params, 0, sizeof (update_params));
2492	update_params.mnum = MD_SID(mdp);
2493	update_params.count = update_count;
2494	update_params.wmp = (uintptr_t)watermarks;
2495	update_params.osp = (uintptr_t)offsets;
2496	MD_SETDRIVERNAME(&update_params, MD_SP,
2497	    MD_MIN2SET(update_params.mnum));
2498
2499	if (metaioctl(iocval, &update_params, &update_params.mde,
2500	    msp->common.namep->cname) != 0) {
2501		(void) mdstealerror(ep, &update_params.mde);
2502		rval = -1;
2503		goto out;
2504	}
2505
2506out:
2507	Free(watermarks);
2508	Free(offsets);
2509
2510	return (rval);
2511}
2512
2513static int
2514meta_sp_update_wm(
2515	mdsetname_t	*sp,
2516	md_sp_t		*msp,
2517	sp_ext_node_t	*extlist,
2518	md_error_t	*ep
2519)
2520{
2521	return (meta_sp_update_wm_common(sp, msp, extlist, MD_IOC_SPUPDATEWM,
2522	    ep));
2523}
2524
2525static int
2526meta_mn_sp_update_wm(
2527	mdsetname_t	*sp,
2528	md_sp_t		*msp,
2529	sp_ext_node_t	*extlist,
2530	md_error_t	*ep
2531)
2532{
2533	return (meta_sp_update_wm_common(sp, msp, extlist, MD_MN_IOC_SPUPDATEWM,
2534	    ep));
2535}
2536
2537/*
2538 * FUNCTION:	meta_sp_clear_wm()
2539 * INPUT:	sp	- the operating set
2540 *		msp	- the unit structure for the soft partition to clear
2541 * OUTPUT:	ep	- return error pointer
2542 * RETURNS:	int	- -1 if error, 0 on success
2543 * PURPOSE:	steps through the extents for a soft partition unit and
2544 *		creates an extent list designed to mark all of the
2545 *		watermarks for those extents as free.  The extent list
2546 *		is then passed to meta_sp_update_wm() to actually write
2547 *		the watermarks out.
2548 */
2549static int
2550meta_sp_clear_wm(
2551	mdsetname_t	*sp,
2552	md_sp_t		*msp,
2553	md_error_t	*ep
2554)
2555{
2556	sp_ext_node_t	*extlist = NULL;
2557	int		numexts = msp->ext.ext_len;
2558	uint_t		i;
2559	int		rval = 0;
2560
2561	/* for each watermark must set the flag to SP_FREE */
2562	for (i = 0; i < numexts; i++) {
2563		md_sp_ext_t	*extp = &msp->ext.ext_val[i];
2564
2565		meta_sp_list_insert(NULL, NULL, &extlist,
2566		    extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE,
2567		    EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
2568	}
2569
2570	/* update watermarks */
2571	rval = meta_sp_update_wm(sp, msp, extlist, ep);
2572
2573	meta_sp_list_free(&extlist);
2574	return (rval);
2575}
2576
2577/*
2578 * FUNCTION:	meta_sp_read_wm()
2579 * INPUT:	sp	- setname for component
2580 *		compnp	- mdname_t for component
2581 *		offset	- the offset of the watermark to read (sectors)
2582 * OUTPUT:	wm	- the watermark structure to read into
2583 *		ep	- return error pointer
2584 * RETURNS:	int	- -1 if error, 0 on success
2585 * PURPOSE:	seeks out to the requested offset and reads a watermark.
2586 *		It then verifies that the magic number is correct and
2587 *		that the checksum is valid, returning an error if either
2588 *		is wrong.
2589 */
2590static int
2591meta_sp_read_wm(
2592	mdsetname_t	*sp,
2593	mdname_t	*compnp,
2594	mp_watermark_t	*wm,
2595	sp_ext_offset_t	offset,
2596	md_error_t	*ep
2597)
2598{
2599	md_sp_read_wm_t	read_params;
2600
2601	/*
2602	 * make sure block offset does not overflow 2^64 bytes and it's a
2603	 * multiple of the block size.
2604	 */
2605	assert(offset <= (1LL << (64 - DEV_BSHIFT)));
2606	/* LINTED */
2607	assert((sizeof (*wm) % DEV_BSIZE) == 0);
2608
2609	(void) memset(wm, 0, sizeof (*wm));
2610
2611	(void) memset(&read_params, 0, sizeof (read_params));
2612	read_params.rdev = compnp->dev;
2613	read_params.wmp = (uintptr_t)wm;
2614	read_params.offset = offset;
2615	MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno);
2616
2617	if (metaioctl(MD_IOC_SPREADWM, &read_params,
2618	    &read_params.mde, compnp->cname) != 0) {
2619
2620		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2621		    "Extent header read failed, block %llu.\n"), offset);
2622		return (mdstealerror(ep, &read_params.mde));
2623	}
2624
2625	/* make sure magic number is correct */
2626	if (wm->wm_magic != MD_SP_MAGIC) {
2627		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2628		    "found incorrect magic number %x, expected %x.\n"),
2629		    wm->wm_magic, MD_SP_MAGIC);
2630		/*
2631		 * Pass NULL for the device name as we don't have
2632		 * valid watermark contents.
2633		 */
2634		return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL));
2635	}
2636
2637	if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum,
2638	    sizeof (*wm), NULL)) {
2639		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2640		    "found incorrect checksum %x.\n"),
2641		    wm->wm_checksum);
2642		return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname));
2643	}
2644
2645	return (0);
2646}
2647
2648/*
2649 * **************************************************************************
2650 *                  Query Functions
2651 * **************************************************************************
2652 */
2653
2654/*
2655 * IMPORTANT NOTE: This is a static function that assumes that
2656 *		   its input parameters have been checked and
2657 *		   have valid values that lie within acceptable
2658 *		   ranges.
2659 *
2660 * FUNCTION:	meta_sp_enough_space()
2661 * INPUT:	desired_number_of_sps - the number of soft partitions desired;
2662 *					must be > 0
2663 *		desired_sp_size - the desired soft partition size in blocks;
2664 *				  must be > 0
2665 *		extent_listpp - a reference to a reference to an extent
2666 *				list that lists the extents on a device;
2667 *				must be a reference to a reference to a
2668 *				valid extent list
2669 *		alignment - the desired data space alignment for the sp's
2670 * OUTPUT:	boolean_t return value
2671 * RETURNS:	boolean_t - B_TRUE if there's enough space in the extent
2672 *			    list to create the desired soft partitions,
2673 *			    B_FALSE if there's not enough space
2674 * PURPOSE:	determines whether there's enough free space in an extent
2675 *		list to allow creation of a set of soft partitions
2676 */
2677static boolean_t
2678meta_sp_enough_space(
2679	int		desired_number_of_sps,
2680	blkcnt_t	desired_sp_size,
2681	sp_ext_node_t	**extent_listpp,
2682	sp_ext_length_t	alignment
2683)
2684{
2685	boolean_t		enough_space;
2686	int			number_of_sps;
2687	int			number_of_extents_used;
2688	sp_ext_length_t		desired_ext_length = desired_sp_size;
2689
2690	enough_space = B_TRUE;
2691	number_of_sps = 0;
2692	while ((enough_space == B_TRUE) &&
2693	    (number_of_sps < desired_number_of_sps)) {
2694		/*
2695		 * Use the extent allocation algorithm implemented by
2696		 * meta_sp_alloc_by_len() to test whether the free
2697		 * extents in the extent list referenced by *extent_listpp
2698		 * contain enough space to accomodate a soft partition
2699		 * of size desired_ext_length.
2700		 *
2701		 * Repeat the test <desired_number_of_sps> times
2702		 * or until it fails, whichever comes first,
2703		 * each time allocating the extents required to
2704		 * create the soft partition without actually
2705		 * creating the soft partition.
2706		 */
2707		number_of_extents_used = meta_sp_alloc_by_len(
2708		    TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2709		    extent_listpp, &desired_ext_length,
2710		    NO_OFFSET, alignment);
2711		if (number_of_extents_used == -1) {
2712			enough_space = B_FALSE;
2713		} else {
2714			number_of_sps++;
2715		}
2716	}
2717	return (enough_space);
2718}
2719
2720/*
2721 * IMPORTANT NOTE: This is a static function that calls other functions
2722 *		   that check its mdsetnamep and device_mdnamep
2723 *		   input parameters, but expects extent_listpp to
2724 *		   be a initialized to a valid address to which
2725 *		   it can write a reference to the extent list that
2726 *		   it creates.
2727 *
2728 * FUNCTION:	meta_sp_get_extent_list()
2729 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
2730 *			     for the set containing the device for
2731 *			     which the extents are to be listed
2732 *		device_mdnamep - a reference to the mdname_t structure
2733 *				 for the device for which the extents
2734 *				 are to be listed
2735 * OUTPUT:	*extent_listpp - a reference to the extent list for
2736 *				 the device; NULL if the function fails
2737 *		*ep - the libmeta error encountered, if any
2738 * RETURNS:	boolean_t - B_TRUE if the function call was successful,
2739 *			    B_FALSE if not
2740 * PURPOSE:	gets the extent list for a device
2741 */
2742static boolean_t
2743meta_sp_get_extent_list(
2744	mdsetname_t	*mdsetnamep,
2745	mdname_t	*device_mdnamep,
2746	sp_ext_node_t	**extent_listpp,
2747	md_error_t	*ep
2748)
2749{
2750	diskaddr_t		device_size_in_blocks;
2751	mdnamelist_t		*sp_name_listp;
2752	diskaddr_t		start_block_address_in_blocks;
2753
2754	*extent_listpp = NULL;
2755	sp_name_listp = NULL;
2756
2757	start_block_address_in_blocks = meta_sp_get_start(mdsetnamep,
2758	    device_mdnamep, ep);
2759	if (start_block_address_in_blocks == MD_DISKADDR_ERROR) {
2760		if (getenv(META_SP_DEBUG)) {
2761			mde_perror(ep,
2762			    "meta_sp_get_extent_list:meta_sp_get_start");
2763		}
2764		return (B_FALSE);
2765	}
2766
2767	device_size_in_blocks = metagetsize(device_mdnamep, ep);
2768	if (device_size_in_blocks == MD_DISKADDR_ERROR) {
2769		if (getenv(META_SP_DEBUG)) {
2770			mde_perror(ep,
2771			    "meta_sp_get_extent_list:metagetsize");
2772		}
2773		return (B_FALSE);
2774	}
2775
2776	/*
2777	 * Sanity check: the start block will have skipped an integer
2778	 * number of cylinders, C.  C will usually be zero.  If (C > 0),
2779	 * and the disk slice happens to only be C cylinders in total
2780	 * size, we'll fail this check.
2781	 */
2782	if (device_size_in_blocks <=
2783	    (start_block_address_in_blocks + MD_SP_WMSIZE)) {
2784		(void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname);
2785		return (B_FALSE);
2786	}
2787
2788	/*
2789	 * After this point, we will have allocated resources, so any
2790	 * failure returns must be through the supplied "fail" label
2791	 * to properly deallocate things.
2792	 */
2793
2794	/*
2795	 * Create an empty extent list that starts one watermark past
2796	 * the start block of the device and ends one watermark before
2797	 * the end of the device.
2798	 */
2799	meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2800	    extent_listpp, NO_OFFSET,
2801	    (sp_ext_length_t)start_block_address_in_blocks,
2802	    EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS,
2803	    meta_sp_cmp_by_offset);
2804	meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2805	    extent_listpp, (sp_ext_offset_t)(device_size_in_blocks -
2806	    MD_SP_WMSIZE), MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER,
2807	    NO_FLAGS, meta_sp_cmp_by_offset);
2808
2809	/*
2810	 * Get the list of soft partitions that are already on the
2811	 * device.
2812	 */
2813	if (meta_sp_get_by_component(mdsetnamep, device_mdnamep,
2814	    &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) {
2815		if (getenv(META_SP_DEBUG)) {
2816			mde_perror(ep,
2817			    "meta_sp_get_extent_list:meta_sp_get_by_component");
2818		}
2819		goto fail;
2820	}
2821
2822	if (sp_name_listp != NULL) {
2823		/*
2824		 * If there are soft partitions on the device, add the
2825		 * extents used in them to the extent list.
2826		 */
2827		if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp,
2828		    extent_listpp, ep) == -1) {
2829			if (getenv(META_SP_DEBUG)) {
2830				mde_perror(ep, "meta_sp_get_extent_list:"
2831				    "meta_sp_extlist_from_namelist");
2832			}
2833			goto fail;
2834		}
2835		metafreenamelist(sp_name_listp);
2836	}
2837
2838	/*
2839	 * Add free extents to the extent list to represent
2840	 * the remaining regions of free space on the
2841	 * device.
2842	 */
2843	meta_sp_list_freefill(extent_listpp, device_size_in_blocks);
2844	return (B_TRUE);
2845
2846fail:
2847	if (sp_name_listp != NULL) {
2848		metafreenamelist(sp_name_listp);
2849	}
2850
2851	if (*extent_listpp != NULL) {
2852		/*
2853		 * meta_sp_list_free sets *extent_listpp to NULL.
2854		 */
2855		meta_sp_list_free(extent_listpp);
2856	}
2857	return (B_FALSE);
2858}
2859
2860/*
2861 * IMPORTANT NOTE: This is a static function that calls other functions
2862 *		   that check its mdsetnamep and mddrivenamep
2863 *		   input parameters, but expects extent_listpp to
2864 *		   be a initialized to a valid address to which
2865 *		   it can write a reference to the extent list that
2866 *		   it creates.
2867 *
2868 * FUNCTION:	meta_sp_get_extent_list_for_drive()
2869 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
2870 *			     for the set containing the drive for
2871 *			     which the extents are to be listed
2872 *		mddrivenamep   - a reference to the mddrivename_t structure
2873 *				 for the drive for which the extents
2874 *				 are to be listed
2875 * OUTPUT:	*extent_listpp - a reference to the extent list for
2876 *				 the drive; NULL if the function fails
2877 * RETURNS:	boolean_t - B_TRUE if the function call was successful,
2878 *			    B_FALSE if not
2879 * PURPOSE:	gets the extent list for a drive when the entire drive
2880 *		is to be soft partitioned
2881 */
2882static boolean_t
2883meta_sp_get_extent_list_for_drive(
2884	mdsetname_t	*mdsetnamep,
2885	mddrivename_t	*mddrivenamep,
2886	sp_ext_node_t	**extent_listpp
2887)
2888{
2889	boolean_t		can_use;
2890	diskaddr_t		free_space;
2891	md_error_t		mderror;
2892	mdvtoc_t		proposed_vtoc;
2893	int			repartition_options;
2894	int			return_value;
2895	md_sp_t			test_sp_struct;
2896
2897	can_use = B_TRUE;
2898	*extent_listpp = NULL;
2899	mderror = mdnullerror;
2900	test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0,
2901	    &mderror);
2902	if (test_sp_struct.compnamep == NULL) {
2903		can_use = B_FALSE;
2904	}
2905
2906	if (can_use == B_TRUE) {
2907		mderror = mdnullerror;
2908		repartition_options = 0;
2909		return_value = meta_check_sp(mdsetnamep, &test_sp_struct,
2910		    MDCMD_USE_WHOLE_DISK, &repartition_options, &mderror);
2911		if (return_value != 0) {
2912			can_use = B_FALSE;
2913		}
2914	}
2915
2916	if (can_use == B_TRUE) {
2917		mderror = mdnullerror;
2918		repartition_options = repartition_options |
2919		    (MD_REPART_FORCE | MD_REPART_DONT_LABEL);
2920		return_value = meta_repartition_drive(mdsetnamep, mddrivenamep,
2921		    repartition_options, &proposed_vtoc, &mderror);
2922		if (return_value != 0) {
2923			can_use = B_FALSE;
2924		}
2925	}
2926
2927	if (can_use == B_TRUE) {
2928		free_space = proposed_vtoc.parts[MD_SLICE0].size;
2929		if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) {
2930			can_use = B_FALSE;
2931		}
2932	}
2933
2934	if (can_use == B_TRUE) {
2935		/*
2936		 * Create an extent list that starts with
2937		 * a reserved extent that ends at the start
2938		 * of the usable space on slice zero of the
2939		 * proposed VTOC, ends with an extent that
2940		 * reserves space for a watermark at the end
2941		 * of slice zero, and contains a single free
2942		 * extent that occupies the rest of the space
2943		 * on the slice.
2944		 *
2945		 * NOTE:
2946		 *
2947		 * Don't use metagetstart() or metagetsize() to
2948		 * find the usable space.  They query the mdname_t
2949		 * structure that represents an actual device to
2950		 * determine the amount of space on the device that
2951		 * contains metadata and the total amount of space
2952		 * on the device.  Since this function creates a
2953		 * proposed extent list that doesn't reflect the
2954		 * state of an actual device, there's no mdname_t
2955		 * structure to be queried.
2956		 *
2957		 * When a drive is reformatted to prepare for
2958		 * soft partitioning, all of slice seven is
2959		 * reserved for metadata, all of slice zero is
2960		 * available for soft partitioning, and all other
2961		 * slices on the drive are empty.  The proposed
2962		 * extent list for the drive therefore contains
2963		 * only three extents: a reserved extent that ends
2964		 * at the start of the usable space on slice zero,
2965		 * a single free extent that occupies all the usable
2966		 * space on slice zero, and an ending extent that
2967		 * reserves space for a watermark at the end of
2968		 * slice zero.
2969		 */
2970		meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2971		    extent_listpp, NO_OFFSET, (sp_ext_length_t)(MD_SP_START),
2972		    EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS,
2973		    meta_sp_cmp_by_offset);
2974		meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
2975		    extent_listpp, (sp_ext_offset_t)(free_space - MD_SP_WMSIZE),
2976		    MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, NO_FLAGS,
2977		    meta_sp_cmp_by_offset);
2978		meta_sp_list_freefill(extent_listpp, free_space);
2979	}
2980	return (can_use);
2981}
2982
2983/*
2984 * FUNCTION:	meta_sp_can_create_sps()
2985 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
2986 *			     for the set containing the device for
2987 *			     which the extents are to be listed
2988 *		mdnamep - a reference to the mdname_t of the device
 *			  on which the soft partitions are to be created
2990 *		number_of_sps - the desired number of soft partitions
2991 *		sp_size - the desired soft partition size
2992 * OUTPUT:	boolean_t return value
 * RETURNS:	boolean_t - B_TRUE if the soft partitions can be created,
2994 *			    B_FALSE if not
2995 * PURPOSE:	determines whether a set of soft partitions can be created
2996 *		on a device
2997 */
2998boolean_t
2999meta_sp_can_create_sps(
3000	mdsetname_t	*mdsetnamep,
3001	mdname_t	*mdnamep,
3002	int		number_of_sps,
3003	blkcnt_t	sp_size
3004)
3005{
3006	sp_ext_node_t	*extent_listp;
3007	boolean_t	succeeded;
3008	md_error_t	mde;
3009
3010	if ((number_of_sps > 0) && (sp_size > 0)) {
3011		succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
3012		    &extent_listp, &mde);
3013	} else {
3014		succeeded = B_FALSE;
3015	}
3016
3017	/*
3018	 * We don't really care about an error return from the
3019	 * alignment call; that will just result in passing zero,
3020	 * which will be interpreted as no alignment.
3021	 */
3022
3023	if (succeeded == B_TRUE) {
3024		succeeded = meta_sp_enough_space(number_of_sps,
3025		    sp_size, &extent_listp,
3026		    meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde));
3027		meta_sp_list_free(&extent_listp);
3028	}
3029	return (succeeded);
3030}
3031
3032/*
3033 * FUNCTION:	meta_sp_can_create_sps_on_drive()
3034 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
3035 *			     for the set containing the drive for
3036 *			     which the extents are to be listed
3037 *		mddrivenamep - a reference to the mddrivename_t of the drive
 *			       on which the soft partitions are to be created
3039 *		number_of_sps - the desired number of soft partitions
3040 *		sp_size - the desired soft partition size
3041 * OUTPUT:	boolean_t return value
 * RETURNS:	boolean_t - B_TRUE if the soft partitions can be created,
3043 *			    B_FALSE if not
3044 * PURPOSE:	determines whether a set of soft partitions can be created
3045 *		on a drive if the entire drive is soft partitioned
3046 */
3047boolean_t
3048meta_sp_can_create_sps_on_drive(
3049	mdsetname_t	*mdsetnamep,
3050	mddrivename_t	*mddrivenamep,
3051	int		number_of_sps,
3052	blkcnt_t	sp_size
3053)
3054{
3055	sp_ext_node_t	*extent_listp;
3056	boolean_t	succeeded;
3057
3058	if ((number_of_sps > 0) && (sp_size > 0)) {
3059		succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
3060		    mddrivenamep, &extent_listp);
3061	} else {
3062		succeeded = B_FALSE;
3063	}
3064
3065	/*
3066	 * We don't care about alignment on the space call because
3067	 * we're specifically dealing with a drive, which will have no
3068	 * inherent alignment.
3069	 */
3070
3071	if (succeeded == B_TRUE) {
3072		succeeded = meta_sp_enough_space(number_of_sps, sp_size,
3073		    &extent_listp, SP_UNALIGNED);
3074		meta_sp_list_free(&extent_listp);
3075	}
3076	return (succeeded);
3077}
3078
3079/*
3080 * FUNCTION:	meta_sp_get_free_space()
3081 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
3082 *			     for the set containing the device for
3083 *			     which the free space is to be returned
3084 *		mdnamep - a reference to the mdname_t of the device
3085 *			  for which the free space is to be returned
3086 * OUTPUT:	blkcnt_t return value
3087 * RETURNS:	blkcnt_t - the number of blocks of free space on the device
3088 * PURPOSE:	returns the number of blocks of free space on a device
3089 */
3090blkcnt_t
3091meta_sp_get_free_space(
3092	mdsetname_t	*mdsetnamep,
3093	mdname_t	*mdnamep
3094)
3095{
3096	sp_ext_node_t		*extent_listp;
3097	sp_ext_length_t		free_blocks;
3098	boolean_t		succeeded;
3099	md_error_t		mde;
3100
3101	extent_listp = NULL;
3102	free_blocks = 0;
3103	succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
3104	    &extent_listp, &mde);
3105	if (succeeded == B_TRUE) {
3106		free_blocks = meta_sp_list_size(extent_listp,
3107		    EXTTYP_FREE, INCLUDE_WM);
3108		meta_sp_list_free(&extent_listp);
3109		if (free_blocks > (10 * MD_SP_WMSIZE)) {
3110			/*
3111			 * Subtract a safety margin for watermarks when
3112			 * computing the number of blocks available for
3113			 * use.  The actual number of watermarks can't
3114			 * be calculated without knowing the exact numbers
3115			 * and sizes of both the free extents and the soft
3116			 * partitions to be created.  The calculation is
3117			 * highly complex and error-prone even if those
3118			 * quantities are known.  The approximate value
3119			 * 10 * MD_SP_WMSIZE is within a few blocks of the
3120			 * correct value in all practical cases.
3121			 */
3122			free_blocks = free_blocks - (10 * MD_SP_WMSIZE);
3123		} else {
3124			free_blocks = 0;
3125		}
3126	} else {
3127		mdclrerror(&mde);
3128	}
3129
3130	return (free_blocks);
3131}
3132
3133/*
3134 * FUNCTION:	meta_sp_get_free_space_on_drive()
3135 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
3136 *			     for the set containing the drive for
3137 *			     which the free space is to be returned
3138 *		mddrivenamep - a reference to the mddrivename_t of the drive
3139 *			       for which the free space is to be returned
3140 * OUTPUT:	blkcnt_t return value
3141 * RETURNS:	blkcnt_t - the number of blocks of free space on the drive
3142 * PURPOSE:	returns the number of blocks of space usable for soft
3143 *		partitions on an entire drive, if the entire drive is
3144 *		soft partitioned
3145 */
3146blkcnt_t
3147meta_sp_get_free_space_on_drive(
3148	mdsetname_t	*mdsetnamep,
3149	mddrivename_t	*mddrivenamep
3150)
3151{
3152	sp_ext_node_t		*extent_listp;
3153	sp_ext_length_t		free_blocks;
3154	boolean_t		succeeded;
3155
3156	extent_listp = NULL;
3157	free_blocks = 0;
3158	succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
3159	    mddrivenamep, &extent_listp);
3160	if (succeeded == B_TRUE) {
3161		free_blocks = meta_sp_list_size(extent_listp,
3162		    EXTTYP_FREE, INCLUDE_WM);
3163		meta_sp_list_free(&extent_listp);
3164		if (free_blocks > (10 * MD_SP_WMSIZE)) {
3165			/*
3166			 * Subtract a safety margin for watermarks when
3167			 * computing the number of blocks available for
3168			 * use.  The actual number of watermarks can't
3169			 * be calculated without knowing the exact numbers
3170			 * and sizes of both the free extents and the soft
3171			 * partitions to be created.  The calculation is
3172			 * highly complex and error-prone even if those
3173			 * quantities are known.  The approximate value
3174			 * 10 * MD_SP_WMSIZE is within a few blocks of the
3175			 * correct value in all practical cases.
3176			 */
3177			free_blocks = free_blocks - (10 * MD_SP_WMSIZE);
3178		} else {
3179			free_blocks = 0;
3180		}
3181	}
3182	return (free_blocks);
3183}
3184
3185/*
3186 * FUNCTION:	meta_sp_get_number_of_possible_sps()
3187 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
3188 *			     for the set containing the device for
3189 *			     which the number of possible soft partitions
3190 *			     is to be returned
 *		mdnamep - a reference to the mdname_t of the device
 *			  for which the number of possible soft partitions
 *			  is to be returned
 *		sp_size - the size in blocks of the proposed soft partitions
3194 * OUTPUT:	int return value
3195 * RETURNS:	int - the number of soft partitions of the desired size
3196 *		      that can be created on the device
3197 * PURPOSE:	returns the number of soft partitions of a given size
3198 *		that can be created on a device
3199 */
3200int
3201meta_sp_get_number_of_possible_sps(
3202	mdsetname_t	*mdsetnamep,
3203	mdname_t	*mdnamep,
3204	blkcnt_t	sp_size
3205)
3206{
3207	sp_ext_node_t	*extent_listp;
3208	int		number_of_possible_sps;
3209	boolean_t	succeeded;
3210	md_error_t	mde;
3211	sp_ext_length_t	alignment;
3212
3213	extent_listp = NULL;
3214	number_of_possible_sps = 0;
3215	if (sp_size > 0) {
3216		if ((succeeded = meta_sp_get_extent_list(mdsetnamep,
3217		    mdnamep, &extent_listp, &mde)) == B_FALSE)
3218			mdclrerror(&mde);
3219	} else {
3220		succeeded = B_FALSE;
3221	}
3222
3223	if (succeeded == B_TRUE) {
3224		alignment = meta_sp_get_default_alignment(mdsetnamep,
3225		    mdnamep, &mde);
3226	}
3227
3228	while (succeeded == B_TRUE) {
3229		/*
3230		 * Keep allocating space from the extent list
3231		 * for soft partitions of the desired size until
3232		 * there's not enough free space left in the list
3233		 * for another soft partiition of that size.
3234		 * Add one to the number of possible soft partitions
3235		 * for each soft partition for which there is
3236		 * enough free space left.
3237		 */
3238		succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION,
3239		    sp_size, &extent_listp, alignment);
3240		if (succeeded == B_TRUE) {
3241			number_of_possible_sps++;
3242		}
3243	}
3244	if (extent_listp != NULL) {
3245		meta_sp_list_free(&extent_listp);
3246	}
3247	return (number_of_possible_sps);
3248}
3249
3250/*
3251 * FUNCTION:	meta_sp_get_number_of_possible_sps_on_drive()
3252 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
3253 *			     for the set containing the drive for
3254 *			     which the number of possible soft partitions
3255 *			     is to be returned
3256 *		mddrivenamep - a reference to the mddrivename_t of the drive
3257 *			       for which the number of possible soft partitions
3258 *			       is to be returned
3259 *		sp_size - the size in blocks of the proposed soft partitions
3260 * OUTPUT:	int return value
3261 * RETURNS:	int - the number of soft partitions of the desired size
3262 *		      that can be created on the drive
3263 * PURPOSE:	returns the number of soft partitions of a given size
3264 *		that can be created on a drive, if the entire drive is
3265 *		soft partitioned
3266 */
3267int
3268meta_sp_get_number_of_possible_sps_on_drive(
3269	mdsetname_t	*mdsetnamep,
3270	mddrivename_t	*mddrivenamep,
3271	blkcnt_t	sp_size
3272)
3273{
3274	sp_ext_node_t	*extent_listp;
3275	int		number_of_possible_sps;
3276	boolean_t	succeeded;
3277
3278	extent_listp = NULL;
3279	number_of_possible_sps = 0;
3280	if (sp_size > 0) {
3281		succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
3282		    mddrivenamep, &extent_listp);
3283	} else {
3284		succeeded = B_FALSE;
3285	}
3286	while (succeeded == B_TRUE) {
3287		/*
3288		 * Keep allocating space from the extent list
3289		 * for soft partitions of the desired size until
3290		 * there's not enough free space left in the list
3291		 * for another soft partition of that size.
3292		 * Add one to the number of possible soft partitions
3293		 * for each soft partition for which there is
3294		 * enough free space left.
3295		 *
3296		 * Since it's a drive, not a metadevice, make no
3297		 * assumptions about alignment.
3298		 */
3299		succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION,
3300		    sp_size, &extent_listp, SP_UNALIGNED);
3301		if (succeeded == B_TRUE) {
3302			number_of_possible_sps++;
3303		}
3304	}
3305	if (extent_listp != NULL) {
3306		meta_sp_list_free(&extent_listp);
3307	}
3308	return (number_of_possible_sps);
3309}
3310
3311/*
3312 * FUNCTION:	meta_sp_get_possible_sp_size()
3313 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
3314 *			     for the set containing the device for
3315 *			     which the possible soft partition size
3316 *			     is to be returned
3317 *		mdnamep - a reference to the mdname_t of the device
3318 *			  for which the possible soft partition size
3319 *			  is to be returned
3320 *		number_of_sps - the desired number of soft partitions
3321 * OUTPUT:	blkcnt_t return value
3322 * RETURNS:	blkcnt_t - the possible soft partition size in blocks
3323 * PURPOSE:	returns the maximum possible size of each of a given number of
3324 *		soft partitions of equal size that can be created on a device
3325 */
3326blkcnt_t
3327meta_sp_get_possible_sp_size(
3328	mdsetname_t	*mdsetnamep,
3329	mdname_t	*mdnamep,
3330	int		number_of_sps
3331)
3332{
3333	blkcnt_t	free_blocks;
3334	blkcnt_t	sp_size;
3335	boolean_t	succeeded;
3336
3337	sp_size = 0;
3338	if (number_of_sps > 0) {
3339		free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep);
3340		sp_size = free_blocks / number_of_sps;
3341		succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep,
3342		    number_of_sps, sp_size);
3343		while ((succeeded == B_FALSE) && (sp_size > 0)) {
3344			/*
3345			 * To compensate for space that may have been
3346			 * occupied by watermarks, reduce sp_size by a
3347			 * number of blocks equal to the number of soft
3348			 * partitions desired, and test again to see
3349			 * whether the desired number of soft partitions
3350			 * can be created.
3351			 */
3352			sp_size = sp_size - ((blkcnt_t)number_of_sps);
3353			succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep,
3354			    number_of_sps, sp_size);
3355		}
3356		if (sp_size < 0) {
3357			sp_size = 0;
3358		}
3359	}
3360	return (sp_size);
3361}
3362
3363/*
3364 * FUNCTION:	meta_sp_get_possible_sp_size_on_drive()
3365 * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
3366 *			     for the set containing the drive for
3367 *			     which the possible soft partition size
3368 *			     is to be returned
3369 *		mddrivenamep - a reference to the mddrivename_t of the drive
3370 *			       for which the possible soft partition size
3371 *			       is to be returned
3372 *		number_of_sps - the desired number of soft partitions
3373 * OUTPUT:	blkcnt_t return value
3374 * RETURNS:	blkcnt_t - the possible soft partition size in blocks
3375 * PURPOSE:	returns the maximum possible size of each of a given number of
3376 *		soft partitions of equal size that can be created on a drive
3377 *              if the entire drive is soft partitioned
3378 */
3379blkcnt_t
3380meta_sp_get_possible_sp_size_on_drive(
3381	mdsetname_t	*mdsetnamep,
3382	mddrivename_t	*mddrivenamep,
3383	int		number_of_sps
3384)
3385{
3386	blkcnt_t	free_blocks;
3387	blkcnt_t	sp_size;
3388	boolean_t	succeeded;
3389
3390	sp_size = 0;
3391	if (number_of_sps > 0) {
3392		free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep,
3393		    mddrivenamep);
3394		sp_size = free_blocks / number_of_sps;
3395		succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep,
3396		    mddrivenamep, number_of_sps, sp_size);
3397		while ((succeeded == B_FALSE) && (sp_size > 0)) {
3398			/*
3399			 * To compensate for space that may have been
3400			 * occupied by watermarks, reduce sp_size by a
3401			 * number of blocks equal to the number of soft
3402			 * partitions desired, and test again to see
3403			 * whether the desired number of soft partitions
3404			 * can be created.
3405			 */
3406			sp_size = sp_size - ((blkcnt_t)number_of_sps);
3407			succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep,
3408			    mddrivenamep, number_of_sps, sp_size);
3409		}
3410		if (sp_size < 0) {
3411			sp_size = 0;
3412		}
3413	}
3414	return (sp_size);
3415}
3416
3417/*
3418 * **************************************************************************
3419 *                  Unit Structure Manipulation Functions                   *
3420 * **************************************************************************
3421 */
3422
3423/*
3424 * FUNCTION:	meta_sp_fillextarray()
3425 * INPUT:	mp	- the unit structure to fill
3426 *		extlist	- the list of extents to fill with
3427 * OUTPUT:	none
3428 * RETURNS:	void
3429 * PURPOSE:	fills in the unit structure extent list with the extents
3430 *		specified by extlist.  Only extents in extlist with the
3431 *		EXTFLG_UPDATE flag are changed in the unit structure,
3432 *		and the index into the unit structure is the sequence
3433 *		number in the extent list.  After all of the nodes have
3434 *		been updated the virtual offsets in the unit structure
3435 *		are updated to reflect the new lengths.
3436 */
3437static void
3438meta_sp_fillextarray(
3439	mp_unit_t	*mp,
3440	sp_ext_node_t	*extlist
3441)
3442{
3443	int	i;
3444	sp_ext_node_t	*ext;
3445	sp_ext_offset_t	curvoff = 0LL;
3446
3447	assert(mp != NULL);
3448
3449	/* go through the allocation list and fill in our unit structure */
3450	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
3451		if ((ext->ext_type == EXTTYP_ALLOC) &&
3452		    (ext->ext_flags & EXTFLG_UPDATE) != 0) {
3453			mp->un_ext[ext->ext_seq].un_poff =
3454			    ext->ext_offset + MD_SP_WMSIZE;
3455			mp->un_ext[ext->ext_seq].un_len =
3456			    ext->ext_length - MD_SP_WMSIZE;
3457		}
3458	}
3459
3460	for (i = 0; i < mp->un_numexts; i++) {
3461		assert(mp->un_ext[i].un_poff != 0);
3462		assert(mp->un_ext[i].un_len  != 0);
3463		mp->un_ext[i].un_voff = curvoff;
3464		curvoff += mp->un_ext[i].un_len;
3465	}
3466}
3467
3468/*
3469 * FUNCTION:	meta_sp_createunit()
3470 * INPUT:	np	- the name of the device to create a unit structure for
3471 *		compnp	- the name of the device the soft partition is on
3472 *		extlist	- the extent list to populate the new unit with
3473 *		numexts	- the number of extents in the extent list
3474 *		len	- the total size of the soft partition (sectors)
3475 *		status	- the initial status of the unit structure
3476 * OUTPUT:	ep	- return error pointer
3477 * RETURNS:	mp_unit_t * - the new unit structure.
3478 * PURPOSE:	allocates and fills in a new soft partition unit
3479 *		structure to be passed to the soft partitioning driver
3480 *		for creation.
3481 */
3482static mp_unit_t *
3483meta_sp_createunit(
3484	mdname_t	*np,
3485	mdname_t	*compnp,
3486	sp_ext_node_t	*extlist,
3487	int		numexts,
3488	sp_ext_length_t	len,
3489	sp_status_t	status,
3490	md_error_t	*ep
3491)
3492{
3493	mp_unit_t	*mp;
3494	uint_t		ms_size;
3495
3496	ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) +
3497	    (numexts * sizeof (mp->un_ext[0]));
3498
3499	mp = Zalloc(ms_size);
3500
3501	/* fill in fields in common unit structure */
3502	mp->c.un_type = MD_METASP;
3503	mp->c.un_size = ms_size;
3504	MD_SID(mp) = meta_getminor(np->dev);
3505	mp->c.un_total_blocks = len;
3506	mp->c.un_actual_tb = len;
3507
3508	/* set up geometry */
3509	(void) meta_sp_setgeom(np, compnp, mp, ep);
3510
3511	/* if we're building on metadevice we can't parent */
3512	if (metaismeta(compnp))
3513		MD_CAPAB(mp) = MD_CANT_PARENT;
3514	else
3515		MD_CAPAB(mp) = MD_CAN_PARENT;
3516
3517	/* fill soft partition-specific fields */
3518	mp->un_dev = compnp->dev;
3519	mp->un_key = compnp->key;
3520
3521	/* mdname_t start_blk field is not 64-bit! */
3522	mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk;
3523	mp->un_status = status;
3524	mp->un_numexts = numexts;
3525	mp->un_length = len;
3526
3527	/* fill in the extent array */
3528	meta_sp_fillextarray(mp, extlist);
3529
3530	return (mp);
3531}
3532
3533/*
3534 * FUNCTION:	meta_sp_updateunit()
3535 * INPUT:	np       - name structure for the metadevice being updated
3536 *		old_un	 - the original unit structure that is being updated
3537 *		extlist	 - the extent list to populate the new unit with
3538 *		grow_len - the amount by which the partition is being grown
3539 *		numexts	 - the number of extents in the extent list
3540 *		ep       - return error pointer
3541 * OUTPUT:	none
3542 * RETURNS:	mp_unit_t * - the updated unit structure
3543 * PURPOSE:	allocates and fills in a new soft partition unit structure to
3544 *		be passed to the soft partitioning driver for creation.  The
3545 *		old unit structure is first copied in, and then the updated
3546 *		extents are changed in the new unit structure.  This is
3547 *		typically used when the size of an existing unit is changed.
3548 */
3549static mp_unit_t *
3550meta_sp_updateunit(
3551	mdname_t	*np,
3552	mp_unit_t	*old_un,
3553	sp_ext_node_t	*extlist,
3554	sp_ext_length_t	grow_len,
3555	int		numexts,
3556	md_error_t	*ep
3557)
3558{
3559	mp_unit_t	*new_un;
3560	sp_ext_length_t	new_len;
3561	uint_t		new_size;
3562
3563	assert(old_un != NULL);
3564	assert(extlist != NULL);
3565
3566	/* allocate new unit structure and copy in old unit */
3567	new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) +
3568	    ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0]));
3569	new_len = old_un->un_length + grow_len;
3570	new_un = Zalloc(new_size);
3571	bcopy(old_un, new_un, old_un->c.un_size);
3572
3573	/* update size and geometry information */
3574	new_un->c.un_size = new_size;
3575	new_un->un_length = new_len;
3576	new_un->c.un_total_blocks = new_len;
3577	new_un->c.un_actual_tb = new_len;
3578	if (meta_adjust_geom((md_unit_t *)new_un, np,
3579	    old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct,
3580	    0, ep) != 0) {
3581		Free(new_un);
3582		return (NULL);
3583	}
3584
3585	/* update extent information */
3586	new_un->un_numexts += numexts;
3587
3588	meta_sp_fillextarray(new_un, extlist);
3589
3590	return (new_un);
3591}
3592
3593/*
3594 * FUNCTION:	meta_get_sp()
3595 * INPUT:	sp	- the set name for the device to get
3596 *		np	- the name of the device to get
3597 * OUTPUT:	ep	- return error pointer
3598 * RETURNS:	md_sp_t * - the XDR unit structure for the soft partition
3599 * PURPOSE:	interface to the rest of libmeta for fetching a unit structure
3600 *		for the named device.  Just a wrapper for meta_get_sp_common().
3601 */
3602md_sp_t *
3603meta_get_sp(
3604	mdsetname_t	*sp,
3605	mdname_t	*np,
3606	md_error_t	*ep
3607)
3608{
3609	return (meta_get_sp_common(sp, np, 0, ep));
3610}
3611
3612/*
3613 * FUNCTION:	meta_get_sp_common()
3614 * INPUT:	sp	- the set name for the device to get
3615 *		np	- the name of the device to get
3616 *		fast	- whether to use the cache or not (NOT IMPLEMENTED!)
3617 * OUTPUT:	ep	- return error pointer
3618 * RETURNS:	md_sp_t * - the XDR unit structure for the soft partition,
3619 *			    NULL if np is not a soft partition
3620 * PURPOSE:	common routine for fetching a soft partition unit structure
3621 */
3622md_sp_t *
3623meta_get_sp_common(
3624	mdsetname_t	*sp,
3625	mdname_t	*np,
3626	int		fast,
3627	md_error_t	*ep
3628)
3629{
3630	mddrivename_t	*dnp = np->drivenamep;
3631	char		*miscname;
3632	mp_unit_t	*mp;
3633	md_sp_t		*msp;
3634	int		i;
3635
3636	/* must have set */
3637	assert(sp != NULL);
3638
3639	/* short circuit */
3640	if (dnp->unitp != NULL) {
3641		if (dnp->unitp->type != MD_METASP)
3642			return (NULL);
3643		return ((md_sp_t *)dnp->unitp);
3644	}
3645	/* get miscname and unit */
3646	if ((miscname = metagetmiscname(np, ep)) == NULL)
3647		return (NULL);
3648
3649	if (strcmp(miscname, MD_SP) != 0) {
3650		(void) mdmderror(ep, MDE_NOT_SP, 0, np->cname);
3651		return (NULL);
3652	}
3653
3654	if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL)
3655		return (NULL);
3656
3657	assert(mp->c.un_type == MD_METASP);
3658
3659	/* allocate soft partition */
3660	msp = Zalloc(sizeof (*msp));
3661
3662	/* get the common information */
3663	msp->common.namep = np;
3664	msp->common.type = mp->c.un_type;
3665	msp->common.state = mp->c.un_status;
3666	msp->common.capabilities = mp->c.un_capabilities;
3667	msp->common.parent = mp->c.un_parent;
3668	msp->common.size = mp->c.un_total_blocks;
3669	msp->common.user_flags = mp->c.un_user_flags;
3670	msp->common.revision = mp->c.un_revision;
3671
3672	/* get soft partition information */
3673	if ((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL)
3674		goto out;
3675
3676	/*
3677	 * Fill in the key and the start block.  Note that the start
3678	 * block in the unit structure is 64 bits but the name pointer
3679	 * only supports 32 bits.
3680	 */
3681	msp->compnamep->key = mp->un_key;
3682	msp->compnamep->start_blk = mp->un_start_blk;
3683
3684	/* fill in status field */
3685	msp->status = mp->un_status;
3686
3687	/* allocate the extents */
3688	msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val));
3689	msp->ext.ext_len = mp->un_numexts;
3690
3691	/* do the extents for this soft partition */
3692	for (i = 0; i < mp->un_numexts; i++) {
3693		struct mp_ext	*mde = &mp->un_ext[i];
3694		md_sp_ext_t	*extp = &msp->ext.ext_val[i];
3695
3696		extp->voff = mde->un_voff;
3697		extp->poff = mde->un_poff;
3698		extp->len = mde->un_len;
3699	}
3700
3701	/* cleanup, return success */
3702	Free(mp);
3703	dnp->unitp = (md_common_t *)msp;
3704	return (msp);
3705
3706out:
3707	/* clean up and return error */
3708	Free(mp);
3709	Free(msp);
3710	return (NULL);
3711}
3712
3713
3714/*
3715 * FUNCTION:	meta_init_sp()
3716 * INPUT:	spp	- the set name for the new device
3717 *		argc	- the remaining argument count for the metainit cmdline
3718 *		argv	- the remainder of the unparsed command line
3719 *		options	- global options parsed by metainit
3720 * OUTPUT:	ep	- return error pointer
3721 * RETURNS:	int	- -1 failure, 0 success
3722 * PURPOSE:	provides the command line parsing and name management overhead
3723 *		for creating a new soft partition.  Ultimately this calls
3724 *		meta_create_sp() which does the real work of allocating space
3725 *		for the new soft partition.
3726 */
3727int
3728meta_init_sp(
3729	mdsetname_t	**spp,
3730	int		argc,
3731	char		*argv[],
3732	mdcmdopts_t	options,
3733	md_error_t	*ep
3734)
3735{
3736	char		*compname = NULL;
3737	mdname_t	*spcompnp = NULL;	/* name of component volume */
3738	char		*devname = argv[0];	/* unit name */
3739	mdname_t	*np = NULL;		/* name of soft partition */
3740	md_sp_t		*msp = NULL;
3741	int		c;
3742	int		old_optind;
3743	sp_ext_length_t	len = 0LL;
3744	int		rval = -1;
3745	uint_t		seq;
3746	int		oflag;
3747	int		failed;
3748	mddrivename_t	*dnp = NULL;
3749	sp_ext_length_t	alignment = 0LL;
3750	sp_ext_node_t	*extlist = NULL;
3751
3752	assert(argc > 0);
3753
3754	/* expect sp name, -p, optional -e, compname, and size parameters */
3755	/* grab soft partition name */
3756	if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL)
3757		goto out;
3758
3759	/* see if it exists already */
3760	if (metagetmiscname(np, ep) != NULL) {
3761		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
3762		    meta_getminor(np->dev), devname);
3763		goto out;
3764	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
3765		goto out;
3766	} else {
3767		mdclrerror(ep);
3768	}
3769	--argc, ++argv;
3770
3771	if (argc == 0)
3772		goto syntax;
3773
3774	/* grab -p */
3775	if (strcmp(argv[0], "-p") != 0)
3776		goto syntax;
3777	--argc, ++argv;
3778
3779	if (argc == 0)
3780		goto syntax;
3781
3782	/* see if -e is there */
3783	if (strcmp(argv[0], "-e") == 0) {
3784		/* use the whole disk */
3785		options |= MDCMD_USE_WHOLE_DISK;
3786		--argc, ++argv;
3787	}
3788
3789	if (argc == 0)
3790		goto syntax;
3791
3792	/* get component name */
3793	compname = Strdup(argv[0]);
3794
3795	if (options & MDCMD_USE_WHOLE_DISK) {
3796		if ((dnp = metadrivename(spp, compname, ep)) == NULL) {
3797			goto out;
3798		}
3799		if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) {
3800			goto out;
3801		}
3802	} else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) {
3803		goto out;
3804	}
3805	assert(*spp != NULL);
3806
3807	if (!(options & MDCMD_NOLOCK)) {
3808		/* grab set lock */
3809		if (meta_lock(*spp, TRUE, ep))
3810			goto out;
3811
3812		if (meta_check_ownership(*spp, ep) != 0)
3813			goto out;
3814	}
3815
3816	/* allocate the soft partition */
3817	msp = Zalloc(sizeof (*msp));
3818
3819	/* setup common */
3820	msp->common.namep = np;
3821	msp->common.type = MD_METASP;
3822
3823	compname = spcompnp->cname;
3824
3825	assert(spcompnp->rname != NULL);
3826	--argc, ++argv;
3827
3828	if (argc == 0) {
3829		goto syntax;
3830	}
3831
3832	if (*argv[0] == '-') {
3833		/*
3834		 * parse any other command line options, this includes
3835		 * the recovery options -o and -b. The special thing
3836		 * with these options is that the len needs to be
3837		 * kept track of otherwise when the geometry of the
3838		 * "device" is built it will create an invalid geometry
3839		 */
3840		old_optind = optind = 0;
3841		opterr = 0;
3842		oflag = 0;
3843		seq = 0;
3844		failed = 0;
3845		while ((c = getopt(argc, argv, "A:o:b:")) != -1) {
3846			sp_ext_offset_t	offset;
3847			sp_ext_length_t	length;
3848			longlong_t	tmp_size;
3849
3850			switch (c) {
3851			case 'A':	/* data alignment */
3852				if (meta_sp_parsesizestring(optarg,
3853				    &alignment) == -1) {
3854					failed = 1;
3855				}
3856				break;
3857			case 'o':	/* offset in the partition */
3858				if (oflag == 1) {
3859					failed = 1;
3860				} else {
3861					tmp_size = atoll(optarg);
3862					if (tmp_size <= 0) {
3863						failed = 1;
3864					} else {
3865						oflag = 1;
3866						options |= MDCMD_DIRECT;
3867
3868						offset = tmp_size;
3869					}
3870				}
3871
3872				break;
3873			case 'b':	/* number of blocks */
3874				if (oflag == 0) {
3875					failed = 1;
3876				} else {
3877					tmp_size = atoll(optarg);
3878					if (tmp_size <= 0) {
3879						failed = 1;
3880					} else {
3881						oflag = 0;
3882
3883						length = tmp_size;
3884
3885						/* we have a pair of values */
3886						meta_sp_list_insert(*spp, np,
3887						    &extlist, offset, length,
3888						    EXTTYP_ALLOC, seq++,
3889						    EXTFLG_UPDATE,
3890						    meta_sp_cmp_by_offset);
3891						len += length;
3892					}
3893				}
3894
3895				break;
3896			default:
3897				argc -= old_optind;
3898				argv += old_optind;
3899				goto options;
3900			}
3901
3902			if (failed) {
3903				argc -= old_optind;
3904				argv += old_optind;
3905				goto syntax;
3906			}
3907
3908			old_optind = optind;
3909		}
3910		argc -= optind;
3911		argv += optind;
3912
3913		/*
3914		 * Must have matching pairs of -o and -b flags
3915		 */
3916		if (oflag != 0)
3917			goto syntax;
3918
3919		/*
3920		 * Can't specify both layout (indicated indirectly by
3921		 * len being set by thye -o/-b cases above) AND
3922		 * alignment
3923		 */
3924		if ((len > 0LL) && (alignment > 0LL))
3925			goto syntax;
3926
3927		/*
3928		 * sanity check the allocation list
3929		 */
3930		if ((extlist != NULL) && meta_sp_list_overlaps(extlist))
3931			goto syntax;
3932	}
3933
3934	if (len == 0LL) {
3935		if (argc == 0)
3936			goto syntax;
3937		if (meta_sp_parsesize(argv[0], &len) == -1)
3938			goto syntax;
3939		--argc, ++argv;
3940	}
3941
3942	msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val));
3943	msp->ext.ext_val->len = len;
3944	msp->compnamep = spcompnp;
3945
3946	/* we should be at the end */
3947	if (argc != 0)
3948		goto syntax;
3949
3950	/* create soft partition */
3951	if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0)
3952		goto out;
3953	rval = 0;
3954
3955	/* let em know */
3956	if (options & MDCMD_PRINT) {
3957		(void) printf(dgettext(TEXT_DOMAIN,
3958		    "%s: Soft Partition is setup\n"),
3959		    devname);
3960		(void) fflush(stdout);
3961	}
3962	goto out;
3963
3964syntax:
3965	/* syntax error */
3966	rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv);
3967	goto out;
3968
3969options:
3970	/* options error */
3971	rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv);
3972	goto out;
3973
3974out:
3975	if (msp != NULL) {
3976		if (msp->ext.ext_val != NULL) {
3977			Free(msp->ext.ext_val);
3978		}
3979		Free(msp);
3980	}
3981
3982	return (rval);
3983}
3984
3985/*
3986 * FUNCTION:	meta_free_sp()
3987 * INPUT:	msp	- the soft partition unit to free
3988 * OUTPUT:	none
3989 * RETURNS:	void
3990 * PURPOSE:	provides an interface from the rest of libmeta for freeing a
3991 *		soft partition unit
3992 */
3993void
3994meta_free_sp(md_sp_t *msp)
3995{
3996	Free(msp);
3997}
3998
3999/*
4000 * FUNCTION:	meta_sp_issp()
4001 * INPUT:	sp	- the set name to check
4002 *		np	- the name to check
4003 * OUTPUT:	ep	- return error pointer
4004 * RETURNS:	int	- 0 means sp,np is a soft partition
4005 *			  1 means sp,np is not a soft partition
4006 * PURPOSE:	determines whether the given device is a soft partition
4007 *		device.  This is called by other metadevice check routines.
4008 */
4009int
4010meta_sp_issp(
4011	mdsetname_t	*sp,
4012	mdname_t	*np,
4013	md_error_t	*ep
4014)
4015{
4016	if (meta_get_sp_common(sp, np, 0, ep) == NULL)
4017		return (1);
4018
4019	return (0);
4020}
4021
4022/*
4023 * FUNCTION:	meta_check_sp()
4024 * INPUT:	sp	- the set name to check
4025 *		msp	- the unit structure to check
4026 *		options	- creation options
4027 * OUTPUT:	repart_options - options to be passed to
4028 *				meta_repartition_drive()
4029 *		ep	- return error pointer
4030 * RETURNS:	int	-  0 ok to create on this component
4031 *			  -1 error or not ok to create on this component
4032 * PURPOSE:	Checks to determine whether the rules for creation of
4033 *		soft partitions allow creation of a soft partition on
4034 *		the device described by the mdname_t structure referred
4035 *		to by msp->compnamep.
4036 *
4037 *		NOTE: Does NOT check to determine whether the extents
4038 *		      described in the md_sp_t structure referred to by
4039 *		      msp will fit on the device described by the mdname_t
4040 *		      structure located at msp->compnamep.
4041 */
4042static int
4043meta_check_sp(
4044	mdsetname_t	*sp,
4045	md_sp_t		*msp,
4046	mdcmdopts_t	options,
4047	int		*repart_options,
4048	md_error_t	*ep
4049)
4050{
4051	md_common_t	*mdp;
4052	mdname_t	*compnp = msp->compnamep;
4053	uint_t		slice;
4054	mddrivename_t	*dnp;
4055	mdname_t	*slicenp;
4056	mdvtoc_t	*vtocp;
4057
4058	/* make sure it is in the set */
4059	if (meta_check_inset(sp, compnp, ep) != 0)
4060		return (-1);
4061
4062	if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
4063		uint_t	rep_slice;
4064
4065		/*
4066		 * check to make sure we can partition this drive.
4067		 * we cannot continue if any of the following are
4068		 * true:
4069		 * The drive is a metadevice.
4070		 * The drive contains a mounted slice.
4071		 * The drive contains a slice being swapped to.
4072		 * The drive contains slices which are part of other
4073		 * metadevices.
4074		 * The drive contains a metadb.
4075		 */
4076		if (metaismeta(compnp))
4077			return (mddeverror(ep, MDE_IS_META, compnp->dev,
4078			    compnp->cname));
4079
4080		assert(compnp->drivenamep != NULL);
4081
4082		/*
4083		 * ensure that we have slice 0 since the disk will be
4084		 * repartitioned in the USE_WHOLE_DISK case.  this check
4085		 * is redundant unless the user incorrectly specifies a
4086		 * a fully qualified drive AND slice name (i.e.,
4087		 * /dev/dsk/cXtXdXsX), which will be incorrectly
4088		 * recognized as a drive name by the metaname code.
4089		 */
4090
4091		if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL)
4092			return (-1);
4093		if (slice != MD_SLICE0)
4094			return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname));
4095
4096		dnp = compnp->drivenamep;
4097		if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
4098			return (-1);
4099
4100		for (slice = 0; slice < vtocp->nparts; slice++) {
4101
4102			/* only check if the slice really exists */
4103			if (vtocp->parts[slice].size == 0)
4104				continue;
4105
4106			slicenp = metaslicename(dnp, slice, ep);
4107			if (slicenp == NULL)
4108				return (-1);
4109
4110			/* check to ensure that it is not already in use */
4111			if (meta_check_inuse(sp,
4112			    slicenp, MDCHK_INUSE, ep) != 0) {
4113				return (-1);
4114			}
4115
4116			/*
4117			 * Up to this point, tests are applied to all
4118			 * slices uniformly.
4119			 */
4120
4121			if (slice == rep_slice) {
4122				/*
4123				 * Tests inside the body of this
4124				 * conditional are applied only to
4125				 * slice seven.
4126				 */
4127				if (meta_check_inmeta(sp, slicenp,
4128				    options | MDCHK_ALLOW_MDDB |
4129				    MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0)
4130					return (-1);
4131
4132				/*
4133				 * For slice seven, a metadb is NOT an
4134				 * automatic failure. It merely means
4135				 * that we're not allowed to muck
4136				 * about with the partitioning of that
4137				 * slice.  We indicate this by masking
4138				 * in the MD_REPART_LEAVE_REP flag.
4139				 */
4140				if (metahasmddb(sp, slicenp, ep)) {
4141					assert(repart_options !=
4142					    NULL);
4143					*repart_options |=
4144					    MD_REPART_LEAVE_REP;
4145				}
4146
4147				/*
4148				 * Skip the remaining tests for slice
4149				 * seven
4150				 */
4151				continue;
4152			}
4153
4154			/*
4155			 * Tests below this point will be applied to
4156			 * all slices EXCEPT for the replica slice.
4157			 */
4158
4159
4160			/* check if component is in a metadevice */
4161			if (meta_check_inmeta(sp, slicenp, options, 0,
4162			    -1, ep) != 0)
4163				return (-1);
4164
4165			/* check to see if component has a metadb */
4166			if (metahasmddb(sp, slicenp, ep))
4167				return (mddeverror(ep, MDE_HAS_MDDB,
4168				    slicenp->dev, slicenp->cname));
4169		}
4170		/*
4171		 * This should be all of the testing necessary when
4172		 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of
4173		 * meta_check_sp() is oriented towards component
4174		 * arguments instead of disks.
4175		 */
4176		goto meta_check_sp_ok;
4177
4178	}
4179
4180	/* check to ensure that it is not already in use */
4181	if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) {
4182		return (-1);
4183	}
4184
4185	if (!metaismeta(compnp)) {	/* handle non-metadevices */
4186
4187		/*
4188		 * The component can have one or more soft partitions on it
4189		 * already, but can't be part of any other type of metadevice,
4190		 * so if it is used for a metadevice, but the metadevice
4191		 * isn't a soft partition, return failure.
4192		 */
4193
4194		if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 &&
4195		    meta_check_insp(sp, compnp, 0, -1, ep) == 0) {
4196			return (-1);
4197		}
4198	} else {			/* handle metadevices */
4199		/* get underlying unit & check capabilities */
4200		if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL)
4201			return (-1);
4202
4203		if ((! (mdp->capabilities & MD_CAN_PARENT)) ||
4204		    (! (mdp->capabilities & MD_CAN_SP)))
4205			return (mdmderror(ep, MDE_INVAL_UNIT,
4206			    meta_getminor(compnp->dev), compnp->cname));
4207	}
4208
4209meta_check_sp_ok:
4210	mdclrerror(ep);
4211	return (0);
4212}
4213
4214/*
4215 * FUNCTION:	meta_create_sp()
4216 * INPUT:	sp	- the set name to create in
4217 *		msp	- the unit structure to create
4218 *		oblist	- an optional list of requested extents (-o/-b options)
4219 *		options	- creation options
4220 *		alignment - data alignment
4221 * OUTPUT:	ep	- return error pointer
4222 * RETURNS:	int	-  0 success, -1 error
4223 * PURPOSE:	does most of the work for creating a soft partition.  If
4224 *		metainit -p -e was used, first partition the drive.  Then
4225 *		create an extent list based on the existing soft partitions
4226 *		and assume all space not used by them is free.  Storage for
4227 *		the new soft partition is allocated from the free extents
4228 *		based on the length specified on the command line or the
4229 *		oblist passed in.  The unit structure is then committed and
4230 *		the watermarks are updated.  Finally, the status is changed to
4231 *		Okay and the process is complete.
4232 */
4233static int
4234meta_create_sp(
4235	mdsetname_t	*sp,
4236	md_sp_t		*msp,
4237	sp_ext_node_t	*oblist,
4238	mdcmdopts_t	options,
4239	sp_ext_length_t	alignment,
4240	md_error_t	*ep
4241)
4242{
4243	mdname_t	*np = msp->common.namep;
4244	mdname_t	*compnp = msp->compnamep;
4245	mp_unit_t	*mp = NULL;
4246	mdnamelist_t	*keynlp = NULL, *spnlp = NULL;
4247	md_set_params_t	set_params;
4248	int		rval = -1;
4249	diskaddr_t	comp_size;
4250	diskaddr_t	sp_start;
4251	sp_ext_node_t	*extlist = NULL;
4252	int		numexts = 0;	/* number of extents */
4253	int		count = 0;
4254	int		committed = 0;
4255	int		repart_options = MD_REPART_FORCE;
4256	int		create_flag = MD_CRO_32BIT;
4257	int		mn_set_master = 0;
4258
4259	md_set_desc	*sd;
4260	md_set_mmown_params_t	*ownpar = NULL;
4261	int		comp_is_mirror = 0;
4262
4263	/* validate soft partition */
4264	if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0)
4265		return (-1);
4266
4267	if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
4268		if ((options & MDCMD_DOIT) != 0) {
4269			if (meta_repartition_drive(sp,
4270			    compnp->drivenamep,
4271			    repart_options,
4272			    NULL, /* Don't return the VTOC */
4273			    ep) != 0)
4274
4275				return (-1);
4276		} else {
4277			/*
4278			 * If -n and -e are both specified, it doesn't make
4279			 * sense to continue without actually partitioning
4280			 * the drive.
4281			 */
4282			return (0);
4283		}
4284	}
4285
4286	/* populate the start_blk field of the component name */
4287	if ((sp_start = meta_sp_get_start(sp, compnp, ep)) ==
4288	    MD_DISKADDR_ERROR) {
4289		rval = -1;
4290		goto out;
4291	}
4292
4293	if (options & MDCMD_DOIT) {
4294		/* store name in namespace */
4295		if (add_key_name(sp, compnp, &keynlp, ep) != 0) {
4296			rval = -1;
4297			goto out;
4298		}
4299	}
4300
4301	/*
4302	 * Get a list of the soft partitions that currently reside on
4303	 * the component.  We should ALWAYS force reload the cache,
4304	 * because if this is a single creation, there will not BE a
4305	 * cached list, and if we're using the md.tab, we must rebuild
4306	 * the list because it won't contain the previous (if any)
4307	 * soft partition.
4308	 */
4309	count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
4310	if (count < 0) {
4311		/* error occured */
4312		rval = -1;
4313		goto out;
4314	}
4315
4316	/*
4317	 * get the size of the underlying device.  if the size is smaller
4318	 * than or equal to the watermark size, we know there isn't
4319	 * enough space.
4320	 */
4321	if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) {
4322		rval = -1;
4323		goto out;
4324	} else if (comp_size <= MD_SP_WMSIZE) {
4325		(void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname);
4326		rval = -1;
4327		goto out;
4328	}
4329	/*
4330	 * seed extlist with reserved space at the beginning of the volume and
4331	 * enough space for the end watermark.  The end watermark always gets
4332	 * updated, but if the underlying device changes size it may not be
4333	 * pointed to until the extent before it is updated.  Since the
4334	 * end of the reserved space is where the first watermark starts,
4335	 * the reserved extent should never be marked for updating.
4336	 */
4337
4338	meta_sp_list_insert(NULL, NULL, &extlist,
4339	    0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
4340	meta_sp_list_insert(NULL, NULL, &extlist,
4341	    (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE,
4342	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
4343
4344	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
4345		rval = -1;
4346		goto out;
4347	}
4348
4349	metafreenamelist(spnlp);
4350
4351	if (getenv(META_SP_DEBUG)) {
4352		meta_sp_debug("meta_create_sp: list of used extents:\n");
4353		meta_sp_list_dump(extlist);
4354	}
4355
4356	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
4357
4358	/* get extent list from -o/-b options or from free space */
4359	if (options & MDCMD_DIRECT) {
4360		if (getenv(META_SP_DEBUG)) {
4361			meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n");
4362			meta_sp_list_dump(oblist);
4363		}
4364
4365		numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist);
4366		if (numexts == -1) {
4367			(void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname);
4368			rval = -1;
4369			goto out;
4370		}
4371	} else {
4372		numexts = meta_sp_alloc_by_len(sp, np, &extlist,
4373		    &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment :
4374		    meta_sp_get_default_alignment(sp, compnp, ep));
4375		if (numexts == -1) {
4376			(void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname);
4377			rval = -1;
4378			goto out;
4379		}
4380	}
4381
4382	assert(extlist != NULL);
4383
4384	/* create soft partition */
4385	mp = meta_sp_createunit(msp->common.namep, msp->compnamep,
4386	    extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep);
4387
4388	create_flag = meta_check_devicesize(mp->c.un_total_blocks);
4389
4390	/* if we're not doing anything (metainit -n), return success */
4391	if (! (options & MDCMD_DOIT)) {
4392		rval = 0;	/* success */
4393		goto out;
4394	}
4395
4396	(void) memset(&set_params, 0, sizeof (set_params));
4397
4398	if (create_flag == MD_CRO_64BIT) {
4399		mp->c.un_revision |= MD_64BIT_META_DEV;
4400		set_params.options = MD_CRO_64BIT;
4401	} else {
4402		mp->c.un_revision &= ~MD_64BIT_META_DEV;
4403		set_params.options = MD_CRO_32BIT;
4404	}
4405
4406	if (getenv(META_SP_DEBUG)) {
4407		meta_sp_debug("meta_create_sp: printing unit structure\n");
4408		meta_sp_printunit(mp);
4409	}
4410
4411	/*
4412	 * Check to see if we're trying to create a partition on a mirror. If so
4413	 * we may have to enforce an ownership change before writing the
4414	 * watermark out.
4415	 */
4416	if (metaismeta(compnp)) {
4417		char *miscname;
4418
4419		miscname = metagetmiscname(compnp, ep);
4420		if (miscname != NULL)
4421			comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0);
4422		else
4423			comp_is_mirror = 0;
4424	} else {
4425		comp_is_mirror = 0;
4426	}
4427
4428	/*
4429	 * For a multi-node environment we have to ensure that the master
4430	 * node owns an underlying mirror before we issue the MD_IOCSET ioctl.
4431	 * If the master does not own the device we will deadlock as the
4432	 * implicit write of the watermarks (in sp_ioctl.c) will cause an
4433	 * ownership change that will block as the MD_IOCSET is still in
4434	 * progress. To close this window we force an owner change to occur
4435	 * before issuing the MD_IOCSET. We cannot simply open the device and
4436	 * write to it as this will only work for the first soft-partition
4437	 * creation.
4438	 */
4439
4440	if (comp_is_mirror && !metaislocalset(sp)) {
4441
4442		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
4443			rval = -1;
4444			goto out;
4445		}
4446		if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) {
4447			mn_set_master = 1;
4448		}
4449	}
4450
4451	set_params.mnum = MD_SID(mp);
4452	set_params.size = mp->c.un_size;
4453	set_params.mdp = (uintptr_t)mp;
4454	MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum));
4455
4456	/* first phase of commit. */
4457	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
4458	    np->cname) != 0) {
4459		(void) mdstealerror(ep, &set_params.mde);
4460		rval = -1;
4461		goto out;
4462	}
4463
4464	/* we've successfully committed the record */
4465	committed = 1;
4466
4467	/* write watermarks */
4468	/*
4469	 * Special-case for Multi-node sets. As we now have a distributed DRL
4470	 * update mechanism, we _will_ hit the ioctl-within-ioctl deadlock case
4471	 * unless we use a 'special' MN-capable ioctl to stage the watermark
4472	 * update. This only affects the master-node in an MN set.
4473	 */
4474	if (mn_set_master) {
4475		if (meta_mn_sp_update_wm(sp, msp, extlist, ep) < 0) {
4476			rval = -1;
4477			goto out;
4478		}
4479	} else {
4480		if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
4481			rval = -1;
4482			goto out;
4483		}
4484	}
4485
4486	/* second phase of commit, set status to MD_SP_OK */
4487	if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) {
4488		rval = -1;
4489		goto out;
4490	}
4491	rval = 0;
4492out:
4493	Free(mp);
4494	if (ownpar)
4495		Free(ownpar);
4496
4497	if (extlist != NULL)
4498		meta_sp_list_free(&extlist);
4499
4500	if (rval != 0 && keynlp != NULL && committed != 1)
4501		(void) del_key_names(sp, keynlp, NULL);
4502
4503	metafreenamelist(keynlp);
4504
4505	return (rval);
4506}
4507
4508/*
4509 * **************************************************************************
4510 *                      Reset (metaclear) Functions                         *
4511 * **************************************************************************
4512 */
4513
4514/*
4515 * FUNCTION:	meta_sp_reset_common()
4516 * INPUT:	sp	- the set name of the device to reset
4517 *		np	- the name of the device to reset
4518 *		msp	- the unit structure to reset
4519 *		options	- metaclear options
4520 * OUTPUT:	ep	- return error pointer
4521 * RETURNS:	int	-  0 success, -1 error
4522 * PURPOSE:	"resets", or more accurately deletes, the soft partition
4523 *		specified.  First the state is set to "deleting" and then the
4524 *		watermarks are all cleared out.  Once the watermarks have been
4525 *		updated, the unit structure is deleted from the metadb.
4526 */
4527static int
4528meta_sp_reset_common(
4529	mdsetname_t	*sp,
4530	mdname_t	*np,
4531	md_sp_t		*msp,
4532	md_sp_reset_t	reset_params,
4533	mdcmdopts_t	options,
4534	md_error_t	*ep
4535)
4536{
4537	char	*miscname;
4538	int	rval = -1;
4539	int	is_open = 0;
4540
4541	/* make sure that nobody owns us */
4542	if (MD_HAS_PARENT(msp->common.parent))
4543		return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev),
4544		    np->cname));
4545
4546	/* make sure that the soft partition isn't open */
4547	if ((is_open = meta_isopen(sp, np, ep, options)) < 0)
4548		return (-1);
4549	else if (is_open)
4550		return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev),
4551		    np->cname));
4552
4553	/* get miscname */
4554	if ((miscname = metagetmiscname(np, ep)) == NULL)
4555		return (-1);
4556
4557	/* fill in reset params */
4558	MD_SETDRIVERNAME(&reset_params, miscname, sp->setno);
4559	reset_params.mnum = meta_getminor(np->dev);
4560	reset_params.force = (options & MDCMD_FORCE) ? 1 : 0;
4561
4562	/*
4563	 * clear soft partition - phase one.
4564	 * place the soft partition into the "delete pending" state.
4565	 */
4566	if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0)
4567		return (-1);
4568
4569	/*
4570	 * Now clear the watermarks.  If the force flag is specified,
4571	 * ignore any errors writing the watermarks and delete the unit
4572	 * structure anyway.  An error may leave the on-disk format in a
4573	 * corrupt state.  If force is not specified and we fail here,
4574	 * the soft partition will remain in the "delete pending" state.
4575	 */
4576	if ((meta_sp_clear_wm(sp, msp, ep) < 0) &&
4577	    ((options & MDCMD_FORCE) == 0))
4578		goto out;
4579
4580	/*
4581	 * clear soft partition - phase two.
4582	 * the driver removes the soft partition from the metadb and
4583	 * zeros out incore version.
4584	 */
4585	if (metaioctl(MD_IOCRESET, &reset_params,
4586	    &reset_params.mde, np->cname) != 0) {
4587		(void) mdstealerror(ep, &reset_params.mde);
4588		goto out;
4589	}
4590
4591	/*
4592	 * Wait for the /dev to be cleaned up. Ignore the return
4593	 * value since there's not much we can do.
4594	 */
4595	(void) meta_update_devtree(meta_getminor(np->dev));
4596
4597	rval = 0;	/* success */
4598
4599	if (options & MDCMD_PRINT) {
4600		(void) printf(dgettext(TEXT_DOMAIN,
4601		    "%s: Soft Partition is cleared\n"),
4602		    np->cname);
4603		(void) fflush(stdout);
4604	}
4605
4606	/*
4607	 * if told to recurse and on a metadevice, then attempt to
4608	 * clear the subdevices.  Indicate failure if the clear fails.
4609	 */
4610	if ((options & MDCMD_RECURSE) &&
4611	    (metaismeta(msp->compnamep)) &&
4612	    (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0))
4613		rval = -1;
4614
4615out:
4616	meta_invalidate_name(np);
4617	return (rval);
4618}
4619
4620/*
4621 * FUNCTION:	meta_sp_reset()
4622 * INPUT:	sp	- the set name of the device to reset
4623 *		np	- the name of the device to reset
4624 *		options	- metaclear options
4625 * OUTPUT:	ep	- return error pointer
4626 * RETURNS:	int	-  0 success, -1 error
4627 * PURPOSE:	provides the entry point to the rest of libmeta for deleting a
4628 *		soft partition.  If np is NULL, then soft partitions are
4629 *		all deleted at the current level and then recursively deleted.
4630 *		Otherwise, if a name is specified either directly or as a
4631 *		result of a recursive operation, it deletes only that name.
4632 *		Since something sitting under a soft partition may be parented
4633 *		to it, we have to reparent that other device to another soft
4634 *		partition on the same component if we're deleting the one it's
4635 *		parented to.
4636 */
4637int
4638meta_sp_reset(
4639	mdsetname_t	*sp,
4640	mdname_t	*np,
4641	mdcmdopts_t	options,
4642	md_error_t	*ep
4643)
4644{
4645	md_sp_t		*msp;
4646	int		rval = -1;
4647	mdnamelist_t	*spnlp = NULL, *nlp = NULL;
4648	md_sp_reset_t	reset_params;
4649	int		num_sp;
4650
4651	assert(sp != NULL);
4652
4653	/* reset/delete all soft paritions */
4654	if (np == NULL) {
4655		/*
4656		 * meta_reset_all sets MDCMD_RECURSE, but this behavior
4657		 * is incorrect for soft partitions.  We want to clear
4658		 * all soft partitions at a particular level in the
4659		 * metadevice stack before moving to the next level.
4660		 * Thus, we clear MDCMD_RECURSE from the options.
4661		 */
4662		options &= ~MDCMD_RECURSE;
4663
4664		/* for each soft partition */
4665		rval = 0;
4666		if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0)
4667			rval = -1;
4668
4669		for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) {
4670			np = nlp->namep;
4671			if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
4672				rval = -1;
4673				break;
4674			}
4675			/*
4676			 * meta_reset_all calls us twice to get soft
4677			 * partitions at the top and bottom of the stack.
4678			 * thus, if we have a parent, we'll get deleted
4679			 * on the next call.
4680			 */
4681			if (MD_HAS_PARENT(msp->common.parent))
4682				continue;
4683			/*
4684			 * If this is a multi-node set, we send a series
4685			 * of individual metaclear commands.
4686			 */
4687			if (meta_is_mn_set(sp, ep)) {
4688				if (meta_mn_send_metaclear_command(sp,
4689				    np->cname, options, 0, ep) != 0) {
4690					rval = -1;
4691					break;
4692				}
4693			} else {
4694				if (meta_sp_reset(sp, np, options, ep) != 0) {
4695					rval = -1;
4696					break;
4697				}
4698			}
4699		}
4700		/* cleanup return status */
4701		metafreenamelist(spnlp);
4702		return (rval);
4703	}
4704
4705	/* check the name */
4706	if (metachkmeta(np, ep) != 0)
4707		return (-1);
4708
4709	/* get the unit structure */
4710	if ((msp = meta_get_sp(sp, np, ep)) == NULL)
4711		return (-1);
4712
4713	/* clear out reset parameters */
4714	(void) memset(&reset_params, 0, sizeof (reset_params));
4715
4716	/* if our child is a metadevice, we need to deparent/reparent it */
4717	if (metaismeta(msp->compnamep)) {
4718		/* get sp's on this component */
4719		if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep,
4720		    &spnlp, 1, ep)) <= 0)
4721			/* no sp's on this device.  error! */
4722			return (-1);
4723		else if (num_sp == 1)
4724			/* last sp on this device, so we deparent */
4725			reset_params.new_parent = MD_NO_PARENT;
4726		else {
4727			/* have to reparent this metadevice */
4728			for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
4729				if (meta_getminor(nlp->namep->dev) ==
4730				    meta_getminor(np->dev))
4731					continue;
4732				/*
4733				 * this isn't the softpart we are deleting,
4734				 * so use this device as the new parent.
4735				 */
4736				reset_params.new_parent =
4737				    meta_getminor(nlp->namep->dev);
4738				break;
4739			}
4740		}
4741		metafreenamelist(spnlp);
4742	}
4743
4744	if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0)
4745		return (-1);
4746
4747	return (0);
4748}
4749
4750/*
4751 * FUNCTION:	meta_sp_reset_component()
4752 * INPUT:	sp	- the set name of the device to reset
4753 *		name	- the string name of the device to reset
4754 *		options	- metaclear options
4755 * OUTPUT:	ep	- return error pointer
4756 * RETURNS:	int	-  0 success, -1 error
4757 * PURPOSE:	provides the ability to delete all soft partitions on a
4758 *		specified device (metaclear -p).  It first gets all of the
4759 *		soft partitions on the component and then deletes each one
4760 *		individually.
4761 */
4762int
4763meta_sp_reset_component(
4764	mdsetname_t	*sp,
4765	char		*name,
4766	mdcmdopts_t	options,
4767	md_error_t	*ep
4768)
4769{
4770	mdname_t	*compnp, *np;
4771	mdnamelist_t	*spnlp = NULL;
4772	mdnamelist_t	*nlp = NULL;
4773	md_sp_t		*msp;
4774	int		count;
4775	md_sp_reset_t	reset_params;
4776
4777	if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL)
4778		return (-1);
4779
4780	/* If we're starting out with no soft partitions, it's an error */
4781	count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
4782	if (count == 0)
4783		return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname));
4784	else if (count < 0)
4785		return (-1);
4786
4787	/*
4788	 * clear all soft partitions on this component.
4789	 * NOTE: we reparent underlying metadevices as we go so that
4790	 * things stay sane.  Also, if we encounter an error, we stop
4791	 * and go no further in case recovery might be needed.
4792	 */
4793	for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
4794		/* clear out reset parameters */
4795		(void) memset(&reset_params, 0, sizeof (reset_params));
4796
4797		/* check the name */
4798		np = nlp->namep;
4799
4800		if (metachkmeta(np, ep) != 0) {
4801			metafreenamelist(spnlp);
4802			return (-1);
4803		}
4804
4805		/* get the unit structure */
4806		if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
4807			metafreenamelist(spnlp);
4808			return (-1);
4809		}
4810
4811		/* have to deparent/reparent metadevices */
4812		if (metaismeta(compnp)) {
4813			if (nlp->next == NULL)
4814				reset_params.new_parent = MD_NO_PARENT;
4815			else
4816				reset_params.new_parent =
4817				    meta_getminor(spnlp->next->namep->dev);
4818		}
4819
4820		/* clear soft partition */
4821		if (meta_sp_reset_common(sp, np, msp, reset_params,
4822		    options, ep) < 0) {
4823			metafreenamelist(spnlp);
4824			return (-1);
4825		}
4826	}
4827	metafreenamelist(spnlp);
4828	return (0);
4829}
4830
4831/*
4832 * **************************************************************************
4833 *                      Grow (metattach) Functions                          *
4834 * **************************************************************************
4835 */
4836
4837/*
4838 * FUNCTION:	meta_sp_attach()
4839 * INPUT:	sp	- the set name of the device to attach to
4840 *		np	- the name of the device to attach to
4841 *		addsize	- the unparsed string holding the amount of space to add
4842 *		options	- metattach options
4843 *		alignment - data alignment
4844 * OUTPUT:	ep	- return error pointer
4845 * RETURNS:	int	-  0 success, -1 error
4846 * PURPOSE:	grows a soft partition by reading in the existing unit
4847 *		structure and setting its state to Growing, allocating more
4848 *		space (similar to meta_create_sp()), updating the watermarks,
4849 *		and then writing out the new unit structure in the Okay state.
4850 */
4851int
4852meta_sp_attach(
4853	mdsetname_t	*sp,
4854	mdname_t	*np,
4855	char		*addsize,
4856	mdcmdopts_t	options,
4857	sp_ext_length_t	alignment,
4858	md_error_t	*ep
4859)
4860{
4861	md_grow_params_t	grow_params;
4862	sp_ext_length_t		grow_len;	/* amount to grow */
4863	mp_unit_t		*mp, *new_un;
4864	mdname_t		*compnp = NULL;
4865
4866	sp_ext_node_t		*extlist = NULL;
4867	int			numexts;
4868	mdnamelist_t		*spnlp = NULL;
4869	int			count;
4870	md_sp_t			*msp;
4871	daddr_t			start_block;
4872
4873	/* should have the same set */
4874	assert(sp != NULL);
4875	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
4876
4877	/* check name */
4878	if (metachkmeta(np, ep) != 0)
4879		return (-1);
4880
4881	if (meta_sp_parsesize(addsize, &grow_len) == -1) {
4882		return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname));
4883	}
4884
4885	if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL)
4886		return (-1);
4887
4888	/* make sure we don't have a parent */
4889	if (MD_HAS_PARENT(mp->c.un_parent)) {
4890		Free(mp);
4891		return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname));
4892	}
4893
4894	if (getenv(META_SP_DEBUG)) {
4895		meta_sp_debug("meta_sp_attach: Unit structure before new "
4896		    "space:\n");
4897		meta_sp_printunit(mp);
4898	}
4899
4900	/*
4901	 * NOTE: the fast option to metakeyname is 0 as opposed to 1
4902	 * If this was not the case we would suffer the following
4903	 * assertion failure:
4904	 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP
4905	 * file meta_check.x, line 315
4906	 * I guess this is because we have not "seen" this drive before
4907	 * and hence hit the failure - this is of course the attach routine
4908	 */
4909	if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) {
4910		Free(mp);
4911		return (-1);
4912	}
4913
4914	/* metakeyname does not fill in the key. */
4915	compnp->key = mp->un_key;
4916
4917	/* work out the space on the component that we are dealing with */
4918	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
4919
4920	/*
4921	 * see if the component has been soft partitioned yet, or if an
4922	 * error occurred.
4923	 */
4924	if (count == 0) {
4925		Free(mp);
4926		return (mdmderror(ep, MDE_NOT_SP, 0, np->cname));
4927	} else if (count < 0) {
4928		Free(mp);
4929		return (-1);
4930	}
4931
4932	/*
4933	 * seed extlist with reserved space at the beginning of the volume and
4934	 * enough space for the end watermark.  The end watermark always gets
4935	 * updated, but if the underlying device changes size it may not be
4936	 * pointed to until the extent before it is updated.  Since the
4937	 * end of the reserved space is where the first watermark starts,
4938	 * the reserved extent should never be marked for updating.
4939	 */
4940	if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
4941	    MD_DISKADDR_ERROR) {
4942		Free(mp);
4943		return (-1);
4944	}
4945
4946	meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
4947	    EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
4948	meta_sp_list_insert(NULL, NULL, &extlist,
4949	    metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
4950	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
4951
4952	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
4953		Free(mp);
4954		return (-1);
4955	}
4956
4957	metafreenamelist(spnlp);
4958
4959	if (getenv(META_SP_DEBUG)) {
4960		meta_sp_debug("meta_sp_attach: list of used extents:\n");
4961		meta_sp_list_dump(extlist);
4962	}
4963
4964	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
4965
4966	assert(mp->un_numexts >= 1);
4967	numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len,
4968	    mp->un_ext[mp->un_numexts - 1].un_poff,
4969	    (alignment > 0) ? alignment :
4970	    meta_sp_get_default_alignment(sp, compnp, ep));
4971
4972	if (numexts == -1) {
4973		Free(mp);
4974		return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname));
4975	}
4976
4977	/* allocate new unit structure and copy in old unit */
4978	if ((new_un = meta_sp_updateunit(np, mp, extlist,
4979	    grow_len, numexts, ep)) == NULL) {
4980		Free(mp);
4981		return (-1);
4982	}
4983	Free(mp);
4984
4985	/* If running in dryrun mode (-n option), we're done here */
4986	if ((options & MDCMD_DOIT) == 0) {
4987		if (options & MDCMD_PRINT) {
4988			(void) printf(dgettext(TEXT_DOMAIN,
4989			    "%s: Soft Partition would grow\n"),
4990			    np->cname);
4991			(void) fflush(stdout);
4992		}
4993		return (0);
4994	}
4995
4996	if (getenv(META_SP_DEBUG)) {
4997		meta_sp_debug("meta_sp_attach: updated unit structure:\n");
4998		meta_sp_printunit(new_un);
4999	}
5000
5001	assert(new_un != NULL);
5002
5003	(void) memset(&grow_params, 0, sizeof (grow_params));
5004	if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
5005		grow_params.options = MD_CRO_64BIT;
5006		new_un->c.un_revision |= MD_64BIT_META_DEV;
5007	} else {
5008		grow_params.options = MD_CRO_32BIT;
5009		new_un->c.un_revision &= ~MD_64BIT_META_DEV;
5010	}
5011	grow_params.mnum = MD_SID(new_un);
5012	grow_params.size = new_un->c.un_size;
5013	grow_params.mdp = (uintptr_t)new_un;
5014	MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum));
5015
5016	if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde,
5017	    np->cname) != 0) {
5018		(void) mdstealerror(ep, &grow_params.mde);
5019		return (-1);
5020	}
5021
5022	/* update all watermarks */
5023
5024	if ((msp = meta_get_sp(sp, np, ep)) == NULL)
5025		return (-1);
5026	if (meta_sp_update_wm(sp, msp, extlist, ep) < 0)
5027		return (-1);
5028
5029
5030	/* second phase of commit, set status to MD_SP_OK */
5031	if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0)
5032		return (-1);
5033
5034	meta_invalidate_name(np);
5035
5036	if (options & MDCMD_PRINT) {
5037		(void) printf(dgettext(TEXT_DOMAIN,
5038		    "%s: Soft Partition has been grown\n"),
5039		    np->cname);
5040		(void) fflush(stdout);
5041	}
5042
5043	return (0);
5044}
5045
5046/*
5047 * **************************************************************************
5048 *                    Recovery (metarecover) Functions                      *
5049 * **************************************************************************
5050 */
5051
5052/*
5053 * FUNCTION:	meta_recover_sp()
5054 * INPUT:	sp	- the name of the set we are recovering on
5055 *		compnp	- name pointer for device we are recovering on
5056 *		argc	- argument count
5057 *		argv	- left over arguments not parsed by metarecover command
5058 *		options	- metarecover options
5059 * OUTPUT:	ep	- return error pointer
5060 * RETURNS:	int	- 0 - success, -1 - error
5061 * PURPOSE:	parse soft partitioning-specific metarecover options and
5062 *		dispatch to the appropriate function to handle recovery.
5063 */
5064int
5065meta_recover_sp(
5066	mdsetname_t	*sp,
5067	mdname_t	*compnp,
5068	int		argc,
5069	char		*argv[],
5070	mdcmdopts_t	options,
5071	md_error_t	*ep
5072)
5073{
5074	md_set_desc	*sd;
5075
5076	if (argc > 1) {
5077		(void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname,
5078		    argc, argv);
5079		return (-1);
5080	}
5081
5082	/*
5083	 * For a MN set, this operation must be performed on the master
5084	 * as it is responsible for maintaining the watermarks
5085	 */
5086	if (!metaislocalset(sp)) {
5087		if ((sd = metaget_setdesc(sp, ep)) == NULL)
5088			return (-1);
5089		if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) {
5090			(void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno,
5091			    sd->sd_mn_master_nodenm, NULL, NULL);
5092			return (-1);
5093		}
5094	}
5095	if (argc == 0) {
5096		/*
5097		 * if no additional arguments are passed, metarecover should
5098		 * validate both on-disk and metadb structures as well as
5099		 * checking that both are consistent with each other
5100		 */
5101		if (meta_sp_validate_wm(sp, compnp, options, ep) < 0)
5102			return (-1);
5103		if (meta_sp_validate_unit(sp, compnp, options, ep) < 0)
5104			return (-1);
5105		if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0)
5106			return (-1);
5107	} else if (strcmp(argv[0], "-d") == 0) {
5108		/*
5109		 * Ensure that there is no existing valid record for this
5110		 * soft-partition. If there is we have nothing to do.
5111		 */
5112		if (meta_sp_validate_unit(sp, compnp, options, ep) == 0)
5113			return (-1);
5114		/* validate and recover from on-disk structures */
5115		if (meta_sp_validate_wm(sp, compnp, options, ep) < 0)
5116			return (-1);
5117		if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0)
5118			return (-1);
5119	} else if (strcmp(argv[0], "-m") == 0) {
5120		/* validate and recover from metadb structures */
5121		if (meta_sp_validate_unit(sp, compnp, options, ep) < 0)
5122			return (-1);
5123		if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0)
5124			return (-1);
5125	} else {
5126		/* syntax error */
5127		(void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname,
5128		    argc, argv);
5129		return (-1);
5130	}
5131
5132	return (0);
5133}
5134
5135/*
5136 * FUNCTION:	meta_sp_display_exthdr()
5137 * INPUT:	none
5138 * OUTPUT:	none
5139 * RETURNS:	void
5140 * PURPOSE:	print header line for sp_ext_node_t information.  to be used
5141 *		in conjunction with meta_sp_display_ext().
5142 */
5143static void
5144meta_sp_display_exthdr(void)
5145{
5146	(void) printf("%20s %5s %7s %20s %20s\n",
5147	    dgettext(TEXT_DOMAIN, "Name"),
5148	    dgettext(TEXT_DOMAIN, "Seq#"),
5149	    dgettext(TEXT_DOMAIN, "Type"),
5150	    dgettext(TEXT_DOMAIN, "Offset"),
5151	    dgettext(TEXT_DOMAIN, "Length"));
5152}
5153
5154
5155/*
5156 * FUNCTION:	meta_sp_display_ext()
5157 * INPUT:	ext	- extent to display
5158 * OUTPUT:	none
5159 * RETURNS:	void
5160 * PURPOSE:	print selected fields from sp_ext_node_t.
5161 */
5162static void
5163meta_sp_display_ext(sp_ext_node_t *ext)
5164{
5165	/* print extent information */
5166	if (ext->ext_namep != NULL)
5167		(void) printf("%20s ", ext->ext_namep->cname);
5168	else
5169		(void) printf("%20s ", "NONE");
5170
5171	(void) printf("%5u ", ext->ext_seq);
5172
5173	switch (ext->ext_type) {
5174	case EXTTYP_ALLOC:
5175		(void) printf("%7s ", "ALLOC");
5176		break;
5177	case EXTTYP_FREE:
5178		(void) printf("%7s ", "FREE");
5179		break;
5180	case EXTTYP_RESERVED:
5181		(void) printf("%7s ", "RESV");
5182		break;
5183	case EXTTYP_END:
5184		(void) printf("%7s ", "END");
5185		break;
5186	default:
5187		(void) printf("%7s ", "INVLD");
5188		break;
5189	}
5190
5191	(void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length);
5192}
5193
5194
5195/*
5196 * FUNCTION:	meta_sp_checkseq()
5197 * INPUT:	extlist	- list of extents to be checked
5198 * OUTPUT:	none
5199 * RETURNS:	int	- 0 - success, -1 - error
5200 * PURPOSE:	check soft partition sequence numbers.  this function assumes
5201 *		that a list of extents representing 1 or more soft partitions
5202 *		is passed in sorted in sequence number order.  within a
5203 *		single soft partition, there may not be any missing or
5204 *		duplicate sequence numbers.
5205 */
5206static int
5207meta_sp_checkseq(sp_ext_node_t *extlist)
5208{
5209	sp_ext_node_t *ext;
5210
5211	assert(extlist != NULL);
5212
5213	for (ext = extlist;
5214	    ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC;
5215	    ext = ext->ext_next) {
5216		if (ext->ext_next->ext_namep != NULL &&
5217		    strcmp(ext->ext_next->ext_namep->cname,
5218		    ext->ext_namep->cname) != 0)
5219				continue;
5220
5221		if (ext->ext_next->ext_seq != ext->ext_seq + 1) {
5222			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5223			    "%s: sequence numbers are "
5224			    "incorrect: %d should be %d\n"),
5225			    ext->ext_next->ext_namep->cname,
5226			    ext->ext_next->ext_seq, ext->ext_seq + 1);
5227			return (-1);
5228		}
5229	}
5230	return (0);
5231}
5232
5233
5234/*
5235 * FUNCTION:	meta_sp_resolve_name_conflict()
5236 * INPUT:	sp	- name of set we're are recovering in.
5237 *		old_np	- name pointer of soft partition we found on disk.
5238 * OUTPUT:	new_np	- name pointer for new soft partition name.
5239 *		ep	- error pointer returned.
5240 * RETURNS:	int	- 0 - name not replace, 1 - name replaced, -1 - error
5241 * PURPOSE:	Check to see if the name of one of the soft partitions we found
5242 *		on disk already exists in the metadb.  If so, prompt for a new
5243 *		name.  In addition, we keep a static array of names that
5244 *		will be recovered from this device since these names don't
5245 *		exist in the configuration at this point but cannot be
5246 *		recovered more than once.
5247 */
5248static int
5249meta_sp_resolve_name_conflict(
5250	mdsetname_t	*sp,
5251	mdname_t	*old_np,
5252	mdname_t	**new_np,
5253	md_error_t	*ep
5254)
5255{
5256	char		yesno[255];
5257	char		*yes;
5258	char		newname[MD_SP_MAX_DEVNAME_PLUS_1];
5259	int		nunits;
5260	static int	*used_names = NULL;
5261
5262	assert(old_np != NULL);
5263
5264	if (used_names == NULL) {
5265		if ((nunits = meta_get_nunits(ep)) < 0)
5266			return (-1);
5267		used_names = Zalloc(nunits * sizeof (int));
5268	}
5269
5270	/* see if it exists already */
5271	if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 &&
5272	    metagetmiscname(old_np, ep) == NULL) {
5273		if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
5274			return (-1);
5275		else {
5276			used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1;
5277			mdclrerror(ep);
5278			return (0);
5279		}
5280	}
5281
5282	/* name exists, ask the user for a new one */
5283	(void) printf(dgettext(TEXT_DOMAIN,
5284	    "WARNING: A soft partition named %s was found in the extent\n"
5285	    "headers, but this name already exists in the metadb "
5286	    "configuration.\n"
5287	    "In order to continue recovery you must supply\n"
5288	    "a new name for this soft partition.\n"), old_np->cname);
5289	(void) printf(dgettext(TEXT_DOMAIN,
5290	    "Would you like to continue and supply a new name? (yes/no) "));
5291
5292	(void) fflush(stdout);
5293	if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
5294	    (strlen(yesno) == 1))
5295		(void) snprintf(yesno, sizeof (yesno), "%s\n",
5296		    dgettext(TEXT_DOMAIN, "no"));
5297	yes = dgettext(TEXT_DOMAIN, "yes");
5298	if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
5299		return (-1);
5300	}
5301
5302	(void) fflush(stdin);
5303
5304	/* get the new name */
5305	for (;;) {
5306		(void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name "
5307		    "for this soft partition (dXXXX) "));
5308		(void) fflush(stdout);
5309		if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL)
5310			(void) strcpy(newname, "");
5311
5312		/* remove newline character */
5313		if (newname[strlen(newname) - 1] == '\n')
5314			newname[strlen(newname) - 1] = '\0';
5315
5316		if (!(is_metaname(newname)) ||
5317		    (meta_init_make_device(&sp, newname, ep) <= 0)) {
5318			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5319			    "Invalid metadevice name\n"));
5320			(void) fflush(stderr);
5321			continue;
5322		}
5323
5324		if ((*new_np = metaname(&sp, newname,
5325		    META_DEVICE, ep)) == NULL) {
5326			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5327			    "Invalid metadevice name\n"));
5328			(void) fflush(stderr);
5329			continue;
5330		}
5331
5332		assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits);
5333		/* make sure the name isn't already being used */
5334		if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] ||
5335		    metagetmiscname(*new_np, ep) != NULL) {
5336			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5337			    "That name already exists\n"));
5338			continue;
5339		} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
5340			return (-1);
5341
5342		break;
5343	}
5344
5345	/* got a new name, place in used array and return */
5346	used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1;
5347	mdclrerror(ep);
5348	return (1);
5349}
5350
5351/*
5352 * FUNCTION:	meta_sp_validate_wm()
5353 * INPUT:	sp	- set name we are recovering in
5354 *		compnp	- name pointer for device we are recovering from
5355 *		options	- metarecover options
5356 * OUTPUT:	ep	- error pointer returned
5357 * RETURNS:	int	- 0 - success, -1 - error
5358 * PURPOSE:	validate and display watermark configuration.  walk the
5359 *		on-disk watermark structures and validate the information
5360 *		found within.  since a watermark configuration is
5361 *		"self-defining", the act of traversing the watermarks
5362 *		is part of the validation process.
5363 */
5364static int
5365meta_sp_validate_wm(
5366	mdsetname_t	*sp,
5367	mdname_t	*compnp,
5368	mdcmdopts_t	options,
5369	md_error_t	*ep
5370)
5371{
5372	sp_ext_node_t	*extlist = NULL;
5373	sp_ext_node_t	*ext;
5374	int		num_sps = 0;
5375	int		rval;
5376
5377	if ((options & MDCMD_VERBOSE) != 0)
5378		(void) printf(dgettext(TEXT_DOMAIN,
5379		    "Verifying on-disk structures on %s.\n"),
5380		    compnp->cname);
5381
5382	/*
5383	 * for each watermark, build an ext_node, place on list.
5384	 */
5385	rval = meta_sp_extlist_from_wm(sp, compnp, &extlist,
5386	    meta_sp_cmp_by_nameseq, ep);
5387
5388	if ((options & MDCMD_VERBOSE) != 0) {
5389		/* print out what we found */
5390		if (extlist == NULL)
5391			(void) printf(dgettext(TEXT_DOMAIN,
5392			    "No extent headers found on %s.\n"),
5393			    compnp->cname);
5394		else {
5395			(void) printf(dgettext(TEXT_DOMAIN,
5396			    "The following extent headers were found on %s.\n"),
5397			    compnp->cname);
5398			meta_sp_display_exthdr();
5399		}
5400		for (ext = extlist; ext != NULL; ext = ext->ext_next)
5401			meta_sp_display_ext(ext);
5402	}
5403
5404	if (rval < 0) {
5405		(void) printf(dgettext(TEXT_DOMAIN,
5406		    "%s: On-disk structures invalid or "
5407		    "no soft partitions found.\n"),
5408		    compnp->cname);
5409		return (-1);
5410	}
5411
5412	assert(extlist != NULL);
5413
5414	/* count number of soft partitions */
5415	for (ext = extlist;
5416	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
5417	    ext = ext->ext_next) {
5418		if (ext->ext_next != NULL &&
5419		    ext->ext_next->ext_namep != NULL &&
5420		    strcmp(ext->ext_next->ext_namep->cname,
5421		    ext->ext_namep->cname) == 0)
5422				continue;
5423		num_sps++;
5424	}
5425
5426	if ((options & MDCMD_VERBOSE) != 0)
5427		(void) printf(dgettext(TEXT_DOMAIN,
5428		    "Found %d soft partition(s) on %s.\n"), num_sps,
5429		    compnp->cname);
5430
5431	if (num_sps == 0) {
5432		(void) printf(dgettext(TEXT_DOMAIN,
5433		    "%s: No soft partitions.\n"), compnp->cname);
5434		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5435	}
5436
5437	/* check sequence numbers */
5438	if ((options & MDCMD_VERBOSE) != 0)
5439		(void) printf(dgettext(TEXT_DOMAIN,
5440		    "Checking sequence numbers.\n"));
5441
5442	if (meta_sp_checkseq(extlist) != 0)
5443		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5444
5445	return (0);
5446}
5447
5448/*
5449 * FUNCTION:	meta_sp_validate_unit()
5450 * INPUT:	sp	- name of set we are recovering in
5451 *		compnp	- name of component we are recovering from
5452 *		options	- metarecover options
5453 * OUTPUT:	ep	- error pointer returned
5454 * RETURNS:	int	- 0 - success, -1 - error
5455 * PURPOSE:	validate and display metadb configuration.  begin by getting
5456 *		all soft partitions built on the specified component.  get
5457 *		the unit structure for each one and validate the fields within.
5458 */
5459static int
5460meta_sp_validate_unit(
5461	mdsetname_t	*sp,
5462	mdname_t	*compnp,
5463	mdcmdopts_t	options,
5464	md_error_t	*ep
5465)
5466{
5467	md_sp_t		*msp;
5468	mdnamelist_t	*spnlp = NULL;
5469	mdnamelist_t	*namep = NULL;
5470	int		count;
5471	uint_t		extn;
5472	sp_ext_length_t	size;
5473
5474	if ((options & MDCMD_VERBOSE) != 0)
5475		(void) printf(dgettext(TEXT_DOMAIN,
5476		    "%s: Validating soft partition metadb entries.\n"),
5477		    compnp->cname);
5478
5479	if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR)
5480		return (-1);
5481
5482	/* get all soft partitions on component */
5483	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
5484
5485	if (count == 0) {
5486		(void) printf(dgettext(TEXT_DOMAIN,
5487		    "%s: No soft partitions.\n"), compnp->cname);
5488		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5489	} else if (count < 0) {
5490		return (-1);
5491	}
5492
5493	/* Now go through the soft partitions and check each one */
5494	for (namep = spnlp; namep != NULL; namep = namep->next) {
5495		mdname_t	*curnp = namep->namep;
5496		sp_ext_offset_t	curvoff;
5497
5498		/* get the unit structure */
5499		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
5500			return (-1);
5501
5502		/* verify generic unit structure parameters */
5503		if ((options & MDCMD_VERBOSE) != 0)
5504			(void) printf(dgettext(TEXT_DOMAIN,
5505			    "\nVerifying device %s.\n"),
5506			    curnp->cname);
5507
5508		/*
5509		 * MD_SP_LAST is an invalid state and is always the
5510		 * highest numbered.
5511		 */
5512		if (msp->status >= MD_SP_LAST) {
5513			(void) printf(dgettext(TEXT_DOMAIN,
5514			    "%s: status value %u is out of range.\n"),
5515			    curnp->cname, msp->status);
5516			return (mdmderror(ep, MDE_RECOVER_FAILED,
5517			    0, curnp->cname));
5518		} else if ((options & MDCMD_VERBOSE) != 0) {
5519			uint_t	tstate = 0;
5520
5521			if (metaismeta(msp->compnamep)) {
5522				if (meta_get_tstate(msp->common.namep->dev,
5523				    &tstate, ep) != 0)
5524					return (-1);
5525			}
5526			(void) printf(dgettext(TEXT_DOMAIN,
5527			    "%s: Status \"%s\" is valid.\n"),
5528			    curnp->cname, meta_sp_status_to_name(msp->status,
5529			    tstate & MD_DEV_ERRORED));
5530		}
5531
5532		/* Now verify each extent */
5533		if ((options & MDCMD_VERBOSE) != 0)
5534			(void) printf("%14s %21s %21s %21s\n",
5535			    dgettext(TEXT_DOMAIN, "Extent Number"),
5536			    dgettext(TEXT_DOMAIN, "Virtual Offset"),
5537			    dgettext(TEXT_DOMAIN, "Physical Offset"),
5538			    dgettext(TEXT_DOMAIN, "Length"));
5539
5540		curvoff = 0ULL;
5541		for (extn = 0; extn < msp->ext.ext_len; extn++) {
5542			md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
5543
5544			if ((options & MDCMD_VERBOSE) != 0)
5545				(void) printf("%14u %21llu %21llu %21llu\n",
5546				    extn, extp->voff, extp->poff, extp->len);
5547
5548			if (extp->voff != curvoff) {
5549				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5550				    "%s: virtual offset for extent %u "
5551				    "is inconsistent, expected %llu, "
5552				    "got %llu.\n"), curnp->cname, extn,
5553				    curvoff, extp->voff);
5554				return (mdmderror(ep, MDE_RECOVER_FAILED,
5555				    0, compnp->cname));
5556			}
5557
5558			/* make sure extent does not drop off the end */
5559			if ((extp->poff + extp->len) == size) {
5560				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5561				    "%s: extent %u at offset %llu, "
5562				    "length %llu exceeds the size of the "
5563				    "device, %llu.\n"), curnp->cname,
5564				    extn, extp->poff, extp->len, size);
5565				return (mdmderror(ep, MDE_RECOVER_FAILED,
5566				    0, compnp->cname));
5567			}
5568
5569			curvoff += extp->len;
5570		}
5571	}
5572	if (options & MDCMD_PRINT) {
5573		(void) printf(dgettext(TEXT_DOMAIN,
5574		    "%s: Soft Partition metadb configuration is valid\n"),
5575		    compnp->cname);
5576	}
5577	return (0);
5578}
5579
5580/*
5581 * FUNCTION:	meta_sp_validate_wm_and_unit()
5582 * INPUT:	sp	- name of set we are recovering in
5583 *		compnp	- name of device we are recovering from
5584 *		options	- metarecover options
5585 * OUTPUT:	ep	- error pointer returned
5586 * RETURNS:	int	- 0 - success, -1 error
5587 * PURPOSE:	cross-validate and display watermarks and metadb records.
5588 *		get both the unit structures for the soft partitions built
5589 *		on the specified component and the watermarks found on that
5590 *		component and check to make sure they are consistent with
5591 *		each other.
5592 */
5593static int
5594meta_sp_validate_wm_and_unit(
5595	mdsetname_t	*sp,
5596	mdname_t	*np,
5597	mdcmdopts_t	options,
5598	md_error_t	*ep
5599)
5600{
5601	sp_ext_node_t	*wmlist = NULL;
5602	sp_ext_node_t	*unitlist = NULL;
5603	sp_ext_node_t	*unitext;
5604	sp_ext_node_t	*wmext;
5605	sp_ext_offset_t	tmpunitoff;
5606	mdnamelist_t	*spnlp = NULL;
5607	int		count;
5608	int		rval = 0;
5609	int		verbose = (options & MDCMD_VERBOSE);
5610
5611	/* get unit structure list */
5612	count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep);
5613	if (count <= 0)
5614		return (-1);
5615
5616	meta_sp_list_insert(NULL, NULL, &unitlist,
5617	    metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
5618	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
5619
5620	if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) {
5621		metafreenamelist(spnlp);
5622		return (-1);
5623	}
5624
5625	metafreenamelist(spnlp);
5626
5627	meta_sp_list_freefill(&unitlist, metagetsize(np, ep));
5628
5629	if (meta_sp_extlist_from_wm(sp, np, &wmlist,
5630	    meta_sp_cmp_by_offset, ep) < 0) {
5631		meta_sp_list_free(&unitlist);
5632		return (-1);
5633	}
5634
5635	if (getenv(META_SP_DEBUG)) {
5636		meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n");
5637		meta_sp_list_dump(unitlist);
5638		meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n");
5639		meta_sp_list_dump(wmlist);
5640	}
5641
5642	/*
5643	 * step through both lists and compare allocated nodes.  Free
5644	 * nodes and end watermarks may differ between the two but
5645	 * that's generally ok, and if they're wrong will typically
5646	 * cause misplaced allocated extents.
5647	 */
5648	if (verbose)
5649		(void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb "
5650		    "allocations match extent headers.\n"), np->cname);
5651
5652	unitext = unitlist;
5653	wmext = wmlist;
5654	while ((wmext != NULL) && (unitext != NULL)) {
5655		/* find next allocated extents in each list */
5656		while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC)
5657			wmext = wmext->ext_next;
5658
5659		while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC)
5660			unitext = unitext->ext_next;
5661
5662		if (wmext == NULL || unitext == NULL)
5663			break;
5664
5665		if (verbose) {
5666			(void) printf(dgettext(TEXT_DOMAIN,
5667			    "Metadb extent:\n"));
5668			meta_sp_display_exthdr();
5669			meta_sp_display_ext(unitext);
5670			(void) printf(dgettext(TEXT_DOMAIN,
5671			    "Extent header extent:\n"));
5672			meta_sp_display_exthdr();
5673			meta_sp_display_ext(wmext);
5674			(void) printf("\n");
5675		}
5676
5677		if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0)
5678			rval = -1;
5679
5680		/*
5681		 * if the offsets aren't equal, only increment the
5682		 * lowest one in hopes of getting the lists back in sync.
5683		 */
5684		tmpunitoff = unitext->ext_offset;
5685		if (unitext->ext_offset <= wmext->ext_offset)
5686			unitext = unitext->ext_next;
5687		if (wmext->ext_offset <= tmpunitoff)
5688			wmext = wmext->ext_next;
5689	}
5690
5691	/*
5692	 * if both lists aren't at the end then there are extra
5693	 * allocated nodes in one of them.
5694	 */
5695	if (wmext != NULL) {
5696		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5697		    "%s: extent headers contain allocations not in "
5698		    "the metadb\n\n"), np->cname);
5699		rval = -1;
5700	}
5701
5702	if (unitext != NULL) {
5703		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5704		    "%s: metadb contains allocations not in the extent "
5705		    "headers\n\n"), np->cname);
5706		rval = -1;
5707	}
5708
5709	if (options & MDCMD_PRINT) {
5710		if (rval == 0) {
5711			(void) printf(dgettext(TEXT_DOMAIN,
5712			    "%s: Soft Partition metadb matches extent "
5713			    "header configuration\n"), np->cname);
5714		} else {
5715			(void) printf(dgettext(TEXT_DOMAIN,
5716			    "%s: Soft Partition metadb does not match extent "
5717			    "header configuration\n"), np->cname);
5718		}
5719	}
5720
5721	return (rval);
5722}
5723
5724/*
5725 * FUNCTION:	meta_sp_validate_exts()
5726 * INPUT:	compnp	- name pointer for device we are recovering from
5727 *		wmext	- extent node representing watermark
5728 *		unitext	- extent node from unit structure
5729 * OUTPUT:	ep	- return error pointer
5730 * RETURNS:	int	- 0 - succes, mdmderror return code - error
5731 * PURPOSE:	Takes two extent nodes and checks them against each other.
5732 *		offset, length, sequence number, set, and name are compared.
5733 */
5734static int
5735meta_sp_validate_exts(
5736	mdname_t	*compnp,
5737	sp_ext_node_t	*wmext,
5738	sp_ext_node_t	*unitext,
5739	md_error_t	*ep
5740)
5741{
5742	if (wmext->ext_offset != unitext->ext_offset) {
5743		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5744		    "%s: unit structure and extent header offsets differ.\n"),
5745		    compnp->cname);
5746		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5747	}
5748
5749	if (wmext->ext_length != unitext->ext_length) {
5750		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5751		    "%s: unit structure and extent header lengths differ.\n"),
5752		    compnp->cname);
5753		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5754	}
5755
5756	if (wmext->ext_seq != unitext->ext_seq) {
5757		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5758		    "%s: unit structure and extent header sequence numbers "
5759		    "differ.\n"), compnp->cname);
5760		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5761	}
5762
5763	if (wmext->ext_type != unitext->ext_type) {
5764		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5765		    "%s: unit structure and extent header types differ.\n"),
5766		    compnp->cname);
5767		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5768	}
5769
5770	/*
5771	 * If one has a set pointer and the other doesn't, error.
5772	 * If both extents have setnames, then make sure they match
5773	 * If both are NULL, it's ok, they match.
5774	 */
5775	if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) {
5776		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5777		    "%s: unit structure and extent header set values "
5778		    "differ.\n"), compnp->cname);
5779		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5780	}
5781
5782	if (unitext->ext_setp != NULL) {
5783		if (strcmp(unitext->ext_setp->setname,
5784		    wmext->ext_setp->setname) != 0) {
5785			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5786			    "%s: unit structure and extent header set names "
5787			    "differ.\n"), compnp->cname);
5788			return (mdmderror(ep, MDE_RECOVER_FAILED,
5789			    0, compnp->cname));
5790		}
5791	}
5792
5793	/*
5794	 * If one has a name pointer and the other doesn't, error.
5795	 * If both extents have names, then make sure they match
5796	 * If both are NULL, it's ok, they match.
5797	 */
5798	if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) {
5799		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5800		    "%s: unit structure and extent header name values "
5801		    "differ.\n"), compnp->cname);
5802		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5803	}
5804
5805	if (unitext->ext_namep != NULL) {
5806		if (strcmp(wmext->ext_namep->cname,
5807		    unitext->ext_namep->cname) != 0) {
5808			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5809			    "%s: unit structure and extent header names "
5810			    "differ.\n"), compnp->cname);
5811			return (mdmderror(ep, MDE_RECOVER_FAILED,
5812			    0, compnp->cname));
5813		}
5814	}
5815
5816	return (0);
5817}
5818
5819/*
5820 * FUNCTION:	update_sp_status()
5821 * INPUT:	sp	- name of set we are recovering in
5822 *		minors	- pointer to an array of soft partition minor numbers
5823 *		num_sps	- number of minor numbers in array
5824 *		status	- new status to be applied to all soft parts in array
5825 *		mn_set	- set if current set is a multi-node set
5826 * OUTPUT:	ep	- return error pointer
5827 * RETURNS:	int	- 0 - success, -1 - error
5828 * PURPOSE:	update  status of soft partitions to new status. minors is an
5829 *		array of minor numbers to apply the new status to.
5830 *		If mn_set is set, a message is sent to all nodes in the
5831 *		cluster to update the status locally.
5832 */
5833static int
5834update_sp_status(
5835	mdsetname_t	*sp,
5836	minor_t		*minors,
5837	int		num_sps,
5838	sp_status_t	status,
5839	bool_t		mn_set,
5840	md_error_t	*ep
5841)
5842{
5843	int	i;
5844	int	err = 0;
5845
5846	if (mn_set) {
5847		md_mn_msg_sp_setstat_t	sp_setstat_params;
5848		int			result;
5849		md_mn_result_t		*resp = NULL;
5850
5851		for (i = 0; i < num_sps; i++) {
5852			sp_setstat_params.sp_setstat_mnum = minors[i];
5853			sp_setstat_params.sp_setstat_status = status;
5854
5855			result = mdmn_send_message(sp->setno,
5856			    MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 0,
5857			    (char *)&sp_setstat_params,
5858			    sizeof (sp_setstat_params),
5859			    &resp, ep);
5860			if (resp != NULL) {
5861				if (resp->mmr_exitval != 0)
5862					err = -1;
5863				free_result(resp);
5864			}
5865			if (result != 0) {
5866				err = -1;
5867			}
5868		}
5869	} else {
5870		if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0)
5871			err = -1;
5872	}
5873	if (err < 0) {
5874		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
5875		    "Error updating status on recovered soft "
5876		    "partitions.\n"));
5877	}
5878	return (err);
5879}
5880
5881/*
5882 * FUNCTION:	meta_sp_recover_from_wm()
5883 * INPUT:	sp	- name of set we are recovering in
5884 *		compnp	- name pointer for component we are recovering from
5885 *		options	- metarecover options
5886 * OUTPUT:	ep	- return error pointer
5887 * RETURNS:	int	- 0 - success, -1 - error
5888 * PURPOSE:	update metadb records to match watermarks.  begin by getting
5889 *		an extlist representing all soft partitions on the component.
5890 *		then build a unit structure for each soft partition.
5891 *		notify user of changes, then commit each soft partition to
5892 *		the metadb one at a time in the "recovering" state.  update
5893 *		any watermarks that may need it	(to reflect possible name
5894 *		changes), and, finally, set the status of all recovered
5895 *		partitions to the "OK" state at once.
5896 */
5897static int
5898meta_sp_recover_from_wm(
5899	mdsetname_t	*sp,
5900	mdname_t	*compnp,
5901	mdcmdopts_t	options,
5902	md_error_t	*ep
5903)
5904{
5905	sp_ext_node_t		*extlist = NULL;
5906	sp_ext_node_t		*sp_list = NULL;
5907	sp_ext_node_t		*update_list = NULL;
5908	sp_ext_node_t		*ext;
5909	sp_ext_node_t		*sp_ext;
5910	mp_unit_t		*mp;
5911	mp_unit_t		**un_array;
5912	int			numexts = 0, num_sps = 0, i = 0;
5913	int			err = 0;
5914	int			not_recovered = 0;
5915	int			committed = 0;
5916	sp_ext_length_t		sp_length = 0LL;
5917	mdnamelist_t		*keynlp = NULL;
5918	mdname_t		*np;
5919	mdname_t		*new_np;
5920	int			new_name;
5921	md_set_params_t		set_params;
5922	minor_t			*minors = NULL;
5923	char			yesno[255];
5924	char			*yes;
5925	bool_t			mn_set = 0;
5926	md_set_desc		*sd;
5927	mm_unit_t		*mm;
5928	md_set_mmown_params_t	*ownpar = NULL;
5929	int			comp_is_mirror = 0;
5930
5931	/*
5932	 * if this component appears in another metadevice already, do
5933	 * NOT recover from it.
5934	 */
5935	if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0)
5936		return (-1);
5937
5938	/* set flag if dealing with a MN set */
5939	if (!metaislocalset(sp)) {
5940		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
5941			return (-1);
5942		}
5943		if (MD_MNSET_DESC(sd))
5944			mn_set = 1;
5945	}
5946	/*
5947	 * for each watermark, build an ext_node, place on list.
5948	 */
5949	if (meta_sp_extlist_from_wm(sp, compnp, &extlist,
5950	    meta_sp_cmp_by_nameseq, ep) < 0)
5951		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
5952
5953	assert(extlist != NULL);
5954
5955	/* count number of soft partitions */
5956	for (ext = extlist;
5957	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
5958	    ext = ext->ext_next) {
5959		if (ext->ext_next != NULL &&
5960		    ext->ext_next->ext_namep != NULL &&
5961		    strcmp(ext->ext_next->ext_namep->cname,
5962		    ext->ext_namep->cname) == 0)
5963				continue;
5964		num_sps++;
5965	}
5966
5967	/* allocate array of unit structure pointers */
5968	un_array = Zalloc(num_sps * sizeof (mp_unit_t *));
5969
5970	/*
5971	 * build unit structures from list of ext_nodes.
5972	 */
5973	for (ext = extlist;
5974	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
5975	    ext = ext->ext_next) {
5976		meta_sp_list_insert(ext->ext_setp, ext->ext_namep,
5977		    &sp_list, ext->ext_offset, ext->ext_length,
5978		    ext->ext_type, ext->ext_seq, ext->ext_flags,
5979		    meta_sp_cmp_by_nameseq);
5980
5981		numexts++;
5982		sp_length += ext->ext_length - MD_SP_WMSIZE;
5983
5984		if (ext->ext_next != NULL &&
5985		    ext->ext_next->ext_namep != NULL &&
5986		    strcmp(ext->ext_next->ext_namep->cname,
5987		    ext->ext_namep->cname) == 0)
5988				continue;
5989
5990		/*
5991		 * if we made it here, we are at a soft partition
5992		 * boundary in the list.
5993		 */
5994		if (getenv(META_SP_DEBUG)) {
5995			meta_sp_debug("meta_recover_from_wm: dumping wm "
5996			    "list:\n");
5997			meta_sp_list_dump(sp_list);
5998		}
5999
6000		assert(sp_list != NULL);
6001		assert(sp_list->ext_namep != NULL);
6002
6003		if ((new_name = meta_sp_resolve_name_conflict(sp,
6004		    sp_list->ext_namep, &new_np, ep)) < 0) {
6005			err = 1;
6006			goto out;
6007		} else if (new_name) {
6008			for (sp_ext = sp_list;
6009			    sp_ext != NULL;
6010			    sp_ext = sp_ext->ext_next) {
6011				/*
6012				 * insert into the update list for
6013				 * watermark update.
6014				 */
6015				meta_sp_list_insert(sp_ext->ext_setp,
6016				    new_np, &update_list, sp_ext->ext_offset,
6017				    sp_ext->ext_length, sp_ext->ext_type,
6018				    sp_ext->ext_seq, EXTFLG_UPDATE,
6019				    meta_sp_cmp_by_offset);
6020			}
6021
6022		}
6023		if (options & MDCMD_DOIT) {
6024			/* store name in namespace */
6025			if (mn_set) {
6026				/* send message to all nodes to return key */
6027				md_mn_msg_addkeyname_t	*send_params;
6028				int			result;
6029				md_mn_result_t		*resp = NULL;
6030				int			message_size;
6031
6032				message_size =  sizeof (*send_params) +
6033				    strlen(compnp->cname) + 1;
6034				send_params = Zalloc(message_size);
6035				send_params->addkeyname_setno = sp->setno;
6036				(void) strcpy(&send_params->addkeyname_name[0],
6037				    compnp->cname);
6038				result = mdmn_send_message(sp->setno,
6039				    MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS,
6040				    0, (char *)send_params, message_size, &resp,
6041				    ep);
6042				Free(send_params);
6043				if (resp != NULL) {
6044					if (resp->mmr_exitval >= 0) {
6045						compnp->key =
6046						    (mdkey_t)resp->mmr_exitval;
6047					} else {
6048						err = 1;
6049						free_result(resp);
6050						goto out;
6051					}
6052					free_result(resp);
6053				}
6054				if (result != 0) {
6055					err = 1;
6056					goto out;
6057				}
6058				(void) metanamelist_append(&keynlp, compnp);
6059			} else {
6060				if (add_key_name(sp, compnp, &keynlp,
6061				    ep) != 0) {
6062					err = 1;
6063					goto out;
6064				}
6065			}
6066		}
6067
6068		/* create the unit structure */
6069		if ((mp = meta_sp_createunit(
6070		    (new_name) ? new_np : sp_list->ext_namep, compnp,
6071		    sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) {
6072			err = 1;
6073			goto out;
6074		}
6075
6076		if (getenv(META_SP_DEBUG)) {
6077			meta_sp_debug("meta_sp_recover_from_wm: "
6078			    "printing newly created unit structure");
6079			meta_sp_printunit(mp);
6080		}
6081
6082		/* place in unit structure array */
6083		un_array[i++] = mp;
6084
6085		/* free sp_list */
6086		meta_sp_list_free(&sp_list);
6087		sp_list = NULL;
6088		numexts = 0;
6089		sp_length = 0LL;
6090	}
6091
6092	/* display configuration updates */
6093	(void) printf(dgettext(TEXT_DOMAIN,
6094	    "The following soft partitions were found and will be added to\n"
6095	    "your metadevice configuration.\n"));
6096	(void) printf("%5s %15s %18s\n",
6097	    dgettext(TEXT_DOMAIN, "Name"),
6098	    dgettext(TEXT_DOMAIN, "Size"),
6099	    dgettext(TEXT_DOMAIN, "No. of Extents"));
6100	for (i = 0; i < num_sps; i++) {
6101		(void) printf("%5s%lu %15llu %9d\n", "d",
6102		    MD_MIN2UNIT(MD_SID(un_array[i])),
6103		    un_array[i]->un_length, un_array[i]->un_numexts);
6104	}
6105
6106	if (!(options & MDCMD_DOIT)) {
6107		not_recovered = 1;
6108		goto out;
6109	}
6110
6111	/* ask user for confirmation */
6112	(void) printf(dgettext(TEXT_DOMAIN,
6113	    "WARNING: You are about to add one or more soft partition\n"
6114	    "metadevices to your metadevice configuration.  If there\n"
6115	    "appears to be an error in the soft partition(s) displayed\n"
6116	    "above, do NOT proceed with this recovery operation.\n"));
6117	(void) printf(dgettext(TEXT_DOMAIN,
6118	    "Are you sure you want to do this (yes/no)? "));
6119
6120	(void) fflush(stdout);
6121	if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
6122	    (strlen(yesno) == 1))
6123		(void) snprintf(yesno, sizeof (yesno), "%s\n",
6124		    dgettext(TEXT_DOMAIN, "no"));
6125	yes = dgettext(TEXT_DOMAIN, "yes");
6126	if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
6127		not_recovered = 1;
6128		goto out;
6129	}
6130
6131	/* commit records one at a time */
6132	for (i = 0; i < num_sps; i++) {
6133		(void) memset(&set_params, 0, sizeof (set_params));
6134		set_params.mnum = MD_SID(un_array[i]);
6135		set_params.size = (un_array[i])->c.un_size;
6136		set_params.mdp = (uintptr_t)(un_array[i]);
6137		set_params.options =
6138		    meta_check_devicesize(un_array[i]->un_length);
6139		if (set_params.options == MD_CRO_64BIT) {
6140			un_array[i]->c.un_revision |= MD_64BIT_META_DEV;
6141		} else {
6142			un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV;
6143		}
6144		MD_SETDRIVERNAME(&set_params, MD_SP,
6145		    MD_MIN2SET(set_params.mnum));
6146
6147		np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep);
6148
6149		/*
6150		 * If this is an MN set, send the MD_IOCSET ioctl to all nodes
6151		 */
6152		if (mn_set) {
6153			md_mn_msg_iocset_t	send_params;
6154			int			result;
6155			md_mn_result_t		*resp = NULL;
6156			int			mess_size;
6157
6158			/*
6159			 * Calculate message size. md_mn_msg_iocset_t only
6160			 * contains one extent, so increment the size to
6161			 * include all extents
6162			 */
6163			mess_size = sizeof (send_params) -
6164			    sizeof (mp_ext_t) +
6165			    (un_array[i]->un_numexts * sizeof (mp_ext_t));
6166
6167			send_params.iocset_params = set_params;
6168			(void) memcpy(&send_params.unit, un_array[i],
6169			    sizeof (*un_array[i]) - sizeof (mp_ext_t) +
6170			    (un_array[i]->un_numexts * sizeof (mp_ext_t)));
6171			result = mdmn_send_message(sp->setno,
6172			    MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 0,
6173			    (char *)&send_params, mess_size, &resp,
6174			    ep);
6175			if (resp != NULL) {
6176				if (resp->mmr_exitval != 0)
6177					err = 1;
6178				free_result(resp);
6179			}
6180			if (result != 0) {
6181				err = 1;
6182			}
6183		} else {
6184			if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
6185			    np->cname) != 0) {
6186				err = 1;
6187			}
6188		}
6189
6190		if (err == 1) {
6191			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
6192			    "%s: Error committing record to metadb.\n"),
6193			    np->cname);
6194			goto out;
6195		}
6196
6197		/* note that we've committed a record */
6198		if (!committed)
6199			committed = 1;
6200
6201		/* update any watermarks that need it */
6202		if (update_list != NULL) {
6203			md_sp_t *msp;
6204
6205			/*
6206			 * Check to see if we're trying to create a partition
6207			 * on a mirror. If so we may have to enforce an
6208			 * ownership change before writing the watermark out.
6209			 */
6210			if (metaismeta(compnp)) {
6211				char *miscname;
6212
6213				miscname = metagetmiscname(compnp, ep);
6214				if (miscname != NULL)
6215					comp_is_mirror = (strcmp(miscname,
6216					    MD_MIRROR) == 0);
6217				else
6218					comp_is_mirror = 0;
6219			}
6220			/*
6221			 * If this is a MN set and the component is a mirror,
6222			 * change ownership to this node in order to write the
6223			 * watermarks
6224			 */
6225			if (mn_set && comp_is_mirror) {
6226				mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep);
6227				if (mm == NULL) {
6228					err = 1;
6229					goto out;
6230				} else {
6231					err = meta_mn_change_owner(&ownpar,
6232					    sp->setno,
6233					    meta_getminor(compnp->dev),
6234					    sd->sd_mn_mynode->nd_nodeid,
6235					    MD_MN_MM_PREVENT_CHANGE |
6236					    MD_MN_MM_SPAWN_THREAD);
6237					if (err != 0)
6238						goto out;
6239				}
6240			}
6241
6242			if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
6243				err = 1;
6244				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
6245				    "%s: Error updating extent headers.\n"),
6246				    np->cname);
6247				goto out;
6248			}
6249			if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) {
6250				err = 1;
6251				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
6252				    "%s: Error updating extent headers "
6253				    "on disk.\n"), np->cname);
6254				goto out;
6255			}
6256		}
6257		/*
6258		 * If we have changed ownership earlier and prevented any
6259		 * ownership changes, we can now allow ownership changes
6260		 * again.
6261		 */
6262		if (ownpar) {
6263			(void) meta_mn_change_owner(&ownpar, sp->setno,
6264			    ownpar->d.mnum,
6265			    ownpar->d.owner,
6266			    MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD);
6267		}
6268	}
6269
6270	/* update status of all soft partitions to OK */
6271	minors = Zalloc(num_sps * sizeof (minor_t));
6272	for (i = 0; i < num_sps; i++)
6273		minors[i] = MD_SID(un_array[i]);
6274
6275	err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep);
6276	if (err != 0)
6277		goto out;
6278
6279	if (options & MDCMD_PRINT)
6280		(void) printf(dgettext(TEXT_DOMAIN, "%s: "
6281		    "Soft Partitions recovered from device.\n"),
6282		    compnp->cname);
6283out:
6284	/* free memory */
6285	if (extlist != NULL)
6286		meta_sp_list_free(&extlist);
6287	if (sp_list != NULL)
6288		meta_sp_list_free(&sp_list);
6289	if (update_list != NULL)
6290		meta_sp_list_free(&update_list);
6291	if (un_array != NULL)	{
6292		for (i = 0; i < num_sps; i++)
6293			Free(un_array[i]);
6294		Free(un_array);
6295	}
6296	if (minors != NULL)
6297		Free(minors);
6298	if (ownpar != NULL)
6299		Free(ownpar);
6300	(void) fflush(stdout);
6301
6302	if ((keynlp != NULL) && (committed != 1)) {
6303		/*
6304		 * if we haven't committed any softparts, either because of an
6305		 * error or because the user decided not to proceed, delete
6306		 * namelist key for the component
6307		 */
6308		if (mn_set) {
6309			mdnamelist_t	*p;
6310
6311			for (p = keynlp; (p != NULL); p = p->next) {
6312				mdname_t		*np = p->namep;
6313				md_mn_msg_delkeyname_t	send_params;
6314				md_mn_result_t		*resp = NULL;
6315
6316				send_params.delkeyname_dev = np->dev;
6317				send_params.delkeyname_setno = sp->setno;
6318				send_params.delkeyname_key = np->key;
6319				(void) mdmn_send_message(sp->setno,
6320				    MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS,
6321				    0, (char *)&send_params,
6322				    sizeof (send_params),
6323				    &resp, ep);
6324				if (resp != NULL) {
6325					free_result(resp);
6326				}
6327			}
6328		} else {
6329			(void) del_key_names(sp, keynlp, NULL);
6330		}
6331	}
6332
6333	metafreenamelist(keynlp);
6334
6335	if (err)
6336		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
6337
6338	if (not_recovered)
6339		if (options & MDCMD_PRINT)
6340			(void) printf(dgettext(TEXT_DOMAIN, "%s: "
6341			    "Soft Partitions NOT recovered from device.\n"),
6342			    compnp->cname);
6343	return (0);
6344}
6345
6346/*
6347 * FUNCTION:	meta_sp_recover_from_unit()
6348 * INPUT:	sp	- name of set we are recovering in
6349 *		compnp	- name of component we are recovering from
6350 *		options	- metarecover options
6351 * OUTPUT:	ep	- return error pointer
6352 * RETURNS:	int	- 0 - success, -1 - error
6353 * PURPOSE:	update watermarks to match metadb records.  begin by getting
6354 *		a namelist representing all soft partitions on the specified
6355 *		component.  then, build an extlist representing the soft
6356 *		partitions, filling in the freespace extents.  notify user
6357 *		of changes, place all soft partitions into the "recovering"
6358 *		state and update the watermarks.  finally, return all soft
6359 *		partitions to the "OK" state.
6360 */
6361static int
6362meta_sp_recover_from_unit(
6363	mdsetname_t	*sp,
6364	mdname_t	*compnp,
6365	mdcmdopts_t	options,
6366	md_error_t	*ep
6367)
6368{
6369	mdnamelist_t	*spnlp = NULL;
6370	mdnamelist_t	*nlp = NULL;
6371	sp_ext_node_t	*ext = NULL;
6372	sp_ext_node_t	*extlist = NULL;
6373	int		count;
6374	char		yesno[255];
6375	char		*yes;
6376	int		rval = 0;
6377	minor_t		*minors = NULL;
6378	int		i;
6379	md_sp_t		*msp;
6380	md_set_desc	*sd;
6381	bool_t		mn_set = 0;
6382	daddr_t		start_block;
6383
6384	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
6385	if (count <= 0)
6386		return (-1);
6387
6388	/* set flag if dealing with a MN set */
6389	if (!metaislocalset(sp)) {
6390		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
6391			return (-1);
6392		}
6393		if (MD_MNSET_DESC(sd))
6394			mn_set = 1;
6395	}
6396	/*
6397	 * Save the XDR unit structure for one of the soft partitions;
6398	 * we'll use this later to provide metadevice context to
6399	 * update the watermarks so the device can be resolved by
6400	 * devid instead of dev_t.
6401	 */
6402	if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) {
6403		metafreenamelist(spnlp);
6404		return (-1);
6405	}
6406
6407	if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
6408	    MD_DISKADDR_ERROR) {
6409		return (-1);
6410	}
6411
6412	meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
6413	    EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
6414	meta_sp_list_insert(NULL, NULL, &extlist,
6415	    metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
6416	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
6417
6418	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
6419		metafreenamelist(spnlp);
6420		return (-1);
6421	}
6422
6423	assert(extlist != NULL);
6424	if ((options & MDCMD_VERBOSE) != 0) {
6425		(void) printf(dgettext(TEXT_DOMAIN,
6426		    "Updating extent headers on device %s from metadb.\n\n"),
6427		    compnp->cname);
6428		(void) printf(dgettext(TEXT_DOMAIN,
6429		    "The following extent headers will be written:\n"));
6430		meta_sp_display_exthdr();
6431	}
6432
6433	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
6434
6435	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
6436
6437		/* mark every node for updating except the reserved space */
6438		if (ext->ext_type != EXTTYP_RESERVED) {
6439			ext->ext_flags |= EXTFLG_UPDATE;
6440
6441			/* print extent information */
6442			if ((options & MDCMD_VERBOSE) != 0)
6443				meta_sp_display_ext(ext);
6444		}
6445	}
6446
6447	/* request verification and then update all watermarks */
6448	if ((options & MDCMD_DOIT) != 0) {
6449
6450		(void) printf(dgettext(TEXT_DOMAIN,
6451		    "\nWARNING: You are about to overwrite portions of %s\n"
6452		    "with soft partition metadata. The extent headers will be\n"
6453		    "written to match the existing metadb configuration.  If\n"
6454		    "the device was not previously setup with this\n"
6455		    "configuration, data loss may result.\n\n"),
6456		    compnp->cname);
6457		(void) printf(dgettext(TEXT_DOMAIN,
6458		    "Are you sure you want to do this (yes/no)? "));
6459
6460		(void) fflush(stdout);
6461		if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
6462		    (strlen(yesno) == 1))
6463			(void) snprintf(yesno, sizeof (yesno),
6464			    "%s\n", dgettext(TEXT_DOMAIN, "no"));
6465		yes = dgettext(TEXT_DOMAIN, "yes");
6466		if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) {
6467			/* place soft partitions into recovering state */
6468			minors = Zalloc(count * sizeof (minor_t));
6469			for (nlp = spnlp, i = 0;
6470			    nlp != NULL && i < count;
6471			    nlp = nlp->next, i++) {
6472				assert(nlp->namep != NULL);
6473				minors[i] = meta_getminor(nlp->namep->dev);
6474			}
6475			if (update_sp_status(sp, minors, count,
6476			    MD_SP_RECOVER, mn_set, ep) != 0) {
6477				rval = -1;
6478				goto out;
6479			}
6480
6481			/* update the watermarks */
6482			if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
6483				rval = -1;
6484				goto out;
6485			}
6486
6487			if (options & MDCMD_PRINT) {
6488				(void) printf(dgettext(TEXT_DOMAIN, "%s: "
6489				    "Soft Partitions recovered from metadb\n"),
6490				    compnp->cname);
6491			}
6492
6493			/* return soft partitions to the OK state */
6494			if (update_sp_status(sp, minors, count,
6495			    MD_SP_OK, mn_set, ep) != 0) {
6496				rval = -1;
6497				goto out;
6498			}
6499
6500			rval = 0;
6501			goto out;
6502		}
6503	}
6504
6505	if (options & MDCMD_PRINT) {
6506		(void) printf(dgettext(TEXT_DOMAIN,
6507		    "%s: Soft Partitions NOT recovered from metadb\n"),
6508		    compnp->cname);
6509	}
6510
6511out:
6512	if (minors != NULL)
6513		Free(minors);
6514	metafreenamelist(spnlp);
6515	meta_sp_list_free(&extlist);
6516	(void) fflush(stdout);
6517	return (rval);
6518}
6519
6520
6521/*
6522 * FUNCTION:	meta_sp_update_abr()
6523 * INPUT:	sp	- name of set we are recovering in
6524 * OUTPUT:	ep	- return error pointer
6525 * RETURNS:	int	- 0 - success, -1 - error
6526 * PURPOSE:	update the ABR state for all soft partitions in the set. This
6527 *		is called when joining a set. It sends a message to the master
6528 *		node for each soft partition to get the value of tstate and
6529 *		then sets ABR ,if required, by opening the sp, setting ABR
6530 *		and then closing the sp. This approach is taken rather that
6531 *		just issuing the MD_MN_SET_CAP ioctl, in order to deal with
6532 *		the case when we have another node simultaneously unsetting ABR.
6533 */
6534int
6535meta_sp_update_abr(
6536	mdsetname_t	*sp,
6537	md_error_t	*ep
6538)
6539{
6540	mdnamelist_t	*devnlp = NULL;
6541	mdnamelist_t	*p;
6542	mdname_t	*devnp = NULL;
6543	md_unit_t	*un;
6544	char		fname[MAXPATHLEN];
6545	int		mnum, fd;
6546	volcap_t	vc;
6547	uint_t		tstate;
6548
6549
6550	if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) {
6551		return (-1);
6552	}
6553
6554	/* Exit if no soft partitions in this set */
6555	if (devnlp == NULL)
6556		return (0);
6557
6558	/* For each soft partition */
6559	for (p = devnlp; (p != NULL); p = p->next) {
6560		devnp = p->namep;
6561
6562		/* check if this is a top level metadevice */
6563		if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL)
6564			goto out;
6565		if (MD_HAS_PARENT(MD_PARENT(un))) {
6566			Free(un);
6567			continue;
6568		}
6569		Free(un);
6570
6571		/* Get tstate from Master */
6572		if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) {
6573			mdname_t	*np;
6574			np = metamnumname(&sp, meta_getminor(devnp->dev), 0,
6575			    ep);
6576			if (np) {
6577				md_perror(dgettext(TEXT_DOMAIN,
6578				    "Unable to get tstate for %s"), np->cname);
6579			}
6580			continue;
6581		}
6582		/* If not set on the master, nothing to do */
6583		if (!(tstate & MD_ABR_CAP))
6584			continue;
6585
6586		mnum = meta_getminor(devnp->dev);
6587		(void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u",
6588		    sp->setname, (unsigned)MD_MIN2UNIT(mnum));
6589		if ((fd = open(fname, O_RDWR, 0)) < 0) {
6590			md_perror(dgettext(TEXT_DOMAIN,
6591			    "Could not open device %s"), fname);
6592			continue;
6593		}
6594
6595		/* Set ABR state */
6596		vc.vc_info = 0;
6597		vc.vc_set = 0;
6598		if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) {
6599			(void) close(fd);
6600			continue;
6601		}
6602
6603		vc.vc_set = DKV_ABR_CAP;
6604		if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) {
6605			(void) close(fd);
6606			goto out;
6607		}
6608
6609		(void) close(fd);
6610	}
6611	metafreenamelist(devnlp);
6612	return (0);
6613out:
6614	metafreenamelist(devnlp);
6615	return (-1);
6616}
6617
6618/*
6619 * FUNCTION:	meta_mn_sp_update_abr()
6620 * INPUT:	arg	- Given set.
6621 * PURPOSE:	update the ABR state for all soft partitions in the set by
6622 *		forking a process to call meta_sp_update_abr()
6623 *		This function is only called via rpc.metad when adding a node
6624 *		to a set, ie this node is beong joined to the set by another
6625 *		node.
6626 */
6627void *
6628meta_mn_sp_update_abr(void *arg)
6629{
6630	set_t		setno = *((set_t *)arg);
6631	mdsetname_t	*sp;
6632	md_error_t	mde = mdnullerror;
6633	int		fval;
6634
6635	/* should have a set */
6636	assert(setno != NULL);
6637
6638	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
6639		mde_perror(&mde, "");
6640		return (NULL);
6641	}
6642
6643	if (!(meta_is_mn_set(sp, &mde))) {
6644		mde_perror(&mde, "");
6645		return (NULL);
6646	}
6647
6648	/* fork a process */
6649	if ((fval = md_daemonize(sp, &mde)) != 0) {
6650		/*
6651		 * md_daemonize will fork off a process.  The is the
6652		 * parent or error.
6653		 */
6654		if (fval > 0) {
6655			return (NULL);
6656		}
6657		mde_perror(&mde, "");
6658		return (NULL);
6659	}
6660	/*
6661	 * Child process should never return back to rpc.metad, but
6662	 * should exit.
6663	 * Flush all internally cached data inherited from parent process
6664	 * since cached data will be cleared when parent process RPC request
6665	 * has completed (which is possibly before this child process
6666	 * can complete).
6667	 * Child process can retrieve and cache its own copy of data from
6668	 * rpc.metad that won't be changed by the parent process.
6669	 *
6670	 * Reset md_in_daemon since this child will be a client of rpc.metad
6671	 * not part of the rpc.metad daemon itself.
6672	 * md_in_daemon is used by rpc.metad so that libmeta can tell if
6673	 * this thread is rpc.metad or any other thread.  (If this thread
6674	 * was rpc.metad it could use some short circuit code to get data
6675	 * directly from rpc.metad instead of doing an RPC call to rpc.metad).
6676	 */
6677	md_in_daemon = 0;
6678	metaflushsetname(sp);
6679	sr_cache_flush_setno(setno);
6680	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
6681		mde_perror(&mde, "");
6682		md_exit(sp, 1);
6683	}
6684
6685
6686	/*
6687	 * Closing stdin/out/err here.
6688	 */
6689	(void) close(0);
6690	(void) close(1);
6691	(void) close(2);
6692	assert(fval == 0);
6693
6694	(void) meta_sp_update_abr(sp, &mde);
6695
6696	md_exit(sp, 0);
6697	/*NOTREACHED*/
6698	return (NULL);
6699}
6700
6701int
6702meta_sp_check_component(
6703	mdsetname_t	*sp,
6704	mdname_t	*np,
6705	md_error_t	*ep
6706)
6707{
6708	md_sp_t	*msp;
6709	minor_t	mnum = 0;
6710	md_dev64_t	dev = 0;
6711	mdnm_params_t	nm;
6712	md_getdevs_params_t	mgd;
6713	side_t	sideno;
6714	char	*miscname;
6715	md_dev64_t	*mydev = NULL;
6716	char	*pname = NULL, *t;
6717	char	*ctd_name = NULL;
6718	char	*devname = NULL;
6719	int	len;
6720	int	rval = -1;
6721
6722	(void) memset(&nm, '\0', sizeof (nm));
6723	if ((msp = meta_get_sp_common(sp, np, 0, ep)) == NULL)
6724		return (-1);
6725
6726	if ((miscname = metagetmiscname(np, ep)) == NULL)
6727		return (-1);
6728
6729	sideno = getmyside(sp, ep);
6730
6731	meta_sp_debug("meta_sp_check_component: %s is on %s key: %d"
6732	    " dev: %llu\n",
6733	    np->cname, msp->compnamep->cname, msp->compnamep->key,
6734	    msp->compnamep->dev);
6735
6736	/*
6737	 * Now get the data from the unit structure. The compnamep stuff
6738	 * contains the data from the namespace and we need the un_dev
6739	 * from the unit structure.
6740	 */
6741	(void) memset(&mgd, '\0', sizeof (mgd));
6742	MD_SETDRIVERNAME(&mgd, miscname, sp->setno);
6743	mgd.cnt = 1;		    /* sp's only have one subdevice */
6744	mgd.mnum = meta_getminor(np->dev);
6745
6746	mydev = Zalloc(sizeof (*mydev));
6747	mgd.devs = (uintptr_t)mydev;
6748
6749	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
6750		meta_sp_debug("meta_sp_check_component: ioctl failed\n");
6751		(void) mdstealerror(ep, &mgd.mde);
6752		rval = 0;
6753		goto out;
6754	} else if (mgd.cnt <= 0) {
6755		assert(mgd.cnt >= 0);
6756		rval = 0;
6757		goto out;
6758	}
6759
6760	/* Get the devname from the name space. */
6761	if ((devname = meta_getnmentbykey(sp->setno, sideno,
6762	    msp->compnamep->key, NULL, &mnum, &dev, ep)) == NULL) {
6763		meta_sp_debug("meta_sp_check_component: key %d not"
6764		    "found\n", msp->compnamep->key);
6765		goto out;
6766	}
6767
6768	meta_sp_debug("dev %s from component: (%lu, %lu)\n",
6769	    devname,
6770	    meta_getmajor(*mydev),
6771	    meta_getminor(*mydev));
6772	meta_sp_debug("minor from the namespace: %lu\n", mnum);
6773
6774	if (mnum != meta_getminor(*mydev)) {
6775		/*
6776		 * The minor numbers are different. Update the namespace
6777		 * with the information from the component.
6778		 */
6779
6780		t = strrchr(devname, '/');
6781		t++;
6782		ctd_name = Strdup(t);
6783
6784		meta_sp_debug("meta_sp_check_component: ctd_name: %s\n",
6785		    ctd_name);
6786
6787		len = strlen(devname);
6788		t = strrchr(devname, '/');
6789		t++;
6790		pname = Zalloc((len - strlen(t)) + 1);
6791		(void) strncpy(pname, devname, (len - strlen(t)));
6792		meta_sp_debug("pathname: %s\n", pname);
6793
6794		meta_sp_debug("updating the minor number to %lu\n", nm.mnum);
6795
6796		if (meta_update_namespace(sp->setno, sideno,
6797		    ctd_name, *mydev, msp->compnamep->key, pname,
6798		    ep) != 0) {
6799			goto out;
6800		}
6801	}
6802out:
6803	if (pname != NULL)
6804		Free(pname);
6805	if (ctd_name != NULL)
6806		Free(ctd_name);
6807	if (devname != NULL)
6808		Free(devname);
6809	if (mydev != NULL)
6810		Free(mydev);
6811	return (rval);
6812}
6813