1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/time.h>
28#include <sys/ksynch.h>
29#include <sys/kmem.h>
30#include <sys/errno.h>
31#include <sys/cmn_err.h>
32#include <sys/debug.h>
33#include <sys/ddi.h>
34#include <sys/nsc_thread.h>
35#include <sys/unistat/spcs_s.h>
36#include <sys/unistat/spcs_errors.h>
37
38#include <sys/unistat/spcs_s_k.h>
39#include <sys/nsctl/nsctl.h>
40#include "dsw.h"
41#include "dsw_dev.h"
42#include "../rdc/rdc_update.h"
43#include <sys/nskernd.h>
44
45#include <sys/sdt.h>		/* dtrace is S10 or later */
46
47#ifdef DS_DDICT
48#include "../contract.h"
49#endif
50
51/*
52 * Instant Image
53 *
54 * This file contains the core implementation of II.
55 *
56 * II is implemented as a simple filter module that pushes itself between
57 * user (SV, STE, etc.) and SDBC or NET.
58 *
59 */
60
61
/*
 * REMOTE_VOL(s, ip)
 *	Evaluates non-zero when 's' is non-zero and DSW_SHDEXPORT is set in
 *	ip->bi_flags, or when 's' is zero and DSW_SHDIMPORT is set.
 *	Every argument is parenthesized so that any expression (for example
 *	"&info") may be passed.
 */
#define	REMOTE_VOL(s, ip)	\
	(((s) && (((ip)->bi_flags) & DSW_SHDEXPORT)) || \
	(!(s) && (((ip)->bi_flags) & DSW_SHDIMPORT)))

/*
 * total_ref(ip)
 *	Sum of the bi_shdref, bi_shdrref and bi_bmpref counters of 'ip'.
 *	The bi_mstref and bi_mstrref counters are added as well, but only
 *	when NSHADOWS(ip) is zero.
 */
#define	total_ref(ip)	\
	(((ip)->bi_shdref + (ip)->bi_shdrref + (ip)->bi_bmpref) + \
	(NSHADOWS(ip) ? 0 : (ip)->bi_mstref + (ip)->bi_mstrref))


/*
 * II_TAIL_COPY(d, s, m, t)
 *	Copy structure 's' into structure 'd' starting at member 'm' and
 *	running to the end of 'd'.  't' is the type used to work out the
 *	offset of 'm'.  The offset is taken through uintptr_t so that the
 *	pointer value is not truncated on LP64 kernels.
 */
#define	II_TAIL_COPY(d, s, m, t)	bcopy(&((s).m), &((d).m), \
					sizeof (d) - (uintptr_t)&((t *)0)->m)
71extern dev_info_t *ii_dip;
72
73#define	II_LINK_CLUSTER(ip, cluster) \
74	_ii_ll_add(ip, &_ii_cluster_mutex, &_ii_cluster_top, cluster, \
75	    &ip->bi_cluster)
76#define	II_UNLINK_CLUSTER(ip) \
77	_ii_ll_remove(ip, &_ii_cluster_mutex, &_ii_cluster_top, &ip->bi_cluster)
78
79#define	II_LINK_GROUP(ip, group) \
80	_ii_ll_add(ip, &_ii_group_mutex, &_ii_group_top, group, &ip->bi_group)
81#define	II_UNLINK_GROUP(ip) \
82	_ii_ll_remove(ip, &_ii_group_mutex, &_ii_group_top, &ip->bi_group)
83
/*
 * Global list heads.  ii_volume() walks _ii_info_top through bi_next and
 * _ii_overflow_top through ii_next.  The cluster and group heads are the
 * lists handed to _ii_ll_add()/_ii_ll_remove() by the II_LINK_* and
 * II_UNLINK_* macros.
 */
_ii_info_t *_ii_info_top;
_ii_info_t *_ii_mst_top = 0;
_ii_overflow_t	*_ii_overflow_top;
_ii_lsthead_t *_ii_cluster_top;
_ii_lsthead_t *_ii_group_top;

int	ii_debug;		/* level of cmn_err noise */
int	ii_bitmap;		/* bitmap operations switch */
uint_t	ii_header = 16;		/* Undocumented tunable (with adb!), start */
				/* of area cleared in volume when a dependent */
				/* shadow is disabled. */
				/* ii_throttle_unit: max # of chunks in copy */
				/* loop before delay */
int	ii_throttle_unit = MIN_THROTTLE_UNIT;
				/* ii_throttle_delay: length of delay during */
				/* update loop */
int	ii_throttle_delay = MIN_THROTTLE_DELAY;
int	ii_copy_direct = 1;
int	ii_nconcopy = 10;	/* default value when starting with no cache */
kmutex_t _ii_cluster_mutex;	/* passed with _ii_cluster_top to list ops */
kmutex_t _ii_group_mutex;	/* passed with _ii_group_top to list ops */

/* non-zero makes _ii_config() fail with DSW_ESHUTDOWN */
static int _ii_shutting_down = 0;
/* "ii" and "ii-raw" io providers registered by _ii_init_dev() */
static nsc_io_t *_ii_io, *_ii_ior;
/* "ii:kmem" memory type registered by _ii_init_dev() */
static nsc_mem_t *_ii_local_mem;
static nsc_def_t _ii_fd_def[], _ii_io_def[], _ii_ior_def[];
static kmutex_t	_ii_info_mutex;		/* held while walking _ii_info_top */
static kmutex_t	_ii_overflow_mutex;	/* held while walking _ii_overflow_top */
static kmutex_t _ii_config_mutex;	/* taken by _ii_config() */
static _ii_bmp_ops_t alloc_buf_bmp, kmem_buf_bmp;
static nsc_svc_t *ii_volume_update;	/* "RDCVolumeUpdated" service token */
static nsc_svc_t *ii_report_luns;	/* IIReportLuns token */
static nsc_svc_t *ii_get_initiators;	/* IIGetInitiators token */
static ksema_t	_ii_concopy_sema;
/* non-zero while _ii_concopy_sema needs sema_destroy() in _ii_deinit_dev() */
static int	_ii_concopy_init = 0;
static int	_ii_instance = 0;
118
/*
 * Forward declarations for routines defined in this file, and external
 * declarations for routines and data provided by the rest of the driver.
 */
void _ii_deinit_dev();

/* set and sibling teardown */
static void _ii_info_free(_ii_info_t *ip);
static void _ii_info_freeshd(_ii_info_t *ip);
static void ii_sibling_free(_ii_info_t *ip);
/* bitmap header access */
ii_header_t *_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp);
int _ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip,
    nsc_buf_t *tmp);
static void _ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip,
    nsc_buf_t *tmp);
/* volume copy control */
static int _ii_copyvol(_ii_info_t *, int, int, spcs_s_info_t, int);
static void _ii_stopvol(_ii_info_t *ip);
static int _ii_stopcopy(_ii_info_t *ip);
/* lookup by volume name */
static _ii_info_t *_ii_find_set(char *volume);
static _ii_info_t *_ii_find_vol(char *, int);
static _ii_overflow_t *_ii_find_overflow(char *volume);
static void _ii_ioctl_done(_ii_info_t *ip);
/* chunk locking */
static void _ii_lock_chunk(_ii_info_t *ip, chunkid_t);
static void _ii_unlock_chunks(_ii_info_t *ip, chunkid_t, int);
void _ii_error(_ii_info_t *ip, int error_type);
/* buffer handle management */
static nsc_buf_t *_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(),
    void (*w_cb)(), ii_fd_t *bfd);
static int _ii_free_handle(ii_buf_t *h, ii_fd_t *bfd);
/* routines provided by other files of the driver */
extern nsc_size_t ii_btsize(nsc_size_t);
extern int ii_tinit(_ii_info_t *);
extern chunkid_t ii_tsearch(_ii_info_t *, chunkid_t);
extern void ii_tdelete(_ii_info_t *, chunkid_t);
extern void ii_reclaim_overflow(_ii_info_t *);
/* overflow volume handling; 'disable' takes NO_RECLAIM/RECLAIM/INIT_OVR */
static void ii_overflow_free(_ii_info_t *ip, int disable);
static int ii_overflow_attach(_ii_info_t *, char *, int);
int _ii_nsc_io(_ii_info_t *, int, nsc_fd_t *, int, nsc_off_t, unsigned char *,
	nsc_size_t);
/* path registration */
static nsc_path_t *_ii_register_path(char *path, int type, nsc_io_t *io);
static int _ii_unregister_path(nsc_path_t *sp, int flag, char *type);
/* reserve serialisation (bi_rsrvcnt / bi_reservecv) */
static int _ii_reserve_begin(_ii_info_t *ip);
static int _ii_wait_for_it(_ii_info_t *ip);
static void _ii_reserve_end(_ii_info_t *ip);
static kstat_t *_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op);
/* list helpers behind the II_LINK_* and II_UNLINK_* macros */
static int _ii_ll_add(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char *,
    char **);
static int _ii_ll_remove(_ii_info_t *, kmutex_t *, _ii_lsthead_t **, char **);
/* unlock a single chunk: _ii_unlock_chunks() with a count of one */
#define	_ii_unlock_chunk(ip, chunk)	_ii_unlock_chunks(ip, chunk, 1)
/* driver revision numbers, defined elsewhere */
extern const int dsw_major_rev;
extern const int dsw_minor_rev;
extern const int dsw_micro_rev;
extern const int dsw_baseline_rev;
165
166/*
167 * These constants are used by ii_overflow_free() to indicate how the
168 * reclamation should take place.
169 *	NO_RECLAIM: just detach the overflow from the set; do not
170 *		attempt to reclaim chunks, do not decrement the
171 *		used-by count
172 *	RECLAIM: reclaim all chunks before decrementing the used-by count
173 *	INIT_OVR: decrement the used-by count only; do not reclaim chunks
174 */
175
#define	NO_RECLAIM 0	/* detach only: no reclaim, used-by count untouched */
#define	RECLAIM 1	/* reclaim all chunks, then drop the used-by count */
#define	INIT_OVR 2	/* drop the used-by count only, no reclaim */
179
/*
 * NOTE(review): no user of this structure is visible in this part of the
 * file.  The fields appear to mirror the parameters of _ii_copyvol()
 * (ip, flag, rtype, kstatus, wait) plus a result code -- confirm against
 * the code that fills it in.
 */
struct	copy_args {			/* arguments passed to copy process */
	_ii_info_t *ip;			/* set being copied */
	int flag;
	int rtype;
	int wait;
	spcs_s_info_t kstatus;		/* status handle for error reporting */
	int rc;				/* presumably the copy result; verify */
};
188
/*
 * set-specific kstats info
 *
 * Template of named kstat entries (name, data type) for one II set.  The
 * values are filled in by ii_set_stats_update().  Volume names are split
 * over four KSTAT_DATA_CHAR entries (A-D) by ii_str_kstat_copy().
 * NOTE(review): the initializer order presumably has to match the member
 * order of ii_kstat_set_t in dsw_dev.h -- confirm before reordering.
 */
ii_kstat_set_t ii_kstat_set = {
	{ DSW_SKSTAT_SIZE, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_MTIME, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_FLAGS, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_THROTTLE_UNIT, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_THROTTLE_DELAY, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_SHDCHKS, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_SHDCHKUSED, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_SHDBITS, KSTAT_DATA_ULONG },
	{ DSW_SKSTAT_COPYBITS, KSTAT_DATA_ULONG },
	/* master volume name, parts A-D */
	{ DSW_SKSTAT_MSTA, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_MSTB, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_MSTC, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_MSTD, KSTAT_DATA_CHAR },
	/* set (key) name, parts A-D */
	{ DSW_SKSTAT_SETA, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SETB, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SETC, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SETD, KSTAT_DATA_CHAR },
	/* bitmap volume name, parts A-D */
	{ DSW_SKSTAT_BMPA, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPB, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPC, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPD, KSTAT_DATA_CHAR },
	/* overflow volume name, parts A-D */
	{ DSW_SKSTAT_OVRA, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRB, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRC, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRD, KSTAT_DATA_CHAR },
	/* names of the per-volume io kstats */
	{ DSW_SKSTAT_MSTIO, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_SHDIO, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_BMPIO, KSTAT_DATA_CHAR },
	{ DSW_SKSTAT_OVRIO, KSTAT_DATA_CHAR },
};
221
222/*
223 * _ii_init_dev
224 *	Initialise the shadow driver
225 *
226 */
227
228int
229_ii_init_dev()
230{
231	_ii_io = nsc_register_io("ii", NSC_II_ID|NSC_REFCNT|NSC_FILTER,
232	    _ii_io_def);
233	if (_ii_io == NULL)
234		cmn_err(CE_WARN, "!ii: nsc_register_io failed.");
235
236	_ii_ior = nsc_register_io("ii-raw", NSC_IIR_ID|NSC_REFCNT|NSC_FILTER,
237	    _ii_ior_def);
238	if (_ii_ior == NULL)
239		cmn_err(CE_WARN, "!ii: nsc_register_io r failed.");
240
241	_ii_local_mem = nsc_register_mem("ii:kmem", NSC_MEM_LOCAL, 0);
242	if (_ii_local_mem == NULL)
243		cmn_err(CE_WARN, "!ii: nsc_register_mem failed.");
244
245
246	if (!_ii_io || !_ii_ior || !_ii_local_mem) {
247		_ii_deinit_dev();
248		return (ENOMEM);
249	}
250
251	mutex_init(&_ii_info_mutex, NULL, MUTEX_DRIVER, NULL);
252	mutex_init(&_ii_overflow_mutex, NULL, MUTEX_DRIVER, NULL);
253	mutex_init(&_ii_config_mutex, NULL, MUTEX_DRIVER, NULL);
254	mutex_init(&_ii_cluster_mutex, NULL, MUTEX_DRIVER, NULL);
255	mutex_init(&_ii_group_mutex, NULL, MUTEX_DRIVER, NULL);
256
257	ii_volume_update = nsc_register_svc("RDCVolumeUpdated", 0);
258	ii_report_luns = nsc_register_svc("IIReportLuns", 0);
259	ii_get_initiators = nsc_register_svc("IIGetInitiators", 0);
260
261	if (!ii_volume_update || !ii_report_luns || !ii_get_initiators) {
262		_ii_deinit_dev();
263		return (ENOMEM);
264	}
265
266	return (0);
267}
268
269
270/*
271 * _ii_deinit_dev
272 *	De-initialise the shadow driver
273 *
274 */
275
276void
277_ii_deinit_dev()
278{
279
280	if (_ii_io)
281		(void) nsc_unregister_io(_ii_io, 0);
282
283	if (_ii_ior)
284		(void) nsc_unregister_io(_ii_ior, 0);
285
286	if (_ii_local_mem)
287		(void) nsc_unregister_mem(_ii_local_mem);
288
289	if (ii_volume_update)
290		(void) nsc_unregister_svc(ii_volume_update);
291
292	if (ii_report_luns)
293		(void) nsc_unregister_svc(ii_report_luns);
294
295	if (ii_get_initiators)
296		(void) nsc_unregister_svc(ii_get_initiators);
297
298	mutex_destroy(&_ii_info_mutex);
299	mutex_destroy(&_ii_overflow_mutex);
300	mutex_destroy(&_ii_config_mutex);
301	mutex_destroy(&_ii_cluster_mutex);
302	mutex_destroy(&_ii_group_mutex);
303	if (_ii_concopy_init)
304		sema_destroy(&_ii_concopy_sema);
305	_ii_concopy_init = 0;
306
307}
308
309static char *
310ii_pathname(nsc_fd_t *fd)
311{
312	char *rc;
313
314	if (fd == NULL || (rc = nsc_pathname(fd)) == NULL)
315		return ("");
316	else
317		return (rc);
318}
319
320
321/*
322 * _ii_rlse_d
323 *	Internal mechanics of _ii_rlse_devs().  Takes care of
324 *	resetting the ownership information as required.
325 */
326
327static void
328_ii_rlse_d(ip, mst, raw)
329_ii_info_t *ip;
330int mst, raw;
331{
332	_ii_info_dev_t *cip;
333	_ii_info_dev_t *rip;
334
335	rip = mst ? (ip->bi_mstrdev) : &(ip->bi_shdrdev);
336	cip = mst ? (ip->bi_mstdev) : &(ip->bi_shddev);
337
338	DTRACE_PROBE2(_ii_rlse_d_type,
339			_ii_info_dev_t *, rip,
340			_ii_info_dev_t *, cip);
341
342
343	if (RSRV(cip)) {
344		if (raw) {
345			ASSERT(cip->bi_orsrv > 0);
346			cip->bi_orsrv--;
347		} else {
348			ASSERT(cip->bi_rsrv > 0);
349			cip->bi_rsrv--;
350		}
351
352		if (cip->bi_rsrv > 0) {
353			nsc_set_owner(cip->bi_fd, cip->bi_iodev);
354		} else if (cip->bi_orsrv > 0) {
355			nsc_set_owner(cip->bi_fd, rip->bi_iodev);
356		} else {
357			nsc_set_owner(cip->bi_fd, NULL);
358		}
359
360		if (!RSRV(cip)) {
361			nsc_release(cip->bi_fd);
362		}
363	} else {
364		if (raw) {
365			ASSERT(rip->bi_rsrv > 0);
366			rip->bi_rsrv--;
367		} else {
368			ASSERT(rip->bi_orsrv > 0);
369			rip->bi_orsrv--;
370		}
371
372		if (rip->bi_rsrv > 0) {
373			nsc_set_owner(rip->bi_fd, rip->bi_iodev);
374		} else if (rip->bi_orsrv > 0) {
375			nsc_set_owner(rip->bi_fd, cip->bi_iodev);
376		} else {
377			nsc_set_owner(rip->bi_fd, NULL);
378		}
379
380		if (!RSRV(rip)) {
381			rip->bi_flag = 0;
382			nsc_release(rip->bi_fd);
383			cv_broadcast(&ip->bi_releasecv);
384		}
385	}
386
387}
388
389
390/*
391 * _ii_rlse_devs
392 *	Release named underlying devices.
393 *
394 *	NOTE: the 'devs' argument must be the same as that passed to
395 *	the preceding _ii_rsrv_devs call.
396 */
397
398void
399_ii_rlse_devs(ip, devs)
400_ii_info_t *ip;
401int devs;
402{
403
404	ASSERT(!(devs & (MST|SHD)));
405
406	ASSERT(ip->bi_head != (_ii_info_t *)0xdeadbeef);
407	if (!ip) {
408		cmn_err(CE_WARN, "!ii: _ii_rlse_devs null ip");
409		return;
410	}
411
412	mutex_enter(&ip->bi_rsrvmutex);
413
414	DTRACE_PROBE(_ii_rlse_devs_mutex);
415
416	if ((devs&(MST|MSTR)) != 0 && (ip->bi_flags&DSW_SHDIMPORT) == 0) {
417		if (NSHADOWS(ip) && ip != ip->bi_master)
418			_ii_rlse_devs(ip->bi_master, devs&(MST|MSTR));
419		else
420			_ii_rlse_d(ip, 1, (devs&MSTR));
421	}
422
423	if ((devs&(SHD|SHDR)) != 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0) {
424		_ii_rlse_d(ip, 0, (devs&SHDR));
425	}
426
427	if ((devs&BMP) != 0 && ip->bi_bmpfd) {
428		if (--(ip->bi_bmprsrv) == 0)
429			nsc_release(ip->bi_bmpfd);
430	}
431
432	ASSERT(ip->bi_bmprsrv >= 0);
433	ASSERT(ip->bi_shdrsrv >= 0);
434	ASSERT(ip->bi_shdrrsrv >= 0);
435	mutex_exit(&ip->bi_rsrvmutex);
436
437}
438
439
440/*
441 * _ii_rsrv_d
442 *	Reserve device flagged, unless its companion is already reserved,
443 *	in that case increase the reserve on the companion.
444 */
445
446static int
447_ii_rsrv_d(int raw, _ii_info_dev_t *rid, _ii_info_dev_t *cid, int flag,
448    _ii_info_t *ip)
449{
450	_ii_info_dev_t *p = NULL;
451	int other = 0;
452	int rc;
453
454	/*
455	 * If user wants to do a cache reserve and it's already
456	 * raw reserved, we need to do a real nsc_reserve, so wait
457	 * until the release has been done.
458	 */
459	if (RSRV(rid) && (flag == II_EXTERNAL) &&
460	    (raw == 0) && (rid->bi_flag != II_EXTERNAL)) {
461		ip->bi_release++;
462		while (RSRV(rid)) {
463			DTRACE_PROBE1(_ii_rsrv_d_wait, _ii_info_dev_t *, rid);
464			cv_wait(&ip->bi_releasecv, &ip->bi_rsrvmutex);
465			DTRACE_PROBE1(_ii_rsrv_d_resume, _ii_info_dev_t *, rid);
466		}
467		ip->bi_release--;
468	}
469
470	if (RSRV(rid)) {
471		p = rid;
472		if (!raw) {
473			other = 1;
474		}
475	} else if (RSRV(cid)) {
476		p = cid;
477		if (raw) {
478			other = 1;
479		}
480	}
481
482	if (p) {
483		if (other) {
484			p->bi_orsrv++;
485		} else {
486			p->bi_rsrv++;
487		}
488
489		if (p->bi_iodev) {
490			nsc_set_owner(p->bi_fd, p->bi_iodev);
491		}
492
493		return (0);
494	}
495	p = raw ? rid : cid;
496
497	if ((rc = nsc_reserve(p->bi_fd, 0)) == 0) {
498		if (p->bi_iodev) {
499			nsc_set_owner(p->bi_fd, p->bi_iodev);
500		}
501		p->bi_rsrv++;
502		if (raw)
503			p->bi_flag = flag;
504	}
505
506	return (rc);
507}
508
509/*
510 * _ii_rsrv_devs
511 *	Reserve named underlying devices.
512 *
513 */
514
515int
516_ii_rsrv_devs(_ii_info_t *ip, int devs, int flag)
517{
518	int rc = 0;
519	int got = 0;
520
521	ASSERT(!(devs & (MST|SHD)));
522
523	if (!ip) {
524		cmn_err(CE_WARN, "!ii: _ii_rsrv_devs null ip");
525		return (EINVAL);
526	}
527
528	mutex_enter(&ip->bi_rsrvmutex);
529
530	DTRACE_PROBE(_ii_rsrv_devs_mutex);
531
532	if (rc == 0 && (devs&(MST|MSTR)) != 0 &&
533	    (ip->bi_flags&DSW_SHDIMPORT) == 0) {
534		DTRACE_PROBE(_ii_rsrv_devs_master);
535		if (NSHADOWS(ip) && ip != ip->bi_master) {
536			if ((rc = _ii_rsrv_devs(ip->bi_master, devs&(MST|MSTR),
537			    flag)) != 0) {
538				cmn_err(CE_WARN,
539				    "!ii: nsc_reserve multi-master failed");
540			} else {
541				got |= devs&(MST|MSTR);
542			}
543		} else {
544			if ((rc = _ii_rsrv_d((devs&MSTR) != 0, ip->bi_mstrdev,
545			    ip->bi_mstdev, flag, ip)) != 0) {
546				cmn_err(CE_WARN,
547				    "!ii: nsc_reserve master failed %d", rc);
548			} else {
549				got |= (devs&(MST|MSTR));
550			}
551		}
552	}
553
554	if (rc == 0 && (devs&(SHD|SHDR)) != 0 &&
555	    (ip->bi_flags&DSW_SHDEXPORT) == 0) {
556		DTRACE_PROBE(_ii_rsrv_devs_shadow);
557		if ((rc = _ii_rsrv_d((devs&SHDR) != 0, &ip->bi_shdrdev,
558		    &ip->bi_shddev, flag, ip)) != 0) {
559			cmn_err(CE_WARN,
560			    "!ii: nsc_reserve shadow failed %d", rc);
561		} else {
562			got |= (devs&(SHD|SHDR));
563		}
564	}
565
566	if (rc == 0 && (devs&BMP) != 0 && ip->bi_bmpfd) {
567		DTRACE_PROBE(_ii_rsrv_devs_bitmap);
568		if ((ip->bi_bmprsrv == 0) &&
569		    (rc = nsc_reserve(ip->bi_bmpfd, 0)) != 0) {
570			cmn_err(CE_WARN,
571			    "!ii: nsc_reserve bitmap failed %d", rc);
572		} else {
573			(ip->bi_bmprsrv)++;
574			got |= BMP;
575		}
576	}
577	mutex_exit(&ip->bi_rsrvmutex);
578	if (rc != 0 && got != 0)
579		_ii_rlse_devs(ip, got);
580
581	return (rc);
582}
583
584static int
585_ii_reserve_begin(_ii_info_t *ip)
586{
587	int rc;
588
589	mutex_enter(&ip->bi_rlsemutex);
590	if ((rc = _ii_wait_for_it(ip)) == 0) {
591		++ip->bi_rsrvcnt;
592	}
593	mutex_exit(&ip->bi_rlsemutex);
594
595	return (rc);
596}
597
598static int
599_ii_wait_for_it(_ii_info_t *ip)
600{
601	int nosig;
602
603	nosig = 1;
604	while (ip->bi_rsrvcnt > 0) {
605		nosig = cv_wait_sig(&ip->bi_reservecv, &ip->bi_rlsemutex);
606		if (!nosig) {
607			break;
608		}
609	}
610
611	return (nosig? 0 : EINTR);
612}
613
614static void
615_ii_reserve_end(_ii_info_t *ip)
616{
617	mutex_enter(&ip->bi_rlsemutex);
618	if (ip->bi_rsrvcnt <= 0) {
619		mutex_exit(&ip->bi_rlsemutex);
620		return;
621	}
622	--ip->bi_rsrvcnt;
623	mutex_exit(&ip->bi_rlsemutex);
624	cv_broadcast(&ip->bi_reservecv);
625
626}
627
628static int
629ii_fill_copy_bmp(_ii_info_t *ip)
630{
631	int rc;
632	chunkid_t max_chunk, chunk_num;
633
634	if ((rc = II_FILL_COPY_BMP(ip)) != 0)
635		return (rc);
636	/*
637	 * make certain that the last bits of the last byte of the bitmap
638	 * aren't filled as they may be copied out to the user.
639	 */
640
641	chunk_num = ip->bi_size / DSW_SIZE;
642	if ((ip->bi_size % DSW_SIZE) != 0)
643		++chunk_num;
644
645	max_chunk = chunk_num;
646	if ((max_chunk & 0x7) != 0)
647		max_chunk = (max_chunk + 7) & ~7;
648
649	DTRACE_PROBE2(_ii_fill_copy_bmp_chunks, chunkid_t, chunk_num,
650	    chunkid_t, max_chunk);
651
652	for (; chunk_num < max_chunk; chunk_num++) {
653		(void) II_CLR_COPY_BIT(ip, chunk_num);
654	}
655
656	return (0);
657}
658
659static int
660ii_update_denied(_ii_info_t *ip, spcs_s_info_t kstatus,
661				int direction, int all)
662{
663	rdc_update_t update;
664	int size;
665	unsigned char *bmp;
666
667	update.volume = direction == CV_SHD2MST ? ii_pathname(MSTFD(ip)) :
668	    ip->bi_keyname;
669	update.denied = 0;
670	update.protocol = RDC_SVC_ONRETURN;
671	update.size = size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
672	update.status = kstatus;
673	update.bitmap = bmp = kmem_alloc(update.size, KM_SLEEP);
674	if (bmp == NULL) {
675		spcs_s_add(kstatus, ENOMEM);
676		return (1);
677	}
678
679	DTRACE_PROBE2(_ii_update_denied, int, all, int, size);
680
681	if (all) {
682		while (size-- > 0)
683			*bmp++ = (unsigned char)0xff;
684	} else {
685		if (II_CHANGE_BMP(ip, update.bitmap) != 0) {
686			/* failed to read bitmap */
687			spcs_s_add(kstatus, EIO);
688			update.denied = 1;
689		}
690	}
691
692	/* check that no user of volume objects */
693	if (update.denied == 0) {
694		(void) nsc_call_svc(ii_volume_update, (intptr_t)&update);
695	}
696	kmem_free(update.bitmap, FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size)));
697
698	return (update.denied);
699}
700
701static int
702ii_need_same_size(_ii_info_t *ip)
703{
704	rdc_update_t update;
705
706	update.volume = ip->bi_keyname;
707	update.denied = 0;
708	update.protocol = RDC_SVC_VOL_ENABLED;
709
710	(void) nsc_call_svc(ii_volume_update, (intptr_t)&update);
711
712	return (update.denied);
713}
714
715/*
716 * ii_volume:	check if vol is already known to Instant Image and return
717 *	volume type if it is.
718 */
719
720static int
721ii_volume(char *vol, int locked)
722{
723	_ii_info_t *ip;
724	_ii_overflow_t	*op;
725	int rc = NONE;
726
727	/* scan overflow volume list */
728	mutex_enter(&_ii_overflow_mutex);
729
730	DTRACE_PROBE(_ii_volume_mutex);
731
732	for (op = _ii_overflow_top; op; op = op->ii_next) {
733		if (strcmp(vol, op->ii_volname) == 0)
734			break;
735	}
736	mutex_exit(&_ii_overflow_mutex);
737	if (op) {
738		return (OVR);
739	}
740
741	if (!locked) {
742		mutex_enter(&_ii_info_mutex);
743	}
744
745	DTRACE_PROBE(_ii_volume_mutex2);
746
747	for (ip = _ii_info_top; ip; ip = ip->bi_next) {
748		if (strcmp(vol, ii_pathname(ip->bi_mstfd)) == 0) {
749			rc = MST;
750			break;
751		}
752		if (strcmp(vol, ip->bi_keyname)  == 0) {
753			rc = SHD;
754			break;
755		}
756		if (strcmp(vol, ii_pathname(ip->bi_bmpfd)) == 0) {
757			rc = BMP;
758			break;
759		}
760	}
761	DTRACE_PROBE1(_ii_volume_data, int, rc);
762
763	if (!locked) {
764		mutex_exit(&_ii_info_mutex);
765	}
766
767	return (rc);
768}
769
770/*
771 * ii_open_shadow: open shadow volume for both cached and raw access,
772 *	if the normal device open fails attempt a file open to allow
773 *	shadowing into a file.
774 */
775
776static int
777ii_open_shadow(_ii_info_t *ip, char *shadow_vol)
778{
779	int rc = 0;
780	int file_rc = 0;
781
782	ip->bi_shdfd = nsc_open(shadow_vol,
783	    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
784	    (blind_t)&(ip->bi_shddev), &rc);
785	if (!ip->bi_shdfd) {
786		ip->bi_shdfd = nsc_open(shadow_vol,
787		    NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def,
788		    (blind_t)&(ip->bi_shddev), &file_rc);
789		file_rc = 1;
790		if (!ip->bi_shdfd) {
791			return (rc);
792		}
793		DTRACE_PROBE(_ii_open_shadow);
794	}
795	else
796		DTRACE_PROBE(_ii_open_shadow);
797
798	if (file_rc == 0) {
799		ip->bi_shdrfd = nsc_open(shadow_vol,
800		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
801		    (blind_t)&(ip->bi_shdrdev), &rc);
802		DTRACE_PROBE(_ii_open_shadow);
803	} else {
804		ip->bi_shdrfd = nsc_open(shadow_vol,
805		    NSC_IIR_ID|NSC_FILE|NSC_RDWR, _ii_fd_def,
806		    (blind_t)&(ip->bi_shdrdev), &rc);
807		DTRACE_PROBE(_ii_open_shadow);
808	}
809
810	if (!ip->bi_shdrfd) {
811		(void) nsc_close(ip->bi_shdfd);
812		DTRACE_PROBE(_ii_open_shadow);
813		return (rc);
814	}
815
816	return (0);
817}
818
819static void
820ii_register_shd(_ii_info_t *ip)
821{
822	ip->bi_shd_tok = _ii_register_path(ip->bi_keyname,
823	    NSC_CACHE, _ii_io);
824	ip->bi_shdr_tok = _ii_register_path(ip->bi_keyname,
825	    NSC_DEVICE, _ii_ior);
826
827}
828
829static void
830ii_register_mst(_ii_info_t *ip)
831{
832	ip->bi_mst_tok = _ii_register_path(ii_pathname(ip->bi_mstfd),
833	    NSC_CACHE, _ii_io);
834	ip->bi_mstr_tok = _ii_register_path(ii_pathname(ip->bi_mstrfd),
835	    NSC_DEVICE, _ii_ior);
836
837}
838
839static int
840ii_register_ok(_ii_info_t *ip)
841{
842	int rc;
843	int sibling;
844	int exported;
845
846	rc = 1;
847	sibling = NSHADOWS(ip) && ip != ip->bi_head;
848	exported = ip->bi_flags & DSW_SHDEXPORT;
849
850	if ((ip->bi_bmpfd && !ip->bi_bmp_tok) || (!exported && (
851	    !ip->bi_shd_tok || !ip->bi_shdr_tok)))
852		rc = 0;
853	else if (!sibling && (!ip->bi_mst_tok || !ip->bi_mstr_tok))
854		rc = 0;
855
856	return (rc);
857}
858
859#ifndef DISABLE_KSTATS
860
861/*
862 * _ii_kstat_create
863 *	Create and install kstat_io data
864 *
865 * Calling/Exit State:
866 *	Returns 0 if kstats couldn't be created, otherwise it returns
867 *	a pointer to the created kstat_t.
868 */
869
870static kstat_t *
871_ii_kstat_create(_ii_info_t *ip, char *type)
872{
873	kstat_t *result;
874	char name[ IOSTAT_NAME_LEN ];
875	int setnum;
876	char *nptr;
877	static int mstnum = 0;
878	static int shdbmpnum = -1;
879
880	switch (*type) {
881	case 'm':
882		setnum = mstnum++;
883		nptr = ip->bi_kstat_io.mstio;
884		break;
885	case 's':
886		/* assumption: shadow kstats created before bitmap */
887		setnum = ++shdbmpnum;
888		nptr = ip->bi_kstat_io.shdio;
889		break;
890	case 'b':
891		setnum = shdbmpnum;
892		nptr = ip->bi_kstat_io.bmpio;
893		break;
894	default:
895		cmn_err(CE_WARN, "!Unable to determine kstat type (%c)", *type);
896		setnum = -1;
897		break;
898	}
899	/*
900	 * The name of the kstat, defined below, is designed to work
901	 * with the 'iostat -x' command.  This command leaves only
902	 * 9 characters for the name, and the kstats built in to Solaris
903	 * all seem to be of the form <service><number>.  For that
904	 * reason, we have chosen ii<type><number>, where <type> is
905	 * m, s, b, or o (for master, shadow, bitmap, and overflow
906	 * respectively), and the number is monotonically increasing from
907	 * 0 for each time one of those <type>s are created.  Note that
908	 * the shadow and bitmap are always created in pairs and so, for
909	 * any given set, they will have the same <number>.
910	 */
911	(void) sprintf(name, "ii%c%d", *type, setnum);
912	(void) strncpy(nptr, name, IOSTAT_NAME_LEN);
913	result = kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0);
914	if (result) {
915		result->ks_private = ip;
916		result->ks_lock = &ip->bi_kstat_io.statmutex;
917		kstat_install(result);
918	} else {
919		cmn_err(CE_WARN, "!Unable to create %s kstats for set %s", type,
920		    ip->bi_keyname);
921	}
922
923	return (result);
924}
925
926/*
927 * _ii_overflow_kstat_create
928 *	Create and install kstat_io data for an overflow volume
929 *
930 * Calling/Exit State:
931 *	Returns 0 if kstats couldn't be created, otherwise it returns
932 *	a pointer to the created kstat_t.
933 *
934 * See comments in _ii_kstat_create for additional information.
935 *
936 */
937static kstat_t *
938_ii_overflow_kstat_create(_ii_info_t *ip, _ii_overflow_t *op)
939{
940	kstat_t *result;
941	char *nptr;
942	char name [IOSTAT_NAME_LEN];
943	static int ovrnum = 0;
944	int setnum = ovrnum++;
945
946	nptr = ip->bi_kstat_io.ovrio;
947
948	(void) sprintf(name, "iio%d", setnum);
949	(void) strncpy(nptr, name, IOSTAT_NAME_LEN);
950
951	mutex_init(&op->ii_kstat_mutex, NULL, MUTEX_DRIVER, NULL);
952
953	if ((result =
954	    kstat_create("ii", 0, name, "disk", KSTAT_TYPE_IO, 1, 0))) {
955		result->ks_private = ip;
956		result->ks_lock = &op->ii_kstat_mutex;
957		kstat_install(result);
958	} else {
959		mutex_destroy(&op->ii_kstat_mutex);
960		cmn_err(CE_WARN, "!Unabled to create overflow kstat for set "
961		    "%s", ip->bi_keyname);
962	}
963
964	return (result);
965}
966
967#endif
968
969static void
970ii_str_kstat_copy(char *str, char *p1, char *p2, char *p3, char *p4)
971{
972	static int whinged = 0;
973	char *part[ 4 ];
974	char fulldata[ DSW_NAMELEN ];
975	int i, offset, remain;
976	int num_parts;
977	int leftover;
978	int kscharsize = KSTAT_DATA_CHAR_LEN - 1;
979
980	/*
981	 * NOTE: the following lines must be changed if DSW_NAMELEN
982	 * ever changes.  You'll need a part[] for every kscharsize
983	 * characters (or fraction thereof).  The ii_kstat_set_t
984	 * definition in dsw_dev.h will also need new ovr_? entries.
985	 */
986	part[ 0 ] = p1;
987	part[ 1 ] = p2;
988	part[ 2 ] = p3;
989	part[ 3 ] = p4;
990
991	bzero(fulldata, DSW_NAMELEN);
992	if (str) {
993		(void) strncpy(fulldata, str, DSW_NAMELEN);
994	}
995
996	num_parts = DSW_NAMELEN / kscharsize;
997	leftover = DSW_NAMELEN % kscharsize;
998	if (leftover) {
999		++num_parts;
1000	}
1001
1002	if (num_parts > sizeof (part) / sizeof (part[0])) {
1003		/*
1004		 * DSW_NAMELEN is 64 and kscharsize is 15.
1005		 * It's always "whinged"
1006		 */
1007		if (!whinged) {
1008#ifdef DEBUG
1009			cmn_err(CE_WARN, "!May not have enough room "
1010			    "to store volume name in kstats");
1011#endif
1012			whinged = 1;
1013		}
1014		num_parts = sizeof (part) / sizeof (part[0]);
1015	}
1016
1017	offset = 0;
1018	remain = DSW_NAMELEN;
1019	for (i = 0; i < num_parts; i++) {
1020		int to_copy = remain > kscharsize? kscharsize : remain;
1021		bcopy(&fulldata[ offset ], part[ i ], to_copy);
1022		offset += to_copy;
1023		remain -= to_copy;
1024	}
1025}
1026
1027static int
1028ii_set_stats_update(kstat_t *ksp, int rw)
1029{
1030	_ii_info_t *ip = (_ii_info_t *)ksp->ks_private;
1031	ii_kstat_set_t *kp = (ii_kstat_set_t *)ksp->ks_data;
1032
1033	if (KSTAT_WRITE == rw) {
1034		return (EACCES);
1035	}
1036
1037	/* copy values over */
1038	kp->size.value.ul = ip->bi_size;
1039	kp->flags.value.ul = ip->bi_flags;
1040	kp->unit.value.ul = ip->bi_throttle_unit;
1041	kp->delay.value.ul = ip->bi_throttle_delay;
1042	kp->mtime.value.ul = ip->bi_mtime;
1043
1044	/* update bitmap counters if necessary */
1045	if (ip->bi_state & DSW_CNTCPYBITS) {
1046		ip->bi_copybits = 0;
1047		if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
1048			ip->bi_state &= ~DSW_CNTCPYBITS;
1049			II_CNT_BITS(ip, ip->bi_copyfba,
1050			    &ip->bi_copybits,
1051			    DSW_BM_SIZE_BYTES(ip));
1052			_ii_rlse_devs(ip, BMP);
1053		}
1054	}
1055
1056	if (ip->bi_state & DSW_CNTSHDBITS) {
1057		ip->bi_shdbits = 0;
1058		if (_ii_rsrv_devs(ip, BMP, II_INTERNAL) == 0) {
1059			ip->bi_state &= ~DSW_CNTSHDBITS;
1060			II_CNT_BITS(ip, ip->bi_shdfba,
1061			    &ip->bi_shdbits,
1062			    DSW_BM_SIZE_BYTES(ip));
1063			_ii_rlse_devs(ip, BMP);
1064		}
1065	}
1066
1067	kp->copybits.value.ul = ip->bi_copybits;
1068	kp->shdbits.value.ul = ip->bi_shdbits;
1069
1070	/* copy volume names */
1071	ii_str_kstat_copy(ii_pathname(MSTFD(ip)),
1072	    kp->mst_a.value.c, kp->mst_b.value.c,
1073	    kp->mst_c.value.c, kp->mst_d.value.c);
1074
1075	ii_str_kstat_copy(ip->bi_keyname, kp->set_a.value.c, kp->set_b.value.c,
1076	    kp->set_c.value.c, kp->set_d.value.c);
1077
1078	ii_str_kstat_copy(ii_pathname(ip->bi_bmpfd),
1079	    kp->bmp_a.value.c, kp->bmp_b.value.c,
1080	    kp->bmp_c.value.c, kp->bmp_d.value.c);
1081
1082	if (ip->bi_overflow) {
1083		ii_str_kstat_copy(ip->bi_overflow->ii_volname,
1084		    kp->ovr_a.value.c, kp->ovr_b.value.c, kp->ovr_c.value.c,
1085		    kp->ovr_d.value.c);
1086		(void) strlcpy(kp->ovr_io.value.c, ip->bi_kstat_io.ovrio,
1087		    KSTAT_DATA_CHAR_LEN);
1088	} else {
1089		ii_str_kstat_copy("", kp->ovr_a.value.c, kp->ovr_b.value.c,
1090		    kp->ovr_c.value.c, kp->ovr_d.value.c);
1091		bzero(kp->ovr_io.value.c, KSTAT_DATA_CHAR_LEN);
1092	}
1093	if ((ip->bi_flags) & DSW_TREEMAP) {
1094		kp->shdchks.value.ul = ip->bi_shdchks;
1095		kp->shdchkused.value.ul = ip->bi_shdchkused;
1096	} else {
1097		kp->shdchks.value.ul = 0;
1098		kp->shdchkused.value.ul = 0;
1099	}
1100	/* make sure value.c are always null terminated */
1101	(void) strlcpy(kp->mst_io.value.c, ip->bi_kstat_io.mstio,
1102	    KSTAT_DATA_CHAR_LEN);
1103	(void) strlcpy(kp->shd_io.value.c, ip->bi_kstat_io.shdio,
1104	    KSTAT_DATA_CHAR_LEN);
1105	(void) strlcpy(kp->bmp_io.value.c, ip->bi_kstat_io.bmpio,
1106	    KSTAT_DATA_CHAR_LEN);
1107
1108	return (0);
1109}
1110
1111/*
1112 * _ii_config
1113 *	Configure an II device pair
1114 *
1115 * Calling/Exit State:
1116 *	Returns 0 if the pairing was configured, otherwise an
 *	error code. The ioctl data structure is copied out to the user
1118 *	and contains any additional error information, and the master
1119 *	and shadow volume names if not supplied by the user.
1120 *
1121 * Description:
1122 *	Reads the user configuration structure and attempts
1123 *	to establish an II pairing. The snapshot of the master
1124 *	device is established at this point in time.
1125 */
1126
1127int
1128_ii_config(intptr_t arg, int ilp32, int *rvp, int iflags)
1129{
1130	dsw_config_t uconf;
1131	dsw_config32_t *uconf32;
1132	_ii_info_t *ip, *hip, **ipp;
1133	int rc;
1134	int type;
1135	int nshadows;
1136	int add_to_mst_top;
1137	int import;
1138	int existing;
1139	int resized;
1140	nsc_size_t mst_size, shd_size, bmp_size;
1141	nsc_off_t shdfba;
1142	nsc_off_t copyfba;
1143	int keylen, keyoffset;
1144	ii_header_t *bm_header;
1145	nsc_buf_t *tmp;
1146	spcs_s_info_t kstatus;
1147	spcs_s_info32_t ustatus32;
1148	int rtype;
1149	uint_t hints;
1150
1151	/* Import is a once only operation like an enable */
1152	ASSERT((iflags&(II_EXISTING|II_IMPORT)) != (II_EXISTING|II_IMPORT));
1153	existing = (iflags&II_EXISTING) != 0;
1154	import = (iflags&II_IMPORT) != 0;
1155	*rvp = 0;
1156	if (ilp32) {
1157		uconf32 = kmem_zalloc(sizeof (dsw_config32_t), KM_SLEEP);
1158		if (uconf32 == NULL) {
1159			return (ENOMEM);
1160		}
1161		if (copyin((void *)arg, uconf32, sizeof (*uconf32)) < 0)
1162			return (EFAULT);
1163		II_TAIL_COPY(uconf, (*uconf32), master_vol, dsw_config_t);
1164		uconf.status = (spcs_s_info_t)uconf32->status;
1165		ustatus32 = uconf32->status;
1166		kmem_free(uconf32, sizeof (dsw_config32_t));
1167	} else if (copyin((void *)arg, &uconf, sizeof (uconf)) < 0)
1168		return (EFAULT);
1169
1170	DTRACE_PROBE3(_ii_config_info, char *, uconf.master_vol,
1171	    char *, uconf.shadow_vol, char *, uconf.bitmap_vol);
1172
1173	kstatus = spcs_s_kcreate();
1174	if (kstatus == NULL)
1175		return (ENOMEM);
1176
1177	if (_ii_shutting_down)
1178		return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1179		    DSW_ESHUTDOWN));
1180
1181	if (uconf.bitmap_vol[0] == 0)
1182		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY));
1183
1184	mutex_enter(&_ii_config_mutex);
1185	ip = nsc_kmem_zalloc(sizeof (*ip), KM_SLEEP, _ii_local_mem);
1186	if (!ip) {
1187		mutex_exit(&_ii_config_mutex);
1188		return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM));
1189	}
1190	ip->bi_mstdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP,
1191	    _ii_local_mem);
1192	ip->bi_mstrdev = nsc_kmem_zalloc(sizeof (*ip->bi_mstdev), KM_SLEEP,
1193	    _ii_local_mem);
1194	if (ip->bi_mstdev == NULL || ip->bi_mstrdev == NULL) {
1195		mutex_exit(&_ii_config_mutex);
1196		_ii_info_free(ip);
1197		return (spcs_s_ocopyoutf(&kstatus, uconf.status, ENOMEM));
1198	}
1199
1200	ip->bi_disabled = 1;	/* mark as disabled until we are ready to go */
1201	mutex_init(&ip->bi_mutex, NULL, MUTEX_DRIVER, NULL);
1202	mutex_init(&ip->bi_bmpmutex, NULL, MUTEX_DRIVER, NULL);
1203	mutex_init(&ip->bi_rsrvmutex, NULL, MUTEX_DRIVER, NULL);
1204	mutex_init(&ip->bi_rlsemutex, NULL, MUTEX_DRIVER, NULL);
1205	mutex_init(&ip->bi_chksmutex, NULL, MUTEX_DRIVER, NULL);
1206	cv_init(&ip->bi_copydonecv, NULL, CV_DRIVER, NULL);
1207	cv_init(&ip->bi_reservecv, NULL, CV_DRIVER, NULL);
1208	cv_init(&ip->bi_releasecv, NULL, CV_DRIVER, NULL);
1209	cv_init(&ip->bi_ioctlcv, NULL, CV_DRIVER, NULL);
1210	cv_init(&ip->bi_closingcv, NULL, CV_DRIVER, NULL);
1211	cv_init(&ip->bi_busycv, NULL, CV_DRIVER, NULL);
1212	rw_init(&ip->bi_busyrw, NULL, RW_DRIVER, NULL);
1213	rw_init(&ip->bi_linkrw, NULL, RW_DRIVER, NULL);
1214	(void) strncpy(ip->bi_keyname, uconf.shadow_vol, DSW_NAMELEN);
1215	ip->bi_keyname[DSW_NAMELEN-1] = '\0';
1216	ip->bi_throttle_unit = ii_throttle_unit;
1217	ip->bi_throttle_delay = ii_throttle_delay;
1218
1219	/* First check the list to see if uconf.bitmap_vol's already there */
1220
1221	if (ii_volume(uconf.bitmap_vol, 0) != NONE) {
1222		DTRACE_PROBE(_ii_config_bmp_found);
1223		mutex_exit(&_ii_config_mutex);
1224		_ii_info_free(ip);
1225		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1226	}
1227
1228	ip->bi_bmpfd = nsc_open(uconf.bitmap_vol,
1229	    NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(ip->bi_bmpdev), &rc);
1230	if (!ip->bi_bmpfd)
1231		ip->bi_bmpfd = nsc_open(uconf.bitmap_vol,
1232		    NSC_IIR_ID|NSC_CACHE|NSC_DEVICE|NSC_RDWR, NULL,
1233		    (blind_t)&(ip->bi_bmpdev), &rc);
1234	if (!ip->bi_bmpfd && !existing) {
1235		mutex_exit(&_ii_config_mutex);
1236		_ii_info_free(ip);
1237		spcs_s_add(kstatus, rc);
1238		DTRACE_PROBE(_ii_config_no_bmp);
1239		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1240	}
1241
1242	if (import) {
1243		uconf.flag = DSW_GOLDEN;
1244		II_FLAG_SETX(DSW_SHDIMPORT|DSW_GOLDEN, ip);
1245	}
1246
1247	if (existing) {
1248
1249		DTRACE_PROBE(_ii_config_existing);
1250		/*
1251		 * ii_config is used by enable, import and resume (existing)
1252		 * If not importing or resuming, then this must be enable.
1253		 * Indicate this fact for SNMP use.
1254		 */
1255
1256		if (!ip->bi_bmpfd) {
1257			/*
1258			 * Couldn't read bitmap, mark master and shadow as
1259			 * unusable.
1260			 */
1261			II_FLAG_ASSIGN(DSW_BMPOFFLINE|DSW_MSTOFFLINE|
1262			    DSW_SHDOFFLINE, ip);
1263
1264			/*
1265			 * Set cluster tag for this element so it can
1266			 * be suspended later
1267			 */
1268			(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1269
1270			/* need to check on master, might be shared */
1271			goto header_checked;
1272		}
1273		/* check the header */
1274		(void) _ii_rsrv_devs(ip, BMP, II_INTERNAL);
1275
1276		/* get first block of bit map */
1277		mutex_enter(&ip->bi_mutex);
1278		bm_header = _ii_bm_header_get(ip, &tmp);
1279		mutex_exit(&ip->bi_mutex);
1280		if (bm_header == NULL) {
1281			if (ii_debug > 0)
1282				cmn_err(CE_WARN,
1283				    "!ii: _ii_bm_header_get returned NULL");
1284			mutex_exit(&_ii_config_mutex);
1285			_ii_info_free(ip);
1286			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1287			    DSW_EHDRBMP));
1288		}
1289
1290		if (bm_header->ii_magic != DSW_DIRTY &&
1291		    bm_header->ii_magic != DSW_CLEAN) {
1292			mutex_exit(&_ii_config_mutex);
1293			_ii_bm_header_free(bm_header, ip, tmp);
1294			_ii_info_free(ip);
1295			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1296			    DSW_EINVALBMP));
1297		}
1298
1299		II_FLAG_ASSIGN(bm_header->ii_state, ip);
1300		/* Restore copy throttle parameters, if header version is 3 */
1301		if (bm_header->ii_version >= 3) {	/* II_HEADER_VERSION */
1302			ip->bi_throttle_delay = bm_header->ii_throttle_delay;
1303			ip->bi_throttle_unit  = bm_header->ii_throttle_unit;
1304		}
1305
1306		/* Restore cluster & group names, if header version is 4 */
1307		if (bm_header->ii_version >= 4) {
1308			/* cluster */
1309			if (*bm_header->clstr_name) {
1310				(void) strncpy(uconf.cluster_tag,
1311				    bm_header->clstr_name, DSW_NAMELEN);
1312				(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1313			}
1314
1315			/* group */
1316			if (*bm_header->group_name) {
1317				(void) strncpy(uconf.group_name,
1318				    bm_header->group_name, DSW_NAMELEN);
1319				(void) II_LINK_GROUP(ip, uconf.group_name);
1320			}
1321		}
1322		/* restore latest modification time, if header version >= 5 */
1323		if (bm_header->ii_version >= 5) {
1324			ip->bi_mtime = bm_header->ii_mtime;
1325		}
1326
1327		/* Fetch master and shadow names from bitmap header */
1328		if (uconf.master_vol[0] == 0)
1329			(void) strncpy(uconf.master_vol, bm_header->master_vol,
1330			    DSW_NAMELEN);
1331		if (uconf.shadow_vol[0] == 0)
1332			(void) strncpy(uconf.shadow_vol, bm_header->shadow_vol,
1333			    DSW_NAMELEN);
1334
1335		/* return the fetched names to the user */
1336		if (ilp32) {
1337			uconf32 = kmem_zalloc(sizeof (dsw_config32_t),
1338			    KM_SLEEP);
1339			if (uconf32 == NULL) {
1340				mutex_exit(&_ii_config_mutex);
1341				_ii_bm_header_free(bm_header, ip, tmp);
1342				_ii_rlse_devs(ip, BMP);
1343				_ii_info_free(ip);
1344				return (ENOMEM);
1345			}
1346			uconf32->status = ustatus32;
1347			II_TAIL_COPY((*uconf32), uconf, master_vol,
1348			    dsw_config32_t);
1349			rc = copyout(uconf32, (void *)arg, sizeof (*uconf32));
1350			kmem_free(uconf32, sizeof (dsw_config32_t));
1351		} else {
1352			rc = copyout(&uconf, (void *)arg, sizeof (uconf));
1353		}
1354		if (rc) {
1355			mutex_exit(&_ii_config_mutex);
1356			_ii_bm_header_free(bm_header, ip, tmp);
1357			_ii_rlse_devs(ip, BMP);
1358			_ii_info_free(ip);
1359			return (EFAULT);
1360		}
1361
1362		if (strncmp(bm_header->bitmap_vol, uconf.bitmap_vol,
1363		    DSW_NAMELEN) || ((!(ip->bi_flags&DSW_SHDIMPORT)) &&
1364		    strncmp(bm_header->master_vol, uconf.master_vol,
1365		    DSW_NAMELEN)) || strncmp(bm_header->shadow_vol,
1366		    uconf.shadow_vol, DSW_NAMELEN)) {
1367			mutex_exit(&_ii_config_mutex);
1368			_ii_bm_header_free(bm_header, ip, tmp);
1369			_ii_rlse_devs(ip, BMP);
1370			_ii_info_free(ip);
1371			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1372			    DSW_EMISMATCH));
1373		}
1374		shdfba = bm_header->ii_shdfba;
1375		copyfba = bm_header->ii_copyfba;
1376		if ((ip->bi_flags)&DSW_TREEMAP) {
1377			if (ii_debug > 0)
1378				cmn_err(CE_NOTE,
1379				    "!II: Resuming short shadow volume");
1380
1381			ip->bi_mstchks = bm_header->ii_mstchks;
1382			ip->bi_shdchks = bm_header->ii_shdchks;
1383			ip->bi_shdchkused = bm_header->ii_shdchkused;
1384			ip->bi_shdfchk = bm_header->ii_shdfchk;
1385
1386			if (bm_header->overflow_vol[0] != 0)
1387				if ((rc = ii_overflow_attach(ip,
1388				    bm_header->overflow_vol, 0)) != 0) {
1389					mutex_exit(&_ii_config_mutex);
1390					_ii_bm_header_free(bm_header, ip, tmp);
1391					_ii_rlse_devs(ip, BMP);
1392					_ii_info_free(ip);
1393					return (spcs_s_ocopyoutf(&kstatus,
1394					    uconf.status, rc));
1395			}
1396		}
1397		_ii_bm_header_free(bm_header, ip, tmp);
1398		_ii_rlse_devs(ip, BMP);
1399	}
1400header_checked:
1401
1402	if (ip->bi_flags&DSW_SHDIMPORT)
1403		(void) strcpy(uconf.master_vol, "<imported shadow>");
1404	if (!uconf.master_vol[0] || !uconf.shadow_vol[0]) {
1405		mutex_exit(&_ii_config_mutex);
1406		_ii_info_free(ip);
1407		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EEMPTY));
1408	}
1409
1410	/* check that no volume has been given twice */
1411	if (strncmp(uconf.master_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) {
1412		mutex_exit(&_ii_config_mutex);
1413		_ii_info_free(ip);
1414		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1415	}
1416
1417	if (strncmp(uconf.master_vol, uconf.bitmap_vol, DSW_NAMELEN) == 0) {
1418		mutex_exit(&_ii_config_mutex);
1419		_ii_info_free(ip);
1420		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1421	}
1422
1423	if (strncmp(uconf.bitmap_vol, uconf.shadow_vol, DSW_NAMELEN) == 0) {
1424		mutex_exit(&_ii_config_mutex);
1425		_ii_info_free(ip);
1426		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EOPEN));
1427	}
1428
1429	/* check that master is not already a bitmap, shadow or overflow */
1430	type = ii_volume(uconf.master_vol, 1);
1431	if (type != NONE && type != MST) {
1432		mutex_exit(&_ii_config_mutex);
1433		_ii_info_free(ip);
1434		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1435	}
1436
1437	/* check that shadow is not used as anything else */
1438	type = ii_volume(uconf.shadow_vol, 1);
1439	if (type != NONE && type != SHD) {
1440		mutex_exit(&_ii_config_mutex);
1441		_ii_info_free(ip);
1442		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1443	}
1444
1445	/* Setup the table bitmap operations table */
1446	switch (ii_bitmap) {
1447	case II_KMEM:
1448		if (ii_debug > 0)
1449			cmn_err(CE_NOTE, "!ii: using volatile bitmaps");
1450		ip->bi_bitmap_ops = &kmem_buf_bmp;
1451		break;
1452	case II_FWC:
1453		hints = 0;
1454		(void) nsc_node_hints(&hints);
1455		if ((hints & NSC_FORCED_WRTHRU) == 0)
1456			ip->bi_bitmap_ops = &kmem_buf_bmp;
1457		else
1458			ip->bi_bitmap_ops = &alloc_buf_bmp;
1459		if (ii_debug > 0) {
1460			cmn_err(CE_NOTE, "!ii: chosen to use %s bitmaps",
1461			    ip->bi_bitmap_ops == &kmem_buf_bmp ?
1462			    "volatile" : "persistent");
1463		}
1464		break;
1465	case II_WTHRU:
1466	default:
1467		if (ii_debug > 0)
1468			cmn_err(CE_NOTE, "!ii: using persistent bitmaps");
1469		ip->bi_bitmap_ops = &alloc_buf_bmp;
1470		break;
1471	}
1472
1473	/*
1474	 * If we found aother shadow volume with the same name,
1475	 * If this is an resume operation,
1476	 * If this shadow is in the exported state
1477	 * then try an on the fly join instead
1478	 */
1479	for (hip = _ii_info_top; hip; hip = hip->bi_next)
1480		if (strcmp(uconf.shadow_vol, hip->bi_keyname) == 0)
1481				break;
1482	if ((hip) && (type == SHD) && existing &&
1483	    (ip->bi_flags & DSW_SHDEXPORT)) {
1484
1485		/*
1486		 * Stop any copy in progress
1487		 */
1488		while (_ii_stopcopy(hip) == EINTR)
1489			;
1490
1491		/*
1492		 * Start the imported shadow teardown
1493		 */
1494		mutex_enter(&hip->bi_mutex);
1495
1496		/* disable accesss to imported shadow */
1497		hip->bi_disabled = 1;
1498
1499		/* Wait for any I/O's to complete */
1500		while (hip->bi_ioctl) {
1501			hip->bi_state |= DSW_IOCTL;
1502			cv_wait(&hip->bi_ioctlcv, &hip->bi_mutex);
1503		}
1504		mutex_exit(&hip->bi_mutex);
1505
1506		/* this rw_enter forces us to drain all active IO */
1507		rw_enter(&hip->bi_linkrw, RW_WRITER);
1508		rw_exit(&hip->bi_linkrw);
1509
1510		/* remove ip from _ii_info_top linked list */
1511		mutex_enter(&_ii_info_mutex);
1512		for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) {
1513			if (hip == *ipp) {
1514				*ipp = hip->bi_next;
1515				break;
1516			}
1517		}
1518		if (hip->bi_kstat) {
1519			kstat_delete(hip->bi_kstat);
1520			hip->bi_kstat = NULL;
1521		}
1522		mutex_exit(&_ii_info_mutex);
1523
1524		/* Gain access to both bitmap volumes */
1525		rtype = BMP;
1526		if (((rc = _ii_rsrv_devs(hip, rtype, II_INTERNAL)) != 0) ||
1527		    ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0)) {
1528			mutex_exit(&_ii_config_mutex);
1529			_ii_info_free(ip);
1530			return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
1531		}
1532
1533		/* Merge imported bitmap */
1534		rc = II_JOIN_BMP(ip, hip);
1535
1536		/* Release access to bitmap volume */
1537		_ii_rlse_devs(hip, rtype);
1538		ii_sibling_free(hip);
1539
1540		/* Clear the fact that we are exported */
1541		mutex_enter(&ip->bi_mutex);
1542		II_FLAG_CLR(DSW_SHDEXPORT, ip);
1543
1544		/* Release resources */
1545		mutex_exit(&ip->bi_mutex);
1546		_ii_rlse_devs(ip, BMP);
1547
1548	} else if (type != NONE) {
1549		mutex_exit(&_ii_config_mutex);
1550		_ii_info_free(ip);
1551		return (spcs_s_ocopyoutf(&kstatus, uconf.status, DSW_EINUSE));
1552	}
1553
1554	/*
1555	 * Handle non-exported shadow
1556	 */
1557	if ((ip->bi_flags & DSW_SHDEXPORT) == 0) {
1558		if ((rc = ii_open_shadow(ip, uconf.shadow_vol)) != 0) {
1559			mutex_exit(&_ii_config_mutex);
1560			_ii_info_free(ip);
1561			spcs_s_add(kstatus, rc);
1562			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1563			    DSW_EOPEN));
1564		}
1565	}
1566
1567	/*
1568	 * allocate _ii_concopy_sema and set to a value that won't allow
1569	 * all cache to be allocated by copy loops.
1570	 */
1571
1572	if (_ii_concopy_init == 0 && ip->bi_bmpfd != NULL) {
1573		int asize = 0, wsize;
1574		nsc_size_t cfbas, maxfbas;
1575
1576		(void) nsc_cache_sizes(&asize, &wsize);
1577
1578		if (asize > 0) {
1579			cfbas = FBA_NUM(asize);
1580			(void) _ii_rsrv_devs(ip, BMP, II_INTERNAL);
1581			rc = nsc_maxfbas(ip->bi_bmpfd, 0, &maxfbas);
1582			_ii_rlse_devs(ip, BMP);
1583			if (!II_SUCCESS(rc))
1584				maxfbas = 1024;		/* i.e. _SD_MAX_FBAS */
1585			ii_nconcopy = cfbas / (maxfbas * 2) / 3;
1586		}
1587		if (ii_nconcopy < 2)
1588			ii_nconcopy = 2;
1589		ASSERT(ii_nconcopy > 0);
1590		sema_init(&_ii_concopy_sema, ii_nconcopy, NULL,
1591		    SEMA_DRIVER, NULL);
1592		_ii_concopy_init = 1;
1593	}
1594
1595	/* check for shared master volume */
1596	for (hip = _ii_mst_top; hip; hip = hip->bi_nextmst)
1597		if (strcmp(uconf.master_vol, ii_pathname(hip->bi_mstfd)) == 0)
1598			break;
1599	add_to_mst_top = (hip == NULL);
1600	if (!hip)
1601		for (hip = _ii_info_top; hip; hip = hip->bi_next)
1602			if (strcmp(uconf.master_vol,
1603			    ii_pathname(hip->bi_mstfd)) == 0)
1604				break;
1605	nshadows = (hip != NULL);
1606
1607	/* Check if master is offline */
1608	if (hip) {
1609		if (hip->bi_flags & DSW_MSTOFFLINE) {
1610			mutex_exit(&_ii_config_mutex);
1611			_ii_info_free(ip);
1612			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1613			    DSW_EOFFLINE));
1614		}
1615	}
1616
1617	if (!nshadows && (ip->bi_flags&DSW_SHDIMPORT) == 0) {
1618		ip->bi_mstfd = nsc_open(uconf.master_vol,
1619		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
1620		    (blind_t)(ip->bi_mstdev), &rc);
1621		if (!ip->bi_mstfd) {
1622			mutex_exit(&_ii_config_mutex);
1623			_ii_info_free(ip);
1624			spcs_s_add(kstatus, rc);
1625			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1626			    DSW_EOPEN));
1627		}
1628
1629		ip->bi_mstrfd = nsc_open(uconf.master_vol,
1630		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, _ii_fd_def,
1631		    (blind_t)(ip->bi_mstrdev), &rc);
1632		if (!ip->bi_mstrfd) {
1633			mutex_exit(&_ii_config_mutex);
1634			_ii_info_free(ip);
1635			spcs_s_add(kstatus, rc);
1636			return (spcs_s_ocopyoutf(&kstatus, uconf.status,
1637			    DSW_EOPEN));
1638		}
1639	}
1640
1641	ip->bi_head = ip;
1642	ip->bi_master = ip;
1643
1644	mutex_enter(&_ii_info_mutex);
1645	ip->bi_next = _ii_info_top;
1646	_ii_info_top = ip;
1647	if (nshadows) {
1648		/* link new shadow group together with others sharing master */
1649		if (ii_debug > 0)
1650			cmn_err(CE_NOTE,
1651			    "!II: shadow %s shares master %s with other shadow"
1652			    " groups", uconf.shadow_vol, uconf.master_vol);
1653		hip = hip->bi_head;
1654		nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev));
1655		nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev));
1656		ip->bi_mstrdev = hip->bi_mstrdev;
1657		ip->bi_mstdev = hip->bi_mstdev;
1658		ip->bi_head = hip;
1659		ip->bi_sibling = hip->bi_sibling;
1660		if (add_to_mst_top) {
1661			hip->bi_nextmst = _ii_mst_top;
1662			_ii_mst_top = hip;
1663		}
1664		hip->bi_sibling = ip;
1665		ip->bi_master = ip->bi_head->bi_master;
1666	}
1667	mutex_exit(&_ii_info_mutex);
1668	mutex_exit(&_ii_config_mutex);
1669
1670	keylen = strlen(ip->bi_keyname);
1671	if (keylen > KSTAT_STRLEN - 1) {
1672		keyoffset = keylen + 1 - KSTAT_STRLEN;
1673	} else {
1674		keyoffset = 0;
1675	}
1676	ip->bi_kstat = kstat_create("ii", _ii_instance++,
1677	    &ip->bi_keyname[ keyoffset ], "iiset", KSTAT_TYPE_NAMED,
1678	    sizeof (ii_kstat_set) / sizeof (kstat_named_t),
1679	    KSTAT_FLAG_VIRTUAL);
1680	if (ip->bi_kstat) {
1681		ip->bi_kstat->ks_data = &ii_kstat_set;
1682		ip->bi_kstat->ks_update = ii_set_stats_update;
1683		ip->bi_kstat->ks_private = ip;
1684		kstat_install(ip->bi_kstat);
1685	} else {
1686		cmn_err(CE_WARN, "!Unable to create set-specific kstats");
1687	}
1688
1689#ifndef DISABLE_KSTATS
1690	/* create kstats information */
1691	mutex_init(&ip->bi_kstat_io.statmutex, NULL, MUTEX_DRIVER, NULL);
1692	if (ip == ip->bi_master) {
1693		ip->bi_kstat_io.master = _ii_kstat_create(ip, "master");
1694	} else {
1695		ip->bi_kstat_io.master = ip->bi_master->bi_kstat_io.master;
1696		(void) strlcpy(ip->bi_kstat_io.mstio,
1697		    ip->bi_master->bi_kstat_io.mstio, KSTAT_DATA_CHAR_LEN);
1698	}
1699	ip->bi_kstat_io.shadow = _ii_kstat_create(ip, "shadow");
1700	ip->bi_kstat_io.bitmap = _ii_kstat_create(ip, "bitmap");
1701#endif
1702
1703	(void) _ii_reserve_begin(ip);
1704	rtype = MSTR|SHDR|BMP;
1705	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
1706		spcs_s_add(kstatus, rc);
1707		rc = DSW_ERSRVFAIL;
1708		goto fail;
1709	}
1710
1711	if (ip->bi_flags&DSW_SHDIMPORT) {
1712		rc = 0;		/* no master for imported volumes */
1713		mst_size = 0;
1714	} else
1715		rc = nsc_partsize(MSTFD(ip), &mst_size);
1716	if (rc == 0 && (ip->bi_flags&DSW_SHDEXPORT) == 0)
1717		rc = nsc_partsize(SHDFD(ip), &shd_size);
1718	if (!ip->bi_bmpfd)
1719		rc = EINVAL;
1720	if (rc == 0)
1721		rc = nsc_partsize(ip->bi_bmpfd, &bmp_size);
1722
1723	if (ip->bi_flags&DSW_SHDIMPORT)
1724		ip->bi_size = shd_size;
1725	else
1726		ip->bi_size = mst_size;
1727
1728	if ((((ip->bi_flags&DSW_SHDIMPORT) != DSW_SHDIMPORT) &&
1729	    (mst_size < 1)) ||
1730	    (((ip->bi_flags&DSW_SHDEXPORT) != DSW_SHDEXPORT) &&
1731	    (shd_size < 1)) ||
1732	    ((rc == 0) && (bmp_size < 1))) {
1733		/* could be really zero, or could be > 1 TB; fail the enable */
1734		rc = EINVAL;
1735	}
1736
1737	if (rc != 0) {	/* rc set means an nsc_partsize() failed */
1738		/*
1739		 * If existing group, mark bitmap as offline and set
1740		 * bmp_size to "right size".
1741		 */
1742		if (existing) {
1743			bmp_size = 2 * DSW_BM_FBA_LEN(mst_size) +
1744			    DSW_SHD_BM_OFFSET;
1745			goto no_more_bmp_tests;
1746		}
1747		spcs_s_add(kstatus, rc);
1748		rc = DSW_EPARTSIZE;
1749		_ii_rlse_devs(ip, rtype);
1750		_ii_reserve_end(ip);
1751		goto fail;
1752	}
1753
1754	if (ip->bi_flags&DSW_SHDIMPORT)
1755		mst_size = shd_size;
1756	if (ip->bi_flags&DSW_SHDEXPORT)
1757		shd_size = mst_size;
1758	/*
1759	 * Check with RDC if the master & shadow sizes are different.
1760	 * Once II is enabled, the shadow size will be made to appear
1761	 * the same as the master, and this will panic RDC if we're
1762	 * changing sizes on it.
1763	 */
1764	resized = (shd_size != mst_size);
1765	if (resized && ii_need_same_size(ip)) {
1766		cmn_err(CE_WARN, "!Cannot enable II set: would change volume "
1767		    "size on RDC");
1768		rc = DSW_EOPACKAGE;
1769		_ii_rlse_devs(ip, rtype);
1770		_ii_reserve_end(ip);
1771		goto fail;
1772	}
1773	if (bmp_size < 2 * DSW_BM_FBA_LEN(mst_size) + DSW_SHD_BM_OFFSET) {
1774		/* bitmap volume too small */
1775		if (ii_debug > 0)
1776			cmn_err(CE_NOTE,
1777			    "!ii: invalid sizes: bmp %" NSC_SZFMT " mst %"
1778			    NSC_SZFMT " %" NSC_SZFMT "",
1779			    bmp_size, mst_size, DSW_BM_FBA_LEN(mst_size));
1780		rc = DSW_EBMPSIZE;
1781		_ii_rlse_devs(ip, rtype);
1782		_ii_reserve_end(ip);
1783		goto fail;
1784	}
1785	if ((shd_size < mst_size) && (uconf.flag&DSW_GOLDEN) != 0) {
1786		/* shadow volume too small */
1787		if (ii_debug > 0)
1788			cmn_err(CE_NOTE, "!shd size too small (%" NSC_SZFMT
1789			    ") for independent set's master (%" NSC_SZFMT ")",
1790			    shd_size, mst_size);
1791		rc = DSW_ESHDSIZE;
1792		_ii_rlse_devs(ip, rtype);
1793		_ii_reserve_end(ip);
1794		goto fail;
1795	}
1796
1797	ip->bi_busy = kmem_zalloc(1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)),
1798	    KM_SLEEP);
1799	if (!ip->bi_busy) {
1800		rc = ENOMEM;
1801		_ii_rlse_devs(ip, rtype);
1802		_ii_reserve_end(ip);
1803		goto fail;
1804	}
1805
1806	if (existing == 0) {
1807
1808		DTRACE_PROBE(_ii_config);
1809
1810		/* first time this shadow has been set up */
1811		mutex_enter(&ip->bi_mutex);
1812		bm_header = _ii_bm_header_get(ip, &tmp);
1813		mutex_exit(&ip->bi_mutex);
1814		if (bm_header == NULL) {
1815			if (ii_debug > 0)
1816				cmn_err(CE_WARN,
1817				    "!ii: _ii_bm_header_get returned NULL");
1818			rc = DSW_EHDRBMP;
1819			_ii_rlse_devs(ip, rtype);
1820			_ii_reserve_end(ip);
1821			goto fail;
1822		}
1823		bzero(bm_header, sizeof (*bm_header));
1824		/* copy pathnames into it */
1825		(void) strncpy(bm_header->master_vol, uconf.master_vol,
1826		    DSW_NAMELEN);
1827		(void) strncpy(bm_header->shadow_vol, uconf.shadow_vol,
1828		    DSW_NAMELEN);
1829		(void) strncpy(bm_header->bitmap_vol, uconf.bitmap_vol,
1830		    DSW_NAMELEN);
1831		(void) strncpy(bm_header->clstr_name, uconf.cluster_tag,
1832		    DSW_NAMELEN);
1833		(void) strncpy(bm_header->group_name, uconf.group_name,
1834		    DSW_NAMELEN);
1835
1836		if (uconf.cluster_tag[0] != 0)
1837			(void) II_LINK_CLUSTER(ip, uconf.cluster_tag);
1838
1839		if (uconf.group_name[0] != 0)
1840			(void) II_LINK_GROUP(ip, uconf.group_name);
1841
1842
1843		bm_header->ii_state = (uconf.flag & DSW_GOLDEN);
1844		II_FLAG_ASSIGN(bm_header->ii_state, ip);
1845
1846		if (import) {
1847			II_FLAG_SETX(DSW_SHDIMPORT, ip);
1848			bm_header->ii_state |= DSW_SHDIMPORT;
1849		}
1850		if (resized) {
1851			II_FLAG_SETX(DSW_RESIZED, ip);
1852			bm_header->ii_state |= DSW_RESIZED;
1853		}
1854		bm_header->ii_type = (uconf.flag & DSW_GOLDEN) ?
1855		    DSW_GOLDEN_TYPE : DSW_QUICK_TYPE;
1856		bm_header->ii_magic = DSW_DIRTY;
1857		bm_header->ii_version = II_HEADER_VERSION;
1858		bm_header->ii_shdfba = DSW_SHD_BM_OFFSET;
1859		bm_header->ii_copyfba = DSW_COPY_BM_OFFSET;
1860		bm_header->ii_throttle_delay = ip->bi_throttle_delay;
1861		bm_header->ii_throttle_unit = ip->bi_throttle_unit;
1862		ip->bi_shdfba = bm_header->ii_shdfba;
1863		ip->bi_copyfba = bm_header->ii_copyfba;
1864		ip->bi_mtime = ddi_get_time();
1865
1866		/* write it to disk */
1867		mutex_enter(&ip->bi_mutex);
1868		rc = _ii_bm_header_put(bm_header, ip, tmp);
1869		mutex_exit(&ip->bi_mutex);
1870		if (!II_SUCCESS(rc)) {
1871			spcs_s_add(kstatus, rc);
1872			rc = DSW_EHDRBMP;
1873			_ii_rlse_devs(ip, rtype);
1874			_ii_reserve_end(ip);
1875			goto fail;
1876		}
1877		if ((shd_size < mst_size) && (uconf.flag & DSW_GOLDEN) == 0) {
1878		/*
1879		 * shadow volume smaller than master, must use a dependent
1880		 * copy with a bitmap file stored mapping for chunk locations.
1881		 */
1882					/* number of chunks in shadow volume */
1883			nsc_size_t shd_chunks;
1884			nsc_size_t bmp_chunks;
1885			nsc_size_t tmp_chunks;
1886
1887			if (ii_debug > 1)
1888				cmn_err(CE_NOTE, "!ii: using tree index on %s",
1889				    uconf.master_vol);
1890			shd_chunks = shd_size / DSW_SIZE;
1891			/* do not add in partial chunk at end */
1892
1893			ip->bi_mstchks = mst_size / DSW_SIZE;
1894			if (mst_size % DSW_SIZE != 0)
1895				ip->bi_mstchks++;
1896			bmp_chunks = ii_btsize(bmp_size - ip->bi_copyfba -
1897			    DSW_BM_FBA_LEN(ip->bi_size));
1898			tmp_chunks = ip->bi_copyfba +
1899			    DSW_BM_FBA_LEN(ip->bi_size);
1900			if (bmp_chunks < (nsc_size_t)ip->bi_mstchks) {
1901				if (ii_debug > -1) {
1902					cmn_err(CE_NOTE, "!ii: bitmap vol too"
1903					    "small: %" NSC_SZFMT " vs. %"
1904					    NSC_SZFMT, bmp_size,
1905					    tmp_chunks);
1906				}
1907				spcs_s_add(kstatus, rc);
1908				rc = DSW_EHDRBMP;
1909				_ii_rlse_devs(ip, rtype);
1910				_ii_reserve_end(ip);
1911				goto fail;
1912			}
1913			mutex_enter(&ip->bi_mutex);
1914			II_FLAG_SET(DSW_TREEMAP, ip);
1915			mutex_exit(&ip->bi_mutex);
1916
1917			/* following values are written to header by ii_tinit */
1918#if (defined(NSC_MULTI_TERABYTE) && !defined(II_MULTIMULTI_TERABYTE))
1919			ASSERT(shd_chunks <= INT32_MAX);
1920			ASSERT(mst_size / DSW_SIZE <= INT32_MAX);
1921#endif
1922			ip->bi_mstchks = mst_size / DSW_SIZE;
1923			if (mst_size % DSW_SIZE != 0)
1924				ip->bi_mstchks++;
1925#ifdef	II_MULTIMULTI_TERABYTE
1926			ip->bi_shdchks = shd_chunks;
1927#else
1928			/* still have 31 bit chunkid's */
1929			ip->bi_shdchks = (chunkid_t)shd_chunks;
1930#endif
1931			ip->bi_shdchkused = 0;
1932			rc = ii_tinit(ip);
1933		} else {
1934			ip->bi_shdchks = shd_size / DSW_SIZE;
1935			ip->bi_shdchkused = 0;
1936		}
1937		if (rc == 0)
1938			rc = II_LOAD_BMP(ip, 1);
1939		if (rc == 0)
1940			rc = II_ZEROBM(ip);
1941		if (rc == 0)
1942			rc = II_COPYBM(ip);	/* also clear copy bitmap */
1943		if (rc == 0 && (uconf.flag & DSW_GOLDEN) && !import)
1944			rc = ii_fill_copy_bmp(ip);
1945		if (rc) {
1946			spcs_s_add(kstatus, rc);
1947			rc = DSW_EHDRBMP;
1948			_ii_rlse_devs(ip, rtype);
1949			goto fail;
1950		}
1951		/* check that changing shadow won't upset RDC */
1952		if (ii_update_denied(ip, kstatus, 0, 1)) {
1953			rc = DSW_EOPACKAGE;
1954			_ii_rlse_devs(ip, rtype);
1955			_ii_reserve_end(ip);
1956			goto fail;
1957		}
1958		ip->bi_disabled = 0;	/* all okay and ready, we can go now */
1959		_ii_rlse_devs(ip, rtype);
1960		/* no _ii_reserve_end() here - we must register first */
1961		ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd),
1962		    NSC_CACHE|NSC_DEVICE, _ii_io);
1963		if (!nshadows)
1964			ii_register_mst(ip);
1965		ii_register_shd(ip);
1966
1967		if (!ii_register_ok(ip)) {
1968			ip->bi_disabled = 1;	/* argh */
1969			rc = DSW_EREGISTER;
1970			goto fail;
1971		}
1972		/* no _ii_reserve_begin() here -- we're still in process */
1973		(void) _ii_rsrv_devs(ip, rtype, II_INTERNAL);
1974
1975		if (ii_debug > 0)
1976			cmn_err(CE_NOTE, "!ii: config: master %s shadow %s",
1977			    uconf.master_vol, uconf.shadow_vol);
1978		rc = 0;
1979		if ((uconf.flag & DSW_GOLDEN) && !import) {
1980			mutex_enter(&ip->bi_mutex);
1981			II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip);
1982			ip->bi_ioctl++;	/* we are effectively in an ioctl */
1983			mutex_exit(&ip->bi_mutex);
1984			rc = _ii_copyvol(ip, 0, rtype, kstatus, 1);
1985		}
1986		_ii_rlse_devs(ip, rtype);
1987		_ii_reserve_end(ip);
1988
1989		++iigkstat.num_sets.value.ul;
1990
1991		return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
1992	}
1993
1994	ip->bi_shdchks = shd_size / DSW_SIZE;
1995	ip->bi_shdfba = shdfba;
1996	ip->bi_copyfba = copyfba;
1997	rc = II_LOAD_BMP(ip, 0);		/* reload saved bitmap */
1998	mutex_enter(&ip->bi_mutex);
1999	if (rc == 0)
2000		bm_header = _ii_bm_header_get(ip, &tmp);
2001	mutex_exit(&ip->bi_mutex);
2002	if (rc || bm_header == NULL) {
2003		if (existing) {
2004			goto no_more_bmp_tests;
2005		}
2006		rc = DSW_EHDRBMP;
2007		goto fail;
2008	}
2009
2010	/*
2011	 * If the header is dirty and it wasn't kept on persistent storage
2012	 * then the bitmaps must be assumed to be bad.
2013	 */
2014	if (bm_header->ii_magic == DSW_DIRTY &&
2015	    ip->bi_bitmap_ops != &alloc_buf_bmp) {
2016		type = bm_header->ii_type;
2017		_ii_bm_header_free(bm_header, ip, tmp);
2018		if (type == DSW_GOLDEN_TYPE) {
2019			if ((ip->bi_flags & DSW_COPYINGM) != 0)
2020				_ii_error(ip, DSW_SHDOFFLINE);
2021			else if ((ip->bi_flags & DSW_COPYINGS) != 0)
2022				_ii_error(ip, DSW_MSTOFFLINE);
2023			else {
2024				/* No copying, so they're just different */
2025				rc = ii_fill_copy_bmp(ip);
2026				if (rc) {
2027					spcs_s_add(kstatus, rc);
2028					rc = DSW_EHDRBMP;
2029					goto fail;
2030				}
2031			}
2032		} else
2033			_ii_error(ip, DSW_SHDOFFLINE);
2034
2035		mutex_enter(&ip->bi_mutex);
2036		bm_header = _ii_bm_header_get(ip, &tmp);
2037		mutex_exit(&ip->bi_mutex);
2038		if (bm_header == NULL) {
2039			rc = DSW_EHDRBMP;
2040			goto fail;
2041		}
2042	}
2043
2044	bm_header->ii_magic = DSW_DIRTY;
2045	mutex_enter(&ip->bi_mutex);
2046	rc = _ii_bm_header_put(bm_header, ip, tmp);
2047	mutex_exit(&ip->bi_mutex);
2048	if (!II_SUCCESS(rc)) {
2049		spcs_s_add(kstatus, rc);
2050		rc = DSW_EHDRBMP;
2051		goto fail;
2052	}
2053
2054	ip->bi_bmp_tok = _ii_register_path(ii_pathname(ip->bi_bmpfd),
2055	    NSC_CACHE|NSC_DEVICE, _ii_io);
2056no_more_bmp_tests:
2057	_ii_rlse_devs(ip, rtype);
2058	ip->bi_disabled = 0;	/* all okay and ready, we can go now */
2059	if (!nshadows)
2060		ii_register_mst(ip);
2061	if ((ip->bi_flags & DSW_SHDEXPORT) == 0)
2062		ii_register_shd(ip);
2063
2064	if (!ii_register_ok(ip)) {
2065		rc = DSW_EREGISTER;
2066		goto fail;
2067	}
2068	_ii_reserve_end(ip);
2069
2070	if (ii_debug > 0)
2071		cmn_err(CE_NOTE, "!ii: config: master %s shadow %s",
2072		    uconf.master_vol, uconf.shadow_vol);
2073
2074	rc = 0;
2075	if (ip->bi_flags & DSW_COPYINGP) {
2076		/* Copy was in progress, so continue it */
2077		(void) _ii_rsrv_devs(ip, rtype, II_INTERNAL);
2078		mutex_enter(&ip->bi_mutex);
2079		ip->bi_ioctl++;		/* we are effectively in an ioctl */
2080		mutex_exit(&ip->bi_mutex);
2081		rc = _ii_copyvol(ip, ((ip->bi_flags & DSW_COPYINGS) != 0) ?
2082		    CV_SHD2MST : 0, rtype, kstatus, 0);
2083	}
2084
2085	++iigkstat.num_sets.value.ul;
2086
2087	return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
2088
2089fail:
2090	/* remove ip from _ii_info_top linked list */
2091	mutex_enter(&_ii_info_mutex);
2092	for (ipp = &_ii_info_top; *ipp; ipp = &((*ipp)->bi_next)) {
2093		if (ip == *ipp) {
2094			*ipp = ip->bi_next;
2095			break;
2096		}
2097	}
2098	mutex_exit(&_ii_info_mutex);
2099	ii_sibling_free(ip);
2100
2101	return (spcs_s_ocopyoutf(&kstatus, uconf.status, rc));
2102}
2103
2104static int
2105_ii_perform_disable(char *setname, spcs_s_info_t *kstatusp, int reclaim)
2106{
2107	_ii_info_t **xip, *ip;
2108	_ii_overflow_t *op;
2109	nsc_buf_t *tmp = NULL;
2110	int rc;
2111	ii_header_t *bm_header;
2112	int rtype;
2113
2114	mutex_enter(&_ii_info_mutex);
2115	ip = _ii_find_set(setname);
2116	if (ip == NULL) {
2117		mutex_exit(&_ii_info_mutex);
2118		return (DSW_ENOTFOUND);
2119	}
2120
2121	if ((ip->bi_flags & DSW_GOLDEN) &&
2122	    ((ip->bi_flags & DSW_COPYINGP) != 0)) {
2123		/*
2124		 * Cannot disable an independent copy while still copying
2125		 * as it means that a data dependency exists.
2126		 */
2127		mutex_exit(&_ii_info_mutex);
2128		_ii_ioctl_done(ip);
2129		mutex_exit(&ip->bi_mutex);
2130		DTRACE_PROBE(_ii_perform_disable_end_DSW_EDEPENDENCY);
2131		return (DSW_EDEPENDENCY);
2132	}
2133
2134	if ((ip->bi_flags & DSW_GOLDEN) == 0 &&
2135	    ii_update_denied(ip, *kstatusp, 0, 1)) {
2136		/* Cannot disable a dependent shadow while RDC is unsure */
2137		mutex_exit(&_ii_info_mutex);
2138		_ii_ioctl_done(ip);
2139		mutex_exit(&ip->bi_mutex);
2140		DTRACE_PROBE(DSW_EOPACKAGE);
2141		return (DSW_EOPACKAGE);
2142	}
2143
2144	if (((ip->bi_flags & DSW_RESIZED) == DSW_RESIZED) &&
2145	    ii_need_same_size(ip)) {
2146		/* We can't disable the set whilst RDC is using it */
2147		mutex_exit(&_ii_info_mutex);
2148		_ii_ioctl_done(ip);
2149		mutex_exit(&ip->bi_mutex);
2150		cmn_err(CE_WARN, "!Cannot disable II set: would change "
2151		    "volume size on RDC");
2152		DTRACE_PROBE(DSW_EOPACKAGE_resize);
2153		return (DSW_EOPACKAGE);
2154	}
2155
2156	ip->bi_disabled = 1;
2157	if (NSHADOWS(ip) && (ip->bi_master == ip)) {
2158		ip->bi_flags &= (~DSW_COPYING);
2159		ip->bi_state |= DSW_MULTIMST;
2160	}
2161	mutex_exit(&_ii_info_mutex);
2162
2163	_ii_ioctl_done(ip);
2164	mutex_exit(&ip->bi_mutex);
2165
2166	_ii_stopvol(ip);
2167
2168	rtype = SHDR|BMP;
2169	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
2170		spcs_s_add(*kstatusp, rc);
2171		DTRACE_PROBE(DSW_ERSRVFAIL);
2172		return (DSW_ERSRVFAIL);
2173	}
2174
2175	if ((ii_header < 128) &&
2176	    (((ip->bi_flags & DSW_GOLDEN) == 0) ||
2177	    (ip->bi_flags & DSW_COPYING))) {
2178		/*
2179		 * Not a full copy so attempt to prevent use of partial copy
2180		 * by clearing where the first ufs super-block would be
2181		 * located. Solaris often incorporates the disk header into
2182		 * the start of the first slice, so avoid clearing the very
2183		 * first 16 blocks of the volume.
2184		 */
2185
2186		if (ii_debug > 1)
2187			cmn_err(CE_NOTE, "!ii: Shadow copy invalidated");
2188		II_READ_START(ip, shadow);
2189		rc = nsc_alloc_buf(SHDFD(ip), ii_header, 128 - ii_header,
2190		    NSC_RDWRBUF, &tmp);
2191		II_READ_END(ip, shadow, rc, 128 - ii_header);
2192		if (II_SUCCESS(rc)) {
2193			rc = nsc_zero(tmp, ii_header, 128 - ii_header, 0);
2194			if (II_SUCCESS(rc)) {
2195				II_NSC_WRITE(ip, shadow, rc, tmp, ii_header,
2196				    (128 - ii_header), 0);
2197			}
2198		}
2199		if (tmp)
2200			(void) nsc_free_buf(tmp);
2201		if (!II_SUCCESS(rc))
2202			_ii_error(ip, DSW_SHDOFFLINE);
2203	}
2204
2205	/* this rw_enter forces us to drain all active IO */
2206	rw_enter(&ip->bi_linkrw, RW_WRITER);
2207	rw_exit(&ip->bi_linkrw);
2208
2209	/* remove ip from _ii_info_top linked list */
2210	mutex_enter(&_ii_info_mutex);
2211	for (xip = &_ii_info_top; *xip; xip = &((*xip)->bi_next)) {
2212		if (ip == *xip) {
2213			*xip = ip->bi_next;
2214			break;
2215		}
2216	}
2217	if (ip->bi_kstat) {
2218		kstat_delete(ip->bi_kstat);
2219		ip->bi_kstat = NULL;
2220	}
2221	mutex_exit(&_ii_info_mutex);
2222
2223	rc = II_SAVE_BMP(ip, 1);
2224	mutex_enter(&ip->bi_mutex);
2225	if (rc == 0)
2226		bm_header = _ii_bm_header_get(ip, &tmp);
2227	if (rc == 0 && bm_header) {
2228		if (ii_debug > 1)
2229			cmn_err(CE_NOTE, "!ii: Invalid header written");
2230		bm_header->ii_magic = DSW_INVALID;
2231		/* write it to disk */
2232		(void) _ii_bm_header_put(bm_header, ip, tmp);
2233	}
2234	mutex_exit(&ip->bi_mutex);
2235
2236	op = ip->bi_overflow;
2237	if (op && (reclaim == -1)) {
2238		reclaim = (op->ii_drefcnt == 1? NO_RECLAIM : RECLAIM);
2239	}
2240
2241	if ((op != NULL) && (op->ii_hversion >= 1) &&
2242	    (op->ii_hmagic == II_OMAGIC)) {
2243		mutex_enter(&_ii_overflow_mutex);
2244		if (ip->bi_flags & DSW_OVRHDRDRTY) {
2245			mutex_enter(&ip->bi_mutex);
2246			ip->bi_flags &= ~DSW_OVRHDRDRTY;
2247			mutex_exit(&ip->bi_mutex);
2248			ASSERT(op->ii_urefcnt > 0);
2249			op->ii_urefcnt--;
2250		}
2251		if (op->ii_urefcnt == 0) {
2252			op->ii_flags &= ~IIO_CNTR_INVLD;
2253			op->ii_unused = op->ii_nchunks - 1;
2254		}
2255		mutex_exit(&_ii_overflow_mutex);
2256	}
2257	ii_overflow_free(ip, reclaim);
2258	_ii_rlse_devs(ip, rtype);
2259
2260	ii_sibling_free(ip);
2261
2262	--iigkstat.num_sets.value.ul;
2263	return (0);
2264}
2265
2266/*
2267 * _ii_disable
2268 *	Deconfigures an II pair
2269 *
2270 * Calling/Exit State:
2271 *	Returns 0 if the pair was disabled. Otherwise an error code
2272 *	is returned and any additional error information is copied
2273 *	out to the user.
2274 *
2275 * Description:
2276 *	Reads the user configuration structure and attempts to
2277 *	deconfigure that pairing based on the master device pathname.
2278 */
2279
2280int
2281_ii_disable(intptr_t arg, int ilp32, int *rvp)
2282{
2283	dsw_ioctl_t uparms;
2284	dsw_ioctl32_t uparms32;
2285	_ii_overflow_t *op;
2286	int rc, rerr;
2287	spcs_s_info_t kstatus;
2288	uint64_t hash;
2289	int reclaim;
2290	_ii_lsthead_t *oldhead, **head;
2291	_ii_lstinfo_t *np, **xnp, *oldp;
2292
2293	*rvp = 0;
2294
2295	if (ilp32) {
2296		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
2297			return (EFAULT);
2298		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
2299		uparms.status = (spcs_s_info_t)uparms32.status;
2300	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
2301		return (EFAULT);
2302
2303	kstatus = spcs_s_kcreate();
2304	if (kstatus == NULL)
2305		return (ENOMEM);
2306
2307	if (!uparms.shadow_vol[0])
2308		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
2309
2310	DTRACE_PROBE2(_ii_disable_info, char *, uparms.shadow_vol,
2311	    int, uparms.flags);
2312
2313	/* group or single set? */
2314	if (uparms.flags & CV_IS_GROUP) {
2315		hash = nsc_strhash(uparms.shadow_vol);
2316		mutex_enter(&_ii_group_mutex);
2317		for (head = &_ii_group_top; *head;
2318		    head = &((*head)->lst_next)) {
2319			if ((hash == (*head)->lst_hash) &&
2320			    strncmp((*head)->lst_name, uparms.shadow_vol,
2321			    DSW_NAMELEN) == 0)
2322				break;
2323		}
2324
2325		if (!*head) {
2326			mutex_exit(&_ii_group_mutex);
2327			return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2328			    DSW_EGNOTFOUND));
2329		}
2330
2331		/* clear any overflow vol usage counts */
2332		for (np = (*head)->lst_start; np; np = np->lst_next) {
2333			if (np->lst_ip->bi_overflow) {
2334				np->lst_ip->bi_overflow->ii_detachcnt = 0;
2335			}
2336		}
2337
2338		/* now increment */
2339		for (np = (*head)->lst_start; np; np = np->lst_next) {
2340			if (np->lst_ip->bi_overflow) {
2341				++np->lst_ip->bi_overflow->ii_detachcnt;
2342			}
2343		}
2344
2345		/* finally, disable all group members */
2346		rerr = 0;
2347		xnp = &(*head)->lst_start;
2348		while (*xnp) {
2349			op = (*xnp)->lst_ip->bi_overflow;
2350			if (op) {
2351				reclaim = (op->ii_drefcnt == op->ii_detachcnt?
2352				    NO_RECLAIM : RECLAIM);
2353				--op->ii_detachcnt;
2354			}
2355
2356			/* clear out the group pointer */
2357			(*xnp)->lst_ip->bi_group = NULL;
2358
2359			rc = _ii_perform_disable((*xnp)->lst_ip->bi_keyname,
2360			    &kstatus, reclaim);
2361			if (rc) {
2362				/* restore group name */
2363				(*xnp)->lst_ip->bi_group = (*head)->lst_name;
2364
2365				/* restore detachcnt */
2366				if (op) {
2367					++op->ii_detachcnt;
2368				}
2369
2370				/* don't delete branch */
2371				++rerr;
2372				spcs_s_add(kstatus, rc);
2373
2374				/* move forward in linked list */
2375				xnp = &(*xnp)->lst_next;
2376			} else {
2377				oldp = (*xnp);
2378				*xnp = (*xnp)->lst_next;
2379				kmem_free(oldp, sizeof (_ii_lstinfo_t));
2380			}
2381		}
2382		if (rerr) {
2383			mutex_exit(&_ii_group_mutex);
2384			return (spcs_s_ocopyoutf(&kstatus, uparms.status,
2385			    DSW_EDISABLE));
2386		}
2387		/* no errors, all sets disabled, OK to free list head */
2388		oldhead = *head;
2389		*head = (*head)->lst_next;
2390		kmem_free(oldhead, sizeof (_ii_lsthead_t));
2391		mutex_exit(&_ii_group_mutex);
2392	} else {
2393		/* only a single set is being disabled */
2394		rc = _ii_perform_disable(uparms.shadow_vol, &kstatus, -1);
2395		if (rc)
2396			return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
2397	}
2398
2399	spcs_s_kfree(kstatus);
2400
2401	return (0);
2402}
2403
2404
/*
 * _ii_stat
 *	Report the current state of a shadow set.
 *
 * Calling/Exit State:
 *	Returns 0 on success, otherwise an error code is returned
 *	and any additional error information is copied out to the user.
 *	On success the caller's dsw_stat_t (or dsw_stat32_t for an ILP32
 *	caller) is filled with the set flags, size, mtime, shadow chunk
 *	counts and the overflow, group and cluster names, and is then
 *	copied back out.
 */
/*ARGSUSED*/
int
_ii_stat(intptr_t arg, int ilp32, int *rvp)
{
	dsw_stat_t ustat;
	dsw_stat32_t ustat32;
	spcs_s_info_t kstatus;
	_ii_info_t *ip;
	char *ovrname, *grpname, *clname;

	if (!ilp32) {
		if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0)
			return (EFAULT);
	} else {
		if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0)
			return (EFAULT);
		II_TAIL_COPY(ustat, ustat32, shadow_vol, dsw_stat_t);
		ustat.status = (spcs_s_info_t)ustat32.status;
	}

	if ((kstatus = spcs_s_kcreate()) == NULL)
		return (ENOMEM);

	if (ustat.shadow_vol[0] == '\0')
		return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY));

	mutex_enter(&_ii_info_mutex);
	ip = _ii_find_set(ustat.shadow_vol);
	mutex_exit(&_ii_info_mutex);
	if (ip == NULL) {
		return (spcs_s_ocopyoutf(&kstatus, ustat.status,
		    DSW_ENOTFOUND));
	}

	/* the name strings go straight into whichever struct is returned */
	if (ilp32) {
		ovrname = ustat32.overflow_vol;
		grpname = ustat32.group_name;
		clname = ustat32.cluster_tag;
	} else {
		ovrname = ustat.overflow_vol;
		grpname = ustat.group_name;
		clname = ustat.cluster_tag;
	}

	/* numeric state is always staged in the native structure */
	ustat.stat = ip->bi_flags;
	ustat.size = ip->bi_size;
	ustat.mtime = ip->bi_mtime;
	ustat.shdsize = ip->bi_shdchks;
	if (ip->bi_flags & DSW_TREEMAP)
		ustat.shdused = ip->bi_shdchkused;
	else
		ustat.shdused = 0;

	/* overflow volume name, blank when none is attached */
	bzero(ovrname, DSW_NAMELEN);
	if (ip->bi_overflow != NULL) {
		(void) strncpy(ovrname, ip->bi_overflow->ii_volname,
		    DSW_NAMELEN);
	}

	/* group and cluster associations, blank when unset */
	bzero(grpname, DSW_NAMELEN);
	bzero(clname, DSW_NAMELEN);
	if (ip->bi_group != NULL)
		(void) strncpy(grpname, ip->bi_group, DSW_NAMELEN);
	if (ip->bi_cluster != NULL)
		(void) strncpy(clname, ip->bi_cluster, DSW_NAMELEN);

	_ii_ioctl_done(ip);
	mutex_exit(&ip->bi_mutex);

	spcs_s_kfree(kstatus);

	if (!ilp32) {
		if (copyout(&ustat, (void *)arg, sizeof (ustat)))
			return (EFAULT);
		return (0);
	}

	/* narrow the staged values into the 32-bit layout */
	ustat32.stat = ustat.stat;
	ustat32.size = ustat.size;
	ustat32.shdsize = ustat.shdsize;
	ustat32.shdused = ustat.shdused;
	ustat32.mtime = ustat.mtime;
	if (copyout(&ustat32, (void *)arg, sizeof (ustat32)))
		return (EFAULT);

	return (0);
}
2496
2497
2498/*
2499 * _ii_list
2500 *	List what shadow sets are currently configured.
2501 *
2502 * Calling/Exit State:
2503 *	Returns 0 on success, otherwise an error code is returned
2504 *	and any additional error information is copied out to the user.
2505 */
2506/*ARGSUSED*/
2507int
2508_ii_list(intptr_t arg, int ilp32, int *rvp)
2509{
2510	dsw_list_t ulist;
2511	dsw_list32_t ulist32;
2512	_ii_info_t *ip;
2513	dsw_config_t cf, *cfp;
2514	dsw_config32_t cf32, *cf32p;
2515	int rc;
2516	int used;
2517	spcs_s_info_t kstatus;
2518
2519	if (ilp32) {
2520		if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0)
2521			return (EFAULT);
2522		II_TAIL_COPY(ulist, ulist32, list_size, dsw_list_t);
2523		ulist.status = (spcs_s_info_t)ulist32.status;
2524	} else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0)
2525		return (EFAULT);
2526
2527	kstatus = spcs_s_kcreate();
2528	if (kstatus == NULL)
2529		return (ENOMEM);
2530
2531	cf32p = (dsw_config32_t *)(unsigned long)ulist32.list;
2532	cfp = ulist.list;
2533	ulist.list_used = 0;
2534	mutex_enter(&_ii_info_mutex);
2535	ip = _ii_info_top;
2536
2537	DTRACE_PROBE1(_ii_list_count, int, ulist.list_size);
2538
2539	for (rc = used = 0; used < ulist.list_size && ip; ip = ip->bi_next) {
2540
2541		if (ip->bi_disabled)
2542			continue;
2543
2544		mutex_enter(&ip->bi_mutex);
2545		ip->bi_ioctl++;
2546		if (ilp32) {
2547			bzero(&cf32, sizeof (cf32));
2548			cf32.flag = ip->bi_flags;
2549			(void) strncpy(cf32.master_vol,
2550			    ii_pathname(ip->bi_mstfd), DSW_NAMELEN);
2551			(void) strncpy(cf32.shadow_vol,
2552			    ip->bi_keyname, DSW_NAMELEN);
2553			(void) strncpy(cf32.bitmap_vol, (ip->bi_bmpfd)
2554			    ? ii_pathname(ip->bi_bmpfd)
2555			    : "<offline_bitmap>", DSW_NAMELEN);
2556			if (copyout(&cf32, (void *)cf32p, sizeof (cf32)))
2557				rc = EFAULT;
2558			cf32p++;
2559		} else {
2560			bzero(&cf, sizeof (cf));
2561			cf.flag = ip->bi_flags;
2562			(void) strncpy(cf.master_vol,
2563			    ii_pathname(ip->bi_mstfd), DSW_NAMELEN);
2564			(void) strncpy(cf.shadow_vol,
2565			    ip->bi_keyname, DSW_NAMELEN);
2566			(void) strncpy(cf.bitmap_vol, (ip->bi_bmpfd)
2567			    ? ii_pathname(ip->bi_bmpfd)
2568			    : "<offline_bitmap>", DSW_NAMELEN);
2569			if (copyout(&cf, (void *)cfp, sizeof (cf)))
2570				rc = EFAULT;
2571			cfp++;
2572		}
2573		_ii_ioctl_done(ip);
2574		mutex_exit(&ip->bi_mutex);
2575		used++;
2576	}
2577	mutex_exit(&_ii_info_mutex);
2578
2579	spcs_s_kfree(kstatus);
2580	if (rc)
2581		return (rc);
2582
2583	ulist.list_used = used;
2584	if (ilp32) {
2585		ulist32.list_used = ulist.list_used;
2586		if (copyout(&ulist32, (void *)arg, sizeof (ulist32)))
2587			return (EFAULT);
2588	} else if (copyout(&ulist, (void *)arg, sizeof (ulist)))
2589		return (EFAULT);
2590
2591	return (0);
2592}
2593
2594/*
2595 * _ii_listlen
2596 *	Counts the number of items the DSWIOC_LIST and DSWIOC_OLIST
2597 *	ioctl calls would return.
2598 *
2599 * Calling/Exit State:
2600 *	Returns 0 on success, otherwise an error code is returned.
2601 *	Result is returned as successful ioctl value.
2602 */
2603/*ARGSUSED*/
2604int
2605_ii_listlen(int cmd, int ilp32, int *rvp)
2606{
2607	_ii_info_t *ip;
2608	_ii_overflow_t *op;
2609	int count = 0;
2610
2611	switch (cmd) {
2612
2613	case DSWIOC_LISTLEN:
2614		mutex_enter(&_ii_info_mutex);
2615		for (ip = _ii_info_top; ip; ip = ip->bi_next) {
2616			if (ip->bi_disabled == 0) {
2617				count++;
2618			}
2619		}
2620		mutex_exit(&_ii_info_mutex);
2621		break;
2622	case DSWIOC_OLISTLEN:
2623		mutex_enter(&_ii_overflow_mutex);
2624		for (op = _ii_overflow_top; op; op = op->ii_next)
2625			count++;
2626		mutex_exit(&_ii_overflow_mutex);
2627		break;
2628	default:
2629		return (EINVAL);
2630	}
2631	*rvp = count;
2632
2633	return (0);
2634}
2635
2636/*
2637 * _ii_report_bmp
2638 *
2639 *	Report to the user daemon that the bitmap has gone bad
2640 */
2641static int
2642_ii_report_bmp(_ii_info_t *ip)
2643{
2644	int rc;
2645	struct nskernd *nsk;
2646
2647	nsk = kmem_zalloc(sizeof (*nsk), KM_SLEEP);
2648	if (!nsk) {
2649		return (ENOMEM);
2650	}
2651	nsk->command = NSKERND_IIBITMAP;
2652	nsk->data1 = (int64_t)(ip->bi_flags | DSW_BMPOFFLINE);
2653	(void) strncpy(nsk->char1, ip->bi_keyname,
2654	    min(DSW_NAMELEN, NSC_MAXPATH));
2655
2656	rc = nskernd_get(nsk);
2657	if (rc == 0) {
2658		rc = (int)nsk->data1;
2659	}
2660	if (rc == 0) {
2661		DTRACE_PROBE(_ii_report_bmp_end);
2662	} else {
2663		DTRACE_PROBE1(_ii_report_bmp_end_2, int, rc);
2664	}
2665	kmem_free(nsk, sizeof (*nsk));
2666	return (rc);
2667}
2668
/*
 * _ii_offline
 *	Mark one or more volumes of a shadow set as offline.
 *
 * Calling/Exit State:
 *	Returns 0 on success, otherwise an error code is returned
 *	and any additional error information is copied out to the user.
 *	Only the DSW_OFFLINE bits of the caller's flags are passed on to
 *	_ii_error().
 */
/*ARGSUSED*/
int
_ii_offline(intptr_t arg, int ilp32, int *rvp)
{
	dsw_ioctl_t uparms;
	dsw_ioctl32_t uparms32;
	spcs_s_info_t kstatus;
	_ii_info_t *ip;
	int rc;

	if (!ilp32) {
		if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
			return (EFAULT);
	} else {
		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
			return (EFAULT);
		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
		uparms.status = (spcs_s_info_t)uparms32.status;
	}

	if ((kstatus = spcs_s_kcreate()) == NULL)
		return (ENOMEM);

	if (uparms.shadow_vol[0] == '\0')
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));

	mutex_enter(&_ii_info_mutex);
	ip = _ii_find_set(uparms.shadow_vol);
	mutex_exit(&_ii_info_mutex);
	if (ip == NULL) {
		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
		    DSW_ENOTFOUND));
	}

	rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL);
	if (rc != 0) {
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		spcs_s_add(kstatus, rc);
		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
		    DSW_ERSRVFAIL));
	}

	/* _ii_error() is called with bi_mutex dropped */
	mutex_exit(&ip->bi_mutex);
	_ii_error(ip, uparms.flags & DSW_OFFLINE);

	/* retake the lock just long enough to finish the ioctl */
	mutex_enter(&ip->bi_mutex);
	_ii_ioctl_done(ip);
	mutex_exit(&ip->bi_mutex);

	_ii_rlse_devs(ip, BMP);

	spcs_s_kfree(kstatus);

	return (0);
}
2729
2730
/*
 * _ii_wait
 *	Block until any copy in progress on the set has finished, and
 *	optionally release the copy/update PID lock.
 *
 * Calling/Exit State:
 *	Returns 0 if the copy completed, otherwise error code.
 *	EINTR is reported when the wait is broken by a signal.  With
 *	CV_LOCK_PID set in the caller's flags the result reflects the
 *	unlock attempt instead: DSW_ENOTLOCKED when no PID holds the set,
 *	DSW_EINUSE when a different PID holds it.  A pid of -1 clears the
 *	lock whoever owns it.
 */
/*ARGSUSED*/
int
_ii_wait(intptr_t arg, int ilp32, int *rvp)
{
	dsw_ioctl_t uparms;
	dsw_ioctl32_t uparms32;
	spcs_s_info_t kstatus;
	_ii_info_t *ip;
	int rc = 0;

	if (!ilp32) {
		if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
			return (EFAULT);
	} else {
		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
			return (EFAULT);
		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
		uparms.status = (spcs_s_info_t)uparms32.status;
		uparms.pid = uparms32.pid;
	}

	if ((kstatus = spcs_s_kcreate()) == NULL)
		return (ENOMEM);

	if (uparms.shadow_vol[0] == '\0')
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));

	mutex_enter(&_ii_info_mutex);
	ip = _ii_find_set(uparms.shadow_vol);
	mutex_exit(&_ii_info_mutex);
	if (ip == NULL) {
		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
		    DSW_ENOTFOUND));
	}

	/* sleep on the copy-done cv until the copy ends or a signal lands */
	while (rc == 0 && (ip->bi_flags & DSW_COPYINGP) != 0) {
		if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0)
			rc = EINTR;
	}

	/* Is this an attempt to unlock the copy/update PID? */
	if (uparms.flags & CV_LOCK_PID) {
		if (ip->bi_locked_pid == 0) {
			rc = DSW_ENOTLOCKED;
		} else if (uparms.pid != -1 &&
		    uparms.pid != ip->bi_locked_pid) {
			rc = DSW_EINUSE;
		} else {
			/* forced clears are logged, owner releases are not */
			if (uparms.pid == -1) {
				cmn_err(CE_WARN,
				    "!ii: Copy/Update PID %d, cleared",
				    ip->bi_locked_pid);
			}
			ip->bi_locked_pid = 0;
		}
	}

	_ii_ioctl_done(ip);
	mutex_exit(&ip->bi_mutex);

	return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
2800
2801
2802static int
2803_ii_reset_mstvol(_ii_info_t *ip)
2804{
2805	_ii_info_t *xip;
2806
2807	if (!NSHADOWS(ip))
2808		return (DSW_COPYINGS | DSW_COPYINGP);
2809
2810	/* check for siblings updating master */
2811	for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
2812		if (xip == ip)
2813			continue;
2814		/* check if master is okay */
2815		if ((xip->bi_flags & DSW_MSTOFFLINE) == 0) {
2816			return (0);
2817		}
2818	}
2819
2820	return (DSW_COPYINGS | DSW_COPYINGP);
2821}
2822
/*
 * _ii_reset
 *	Reset offlined underlying volumes
 *
 * Calling/Exit State:
 *	Returns 0 on success, otherwise an error code is returned
 *	and any additional error information is copied out to the user.
 *
 * Description:
 *	Does nothing if none of the DSW_OFFLINE bits are set on the set.
 *	Otherwise the bitmap header is read directly, the offline bits
 *	are cleared, the copy flags are chosen from which volume was
 *	offline, and the bitmaps are re-initialised.  For a dependent
 *	shadow the copy flags are cleared again and no copy is run; for
 *	an independent shadow _ii_copyvol() is called in the direction
 *	given by the copy flags.
 *
 * Remarks:
 *	bi_mutex is released here before it is ever acquired in this
 *	routine, so _ii_find_set() is presumed to return with it held -
 *	confirm against _ii_find_set().
 */
/*ARGSUSED*/
int
_ii_reset(intptr_t arg, int ilp32, int *rvp)
{
	dsw_ioctl_t uparms;
	dsw_ioctl32_t uparms32;
	_ii_info_t *ip;
	nsc_buf_t *tmp = NULL;
	int rc;
	int flags;
	ii_header_t *bm_header;
	spcs_s_info_t kstatus;
	int rtype;

	if (ilp32) {
		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
			return (EFAULT);
		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
		uparms.status = (spcs_s_info_t)uparms32.status;
	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
		return (EFAULT);

	kstatus = spcs_s_kcreate();
	if (kstatus == NULL)
		return (ENOMEM);

	if (!uparms.shadow_vol[0])
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));

	mutex_enter(&_ii_info_mutex);
	ip = _ii_find_set(uparms.shadow_vol);
	mutex_exit(&_ii_info_mutex);
	if (ip == NULL)
		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
		    DSW_ENOTFOUND));

	mutex_exit(&ip->bi_mutex);

	/* Figure out what to do according to what was flagged as offline */

	if ((ip->bi_flags & DSW_OFFLINE) == 0) {
		/* Nothing offline, so no op */
		mutex_enter(&ip->bi_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		spcs_s_kfree(kstatus);
		return (0);
	}

	if (!ip->bi_bmpfd) {
		/* No bitmap fd, can't do anything */
		mutex_enter(&ip->bi_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		spcs_s_kfree(kstatus);
		return (DSW_EHDRBMP);
	}

	rtype = MSTR|SHDR|BMP;
	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
		mutex_enter(&ip->bi_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		spcs_s_add(kstatus, rc);
		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
		    DSW_ERSRVFAIL));
	}

	/*
	 * Cannot use _ii_bm_header_get as it will fail if DSW_BMPOFFLINE
	 */
	II_READ_START(ip, bitmap);
	rc = nsc_alloc_buf(ip->bi_bmpfd, 0, FBA_LEN(sizeof (ii_header_t)),
	    NSC_RDWRBUF, &tmp);
	II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t)));
	if (!II_SUCCESS(rc)) {
		_ii_rlse_devs(ip, rtype);
		mutex_enter(&ip->bi_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		if (tmp)
			(void) nsc_free_buf(tmp);
		_ii_error(ip, DSW_BMPOFFLINE);
		spcs_s_add(kstatus, rc);
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
	}

	bm_header = (ii_header_t *)(tmp)->sb_vec[0].sv_addr;
	if (bm_header == NULL) {
		_ii_rlse_devs(ip, rtype);
		mutex_enter(&ip->bi_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		if (tmp)
			(void) nsc_free_buf(tmp);
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
	}

	/*
	 * Build the new flag word: start with the copy flags cleared, then
	 * choose the copy flags from which volume had been offline.  Sets
	 * with an imported or exported shadow get no copy flags.
	 */
	flags = ip->bi_flags & ~DSW_COPY_FLAGS;
	if ((flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) == 0) {
		if (((flags & DSW_SHDOFFLINE) == 0) &&
		    ((flags & DSW_MSTOFFLINE) == DSW_MSTOFFLINE)) {
			/* Shadow was OK but master was offline */
			flags |= _ii_reset_mstvol(ip);
		} else if ((flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) {
			/* Shadow was offline, don't care what the master was */
			flags |= (DSW_COPYINGM | DSW_COPYINGP);
		}
	}
	if (ip->bi_flags & DSW_VOVERFLOW) {
		ip->bi_flags &= ~DSW_VOVERFLOW;
		ip->bi_flags |= DSW_FRECLAIM;
	}
	flags &= ~(DSW_OFFLINE | DSW_CFGOFFLINE | DSW_VOVERFLOW | DSW_OVERFLOW);
	if ((ip->bi_flags & DSW_BMPOFFLINE) == DSW_BMPOFFLINE) {
		/* free any overflow allocation */
		ii_overflow_free(ip, INIT_OVR);
		/* Bitmap now OK, so set up new bitmap header */
		(void) strncpy(bm_header->master_vol, ii_pathname(ip->bi_mstfd),
		    DSW_NAMELEN);
		(void) strncpy(bm_header->shadow_vol, ii_pathname(ip->bi_shdfd),
		    DSW_NAMELEN);
		(void) strncpy(bm_header->bitmap_vol, ii_pathname(ip->bi_bmpfd),
		    DSW_NAMELEN);
		if (ip->bi_cluster) {
			(void) strncpy(bm_header->clstr_name, ip->bi_cluster,
			    DSW_NAMELEN);
		}
		if (ip->bi_group) {
			(void) strncpy(bm_header->group_name, ip->bi_group,
			    DSW_NAMELEN);
		}
		bm_header->ii_type = (flags & DSW_GOLDEN) ?
		    DSW_GOLDEN_TYPE : DSW_QUICK_TYPE;
		bm_header->ii_magic = DSW_DIRTY;
		bm_header->ii_version = II_HEADER_VERSION;
		bm_header->ii_shdfba = DSW_SHD_BM_OFFSET;
		bm_header->ii_copyfba = DSW_COPY_BM_OFFSET;
		bm_header->ii_throttle_delay = ip->bi_throttle_delay;
		bm_header->ii_throttle_unit = ip->bi_throttle_unit;
		ip->bi_shdfba = bm_header->ii_shdfba;
		ip->bi_copyfba = bm_header->ii_copyfba;
	} else if ((ip->bi_flags & DSW_SHDOFFLINE) == DSW_SHDOFFLINE) {
		/* bitmap didn't go offline, but shadow did */
		if (ip->bi_overflow) {
			ii_overflow_free(ip, RECLAIM);
		}
	}
	_ii_lock_chunk(ip, II_NULLCHUNK);
	mutex_enter(&ip->bi_mutex);
	II_FLAG_ASSIGN(flags, ip);

	mutex_exit(&ip->bi_mutex);
	rc = ii_fill_copy_bmp(ip);
	if (rc == 0)
		rc = II_ZEROBM(ip);
	if (rc == 0) {
		if ((ip->bi_flags&(DSW_GOLDEN)) == 0) {
			/* just clear bitmaps for dependent copy */
			if (ip->bi_flags & DSW_TREEMAP) {
				/*
				 * The header buffer is handed to
				 * _ii_bm_header_put() here, so tmp is
				 * forgotten and the header is fetched
				 * again once ii_tinit() has succeeded.
				 */
				bm_header->ii_state = ip->bi_flags;
				mutex_enter(&ip->bi_mutex);
				rc = _ii_bm_header_put(bm_header, ip, tmp);
				mutex_exit(&ip->bi_mutex);
				tmp = NULL;
				if (rc == 0) {
					rc = ii_tinit(ip);
					if (rc == 0) {
						mutex_enter(&ip->bi_mutex);
						bm_header =
						    _ii_bm_header_get(ip, &tmp);
						mutex_exit(&ip->bi_mutex);
						/*
						 * Without a header the
						 * code below would write
						 * through a NULL pointer;
						 * fail the reset instead.
						 */
						if (bm_header == NULL)
							rc = EIO;
					}
				}
			}

			if (rc == 0)
				II_FLAG_CLRX(DSW_COPY_FLAGS, ip);
			/*
			 * if copy flags were set, another process may be
			 * waiting
			 */
			if (rc == 0 && (flags & DSW_COPYINGP))
				cv_broadcast(&ip->bi_copydonecv);

			if (rc == 0)
				rc = II_COPYBM(ip);
		}
	}
	_ii_unlock_chunk(ip, II_NULLCHUNK);
	if (rc) {
		if (tmp)
			_ii_bm_header_free(bm_header, ip, tmp);
		mutex_enter(&ip->bi_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		_ii_rlse_devs(ip, rtype);
		spcs_s_add(kstatus, rc);
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
	}
	/* record the final flags in the header and write it out */
	bm_header->ii_state = ip->bi_flags;
	mutex_enter(&ip->bi_mutex);
	rc = _ii_bm_header_put(bm_header, ip, tmp);
	if (!II_SUCCESS(rc)) {
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		_ii_rlse_devs(ip, rtype);
		spcs_s_add(kstatus, rc);
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EHDRBMP));
	}

	/*
	 * check with RDC
	 * NOTE(review): rc is the successful _ii_bm_header_put() result at
	 * this point, so a denied update is reported with that value -
	 * confirm this is intended.
	 */
	if (ii_update_denied(ip, kstatus, (ip->bi_flags & DSW_COPYINGS) ?
	    CV_SHD2MST : 0, 1)) {
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		_ii_rlse_devs(ip, rtype);
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
	}

	/* don't perform copy for dependent shadows */
	if ((ip->bi_flags&(DSW_GOLDEN)) == 0) {
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
		_ii_rlse_devs(ip, rtype);
		return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
	}

	mutex_exit(&ip->bi_mutex);
	/* _ii_copyvol calls _ii_ioctl_done() */
	if (ip->bi_flags & DSW_COPYINGS)
		rc = _ii_copyvol(ip, CV_SHD2MST, rtype, kstatus, 1);
	else if (ip->bi_flags & DSW_COPYINGM)
		rc = _ii_copyvol(ip, 0, rtype, kstatus, 1);
	else {
		/* no copy needed, so finish the ioctl ourselves */
		mutex_enter(&ip->bi_mutex);
		_ii_ioctl_done(ip);
		mutex_exit(&ip->bi_mutex);
	}

	_ii_rlse_devs(ip, rtype);

	return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
}
3075
3076
3077/*
3078 * _ii_version
3079 *	Get version of the InstantImage module.
3080 *
3081 * Calling/Exit State:
3082 *	Returns 0 on success, otherwise EFAULT is returned.
3083 *	The major and minor revisions are copied out to the user if
3084 *	successful.
3085 */
3086/*ARGSUSED*/
3087int
3088_ii_version(intptr_t arg, int ilp32, int *rvp)
3089{
3090	dsw_version_t uversion;
3091	dsw_version32_t uversion32;
3092
3093	if (ilp32) {
3094		if (copyin((void *)arg, &uversion32, sizeof (uversion32)) < 0)
3095			return (EFAULT);
3096
3097		uversion32.major = dsw_major_rev;
3098		uversion32.minor = dsw_minor_rev;
3099		uversion32.micro = dsw_micro_rev;
3100		uversion32.baseline = dsw_baseline_rev;
3101
3102		if (copyout(&uversion32, (void *)arg, sizeof (uversion32)))
3103			return (EFAULT);
3104	} else {
3105		if (copyin((void *)arg, &uversion, sizeof (uversion)) < 0)
3106			return (EFAULT);
3107
3108		uversion.major = dsw_major_rev;
3109		uversion.minor = dsw_minor_rev;
3110		uversion.micro = dsw_micro_rev;
3111		uversion.baseline = dsw_baseline_rev;
3112
3113		if (copyout(&uversion, (void *)arg, sizeof (uversion)))
3114			return (EFAULT);
3115	}
3116
3117	return (0);
3118}
3119
/*
 * _ii_copyparm
 *	Get and set copy parameters.
 *
 * Calling/Exit State:
 *	Returns 0 on success.  EFAULT is returned if the user structure
 *	cannot be copied in or out, ENOMEM if no status block could be
 *	created, and DSW_EEMPTY, DSW_ENOTFOUND or EINVAL are reported
 *	through the status copy-out.
 *	The previous values are returned to the user.
 *
 * Description:
 *	A copy_delay or copy_unit of -1 leaves that parameter alone.
 *	If either supplied value is out of range neither parameter is
 *	changed and EINVAL is reported.
 */
/*ARGSUSED*/
int
_ii_copyparm(intptr_t arg, int ilp32, int *rvp)
{
	dsw_copyp_t copyp;
	dsw_copyp32_t copyp32;
	spcs_s_info_t kstatus;
	_ii_info_t *ip;
	int rc = 0;
	int prev;

	if (ilp32) {
		if (copyin((void *)arg, &copyp32, sizeof (copyp32)) < 0)
			return (EFAULT);
		II_TAIL_COPY(copyp, copyp32, shadow_vol, dsw_copyp_t);
		copyp.status = (spcs_s_info_t)copyp32.status;
	} else if (copyin((void *)arg, &copyp, sizeof (copyp)) < 0)
		return (EFAULT);

	kstatus = spcs_s_kcreate();
	if (kstatus == NULL)
		return (ENOMEM);

	if (!copyp.shadow_vol[0])
		return (spcs_s_ocopyoutf(&kstatus, copyp.status, DSW_EEMPTY));

	mutex_enter(&_ii_info_mutex);
	ip = _ii_find_set(copyp.shadow_vol);
	mutex_exit(&_ii_info_mutex);
	if (ip == NULL)
		return (spcs_s_ocopyoutf(&kstatus, copyp.status,
		    DSW_ENOTFOUND));

	/* remember the old delay; it is what gets reported back */
	prev = ip->bi_throttle_delay;
	if (copyp.copy_delay != -1) {
		if (copyp.copy_delay >= MIN_THROTTLE_DELAY &&
		    copyp.copy_delay <= MAX_THROTTLE_DELAY)
			ip->bi_throttle_delay = copyp.copy_delay;
		else {
			cmn_err(CE_WARN, "!ii: delay out of range %d",
			    copyp.copy_delay);
			rc = EINVAL;
		}
	}
	copyp.copy_delay = prev;

	prev = ip->bi_throttle_unit;
	if (copyp.copy_unit != -1) {
		if (copyp.copy_unit >= MIN_THROTTLE_UNIT &&
		    copyp.copy_unit <= MAX_THROTTLE_UNIT) {
			/* only apply the unit if the delay was acceptable */
			if (rc != EINVAL)
				ip->bi_throttle_unit = copyp.copy_unit;
		} else {
			cmn_err(CE_WARN, "!ii: unit out of range %d",
			    copyp.copy_unit);
			if (rc != EINVAL) {
				rc = EINVAL;
				/* undo the delay change made above */
				ip->bi_throttle_delay = copyp.copy_delay;
			}
		}
	}
	copyp.copy_unit = prev;

	_ii_ioctl_done(ip);
	mutex_exit(&ip->bi_mutex);

	if (ilp32) {
		copyp32.copy_delay = copyp.copy_delay;
		copyp32.copy_unit = copyp.copy_unit;
		if (copyout(&copyp32, (void *)arg, sizeof (copyp32)) < 0) {
			/* nothing else will release the status block */
			spcs_s_kfree(kstatus);
			return (EFAULT);
		}
	} else if (copyout(&copyp, (void *)arg, sizeof (copyp))) {
		/* nothing else will release the status block */
		spcs_s_kfree(kstatus);
		return (EFAULT);
	}

	return (spcs_s_ocopyoutf(&kstatus, copyp.status, rc));
}
3204
3205
3206/*
3207 * _ii_suspend_vol
3208 *	suspend an individual InstantImage group
3209 *
3210 * Calling/Exit State:
3211 *	Returns 0 on success, nonzero otherwise
3212 */
3213
3214int
3215_ii_suspend_vol(_ii_info_t *ip)
3216{
3217	_ii_info_t **xip;
3218	int copy_flag;
3219	int rc;
3220	nsc_buf_t *tmp = NULL;
3221	ii_header_t *bm_header;
3222
3223	copy_flag = ip->bi_flags & DSW_COPY_FLAGS;
3224
3225	_ii_stopvol(ip);
3226	ASSERT(total_ref(ip) == 0);
3227
3228	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0)
3229		return (rc);
3230
3231	/* this rw_enter forces us to drain all active IO */
3232	rw_enter(&ip->bi_linkrw, RW_WRITER);
3233	rw_exit(&ip->bi_linkrw);
3234
3235	mutex_enter(&_ii_info_mutex);
3236	for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
3237		if (ip == *xip)
3238			break;
3239	}
3240	*xip = ip->bi_next;
3241	mutex_exit(&_ii_info_mutex);
3242
3243	rc = II_SAVE_BMP(ip, 1);
3244	mutex_enter(&ip->bi_mutex);
3245	if (rc == 0)
3246		bm_header = _ii_bm_header_get(ip, &tmp);
3247	if (rc == 0 && bm_header) {
3248		bm_header->ii_magic = DSW_CLEAN;
3249		bm_header->ii_state |= copy_flag;
3250		bm_header->ii_throttle_delay = ip->bi_throttle_delay;
3251		bm_header->ii_throttle_unit = ip->bi_throttle_unit;
3252		/* copy over the mtime */
3253		bm_header->ii_mtime = ip->bi_mtime;
3254		/* write it to disk */
3255		rc = _ii_bm_header_put(bm_header, ip, tmp);
3256	}
3257	--iigkstat.num_sets.value.ul;
3258	mutex_exit(&ip->bi_mutex);
3259
3260	ii_overflow_free(ip, NO_RECLAIM);
3261	_ii_rlse_devs(ip, BMP);
3262
3263	ii_sibling_free(ip);
3264
3265	return (rc);
3266}
3267
3268/*
3269 * _ii_suspend_cluster
3270 *	Cluster resource group is switching over to another node, so
3271 *	all shadowed volumes in that group are suspended.
3272 *
3273 * Returns 0 on success, or ESRCH if the name of the cluster resource
3274 * group couldn't be found.
3275 */
3276int
3277_ii_suspend_cluster(char *shadow_vol)
3278{
3279	int found, last;
3280	uint64_t hash;
3281	_ii_info_t *ip;
3282	_ii_lsthead_t **cp, *xcp;
3283	_ii_lstinfo_t **np, *xnp;
3284
3285	/* find appropriate cluster list */
3286	mutex_enter(&_ii_cluster_mutex);
3287	hash = nsc_strhash(shadow_vol);
3288	for (cp = &_ii_cluster_top; *cp; cp = &((*cp)->lst_next)) {
3289		if ((hash == (*cp)->lst_hash) && strncmp(shadow_vol,
3290		    (*cp)->lst_name, DSW_NAMELEN) == 0)
3291			break;
3292	}
3293
3294	if (!*cp) {
3295		mutex_exit(&_ii_cluster_mutex);
3296		return (DSW_ECNOTFOUND);
3297	}
3298
3299	found = 1;
3300	last = 0;
3301	while (found && !last) {
3302		found = 0;
3303
3304		mutex_enter(&_ii_info_mutex);
3305		for (np = &(*cp)->lst_start; *np; np = &((*np)->lst_next)) {
3306			ip = (*np)->lst_ip;
3307
3308			if (ip->bi_disabled)
3309				continue;
3310
3311			found++;
3312
3313			ip->bi_disabled = 1;
3314			if (NSHADOWS(ip) && (ip->bi_master == ip)) {
3315				ip->bi_flags &= (~DSW_COPYING);
3316				ip->bi_state |= DSW_MULTIMST;
3317			}
3318			mutex_exit(&_ii_info_mutex);
3319
3320			xnp = *np;
3321			*np = (*np)->lst_next;
3322			kmem_free(xnp, sizeof (_ii_lstinfo_t));
3323			ip->bi_cluster = NULL;
3324
3325			(void) _ii_suspend_vol(ip);
3326			break;
3327		}
3328		if (found == 0)
3329			mutex_exit(&_ii_info_mutex);
3330		else if (!(*cp)->lst_start) {
3331			xcp = *cp;
3332			*cp = (*cp)->lst_next;
3333			kmem_free(xcp, sizeof (_ii_lsthead_t));
3334			last = 1;
3335		}
3336	}
3337	mutex_exit(&_ii_cluster_mutex);
3338
3339	return (0);
3340}
3341
3342/*
3343 * _ii_shutdown
3344 *	System is shutting down, so all shadowed volumes are suspended.
3345 *
3346 *	This always succeeds, so always returns 0.
3347 */
3348
3349/* ARGSUSED */
3350
3351int
3352_ii_shutdown(intptr_t arg, int *rvp)
3353{
3354	_ii_info_t **xip, *ip;
3355	int found;
3356
3357	*rvp = 0;
3358
3359	_ii_shutting_down = 1;
3360
3361	/* Go through the list until only disabled entries are found */
3362
3363	found = 1;
3364	while (found) {
3365		found = 0;
3366
3367		mutex_enter(&_ii_info_mutex);
3368		for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
3369			ip = *xip;
3370			if (ip->bi_disabled) {
3371				/* Also covers not fully configured yet */
3372				continue;
3373			}
3374			found++;
3375
3376			ip->bi_disabled = 1;
3377			mutex_exit(&_ii_info_mutex);
3378
3379			(void) _ii_suspend_vol(ip);
3380
3381			break;
3382		}
3383		if (found == 0)
3384			mutex_exit(&_ii_info_mutex);
3385	}
3386
3387	_ii_shutting_down = 0;
3388
3389	return (0);
3390}
3391
3392/*
3393 * _ii_suspend
3394 *	Suspend an InstantImage, saving its state to allow a subsequent resume.
3395 *
3396 * Calling/Exit State:
3397 *	Returns 0 if the pair was suspended. Otherwise an error code
3398 *	is returned and any additional error information is copied
3399 *	out to the user.
3400 */
3401
3402/* ARGSUSED */
3403
3404int
3405_ii_suspend(intptr_t arg, int ilp32, int *rvp)
3406{
3407	dsw_ioctl_t uparms;
3408	dsw_ioctl32_t uparms32;
3409	_ii_info_t *ip;
3410	int rc;
3411	spcs_s_info_t kstatus;
3412
3413	*rvp = 0;
3414
3415	if (ilp32) {
3416		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
3417			return (EFAULT);
3418		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
3419		uparms.status = (spcs_s_info_t)uparms32.status;
3420	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
3421		return (EFAULT);
3422
3423	kstatus = spcs_s_kcreate();
3424	if (kstatus == NULL)
3425		return (ENOMEM);
3426
3427	if (!uparms.shadow_vol[0])
3428		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
3429
3430	if ((uparms.flags & CV_IS_CLUSTER) != 0) {
3431		rc = _ii_suspend_cluster(uparms.shadow_vol);
3432	} else {
3433		mutex_enter(&_ii_info_mutex);
3434		ip = _ii_find_set(uparms.shadow_vol);
3435		if (ip == NULL) {
3436			mutex_exit(&_ii_info_mutex);
3437			return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3438			    DSW_ENOTFOUND));
3439		}
3440
3441		ip->bi_disabled = 1;
3442		if (NSHADOWS(ip) && (ip->bi_master == ip)) {
3443			ip->bi_flags &= (~DSW_COPYING);
3444			ip->bi_state |= DSW_MULTIMST;
3445		}
3446		mutex_exit(&_ii_info_mutex);
3447
3448		_ii_ioctl_done(ip);
3449		mutex_exit(&ip->bi_mutex);
3450
3451		rc = _ii_suspend_vol(ip);
3452	}
3453
3454	return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3455}
3456
3457
3458/*
3459 * _ii_abort
3460 *	Stop any copying process for shadow.
3461 *
3462 * Calling/Exit State:
3463 *	Returns 0 if the abort succeeded. Otherwise an error code
3464 *	is returned and any additional error information is copied
3465 *	out to the user.
3466 */
3467
3468/* ARGSUSED */
3469
3470int
3471_ii_abort(intptr_t arg, int ilp32, int *rvp)
3472{
3473	dsw_ioctl_t uabort;
3474	dsw_ioctl32_t uabort32;
3475	_ii_info_t *ip;
3476	int rc;
3477	spcs_s_info_t kstatus;
3478
3479	if (ilp32) {
3480		if (copyin((void *)arg, &uabort32, sizeof (uabort32)) < 0)
3481			return (EFAULT);
3482		II_TAIL_COPY(uabort, uabort32, shadow_vol, dsw_ioctl_t);
3483		uabort.status = (spcs_s_info_t)uabort32.status;
3484	} else if (copyin((void *)arg, &uabort, sizeof (uabort)) < 0)
3485		return (EFAULT);
3486
3487	kstatus = spcs_s_kcreate();
3488	if (kstatus == NULL)
3489		return (ENOMEM);
3490
3491	if (!uabort.shadow_vol[0])
3492		return (spcs_s_ocopyoutf(&kstatus, uabort.status, DSW_EEMPTY));
3493
3494	mutex_enter(&_ii_info_mutex);
3495	ip = _ii_find_set(uabort.shadow_vol);
3496	mutex_exit(&_ii_info_mutex);
3497	if (ip == NULL)
3498		return (spcs_s_ocopyoutf(&kstatus, uabort.status,
3499		    DSW_ENOTFOUND));
3500
3501	mutex_exit(&ip->bi_mutex);
3502
3503	rc = _ii_stopcopy(ip);
3504
3505	mutex_enter(&ip->bi_mutex);
3506	_ii_ioctl_done(ip);
3507	mutex_exit(&ip->bi_mutex);
3508
3509	return (spcs_s_ocopyoutf(&kstatus, uabort.status, rc));
3510}
3511
3512
3513/*
3514 * _ii_segment
3515 *	Copy out II pair bitmaps (cpy, shd, idx) in segments
3516 *
3517 * Calling/Exit State:
3518 *	Returns 0 if the operation succeeded. Otherwise an error code
3519 *	is returned and any additional error information is copied
3520 *	out to the user.
3521 *
3522 */
3523int
3524_ii_segment(intptr_t arg, int ilp32, int *rvp)
3525{
3526	dsw_segment_t usegment;
3527	dsw_segment32_t usegment32;
3528	_ii_info_t *ip;
3529	int rc, size;
3530	spcs_s_info_t kstatus;
3531	int32_t bi_idxfba;
3532
3533	*rvp = 0;
3534
3535	if (ilp32) {
3536		if (copyin((void *)arg, &usegment32, sizeof (usegment32)))
3537			return (EFAULT);
3538		usegment.status = (spcs_s_info_t)usegment32.status;
3539		bcopy(usegment32.shadow_vol, usegment.shadow_vol, DSW_NAMELEN);
3540		usegment.seg_number = (unsigned)usegment32.seg_number;
3541		usegment.shd_bitmap =
3542		    (unsigned char   *)(unsigned long)usegment32.shd_bitmap;
3543		usegment.shd_size = usegment32.shd_size;
3544		usegment.cpy_bitmap =
3545		    (unsigned char   *)(unsigned long)usegment32.cpy_bitmap;
3546		usegment.cpy_size = usegment32.cpy_size;
3547		usegment.idx_bitmap =
3548		    (unsigned char   *)(unsigned long)usegment32.idx_bitmap;
3549		usegment.idx_size = usegment32.idx_size;
3550	} else if (copyin((void *)arg, &usegment, sizeof (usegment)))
3551		return (EFAULT);
3552
3553	kstatus = spcs_s_kcreate();
3554	if (kstatus == NULL)
3555		return (ENOMEM);
3556
3557	if (usegment.shadow_vol[0]) {
3558		mutex_enter(&_ii_info_mutex);
3559		ip = _ii_find_set(usegment.shadow_vol);
3560		mutex_exit(&_ii_info_mutex);
3561		if (ip == NULL)
3562			return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3563			    DSW_ENOTFOUND));
3564	} else
3565		return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3566		    DSW_EEMPTY));
3567
3568	mutex_exit(&ip->bi_mutex);
3569
3570	size = ((((ip->bi_size + (DSW_SIZE-1))
3571	    / DSW_SIZE) + (DSW_BITS-1))) / DSW_BITS;
3572	bi_idxfba = ip->bi_copyfba + (ip->bi_copyfba - ip->bi_shdfba);
3573	if (((nsc_size_t)usegment.seg_number > DSW_BM_FBA_LEN(ip->bi_size)) ||
3574	    (usegment.shd_size > size) ||
3575	    (usegment.cpy_size > size) ||
3576	    (!(ip->bi_flags & DSW_GOLDEN) && (usegment.idx_size > size*32))) {
3577		_ii_ioctl_done(ip);
3578		return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3579		    DSW_EMISMATCH));
3580	}
3581
3582	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3583		mutex_enter(&ip->bi_mutex);
3584		_ii_ioctl_done(ip);
3585		mutex_exit(&ip->bi_mutex);
3586		spcs_s_add(kstatus, rc);
3587		return (spcs_s_ocopyoutf(&kstatus, usegment.status,
3588		    DSW_ERSRVFAIL));
3589	}
3590
3591	if (usegment.shd_bitmap && usegment.shd_size > 0)
3592		rc = II_CO_BMP(ip, ip->bi_shdfba+usegment.seg_number,
3593		    usegment.shd_bitmap, usegment.shd_size);
3594	if (rc == 0 && usegment.cpy_bitmap && usegment.cpy_size > 0)
3595		rc = II_CO_BMP(ip, ip->bi_copyfba+usegment.seg_number,
3596		    usegment.cpy_bitmap, usegment.cpy_size);
3597	if (!(ip->bi_flags & DSW_GOLDEN)) {
3598		if (rc == 0 && usegment.idx_bitmap && usegment.idx_size > 0)
3599			rc = II_CO_BMP(ip, bi_idxfba+usegment.seg_number*32,
3600			    usegment.idx_bitmap, usegment.idx_size);
3601	}
3602
3603	_ii_rlse_devs(ip, BMP);
3604	mutex_enter(&ip->bi_mutex);
3605	_ii_ioctl_done(ip);
3606	mutex_exit(&ip->bi_mutex);
3607	if (rc) {
3608		spcs_s_add(kstatus, rc);
3609		return (spcs_s_ocopyoutf(&kstatus, usegment.status, DSW_EIO));
3610	}
3611
3612	spcs_s_kfree(kstatus);
3613	return (0);
3614}
3615
3616
3617/*
3618 * _ii_bitmap
3619 *	Copy out II pair bitmaps to user program
3620 *
3621 * Calling/Exit State:
3622 *	Returns 0 if the operation succeeded. Otherwise an error code
3623 *	is returned and any additional error information is copied
3624 *	out to the user.
3625 */
3626
3627int
3628_ii_bitmap(intptr_t arg, int ilp32, int *rvp)
3629{
3630	dsw_bitmap_t ubitmap;
3631	dsw_bitmap32_t ubitmap32;
3632	_ii_info_t *ip;
3633	int rc;
3634	spcs_s_info_t kstatus;
3635
3636	*rvp = 0;
3637
3638	if (ilp32) {
3639		if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)))
3640			return (EFAULT);
3641		ubitmap.status = (spcs_s_info_t)ubitmap32.status;
3642		bcopy(ubitmap32.shadow_vol, ubitmap.shadow_vol, DSW_NAMELEN);
3643		ubitmap.shd_bitmap =
3644		    (unsigned char   *)(unsigned long)ubitmap32.shd_bitmap;
3645		ubitmap.shd_size = ubitmap32.shd_size;
3646		ubitmap.copy_bitmap =
3647		    (unsigned char   *)(unsigned long)ubitmap32.copy_bitmap;
3648		ubitmap.copy_size = ubitmap32.copy_size;
3649	} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)))
3650		return (EFAULT);
3651
3652	kstatus = spcs_s_kcreate();
3653	if (kstatus == NULL)
3654		return (ENOMEM);
3655
3656	if (!ubitmap.shadow_vol[0])
3657		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
3658
3659	mutex_enter(&_ii_info_mutex);
3660	ip = _ii_find_set(ubitmap.shadow_vol);
3661	mutex_exit(&_ii_info_mutex);
3662	if (ip == NULL)
3663		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3664		    DSW_ENOTFOUND));
3665
3666	mutex_exit(&ip->bi_mutex);
3667
3668	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3669		mutex_enter(&ip->bi_mutex);
3670		_ii_ioctl_done(ip);
3671		mutex_exit(&ip->bi_mutex);
3672		spcs_s_add(kstatus, rc);
3673		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3674		    DSW_ERSRVFAIL));
3675	}
3676
3677	if (ubitmap.shd_bitmap && ubitmap.shd_size > 0)
3678		rc = II_CO_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap,
3679		    ubitmap.shd_size);
3680	if (rc == 0 && ubitmap.copy_bitmap && ubitmap.copy_size > 0)
3681		rc = II_CO_BMP(ip, ip->bi_copyfba, ubitmap.copy_bitmap,
3682		    ubitmap.copy_size);
3683	_ii_rlse_devs(ip, BMP);
3684	mutex_enter(&ip->bi_mutex);
3685	_ii_ioctl_done(ip);
3686	mutex_exit(&ip->bi_mutex);
3687	if (rc) {
3688		spcs_s_add(kstatus, rc);
3689		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
3690	}
3691
3692	spcs_s_kfree(kstatus);
3693
3694	return (0);
3695}
3696
3697/*
3698 * _ii_export
3699 *	Exports the shadow volume
3700 *
3701 * Calling/Exit State:
3702 *	Returns 0 if the shadow was exported. Otherwise an error code
3703 *	is returned and any additional error information is copied
3704 *	out to the user.
3705 *
3706 * Description:
3707 */
3708
3709int
3710_ii_export(intptr_t arg, int ilp32, int *rvp)
3711{
3712	dsw_ioctl_t uparms;
3713	dsw_ioctl32_t uparms32;
3714	_ii_info_t *ip;
3715	nsc_fd_t *fd;
3716	int rc = 0;
3717	spcs_s_info_t kstatus;
3718
3719	*rvp = 0;
3720
3721	if (ilp32) {
3722		if (copyin((void *)arg, &uparms32, sizeof (uparms32)) < 0)
3723			return (EFAULT);
3724		II_TAIL_COPY(uparms, uparms32, shadow_vol, dsw_ioctl_t);
3725		uparms.status = (spcs_s_info_t)uparms32.status;
3726	} else if (copyin((void *)arg, &uparms, sizeof (uparms)) < 0)
3727		return (EFAULT);
3728
3729	kstatus = spcs_s_kcreate();
3730	if (kstatus == NULL)
3731		return (ENOMEM);
3732
3733	if (!uparms.shadow_vol[0])
3734		return (spcs_s_ocopyoutf(&kstatus, uparms.status, DSW_EEMPTY));
3735
3736	mutex_enter(&_ii_info_mutex);
3737	ip = _ii_find_set(uparms.shadow_vol);
3738	mutex_exit(&_ii_info_mutex);
3739	if (ip == NULL)
3740		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3741		    DSW_ENOTFOUND));
3742
3743	if ((ip->bi_flags & DSW_GOLDEN) == 0 ||
3744	    ((ip->bi_flags & (DSW_COPYING|DSW_SHDEXPORT|DSW_SHDIMPORT)) != 0)) {
3745		/*
3746		 * Cannot export a dependent copy or while still copying or
3747		 * the shadow is already in an exported state
3748		 */
3749		rc = ip->bi_flags & (DSW_SHDEXPORT|DSW_SHDIMPORT)
3750		    ? DSW_EALREADY : DSW_EDEPENDENCY;
3751		_ii_ioctl_done(ip);
3752		mutex_exit(&ip->bi_mutex);
3753		return (spcs_s_ocopyoutf(&kstatus, uparms.status, rc));
3754	}
3755	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
3756		_ii_ioctl_done(ip);
3757		mutex_exit(&ip->bi_mutex);
3758		spcs_s_add(kstatus, rc);
3759		return (spcs_s_ocopyoutf(&kstatus, uparms.status,
3760		    DSW_ERSRVFAIL));
3761	}
3762	II_FLAG_SET(DSW_SHDEXPORT, ip);
3763
3764	mutex_exit(&ip->bi_mutex);
3765
3766	/* this rw_enter forces us to drain all active IO */
3767	rw_enter(&ip->bi_linkrw, RW_WRITER);
3768	rw_exit(&ip->bi_linkrw);
3769
3770	mutex_enter(&ip->bi_mutex);
3771
3772	_ii_rlse_devs(ip, BMP);
3773
3774	/* Shut shadow volume. */
3775	if (ip->bi_shdfd) {
3776		if (ip->bi_shdrsrv) {
3777			nsc_release(ip->bi_shdfd);
3778			ip->bi_shdrsrv = NULL;
3779		}
3780		fd = ip->bi_shdfd;
3781		ip->bi_shdfd = NULL;
3782		mutex_exit(&ip->bi_mutex);
3783		(void) nsc_close(fd);
3784		mutex_enter(&ip->bi_mutex);
3785	}
3786
3787	if (ip->bi_shdrfd) {
3788		if (ip->bi_shdrrsrv) {
3789			nsc_release(ip->bi_shdrfd);
3790			ip->bi_shdrrsrv = NULL;
3791		}
3792		fd = ip->bi_shdrfd;
3793		ip->bi_shdrfd = NULL;
3794		mutex_exit(&ip->bi_mutex);
3795		(void) nsc_close(fd);
3796		mutex_enter(&ip->bi_mutex);
3797	}
3798	_ii_ioctl_done(ip);
3799	mutex_exit(&ip->bi_mutex);
3800
3801	(void) _ii_reserve_begin(ip);
3802	if (ip->bi_shd_tok) {
3803		(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
3804		ip->bi_shd_tok = NULL;
3805	}
3806
3807	if (ip->bi_shdr_tok) {
3808		(void) _ii_unregister_path(ip->bi_shdr_tok, 0,
3809		    "raw shadow");
3810		ip->bi_shdr_tok = NULL;
3811	}
3812	_ii_reserve_end(ip);
3813
3814	spcs_s_kfree(kstatus);
3815
3816	return (0);
3817}
3818
3819/*
3820 * _ii_join
3821 *	Rejoins the shadow volume
3822 *
3823 * Calling/Exit State:
3824 *	Returns 0 if the shadow was exported. Otherwise an error code
3825 *	is returned and any additional error information is copied
3826 *	out to the user.
3827 *
3828 * Description:
3829 */
3830
3831int
3832_ii_join(intptr_t arg, int ilp32, int *rvp)
3833{
3834	dsw_bitmap_t ubitmap;
3835	dsw_bitmap32_t ubitmap32;
3836	_ii_info_t *ip;
3837	uint64_t bm_size;
3838	int rc = 0;
3839	int rtype = 0;
3840	spcs_s_info_t kstatus;
3841
3842	*rvp = 0;
3843
3844	if (ilp32) {
3845		if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0)
3846			return (EFAULT);
3847		II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t);
3848		ubitmap.status = (spcs_s_info_t)ubitmap32.status;
3849		ubitmap.shd_bitmap =
3850		    (unsigned char   *)(unsigned long)ubitmap32.shd_bitmap;
3851		ubitmap.shd_size = ubitmap32.shd_size;
3852	} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0)
3853		return (EFAULT);
3854
3855	kstatus = spcs_s_kcreate();
3856	if (kstatus == NULL)
3857		return (ENOMEM);
3858
3859	if (!ubitmap.shadow_vol[0])
3860		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
3861
3862	mutex_enter(&_ii_info_mutex);
3863	ip = _ii_find_set(ubitmap.shadow_vol);
3864	mutex_exit(&_ii_info_mutex);
3865	if (ip == NULL)
3866		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3867		    DSW_ENOTFOUND));
3868
3869	/*
3870	 * Check that group has shadow exported.
3871	 */
3872	if ((ip->bi_flags & DSW_SHDEXPORT) == 0) {
3873		/*
3874		 * Cannot join if the shadow isn't exported.
3875		 */
3876		_ii_ioctl_done(ip);
3877		mutex_exit(&ip->bi_mutex);
3878		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3879		    DSW_ENOTEXPORTED));
3880	}
3881	/* check bitmap is at least large enough for master volume size */
3882	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
3883	if (ubitmap.shd_size < bm_size) {
3884		/* bitmap is to small */
3885		_ii_ioctl_done(ip);
3886		mutex_exit(&ip->bi_mutex);
3887		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3888		    DSW_EINVALBMP));
3889	}
3890	/* read in bitmap and or with differences bitmap */
3891	rtype = BMP;
3892	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
3893		_ii_ioctl_done(ip);
3894		mutex_exit(&ip->bi_mutex);
3895		spcs_s_add(kstatus, rc);
3896		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
3897		    DSW_ERSRVFAIL));
3898	}
3899	rc = II_CI_BMP(ip, ip->bi_shdfba, ubitmap.shd_bitmap,
3900	    ubitmap.shd_size);
3901	/* open up shadow */
3902	if ((rc = ii_open_shadow(ip, ip->bi_keyname)) != 0) {
3903		_ii_ioctl_done(ip);
3904		mutex_exit(&ip->bi_mutex);
3905		spcs_s_add(kstatus, rc);
3906		_ii_rlse_devs(ip, rtype);
3907		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EOPEN));
3908	}
3909	ii_register_shd(ip);
3910	if (!rc)
3911		II_FLAG_CLR(DSW_SHDEXPORT, ip);
3912	_ii_ioctl_done(ip);
3913	mutex_exit(&ip->bi_mutex);
3914	_ii_rlse_devs(ip, rtype);
3915
3916	if (rc) {
3917		spcs_s_add(kstatus, rc);
3918		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
3919	}
3920
3921	spcs_s_kfree(kstatus);
3922
3923	return (0);
3924}
3925
3926
3927/*
3928 * _ii_ocreate
3929 *	Configures a volume suitable for use as an overflow volume.
3930 *
3931 * Calling/Exit State:
3932 *	Returns 0 if the volume was configured successfully. Otherwise
3933 *	 an error code is returned and any additional error information
3934 *	is copied out to the user.
3935 *
3936 * Description:
3937 */
3938
3939int
3940_ii_ocreate(intptr_t arg, int ilp32, int *rvp)
3941{
3942	dsw_ioctl_t uioctl;
3943	dsw_ioctl32_t uioctl32;
3944	_ii_overflow_t	ov;
3945	_ii_overflow_t	*op = &ov;
3946	int rc = 0;
3947	nsc_fd_t	*fd;
3948	nsc_iodev_t	*iodev;
3949	nsc_size_t vol_size;
3950	char *overflow_vol;
3951	spcs_s_info_t kstatus;
3952
3953	*rvp = 0;
3954
3955	if (ilp32) {
3956		if (copyin((void *)arg, &uioctl32, sizeof (uioctl32)) < 0)
3957			return (EFAULT);
3958		II_TAIL_COPY(uioctl, uioctl32, shadow_vol, dsw_ioctl_t);
3959		uioctl.status = (spcs_s_info_t)uioctl32.status;
3960	} else if (copyin((void *)arg, &uioctl, sizeof (uioctl)) < 0)
3961		return (EFAULT);
3962
3963	overflow_vol = uioctl.shadow_vol;
3964	kstatus = spcs_s_kcreate();
3965	if (kstatus == NULL)
3966		return (ENOMEM);
3967
3968	if (!overflow_vol[0])
3969		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EEMPTY));
3970
3971	if (ii_volume(overflow_vol, 0) != NONE)
3972		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EINUSE));
3973
3974	fd = nsc_open(overflow_vol,
3975	    NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL, (blind_t)&(iodev), &rc);
3976	if (!fd)
3977		fd = nsc_open(uioctl.shadow_vol,
3978		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL,
3979		    (blind_t)&(iodev), &rc);
3980	if (fd == NULL) {
3981		spcs_s_add(kstatus, rc);
3982		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
3983	}
3984	if ((rc = nsc_reserve(fd, 0)) != 0) {
3985		spcs_s_add(kstatus, rc);
3986		(void) nsc_close(fd);
3987		return (spcs_s_ocopyoutf(&kstatus, uioctl.status,
3988		    DSW_ERSRVFAIL));
3989	}
3990	/* setup magic number etc; */
3991	rc = nsc_partsize(fd, &vol_size);
3992	if (rc) {
3993		spcs_s_add(kstatus, rc);
3994		(void) nsc_close(fd);
3995		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
3996	}
3997	op->ii_hmagic = II_OMAGIC;
3998		/* take 1 off as chunk 0 contains header */
3999	op->ii_nchunks = (vol_size / DSW_SIZE) -1;
4000	op->ii_drefcnt = 0;
4001	op->ii_used = 1;			/* we have used the header */
4002	op->ii_unused = op->ii_nchunks - op->ii_used;
4003	op->ii_freehead = II_NULLNODE;
4004	op->ii_hversion = OV_HEADER_VERSION;
4005	op->ii_flags = 0;
4006	op->ii_urefcnt = 0;
4007	(void) strncpy(op->ii_volname, uioctl.shadow_vol, DSW_NAMELEN);
4008	rc = _ii_nsc_io(0, KS_NA, fd, NSC_WRBUF, II_OHEADER_FBA,
4009	    (unsigned char *)&op->ii_do, sizeof (op->ii_do));
4010	(void) nsc_release(fd);
4011	(void) nsc_close(fd);
4012	if (rc) {
4013		spcs_s_add(kstatus, rc);
4014		return (spcs_s_ocopyoutf(&kstatus, uioctl.status, DSW_EIO));
4015	}
4016
4017	spcs_s_kfree(kstatus);
4018
4019	return (0);
4020}
4021
4022
4023/*
4024 * _ii_oattach
4025 *	Attaches the volume in the "bitmap_vol" field as an overflow volume.
4026 *
4027 * Calling/Exit State:
4028 *	Returns 0 if the volume was attached. Fails if the shadow group
4029 *	is of the wrong type (eg independent) or already has an overflow
4030 *	volume attached.
4031 *
4032 * Description:
4033 */
4034
4035int
4036_ii_oattach(intptr_t arg, int ilp32, int *rvp)
4037{
4038	dsw_config_t uconfig;
4039	dsw_config32_t uconfig32;
4040	_ii_info_t *ip;
4041	int rc = 0;
4042	int rtype = 0;
4043	ii_header_t *bm_header;
4044	nsc_buf_t *tmp = NULL;
4045	spcs_s_info_t kstatus;
4046
4047	*rvp = 0;
4048
4049	if (ilp32) {
4050		if (copyin((void *)arg, &uconfig32, sizeof (uconfig32)) < 0)
4051			return (EFAULT);
4052		II_TAIL_COPY(uconfig, uconfig32, shadow_vol, dsw_config_t);
4053		uconfig.status = (spcs_s_info_t)uconfig32.status;
4054	} else if (copyin((void *)arg, &uconfig, sizeof (uconfig)) < 0)
4055		return (EFAULT);
4056
4057	kstatus = spcs_s_kcreate();
4058	if (kstatus == NULL)
4059		return (ENOMEM);
4060
4061	if (!uconfig.shadow_vol[0])
4062		return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EEMPTY));
4063
4064	switch (ii_volume(uconfig.bitmap_vol, 0)) {
4065	case NONE:
4066	case OVR:
4067		break;
4068	default:
4069		return (spcs_s_ocopyoutf(&kstatus, uconfig.status, DSW_EINUSE));
4070	}
4071	mutex_enter(&_ii_info_mutex);
4072	ip = _ii_find_set(uconfig.shadow_vol);
4073	mutex_exit(&_ii_info_mutex);
4074	if (ip == NULL)
4075		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4076		    DSW_ENOTFOUND));
4077
4078	/* check shadow doesn't already have an overflow volume */
4079	if (ip->bi_overflow) {
4080		_ii_ioctl_done(ip);
4081		mutex_exit(&ip->bi_mutex);
4082		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4083		    DSW_EALREADY));
4084	}
4085	/* check shadow is mapped so can have an overflow */
4086	if ((ip->bi_flags&DSW_TREEMAP) == 0) {
4087		_ii_ioctl_done(ip);
4088		mutex_exit(&ip->bi_mutex);
4089		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4090		    DSW_EWRONGTYPE));
4091	}
4092	rtype = BMP;
4093	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
4094		_ii_ioctl_done(ip);
4095		mutex_exit(&ip->bi_mutex);
4096		spcs_s_add(kstatus, rc);
4097		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4098		    DSW_ERSRVFAIL));
4099	}
4100	/* attach volume */
4101	if ((rc = ii_overflow_attach(ip, uconfig.bitmap_vol, 1)) != 0) {
4102		_ii_ioctl_done(ip);
4103		mutex_exit(&ip->bi_mutex);
4104		_ii_rlse_devs(ip, rtype);
4105		return (spcs_s_ocopyoutf(&kstatus, uconfig.status, rc));
4106	}
4107
4108	/* re-write header so shadow can be restarted with overflow volume */
4109
4110	bm_header = _ii_bm_header_get(ip, &tmp);
4111	if (bm_header == NULL) {
4112		/* detach volume */
4113		ii_overflow_free(ip, RECLAIM);
4114		_ii_ioctl_done(ip);
4115		mutex_exit(&ip->bi_mutex);
4116		_ii_rlse_devs(ip, rtype);
4117		return (spcs_s_ocopyoutf(&kstatus, uconfig.status,
4118		    DSW_EHDRBMP));
4119	}
4120	(void) strncpy(bm_header->overflow_vol, uconfig.bitmap_vol,
4121	    DSW_NAMELEN);
4122	(void) _ii_bm_header_put(bm_header, ip, tmp);
4123	_ii_rlse_devs(ip, rtype);
4124	_ii_ioctl_done(ip);
4125	mutex_exit(&ip->bi_mutex);
4126
4127	spcs_s_kfree(kstatus);
4128
4129	return (0);
4130}
4131
4132
4133/*
4134 * _ii_odetach
4135 *	Breaks the link with the overflow volume.
4136 *
4137 * Calling/Exit State:
4138 *	Returns 0 if the overflow volume was detached. Otherwise an error code
4139 *	is returned and any additional error information is copied
4140 *	out to the user.
4141 *
4142 * Description:
4143 */
4144
4145int
4146_ii_odetach(intptr_t arg, int ilp32, int *rvp)
4147{
4148	dsw_bitmap_t ubitmap;
4149	dsw_bitmap32_t ubitmap32;
4150	_ii_info_t *ip;
4151	int rc = 0;
4152	int rtype = 0;
4153	ii_header_t *bm_header;
4154	nsc_buf_t *tmp = NULL;
4155	spcs_s_info_t kstatus;
4156
4157	*rvp = 0;
4158
4159	if (ilp32) {
4160		if (copyin((void *)arg, &ubitmap32, sizeof (ubitmap32)) < 0)
4161			return (EFAULT);
4162		II_TAIL_COPY(ubitmap, ubitmap32, shadow_vol, dsw_bitmap_t);
4163		ubitmap.status = (spcs_s_info_t)ubitmap32.status;
4164	} else if (copyin((void *)arg, &ubitmap, sizeof (ubitmap)) < 0)
4165		return (EFAULT);
4166
4167	kstatus = spcs_s_kcreate();
4168	if (kstatus == NULL)
4169		return (ENOMEM);
4170
4171	if (!ubitmap.shadow_vol[0])
4172		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EEMPTY));
4173
4174	mutex_enter(&_ii_info_mutex);
4175	ip = _ii_find_set(ubitmap.shadow_vol);
4176	mutex_exit(&_ii_info_mutex);
4177	if (ip == NULL)
4178		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
4179		    DSW_ENOTFOUND));
4180
4181	if ((ip->bi_flags&DSW_VOVERFLOW) != 0) {
4182		_ii_ioctl_done(ip);
4183		mutex_exit(&ip->bi_mutex);
4184		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
4185		    DSW_EODEPENDENCY));
4186	}
4187	rtype = BMP;
4188	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
4189		_ii_ioctl_done(ip);
4190		mutex_exit(&ip->bi_mutex);
4191		spcs_s_add(kstatus, rc);
4192		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
4193		    DSW_ERSRVFAIL));
4194	}
4195	ii_overflow_free(ip, RECLAIM);
4196	/* re-write header to break link with overflow volume */
4197
4198	bm_header = _ii_bm_header_get(ip, &tmp);
4199	if (bm_header == NULL) {
4200		_ii_rlse_devs(ip, rtype);
4201		_ii_ioctl_done(ip);
4202		mutex_exit(&ip->bi_mutex);
4203		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status,
4204		    DSW_EHDRBMP));
4205	}
4206	bzero(bm_header->overflow_vol, DSW_NAMELEN);
4207	(void) _ii_bm_header_put(bm_header, ip, tmp);
4208
4209	_ii_rlse_devs(ip, rtype);
4210	_ii_ioctl_done(ip);
4211
4212	mutex_exit(&ip->bi_mutex);
4213	if (rc) {
4214		spcs_s_add(kstatus, rc);
4215		return (spcs_s_ocopyoutf(&kstatus, ubitmap.status, DSW_EIO));
4216	}
4217
4218	spcs_s_kfree(kstatus);
4219
4220	--iigkstat.assoc_over.value.ul;
4221
4222	return (0);
4223}
4224
4225
4226/*
4227 * _ii_gc_list
4228 *	Returns a list of all lists, or all entries in a list
4229 *
4230 */
4231int
4232_ii_gc_list(intptr_t arg, int ilp32, int *rvp, kmutex_t *mutex,
4233    _ii_lsthead_t *lst)
4234{
4235	dsw_aioctl_t ulist;
4236	dsw_aioctl32_t ulist32;
4237	int name_offset, i;
4238	spcs_s_info_t kstatus;
4239	char *carg = (char *)arg;
4240	uint64_t hash;
4241	_ii_lsthead_t *cp;
4242	_ii_lstinfo_t *np;
4243
4244	*rvp = 0;
4245	name_offset = (int)&(((dsw_aioctl_t *)0)->shadow_vol[0]);
4246	if (ilp32) {
4247		if (copyin((void *) arg, &ulist32, sizeof (ulist32)) < 0)
4248			return (EFAULT);
4249		II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t);
4250		ulist.status = (spcs_s_info_t)ulist32.status;
4251		name_offset = (int)&(((dsw_aioctl32_t *)0)->shadow_vol[0]);
4252	} else if (copyin((void *) arg, &ulist, sizeof (ulist)) < 0)
4253		return (EFAULT);
4254
4255	kstatus = spcs_s_kcreate();
4256	if (kstatus == NULL)
4257		return (ENOMEM);
4258
4259	mutex_enter(mutex);
4260	if (ulist.shadow_vol[ 0 ] != 0) {
4261		/* search for specific list */
4262		hash = nsc_strhash(ulist.shadow_vol);
4263		for (cp = lst; cp; cp = cp->lst_next) {
4264			if ((hash == cp->lst_hash) && strncmp(ulist.shadow_vol,
4265			    cp->lst_name, DSW_NAMELEN) == 0) {
4266				break;
4267			}
4268		}
4269		if (cp) {
4270			for (i = 0, np = cp->lst_start; i < ulist.count && np;
4271			    np = np->lst_next, carg += DSW_NAMELEN, i++) {
4272				if (copyout(np->lst_ip->bi_keyname,
4273				    carg + name_offset, DSW_NAMELEN)) {
4274					mutex_exit(mutex);
4275					return (spcs_s_ocopyoutf(&kstatus,
4276					    ulist.status, EFAULT));
4277				}
4278			}
4279		} else {
4280			i = 0;
4281		}
4282	} else {
4283		/* return full list */
4284		for (i = 0, cp = lst; i < ulist.count && cp;
4285		    carg += DSW_NAMELEN, i++, cp = cp->lst_next) {
4286			if (copyout(cp->lst_name, carg + name_offset,
4287			    DSW_NAMELEN)) {
4288				mutex_exit(mutex);
4289				return (spcs_s_ocopyoutf(&kstatus, ulist.status,
4290				    EFAULT));
4291			}
4292		}
4293	}
4294	mutex_exit(mutex);
4295	ulist32.count = ulist.count = i;
4296
4297	if (ilp32) {
4298		if (copyout(&ulist32, (void *) arg, name_offset))
4299			return (EFAULT);
4300	} else {
4301		if (copyout(&ulist, (void*) arg, name_offset))
4302			return (EFAULT);
4303	}
4304
4305	return (spcs_s_ocopyoutf(&kstatus, ulist.status, 0));
4306}
4307
4308/*
4309 * _ii_olist
4310 *	Breaks the link with the overflow volume.
4311 *
4312 * Calling/Exit State:
4313 *	Returns 0 if the overflow volume was detached. Otherwise an error code
4314 *	is returned and any additional error information is copied
4315 *	out to the user.
4316 *
4317 * Description:
4318 */
4319
4320int
4321_ii_olist(intptr_t arg, int ilp32, int *rvp)
4322{
4323	dsw_aioctl_t ulist;
4324	dsw_aioctl32_t ulist32;
4325	_ii_overflow_t *op;
4326	int rc = 0;
4327	int name_offset, i;
4328	char *carg = (char *)arg;
4329	spcs_s_info_t kstatus;
4330
4331	*rvp = 0;
4332
4333	name_offset = (int)&(((dsw_aioctl_t *)0)->shadow_vol[0]);
4334	if (ilp32) {
4335		if (copyin((void *)arg, &ulist32, sizeof (ulist32)) < 0)
4336			return (EFAULT);
4337		II_TAIL_COPY(ulist, ulist32, flags, dsw_aioctl_t);
4338		ulist.status = (spcs_s_info_t)ulist32.status;
4339		name_offset = (int)&(((dsw_aioctl32_t *)0)->shadow_vol[0]);
4340	} else if (copyin((void *)arg, &ulist, sizeof (ulist)) < 0)
4341		return (EFAULT);
4342
4343	kstatus = spcs_s_kcreate();
4344	if (kstatus == NULL)
4345		return (ENOMEM);
4346
4347	i = 0;
4348
4349	mutex_enter(&_ii_overflow_mutex);
4350	for (op = _ii_overflow_top; i < ulist.count && op;
4351	    carg += DSW_NAMELEN) {
4352		if (copyout(op->ii_volname, carg+name_offset, DSW_NAMELEN)) {
4353			mutex_exit(&_ii_overflow_mutex);
4354			return (spcs_s_ocopyoutf(&kstatus, ulist.status,
4355			    EFAULT));
4356		}
4357		i++;
4358		op = op->ii_next;
4359	}
4360	mutex_exit(&_ii_overflow_mutex);
4361	ulist32.count = ulist.count = i;
4362	/* return count of items listed to user */
4363	if (ilp32) {
4364		if (copyout(&ulist32, (void *)arg, name_offset))
4365			return (EFAULT);
4366	} else {
4367		if (copyout(&ulist, (void *)arg, name_offset))
4368			return (EFAULT);
4369	}
4370
4371	return (spcs_s_ocopyoutf(&kstatus, ulist.status, rc));
4372}
4373
4374/*
4375 * _ii_ostat
4376 *	Breaks the link with the overflow volume.
4377 *
4378 * Calling/Exit State:
4379 *	Returns 0 if the overflow volume was detached. Otherwise an error code
4380 *	is returned and any additional error information is copied
4381 *	out to the user.
4382 *
4383 * Description:
4384 */
4385
4386int
4387_ii_ostat(intptr_t arg, int ilp32, int *rvp, int is_iost_2)
4388{
4389	dsw_ostat_t ustat;
4390	dsw_ostat32_t ustat32;
4391	_ii_overflow_t *op;
4392	spcs_s_info_t kstatus;
4393
4394	*rvp = 0;
4395
4396	if (ilp32) {
4397		if (copyin((void *)arg, &ustat32, sizeof (ustat32)) < 0)
4398			return (EFAULT);
4399		II_TAIL_COPY(ustat, ustat32, overflow_vol, dsw_ostat_t);
4400		ustat.status = (spcs_s_info_t)ustat32.status;
4401	} else if (copyin((void *)arg, &ustat, sizeof (ustat)) < 0)
4402		return (EFAULT);
4403
4404	kstatus = spcs_s_kcreate();
4405	if (kstatus == NULL)
4406		return (ENOMEM);
4407	if (!ustat.overflow_vol[0])
4408		return (spcs_s_ocopyoutf(&kstatus, ustat.status, DSW_EEMPTY));
4409
4410	op = _ii_find_overflow(ustat.overflow_vol);
4411	if (op == NULL)
4412		return (spcs_s_ocopyoutf(&kstatus, ustat.status,
4413		    DSW_ENOTFOUND));
4414
4415	ustat.nchunks = op->ii_nchunks;
4416	ustat.used = op->ii_used;
4417	ustat.unused = op->ii_unused;
4418	ustat.drefcnt = op->ii_drefcnt;
4419	ustat.crefcnt = op->ii_crefcnt;
4420	if (is_iost_2) {
4421		ustat.hversion = op->ii_hversion;
4422		ustat.flags = op->ii_flags;
4423		ustat.hmagic = op->ii_hmagic;
4424	}
4425
4426	spcs_s_kfree(kstatus);
4427	if (ilp32) {
4428		ustat32.nchunks = ustat.nchunks;
4429		ustat32.used = ustat.used;
4430		ustat32.unused = ustat.unused;
4431		ustat32.drefcnt = ustat.drefcnt;
4432		ustat32.crefcnt = ustat.crefcnt;
4433		if (is_iost_2) {
4434			ustat32.hversion = ustat.hversion;
4435			ustat32.flags = ustat.flags;
4436			ustat32.hmagic = ustat.hmagic;
4437		}
4438		if (copyout(&ustat32, (void *)arg, sizeof (ustat32)))
4439			return (EFAULT);
4440	} else {
4441		if (copyout(&ustat, (void *)arg, sizeof (ustat)))
4442			return (EFAULT);
4443	}
4444	return (0);
4445}
4446
4447/*
4448 * _ii_move_grp()
4449 *	Move a set from one group to another, possibly creating the new
4450 *	group.
4451 */
4452
4453int
4454_ii_move_grp(intptr_t arg, int ilp32, int *rvp)
4455{
4456	dsw_movegrp_t umove;
4457	dsw_movegrp32_t umove32;
4458	spcs_s_info_t kstatus;
4459	_ii_info_t *ip;
4460	int rc = 0;
4461	nsc_buf_t *tmp;
4462	ii_header_t *bm_header;
4463
4464	*rvp = 0;
4465
4466	if (ilp32) {
4467		if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0)
4468			return (EFAULT);
4469		II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t);
4470		umove.status = (spcs_s_info_t)umove32.status;
4471	} else if (copyin((void *)arg, &umove, sizeof (umove)) < 0)
4472		return (EFAULT);
4473
4474	kstatus = spcs_s_kcreate();
4475	if (kstatus == NULL)
4476		return (ENOMEM);
4477
4478	if (!umove.shadow_vol[0])
4479		return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY));
4480
4481	mutex_enter(&_ii_info_mutex);
4482	ip = _ii_find_set(umove.shadow_vol);
4483	mutex_exit(&_ii_info_mutex);
4484
4485	if (!ip)
4486		return (spcs_s_ocopyoutf(&kstatus, umove.status,
4487		    DSW_ENOTFOUND));
4488
4489	if (!umove.new_group[0]) {
4490		/* are we clearing the group association? */
4491		if (ip->bi_group) {
4492			DTRACE_PROBE2(_ii_move_grp1, char *, ip->bi_keyname,
4493			    char *, ip->bi_group);
4494			rc = II_UNLINK_GROUP(ip);
4495		}
4496	} else if (!ip->bi_group) {
4497		rc = II_LINK_GROUP(ip, umove.new_group);
4498		DTRACE_PROBE2(_ii_move_grp2, char *, ip->bi_keyname,
4499		    char *, ip->bi_group);
4500	} else {
4501		/* remove it from one group and add it to the other */
4502		DTRACE_PROBE3(_ii_move_grp, char *, ip->bi_keyname,
4503		    char *, ip->bi_group, char *, umove.new_group);
4504		rc = II_UNLINK_GROUP(ip);
4505		if (!rc)
4506			rc = II_LINK_GROUP(ip, umove.new_group);
4507	}
4508
4509	/* ** BEGIN UPDATE BITMAP HEADER ** */
4510	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
4511		_ii_ioctl_done(ip);
4512		mutex_exit(&ip->bi_mutex);
4513		spcs_s_add(kstatus, rc);
4514		return (spcs_s_ocopyoutf(&kstatus, umove.status,
4515		    DSW_ERSRVFAIL));
4516	}
4517	bm_header = _ii_bm_header_get(ip, &tmp);
4518	if (bm_header) {
4519		(void) strncpy(bm_header->group_name, umove.new_group,
4520		    DSW_NAMELEN);
4521		(void) _ii_bm_header_put(bm_header, ip, tmp);
4522	}
4523	_ii_rlse_devs(ip, BMP);
4524	/* ** END UPDATE BITMAP HEADER ** */
4525
4526	_ii_ioctl_done(ip);
4527	mutex_exit(&ip->bi_mutex);
4528
4529	return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
4530}
4531
4532/*
4533 * _ii_change_tag()
4534 *	Move a set from one group to another, possibly creating the new
4535 *	group.
4536 */
4537
4538int
4539_ii_change_tag(intptr_t arg, int ilp32, int *rvp)
4540{
4541	dsw_movegrp_t umove;
4542	dsw_movegrp32_t umove32;
4543	spcs_s_info_t kstatus;
4544	_ii_info_t *ip;
4545	int rc = 0;
4546	nsc_buf_t *tmp;
4547	ii_header_t *bm_header;
4548
4549	*rvp = 0;
4550
4551	if (ilp32) {
4552		if (copyin((void *)arg, &umove32, sizeof (umove32)) < 0)
4553			return (EFAULT);
4554		II_TAIL_COPY(umove, umove32, shadow_vol, dsw_movegrp_t);
4555		umove.status = (spcs_s_info_t)umove32.status;
4556	} else if (copyin((void *)arg, &umove, sizeof (umove)) < 0)
4557		return (EFAULT);
4558
4559	kstatus = spcs_s_kcreate();
4560	if (kstatus == NULL)
4561		return (ENOMEM);
4562
4563	if (!umove.shadow_vol[0])
4564		return (spcs_s_ocopyoutf(&kstatus, umove.status, DSW_EEMPTY));
4565
4566	mutex_enter(&_ii_info_mutex);
4567	ip = _ii_find_set(umove.shadow_vol);
4568	mutex_exit(&_ii_info_mutex);
4569
4570	if (!ip)
4571		return (spcs_s_ocopyoutf(&kstatus, umove.status,
4572		    DSW_ENOTFOUND));
4573
4574	if (!umove.new_group[0]) {
4575		/* are we clearing the group association? */
4576		if (ip->bi_cluster) {
4577			DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname,
4578			    char *, ip->bi_cluster);
4579			rc = II_UNLINK_CLUSTER(ip);
4580		}
4581	} else if (!ip->bi_cluster) {
4582		/* are we adding it to a group for the first time? */
4583		rc = II_LINK_CLUSTER(ip, umove.new_group);
4584		DTRACE_PROBE2(_ii_change_tag, char *, ip->bi_keyname,
4585		    char *, ip->bi_cluster);
4586	} else {
4587		/* remove it from one group and add it to the other */
4588		DTRACE_PROBE3(_ii_change_tag_2, char *, ip->bi_keyname,
4589		    char *, ip->bi_cluster, char *, umove.new_group);
4590		rc = II_UNLINK_CLUSTER(ip);
4591		if (!rc)
4592			rc = II_LINK_CLUSTER(ip, umove.new_group);
4593	}
4594
4595	/* ** BEGIN UPDATE BITMAP HEADER ** */
4596	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
4597		_ii_ioctl_done(ip);
4598		mutex_exit(&ip->bi_mutex);
4599		spcs_s_add(kstatus, rc);
4600		return (spcs_s_ocopyoutf(&kstatus, umove.status,
4601		    DSW_ERSRVFAIL));
4602	}
4603	bm_header = _ii_bm_header_get(ip, &tmp);
4604	if (bm_header) {
4605		(void) strncpy(bm_header->clstr_name, umove.new_group,
4606		    DSW_NAMELEN);
4607		(void) _ii_bm_header_put(bm_header, ip, tmp);
4608	}
4609	_ii_rlse_devs(ip, BMP);
4610	/* ** END UPDATE BITMAP HEADER ** */
4611
4612	_ii_ioctl_done(ip);
4613	mutex_exit(&ip->bi_mutex);
4614
4615	return (spcs_s_ocopyoutf(&kstatus, umove.status, rc));
4616}
4617
4618
4619/*
4620 * _ii_spcs_s_ocopyoutf()
4621 * Wrapper for spcs_s_ocopyoutf() used by _ii_chk_copy() which permits
4622 * the spcs_s_info_t argument to be NULL. _ii_chk_copy() requires this
4623 * functionality as it is sometimes called by _ii_control_copy() which
4624 * has no user context to copy any errors into. At all other times a NULL
4625 * spcs_s_info_t argument would indicate a bug in the calling function.
4626 */
4627
4628static int
4629_ii_spcs_s_ocopyoutf(spcs_s_info_t *kstatusp, spcs_s_info_t ustatus, int err)
4630{
4631	if (ustatus)
4632		return (spcs_s_ocopyoutf(kstatusp, ustatus, err));
4633	spcs_s_kfree(*kstatusp);
4634	return (err);
4635}
4636
4637static int
4638_ii_chk_copy(_ii_info_t *ip, int flags, spcs_s_info_t *kstatusp, pid_t pid,
4639    spcs_s_info_t ustatus)
4640{
4641	_ii_info_t *xip;
4642	int rc;
4643	int rtype;
4644
4645	if ((ip->bi_flags & DSW_COPYINGP) != 0) {
4646		_ii_ioctl_done(ip);
4647		mutex_exit(&ip->bi_mutex);
4648		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING));
4649	}
4650
4651	if (ip->bi_flags & DSW_OFFLINE) {
4652		_ii_ioctl_done(ip);
4653		mutex_exit(&ip->bi_mutex);
4654		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EOFFLINE));
4655	}
4656
4657	if ((ip->bi_flags & (DSW_SHDIMPORT|DSW_SHDEXPORT)) != 0) {
4658		_ii_ioctl_done(ip);
4659		mutex_exit(&ip->bi_mutex);
4660		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4661		    DSW_EISEXPORTED));
4662	}
4663
4664	if ((flags & CV_SHD2MST) == CV_SHD2MST) {
4665		if ((ip->bi_flags & DSW_COPYINGM) != 0) {
4666				_ii_ioctl_done(ip);
4667				mutex_exit(&ip->bi_mutex);
4668				return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4669				    DSW_ECOPYING));
4670		}
4671		/* check if any sibling shadow is copying towards this master */
4672		for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
4673			if (ip != xip && (xip->bi_flags & DSW_COPYINGS) != 0) {
4674				_ii_ioctl_done(ip);
4675				mutex_exit(&ip->bi_mutex);
4676				return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4677				    DSW_ECOPYING));
4678			}
4679		}
4680	}
4681
4682	if (((flags & CV_SHD2MST) == 0) &&
4683	    ((ip->bi_flags & DSW_COPYINGS) != 0)) {
4684		_ii_ioctl_done(ip);
4685		mutex_exit(&ip->bi_mutex);
4686		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus, DSW_ECOPYING));
4687	}
4688
4689	if (ip->bi_flags & DSW_TREEMAP) {
4690		if ((ip->bi_flags & DSW_OVERFLOW) && (flags & CV_SHD2MST)) {
4691			_ii_ioctl_done(ip);
4692			mutex_exit(&ip->bi_mutex);
4693			return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4694			    DSW_EINCOMPLETE));
4695		}
4696	}
4697
4698	/* Assure that no other PID owns this copy/update */
4699	if (ip->bi_locked_pid == 0) {
4700		if (flags & CV_LOCK_PID)
4701			ip->bi_locked_pid = pid;
4702	} else if (ip->bi_locked_pid != pid) {
4703		_ii_ioctl_done(ip);
4704		mutex_exit(&ip->bi_mutex);
4705		return (spcs_s_ocopyoutf(kstatusp, ustatus, DSW_EINUSE));
4706	}
4707
4708	mutex_exit(&ip->bi_mutex);
4709
4710	rtype = MSTR|SHDR|BMP;
4711	if ((rc = _ii_rsrv_devs(ip, rtype, II_INTERNAL)) != 0) {
4712		mutex_enter(&ip->bi_mutex);
4713		_ii_ioctl_done(ip);
4714		mutex_exit(&ip->bi_mutex);
4715		spcs_s_add(*kstatusp, rc);
4716		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4717		    DSW_ERSRVFAIL));
4718	}
4719
4720	if (ii_update_denied(ip, *kstatusp, flags & CV_SHD2MST, 0)) {
4721		mutex_enter(&ip->bi_mutex);
4722		_ii_ioctl_done(ip);
4723		mutex_exit(&ip->bi_mutex);
4724		_ii_rlse_devs(ip, rtype);
4725		return (_ii_spcs_s_ocopyoutf(kstatusp, ustatus,
4726		    DSW_EOPACKAGE));
4727	}
4728
4729	return (0);
4730}
4731
4732static int
4733_ii_do_copy(_ii_info_t *ip, int flags, spcs_s_info_t kstatus, int waitflag)
4734{
4735	int rc = 0;
4736	int rtype = MSTR|SHDR|BMP;
4737	_ii_overflow_t *op;
4738	int quick_update = 0;
4739
4740	waitflag = (waitflag != 0);
4741	/*
4742	 * a copy of a tree-mapped device must be downgraded to
4743	 * an update.
4744	 */
4745	if (ip->bi_flags & DSW_TREEMAP)
4746		flags |= CV_BMP_ONLY;
4747
4748	/*
4749	 * If we want to update the dependent shadow we only need to zero
4750	 * the shadow bitmap.
4751	 */
4752
4753	if (((ip->bi_flags & DSW_GOLDEN) == 0) &&
4754	    (flags & (CV_BMP_ONLY|CV_SHD2MST)) == CV_BMP_ONLY) {
4755
4756		DTRACE_PROBE(DEPENDENT);
4757
4758		/* assign updating time */
4759		ip->bi_mtime = ddi_get_time();
4760
4761		if (ip->bi_flags & DSW_TREEMAP) {
4762			DTRACE_PROBE(COMPACT_DEPENDENT);
4763
4764			if (ip->bi_overflow &&
4765			    (ip->bi_overflow->ii_flags & IIO_VOL_UPDATE) == 0) {
4766				/* attempt to do a quick update */
4767				quick_update = 1;
4768				ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE;
4769				ip->bi_overflow->ii_detachcnt = 1;
4770			}
4771
4772			rc = ii_tinit(ip);
4773
4774			if (quick_update && ip->bi_overflow) {
4775				/* clean up */
4776				ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE);
4777				ip->bi_overflow->ii_detachcnt = 0;
4778			}
4779		}
4780
4781		if (rc == 0)
4782			rc = II_ZEROBM(ip);	/* update copy of shadow */
4783		if (((op = ip->bi_overflow) != NULL) &&
4784		    (op->ii_hversion >= 1) && (op->ii_hmagic == II_OMAGIC)) {
4785			mutex_enter(&_ii_overflow_mutex);
4786			if (ip->bi_flags & DSW_OVRHDRDRTY) {
4787				mutex_enter(&ip->bi_mutex);
4788				ip->bi_flags &= ~DSW_OVRHDRDRTY;
4789				mutex_exit(&ip->bi_mutex);
4790				ASSERT(op->ii_urefcnt > 0);
4791				op->ii_urefcnt--;
4792			}
4793			if (op->ii_urefcnt == 0) {
4794				op->ii_flags &= ~IIO_CNTR_INVLD;
4795				op->ii_unused = op->ii_nchunks - 1;
4796			}
4797			mutex_exit(&_ii_overflow_mutex);
4798		}
4799		mutex_enter(&ip->bi_mutex);
4800		II_FLAG_CLR(DSW_OVERFLOW, ip);
4801		mutex_exit(&ip->bi_mutex);
4802
4803		_ii_unlock_chunk(ip, II_NULLCHUNK);
4804		mutex_enter(&ip->bi_mutex);
4805		_ii_ioctl_done(ip);
4806		mutex_exit(&ip->bi_mutex);
4807		_ii_rlse_devs(ip, rtype);
4808		if (rc) {
4809			spcs_s_add(kstatus, rc);
4810			return (DSW_EIO);
4811		} else {
4812			DTRACE_PROBE(_ii_do_copy_end);
4813			return (0);
4814		}
4815	}
4816
4817	/*
4818	 * need to perform an actual copy.
4819	 */
4820
4821	/*
4822	 * Perform bitmap copy if asked or from dependent shadow to master.
4823	 */
4824	if ((flags & CV_BMP_ONLY) ||
4825	    ((flags & CV_SHD2MST) &&
4826	    ((ip->bi_flags & DSW_GOLDEN) == 0))) {
4827		DTRACE_PROBE(INDEPENDENT_fast);
4828		rc = II_ORBM(ip);		/* save shadow bits for copy */
4829	} else {
4830		DTRACE_PROBE(INDEPENDENT_slow);
4831		rc = ii_fill_copy_bmp(ip); /* set bits for independent copy */
4832	}
4833	if (rc == 0)
4834		rc = II_ZEROBM(ip);
4835	_ii_unlock_chunk(ip, II_NULLCHUNK);
4836	if (rc == 0) {
4837		mutex_enter(&ip->bi_mutex);
4838		if (ip->bi_flags & (DSW_COPYINGP | DSW_SHDEXPORT)) {
4839			rc = (ip->bi_flags & DSW_COPYINGP)
4840			    ? DSW_ECOPYING : DSW_EISEXPORTED;
4841
4842			_ii_ioctl_done(ip);
4843			mutex_exit(&ip->bi_mutex);
4844			_ii_rlse_devs(ip, rtype);
4845			return (rc);
4846		}
4847
4848		/* assign copying time */
4849		ip->bi_mtime = ddi_get_time();
4850
4851		if (flags & CV_SHD2MST)
4852			II_FLAG_SET(DSW_COPYINGS | DSW_COPYINGP, ip);
4853		else
4854			II_FLAG_SET(DSW_COPYINGM | DSW_COPYINGP, ip);
4855		mutex_exit(&ip->bi_mutex);
4856		rc = _ii_copyvol(ip, (flags & CV_SHD2MST),
4857		    rtype, kstatus, waitflag);
4858	} else {
4859		mutex_enter(&ip->bi_mutex);
4860		_ii_ioctl_done(ip);
4861		mutex_exit(&ip->bi_mutex);
4862	}
4863
4864	if (waitflag)
4865		_ii_rlse_devs(ip, rtype);
4866
4867	return (rc);
4868}
4869
4870/*
4871 * _ii_copy
4872 *	Copy or update (take snapshot) II volume.
4873 *
4874 * Calling/Exit State:
4875 *	Returns 0 if the operation succeeded. Otherwise an error code
4876 *	is returned and any additional error information is copied
4877 *	out to the user.
4878 */
4879
4880int
4881_ii_copy(intptr_t arg, int ilp32, int *rvp)
4882{
4883	dsw_ioctl_t ucopy;
4884	dsw_ioctl32_t ucopy32;
4885	_ii_info_t *ip;
4886	int rc = 0;
4887	spcs_s_info_t kstatus;
4888
4889	*rvp = 0;
4890
4891	if (ilp32) {
4892		if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0)
4893			return (EFAULT);
4894		II_TAIL_COPY(ucopy, ucopy32, shadow_vol, dsw_ioctl_t);
4895		ucopy.status = (spcs_s_info_t)ucopy32.status;
4896	} else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0)
4897		return (EFAULT);
4898
4899	kstatus = spcs_s_kcreate();
4900	if (kstatus == NULL)
4901		return (ENOMEM);
4902
4903	if (!ucopy.shadow_vol[0])
4904		return (spcs_s_ocopyoutf(&kstatus, ucopy.status, DSW_EEMPTY));
4905
4906	mutex_enter(&_ii_info_mutex);
4907	ip = _ii_find_set(ucopy.shadow_vol);
4908	mutex_exit(&_ii_info_mutex);
4909	if (ip == NULL)
4910		return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
4911		    DSW_ENOTFOUND));
4912
4913	/* Check that the copy/update makes sense */
4914	if ((rc = _ii_chk_copy(ip, ucopy.flags, &kstatus, ucopy.pid,
4915	    ucopy.status)) == 0) {
4916		/* perform the copy */
4917		_ii_lock_chunk(ip, II_NULLCHUNK);
4918		/* _ii_do_copy() calls _ii_ioctl_done() */
4919		rc = _ii_do_copy(ip, ucopy.flags, kstatus, 1);
4920		return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc));
4921	}
4922
4923	return (rc);
4924}
4925
4926/*
4927 * _ii_mass_copy
4928 * Copies/updates the sets pointed to in the ipa array.
4929 *
4930 * Calling/Exit State:
4931 * Returns 0 if the operations was successful.  Otherwise an
4932 * error code.
4933 */
4934int
4935_ii_mass_copy(_ii_info_t **ipa, dsw_aioctl_t *ucopy, int wait)
4936{
4937	int i;
4938	int rc = 0;
4939	int failed;
4940	int rtype = MSTR|SHDR|BMP;
4941	_ii_info_t *ip;
4942	spcs_s_info_t kstatus;
4943
4944	kstatus = spcs_s_kcreate();
4945	if (kstatus == NULL)
4946		return (ENOMEM);
4947
4948	/* Check copy validitity */
4949	for (i = 0; i < ucopy->count; i++) {
4950		ip = ipa[i];
4951
4952		rc = _ii_chk_copy(ip, ucopy->flags, &kstatus, ucopy->pid,
4953		    ucopy->status);
4954
4955		if (rc) {
4956			/* Clean up the mess */
4957
4958			DTRACE_PROBE1(_ii_mass_copy_end1, int, rc);
4959
4960			/*
4961			 * The array ipa now looks like:
4962			 *    0..(i-1): needs mutex_enter/ioctl_done/mutex_exit
4963			 *    i: needs nothing (_ii_chk_copy does cleanup)
4964			 *    (i+1)..n: needs just ioctl_done/mutex_exit
4965			 */
4966
4967			failed = i;
4968
4969			for (i = 0; i < failed; i++) {
4970				mutex_enter(&(ipa[i]->bi_mutex));
4971				_ii_ioctl_done(ipa[i]);
4972				mutex_exit(&(ipa[i]->bi_mutex));
4973				_ii_rlse_devs(ipa[i], rtype);
4974			}
4975
4976			/* skip 'failed', start with failed + 1 */
4977
4978			for (i = failed + 1; i < ucopy->count; i++) {
4979				_ii_ioctl_done(ipa[i]);
4980				mutex_exit(&(ipa[i]->bi_mutex));
4981			}
4982
4983			return (rc);
4984		}
4985	}
4986
4987	/* Check for duplicate shadows in same II group */
4988	if (ucopy->flags & CV_SHD2MST) {
4989		/* Reset the state of all masters */
4990		for (i = 0; i < ucopy->count; i++) {
4991			ip = ipa[i];
4992			ip->bi_master->bi_state &= ~DSW_MSTTARGET;
4993		}
4994
4995		for (i = 0; i < ucopy->count; i++) {
4996			ip = ipa[i];
4997			/*
4998			 * Check the state of the master.  If DSW_MSTTARGET is
4999			 * set, it's because this master is attached to another
5000			 * shadow within this set.
5001			 */
5002			if (ip->bi_master->bi_state & DSW_MSTTARGET) {
5003				rc = EINVAL;
5004				break;
5005			}
5006
5007			/*
5008			 * Set the DSW_MSTTARGET bit on the master associated
5009			 * with this shadow.  This will allow us to detect
5010			 * multiple shadows pointing to this master within
5011			 * this loop.
5012			 */
5013			ip->bi_master->bi_state |= DSW_MSTTARGET;
5014		}
5015	}
5016
5017	/* Handle error */
5018	if (rc) {
5019		DTRACE_PROBE1(_ii_mass_copy_end2, int, rc);
5020		for (i = 0; i < ucopy->count; i++) {
5021			ip = ipa[i];
5022
5023			_ii_rlse_devs(ip, rtype);
5024
5025			mutex_enter(&ip->bi_mutex);
5026			_ii_ioctl_done(ip);
5027			mutex_exit(&ip->bi_mutex);
5028		}
5029
5030		return (spcs_s_ocopyoutf(&kstatus, ucopy->status, rc));
5031	}
5032
5033	/* Lock bitmaps & prepare counts */
5034	for (i = 0; i < ucopy->count; i++) {
5035		ip = ipa[i];
5036		_ii_lock_chunk(ip, II_NULLCHUNK);
5037		if (ip->bi_overflow) {
5038			ip->bi_overflow->ii_detachcnt = 0;
5039		}
5040	}
5041
5042	/* determine which volumes we're dealing with */
5043	for (i = 0; i < ucopy->count; i++) {
5044		ip = ipa[i];
5045		if (ip->bi_overflow) {
5046			ip->bi_overflow->ii_flags |= IIO_VOL_UPDATE;
5047			if ((ucopy->flags & (CV_BMP_ONLY|CV_SHD2MST)) ==
5048			    CV_BMP_ONLY) {
5049				++ip->bi_overflow->ii_detachcnt;
5050			}
5051		}
5052	}
5053
5054	/* Perform copy */
5055	for (i = 0; i < ucopy->count; i++) {
5056		ip = ipa[i];
5057		rc = _ii_do_copy(ip, ucopy->flags, kstatus, wait);
5058		/* Hum... what to do if one of these fails? */
5059	}
5060
5061	/* clear out flags so as to prevent any accidental reuse */
5062	for (i = 0; i < ucopy->count; i++) {
5063		ip = ipa[i];
5064		if (ip->bi_overflow)
5065			ip->bi_overflow->ii_flags &= ~(IIO_VOL_UPDATE);
5066	}
5067
5068	/*
5069	 * We can only clean up the kstatus structure if there are
5070	 * no waiters.  If someone's waiting for the information,
5071	 * _ii_copyvolp() uses spcs_s_add to write to kstatus.  Panic
5072	 * would ensue if we freed it up now.
5073	 */
5074	if (!wait)
5075		rc = spcs_s_ocopyoutf(&kstatus, ucopy->status, rc);
5076
5077	return (rc);
5078}
5079
5080/*
5081 * _ii_list_copy
5082 * Retrieve a list from a character array and use _ii_mass_copy to
5083 * initiate a copy/update operation on all of the specified sets.
5084 *
5085 * Calling/Exit State:
5086 * Returns 0 if the operations was successful.  Otherwise an
5087 * error code.
5088 */
5089int
5090_ii_list_copy(char *list, dsw_aioctl_t *ucopy, int wait)
5091{
5092	int i;
5093	int rc = 0;
5094	char *name;
5095	_ii_info_t *ip;
5096	_ii_info_t **ipa;
5097
5098	ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP);
5099
5100	/* Reserve devices */
5101	name = list;
5102	mutex_enter(&_ii_info_mutex);
5103	for (i = 0; i < ucopy->count; i++, name += DSW_NAMELEN) {
5104		ip = _ii_find_set(name);
5105
5106		if (ip == NULL) {
5107			rc = DSW_ENOTFOUND;
5108			break;
5109		}
5110
5111		ipa[i] = ip;
5112	}
5113
5114	if (rc != 0) {
5115		/* Failed to find all sets, release those we do have */
5116		while (i-- > 0) {
5117			ip = ipa[i];
5118			mutex_enter(&ip->bi_mutex);
5119			_ii_ioctl_done(ip);
5120			mutex_exit(&ip->bi_mutex);
5121		}
5122	} else {
5123		/* Begin copy operation */
5124		rc = _ii_mass_copy(ipa, ucopy, wait);
5125	}
5126
5127	mutex_exit(&_ii_info_mutex);
5128
5129	kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count);
5130
5131	return (rc);
5132}
5133
5134/*
5135 * _ii_group_copy
5136 * Retrieve list of sets in a group and use _ii_mass_copy to initiate
5137 * a copy/update of all of them.
5138 *
5139 * Calling/Exit State:
5140 * Returns 0 if the operations was successful.  Otherwise an
5141 * error code.
5142 */
5143int
5144_ii_group_copy(char *name, dsw_aioctl_t *ucopy, int wait)
5145{
5146	int		i;
5147	int		rc;
5148	uint64_t	hash;
5149	_ii_info_t	**ipa;
5150	_ii_lsthead_t	*head;
5151	_ii_lstinfo_t	*np;
5152
5153	/* find group */
5154	hash = nsc_strhash(name);
5155
5156	mutex_enter(&_ii_group_mutex);
5157
5158	for (head = _ii_group_top; head; head = head->lst_next) {
5159		if (hash == head->lst_hash && strncmp(head->lst_name,
5160		    name, DSW_NAMELEN) == 0)
5161			break;
5162	}
5163
5164	if (!head) {
5165		mutex_exit(&_ii_group_mutex);
5166		DTRACE_PROBE(_ii_group_copy);
5167		return (DSW_EGNOTFOUND);
5168	}
5169
5170	/* Count entries */
5171	for (ucopy->count = 0, np = head->lst_start; np; np = np->lst_next)
5172		++ucopy->count;
5173
5174	if (ucopy->count == 0) {
5175		mutex_exit(&_ii_group_mutex);
5176		return (DSW_EGNOTFOUND);
5177	}
5178
5179	ipa = kmem_zalloc(sizeof (_ii_info_t *) * ucopy->count, KM_SLEEP);
5180	if (ipa == NULL) {
5181		mutex_exit(&_ii_group_mutex);
5182		return (ENOMEM);
5183	}
5184
5185	/* Create list */
5186	mutex_enter(&_ii_info_mutex);
5187	np = head->lst_start;
5188	for (i = 0; i < ucopy->count; i++) {
5189		ASSERT(np != 0);
5190
5191		ipa[i] = np->lst_ip;
5192
5193		mutex_enter(&ipa[i]->bi_mutex);
5194		ipa[i]->bi_ioctl++;
5195
5196		np = np->lst_next;
5197	}
5198
5199	/* Begin copy operation */
5200	rc = _ii_mass_copy(ipa, ucopy, wait);
5201
5202	mutex_exit(&_ii_info_mutex);
5203	mutex_exit(&_ii_group_mutex);
5204
5205	kmem_free(ipa, sizeof (_ii_info_t *) * ucopy->count);
5206
5207	return (rc);
5208}
5209
5210/*
5211 * _ii_acopy
5212 *	Copy or update (take snapshot) II multiple volumes.
5213 *
5214 * Calling/Exit State:
5215 *	Returns 0 if the operation succeeded. Otherwise an error code
5216 *	is returned and any additional error information is copied
5217 *	out to the user.
5218 */
5219int
5220_ii_acopy(intptr_t arg, int ilp32, int *rvp)
5221{
5222	int rc;
5223	int name_offset;
5224	char *list;
5225	char *nptr;
5226	char name[DSW_NAMELEN];
5227	dsw_aioctl_t ucopy;
5228	dsw_aioctl32_t ucopy32;
5229	spcs_s_info_t kstatus;
5230
5231	*rvp = 0;
5232
5233	name_offset = (int)&(((dsw_aioctl_t *)0)->shadow_vol[0]);
5234
5235	if (ilp32) {
5236		if (copyin((void *)arg, &ucopy32, sizeof (ucopy32)) < 0)
5237			return (EFAULT);
5238		II_TAIL_COPY(ucopy, ucopy32, flags, dsw_ioctl_t);
5239		ucopy.status = (spcs_s_info_t)ucopy32.status;
5240		name_offset = (int)&(((dsw_aioctl32_t *)0)->shadow_vol[0]);
5241	} else if (copyin((void *)arg, &ucopy, sizeof (ucopy)) < 0)
5242		return (EFAULT);
5243
5244	kstatus = spcs_s_kcreate();
5245
5246	if (kstatus == NULL)
5247		return (ENOMEM);
5248
5249	nptr = (char *)arg + name_offset;
5250	rc = 0;
5251
5252	if (ucopy.flags & CV_IS_GROUP) {
5253		if (copyin(nptr, name, DSW_NAMELEN) < 0)
5254			return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5255			    EFAULT));
5256
5257		/* kstatus information is handled within _ii_group_copy */
5258		rc = _ii_group_copy(name, &ucopy, 0);
5259	} else if (ucopy.count > 0) {
5260		list = kmem_alloc(DSW_NAMELEN * ucopy.count, KM_SLEEP);
5261
5262		if (list == NULL)
5263			return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5264			    ENOMEM));
5265
5266		if (copyin(nptr, list, DSW_NAMELEN * ucopy.count) < 0)
5267			return (spcs_s_ocopyoutf(&kstatus, ucopy.status,
5268			    EFAULT));
5269
5270		rc = _ii_list_copy(list, &ucopy, 0);
5271		kmem_free(list, DSW_NAMELEN * ucopy.count);
5272	}
5273
5274	return (spcs_s_ocopyoutf(&kstatus, ucopy.status, rc));
5275}
5276
5277/*
5278 * _ii_bitsset
5279 *	Copy out II pair bitmaps to user program
5280 *
5281 * Calling/Exit State:
5282 *	Returns 0 if the operation succeeded. Otherwise an error code
5283 *	is returned and any additional error information is copied
5284 *	out to the user.
5285 */
5286int
5287_ii_bitsset(intptr_t arg, int ilp32, int cmd, int *rvp)
5288{
5289	dsw_bitsset_t ubitsset;
5290	dsw_bitsset32_t ubitsset32;
5291	nsc_size_t nbitsset;
5292	_ii_info_t *ip;
5293	int rc;
5294	spcs_s_info_t kstatus;
5295	int bitmap_size;
5296
5297	*rvp = 0;
5298
5299	if (ilp32) {
5300		if (copyin((void *)arg, &ubitsset32, sizeof (ubitsset32)))
5301			return (EFAULT);
5302		ubitsset.status = (spcs_s_info_t)ubitsset32.status;
5303		bcopy(ubitsset32.shadow_vol, ubitsset.shadow_vol, DSW_NAMELEN);
5304	} else if (copyin((void *)arg, &ubitsset, sizeof (ubitsset)))
5305		return (EFAULT);
5306
5307	kstatus = spcs_s_kcreate();
5308	if (kstatus == NULL)
5309		return (ENOMEM);
5310
5311	if (!ubitsset.shadow_vol[0])
5312		return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5313		    DSW_EEMPTY));
5314
5315	mutex_enter(&_ii_info_mutex);
5316	ip = _ii_find_set(ubitsset.shadow_vol);
5317	mutex_exit(&_ii_info_mutex);
5318	if (ip == NULL)
5319		return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5320		    DSW_ENOTFOUND));
5321
5322	mutex_exit(&ip->bi_mutex);
5323
5324	if ((rc = _ii_rsrv_devs(ip, BMP, II_INTERNAL)) != 0) {
5325		mutex_enter(&ip->bi_mutex);
5326		_ii_ioctl_done(ip);
5327		mutex_exit(&ip->bi_mutex);
5328		spcs_s_add(kstatus, rc);
5329		return (spcs_s_ocopyoutf(&kstatus, ubitsset.status,
5330		    DSW_ERSRVFAIL));
5331	}
5332
5333	ubitsset.tot_size = ip->bi_size / DSW_SIZE;
5334	if ((ip->bi_size % DSW_SIZE) != 0)
5335		++ubitsset.tot_size;
5336	bitmap_size = (ubitsset.tot_size + 7) / 8;
5337	if (cmd == DSWIOC_SBITSSET)
5338		rc = II_CNT_BITS(ip, ip->bi_shdfba, &nbitsset, bitmap_size);
5339	else
5340		rc = II_CNT_BITS(ip, ip->bi_copyfba, &nbitsset, bitmap_size);
5341	ubitsset.tot_set = nbitsset;
5342	_ii_rlse_devs(ip, BMP);
5343	mutex_enter(&ip->bi_mutex);
5344	_ii_ioctl_done(ip);
5345	mutex_exit(&ip->bi_mutex);
5346	if (rc) {
5347		spcs_s_add(kstatus, rc);
5348		return (spcs_s_ocopyoutf(&kstatus, ubitsset.status, DSW_EIO));
5349	}
5350
5351	spcs_s_kfree(kstatus);
5352	/* return the fetched names to the user */
5353	if (ilp32) {
5354		ubitsset32.status = (spcs_s_info32_t)ubitsset.status;
5355		ubitsset32.tot_size = ubitsset.tot_size;
5356		ubitsset32.tot_set = ubitsset.tot_set;
5357		rc = copyout(&ubitsset32, (void *)arg, sizeof (ubitsset32));
5358	} else {
5359		rc = copyout(&ubitsset, (void *)arg, sizeof (ubitsset));
5360	}
5361
5362	return (rc);
5363}
5364
5365/*
5366 * _ii_stopvol
5367 *	Stop any copying process for shadow, and stop shadowing
5368 *
5369 */
5370
5371static void
5372_ii_stopvol(_ii_info_t *ip)
5373{
5374	nsc_path_t *mst_tok;
5375	nsc_path_t *mstr_tok;
5376	nsc_path_t *shd_tok;
5377	nsc_path_t *shdr_tok;
5378	nsc_path_t *bmp_tok;
5379	int rc;
5380
5381	while (_ii_stopcopy(ip) == EINTR)
5382		;
5383
5384	DTRACE_PROBE(_ii_stopvol);
5385
5386	mutex_enter(&ip->bi_mutex);
5387	mst_tok = ip->bi_mst_tok;
5388	mstr_tok = ip->bi_mstr_tok;
5389	shd_tok = ip->bi_shd_tok;
5390	shdr_tok = ip->bi_shdr_tok;
5391	bmp_tok = ip->bi_bmp_tok;
5392	ip->bi_shd_tok = 0;
5393	ip->bi_shdr_tok = 0;
5394	if (!NSHADOWS(ip)) {
5395		ip->bi_mst_tok = 0;
5396		ip->bi_mstr_tok = 0;
5397	}
5398	ip->bi_bmp_tok = 0;
5399
5400	/* Wait for any _ii_open() calls to complete */
5401
5402	while (ip->bi_ioctl) {
5403		ip->bi_state |= DSW_IOCTL;
5404		cv_wait(&ip->bi_ioctlcv, &ip->bi_mutex);
5405	}
5406	mutex_exit(&ip->bi_mutex);
5407
5408	rc = _ii_reserve_begin(ip);
5409	if (rc) {
5410		cmn_err(CE_WARN, "!_ii_stopvol: _ii_reserve_begin %d", rc);
5411	}
5412	if (!NSHADOWS(ip)) {
5413		if (mst_tok) {
5414			rc = _ii_unregister_path(mst_tok, NSC_PCATCH,
5415			    "master");
5416			if (rc)
5417				cmn_err(CE_WARN, "!ii: unregister master %d",
5418				    rc);
5419		}
5420
5421		if (mstr_tok) {
5422			rc = _ii_unregister_path(mstr_tok, NSC_PCATCH,
5423			    "raw master");
5424			if (rc)
5425				cmn_err(CE_WARN, "!ii: unregister raw "
5426				    "master %d", rc);
5427		}
5428	}
5429
5430	if (shd_tok) {
5431		rc = _ii_unregister_path(shd_tok, NSC_PCATCH, "shadow");
5432		if (rc)
5433			cmn_err(CE_WARN, "!ii: unregister shadow %d", rc);
5434	}
5435
5436	if (shdr_tok) {
5437		rc = _ii_unregister_path(shdr_tok, NSC_PCATCH, "raw shadow");
5438		if (rc)
5439			cmn_err(CE_WARN, "!ii: unregister raw shadow %d", rc);
5440	}
5441
5442	if (bmp_tok) {
5443		rc = _ii_unregister_path(bmp_tok, NSC_PCATCH, "bitmap");
5444		if (rc)
5445			cmn_err(CE_WARN, "!ii: unregister bitmap %d", rc);
5446	}
5447	_ii_reserve_end(ip);
5448
5449	/* Wait for all necessary _ii_close() calls to complete */
5450	mutex_enter(&ip->bi_mutex);
5451
5452	while (total_ref(ip) != 0) {
5453		ip->bi_state |= DSW_CLOSING;
5454		cv_wait(&ip->bi_closingcv, &ip->bi_mutex);
5455	}
5456	if (!NSHADOWS(ip)) {
5457		nsc_set_owner(ip->bi_mstfd, NULL);
5458		nsc_set_owner(ip->bi_mstrfd, NULL);
5459	}
5460	nsc_set_owner(ip->bi_shdfd, NULL);
5461	nsc_set_owner(ip->bi_shdrfd, NULL);
5462	mutex_exit(&ip->bi_mutex);
5463
5464}
5465
5466
5467/*
5468 * _ii_ioctl_done
5469 *	If this is the last one to complete, wakeup all processes waiting
5470 *	for ioctls to complete
5471 *
5472 */
5473
5474static void
5475_ii_ioctl_done(_ii_info_t *ip)
5476{
5477	ASSERT(ip->bi_ioctl > 0);
5478	ip->bi_ioctl--;
5479	if (ip->bi_ioctl == 0 && (ip->bi_state & DSW_IOCTL)) {
5480		ip->bi_state &= ~DSW_IOCTL;
5481		cv_broadcast(&ip->bi_ioctlcv);
5482	}
5483
5484}
5485
5486/*
5487 * _ii_find_vol
5488 *	Search the configured shadows list for the supplied volume.
5489 *	If found, flag an ioctl in progress and return the locked _ii_info_t.
5490 *
5491 *	The caller must check to see if the bi_disable flag is set and
5492 *	treat it appropriately.
5493 *
5494 * ASSUMPTION:
5495 *	_ii_info_mutex must be locked prior to calling this function
5496 *
5497 */
5498
5499static _ii_info_t *
5500_ii_find_vol(char *volume, int vol)
5501{
5502	_ii_info_t **xip, *ip;
5503
5504	for (xip = &_ii_info_top; *xip; xip = &(*xip)->bi_next) {
5505		if ((*xip)->bi_disabled)
5506			continue;
5507		if (strcmp(volume, vol == MST ? ii_pathname((*xip)->bi_mstfd) :
5508		    (*xip)->bi_keyname) == 0) {
5509			break;
5510		}
5511	}
5512
5513	if (!*xip) {
5514		DTRACE_PROBE(VolNotFound);
5515		return (NULL);
5516	}
5517
5518	ip = *xip;
5519	if (!ip->bi_shd_tok && ((ip->bi_flags & DSW_SHDEXPORT) == 0)) {
5520		/* Not fully configured until bi_shd_tok is set */
5521		DTRACE_PROBE(SetNotConfiged);
5522		return (NULL);
5523
5524	}
5525	mutex_enter(&ip->bi_mutex);
5526	ip->bi_ioctl++;
5527
5528	return (ip);
5529}
5530
5531static _ii_info_t *
5532_ii_find_set(char *volume)
5533{
5534	return (_ii_find_vol(volume, SHD));
5535}
5536
5537/*
5538 * _ii_find_overflow
5539 *	Search the configured shadows list for the supplied overflow volume.
5540 *
5541 */
5542
5543static _ii_overflow_t *
5544_ii_find_overflow(char *volume)
5545{
5546	_ii_overflow_t **xop, *op;
5547
5548	mutex_enter(&_ii_overflow_mutex);
5549
5550	DTRACE_PROBE(_ii_find_overflowmutex);
5551
5552	for (xop = &_ii_overflow_top; *xop; xop = &(*xop)->ii_next) {
5553		if (strcmp(volume, (*xop)->ii_volname) == 0) {
5554			break;
5555		}
5556	}
5557
5558	if (!*xop) {
5559		mutex_exit(&_ii_overflow_mutex);
5560		return (NULL);
5561	}
5562
5563	op = *xop;
5564	mutex_exit(&_ii_overflow_mutex);
5565
5566	return (op);
5567}
5568
5569/*
5570 * _ii_bm_header_get
5571 *	Fetch the bitmap volume header
5572 *
5573 */
5574
5575ii_header_t *
5576_ii_bm_header_get(_ii_info_t *ip, nsc_buf_t **tmp)
5577{
5578	ii_header_t *hdr;
5579	nsc_off_t read_fba;
5580	int rc;
5581
5582	ASSERT(ip->bi_bmprsrv);		/* assert bitmap is reserved */
5583	ASSERT(MUTEX_HELD(&ip->bi_mutex));
5584
5585	if ((ip->bi_flags & DSW_BMPOFFLINE) != 0)
5586		return (NULL);
5587
5588	*tmp = NULL;
5589	read_fba = 0;
5590
5591	II_READ_START(ip, bitmap);
5592	rc = nsc_alloc_buf(ip->bi_bmpfd, read_fba,
5593	    FBA_LEN(sizeof (ii_header_t)), NSC_RDWRBUF, tmp);
5594	II_READ_END(ip, bitmap, rc, FBA_LEN(sizeof (ii_header_t)));
5595	if (!II_SUCCESS(rc)) {
5596		if (ii_debug > 2)
5597			cmn_err(CE_WARN, "!ii: nsc_alloc_buf returned 0x%x",
5598			    rc);
5599		if (*tmp)
5600			(void) nsc_free_buf(*tmp);
5601		*tmp = NULL;
5602		mutex_exit(&ip->bi_mutex);
5603		_ii_error(ip, DSW_BMPOFFLINE);
5604		mutex_enter(&ip->bi_mutex);
5605		return (NULL);
5606	}
5607
5608	hdr = (ii_header_t *)(*tmp)->sb_vec[0].sv_addr;
5609
5610	return (hdr);
5611}
5612
5613
5614/*
5615 * _ii_bm_header_free
5616 *	Free the bitmap volume header
5617 *
5618 */
5619
5620/* ARGSUSED */
5621
5622void
5623_ii_bm_header_free(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp)
5624{
5625	(void) nsc_free_buf(tmp);
5626
5627}
5628
5629/*
5630 * _ii_bm_header_put
5631 *	Write out the modified bitmap volume header and free it
5632 *
5633 */
5634
5635/* ARGSUSED */
5636
5637int
5638_ii_bm_header_put(ii_header_t *hdr, _ii_info_t *ip, nsc_buf_t *tmp)
5639{
5640	nsc_off_t write_fba;
5641	int rc;
5642
5643	ASSERT(MUTEX_HELD(&ip->bi_mutex));
5644
5645	write_fba = 0;
5646
5647	II_NSC_WRITE(ip, bitmap, rc, tmp, write_fba,
5648	    FBA_LEN(sizeof (ii_header_t)), 0);
5649
5650	(void) nsc_free_buf(tmp);
5651	if (!II_SUCCESS(rc)) {
5652		mutex_exit(&ip->bi_mutex);
5653		_ii_error(ip, DSW_BMPOFFLINE);
5654		mutex_enter(&ip->bi_mutex);
5655		DTRACE_PROBE(_ii_bm_header_put);
5656		return (rc);
5657	} else {
5658		DTRACE_PROBE(_ii_bm_header_put_end);
5659		return (0);
5660	}
5661}
5662
5663/*
5664 * _ii_flag_op
5665 *	Clear or set a flag in bi_flags and dsw_state.
5666 *	This relies on the ownership of the header block's nsc_buf
5667 *	for locking.
5668 *
5669 */
5670
5671void
5672_ii_flag_op(and, or, ip, update)
5673int	and, or;
5674_ii_info_t *ip;
5675int update;
5676{
5677	ii_header_t *bm_header;
5678	nsc_buf_t *tmp;
5679
5680	ip->bi_flags &= and;
5681	ip->bi_flags |= or;
5682
5683	if (update == TRUE) {
5684
5685		/*
5686		 * No point trying to access bitmap header if it's offline
5687		 * or has been disassociated from set via DSW_HANGING
5688		 */
5689		if ((ip->bi_flags & (DSW_BMPOFFLINE|DSW_HANGING)) == 0) {
5690			bm_header = _ii_bm_header_get(ip, &tmp);
5691			if (bm_header == NULL) {
5692				if (tmp)
5693					(void) nsc_free_buf(tmp);
5694				DTRACE_PROBE(_ii_flag_op_end);
5695				return;
5696			}
5697			bm_header->ii_state &= and;
5698			bm_header->ii_state |= or;
5699			/* copy over the mtime */
5700			bm_header->ii_mtime = ip->bi_mtime;
5701			(void) _ii_bm_header_put(bm_header, ip, tmp);
5702		}
5703	}
5704
5705}
5706
5707/*
5708 * _ii_nsc_io
5709 *	Perform read or write on an underlying nsc device
5710 * fd		- nsc file descriptor
5711 * flag		- nsc io direction and characteristics flag
5712 * fba_pos	- offset from beginning of device in FBAs
5713 * io_addr	- pointer to data buffer
5714 * io_len	- length of io in bytes
5715 */
5716
5717int
5718_ii_nsc_io(_ii_info_t *ip, int ks, nsc_fd_t *fd, int flag, nsc_off_t fba_pos,
5719    unsigned char *io_addr, nsc_size_t io_len)
5720{
5721	nsc_buf_t *tmp = NULL;
5722	nsc_vec_t *vecp;
5723	uchar_t	*vaddr;
5724	size_t	copy_len;
5725	int64_t	vlen;
5726	int	rc;
5727	nsc_size_t	fba_req, fba_len;
5728	nsc_size_t	maxfbas = 0;
5729	nsc_size_t	tocopy;
5730	unsigned char *toaddr;
5731
5732	rc = nsc_maxfbas(fd, 0, &maxfbas);
5733	if (!II_SUCCESS(rc)) {
5734#ifdef DEBUG
5735		cmn_err(CE_WARN, "!_ii_nsc_io: maxfbas failed (%d)", rc);
5736#endif
5737		maxfbas = DSW_CBLK_FBA;
5738	}
5739
5740	toaddr = io_addr;
5741	fba_req = FBA_LEN(io_len);
5742
5743#ifdef DEBUG_SPLIT_IO
5744	cmn_err(CE_NOTE, "!_ii_nsc_io: maxfbas = %08x", maxfbas);
5745	cmn_err(CE_NOTE, "!_ii_nsc_io: toaddr=%08x, io_len=%08x, fba_req=%08x",
5746	    toaddr, io_len, fba_req);
5747#endif
5748
5749loop:
5750	tmp = NULL;
5751	fba_len = min(fba_req, maxfbas);
5752	tocopy = min(io_len, FBA_SIZE(fba_len));
5753
5754	DTRACE_PROBE2(_ii_nsc_io_buffer, nsc_off_t, fba_pos,
5755	    nsc_size_t, fba_len);
5756
5757#ifdef DEBUG_SPLIT_IO
5758	cmn_err(CE_NOTE, "!_ii_nsc_io: fba_pos=%08x, fba_len=%08x",
5759	    fba_pos, fba_len);
5760#endif
5761
5762#ifndef DISABLE_KSTATS
5763	if (flag & NSC_READ) {
5764		switch (ks) {
5765		case KS_MST:
5766			II_READ_START(ip, master);
5767			break;
5768		case KS_SHD:
5769			II_READ_START(ip, shadow);
5770			break;
5771		case KS_BMP:
5772			II_READ_START(ip, bitmap);
5773			break;
5774		case KS_OVR:
5775			II_READ_START(ip, overflow);
5776			break;
5777		default:
5778			cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5779			break;
5780		}
5781	}
5782#endif
5783
5784	rc = nsc_alloc_buf(fd, fba_pos, fba_len, flag, &tmp);
5785
5786#ifndef DISABLE_KSTATS
5787	if (flag & NSC_READ) {
5788		switch (ks) {
5789		case KS_MST:
5790			II_READ_END(ip, master, rc, fba_len);
5791			break;
5792		case KS_SHD:
5793			II_READ_END(ip, shadow, rc, fba_len);
5794			break;
5795		case KS_BMP:
5796			II_READ_END(ip, bitmap, rc, fba_len);
5797			break;
5798		case KS_OVR:
5799			II_READ_END(ip, overflow, rc, fba_len);
5800			break;
5801		}
5802	}
5803#endif
5804
5805	if (!II_SUCCESS(rc)) {
5806		if (tmp) {
5807			(void) nsc_free_buf(tmp);
5808		}
5809
5810		return (EIO);
5811	}
5812
5813	if ((flag & (NSC_WRITE|NSC_READ)) == NSC_WRITE &&
5814	    (FBA_OFF(io_len) != 0)) {
5815		/*
5816		 * Not overwriting all of the last FBA, so read in the
5817		 * old contents now before we overwrite it with the new
5818		 * data.
5819		 */
5820#ifdef DEBUG_SPLIT_IO
5821		cmn_err(CE_NOTE, "!_ii_nsc_io: Read-B4-Write %08x",
5822		    fba_pos+FBA_NUM(io_len));
5823#endif
5824
5825#ifdef DISABLE_KSTATS
5826		rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5827#else
5828		switch (ks) {
5829		case KS_MST:
5830			II_NSC_READ(ip, master, rc, tmp,
5831			    fba_pos+FBA_NUM(io_len), 1, 0);
5832			break;
5833		case KS_SHD:
5834			II_NSC_READ(ip, shadow, rc, tmp,
5835			    fba_pos+FBA_NUM(io_len), 1, 0);
5836			break;
5837		case KS_BMP:
5838			II_NSC_READ(ip, bitmap, rc, tmp,
5839			    fba_pos+FBA_NUM(io_len), 1, 0);
5840			break;
5841		case KS_OVR:
5842			II_NSC_READ(ip, overflow, rc, tmp,
5843			    fba_pos+FBA_NUM(io_len), 1, 0);
5844			break;
5845		case KS_NA:
5846			rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5847			break;
5848		default:
5849			cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5850			rc = nsc_read(tmp, fba_pos+FBA_NUM(io_len), 1, 0);
5851			break;
5852		}
5853#endif
5854		if (!II_SUCCESS(rc)) {
5855			(void) nsc_free_buf(tmp);
5856			return (EIO);
5857		}
5858	}
5859
5860	vecp = tmp->sb_vec;
5861	vlen = vecp->sv_len;
5862	vaddr = vecp->sv_addr;
5863
5864	while (tocopy > 0) {
5865		if (vecp->sv_addr == 0 || vecp->sv_len == 0) {
5866#ifdef DEBUG
5867			cmn_err(CE_WARN, "!_ii_nsc_io: ran off end of handle");
5868#endif
5869			break;
5870		}
5871
5872		copy_len = (size_t)min(vlen, tocopy);
5873
5874		DTRACE_PROBE1(_ii_nsc_io_bcopy, size_t, copy_len);
5875
5876		if (flag & NSC_WRITE)
5877			bcopy(io_addr, vaddr, copy_len);
5878		else
5879			bcopy(vaddr, io_addr, copy_len);
5880
5881		toaddr += copy_len;
5882		tocopy -= copy_len;
5883		io_addr += copy_len;
5884		io_len -= copy_len;
5885		vaddr += copy_len;
5886		vlen -= copy_len;
5887
5888		if (vlen <= 0) {
5889			vecp++;
5890			vaddr = vecp->sv_addr;
5891			vlen = vecp->sv_len;
5892		}
5893	}
5894
5895	if (flag & NSC_WRITE) {
5896#ifdef DISABLE_KSTATS
5897		rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5898#else
5899		switch (ks) {
5900		case KS_MST:
5901			II_NSC_WRITE(ip, master, rc, tmp, tmp->sb_pos,
5902			    tmp->sb_len, 0);
5903			break;
5904		case KS_SHD:
5905			II_NSC_WRITE(ip, shadow, rc, tmp, tmp->sb_pos,
5906			    tmp->sb_len, 0);
5907			break;
5908		case KS_BMP:
5909			II_NSC_WRITE(ip, bitmap, rc, tmp, tmp->sb_pos,
5910			    tmp->sb_len, 0);
5911			break;
5912		case KS_OVR:
5913			II_NSC_WRITE(ip, overflow, rc, tmp, tmp->sb_pos,
5914			    tmp->sb_len, 0);
5915			break;
5916		case KS_NA:
5917			rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5918			break;
5919		default:
5920			cmn_err(CE_WARN, "!Invalid kstats type %d", ks);
5921			rc = nsc_write(tmp, tmp->sb_pos, tmp->sb_len, 0);
5922			break;
5923		}
5924#endif
5925		if (!II_SUCCESS(rc)) {
5926			(void) nsc_free_buf(tmp);
5927			return (rc);
5928		}
5929	}
5930
5931	(void) nsc_free_buf(tmp);
5932
5933	fba_pos += fba_len;
5934	fba_req -= fba_len;
5935	if (fba_req > 0)
5936		goto loop;
5937
5938	return (0);
5939}
5940
5941
5942/*
5943 * ii_overflow_attach
5944 */
5945static int
5946ii_overflow_attach(_ii_info_t *ip, char *name, int first)
5947{
5948	_ii_overflow_t *op;
5949	int rc = 0;
5950	int reserved = 0;
5951	int mutex_set = 0;
5952	int II_OLD_OMAGIC = 0x426c7565; /* "Blue" */
5953
5954	mutex_enter(&_ii_overflow_mutex);
5955	/* search for name in list */
5956	for (op = _ii_overflow_top; op; op = op->ii_next) {
5957		if (strncmp(op->ii_volname, name, DSW_NAMELEN) == 0)
5958			break;
5959	}
5960	if (op) {
5961		ip->bi_overflow = op;
5962		op->ii_crefcnt++;
5963		op->ii_drefcnt++;
5964		if ((op->ii_flags & IIO_CNTR_INVLD) && (op->ii_hversion >= 1)) {
5965			if (!first)
5966				mutex_enter(&ip->bi_mutex);
5967			ip->bi_flags |= DSW_OVRHDRDRTY;
5968			if (!first)
5969				mutex_exit(&ip->bi_mutex);
5970			op->ii_urefcnt++;
5971		}
5972#ifndef DISABLE_KSTATS
5973		ip->bi_kstat_io.overflow = op->ii_overflow;
5974		(void) strlcpy(ip->bi_kstat_io.ovrio, op->ii_ioname,
5975		    KSTAT_DATA_CHAR_LEN);
5976#endif
5977		/* write header */
5978		if (!(rc = nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI))) {
5979			rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd,
5980			    NSC_WRBUF, II_OHEADER_FBA,
5981			    (unsigned char *)&op->ii_do, sizeof (op->ii_do));
5982			(void) nsc_release(op->ii_dev->bi_fd);
5983			++iigkstat.assoc_over.value.ul;
5984		}
5985		mutex_exit(&_ii_overflow_mutex);
5986		return (rc);
5987	}
5988	if ((op = kmem_zalloc(sizeof (*op), KM_SLEEP)) == NULL) {
5989		mutex_exit(&_ii_overflow_mutex);
5990		return (ENOMEM);
5991	}
5992	if ((op->ii_dev = kmem_zalloc(sizeof (_ii_info_dev_t), KM_SLEEP))
5993	    == NULL) {
5994		kmem_free(op, sizeof (*op));
5995		mutex_exit(&_ii_overflow_mutex);
5996		return (ENOMEM);
5997	}
5998#ifndef DISABLE_KSTATS
5999	if ((op->ii_overflow = _ii_overflow_kstat_create(ip, op))) {
6000		ip->bi_kstat_io.overflow = op->ii_overflow;
6001		(void) strlcpy(op->ii_ioname, ip->bi_kstat_io.ovrio,
6002		    KSTAT_DATA_CHAR_LEN);
6003	} else {
6004		goto fail;
6005	}
6006#endif
6007	/* open overflow volume */
6008	op->ii_dev->bi_fd = nsc_open(name, NSC_IIR_ID|NSC_FILE|NSC_RDWR, NULL,
6009	    (blind_t)&(op->ii_dev->bi_iodev), &rc);
6010	if (!op->ii_dev->bi_fd)
6011		op->ii_dev->bi_fd = nsc_open(name,
6012		    NSC_IIR_ID|NSC_DEVICE|NSC_RDWR, NULL,
6013		    (blind_t)&(op->ii_dev->bi_iodev), &rc);
6014	if (op->ii_dev->bi_fd == NULL) {
6015		goto fail;
6016	}
6017	if ((rc = nsc_reserve(op->ii_dev->bi_fd, 0)) != 0)
6018		goto fail;
6019	reserved = 1;
6020	/* register path */
6021	op->ii_dev->bi_tok = _ii_register_path(name, NSC_DEVICE,
6022	    _ii_ior);
6023	if (!op->ii_dev->bi_tok) {
6024		goto fail;
6025	}
6026	/* read header */
6027	rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_RDBUF,
6028	    II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do));
6029	if (!II_SUCCESS(rc)) {
6030		_ii_error(ip, DSW_OVROFFLINE);
6031		goto fail;
6032	}
6033	/* On resume, check for old hmagic */
6034	if (strncmp(op->ii_volname, name, DSW_NAMELEN) ||
6035	    ((op->ii_hmagic != II_OLD_OMAGIC) &&
6036	    (op->ii_hmagic != II_OMAGIC))) {
6037		rc = DSW_EOMAGIC;
6038		goto fail;
6039	}
6040	/* set up counts */
6041	op->ii_crefcnt = 1;
6042	op->ii_drefcnt = 0;
6043	op->ii_urefcnt = 0;
6044	op->ii_hmagic = II_OMAGIC;
6045	if (!first) {
6046		/* if header version > 0, check if header written */
6047		if (((op->ii_flags & IIO_HDR_WRTN) == 0) &&
6048		    (op->ii_hversion >= 1)) {
6049			op->ii_flags |= IIO_CNTR_INVLD;
6050			mutex_enter(&ip->bi_mutex);
6051			ip->bi_flags |= DSW_OVRHDRDRTY;
6052			mutex_exit(&ip->bi_mutex);
6053			op->ii_urefcnt++;
6054		}
6055	}
6056	op->ii_flags &= ~IIO_HDR_WRTN;
6057	op->ii_drefcnt++;
6058	/* write header */
6059	rc = _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF,
6060	    II_OHEADER_FBA, (unsigned char *)&op->ii_do, sizeof (op->ii_do));
6061	nsc_release(op->ii_dev->bi_fd);
6062	reserved = 0;
6063	if (!II_SUCCESS(rc)) {
6064		_ii_error(ip, DSW_OVROFFLINE);
6065		goto fail;
6066	}
6067
6068	mutex_init(&op->ii_mutex, NULL, MUTEX_DRIVER, NULL);
6069	mutex_set++;
6070
6071	/* link onto list */
6072	op->ii_next = _ii_overflow_top;
6073	_ii_overflow_top = op;
6074	ip->bi_overflow = op;
6075
6076	++iigkstat.assoc_over.value.ul;
6077	mutex_exit(&_ii_overflow_mutex);
6078
6079	DTRACE_PROBE(_ii_overflow_attach_end);
6080	return (0);
6081fail:
6082#ifndef DISABLE_KSTATS
6083	/* Clean-up kstat stuff */
6084	if (op->ii_overflow) {
6085		kstat_delete(op->ii_overflow);
6086		mutex_destroy(&op->ii_kstat_mutex);
6087	}
6088#endif
6089	/* clean up mutex if we made it that far */
6090	if (mutex_set) {
6091		mutex_destroy(&op->ii_mutex);
6092	}
6093
6094	if (op->ii_dev) {
6095		if (op->ii_dev->bi_tok) {
6096			(void) _ii_unregister_path(op->ii_dev->bi_tok, 0,
6097			    "overflow");
6098		}
6099		if (reserved)
6100			(void) nsc_release(op->ii_dev->bi_fd);
6101		if (op->ii_dev->bi_fd)
6102			(void) nsc_close(op->ii_dev->bi_fd);
6103		kmem_free(op->ii_dev, sizeof (_ii_info_dev_t));
6104	}
6105	kmem_free(op, sizeof (*op));
6106	mutex_exit(&_ii_overflow_mutex);
6107
6108	return (rc);
6109}
6110
6111/*
6112 * ii_overflow_free
6113 * Assumes that ip is locked for I/O
6114 */
6115static void
6116ii_overflow_free(_ii_info_t *ip, int reclaim)
6117{
6118	_ii_overflow_t *op, **xp;
6119
6120	if ((op = ip->bi_overflow) == NULL)
6121		return;
6122	ip->bi_kstat_io.overflow = NULL;
6123	mutex_enter(&_ii_overflow_mutex);
6124	switch (reclaim) {
6125	case NO_RECLAIM:
6126		if (--(op->ii_drefcnt) == 0) {
6127			/* indicate header written */
6128			op->ii_flags |= IIO_HDR_WRTN;
6129			/* write out header */
6130			ASSERT(op->ii_dev->bi_fd);
6131			(void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI);
6132			(void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd,
6133			    NSC_WRBUF, II_OHEADER_FBA,
6134			    (unsigned char *)&op->ii_do,
6135			    sizeof (op->ii_do));
6136			nsc_release(op->ii_dev->bi_fd);
6137		}
6138		break;
6139	case RECLAIM:
6140		ii_reclaim_overflow(ip);
6141		/* FALLTHRU */
6142	case INIT_OVR:
6143		if (--(op->ii_drefcnt) == 0) {
6144			/* reset to new condition, c.f. _ii_ocreate() */
6145			op->ii_used = 1;
6146			op->ii_unused = op->ii_nchunks - op->ii_used;
6147			op->ii_freehead = II_NULLNODE;
6148		}
6149
6150		/* write out header */
6151		ASSERT(op->ii_dev->bi_fd);
6152		(void) nsc_reserve(op->ii_dev->bi_fd, NSC_MULTI);
6153		(void) _ii_nsc_io(ip, KS_OVR, op->ii_dev->bi_fd, NSC_WRBUF,
6154		    II_OHEADER_FBA, (unsigned char *)&op->ii_do,
6155		    sizeof (op->ii_do));
6156		nsc_release(op->ii_dev->bi_fd);
6157	}
6158
6159	if (--(op->ii_crefcnt) == 0) {
6160		/* Close fd and unlink from active chain; */
6161
6162		(void) _ii_unregister_path(op->ii_dev->bi_tok, 0, "overflow");
6163		(void) nsc_close(op->ii_dev->bi_fd);
6164
6165		for (xp = &_ii_overflow_top; *xp && *xp != op;
6166		    xp = &((*xp)->ii_next))
6167			/* NULL statement */;
6168		*xp = op->ii_next;
6169
6170		if (op->ii_overflow) {
6171			kstat_delete(op->ii_overflow);
6172		}
6173
6174		/* Clean up ii_overflow_t mutexs */
6175		mutex_destroy(&op->ii_kstat_mutex);
6176		mutex_destroy(&op->ii_mutex);
6177
6178		if (op->ii_dev)
6179			kmem_free(op->ii_dev, sizeof (_ii_info_dev_t));
6180		kmem_free(op, sizeof (*op));
6181	}
6182	ip->bi_overflow = NULL;
6183	--iigkstat.assoc_over.value.ul;
6184	mutex_exit(&_ii_overflow_mutex);
6185
6186}
6187
/*
 * ii_sibling_free
 *	Free resources and unlink the sibling chains etc.
 *
 *	Unregisters the shadow and raw shadow paths, removes ip from its
 *	bi_head/bi_sibling chain when NSHADOWS(ip) is true, and then
 *	disposes of the structure:
 *	  - if other shadows remain and ip is its own bi_master, only the
 *	    shadow side is torn down (_ii_info_freeshd) and ip is kept;
 *	  - otherwise ip is freed with _ii_info_free().  When no other
 *	    shadows remain and bi_master is a different structure, that
 *	    master structure is freed too.
 *
 *	A NULL ip is ignored.
 */

static void
ii_sibling_free(_ii_info_t *ip)
{
	_ii_info_t *hip, *yip;

	if (!ip)
		return;

	if (ip->bi_shdr_tok)
		(void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow");

	if (ip->bi_shd_tok)
		(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");

	/* held as writer across the unlink below */
	rw_enter(&ip->bi_linkrw, RW_WRITER);

	/* tokens were unregistered above; don't let them be reused */
	ip->bi_shd_tok = NULL;
	ip->bi_shdr_tok = NULL;

	if (NSHADOWS(ip)) {
		/* chain edits are made under _ii_info_mutex */
		mutex_enter(&_ii_info_mutex);
		if (ip->bi_head == ip) {	/* removing head of list */
			/* next sibling becomes head; repoint every member */
			hip = ip->bi_sibling;
			for (yip = hip; yip; yip = yip->bi_sibling)
				yip->bi_head = hip;

		} else {		/* removing member of list */
			/* head is unchanged; splice ip out of the chain */
			hip = ip->bi_head;
			for (yip = ip->bi_head; yip; yip = yip->bi_sibling) {
				if (yip->bi_sibling == ip) {
					yip->bi_sibling = ip->bi_sibling;
					break;
				}
			}
		}
		/* make sure the master structure sees the (new) head */
		hip->bi_master->bi_head = hip;
		if (ip->bi_master == ip) {    /* master I/O goes through this */
			/* ip must survive: free only its shadow side */
			mutex_exit(&_ii_info_mutex);
			_ii_info_freeshd(ip);
			rw_exit(&ip->bi_linkrw);
			return;
		}
		mutex_exit(&_ii_info_mutex);
	} else {
		if (ip->bi_master != ip)	/* last ref to master side ip */
			_ii_info_free(ip->bi_master);	/* ==A== */
	}

	if (ip->bi_master != ip) {	/* info_free ==A== will close these */
		/*
		 * Null out any pointers to shared master side resources
		 * that should only be freed once when the last reference
		 * to this master is freed and calls _ii_info_free().
		 */
		ip->bi_mstdev = NULL;
		ip->bi_mstrdev = NULL;
		ip->bi_kstat_io.master = NULL;
	}
	/* drop the lock before _ii_info_free(), which takes it itself */
	rw_exit(&ip->bi_linkrw);
	_ii_info_free(ip);

}
6255
6256/*
6257 * _ii_info_freeshd
6258 *	Free shadow side resources
6259 *
6260 * Calling/Exit State:
6261 *	No mutexes should be held on entry to this function.
6262 *
6263 * Description:
6264 *	Frees the system resources associated with the shadow
6265 *	access, leaving the master side alone. This allows the
6266 *	original master side to continue in use while there are
6267 *	outstanding references to this _ii_info_t.
6268 */
6269
6270static void
6271_ii_info_freeshd(_ii_info_t *ip)
6272{
6273	if (!ip)
6274		return;
6275	if ((ip->bi_flags&DSW_HANGING) == DSW_HANGING)
6276		return;		/* this work has already been completed */
6277
6278	II_FLAG_SETX(DSW_HANGING, ip);
6279
6280	if (ip->bi_cluster)
6281		(void) II_UNLINK_CLUSTER(ip);
6282	if (ip->bi_group)
6283		(void) II_UNLINK_GROUP(ip);
6284
6285	if (ip->bi_shdfd && ip->bi_shdrsrv)
6286		nsc_release(ip->bi_shdfd);
6287	if (ip->bi_shdrfd && ip->bi_shdrrsrv)
6288		nsc_release(ip->bi_shdrfd);
6289	if (ip->bi_bmpfd && ip->bi_bmprsrv)
6290		nsc_release(ip->bi_bmpfd);
6291
6292	if (ip->bi_bmp_tok)
6293		(void) _ii_unregister_path(ip->bi_bmp_tok, 0, "bitmap");
6294
6295	if (ip->bi_shdr_tok)
6296		(void) _ii_unregister_path(ip->bi_shdr_tok, 0, "raw shadow");
6297
6298	if (ip->bi_shd_tok)
6299		(void) _ii_unregister_path(ip->bi_shd_tok, 0, "shadow");
6300	ip->bi_shd_tok = NULL;
6301	ip->bi_shdr_tok = NULL;
6302
6303	if (ip->bi_shdfd)
6304		(void) nsc_close(ip->bi_shdfd);
6305
6306	if (ip->bi_shdrfd)
6307		(void) nsc_close(ip->bi_shdrfd);
6308
6309	if (ip->bi_bmpfd)
6310		(void) nsc_close(ip->bi_bmpfd);
6311
6312	ip->bi_shdfd = NULL;
6313	ip->bi_shdrfd = NULL;
6314	ip->bi_bmpfd = NULL;
6315
6316	if (ip->bi_busy)
6317		kmem_free(ip->bi_busy,
6318		    1 + (ip->bi_size / (DSW_SIZE * DSW_BITS)));
6319	ip->bi_busy = NULL;
6320
6321	if (ip->bi_kstat_io.shadow) {
6322		kstat_delete(ip->bi_kstat_io.shadow);
6323		ip->bi_kstat_io.shadow = NULL;
6324	}
6325	if (ip->bi_kstat_io.bitmap) {
6326		kstat_delete(ip->bi_kstat_io.bitmap);
6327		ip->bi_kstat_io.bitmap = NULL;
6328	}
6329	if (ip->bi_kstat) {
6330		kstat_delete(ip->bi_kstat);
6331		ip->bi_kstat = NULL;
6332	}
6333
6334}
6335
/*
 * _ii_info_free
 *	Free resources
 *
 * Calling/Exit State:
 *	No mutexes should be held on entry to this function.
 *	ip must not be referenced after this function returns: the
 *	structure itself is freed.
 *
 * Description:
 *	Frees the system resources associated with the specified
 *	II information structure: unlinks it from the _ii_mst_top list,
 *	releases, unregisters and closes the master and raw master devices
 *	(only where bi_mstdev / bi_mstrdev are still set), deletes the
 *	kstats, destroys the locks and condition variables, runs the shadow
 *	side teardown and finally frees ip.
 *
 *	A NULL ip is ignored.
 */

static void
_ii_info_free(_ii_info_t *ip)
{
	_ii_info_t **xip;

	if (!ip)
		return;

	/* unlink from the master list, if present */
	mutex_enter(&_ii_info_mutex);
	for (xip = &_ii_mst_top; *xip; xip = &((*xip)->bi_nextmst)) {
		if (ip == *xip) {
			*xip = ip->bi_nextmst;
			break;
		}
	}
	mutex_exit(&_ii_info_mutex);

	/* this rw_enter forces us to wait until all nsc_buffers are freed */
	rw_enter(&ip->bi_linkrw, RW_WRITER);
	/*
	 * Each master-side step is guarded by bi_mstdev / bi_mstrdev, so a
	 * structure whose master pointers have been cleared skips them.
	 */
	if (ip->bi_mstdev && ip->bi_mstfd && ip->bi_mstrsrv)
		nsc_release(ip->bi_mstfd);
	if (ip->bi_mstrdev && ip->bi_mstrfd && ip->bi_mstrrsrv)
		nsc_release(ip->bi_mstrfd);

	if (ip->bi_mstdev && ip->bi_mst_tok)
		(void) _ii_unregister_path(ip->bi_mst_tok, 0, "master");
	if (ip->bi_mstrdev && ip->bi_mstr_tok)
		(void) _ii_unregister_path(ip->bi_mstr_tok, 0, "raw master");

	if (ip->bi_mstdev && ip->bi_mstfd)
		(void) nsc_close(ip->bi_mstfd);
	if (ip->bi_mstrdev && ip->bi_mstrfd)
		(void) nsc_close(ip->bi_mstrfd);
	rw_exit(&ip->bi_linkrw);

	if (ip->bi_mstdev) {
		nsc_kmem_free(ip->bi_mstdev, sizeof (*ip->bi_mstdev));
	}
	if (ip->bi_mstrdev) {
		nsc_kmem_free(ip->bi_mstrdev, sizeof (*ip->bi_mstrdev));
	}

	/* the master kstat pointer is not cleared; ip is freed below */
	if (ip->bi_kstat_io.master) {
		kstat_delete(ip->bi_kstat_io.master);
	}
	if (ip->bi_kstat_io.shadow) {
		kstat_delete(ip->bi_kstat_io.shadow);
		ip->bi_kstat_io.shadow = 0;
	}
	if (ip->bi_kstat_io.bitmap) {
		kstat_delete(ip->bi_kstat_io.bitmap);
		ip->bi_kstat_io.bitmap = 0;
	}
	if (ip->bi_kstat) {
		kstat_delete(ip->bi_kstat);
		ip->bi_kstat = NULL;
	}

	/* this rw_enter forces us to wait until all nsc_buffers are freed */
	rw_enter(&ip->bi_linkrw, RW_WRITER);
	rw_exit(&ip->bi_linkrw);

	mutex_destroy(&ip->bi_mutex);
	mutex_destroy(&ip->bi_rsrvmutex);
	mutex_destroy(&ip->bi_rlsemutex);
	mutex_destroy(&ip->bi_bmpmutex);
	mutex_destroy(&ip->bi_chksmutex);
	cv_destroy(&ip->bi_copydonecv);
	cv_destroy(&ip->bi_reservecv);
	cv_destroy(&ip->bi_releasecv);
	cv_destroy(&ip->bi_ioctlcv);
	cv_destroy(&ip->bi_closingcv);
	cv_destroy(&ip->bi_busycv);
	rw_destroy(&ip->bi_busyrw);
	rw_destroy(&ip->bi_linkrw);

	/*
	 * NOTE(review): the shadow-side teardown runs after ip's locks and
	 * condition variables have been destroyed.  Confirm that nothing it
	 * calls (II_FLAG_SETX, II_UNLINK_CLUSTER, II_UNLINK_GROUP, the
	 * unregister/close paths) uses any of them.  It is a no-op if the
	 * set is already marked DSW_HANGING.
	 */
	_ii_info_freeshd(ip);

#ifdef DEBUG
	/* poison the head pointer so stale users are easy to spot */
	ip->bi_head = (_ii_info_t *)0xdeadbeef;
#endif

	nsc_kmem_free(ip, sizeof (*ip));

}
6433
/*
 * _ii_copy_chunks
 *	Perform a copy of some chunks
 *
 * Calling/Exit State:
 *	Returns 0 if the data was copied successfully, otherwise
 *	error code.  EIO is returned without attempting any I/O when the
 *	master is flagged offline, when the shadow is flagged offline,
 *	exported or imported, or when a tree-mapped set has no shadow chunk
 *	available (in which case DSW_OVERFLOW is also set).  Other failures
 *	return the status of the nsc operation that failed.
 *
 * Description:
 *	flag is set to CV_SHD2MST if the data is to be copied from the shadow
 *	to the master; any other value (normally 0) copies from the master
 *	to the shadow.
 *
 *	chunk_num is the first chunk and nchunks the number of chunks to
 *	copy; the length is clipped so the copy does not run past bi_size.
 *	For DSW_TREEMAP sets nchunks must be 1 (asserted) and the shadow-side
 *	location, which may be on the overflow volume, comes from
 *	ii_tsearch().
 *
 *	On success the copy bits for the chunks are cleared.  On failure the
 *	volumes involved are reported to _ii_error().
 */

static int
_ii_copy_chunks(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks)
{
	int	mst_flag;
	int	shd_flag;
	int	ovr_flag;
	nsc_off_t	pos;
	nsc_size_t	len;
	int	rc;
	nsc_off_t	shd_pos;
	chunkid_t	shd_chunk;
	nsc_buf_t *mst_tmp = NULL;
	nsc_buf_t *shd_tmp = NULL;

	if (ip->bi_flags & DSW_MSTOFFLINE) {
		DTRACE_PROBE(_ii_copy_chunks_end);
		return (EIO);
	}

	if (ip->bi_flags & (DSW_SHDOFFLINE|DSW_SHDEXPORT|DSW_SHDIMPORT)) {
		DTRACE_PROBE(_ii_copy_chunks_end);
		return (EIO);
	}

	/* the destination is opened write-through, the source read-only */
	if (flag == CV_SHD2MST) {
		mst_flag = NSC_WRBUF|NSC_WRTHRU;
		shd_flag = NSC_RDBUF;
	} else {
		shd_flag = NSC_WRBUF|NSC_WRTHRU;
		mst_flag = NSC_RDBUF;
	}

	/* master-side position and length, clipped to the volume size */
	pos = DSW_CHK2FBA(chunk_num);
	len = DSW_SIZE * nchunks;
	if (pos + len > ip->bi_size)
		len = ip->bi_size - pos;
	if (ip->bi_flags & DSW_TREEMAP) {
		/* tree-mapped: look up where this chunk lives on the shadow */
		ASSERT(nchunks == 1);
		shd_chunk = ii_tsearch(ip, chunk_num);
		if (shd_chunk == II_NULLNODE) {
			/* shadow is full */
			mutex_enter(&ip->bi_mutex);
			II_FLAG_SET(DSW_OVERFLOW, ip);
			mutex_exit(&ip->bi_mutex);
			DTRACE_PROBE(_ii_copy_chunks_end);
			return (EIO);
		}

		/* the chunk may have been placed on the overflow volume */
		ovr_flag = II_ISOVERFLOW(shd_chunk);
		shd_pos = DSW_CHK2FBA((ovr_flag) ?
		    II_2OVERFLOW(shd_chunk) : shd_chunk);
	} else {
		/* not tree-mapped: shadow position mirrors the master's */
		ovr_flag = FALSE;
		shd_chunk = chunk_num;
		shd_pos = pos;
	}

	/*
	 * Always allocate the master side before the shadow to
	 * avoid deadlocks on the same chunk.
	 */

	DTRACE_PROBE2(_ii_copy_chunks_alloc, nsc_off_t, pos, nsc_size_t, len);

	II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, mst_flag, &mst_tmp);
	if (!II_SUCCESS(rc)) {
		if (mst_tmp)
			(void) nsc_free_buf(mst_tmp);
		_ii_error(ip, DSW_MSTOFFLINE);
		DTRACE_PROBE(_ii_copy_chunks_end);
		return (rc);
	}

	if (ovr_flag) {
		/* use overflow volume */
		(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
		II_ALLOC_BUF(ip, overflow, rc, OVRFD(ip), shd_pos, len,
		    shd_flag, &shd_tmp);
	} else {
		II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), shd_pos, len, shd_flag,
		    &shd_tmp);
	}
	if (!II_SUCCESS(rc)) {
		/* undo the master allocation and the overflow reserve */
		(void) nsc_free_buf(mst_tmp);
		if (shd_tmp)
			(void) nsc_free_buf(shd_tmp);
		if (ovr_flag)
			nsc_release(OVRFD(ip));
		_ii_error(ip, DSW_SHDOFFLINE);
		if (ovr_flag)
			_ii_error(ip, DSW_OVROFFLINE);
		DTRACE_PROBE(_ii_copy_chunks_end);
		return (rc);
	}

	/*
	 * The direction of copy is determined by the mst_flag.
	 */
	DTRACE_PROBE2(_ii_copy_chunks_copy, kstat_named_t, ii_copy_direct,
	    int, mst_flag);

	if (ii_copy_direct) {
		/* direct copy between the two buffers, no separate write */
		if (mst_flag & NSC_WRBUF) {
			/* shadow (or overflow) -> master */
			if (ovr_flag) {
				II_NSC_COPY_DIRECT(ip, overflow, master, rc,
				    shd_tmp, mst_tmp, shd_pos, pos, len)
			} else {
				II_NSC_COPY_DIRECT(ip, shadow, master, rc,
				    shd_tmp, mst_tmp, shd_pos, pos, len)
			}
			if (!II_SUCCESS(rc)) {
				/* A copy has failed - something is wrong */
				_ii_error(ip, DSW_MSTOFFLINE);
				_ii_error(ip, DSW_SHDOFFLINE);
				if (ovr_flag)
					_ii_error(ip, DSW_OVROFFLINE);
			}
		} else {
			/* master -> shadow (or overflow) */
			if (ovr_flag) {
				II_NSC_COPY_DIRECT(ip, master, overflow, rc,
				    mst_tmp, shd_tmp, pos, shd_pos, len);
			} else {
				II_NSC_COPY_DIRECT(ip, master, shadow, rc,
				    mst_tmp, shd_tmp, pos, shd_pos, len);
			}
			if (!II_SUCCESS(rc)) {
				/*
				 * A failure has occurred during the above copy.
				 * The macro calls nsc_copy_direct, which will
				 * never return a read failure, only a write
				 * failure. With this assumption, we should
				 * take only the target volume offline.
				 */
				_ii_error(ip, DSW_SHDOFFLINE);
				if (ovr_flag)
					_ii_error(ip, DSW_OVROFFLINE);
			}
		}
	} else {
		/* two-step: nsc_copy between buffers, then write the target */
		if (mst_flag & NSC_WRBUF) {
			/* shadow (or overflow) -> master */
			rc = nsc_copy(shd_tmp, mst_tmp, shd_pos, pos, len);
			if (II_SUCCESS(rc)) {
				II_NSC_WRITE(ip, master, rc, mst_tmp, pos, len,
				    0);
				if (!II_SUCCESS(rc))
					_ii_error(ip, DSW_MSTOFFLINE);
			} else {
				/* A copy has failed - something is wrong */
				_ii_error(ip, DSW_MSTOFFLINE);
				_ii_error(ip, DSW_SHDOFFLINE);
			}
		} else {
			/* master -> shadow (or overflow) */
			rc = nsc_copy(mst_tmp, shd_tmp, pos, shd_pos, len);
			if (II_SUCCESS(rc)) {
				if (ovr_flag) {
					II_NSC_WRITE(ip, overflow, rc, shd_tmp,
					    shd_pos, len, 0);
				} else {
					II_NSC_WRITE(ip, shadow, rc, shd_tmp,
					    shd_pos, len, 0);
				}
				if (!II_SUCCESS(rc)) {
					_ii_error(ip, DSW_SHDOFFLINE);
					if (ovr_flag)
						_ii_error(ip, DSW_OVROFFLINE);
				}
			} else {
				/* A copy has failed - something is wrong */
				_ii_error(ip, DSW_MSTOFFLINE);
				_ii_error(ip, DSW_SHDOFFLINE);
			}
		}
	}

	/* buffers and the overflow reserve are dropped on every outcome */
	(void) nsc_free_buf(mst_tmp);
	(void) nsc_free_buf(shd_tmp);
	if (ovr_flag)
		nsc_release(OVRFD(ip));

	DTRACE_PROBE(_ii_copy_chunks);

	if (II_SUCCESS(rc)) {
		/* data is in place: these chunks no longer need copying */
		(void) II_CLR_COPY_BITS(ip, chunk_num, nchunks);
		rc = 0;
	}

	return (rc);
}
6635
6636
6637/*
6638 * _ii_copy_on_write
6639 *
6640 * Calling/Exit State:
6641 *	Returns 0 on success, otherwise error code.
6642 *
6643 * Description:
6644 *	Determines if a copy on write is necessary, and performs it.
6645 *	A copy on write is necessary in the following cases:
6646 *		- No copy is in progress and the shadow bit is clear, which
6647 *		  means this is the first write to this track.
6648 *		- A copy is in progress and the copy bit is set, which means
6649 *		  that a track copy is required.
6650 *	If a copy to the master is to be done, make a recursive call to this
6651 *	function to do any necessary copy on write on other InstantImage groups
6652 * 	that share the same master volume.
6653 */
6654
6655static int
6656_ii_copy_on_write(_ii_info_t *ip, int flag, chunkid_t chunk_num, int nchunks)
6657{
6658	int rc = 0;
6659	int rtype;
6660	int hanging =  (ip->bi_flags&DSW_HANGING);
6661
6662	if (hanging ||
6663	    (flag & (CV_SIBLING|CV_SHD2MST)) == CV_SHD2MST && NSHADOWS(ip)) {
6664		_ii_info_t *xip;
6665		/*
6666		 * Preserve copy of master for all other shadows of this master
6667		 * before writing our data onto the master.
6668		 */
6669
6670		/*
6671		 * Avoid deadlock with COW on same chunk of sibling shadow
6672		 * by unlocking this chunk before copying all other sibling
6673		 * chunks.
6674		 */
6675
6676		/*
6677		 * Only using a single chunk when copying to master avoids
6678		 * complex code here.
6679		 */
6680
6681		ASSERT(nchunks == 1);
6682		if (!hanging)
6683			_ii_unlock_chunk(ip, chunk_num);
6684		for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
6685			if (xip == ip)		/* don't copy ourselves again */
6686				continue;
6687
6688			DTRACE_PROBE(_ii_copy_on_write);
6689
6690			rw_enter(&xip->bi_linkrw, RW_READER);
6691			mutex_enter(&xip->bi_mutex);
6692			if (xip->bi_disabled) {
6693				mutex_exit(&xip->bi_mutex);
6694				rw_exit(&xip->bi_linkrw);
6695				continue;	/* this set is stopping */
6696			}
6697			xip->bi_shdref++;
6698			mutex_exit(&xip->bi_mutex);
6699			/* don't waste time asking for MST as ip shares it */
6700			rtype = SHDR|BMP;
6701			(void) _ii_rsrv_devs(xip, rtype, II_INTERNAL);
6702			_ii_lock_chunk(xip, chunk_num);
6703			rc = _ii_copy_on_write(xip, flag | CV_SIBLING,
6704			    chunk_num, 1);
6705
6706			/*
6707			 * See comments in _ii_shadow_write()
6708			 */
6709			if (rc == 0 ||
6710			    (rc == EIO && (xip->bi_flags&DSW_OVERFLOW) != 0))
6711				(void) II_SET_SHD_BIT(xip, chunk_num);
6712
6713			_ii_unlock_chunk(xip, chunk_num);
6714			_ii_rlse_devs(xip, rtype);
6715			mutex_enter(&xip->bi_mutex);
6716			xip->bi_shdref--;
6717			if (xip->bi_state & DSW_CLOSING) {
6718				if (total_ref(xip) == 0) {
6719					cv_signal(&xip->bi_closingcv);
6720				}
6721			}
6722			mutex_exit(&xip->bi_mutex);
6723			rw_exit(&xip->bi_linkrw);
6724		}
6725		if (hanging) {
6726			DTRACE_PROBE(_ii_copy_on_write_end);
6727			return (0);
6728		}
6729		/*
6730		 * Reacquire chunk lock and check that a COW by a sibling
6731		 * has not already copied this chunk.
6732		 */
6733		_ii_lock_chunk(ip, chunk_num);
6734		rc = II_TST_SHD_BIT(ip, chunk_num);
6735		if (rc < 0) {
6736			DTRACE_PROBE(_ii_copy_on_write_end);
6737			return (EIO);
6738		}
6739		if (rc != 0) {
6740			DTRACE_PROBE(_ii_copy_on_write_end);
6741			return (0);
6742		}
6743	}
6744
6745	if ((ip->bi_flags & DSW_COPYING) == 0) {
6746		/* Not copying at all */
6747
6748		if ((ip->bi_flags & DSW_GOLDEN) == DSW_GOLDEN) {
6749			/* No copy-on-write as it is independent */
6750			DTRACE_PROBE(_ii_copy_on_write_end);
6751			return (0);
6752		}
6753
6754		/* Dependent, so depends on shadow bit */
6755
6756		if ((flag == CV_SHD2MST) &&
6757		    ((ip->bi_flags & DSW_SHDOFFLINE) != 0)) {
6758			/*
6759			 * Writing master but shadow is offline, so
6760			 * no need to copy on write or set shadow bit
6761			 */
6762			DTRACE_PROBE(_ii_copy_on_write_end);
6763			return (0);
6764		}
6765		if (ip->bi_flags & DSW_BMPOFFLINE) {
6766			DTRACE_PROBE(_ii_copy_on_write_end);
6767			return (EIO);
6768		}
6769		rc = II_TST_SHD_BIT(ip, chunk_num);
6770		if (rc < 0) {
6771			DTRACE_PROBE(_ii_copy_on_write_end);
6772			return (EIO);
6773		}
6774		if (rc == 0) {
6775			/* Shadow bit clear, copy master to shadow */
6776			rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks);
6777		}
6778	} else {
6779		/* Copying one way or the other */
6780		if (ip->bi_flags & DSW_BMPOFFLINE) {
6781			DTRACE_PROBE(_ii_copy_on_write_end);
6782			return (EIO);
6783		}
6784		rc = II_TST_COPY_BIT(ip, chunk_num);
6785		if (rc < 0) {
6786			DTRACE_PROBE(_ii_copy_on_write_end);
6787			return (EIO);
6788		}
6789		if (rc) {
6790			/* Copy bit set, do a copy */
6791			if ((ip->bi_flags & DSW_COPYINGS) == 0) {
6792				/* Copy master to shadow */
6793				rc = _ii_copy_chunks(ip, 0, chunk_num, nchunks);
6794			} else {
6795				/* Copy shadow to master */
6796				rc = _ii_copy_chunks(ip, CV_SHD2MST, chunk_num,
6797				    nchunks);
6798			}
6799		}
6800	}
6801	return (rc);
6802}
6803
#if defined(DEBUG)
/*
 * DEBUG-only knob: when set to a positive value it replaces the number of
 * chunks _ii_copyvolp() tries to copy per pass.
 */
int ii_maxchunks = 0;
#endif	/* DEBUG */
6807
6808/*
6809 * _ii_copyvolp()
6810 *	Copy volume process.
6811 *
6812 * Calling/Exit State:
6813 *	Passes 0 back to caller when the copy is complete or has been aborted,
6814 * 	otherwise error code.
6815 *
6816 * Description:
6817 *	According to the flag, copy the master to the shadow volume or the
6818 *	shadow to the master volume. Upon return wakeup all processes waiting
6819 *	for this copy.
6820 *
6821 */
6822
6823static void
6824_ii_copyvolp(struct copy_args *ca)
6825{
6826	chunkid_t	chunk_num;
6827	int	rc = 0;
6828	chunkid_t	max_chunk;
6829	nsc_size_t	nc_max;
6830	int		nc_try, nc_got;
6831	nsc_size_t	mst_max, shd_max;
6832	_ii_info_t *ip;
6833	int	flag;
6834	nsc_size_t	bitmap_size;
6835	nsc_size_t	shadow_set, copy_set;
6836	int	chunkcount = 0;
6837	int	rsrv = 1;
6838	spcs_s_info_t kstatus;
6839
6840	ip = ca->ip;
6841	flag = ca->flag;
6842	kstatus = ca->kstatus;
6843
6844	if (ip->bi_disabled) {
6845		rc = DSW_EABORTED;
6846		goto skip;
6847	}
6848	max_chunk = ip->bi_size / DSW_SIZE;
6849	if ((ip->bi_size % DSW_SIZE) != 0)
6850		++max_chunk;
6851	if ((ip->bi_flags&DSW_TREEMAP))
6852		nc_max = 1;
6853	else {
6854		mst_max = shd_max = 0;
6855		(void) nsc_maxfbas(MSTFD(ip), 0, &mst_max);
6856		(void) nsc_maxfbas(SHDFD(ip), 0, &shd_max);
6857		nc_max = (mst_max < shd_max) ? mst_max : shd_max;
6858		nc_max /= DSW_SIZE;
6859		ASSERT(nc_max > 0 && nc_max < 1000);
6860	}
6861#ifdef	DEBUG
6862	if (ii_maxchunks > 0)
6863		nc_max = ii_maxchunks;
6864#endif
6865	for (chunk_num = nc_got = 0; /* CSTYLED */; /* CSTYLED */) {
6866		if ((flag & CV_SHD2MST) && NSHADOWS(ip))
6867			nc_try = 1;
6868		else
6869			nc_try = (int)nc_max;
6870		chunk_num = II_NEXT_COPY_BIT(ip, chunk_num + nc_got,
6871		    max_chunk, nc_try, &nc_got);
6872
6873		if (chunk_num >= max_chunk)	/* loop complete */
6874			break;
6875		if (ip->bi_flags & DSW_COPYINGX) {
6876			/* request to abort copy */
6877			_ii_unlock_chunks(ip, chunk_num, nc_got);
6878			rc = DSW_EABORTED;
6879			break;
6880		}
6881
6882		sema_p(&_ii_concopy_sema);
6883		rc = _ii_copy_on_write(ip, (flag & CV_SHD2MST), chunk_num,
6884		    nc_got);
6885		sema_v(&_ii_concopy_sema);
6886		if (ip->bi_flags & DSW_TREEMAP)
6887			ii_tdelete(ip, chunk_num);
6888		_ii_unlock_chunks(ip, chunk_num, nc_got);
6889		if (!II_SUCCESS(rc)) {
6890			if (ca->wait)
6891				spcs_s_add(kstatus, rc);
6892			rc = DSW_EIO;
6893			break;
6894		}
6895		if (ip->bi_release ||
6896		    (++chunkcount % ip->bi_throttle_unit) == 0) {
6897			_ii_rlse_devs(ip, (ca->rtype&(~BMP)));
6898			rsrv = 0;
6899			delay(ip->bi_throttle_delay);
6900			ca->rtype = MSTR|SHDR|(ca->rtype&BMP);
6901			if ((rc = _ii_rsrv_devs(ip, (ca->rtype&(~BMP)),
6902			    II_INTERNAL)) != 0) {
6903				if (ca->wait)
6904					spcs_s_add(kstatus, rc);
6905				rc = DSW_EIO;
6906				break;
6907			}
6908			rsrv = 1;
6909			if (nc_max > 1) {
6910				/*
6911				 * maxfbas could have changed during the
6912				 * release/reserve, so recalculate the size
6913				 * of transfer we can do.
6914				 */
6915				(void) nsc_maxfbas(MSTFD(ip), 0, &mst_max);
6916				(void) nsc_maxfbas(SHDFD(ip), 0, &shd_max);
6917				nc_max = (mst_max < shd_max) ?
6918				    mst_max : shd_max;
6919				nc_max /= DSW_SIZE;
6920			}
6921		}
6922	}
6923skip:
6924	mutex_enter(&ip->bi_mutex);
6925	if (ip->bi_flags & DSW_COPYINGX)
6926		II_FLAG_CLR(DSW_COPYINGP|DSW_COPYINGX, ip);
6927	else
6928		II_FLAG_CLR(DSW_COPY_FLAGS, ip);
6929
6930	if ((ip->bi_flags & DSW_TREEMAP) && (flag & CV_SHD2MST) &&
6931	    (ip->bi_flags & DSW_VOVERFLOW)) {
6932		int rs;
6933		bitmap_size = ip->bi_size / DSW_SIZE;
6934		if ((ip->bi_size % DSW_SIZE) != 0)
6935			++bitmap_size;
6936		bitmap_size += 7;
6937		bitmap_size /= 8;
6938
6939		/* Count the number of copy bits set */
6940		rs = II_CNT_BITS(ip, ip->bi_copyfba, &copy_set, bitmap_size);
6941		if ((rs == 0) && (copy_set == 0)) {
6942			/*
6943			 * If we counted successfully and completed the copy
6944			 * see if any writes have forced the set into the
6945			 * overflow
6946			 */
6947			rs = II_CNT_BITS(ip, ip->bi_shdfba, &shadow_set,
6948			    bitmap_size);
6949			if ((rs == 0) && (shadow_set <
6950			    (nsc_size_t)ip->bi_shdchks)) {
6951				II_FLAG_CLR(DSW_VOVERFLOW, ip);
6952				--iigkstat.spilled_over.value.ul;
6953			}
6954		}
6955	}
6956
6957	ca->rc = rc;
6958	cv_broadcast(&ip->bi_copydonecv);
6959	mutex_exit(&ip->bi_mutex);
6960	if (!ca->wait) {
6961		if (rsrv)
6962			_ii_rlse_devs(ip, ca->rtype);
6963		kmem_free(ca, sizeof (*ca));
6964	}
6965
6966}
6967
6968/*
6969 * _ii_copyvol()
6970 *	Copy a volume.
6971 *
6972 * Calling/Exit State:
6973 *	Returns 0 when the copy is complete or has been aborted,
6974 * 	otherwise error code.
6975 *
6976 * Description:
6977 *	According to the flag, copy the master to the shadow volume or the
6978 *	shadow to the master volume. Upon return wakeup all processes waiting
6979 *	for this copy. Uses a separate process (_ii_copyvolp) to allow the
6980 *	caller to be interrupted.
6981 */
6982
6983static int
6984_ii_copyvol(_ii_info_t *ip, int flag, int rtype, spcs_s_info_t kstatus,
6985				int wait)
6986{
6987	struct copy_args *ca;
6988	int rc;
6989
6990	/*
6991	 * start copy in separate process.
6992	 */
6993
6994	ca = (struct copy_args *)kmem_alloc(sizeof (*ca), KM_SLEEP);
6995	ca->ip = ip;
6996	ca->flag = flag;
6997	ca->rtype = rtype;
6998	ca->kstatus = kstatus;
6999	ca->wait = wait;
7000	ca->rc = 0;
7001
7002	if (rc = nsc_create_process((void (*)(void *))_ii_copyvolp,
7003	    (void *)ca, FALSE)) {
7004		mutex_enter(&ip->bi_mutex);
7005		_ii_ioctl_done(ip);
7006		mutex_exit(&ip->bi_mutex);
7007		cmn_err(CE_NOTE, "!Can't create II copy process");
7008		kmem_free(ca, sizeof (*ca));
7009		return (rc);
7010	}
7011	mutex_enter(&ip->bi_mutex);
7012	if (wait == 0) {
7013		_ii_ioctl_done(ip);
7014		mutex_exit(&ip->bi_mutex);
7015		return (0);
7016	}
7017	while (ip->bi_flags & DSW_COPYINGP) {
7018		(void) cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex);
7019	}
7020	_ii_ioctl_done(ip);
7021	mutex_exit(&ip->bi_mutex);
7022	rc = ca->rc;
7023	kmem_free(ca, sizeof (*ca));
7024
7025	return (rc);
7026}
7027
7028/*
7029 * _ii_stopcopy
7030 *	Stops any copy process on ip.
7031 *
7032 * Calling/Exit State:
7033 *	Returns 0 if the copy was stopped, otherwise error code.
7034 *
7035 * Description:
7036 *	Stop an in-progress copy by setting the DSW_COPYINGX flag, then
7037 *	wait for the copy to complete.
7038 */
7039
7040static int
7041_ii_stopcopy(_ii_info_t *ip)
7042{
7043	mutex_enter(&ip->bi_mutex);
7044	DTRACE_PROBE1(_ii_stopcopy_flags,
7045	    uint_t, ip->bi_flags);
7046
7047	while (ip->bi_flags & DSW_COPYINGP) {
7048
7049		DTRACE_PROBE(_ii_stopcopy);
7050
7051		II_FLAG_SET(DSW_COPYINGX, ip);
7052
7053		if (cv_wait_sig(&ip->bi_copydonecv, &ip->bi_mutex) == 0) {
7054			/* Awoken by a signal */
7055			mutex_exit(&ip->bi_mutex);
7056			DTRACE_PROBE(_ii_stopcopy);
7057			return (EINTR);
7058		}
7059	}
7060
7061	mutex_exit(&ip->bi_mutex);
7062
7063	return (0);
7064}
7065
7066/*
7067 * _ii_error
7068 *	Given the error type that occurred, and the current state of the
7069 *	shadowing, set the appropriate error condition(s).
7070 *
7071 */
7072
7073void
7074_ii_error(_ii_info_t *ip, int error_type)
7075{
7076	int copy_flags;
7077	int golden;
7078	int flags;
7079	int recursive_call = (error_type & DSW_OVERFLOW) != 0;
7080	int offline_bits = DSW_OFFLINE;
7081	_ii_info_t *xip;
7082	int rc;
7083
7084	error_type &= ~DSW_OVERFLOW;
7085
7086	mutex_enter(&ip->bi_mutex);
7087	flags = (ip->bi_flags) & offline_bits;
7088	if ((flags ^ error_type) == 0) {
7089		/* nothing new offline */
7090		mutex_exit(&ip->bi_mutex);
7091		return;
7092	}
7093
7094	if (error_type == DSW_BMPOFFLINE &&
7095	    (ip->bi_flags & DSW_BMPOFFLINE) == 0) {
7096		/* first, let nskerd know */
7097		rc = _ii_report_bmp(ip);
7098		if (rc) {
7099			if (ii_debug > 0) {
7100				cmn_err(CE_WARN, "!Unable to mark bitmap bad in"
7101				    " config DB; rc = %d", rc);
7102			}
7103			ip->bi_flags |= DSW_CFGOFFLINE;
7104		}
7105	}
7106
7107	flags = ip->bi_flags;
7108	golden = ((flags & DSW_GOLDEN) == DSW_GOLDEN);
7109	copy_flags = flags & DSW_COPYING;
7110
7111	switch (error_type) {
7112
7113	case DSW_BMPOFFLINE:
7114		/* prevent further use of bitmap */
7115		flags |= DSW_BMPOFFLINE;
7116		if (ii_debug > 0)
7117			cmn_err(CE_NOTE, "!ii: Bitmap offline");
7118
7119		switch (copy_flags) {
7120
7121		case DSW_COPYINGM:
7122			/* Bitmap offline, copying master to shadow */
7123			flags |= DSW_SHDOFFLINE;
7124			if (ii_debug > 0)
7125				cmn_err(CE_NOTE, "!ii: Implied shadow offline");
7126			break;
7127
7128		case DSW_COPYINGS:
7129			/* Bitmap offline, copying shadow to master */
7130			if (golden) {
7131				/* Shadow is still usable */
7132				if (ii_debug > 0)
7133					cmn_err(CE_NOTE,
7134					    "!ii: Implied master offline");
7135				flags |= DSW_MSTOFFLINE;
7136			} else {
7137				/*
7138				 * Snapshot restore from shadow to master
7139				 * is a dumb thing to do anyway. Lose both.
7140				 */
7141				flags |= DSW_SHDOFFLINE | DSW_MSTOFFLINE;
7142				if (ii_debug > 0)
7143					cmn_err(CE_NOTE,
7144					    "ii: Implied master and "
7145					    "shadow offline");
7146			}
7147			break;
7148
7149		case 0:
7150			/* Bitmap offline, no copying in progress */
7151			if (!golden) {
7152				if (ii_debug > 0)
7153					cmn_err(CE_NOTE,
7154					    "!ii: Implied shadow offline");
7155				flags |= DSW_SHDOFFLINE;
7156			}
7157			break;
7158		}
7159		break;
7160
7161	case DSW_OVROFFLINE:
7162		flags |= DSW_OVROFFLINE;
7163		ASSERT(ip->bi_overflow);
7164		if (ii_debug > 0)
7165			cmn_err(CE_NOTE, "!ii: Overflow offline");
7166		/* FALLTHRU */
7167	case DSW_SHDOFFLINE:
7168		flags |= DSW_SHDOFFLINE;
7169		if (ii_debug > 0)
7170			cmn_err(CE_NOTE, "!ii: Shadow offline");
7171
7172		if (copy_flags == DSW_COPYINGS) {
7173			/* Shadow offline, copying shadow to master */
7174			if (ii_debug > 0)
7175				cmn_err(CE_NOTE, "!ii: Implied master offline");
7176			flags |= DSW_MSTOFFLINE;
7177		}
7178		break;
7179
7180	case DSW_MSTOFFLINE:
7181		flags |= DSW_MSTOFFLINE;
7182		if (ii_debug > 0)
7183			cmn_err(CE_NOTE, "!ii: Master offline");
7184
7185		switch (copy_flags) {
7186
7187		case DSW_COPYINGM:
7188			/* Master offline, copying master to shadow */
7189			flags |= DSW_SHDOFFLINE;
7190			if (ii_debug > 0)
7191				cmn_err(CE_NOTE, "!ii: Implied shadow offline");
7192			break;
7193
7194		case DSW_COPYINGS:
7195			/* Master offline, copying shadow to master */
7196			if (!golden) {
7197				flags |= DSW_SHDOFFLINE;
7198				if (ii_debug > 0)
7199					cmn_err(CE_NOTE,
7200					    "!ii: Implied shadow offline");
7201			}
7202			break;
7203
7204		case 0:
7205			/* Master offline, no copying in progress */
7206			if (!golden) {
7207				flags |= DSW_SHDOFFLINE;
7208				if (ii_debug > 0)
7209					cmn_err(CE_NOTE,
7210					    "!ii: Implied shadow offline");
7211			}
7212			break;
7213		}
7214		break;
7215
7216	default:
7217		break;
7218	}
7219
7220	II_FLAG_SET(flags, ip);
7221	mutex_exit(&ip->bi_mutex);
7222
7223	if (!recursive_call &&
7224	    NSHADOWS(ip) && (flags&DSW_MSTOFFLINE) == DSW_MSTOFFLINE) {
7225		/* take master offline for all other sibling shadows */
7226		for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
7227			if (xip == ip)
7228				continue;
7229			if (_ii_rsrv_devs(xip, BMP, II_INTERNAL) != 0)
7230				continue;
7231					/* overload DSW_OVERFLOW */
7232			_ii_error(xip, DSW_MSTOFFLINE|DSW_OVERFLOW);
7233			_ii_rlse_devs(xip, BMP);
7234		}
7235	}
7236
7237}
7238
7239
7240/*
7241 * _ii_lock_chunk
7242 *	Locks access to the specified chunk
7243 *
7244 */
7245
7246static void
7247_ii_lock_chunk(_ii_info_t *ip, chunkid_t chunk)
7248{
7249	if (chunk == II_NULLCHUNK) {
7250
7251		DTRACE_PROBE(_ii_lock_chunk_type);
7252
7253		rw_enter(&ip->bi_busyrw, RW_WRITER);
7254
7255	} else {
7256
7257		DTRACE_PROBE(_ii_lock_chunk_type);
7258
7259		if (ip->bi_busy == NULL) {
7260			DTRACE_PROBE(_ii_lock_chunk_end);
7261			return;
7262		}
7263
7264		rw_enter(&ip->bi_busyrw, RW_READER);
7265		mutex_enter(&ip->bi_mutex);
7266		while (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS],
7267		    chunk % DSW_BITS))
7268			cv_wait(&ip->bi_busycv, &ip->bi_mutex);
7269		DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS);
7270		mutex_exit(&ip->bi_mutex);
7271	}
7272
7273}
7274
7275
7276/*
7277 * _ii_trylock_chunk
7278 *	Tries to lock access to the specified chunk
7279 * Returns non-zero on success.
7280 *
7281 */
7282
7283static int
7284_ii_trylock_chunk(_ii_info_t *ip, chunkid_t chunk)
7285{
7286	int rc;
7287
7288	ASSERT(chunk != II_NULLCHUNK);
7289	if (rw_tryenter(&ip->bi_busyrw, RW_READER) == 0) {
7290		DTRACE_PROBE(_ii_trylock_chunk);
7291		return (0);
7292	}
7293
7294	if (ip->bi_busy == NULL) {
7295		DTRACE_PROBE(_ii_trylock_chunk_end);
7296		return (0);
7297	}
7298
7299	mutex_enter(&ip->bi_mutex);
7300	if (DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS)) {
7301		rw_exit(&ip->bi_busyrw);	/* RW_READER */
7302		rc = 0;
7303	} else {
7304		DSW_BIT_SET(ip->bi_busy[chunk / DSW_BITS], chunk % DSW_BITS);
7305		rc = 1;
7306	}
7307	mutex_exit(&ip->bi_mutex);
7308
7309	return (rc);
7310}
7311
7312/*
7313 * _ii_unlock_chunks
7314 *	Unlocks access to the specified chunks
7315 *
7316 */
7317
7318static void
7319_ii_unlock_chunks(_ii_info_t *ip, chunkid_t  chunk, int n)
7320{
7321	if (chunk == II_NULLCHUNK) {
7322
7323		DTRACE_PROBE(_ii_unlock_chunks);
7324
7325		rw_exit(&ip->bi_busyrw);	/* RW_WRITER */
7326
7327	} else {
7328
7329		if (ip->bi_busy == NULL) {
7330			DTRACE_PROBE(_ii_unlock_chunks_end);
7331			return;
7332		}
7333		mutex_enter(&ip->bi_mutex);
7334
7335		DTRACE_PROBE(_ii_unlock_chunks);
7336
7337		for (; n-- > 0; chunk++) {
7338			ASSERT(DSW_BIT_ISSET(ip->bi_busy[chunk / DSW_BITS],
7339			    chunk % DSW_BITS));
7340			DSW_BIT_CLR(ip->bi_busy[chunk / DSW_BITS],
7341			    chunk % DSW_BITS);
7342			rw_exit(&ip->bi_busyrw);	/* RW_READER */
7343		}
7344		cv_broadcast(&ip->bi_busycv);
7345		mutex_exit(&ip->bi_mutex);
7346
7347	}
7348}
7349
7350/*
7351 * Copyout the bit map.
7352 */
7353static int
7354_ii_ab_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
7355    int user_bm_size)
7356{
7357	nsc_off_t	last_fba;
7358	nsc_buf_t *tmp;
7359	nsc_vec_t *nsc_vecp;
7360	nsc_off_t	fba_pos;
7361	int	buf_fba_len;
7362	int	buf_byte_len;
7363	size_t	co_len;
7364	int	rc;
7365
7366	DTRACE_PROBE2(_ii_ab_co_bmp_start, nsc_off_t, bm_offset,
7367	    nsc_size_t, user_bm_size);
7368
7369	if (ip->bi_flags & DSW_BMPOFFLINE)
7370		return (EIO);
7371
7372	/* First calculate the size of the shadow and copy bitmaps */
7373	co_len = DSW_BM_FBA_LEN(ip->bi_size);
7374	ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len);
7375
7376	/* Are we in the ranges of the various bitmaps/indexes? */
7377	if (bm_offset < ip->bi_shdfba)
7378		return (EIO);
7379	else if (bm_offset < (last_fba = ip->bi_shdfba + co_len))
7380		/*EMPTY*/;
7381	else if (bm_offset < (last_fba = ip->bi_copyfba + co_len))
7382		/*EMPTY*/;
7383	else if ((ip->bi_flags & DSW_TREEMAP) &&
7384	    (bm_offset < (last_fba = last_fba + (co_len * 32))))
7385		/*EMPTY*/;
7386	else return (EIO);
7387
7388	/* Are we within the size of the segment being copied? */
7389	if (FBA_LEN(user_bm_size) > last_fba - bm_offset)
7390		return (EIO);
7391
7392	for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0;
7393	    fba_pos += DSW_CBLK_FBA) {
7394		tmp = NULL;
7395		buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
7396		    DSW_CBLK_FBA : last_fba - fba_pos;
7397		II_READ_START(ip, bitmap);
7398		rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
7399		    NSC_RDBUF, &tmp);
7400		II_READ_END(ip, bitmap, rc, buf_fba_len);
7401		if (!II_SUCCESS(rc)) {
7402			if (tmp)
7403				(void) nsc_free_buf(tmp);
7404
7405			_ii_error(ip, DSW_BMPOFFLINE);
7406			return (EIO);
7407		}
7408
7409		/* copyout each nsc_vec's worth of data */
7410		buf_byte_len = FBA_SIZE(buf_fba_len);
7411		for (nsc_vecp = tmp->sb_vec;
7412		    buf_byte_len > 0 && user_bm_size > 0;
7413		    nsc_vecp++) {
7414			co_len = (user_bm_size > nsc_vecp->sv_len) ?
7415			    nsc_vecp->sv_len : user_bm_size;
7416			if (copyout(nsc_vecp->sv_addr, user_bm, co_len)) {
7417				(void) nsc_free_buf(tmp);
7418				return (EFAULT);
7419			}
7420			user_bm += co_len;
7421			user_bm_size -= co_len;
7422			buf_byte_len -= co_len;
7423		}
7424
7425
7426		(void) nsc_free_buf(tmp);
7427	}
7428
7429	return (0);
7430}
7431
7432/*
7433 * Copyin a bit map and or with differences bitmap.
7434 */
7435static int
7436_ii_ab_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
7437int user_bm_size)
7438{
7439	nsc_off_t	last_fba;
7440	nsc_buf_t *tmp;
7441	nsc_vec_t *nsc_vecp;
7442	nsc_off_t	fba_pos;
7443	int	buf_fba_len;
7444	int	buf_byte_len;
7445	size_t	ci_len;
7446	int	rc;
7447	int	n;
7448	unsigned char *tmp_buf, *tmpp, *tmpq;
7449
7450	DTRACE_PROBE2(_ii_ab_ci_bmp_start, nsc_off_t, bm_offset,
7451	    nsc_size_t, user_bm_size);
7452
7453	if (ip->bi_flags & DSW_BMPOFFLINE)
7454		return (EIO);
7455
7456	tmp_buf = NULL;
7457	last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size);
7458
7459	for (fba_pos = bm_offset; fba_pos < last_fba && user_bm_size > 0;
7460	    fba_pos += DSW_CBLK_FBA) {
7461		tmp = NULL;
7462		buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
7463		    DSW_CBLK_FBA : last_fba - fba_pos;
7464		II_READ_START(ip, bitmap);
7465		rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
7466		    NSC_RDWRBUF, &tmp);
7467		II_READ_END(ip, bitmap, rc, buf_fba_len);
7468		if (!II_SUCCESS(rc)) {
7469			if (tmp)
7470				(void) nsc_free_buf(tmp);
7471
7472			_ii_error(ip, DSW_BMPOFFLINE);
7473			return (EIO);
7474		}
7475
7476		/* copyin each nsc_vec's worth of data */
7477		buf_byte_len = FBA_SIZE(buf_fba_len);
7478		for (nsc_vecp = tmp->sb_vec;
7479		    buf_byte_len > 0 && user_bm_size > 0;
7480		    nsc_vecp++) {
7481			ci_len = (user_bm_size > nsc_vecp->sv_len) ?
7482			    nsc_vecp->sv_len : user_bm_size;
7483			tmpp = tmp_buf = kmem_alloc(ci_len, KM_SLEEP);
7484			tmpq = nsc_vecp->sv_addr;
7485			if (copyin(user_bm, tmpp, ci_len)) {
7486				(void) nsc_free_buf(tmp);
7487				kmem_free(tmp_buf, ci_len);
7488				return (EFAULT);
7489			}
7490			for (n = ci_len; n-- > 0; /* CSTYLED */)
7491				*tmpq++ |= *tmpp++;
7492			user_bm += ci_len;
7493			user_bm_size -= ci_len;
7494			buf_byte_len -= ci_len;
7495			kmem_free(tmp_buf, ci_len);
7496		}
7497
7498		II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, buf_fba_len, 0);
7499		if (!II_SUCCESS(rc)) {
7500			(void) nsc_free_buf(tmp);
7501			_ii_error(ip, DSW_BMPOFFLINE);
7502			return (EIO);
7503		}
7504
7505		(void) nsc_free_buf(tmp);
7506	}
7507
7508	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7509
7510	return (0);
7511}
7512
7513/*
7514 * Completely zero the bit map.
7515 *
7516 *	Returns 0 if no error
7517 *	Returns non-zero if there was an error
7518 */
7519static int
7520_ii_ab_zerobm(_ii_info_t *ip)
7521{
7522	nsc_off_t fba_pos;
7523	int rc;
7524	nsc_size_t len;
7525	nsc_size_t size;
7526	nsc_buf_t *tmp;
7527
7528	size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7529	for (fba_pos = ip->bi_shdfba; fba_pos < size; fba_pos += DSW_CBLK_FBA) {
7530		tmp = NULL;
7531		len = fba_pos + DSW_CBLK_FBA < size ?
7532		    DSW_CBLK_FBA : size - fba_pos;
7533		II_READ_START(ip, bitmap);
7534		rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, len, NSC_RDWRBUF,
7535		    &tmp);
7536		II_READ_END(ip, bitmap, rc, len);
7537		if (!II_SUCCESS(rc)) {
7538			if (tmp)
7539				(void) nsc_free_buf(tmp);
7540
7541			_ii_error(ip, DSW_BMPOFFLINE);
7542			return (rc);
7543		}
7544
7545		rc = nsc_zero(tmp, fba_pos, len, 0);
7546		if (II_SUCCESS(rc)) {
7547			II_NSC_WRITE(ip, bitmap, rc, tmp, fba_pos, len, 0);
7548		}
7549
7550		(void) nsc_free_buf(tmp);
7551		if (!II_SUCCESS(rc)) {
7552			_ii_error(ip, DSW_BMPOFFLINE);
7553			return (rc);
7554		}
7555	}
7556
7557	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7558
7559	return (0);
7560}
7561
7562
7563/*
7564 * Copy shadow bitmap to copy bitmap
7565 */
7566static int
7567_ii_ab_copybm(_ii_info_t *ip)
7568{
7569	nsc_off_t copy_fba_pos, shd_fba_pos;
7570	int rc;
7571	nsc_size_t len;
7572	nsc_off_t size;
7573	nsc_buf_t *copy_tmp, *shd_tmp;
7574
7575	size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7576	copy_fba_pos = ip->bi_copyfba;
7577	for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size;
7578	    copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) {
7579		shd_tmp = NULL;
7580		len = shd_fba_pos + DSW_CBLK_FBA < size ?
7581		    DSW_CBLK_FBA : size - shd_fba_pos;
7582		II_READ_START(ip, bitmap);
7583		rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len, NSC_RDBUF,
7584		    &shd_tmp);
7585		II_READ_END(ip, bitmap, rc, len);
7586		if (!II_SUCCESS(rc)) {
7587			if (shd_tmp)
7588				(void) nsc_free_buf(shd_tmp);
7589
7590			_ii_error(ip, DSW_BMPOFFLINE);
7591			if (ii_debug > 1)
7592				cmn_err(CE_NOTE, "!ii: copybm failed 1 rc %d",
7593				    rc);
7594
7595			return (rc);
7596		}
7597
7598		copy_tmp = NULL;
7599		rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len, NSC_WRBUF,
7600		    &copy_tmp);
7601		if (!II_SUCCESS(rc)) {
7602			(void) nsc_free_buf(shd_tmp);
7603			if (copy_tmp)
7604				(void) nsc_free_buf(copy_tmp);
7605
7606			_ii_error(ip, DSW_BMPOFFLINE);
7607			if (ii_debug > 1)
7608				cmn_err(CE_NOTE, "!ii: copybm failed 2 rc %d",
7609				    rc);
7610
7611			return (rc);
7612		}
7613		rc = nsc_copy(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos,
7614		    len);
7615		if (II_SUCCESS(rc)) {
7616			II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos,
7617			    len, 0);
7618		}
7619
7620		(void) nsc_free_buf(shd_tmp);
7621		(void) nsc_free_buf(copy_tmp);
7622		if (!II_SUCCESS(rc)) {
7623			if (ii_debug > 1)
7624				cmn_err(CE_NOTE, "!ii: copybm failed 4 rc %d",
7625				    rc);
7626			_ii_error(ip, DSW_BMPOFFLINE);
7627			return (rc);
7628		}
7629	}
7630
7631	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7632
7633	return (0);
7634}
7635
7636
7637/*
7638 * stolen from nsc_copy_h()
7639 */
7640
7641static int
7642_ii_nsc_or(nsc_buf_t *h1, nsc_buf_t *h2, nsc_off_t pos1, nsc_off_t pos2,
7643	nsc_size_t len)
7644{
7645	unsigned char *a1, *a2;
7646	unsigned char *b1, *b2;
7647	nsc_vec_t *v1, *v2;
7648	int i, sz, l1, l2;
7649
7650	if (pos1 < h1->sb_pos || pos1 + len > h1->sb_pos + h1->sb_len ||
7651	    pos2 < h2->sb_pos || pos2 + len > h2->sb_pos + h2->sb_len)
7652		return (EINVAL);
7653
7654	if (!len)
7655		return (0);
7656
7657	/* find starting point in "from" vector */
7658
7659	v1 = h1->sb_vec;
7660	pos1 -= h1->sb_pos;
7661
7662	for (; pos1 >= FBA_NUM(v1->sv_len); v1++)
7663		pos1 -= FBA_NUM(v1->sv_len);
7664
7665	a1 = v1->sv_addr + FBA_SIZE(pos1);
7666	l1 = v1->sv_len - FBA_SIZE(pos1);
7667
7668	/* find starting point in "to" vector */
7669
7670	v2 = h2->sb_vec;
7671	pos2 -= h2->sb_pos;
7672
7673	for (; pos2 >= FBA_NUM(v2->sv_len); v2++)
7674		pos2 -= FBA_NUM(v2->sv_len);
7675
7676	a2 = v2->sv_addr + FBA_SIZE(pos2);
7677	l2 = v2->sv_len - FBA_SIZE(pos2);
7678
7679	/* copy required data */
7680
7681	len = FBA_SIZE(len);
7682
7683	while (len) {
7684		sz = min(l1, l2);
7685		sz = (int)min((nsc_size_t)sz, len);
7686
7687		b1 = a1;
7688		b2 = a2;
7689		for (i = sz; i-- > 0; /* CSTYLED */)
7690			*b2++ |= *b1++;
7691
7692		l1 -= sz;
7693		l2 -= sz;
7694		a1 += sz;
7695		a2 += sz;
7696		len -= sz;
7697
7698		if (!l1) {
7699			a1 = (++v1)->sv_addr;
7700			l1 = v1->sv_len;
7701		}
7702		if (!l2) {
7703			a2 = (++v2)->sv_addr;
7704			l2 = v2->sv_len;
7705		}
7706	}
7707
7708	return (0);
7709}
7710
7711
7712/*
7713 * Or the shadow bitmap in to the copy bitmap, clear the
7714 * shadow bitmap.
7715 */
7716static int
7717_ii_ab_orbm(_ii_info_t *ip)
7718{
7719	nsc_off_t copy_fba_pos, shd_fba_pos;
7720	int rc;
7721	nsc_size_t len;
7722	size_t size;
7723	nsc_buf_t *copy_tmp, *shd_tmp;
7724
7725	if (ip->bi_flags & DSW_BMPOFFLINE)
7726		return (EIO);
7727
7728	size = DSW_BM_FBA_LEN(ip->bi_size) + ip->bi_shdfba;
7729	copy_fba_pos = ip->bi_copyfba;
7730	for (shd_fba_pos = ip->bi_shdfba; shd_fba_pos < size;
7731	    copy_fba_pos += DSW_CBLK_FBA, shd_fba_pos += DSW_CBLK_FBA) {
7732		shd_tmp = NULL;
7733		len = shd_fba_pos + DSW_CBLK_FBA < size ?
7734		    DSW_CBLK_FBA : size - shd_fba_pos;
7735		II_READ_START(ip, bitmap);
7736		rc = nsc_alloc_buf(ip->bi_bmpfd, shd_fba_pos, len,
7737		    NSC_RDBUF|NSC_WRBUF, &shd_tmp);
7738		II_READ_END(ip, bitmap, rc, len);
7739		if (!II_SUCCESS(rc)) {
7740			if (shd_tmp)
7741				(void) nsc_free_buf(shd_tmp);
7742
7743			_ii_error(ip, DSW_BMPOFFLINE);
7744			return (rc);
7745		}
7746
7747		copy_tmp = NULL;
7748		II_READ_START(ip, bitmap);
7749		rc = nsc_alloc_buf(ip->bi_bmpfd, copy_fba_pos, len,
7750		    NSC_RDBUF|NSC_WRBUF, &copy_tmp);
7751		II_READ_END(ip, bitmap, rc, len);
7752		if (!II_SUCCESS(rc)) {
7753			(void) nsc_free_buf(shd_tmp);
7754			if (copy_tmp)
7755				(void) nsc_free_buf(copy_tmp);
7756
7757			_ii_error(ip, DSW_BMPOFFLINE);
7758			return (rc);
7759		}
7760		rc = _ii_nsc_or(shd_tmp, copy_tmp, shd_fba_pos, copy_fba_pos,
7761		    len);
7762		if (II_SUCCESS(rc)) {
7763			II_NSC_WRITE(ip, bitmap, rc, copy_tmp, copy_fba_pos,
7764			    len, 0);
7765		}
7766		if (II_SUCCESS(rc))
7767			rc = nsc_zero(shd_tmp, shd_fba_pos, len, 0);
7768		if (II_SUCCESS(rc)) {
7769			II_NSC_WRITE(ip, bitmap, rc, shd_tmp, shd_fba_pos, len,
7770			    0);
7771		}
7772
7773		(void) nsc_free_buf(shd_tmp);
7774		(void) nsc_free_buf(copy_tmp);
7775		if (!II_SUCCESS(rc)) {
7776			_ii_error(ip, DSW_BMPOFFLINE);
7777			return (rc);
7778		}
7779	}
7780
7781	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
7782
7783	return (0);
7784}
7785
7786/*
7787 * _ii_ab_tst_shd_bit
7788 *	Determine if a chunk has been copied to the shadow device
7789 *	Relies on the alloc_buf/free_buf semantics for locking.
7790 *
7791 * Calling/Exit State:
7792 *	Returns 1 if the modified bit has been set for the shadow device,
7793 *	Returns 0 if the modified bit has not been set for the shadow device,
7794 *	Returns -1 if there was an error
7795 */
7796
7797static int
7798_ii_ab_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk)
7799{
7800	int rc;
7801	nsc_off_t fba;
7802	nsc_buf_t *tmp = NULL;
7803
7804	if (ip->bi_flags & DSW_BMPOFFLINE)
7805		return (EIO);
7806
7807	fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7808	chunk %= FBA_SIZE(1) * DSW_BITS;
7809	II_READ_START(ip, bitmap);
7810	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
7811	II_READ_END(ip, bitmap, rc, 1);
7812	if (!II_SUCCESS(rc)) {
7813		_ii_error(ip, DSW_BMPOFFLINE);
7814		if (tmp)
7815			(void) nsc_free_buf(tmp);
7816		return (-1);
7817	}
7818	rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7819	    chunk%DSW_BITS);
7820	(void) nsc_free_buf(tmp);
7821
7822	return (rc);
7823}
7824
7825
7826/*
7827 * _ii_ab_set_shd_bit
7828 *	Records that a chunk has been copied to the shadow device
7829 *
7830 *	Returns non-zero if an error is encountered
7831 *	Returns 0 if no error
7832 */
7833
7834static int
7835_ii_ab_set_shd_bit(_ii_info_t *ip, chunkid_t chunk)
7836{
7837	int rc;
7838	nsc_off_t fba;
7839	nsc_buf_t *tmp = NULL;
7840
7841	if (ip->bi_flags & DSW_BMPOFFLINE)
7842		return (EIO);
7843
7844	fba = ip->bi_shdfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7845	chunk %= FBA_SIZE(1) * DSW_BITS;
7846	II_READ_START(ip, bitmap);
7847	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7848	II_READ_END(ip, bitmap, rc, 1);
7849	if (!II_SUCCESS(rc)) {
7850		_ii_error(ip, DSW_BMPOFFLINE);
7851		if (tmp)
7852			(void) nsc_free_buf(tmp);
7853		return (rc);
7854	}
7855	if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7856	    chunk%DSW_BITS) == 0) {
7857		DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7858		    chunk%DSW_BITS);
7859		II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
7860		if ((ip->bi_state & DSW_CNTSHDBITS) == 0)
7861			ip->bi_shdbits++;
7862	}
7863	(void) nsc_free_buf(tmp);
7864	if (!II_SUCCESS(rc)) {
7865		_ii_error(ip, DSW_BMPOFFLINE);
7866		return (rc);
7867	}
7868
7869	return (0);
7870}
7871
7872
7873/*
7874 * _ii_ab_tst_copy_bit
7875 *	Determine if a chunk needs to be copied during updates.
7876 *
7877 * Calling/Exit State:
7878 *	Returns 1 if the copy bit for the chunk is set
7879 *	Returns 0 if the copy bit for the chunk is not set
7880 *	Returns -1 if an error is encountered
7881 */
7882
7883static int
7884_ii_ab_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk)
7885{
7886	int rc;
7887	nsc_off_t fba;
7888	nsc_buf_t *tmp = NULL;
7889
7890	if (ip->bi_flags & DSW_BMPOFFLINE)
7891		return (-1);
7892
7893	fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7894	chunk %= FBA_SIZE(1) * DSW_BITS;
7895	II_READ_START(ip, bitmap);
7896	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
7897	II_READ_END(ip, bitmap, rc, 1);
7898	if (!II_SUCCESS(rc)) {
7899		if (tmp)
7900			(void) nsc_free_buf(tmp);
7901		_ii_error(ip, DSW_BMPOFFLINE);
7902		return (-1);
7903	}
7904	rc = DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7905	    chunk%DSW_BITS);
7906	(void) nsc_free_buf(tmp);
7907
7908	return (rc);
7909}
7910
7911
7912/*
7913 * _ii_ab_set_copy_bit
7914 *	Records that a chunk has been copied to the shadow device
7915 *
7916 *	Returns non-zero if an error is encountered
7917 *	Returns 0 if no error
7918 */
7919
7920static int
7921_ii_ab_set_copy_bit(_ii_info_t *ip, chunkid_t chunk)
7922{
7923	int rc;
7924	nsc_off_t fba;
7925	nsc_buf_t *tmp = NULL;
7926
7927	if (ip->bi_flags & DSW_BMPOFFLINE)
7928		return (EIO);
7929
7930	fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7931	chunk %= FBA_SIZE(1) * DSW_BITS;
7932	II_READ_START(ip, bitmap);
7933	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7934	II_READ_END(ip, bitmap, rc, 1);
7935	if (!II_SUCCESS(rc)) {
7936		if (tmp)
7937			(void) nsc_free_buf(tmp);
7938		_ii_error(ip, DSW_BMPOFFLINE);
7939		return (rc);
7940	}
7941	if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7942	    chunk%DSW_BITS) == 0) {
7943		DSW_BIT_SET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7944		    chunk%DSW_BITS);
7945		if ((ip->bi_state & DSW_CNTCPYBITS) == 0)
7946			ip->bi_copybits++;
7947
7948		II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
7949	}
7950	(void) nsc_free_buf(tmp);
7951	if (!II_SUCCESS(rc)) {
7952		_ii_error(ip, DSW_BMPOFFLINE);
7953		return (rc);
7954	}
7955
7956	return (0);
7957}
7958
7959
7960/*
7961 * _ii_ab_clr_copy_bits
7962 *	Records that a chunk has been cleared on the shadow device, this
7963 *	function assumes that the bits to clear are all in the same fba,
7964 *	as is the case when they were generated by _ii_ab_next_copy_bit().
7965 *
7966 *	Returns non-zero if an error is encountered
7967 *	Returns 0 if no error
7968 */
7969
7970static int
7971_ii_ab_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks)
7972{
7973	int rc;
7974	nsc_off_t fba;
7975	nsc_buf_t *tmp = NULL;
7976
7977	if (ip->bi_flags & DSW_BMPOFFLINE)
7978		return (EIO);
7979
7980	fba = ip->bi_copyfba + chunk / (FBA_SIZE(1) * DSW_BITS);
7981	chunk %= FBA_SIZE(1) * DSW_BITS;
7982	II_READ_START(ip, bitmap);
7983	rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF|NSC_WRBUF, &tmp);
7984	II_READ_END(ip, bitmap, rc, 1);
7985	if (!II_SUCCESS(rc)) {
7986		if (tmp)
7987			(void) nsc_free_buf(tmp);
7988		_ii_error(ip, DSW_BMPOFFLINE);
7989		return (rc);
7990	}
7991	for (; nchunks-- > 0; chunk++) {
7992		DSW_BIT_CLR(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
7993		    chunk%DSW_BITS);
7994		if (ip->bi_copybits > 0)
7995			ip->bi_copybits--;
7996	}
7997
7998	II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
7999	(void) nsc_free_buf(tmp);
8000	if (!II_SUCCESS(rc)) {
8001		_ii_error(ip, DSW_BMPOFFLINE);
8002		return (rc);
8003	}
8004
8005	return (0);
8006}
8007
8008/*
8009 * _ii_ab_fill_copy_bmp
8010 *	Fills the copy bitmap with 1's.
8011 *
8012 *	Returns non-zero if an error is encountered
8013 *	Returns 0 if no error
8014 */
8015
8016static int
8017_ii_ab_fill_copy_bmp(_ii_info_t *ip)
8018{
8019	int rc;
8020	nsc_off_t fba;
8021	nsc_buf_t *tmp;
8022	unsigned char *p;
8023	int i, j;
8024
8025	if (ip->bi_flags & DSW_BMPOFFLINE)
8026		return (EIO);
8027
8028	fba = ip->bi_copyfba;
8029	for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) {
8030		tmp = NULL;
8031		rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_WRBUF, &tmp);
8032		if (!II_SUCCESS(rc)) {
8033			if (tmp)
8034				(void) nsc_free_buf(tmp);
8035			_ii_error(ip, DSW_BMPOFFLINE);
8036			return (rc);
8037		}
8038		p = (unsigned char *)tmp->sb_vec->sv_addr;
8039		for (j = FBA_SIZE(1); j-- > 0; p++)
8040			*p = (unsigned char)0xff;
8041		II_NSC_WRITE(ip, bitmap, rc, tmp, fba, 1, 0);
8042		if (!II_SUCCESS(rc)) {
8043			_ii_error(ip, DSW_BMPOFFLINE);
8044			(void) nsc_free_buf(tmp);
8045			return (rc);
8046		}
8047		(void) nsc_free_buf(tmp);
8048	}
8049
8050	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8051
8052	return (0);
8053}
8054
8055/*
8056 * _ii_ab_load_bmp
8057 *	Load bitmap from persistent storage.
8058 */
8059
8060static int
8061_ii_ab_load_bmp(_ii_info_t *ip, int flag)
8062/* ARGSUSED */
8063{
8064	if (ip->bi_flags & DSW_BMPOFFLINE)
8065		return (EIO);
8066
8067	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8068
8069	return (0);
8070}
8071
/*
 * _ii_ab_next_copy_bit
 *	Find next set copy bit.
 *
 * Returns the next bits set in the copy bitmap, with the corresponding chunks
 * locked. Used to avoid having to reread the same bit map block as each bit
 * is tested.
 *
 * Arguments:
 *	startchunk	first chunk to examine
 *	maxchunk	search stops before this chunk
 *	wanted		maximum number of consecutive chunks to lock
 *	got		out: number of chunks actually locked (0 if none)
 *
 * Returns the first locked chunk, or maxchunk + 1 if no set bit was found,
 * the bitmap is offline, or the bitmap could not be read.  A run never
 * extends past the end of the bitmap FBA in which it starts.
 */

static chunkid_t
_ii_ab_next_copy_bit(_ii_info_t *ip, chunkid_t startchunk, chunkid_t maxchunk,
	int wanted, int *got)
{
	chunkid_t rc;
	nsc_off_t fba;
	chunkid_t chunk;
	int bits_per_fba = FBA_SIZE(1) * DSW_BITS;
	int high;
	chunkid_t nextchunk;
	nsc_buf_t *tmp = NULL;

	*got = 0;
again:
	/* re-checked on every retry: a failed bit test marks the bmp offline */
	if (ip->bi_flags & DSW_BMPOFFLINE)
		return (maxchunk + 1);

	while (startchunk < maxchunk) {
		tmp = NULL;
		/* fba: bitmap block holding startchunk; chunk: bit within it */
		fba = ip->bi_copyfba + startchunk / bits_per_fba;
		chunk = startchunk % bits_per_fba;
		II_READ_START(ip, bitmap);
		rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
		II_READ_END(ip, bitmap, rc, 1);
		if (!II_SUCCESS(rc)) {
			if (tmp)
				(void) nsc_free_buf(tmp);
			_ii_error(ip, DSW_BMPOFFLINE);
			return (maxchunk + 1);
		}
		/* high: first chunk beyond this FBA, capped at maxchunk */
		high = startchunk + bits_per_fba - startchunk%bits_per_fba;
		if (high > maxchunk)
			high = maxchunk;
		for (; startchunk < high; chunk++, startchunk++) {
			if (DSW_BIT_ISSET(tmp->sb_vec->sv_addr[chunk/DSW_BITS],
			    chunk%DSW_BITS)) {
				/*
				 * trylock won't sleep so can use while
				 * holding the buf.
				 */
				if (!_ii_trylock_chunk(ip, startchunk)) {
					/*
					 * Drop the buffer before taking the
					 * chunk lock with _ii_lock_chunk(),
					 * then re-test the bit since the
					 * freed buffer contents can no longer
					 * be relied on.
					 */
					(void) nsc_free_buf(tmp);
					_ii_lock_chunk(ip, startchunk);
					if (_ii_ab_tst_copy_bit(ip, startchunk)
					    != 1) {
						/*
						 * another process copied this
						 * chunk while we were acquiring
						 * the chunk lock.
						 */
						_ii_unlock_chunk(ip,
						    startchunk);
						DTRACE_PROBE(
						    _ii_ab_next_copy_bit_again);
						goto again;
					}
					/* only this one chunk is returned */
					*got = 1;
					DTRACE_PROBE(_ii_ab_next_copy_bit_end);
					return (startchunk);
				}
				*got = 1;
				nextchunk = startchunk + 1;
				chunk++;
				/*
				 * Extend the run while bits stay set, the
				 * trylock succeeds, fewer than "wanted" chunks
				 * are held and the current FBA is not exceeded.
				 */
				for (; --wanted > 0 && nextchunk < high;
				    nextchunk++, chunk++) {
					if (!DSW_BIT_ISSET(tmp->sb_vec->sv_addr
					    [chunk/DSW_BITS], chunk%DSW_BITS)) {
						break;	/* end of bit run */
					}
					if (_ii_trylock_chunk(ip, nextchunk))
						(*got)++;
					else
						break;
				}
				(void) nsc_free_buf(tmp);
				DTRACE_PROBE(_ii_ab_next_copy_bit);
				return (startchunk);
			}
		}
		/* no set bit in this FBA; move on to the next one */
		(void) nsc_free_buf(tmp);
	}

	return (maxchunk + 1);
}
8165
8166/*
8167 * _ii_ab_save_bmp
8168 *	Save bitmap to persistent storage.
8169 */
8170
8171static int
8172_ii_ab_save_bmp(_ii_info_t *ip, int flag)
8173/* ARGSUSED */
8174{
8175	if (ip->bi_flags & DSW_BMPOFFLINE)
8176		return (EIO);
8177
8178	return (0);
8179}
8180
8181/*
8182 * _ii_ab_change_bmp
8183 *	copy change bitmap to memory
8184 */
8185
8186static int
8187_ii_ab_change_bmp(_ii_info_t *ip, unsigned char *ptr)
8188/* ARGSUSED */
8189{
8190	int	bm_size;
8191	int	i, j, fba;
8192	int	rc;
8193	unsigned char *p;
8194	nsc_buf_t *tmp = NULL;
8195
8196	if (ip->bi_flags & DSW_BMPOFFLINE)
8197		return (EIO);
8198	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8199
8200	rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba,
8201	    ptr, bm_size);
8202	if (!II_SUCCESS(rc)) {
8203		_ii_error(ip, DSW_BMPOFFLINE);
8204		return (rc);
8205	}
8206
8207	fba = ip->bi_copyfba;
8208	for (i = DSW_BM_FBA_LEN(ip->bi_size); i-- > 0; fba++) {
8209		tmp = NULL;
8210		II_READ_START(ip, bitmap);
8211		rc = nsc_alloc_buf(ip->bi_bmpfd, fba, 1, NSC_RDBUF, &tmp);
8212		II_READ_END(ip, bitmap, rc, 1);
8213		if (!II_SUCCESS(rc)) {
8214			if (tmp)
8215				(void) nsc_free_buf(tmp);
8216			_ii_error(ip, DSW_BMPOFFLINE);
8217			return (rc);
8218		}
8219		p = (unsigned char *)tmp->sb_vec->sv_addr;
8220		for (j = FBA_SIZE(1); j-- > 0; p++)
8221			*ptr |= *p;
8222		(void) nsc_free_buf(tmp);
8223	}
8224
8225	return (0);
8226}
8227
8228/*
8229 * Count bits set in the bit map.
8230 */
8231static int
8232_ii_ab_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter,
8233int bm_size)
8234{
8235	nsc_size_t	last_fba;
8236	nsc_buf_t *tmp;
8237	nsc_vec_t *sd_vecp;
8238	nsc_off_t	fba_pos;
8239	int	buf_fba_len;
8240	int	buf_byte_len;
8241	int	co_len;
8242	int	i;
8243	unsigned int j, k;
8244	unsigned char *cp;
8245	int	rc;
8246
8247	*counter = 0;
8248	if (ip->bi_flags & DSW_BMPOFFLINE)
8249		return (EIO);
8250
8251	last_fba = bm_offset + DSW_BM_FBA_LEN(ip->bi_size);
8252
8253	for (fba_pos = bm_offset; fba_pos < last_fba && bm_size > 0;
8254	    fba_pos += DSW_CBLK_FBA) {
8255		tmp = NULL;
8256		buf_fba_len = fba_pos + DSW_CBLK_FBA < last_fba ?
8257		    DSW_CBLK_FBA : last_fba - fba_pos;
8258		II_READ_START(ip, bitmap);
8259		rc = nsc_alloc_buf(ip->bi_bmpfd, fba_pos, buf_fba_len,
8260		    NSC_RDBUF, &tmp);
8261		II_READ_END(ip, bitmap, rc, 1);
8262		if (!II_SUCCESS(rc)) {
8263			if (tmp)
8264				(void) nsc_free_buf(tmp);
8265
8266			_ii_error(ip, DSW_BMPOFFLINE);
8267			return (EIO);
8268		}
8269
8270		/* count each sd_vec's worth of data */
8271		buf_byte_len = FBA_SIZE(buf_fba_len);
8272		for (sd_vecp = tmp->sb_vec;
8273		    buf_byte_len > 0 && bm_size > 0;
8274		    sd_vecp++) {
8275			co_len = (bm_size > sd_vecp->sv_len) ?
8276			    sd_vecp->sv_len : bm_size;
8277			cp = sd_vecp->sv_addr;
8278			for (i = k = 0; i < co_len; i++)
8279				for (j = (unsigned)*cp++; j; j &= j - 1)
8280					k++;
8281			*counter += k;
8282			bm_size -= co_len;
8283			buf_byte_len -= co_len;
8284		}
8285
8286
8287		(void) nsc_free_buf(tmp);
8288	}
8289
8290	return (0);
8291}
8292
8293/*
8294 * OR the bitmaps as part of a join operation
8295 */
8296static int
8297_ii_ab_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip)
8298{
8299	int rc;
8300	nsc_size_t len;
8301	nsc_size_t size;
8302	nsc_buf_t *dest_tmp, *src_tmp;
8303	nsc_off_t src_fba_pos;
8304
8305	if ((src_ip->bi_flags & DSW_BMPOFFLINE) ||
8306	    (dest_ip->bi_flags & DSW_BMPOFFLINE))
8307		return (EIO);
8308
8309	size = DSW_BM_FBA_LEN(src_ip->bi_size) + src_ip->bi_shdfba;
8310	for (src_fba_pos = src_ip->bi_shdfba; src_fba_pos < size;
8311	    src_fba_pos += DSW_CBLK_FBA) {
8312		src_tmp = NULL;
8313		len = src_fba_pos + DSW_CBLK_FBA < size ?
8314		    DSW_CBLK_FBA : size - src_fba_pos;
8315		II_READ_START(src_ip, bitmap);
8316		rc = nsc_alloc_buf(src_ip->bi_bmpfd, src_fba_pos, len,
8317		    NSC_RDWRBUF, &src_tmp);
8318		II_READ_END(src_ip, bitmap, rc, len);
8319		if (!II_SUCCESS(rc)) {
8320			if (src_tmp)
8321				(void) nsc_free_buf(src_tmp);
8322
8323			_ii_error(src_ip, DSW_BMPOFFLINE);
8324			return (rc);
8325		}
8326
8327		dest_tmp = NULL;
8328		II_READ_START(dest_ip, bitmap);
8329		rc = nsc_alloc_buf(dest_ip->bi_bmpfd, src_fba_pos, len,
8330		    NSC_RDWRBUF, &dest_tmp);
8331		II_READ_END(dest_ip, bitmap, rc, len);
8332		if (!II_SUCCESS(rc)) {
8333			(void) nsc_free_buf(src_tmp);
8334			if (dest_tmp)
8335				(void) nsc_free_buf(dest_tmp);
8336
8337			_ii_error(dest_ip, DSW_BMPOFFLINE);
8338			return (rc);
8339		}
8340		rc = _ii_nsc_or(src_tmp, dest_tmp, src_fba_pos, src_fba_pos,
8341		    len);
8342		if (II_SUCCESS(rc)) {
8343			II_NSC_WRITE(dest_ip, bitmap, rc, dest_tmp,
8344			    src_fba_pos, len, 0);
8345		}
8346
8347		(void) nsc_free_buf(src_tmp);
8348		(void) nsc_free_buf(dest_tmp);
8349		if (!II_SUCCESS(rc)) {
8350			_ii_error(dest_ip, DSW_BMPOFFLINE);
8351			return (rc);
8352		}
8353	}
8354
8355	dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8356
8357	return (0);
8358
8359}
8360
/*
 * Bitmap operations vector built from the _ii_ab_* routines, which access
 * the bitmap through nsc_alloc_buf() on the bitmap volume.  The initializer
 * is positional, so the entries must stay in the member order of
 * _ii_bmp_ops_t; the kmem_buf_bmp table lists its routines in the same order.
 */
static _ii_bmp_ops_t alloc_buf_bmp = {
	_ii_ab_co_bmp,
	_ii_ab_ci_bmp,
	_ii_ab_zerobm,
	_ii_ab_copybm,
	_ii_ab_orbm,
	_ii_ab_tst_shd_bit,
	_ii_ab_set_shd_bit,
	_ii_ab_tst_copy_bit,
	_ii_ab_set_copy_bit,
	_ii_ab_clr_copy_bits,
	_ii_ab_next_copy_bit,
	_ii_ab_fill_copy_bmp,
	_ii_ab_load_bmp,
	_ii_ab_save_bmp,
	_ii_ab_change_bmp,
	_ii_ab_cnt_bits,
	_ii_ab_join_bmp
};
8380
8381
8382/*
8383 * Copyout the bit map.
8384 */
8385static int
8386_ii_km_co_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
8387    int user_bm_size)
8388{
8389	int	start_offset;
8390	int	bm_size;
8391	size_t	co_len;
8392	nsc_off_t	last_fba;
8393
8394	/* First calculate the size of the shadow and copy bitmaps */
8395	co_len = DSW_BM_FBA_LEN(ip->bi_size);
8396	ASSERT((ip->bi_copyfba - ip->bi_shdfba) == co_len);
8397
8398	/* Are we in the ranges of the various bitmaps/indexes? */
8399	if (bm_offset < ip->bi_shdfba)
8400		return (EIO);
8401	else if (bm_offset < (last_fba = ip->bi_shdfba + co_len))
8402		/*EMPTY*/;
8403	else if (bm_offset < (last_fba = ip->bi_copyfba + co_len))
8404		/*EMPTY*/;
8405	else if ((ip->bi_flags & DSW_TREEMAP) &&
8406	    (bm_offset < (last_fba = last_fba + (co_len * 32))))
8407		/*EMPTY*/;
8408	else return (EIO);
8409
8410	if (FBA_LEN(user_bm_size) > last_fba - bm_offset)
8411		return (EIO);
8412
8413	start_offset = FBA_SIZE(bm_offset);
8414	bm_size = FBA_SIZE(last_fba);
8415
8416	co_len = (user_bm_size > bm_size) ? bm_size : user_bm_size;
8417	if (copyout(ip->bi_bitmap + start_offset, user_bm, co_len))
8418		return (EFAULT);
8419
8420	return (0);
8421}
8422
8423/*
8424 * Copyin a bit map and or with differences bitmap.
8425 */
8426static int
8427_ii_km_ci_bmp(_ii_info_t *ip, nsc_off_t bm_offset, unsigned char *user_bm,
8428    int user_bm_size)
8429{
8430	unsigned char *tmp_buf;
8431	unsigned char *dest;
8432	unsigned char *p;
8433	size_t	tmp_size;
8434	int	n;
8435	int	start_offset;
8436	int	bm_size;
8437	size_t	ci_len;
8438	int	rc = 0;
8439
8440	start_offset = FBA_SIZE(bm_offset);
8441	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8442
8443	tmp_buf = NULL;
8444	tmp_size = FBA_SIZE(1);
8445
8446	tmp_buf = kmem_alloc(tmp_size, KM_SLEEP);
8447	start_offset = FBA_SIZE(bm_offset);
8448	dest = ip->bi_bitmap + start_offset;
8449	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8450
8451	ci_len = (user_bm_size > bm_size) ? bm_size : user_bm_size;
8452	while (ci_len > 0) {
8453		n = (tmp_size > ci_len) ? ci_len : tmp_size;
8454		if (copyin(user_bm, tmp_buf, n)) {
8455			rc = EFAULT;
8456			break;
8457		}
8458		user_bm += n;
8459		for (p = tmp_buf; n--> 0; ci_len--)
8460			*dest++ |= *p++;
8461	}
8462	if (tmp_buf)
8463		kmem_free(tmp_buf, tmp_size);
8464
8465	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8466
8467	return (rc);
8468}
8469
8470/*
8471 * Completely zero the bit map.
8472 */
8473static int
8474_ii_km_zerobm(_ii_info_t *ip)
8475{
8476	int start_offset = FBA_SIZE(ip->bi_shdfba);
8477	int len;
8478
8479	len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8480	mutex_enter(&ip->bi_bmpmutex);
8481	bzero(ip->bi_bitmap+start_offset, len);
8482	mutex_exit(&ip->bi_bmpmutex);
8483
8484	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8485
8486	return (0);
8487}
8488
8489
8490/*
8491 * Copy shadow bitmap to copy bitmap
8492 */
8493static int
8494_ii_km_copybm(_ii_info_t *ip)
8495{
8496	int copy_offset, shd_offset;
8497	int len;
8498
8499	len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8500	shd_offset = FBA_SIZE(ip->bi_shdfba);
8501	copy_offset = FBA_SIZE(ip->bi_copyfba);
8502	mutex_enter(&ip->bi_bmpmutex);
8503	bcopy(ip->bi_bitmap+shd_offset, ip->bi_bitmap+copy_offset, len);
8504	mutex_exit(&ip->bi_bmpmutex);
8505
8506	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8507
8508	return (0);
8509}
8510
8511
8512/*
8513 * Or the shadow bitmap in to the copy bitmap, clear the
8514 * shadow bitmap.
8515 */
8516static int
8517_ii_km_orbm(_ii_info_t *ip)
8518{
8519	unsigned char *copy, *shd;
8520	int copy_offset, shd_offset;
8521	int len;
8522
8523	len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8524	shd_offset = FBA_SIZE(ip->bi_shdfba);
8525	copy_offset = FBA_SIZE(ip->bi_copyfba);
8526	shd = ip->bi_bitmap + shd_offset;
8527	copy = ip->bi_bitmap + copy_offset;
8528
8529	mutex_enter(&ip->bi_bmpmutex);
8530	while (len-- > 0)
8531		*copy++ |= *shd++;
8532	mutex_exit(&ip->bi_bmpmutex);
8533
8534	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8535
8536	return (0);
8537}
8538
8539/*
8540 * _ii_km_tst_shd_bit
8541 *	Determine if a chunk has been copied to the shadow device
8542 *
8543 * Calling/Exit State:
8544 *	Returns 1 if the modified bit has been set for the shadow device,
8545 *	otherwise returns 0.
8546 */
8547
8548static int
8549_ii_km_tst_shd_bit(_ii_info_t *ip, chunkid_t chunk)
8550{
8551	unsigned char *bmp;
8552	int bmp_offset;
8553	int rc;
8554
8555	bmp_offset = FBA_SIZE(ip->bi_shdfba);
8556	bmp = ip->bi_bitmap + bmp_offset;
8557
8558	mutex_enter(&ip->bi_bmpmutex);
8559	rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8560	mutex_exit(&ip->bi_bmpmutex);
8561
8562	return (rc);
8563}
8564
8565
8566/*
8567 * _ii_km_set_shd_bit
8568 *	Records that a chunk has been copied to the shadow device
8569 */
8570
8571static int
8572_ii_km_set_shd_bit(_ii_info_t *ip, chunkid_t chunk)
8573{
8574	unsigned char *bmp;
8575	int bmp_offset;
8576
8577	bmp_offset = FBA_SIZE(ip->bi_shdfba);
8578	bmp = ip->bi_bitmap + bmp_offset;
8579
8580	mutex_enter(&ip->bi_bmpmutex);
8581	if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) {
8582		DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8583		if ((ip->bi_state & DSW_CNTSHDBITS) == 0)
8584			ip->bi_shdbits++;
8585	}
8586	mutex_exit(&ip->bi_bmpmutex);
8587
8588	return (0);
8589}
8590
8591/*
8592 * _ii_km_tst_copy_bit
8593 *	Determine if a chunk needs to be copied during updates.
8594 *
8595 * Calling/Exit State:
8596 *	Returns 1 if the copy bit for the chunk is set,
8597 *	otherwise returns 0
8598 */
8599
8600static int
8601_ii_km_tst_copy_bit(_ii_info_t *ip, chunkid_t chunk)
8602{
8603	unsigned char *bmp;
8604	int bmp_offset;
8605	int rc;
8606
8607	bmp_offset = FBA_SIZE(ip->bi_copyfba);
8608	bmp = ip->bi_bitmap + bmp_offset;
8609
8610	mutex_enter(&ip->bi_bmpmutex);
8611	rc = DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8612	mutex_exit(&ip->bi_bmpmutex);
8613
8614	return (rc);
8615}
8616
8617
8618/*
8619 * _ii_km_set_copy_bit
8620 *	Records that a chunk has been copied to the shadow device
8621 */
8622
8623static int
8624_ii_km_set_copy_bit(_ii_info_t *ip, chunkid_t chunk)
8625{
8626	unsigned char *bmp;
8627	int bmp_offset;
8628
8629	bmp_offset = FBA_SIZE(ip->bi_copyfba);
8630	bmp = ip->bi_bitmap + bmp_offset;
8631
8632	mutex_enter(&ip->bi_bmpmutex);
8633	if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS) == 0) {
8634		DSW_BIT_SET(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8635		if ((ip->bi_state & DSW_CNTCPYBITS) == 0)
8636			ip->bi_copybits++;
8637	}
8638	mutex_exit(&ip->bi_bmpmutex);
8639
8640	return (0);
8641}
8642
8643
8644/*
8645 * _ii_km_clr_copy_bits
8646 *	Records that a chunk has been cleared on the shadow device
8647 */
8648
8649static int
8650_ii_km_clr_copy_bits(_ii_info_t *ip, chunkid_t chunk, int nchunks)
8651{
8652	unsigned char *bmp;
8653	int bmp_offset;
8654
8655	bmp_offset = FBA_SIZE(ip->bi_copyfba);
8656	bmp = ip->bi_bitmap + bmp_offset;
8657
8658	mutex_enter(&ip->bi_bmpmutex);
8659	for (; nchunks-- > 0; chunk++) {
8660		DSW_BIT_CLR(bmp[chunk/DSW_BITS], chunk%DSW_BITS);
8661		if (ip->bi_copybits > 0)
8662			ip->bi_copybits--;
8663	}
8664	mutex_exit(&ip->bi_bmpmutex);
8665
8666	return (0);
8667}
8668
8669/*
8670 * _ii_km_fill_copy_bmp
8671 *	Fills the copy bitmap with 1's.
8672 */
8673
8674static int
8675_ii_km_fill_copy_bmp(_ii_info_t *ip)
8676{
8677	int len;
8678	unsigned char *bmp;
8679	int bmp_offset;
8680
8681	bmp_offset = FBA_SIZE(ip->bi_copyfba);
8682	bmp = ip->bi_bitmap + bmp_offset;
8683
8684	len = FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba);
8685
8686	mutex_enter(&ip->bi_bmpmutex);
8687	while (len-- > 0)
8688		*bmp++ = (unsigned char)0xff;
8689	mutex_exit(&ip->bi_bmpmutex);
8690
8691	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8692
8693	return (0);
8694}
8695
8696/*
8697 * _ii_km_load_bmp
8698 *	Load bitmap from persistent storage.
8699 */
8700
8701static int
8702_ii_km_load_bmp(_ii_info_t *ip, int flag)
8703{
8704	nsc_off_t bmp_offset;
8705	nsc_size_t bitmap_size;
8706	int rc;
8707
8708	if (ip->bi_flags & DSW_BMPOFFLINE)
8709		return (EIO);
8710
8711	if (ip->bi_bitmap == NULL) {
8712		bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) +
8713		    ip->bi_shdfba);
8714		ip->bi_bitmap = nsc_kmem_zalloc(bitmap_size, KM_SLEEP,
8715		    _ii_local_mem);
8716	}
8717	if (flag)
8718		return (0);		/* just create an empty bitmap */
8719	bmp_offset = FBA_SIZE(ip->bi_shdfba);
8720	rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_RDBUF, ip->bi_shdfba,
8721	    ip->bi_bitmap + bmp_offset,
8722	    2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba));
8723	if (!II_SUCCESS(rc))
8724		_ii_error(ip, DSW_BMPOFFLINE);
8725
8726	ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8727
8728	return (rc);
8729}
8730
8731/*
8732 * _ii_km_save_bmp
8733 *	Save bitmap to persistent storage.
8734 */
8735
8736static int
8737_ii_km_save_bmp(_ii_info_t *ip, int flag)
8738{
8739	int bmp_offset;
8740	int bitmap_size;
8741	int rc;
8742
8743	bmp_offset = FBA_SIZE(ip->bi_shdfba);
8744	if (ip->bi_flags & DSW_BMPOFFLINE)
8745		rc = EIO;
8746	else {
8747		rc = _ii_nsc_io(ip, KS_BMP, ip->bi_bmpfd, NSC_WRBUF,
8748		    ip->bi_shdfba, ip->bi_bitmap + bmp_offset,
8749		    2 * FBA_SIZE(ip->bi_copyfba - ip->bi_shdfba));
8750		if (!II_SUCCESS(rc))
8751			_ii_error(ip, DSW_BMPOFFLINE);
8752	}
8753
8754	if (flag && ip->bi_bitmap) {		/* dispose of bitmap memory */
8755		bitmap_size = FBA_SIZE(2 * (ip->bi_copyfba - ip->bi_shdfba) +
8756		    ip->bi_shdfba);
8757		nsc_kmem_free(ip->bi_bitmap, bitmap_size);
8758		ip->bi_bitmap = NULL;
8759	}
8760
8761	return (rc);
8762}
8763
/*
 * _ii_km_next_copy_bit
 *	Find next set copy bit.
 *
 * Returns the next bits set in the copy bitmap, with the corresponding chunks
 * locked. Used to cut down on the number of times the bmpmutex is acquired.
 *
 * Arguments:
 *	chunk		first chunk to examine
 *	maxchunk	search stops before this chunk
 *	want		maximum number of consecutive chunks to lock
 *	got		out: number of chunks actually locked (0 if none)
 *
 * Returns the first locked chunk, or maxchunk + 1 if no copy bit is set in
 * the range searched.
 */

static chunkid_t
_ii_km_next_copy_bit(_ii_info_t *ip, chunkid_t chunk, chunkid_t maxchunk,
	int want, int *got)
{
	unsigned char *bmp;
	int bmp_offset;
	int nextchunk;

	*got = 0;
	/* bmp addresses the start of the in-memory copy bitmap */
	bmp_offset = FBA_SIZE(ip->bi_copyfba);
	bmp = ip->bi_bitmap + bmp_offset;

	mutex_enter(&ip->bi_bmpmutex);
	for (; chunk < maxchunk; chunk++) {
		if (DSW_BIT_ISSET(bmp[chunk/DSW_BITS], chunk%DSW_BITS)) {
			/*
			 * trylock won't sleep so can use while
			 * holding bi_bmpmutex.
			 */
			if (!_ii_trylock_chunk(ip, chunk)) {
				/*
				 * Drop the mutex before taking the chunk
				 * lock with _ii_lock_chunk(); only this one
				 * chunk is returned.
				 * NOTE(review): unlike _ii_ab_next_copy_bit,
				 * the copy bit is not re-tested once the lock
				 * is held -- confirm callers tolerate a bit
				 * that was cleared in the meantime.
				 */
				mutex_exit(&ip->bi_bmpmutex);
				_ii_lock_chunk(ip, chunk);
				*got = 1;

				DTRACE_PROBE(_ii_km_next_copy_bit);

				return (chunk);
			}
			*got = 1;
			/*
			 * Extend the run while bits stay set, the trylock
			 * succeeds and fewer than "want" chunks are held.
			 */
			for (nextchunk = chunk + 1;
			    *got < want && nextchunk < maxchunk; nextchunk++) {
				if (!DSW_BIT_ISSET(bmp[nextchunk/DSW_BITS],
				    nextchunk%DSW_BITS))
					break;
				if (_ii_trylock_chunk(ip, nextchunk))
					(*got)++;
				else
					break;
			}
			mutex_exit(&ip->bi_bmpmutex);

			DTRACE_PROBE(_ii_km_next_copy_bit);
			return (chunk);
		}
	}
	mutex_exit(&ip->bi_bmpmutex);

	return (maxchunk + 1);
}
8821
8822/*
8823 * _ii_km_change_bmp
8824 *	copy change bitmap to memory
8825 */
8826
8827static int
8828_ii_km_change_bmp(_ii_info_t *ip, unsigned char *ptr)
8829/* ARGSUSED */
8830{
8831	int	start_offset;
8832	int	bm_size;
8833	unsigned char *q;
8834
8835	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(ip->bi_size));
8836
8837	start_offset = FBA_SIZE(ip->bi_shdfba);
8838	bcopy(ip->bi_bitmap + start_offset, ptr, bm_size);
8839
8840	start_offset = FBA_SIZE(ip->bi_copyfba);
8841	q = ip->bi_bitmap + start_offset;
8842	while (bm_size-- > 0)
8843		*ptr |= *q;
8844
8845	return (0);
8846}
8847
8848/*
8849 * Count bits set in the bit map.
8850 */
8851static int
8852_ii_km_cnt_bits(_ii_info_t *ip, nsc_off_t bm_offset, nsc_size_t *counter,
8853    int bm_size)
8854{
8855	int	start_offset;
8856	int	i;
8857	nsc_size_t j, k;
8858	unsigned char *cp;
8859
8860	start_offset = FBA_SIZE(bm_offset);
8861
8862	cp = ip->bi_bitmap + start_offset;
8863	for (i = k = 0; i < bm_size; i++)
8864		for (j = (unsigned)*cp++; j; j &= j - 1)
8865			k++;
8866	*counter = k;
8867
8868	return (0);
8869}
8870
8871/*
8872 * Or the shadow bitmap in to the copy bitmap, clear the
8873 * shadow bitmap.
8874 */
8875static int
8876_ii_km_join_bmp(_ii_info_t *dest_ip, _ii_info_t *src_ip)
8877{
8878	uchar_t *dest, *src;
8879	nsc_size_t bm_size;
8880
8881	dest = dest_ip->bi_bitmap + FBA_SIZE(dest_ip->bi_shdfba);
8882	src = src_ip->bi_bitmap + FBA_SIZE(src_ip->bi_shdfba);
8883	bm_size = FBA_SIZE(DSW_BM_FBA_LEN(dest_ip->bi_size));
8884
8885	while (bm_size-- > 0)
8886		*dest++ |= *src++;
8887
8888	dest_ip->bi_state |= (DSW_CNTSHDBITS|DSW_CNTCPYBITS);
8889
8890	return (0);
8891}
8892
/*
 * Bitmap operations vector built from the _ii_km_* routines, which work on
 * the in-memory bitmap image at ip->bi_bitmap.  The initializer is
 * positional, so the entries must stay in the member order of
 * _ii_bmp_ops_t; the alloc_buf_bmp table lists its routines in the same
 * order.
 */
static _ii_bmp_ops_t kmem_buf_bmp = {
	_ii_km_co_bmp,
	_ii_km_ci_bmp,
	_ii_km_zerobm,
	_ii_km_copybm,
	_ii_km_orbm,
	_ii_km_tst_shd_bit,
	_ii_km_set_shd_bit,
	_ii_km_tst_copy_bit,
	_ii_km_set_copy_bit,
	_ii_km_clr_copy_bits,
	_ii_km_next_copy_bit,
	_ii_km_fill_copy_bmp,
	_ii_km_load_bmp,
	_ii_km_save_bmp,
	_ii_km_change_bmp,
	_ii_km_cnt_bits,
	_ii_km_join_bmp
};
8912
8913
8914static int
8915ii_read_volume(_ii_info_t *ip, int mst_src, nsc_buf_t *srcbuf,
8916	nsc_buf_t *dstbuf, chunkid_t chunk_num, nsc_off_t fba, nsc_size_t len)
8917{
8918	int rc;
8919	nsc_buf_t *tmp;
8920	nsc_off_t mapped_fba;
8921	chunkid_t mapped_chunk;
8922	int overflow;
8923
8924	if (mst_src || (ip->bi_flags&DSW_TREEMAP) == 0) {
8925		/* simple read with optional copy */
8926		if (mst_src) {
8927			II_NSC_READ(ip, master, rc, srcbuf, fba, len, 0);
8928		} else {
8929			II_NSC_READ(ip, shadow, rc, srcbuf, fba, len, 0);
8930		}
8931		if (dstbuf && II_SUCCESS(rc)) {
8932			rc = nsc_copy(srcbuf, dstbuf, fba, fba, len);
8933		}
8934
8935		return (rc);
8936	}
8937	/* read from mapped shadow into final buffer */
8938	mapped_chunk = ii_tsearch(ip, chunk_num);
8939	if (mapped_chunk == II_NULLNODE)
8940		return (EIO);
8941	overflow = II_ISOVERFLOW(mapped_chunk);
8942	if (overflow)
8943		mapped_chunk = II_2OVERFLOW(mapped_chunk);
8944	/* convert chunk number from tsearch into final fba */
8945	mapped_fba = DSW_CHK2FBA(mapped_chunk) + (fba % DSW_SIZE);
8946	tmp = NULL;
8947	if (overflow) {
8948		(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
8949		II_READ_START(ip, overflow);
8950		rc = nsc_alloc_buf(OVRFD(ip), mapped_fba, len, NSC_RDBUF, &tmp);
8951		II_READ_END(ip, overflow, rc, len);
8952	} else {
8953		II_READ_START(ip, shadow);
8954		rc = nsc_alloc_buf(SHDFD(ip), mapped_fba, len, NSC_RDBUF, &tmp);
8955		II_READ_END(ip, shadow, rc, len);
8956	}
8957	if (II_SUCCESS(rc)) {
8958		if (dstbuf == NULL)
8959			dstbuf = srcbuf;
8960		rc = nsc_copy(tmp, dstbuf, mapped_fba, fba, len);
8961		(void) nsc_free_buf(tmp);
8962	}
8963	if (overflow)
8964		nsc_release(OVRFD(ip));
8965
8966	return (rc);
8967}
8968
/*
 * _ii_fill_buf
 *	Read data from the required device
 *
 * Calling/Exit State:
 *	Returns 0 if the data was read successfully, otherwise
 *	error code.  A request with NSC_RDAHEAD set in flag returns
 *	NSC_DONE immediately without reading anything.
 *
 * Description:
 *	Reads the data from fba_pos for length fba_len from the
 *	required device. This data may be a mix of data from the master
 *	device and the shadow device, depending on the state of the
 *	bitmaps.
 *
 *	The request is processed one chunk at a time.  For each chunk the
 *	copy and shadow bits decide whether the data comes from the side
 *	the caller opened (bfd->ii_shd selects shadow or master) into
 *	*handle, or from the other side of the pair via *handle2.
 */

static int
_ii_fill_buf(ii_fd_t *bfd, nsc_off_t fba_pos, nsc_size_t fba_len, int flag,
    nsc_buf_t **handle, nsc_buf_t **handle2)
{
	_ii_info_t *ip = bfd->ii_info;
	_ii_info_t *xip;
	int second_shd = 0;
	nsc_off_t temp_fba;
	nsc_size_t temp_len;
	nsc_size_t bmp_len;
	chunkid_t chunk_num;
	int rc;
	int fill_from_pair;
	int rtype = SHDR|BMP;
	nsc_buf_t *second_buf = NULL;

	if (flag&NSC_RDAHEAD)
		return (NSC_DONE);

	chunk_num = fba_pos / DSW_SIZE;
	temp_fba = fba_pos;
	temp_len = fba_len;

	/*
	 * If the master is being updated from a shadow we need to fill from
	 * the correct shadow volume.
	 *
	 * The sibling list is searched for another set with DSW_COPYINGS
	 * set.  If one is found, ip is switched to it for the rest of the
	 * function, its devices are reserved, and a buffer on its shadow
	 * replaces the caller's handle2.
	 */
	if (NSHADOWS(ip) && bfd->ii_shd == 0) {
		for (xip = ip->bi_head; xip; xip = xip->bi_sibling) {
			if (xip == ip)
				continue;
			if (xip->bi_flags &DSW_COPYINGS) {
				second_shd = 1;
				ip = xip;
				if ((rc = _ii_rsrv_devs(ip, rtype,
				    II_INTERNAL)) != 0)
					return (EIO);
				/*
				 * NSC_RDAHEAD was rejected above, so
				 * (flag&NSC_RDAHEAD) is always 0 here.
				 */
				rc = nsc_alloc_buf(SHDFD(ip), fba_pos, fba_len,
				    (flag&NSC_RDAHEAD)|NSC_MIXED, &second_buf);
				if (!II_SUCCESS(rc)) {
					rc = EIO;
					goto out;
				}
				handle2 = &second_buf;
				break;
			}
		}
	}

	while (temp_len > 0) {
		/* bmp_len: portion of the request inside the current chunk */
		if ((temp_fba + temp_len) > DSW_CHK2FBA(chunk_num + 1)) {
			bmp_len = DSW_CHK2FBA(chunk_num + 1) - temp_fba;
			temp_len -= bmp_len;
		} else {
			bmp_len = temp_len;
			temp_len = 0;
		}

		fill_from_pair = 0;

		/*
		 * DSW_COPYINGM set and copy bit set: a read through the
		 * shadow handle must be filled from the pair buffer.
		 */
		if ((ip->bi_flags & DSW_COPYINGM) == DSW_COPYINGM) {
			rc = II_TST_COPY_BIT(ip, chunk_num);
			/* Treat a failed bitmap volume as a clear bit */
			if (rc > 0) {
				/* Copy bit set */
				if (bfd->ii_shd) {
					if (*handle2)
						fill_from_pair = 1;
					else {
						rc = EIO;
						goto out;
					}
				}
			}
		}
		/*
		 * DSW_COPYINGS set and copy bit set: a read through the
		 * master handle must be filled from the pair; no pair buffer
		 * is required when the set is DSW_TREEMAP.
		 */
		if ((ip->bi_flags & DSW_COPYINGS) == DSW_COPYINGS) {
			rc = II_TST_COPY_BIT(ip, chunk_num);
			/* Treat a failed bitmap volume as a clear bit */
			if (rc > 0) {
				/* Copy bit set */
				if (bfd->ii_shd == 0) {
					if (*handle2 ||
					    (ip->bi_flags&DSW_TREEMAP))
						fill_from_pair = 1;
					else {
						rc = EIO;
						goto out;
					}
				}
			}
		}
		if (((ip->bi_flags & DSW_GOLDEN) == 0) && bfd->ii_shd) {
			/* Dependent shadow read */

			rc = II_TST_SHD_BIT(ip, chunk_num);
			if (rc < 0) {
				rc = EIO;
				goto out;
			}
			if (rc == 0) {
				/* Shadow bit clear */
				if (*handle2)
					fill_from_pair = 1;
				else {
					rc = EIO;
					goto out;
				}
			}
		}

		if (fill_from_pair) {
			/* it matters now */
			if (ip->bi_flags & (DSW_MSTOFFLINE | DSW_SHDOFFLINE)) {
				rc = EIO;
				goto out;
			}
			if (*handle2 == NULL &&
			    (ip->bi_flags&DSW_TREEMAP) == 0) {
				rc = EIO;
				goto out;
			}
			/* read via *handle2, delivering the data in *handle */
			rc = ii_read_volume(ip, bfd->ii_shd,
			    *handle2, *handle, chunk_num, temp_fba, bmp_len);
			if (!II_SUCCESS(rc)) {
				/* both volumes are reported as failed */
				_ii_error(ip, DSW_MSTOFFLINE);
				_ii_error(ip, DSW_SHDOFFLINE);
				goto out;
			}
		} else {
			/* plain read from the side the caller opened */
			if (bfd->ii_shd && (ip->bi_flags & DSW_SHDOFFLINE)) {
				rc = EIO;
				goto out;
			}
			if ((bfd->ii_shd == 0) &&
			    (ip->bi_flags & DSW_MSTOFFLINE)) {
				rc = EIO;
				goto out;
			}
			rc = ii_read_volume(ip, !(bfd->ii_shd), *handle, NULL,
			    chunk_num, temp_fba, bmp_len);
			if (!II_SUCCESS(rc)) {
				if (bfd->ii_shd)
					_ii_error(ip, DSW_SHDOFFLINE);
				else
					_ii_error(ip, DSW_MSTOFFLINE);
				goto out;
			}
		}

		temp_fba += bmp_len;
		chunk_num++;
	}

	rc = 0;
out:
	/* undo the second-shadow buffer and reservation taken above */
	if (second_buf)
		(void) nsc_free_buf(second_buf);
	if (second_shd)
		_ii_rlse_devs(ip, rtype);

	return (rc);
}
9146
9147
9148/*
9149 * _ii_shadow_write
9150 *	Perform any copy on write required by a write buffer request
9151 *
9152 * Calling/Exit State:
9153 *	Returns 0 on success, otherwise error code.
9154 *
9155 */
9156
9157static int
9158_ii_shadow_write(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len)
9159{
9160	_ii_info_t *ip = bfd->ii_info;
9161	chunkid_t	chunk_num;
9162	int	rc;
9163	int	flag;
9164	int hanging;
9165
9166	DTRACE_PROBE2(_ii_shadow_write_start, nsc_off_t, pos, nsc_size_t, len);
9167
9168	/* fail immediately if config DB is unavailable */
9169	if ((ip->bi_flags & DSW_CFGOFFLINE) == DSW_CFGOFFLINE) {
9170		return (EIO);
9171	}
9172
9173	chunk_num = pos / DSW_SIZE;
9174
9175	if (bfd->ii_shd)
9176		flag = 0;		/* To shadow */
9177	else
9178		flag = CV_SHD2MST;	/* To master */
9179
9180	mutex_enter(&ip->bi_mutex);
9181	ip->bi_shdref++;
9182	mutex_exit(&ip->bi_mutex);
9183	hanging = (ip->bi_flags&DSW_HANGING) != 0;
9184
9185	for (; (chunk_num >= 0) &&
9186	    DSW_CHK2FBA(chunk_num) < (pos + len); chunk_num++) {
9187
9188		if (!hanging)
9189			_ii_lock_chunk(ip, chunk_num);
9190		rc = _ii_copy_on_write(ip, flag, chunk_num, 1);
9191
9192		/*
9193		 * Set the shadow bit when a small shadow has overflowed so
9194		 * that ii_read_volume can return an error if an attempt is
9195		 * made to read that chunk.
9196		 */
9197		if (!hanging) {
9198			if (rc == 0 ||
9199			    (rc == EIO && (ip->bi_flags&DSW_OVERFLOW) != 0))
9200				(void) II_SET_SHD_BIT(ip, chunk_num);
9201			_ii_unlock_chunk(ip, chunk_num);
9202		}
9203	}
9204
9205	mutex_enter(&ip->bi_mutex);
9206	ip->bi_shdref--;
9207	if (ip->bi_state & DSW_CLOSING) {
9208		if (total_ref(ip) == 0) {
9209			cv_signal(&ip->bi_closingcv);
9210		}
9211	}
9212	mutex_exit(&ip->bi_mutex);
9213
9214	/* did the bitmap fail during this process? */
9215	return (ip->bi_flags & DSW_CFGOFFLINE? EIO : 0);
9216}
9217
9218/*
9219 * _ii_alloc_buf
9220 *	Allocate a buffer of data
9221 *
9222 * Calling/Exit State:
9223 *	Returns 0 for success, < 0 for async I/O, > 0 is an error code.
9224 *
9225 * Description:
9226 *	For a write buffer, calls dsw_shadow_write to perform any necessary
9227 *	copy on write operations, then allocates the real buffers from the
9228 *	underlying devices.
9229 *	For a read buffer, allocates the real buffers from the underlying
9230 *	devices, then calls _ii_fill_buf to fill the required buffer.
9231 *	For a buffer that is neither read nor write, just allocate the
9232 *	buffers so that a _ii_fill_buf can be done later by _ii_read.
9233 */
9234
9235static int
9236_ii_alloc_buf(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len, int flag,
9237    ii_buf_t **ptr)
9238{
9239	_ii_info_t *ip = bfd->ii_info;
9240	ii_buf_t *h;
9241	int	raw = II_RAW(bfd);
9242	int rc = 0;
9243	int ioflag;
9244	int fbuf = 0, fbuf2 = 0, abuf = 0;
9245	int rw_ent = 0;
9246
9247	if (bfd->ii_bmp) {
9248		DTRACE_PROBE(_ii_alloc_buf_end);
9249		/* any I/O to the bitmap device is barred */
9250		return (EIO);
9251	}
9252
9253	if (len == 0) {
9254		DTRACE_PROBE(_ii_alloc_buf_end);
9255		return (EINVAL);
9256	}
9257
9258	/* Bounds checking */
9259	if (pos + len > ip->bi_size) {
9260		if (ii_debug > 1)
9261			cmn_err(CE_NOTE,
9262			    "!ii: Attempt to access beyond end of ii volume");
9263		DTRACE_PROBE(_ii_alloc_buf_end);
9264		return (EIO);
9265	}
9266
9267	h = *ptr;
9268	if (h == NULL) {
9269		h = (ii_buf_t *)_ii_alloc_handle(NULL, NULL, NULL, bfd);
9270		if (h == NULL) {
9271			DTRACE_PROBE(_ii_alloc_buf_end);
9272			return (ENOMEM);
9273		}
9274	}
9275
9276	/*
9277	 * Temporary nsc_reserve of bitmap and other device.
9278	 * This device has already been reserved by the preceding _ii_attach.
9279	 * Corresponding nsc_release is in _ii_free_buf.
9280	 */
9281
9282	h->ii_rsrv = BMP | (raw ? (bfd->ii_shd ? MSTR : SHDR)
9283	    : (bfd->ii_shd ? MST : SHD));
9284
9285	if (!bfd->ii_shd)
9286		ip = ip->bi_master;
9287
9288	rw_enter(&ip->bi_linkrw, RW_READER);
9289	rw_ent = 1;
9290	if (ip->bi_shdfd == NULL || (ip->bi_flags & DSW_SHDEXPORT) ==
9291	    DSW_SHDEXPORT)
9292		h->ii_rsrv &= ~(SHD|SHDR);
9293	if ((rc = _ii_rsrv_devs(ip, h->ii_rsrv, II_EXTERNAL)) != 0) {
9294		rw_exit(&ip->bi_linkrw);
9295		rw_ent = 0;
9296		h->ii_rsrv = NULL;
9297		goto error;
9298	}
9299
9300	if (flag & NSC_WRBUF) {
9301		rc = _ii_shadow_write(bfd, pos, len);
9302		if (!II_SUCCESS(rc))
9303			goto error;
9304	}
9305
9306	if (!(flag & NSC_RDAHEAD))
9307		ioflag = flag & ~(NSC_RDBUF);
9308	else
9309		ioflag = flag;
9310
9311	if (bfd->ii_shd) {
9312		/*
9313		 * SHADOW
9314		 */
9315
9316		if (ip->bi_flags & DSW_SHDEXPORT) {
9317			rc = EIO;
9318			goto error;
9319		}
9320		/*
9321		 * The master device buffer has to be allocated first
9322		 * so that deadlocks are avoided.
9323		 */
9324		DTRACE_PROBE(AllocBufFor_SHADOW);
9325
9326		if ((ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) == 0) {
9327			rc = nsc_alloc_buf(MSTFD(ip), pos, len,
9328			    (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2);
9329			if (!II_SUCCESS(rc)) {
9330				if (ii_debug > 2)
9331					cmn_err(CE_WARN, "!ii: "
9332					    "Join/write-S race detected\n");
9333				if (h->ii_bufp2)
9334					(void) nsc_free_buf(h->ii_bufp2);
9335				h->ii_bufp2 = NULL;
9336				/*
9337				 * Carry on as this will not matter if
9338				 * _ii_fill_buf is not called, or if
9339				 * it is called but doesn't need to read this
9340				 * volume.
9341				 */
9342				rc = 0;
9343			}
9344			fbuf2 = 1;
9345		}
9346
9347		if (ip->bi_flags & DSW_SHDOFFLINE) {
9348			rc = EIO;
9349			goto error;
9350		}
9351		if ((ip->bi_flags)&DSW_TREEMAP) {
9352			rc = nsc_alloc_abuf(pos, len, 0, &h->ii_abufp);
9353			if (!II_SUCCESS(rc)) {
9354				_ii_error(ip, DSW_SHDOFFLINE);
9355				goto error;
9356			}
9357			abuf = 1;
9358		} else {
9359			II_ALLOC_BUF(ip, shadow, rc, SHDFD(ip), pos, len,
9360			    ioflag, &h->ii_bufp);	/* do not read yet */
9361			if (!II_SUCCESS(rc)) {
9362				_ii_error(ip, DSW_SHDOFFLINE);
9363				goto error;
9364			}
9365			fbuf = 1;
9366		}
9367	} else {
9368		/*
9369		 * MASTER
9370		 */
9371
9372		/*
9373		 * The master device buffer has to be allocated first
9374		 * so that deadlocks are avoided.
9375		 */
9376
9377		if (ip->bi_flags & (DSW_MSTOFFLINE|DSW_SHDIMPORT)) {
9378			rc = EIO;
9379			goto error;
9380		}
9381
9382		DTRACE_PROBE(AllocBufFor_MASTER);
9383
9384		II_ALLOC_BUF(ip, master, rc, MSTFD(ip), pos, len, ioflag,
9385		    &h->ii_bufp);		/* do not read yet */
9386		if (!II_SUCCESS(rc)) {
9387			_ii_error(ip, DSW_MSTOFFLINE);
9388			goto error;
9389		}
9390		fbuf = 1;
9391
9392		/*
9393		 * If shadow FD and (dependent set OR copying) and
9394		 * not (compact dependent && shadow offline && shadow exported)
9395		 */
9396		if ((ip->bi_shdfd) &&
9397		    ((ip->bi_flags & DSW_COPYINGP) ||
9398		    (!(ip->bi_flags & DSW_GOLDEN))) &&
9399		    (!(ip->bi_flags &
9400		    (DSW_TREEMAP|DSW_SHDOFFLINE|DSW_SHDEXPORT)))) {
9401			rc = nsc_alloc_buf(SHDFD(ip), pos, len,
9402			    (flag&NSC_RDAHEAD)|NSC_MIXED, &h->ii_bufp2);
9403			if (!II_SUCCESS(rc)) {
9404				if (ii_debug > 2)
9405					cmn_err(CE_WARN, "!ii: "
9406					    "Join/write-M race detected\n");
9407				if (h->ii_bufp2)
9408					(void) nsc_free_buf(h->ii_bufp2);
9409				h->ii_bufp2 = NULL;
9410				/*
9411				 * Carry on as this will not matter if
9412				 * _ii_fill_buf is not called, or if
9413				 * it is called but doesn't need to read this
9414				 * volume.
9415				 */
9416				rc = 0;
9417			}
9418			fbuf2 = 1;
9419		}
9420	}
9421
9422	if (flag & NSC_RDBUF)
9423		rc = _ii_fill_buf(bfd, pos, len, flag,
9424		    h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2);
9425
9426error:
9427	if (II_SUCCESS(rc)) {
9428		h->ii_bufh.sb_vec = h->ii_abufp ? h->ii_abufp->sb_vec :
9429		    h->ii_bufp->sb_vec;
9430		h->ii_bufh.sb_error = 0;
9431		h->ii_bufh.sb_flag |= flag;
9432		h->ii_bufh.sb_pos = pos;
9433		h->ii_bufh.sb_len = len;
9434	} else {
9435		h->ii_bufh.sb_error = rc;
9436		if (h->ii_bufp2 && fbuf2) {
9437			(void) nsc_free_buf(h->ii_bufp2);
9438			h->ii_bufp2 = NULL;
9439		}
9440		if (h->ii_bufp && fbuf) {
9441			(void) nsc_free_buf(h->ii_bufp);
9442			h->ii_bufp = NULL;
9443		}
9444		if (h->ii_abufp && abuf) {
9445			(void) nsc_free_buf(h->ii_abufp);
9446			h->ii_abufp = NULL;
9447		}
9448
9449		if (h->ii_rsrv) {
9450			/*
9451			 * Release temporary reserve - reserved above.
9452			 */
9453			_ii_rlse_devs(ip, h->ii_rsrv);
9454			h->ii_rsrv = NULL;
9455		}
9456		if (rw_ent)
9457			rw_exit(&ip->bi_linkrw);
9458	}
9459
9460	return (rc);
9461}
9462
9463
9464/*
9465 * _ii_free_buf
9466 */
9467
9468static int
9469_ii_free_buf(ii_buf_t *h)
9470{
9471	ii_fd_t *bfd;
9472	int rsrv;
9473	int rc;
9474
9475	if (h->ii_abufp == NULL) {
9476		rc = nsc_free_buf(h->ii_bufp);
9477	} else {
9478		rc = nsc_free_buf(h->ii_abufp);
9479		h->ii_abufp = NULL;
9480	}
9481	if (!II_SUCCESS(rc))
9482		return (rc);
9483	if (h->ii_bufp2) {
9484		rc = nsc_free_buf(h->ii_bufp2);
9485		h->ii_bufp2 = NULL;
9486		if (!II_SUCCESS(rc))
9487			return (rc);
9488	}
9489
9490	bfd = h->ii_fd;
9491	rsrv = h->ii_rsrv;
9492
9493	if ((h->ii_bufh.sb_flag & NSC_HALLOCATED) == 0) {
9494		rc = _ii_free_handle(h, h->ii_fd);
9495		if (!II_SUCCESS(rc))
9496			return (rc);
9497	} else {
9498		h->ii_bufh.sb_flag = NSC_HALLOCATED;
9499		h->ii_bufh.sb_vec = NULL;
9500		h->ii_bufh.sb_error = 0;
9501		h->ii_bufh.sb_pos = 0;
9502		h->ii_bufh.sb_len = 0;
9503		h->ii_rsrv = NULL;
9504	}
9505
9506	/*
9507	 * Release temporary reserve - reserved in _ii_alloc_buf.
9508	 */
9509
9510	if (rsrv)
9511		_ii_rlse_devs(bfd->ii_info, rsrv);
9512	rw_exit(&bfd->ii_info->bi_linkrw);
9513
9514	return (0);
9515}
9516
9517
9518/*
9519 * _ii_open
9520 *	Open a device
9521 *
9522 * Calling/Exit State:
9523 *	Returns a token to identify the shadow device.
9524 *
9525 * Description:
9526 *	Performs the housekeeping operations associated with an upper layer
9527 *	of the nsc stack opening a shadowed device.
9528 */
9529
9530/* ARGSUSED */
9531
9532static int
9533_ii_open(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9534{
9535	_ii_info_t *ip;
9536	_ii_overflow_t *op;
9537	ii_fd_t *bfd;
9538	int is_mst = 0;
9539	int is_shd = 0;
9540	int raw = (flag & NSC_CACHE) == 0;
9541
9542	bfd = nsc_kmem_zalloc(sizeof (*bfd), KM_SLEEP, _ii_local_mem);
9543	if (!bfd)
9544		return (ENOMEM);
9545
9546	DTRACE_PROBE1(_ii_open_mutex,
9547	    ii_fd_t *, bfd);
9548
9549	mutex_enter(&_ii_info_mutex);
9550
9551	for (ip = _ii_info_top; ip; ip = ip->bi_next) {
9552		if (strcmp(path, ii_pathname(ip->bi_mstfd)) == 0) {
9553			is_mst = 1;
9554			break;
9555		} else if (strcmp(path, ip->bi_keyname) == 0) {
9556			is_shd = 1;
9557			break;
9558		} else if (strcmp(path, ii_pathname(ip->bi_bmpfd)) == 0)
9559			break;
9560	}
9561
9562	if (is_mst)
9563		ip = ip->bi_master;
9564
9565	if (ip && ip->bi_disabled && !(ip->bi_state & DSW_MULTIMST)) {
9566		DTRACE_PROBE(_ii_open_Disabled);
9567		mutex_exit(&_ii_info_mutex);
9568		return (EINTR);
9569	}
9570
9571	if (!ip) {
9572		/* maybe it's an overflow */
9573		mutex_exit(&_ii_info_mutex);
9574		mutex_enter(&_ii_overflow_mutex);
9575		for (op = _ii_overflow_top; op; op = op->ii_next) {
9576			if (strcmp(path, op->ii_volname) == 0)
9577				break;
9578		}
9579		mutex_exit(&_ii_overflow_mutex);
9580
9581		if (!op) {
9582			nsc_kmem_free(bfd, sizeof (*bfd));
9583			DTRACE_PROBE(_ii_open_end_EINVAL);
9584			return (EINVAL);
9585		}
9586		bfd->ii_ovr = 1;
9587		bfd->ii_oflags = flag;
9588		bfd->ii_optr = op;
9589		*cdp = (blind_t)bfd;
9590
9591		DTRACE_PROBE(_ii_open_end_overflow);
9592		return (0);
9593	}
9594	mutex_enter(&ip->bi_mutex);
9595	ip->bi_ioctl++;
9596	mutex_exit(&_ii_info_mutex);
9597
9598	if (is_mst) {
9599		if (raw) {
9600			ip->bi_mstr_iodev = NULL;	/* set in attach */
9601			ip->bi_mstrref++;
9602		} else {
9603			ip->bi_mst_iodev = NULL;	/* set in attach */
9604			ip->bi_mstref++;
9605		}
9606		ip->bi_master->bi_iifd = bfd;
9607	} else if (is_shd) {
9608		if (raw) {
9609			ip->bi_shdr_iodev = NULL;	/* set in attach */
9610			ip->bi_shdrref++;
9611		} else {
9612			ip->bi_shd_iodev = NULL;	/* set in attach */
9613			ip->bi_shdref++;
9614		}
9615		bfd->ii_shd = 1;
9616	} else {
9617		ip->bi_bmpref++;
9618		ip->bi_bmp_iodev = NULL;	/* set in attach */
9619		bfd->ii_bmp = 1;
9620	}
9621
9622	_ii_ioctl_done(ip);
9623	mutex_exit(&ip->bi_mutex);
9624
9625	bfd->ii_info = ip;
9626	bfd->ii_oflags = flag;
9627
9628	*cdp = (blind_t)bfd;
9629
9630	return (0);
9631}
9632
9633static int
9634_ii_openc(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9635{
9636	return (_ii_open(path, NSC_CACHE|flag, cdp, iodev));
9637}
9638
9639static int
9640_ii_openr(char *path, int flag, blind_t *cdp, nsc_iodev_t *iodev)
9641{
9642	return (_ii_open(path, NSC_DEVICE|flag, cdp, iodev));
9643}
9644
9645
9646/*
9647 * _ii_close
9648 *	Close a device
9649 *
9650 * Calling/Exit State:
9651 *	Always succeeds - returns 0
9652 *
9653 * Description:
9654 *	Performs the housekeeping operations associated with an upper layer
9655 *	of the nsc stack closing a shadowed device.
9656 */
9657
9658static int
9659_ii_close(bfd)
9660ii_fd_t *bfd;
9661{
9662	_ii_info_t *ip = bfd->ii_info;
9663	_ii_info_dev_t *dip;
9664	int raw;
9665
9666	if (!ip) {
9667		ASSERT(bfd->ii_ovr);
9668		return (0);
9669	}
9670
9671	raw = II_RAW(bfd);
9672
9673	mutex_enter(&ip->bi_mutex);
9674
9675	if (bfd->ii_shd && raw) {
9676		dip = &ip->bi_shdrdev;
9677	} else if (bfd->ii_shd) {
9678		dip = &ip->bi_shddev;
9679	} else if (bfd->ii_bmp) {
9680		dip = &ip->bi_bmpdev;
9681	} else if (raw) {
9682		dip = ip->bi_mstrdev;
9683	} else {
9684		dip = ip->bi_mstdev;
9685	}
9686
9687	if (dip) {
9688		dip->bi_ref--;
9689		if (dip->bi_ref == 0)
9690			dip->bi_iodev = NULL;
9691	}
9692
9693	if (ip->bi_state & DSW_CLOSING) {
9694		if (total_ref(ip) == 0) {
9695			cv_signal(&ip->bi_closingcv);
9696		}
9697	} else if ((ip->bi_flags & DSW_HANGING) &&
9698	    (ip->bi_head->bi_state & DSW_CLOSING))
9699		cv_signal(&ip->bi_head->bi_closingcv);
9700
9701	if (!(bfd->ii_shd || bfd->ii_bmp))	/* is master device */
9702		ip->bi_master->bi_iifd = NULL;
9703	mutex_exit(&ip->bi_mutex);
9704
9705	nsc_kmem_free(bfd, sizeof (*bfd));
9706
9707	return (0);
9708}
9709
9710/*
9711 * _ii_alloc_handle
9712 *	Allocate a handle
9713 *
9714 */
9715
9716static nsc_buf_t *
9717_ii_alloc_handle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)(), ii_fd_t *bfd)
9718{
9719	ii_buf_t *h;
9720
9721	if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9722		return (NULL);
9723
9724	h = kmem_alloc(sizeof (*h), KM_SLEEP);
9725	if (!h)
9726		return (NULL);
9727
9728	h->ii_abufp = NULL;
9729	h->ii_bufp = nsc_alloc_handle(II_FD(bfd), d_cb, r_cb, w_cb);
9730	if (!h->ii_bufp) {
9731		kmem_free(h, sizeof (*h));
9732		return (NULL);
9733	}
9734	h->ii_bufp2 = NULL;
9735	h->ii_bufh.sb_flag = NSC_HALLOCATED;
9736	h->ii_fd = bfd;
9737	h->ii_rsrv = NULL;
9738
9739	return ((nsc_buf_t *)h);
9740}
9741
9742
9743/*
9744 * _ii_free_handle
9745 *	Free a handle
9746 *
9747 */
9748
9749static int	 /*ARGSUSED*/
9750_ii_free_handle(ii_buf_t *h, ii_fd_t *bfd)
9751{
9752	int rc;
9753
9754	if (h->ii_abufp)
9755		(void) nsc_free_buf(h->ii_abufp);
9756	rc = nsc_free_handle(h->ii_bufp);
9757	if (!II_SUCCESS(rc)) {
9758		return (rc);
9759	}
9760
9761	kmem_free(h, sizeof (ii_buf_t));
9762
9763	return (0);
9764}
9765
9766
9767/*
9768 * _ii_attach
9769 *	Attach
9770 *
9771 * Calling/Exit State:
9772 *	Returns 0 for success, errno on failure.
9773 *
9774 * Description:
9775 */
9776
9777static int
9778_ii_attach(ii_fd_t *bfd, nsc_iodev_t *iodev)
9779{
9780	_ii_info_t *ip;
9781	int dev;
9782	int raw;
9783	int rc;
9784	_ii_info_dev_t *infop;
9785
9786	raw  = II_RAW(bfd);
9787
9788	DTRACE_PROBE2(_ii_attach_info,
9789	    char *, bfd->ii_shd? "shadow" : "master",
9790	    int, raw);
9791
9792	if (bfd->ii_ovr)
9793		return (EINVAL);
9794
9795	ip = bfd->ii_info;
9796	if (ip == NULL)
9797		return (EINVAL);
9798
9799	mutex_enter(&ip->bi_mutex);
9800	if (bfd->ii_bmp) {
9801		infop = &ip->bi_bmpdev;
9802	} else if (bfd->ii_shd) {
9803		if (raw) {
9804			infop = &ip->bi_shdrdev;
9805		} else {
9806			infop = &ip->bi_shddev;
9807		}
9808	} else if (!bfd->ii_ovr) {
9809		if (raw) {
9810			infop = ip->bi_mstrdev;
9811		} else {
9812			infop = ip->bi_mstdev;
9813		}
9814	}
9815
9816	if (iodev) {
9817		infop->bi_iodev = iodev;
9818		nsc_set_owner(infop->bi_fd, infop->bi_iodev);
9819	}
9820	mutex_exit(&ip->bi_mutex);
9821
9822	if (bfd->ii_bmp)
9823		return (EINVAL);
9824
9825	if (raw)
9826		dev = bfd->ii_shd ? SHDR : MSTR;
9827	else
9828		dev = bfd->ii_shd ? SHD : MST;
9829
9830	rc = _ii_rsrv_devs(ip, dev, II_EXTERNAL);
9831
9832	return (rc);
9833}
9834
9835
9836/*
9837 * _ii_detach
9838 *	Detach
9839 *
9840 * Calling/Exit State:
9841 *	Returns 0 for success, always succeeds
9842 *
9843 * Description:
9844 */
9845
9846static int
9847_ii_detach(bfd)
9848ii_fd_t *bfd;
9849{
9850	int dev;
9851	int raw;
9852
9853	raw = II_RAW(bfd);
9854
9855	DTRACE_PROBE2(_ii_detach_info,
9856	    char *, bfd->ii_shd? "shadow" : "master",
9857	    int, raw);
9858
9859	if (bfd->ii_bmp)
9860		return (0);
9861
9862	ASSERT(bfd->ii_info);
9863	dev = bfd->ii_shd ? (raw ? SHDR : SHD) : (raw ? MSTR : MST);
9864	_ii_rlse_devs(bfd->ii_info, dev);
9865
9866	return (0);
9867}
9868
9869/*
9870 * _ii_get_pinned
9871 *
9872 */
9873
9874static int
9875_ii_get_pinned(ii_fd_t *bfd)
9876{
9877	int rc;
9878
9879	if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9880		return (EIO);
9881
9882	rc = nsc_get_pinned(II_FD(bfd));
9883
9884	return (rc);
9885}
9886
9887/*
9888 * _ii_discard_pinned
9889 *
9890 */
9891
9892static int
9893_ii_discard_pinned(ii_fd_t *bfd, nsc_off_t pos, nsc_size_t len)
9894{
9895	int rc;
9896
9897	if (REMOTE_VOL(bfd->ii_shd, bfd->ii_info))
9898		return (EIO);
9899	rc = nsc_discard_pinned(II_FD(bfd), pos, len);
9900
9901	return (rc);
9902}
9903
9904/*
9905 * _ii_partsize
9906 *
9907 */
9908
9909static int
9910_ii_partsize(ii_fd_t *bfd, nsc_size_t *ptr)
9911{
9912	/* Always return saved size */
9913	*ptr = bfd->ii_info->bi_size;
9914	return (0);
9915}
9916
9917/*
9918 * _ii_maxfbas
9919 *
9920 */
9921
9922static int
9923_ii_maxfbas(ii_fd_t *bfd, int flag, nsc_size_t *ptr)
9924{
9925	int rc;
9926	int rs;
9927	int dev;
9928	_ii_info_t *ip;
9929
9930	ip = bfd->ii_info;
9931	if (REMOTE_VOL(bfd->ii_shd, ip))
9932		return (EIO);
9933
9934	dev =  ((ip->bi_flags)&DSW_SHDIMPORT) ? SHDR : MSTR;
9935
9936	DTRACE_PROBE1(_ii_maxfbas_info,
9937	    char *, dev == SHDR? "shadow" : "master");
9938
9939	rs = _ii_rsrv_devs(ip, dev, II_INTERNAL);
9940	rc = nsc_maxfbas((dev == MSTR) ? MSTFD(ip) : SHDFD(ip), flag, ptr);
9941
9942	if (rs == 0)
9943		_ii_rlse_devs(ip, dev);
9944
9945	return (rc);
9946}
9947
9948/*
9949 * ii_get_group_list
9950 */
9951_ii_info_t **
9952ii_get_group_list(char *group, int *count)
9953{
9954	int i;
9955	int nip;
9956	uint64_t   hash;
9957	_ii_info_t **ipa;
9958	_ii_lsthead_t *head;
9959	_ii_lstinfo_t *np;
9960
9961	hash = nsc_strhash(group);
9962
9963	for (head = _ii_group_top; head; head = head->lst_next) {
9964		if (hash == head->lst_hash && strncmp(head->lst_name,
9965		    group, DSW_NAMELEN) == 0)
9966			break;
9967	}
9968
9969	if (!head) {
9970		return (NULL);
9971	}
9972
9973	/* Count entries */
9974	for (nip = 0, np = head->lst_start; np; np = np->lst_next)
9975		++nip;
9976
9977	ASSERT(nip > 0);
9978
9979	ipa = kmem_zalloc(sizeof (_ii_info_t *) * nip, KM_SLEEP);
9980
9981	np = head->lst_start;
9982
9983	for (i = 0; i < nip; i++) {
9984		ASSERT(np != 0);
9985
9986		ipa[i] = np->lst_ip;
9987		np = np->lst_next;
9988	}
9989
9990	*count = nip;
9991	return (ipa);
9992}
9993
9994/*
9995 * _ii_pinned
9996 *
9997 */
9998
9999static void
10000_ii_pinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
10001{
10002	DTRACE_PROBE3(_ii_pinned_start, nsc_iodev_t, dip->bi_iodev,
10003	    nsc_off_t, pos, nsc_size_t, len);
10004
10005	nsc_pinned_data(dip->bi_iodev, pos, len);
10006
10007}
10008
10009/*
10010 * _ii_unpinned
10011 *
10012 */
10013
10014static void
10015_ii_unpinned(_ii_info_dev_t *dip, nsc_off_t pos, nsc_size_t len)
10016{
10017	nsc_unpinned_data(dip->bi_iodev, pos, len);
10018
10019}
10020
10021
10022/*
10023 * _ii_read
10024 */
10025
10026static int
10027_ii_read(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10028{
10029	int rc;
10030	void *sb_vec;
10031	nsc_vec_t **src;
10032
10033	if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info))
10034		rc = EIO;
10035	else {
10036		src =  h->ii_abufp? &h->ii_abufp->sb_vec : &h->ii_bufp->sb_vec;
10037		sb_vec = *src;
10038		*src = h->ii_bufh.sb_vec;
10039		rc = _ii_fill_buf(h->ii_fd, pos, len, flag,
10040		    h->ii_abufp ? &h->ii_abufp : &h->ii_bufp, &h->ii_bufp2);
10041		*src = sb_vec;
10042	}
10043	if (!II_SUCCESS(rc))
10044		h->ii_bufh.sb_error = rc;
10045
10046	return (rc);
10047}
10048
10049
10050/*
10051 * _ii_write
10052 */
10053
10054static int
10055_ii_write(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10056{
10057	int rc;
10058	ii_fd_t *bfd = h->ii_fd;
10059	_ii_info_t *ip = bfd->ii_info;
10060	chunkid_t	chunk_num;
10061	nsc_size_t	copy_len;
10062	nsc_off_t	mapped_fba;
10063	chunkid_t	mapped_chunk;
10064	int	overflow;
10065	nsc_buf_t *tmp;
10066	void	*sb_vec;
10067
10068	if (REMOTE_VOL(h->ii_fd->ii_shd, h->ii_fd->ii_info))
10069		rc = EIO;
10070	else if ((ip->bi_flags&DSW_TREEMAP) == 0 || !bfd->ii_shd) {
10071		sb_vec = h->ii_bufp->sb_vec;
10072		h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10073		if (bfd->ii_shd) {
10074			II_NSC_WRITE(ip, shadow, rc, h->ii_bufp, pos, len,
10075			    flag);
10076		} else {
10077			II_NSC_WRITE(ip, master, rc, h->ii_bufp, pos, len,
10078			    flag);
10079		}
10080		h->ii_bufp->sb_vec = sb_vec;
10081	} else {
10082		/* write of mapped shadow buffer */
10083		rc = 0;
10084		chunk_num = pos / DSW_SIZE;
10085		while (len > 0 && II_SUCCESS(rc)) {
10086			/*
10087			 * don't need to test bitmaps as allocating the
10088			 * write buffer will c-o-write the chunk.
10089			 */
10090			mapped_chunk = ii_tsearch(ip, chunk_num);
10091			if (mapped_chunk == II_NULLNODE) {
10092				rc = EIO;
10093				break;
10094			}
10095			overflow = II_ISOVERFLOW(mapped_chunk);
10096			if (overflow)
10097				mapped_chunk = II_2OVERFLOW(mapped_chunk);
10098			mapped_fba = DSW_CHK2FBA(mapped_chunk) +
10099			    (pos % DSW_SIZE);
10100			copy_len = DSW_SIZE - (pos % DSW_SIZE);
10101			if (copy_len > len)
10102				copy_len = len;
10103			tmp = NULL;
10104			if (overflow) {
10105				(void) nsc_reserve(OVRFD(ip), NSC_MULTI);
10106				rc = nsc_alloc_buf(OVRFD(ip), mapped_fba,
10107				    copy_len, NSC_WRBUF, &tmp);
10108			} else
10109				rc = nsc_alloc_buf(SHDFD(ip), mapped_fba,
10110				    copy_len, NSC_WRBUF, &tmp);
10111			sb_vec = h->ii_abufp->sb_vec;
10112			h->ii_abufp->sb_vec = h->ii_bufh.sb_vec;
10113			if (II_SUCCESS(rc)) {
10114				rc = nsc_copy(h->ii_abufp, tmp, pos,
10115				    mapped_fba, copy_len);
10116			}
10117			if (overflow) {
10118				II_NSC_WRITE(ip, overflow, rc, tmp, mapped_fba,
10119				    copy_len, flag);
10120			} else {
10121				II_NSC_WRITE(ip, shadow, rc, tmp, mapped_fba,
10122				    copy_len, flag);
10123			}
10124			h->ii_abufp->sb_vec = sb_vec;
10125			(void) nsc_free_buf(tmp);
10126			if (overflow)
10127				nsc_release(OVRFD(ip));
10128			/* move on to next chunk */
10129			pos += copy_len;
10130			len -= copy_len;
10131			chunk_num++;
10132		}
10133	}
10134	if (!II_SUCCESS(rc))
10135		h->ii_bufh.sb_error = rc;
10136
10137	return (rc);
10138}
10139
10140
10141/*
10142 * _ii_zero
10143 */
10144
10145static int
10146_ii_zero(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10147{
10148	int rc;
10149	void *sb_vec;
10150
10151	sb_vec = h->ii_bufp->sb_vec;
10152	h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10153	rc = nsc_zero(h->ii_bufp, pos, len, flag);
10154	h->ii_bufp->sb_vec = sb_vec;
10155	if (!II_SUCCESS(rc))
10156		h->ii_bufh.sb_error = rc;
10157
10158	return (rc);
10159}
10160
10161
10162/*
10163 * _ii_uncommit
10164 */
10165
10166static int
10167_ii_uncommit(ii_buf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
10168{
10169	int rc;
10170	void *sb_vec;
10171
10172	sb_vec = h->ii_bufp->sb_vec;
10173	h->ii_bufp->sb_vec = h->ii_bufh.sb_vec;
10174	rc = nsc_uncommit(h->ii_bufp, pos, len, flag);
10175	h->ii_bufp->sb_vec = sb_vec;
10176	if (!II_SUCCESS(rc))
10177		h->ii_bufh.sb_error = rc;
10178
10179	return (rc);
10180}
10181
10182
10183/*
10184 * _ii_trksize
10185 */
10186
10187static int
10188_ii_trksize(ii_fd_t *bfd, int trksize)
10189{
10190	int rc;
10191
10192	rc = nsc_set_trksize(II_FD(bfd), trksize);
10193
10194	return (rc);
10195}
10196
10197/*
10198 * _ii_register_path
10199 */
10200
10201static nsc_path_t *
10202_ii_register_path(char *path, int type, nsc_io_t *io)
10203{
10204	nsc_path_t *tok;
10205
10206	tok = nsc_register_path(path, type, io);
10207
10208	return (tok);
10209}
10210
10211/*
10212 * _ii_unregister_path
10213 */
10214/*ARGSUSED*/
10215static int
10216_ii_unregister_path(nsc_path_t *sp, int flag, char *type)
10217{
10218	int rc;
10219
10220	rc = nsc_unregister_path(sp, flag);
10221
10222	return (rc);
10223}
10224
10225int
10226_ii_ll_add(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char *name,
10227    char **key)
10228{
10229	_ii_lsthead_t **head;
10230	_ii_lstinfo_t *node;
10231	uint64_t hash;
10232
10233	ASSERT(key && !*key);
10234	ASSERT(ip && mutex && lst && name);
10235
10236	node = kmem_zalloc(sizeof (_ii_lstinfo_t), KM_SLEEP);
10237	if (!node) {
10238		cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM");
10239		DTRACE_PROBE(_ii_ll_add_end_ENOMEM);
10240		return (ENOMEM);
10241	}
10242	node->lst_ip = ip;
10243
10244	/* find out where we should insert it */
10245	hash = nsc_strhash(name);
10246
10247	mutex_enter(mutex);
10248	for (head = lst; *head; head = &((*head)->lst_next)) {
10249		if (((*head)->lst_hash == hash) &&
10250		    strncmp(name, (*head)->lst_name, DSW_NAMELEN) == 0) {
10251			node->lst_next = (*head)->lst_start;
10252			(*head)->lst_start = node;
10253			break;
10254		}
10255	}
10256
10257	if (!*head) {
10258		/* create a new entry */
10259		*head = kmem_zalloc(sizeof (_ii_lsthead_t), KM_SLEEP);
10260		if (!*head) {
10261			/* bother */
10262			cmn_err(CE_WARN, "!ii: _ii_ll_add: ENOMEM");
10263			kmem_free(node, sizeof (_ii_lstinfo_t));
10264			DTRACE_PROBE(_ii_ll_add_end_2);
10265			return (ENOMEM);
10266		}
10267		(*head)->lst_hash = hash;
10268		(void) strncpy((*head)->lst_name, name, DSW_NAMELEN);
10269		(*head)->lst_start = node;
10270	}
10271	mutex_exit(mutex);
10272
10273	*key = (*head)->lst_name;
10274
10275	return (0);
10276}
10277
10278int
10279_ii_ll_remove(_ii_info_t *ip, kmutex_t *mutex, _ii_lsthead_t **lst, char **key)
10280{
10281	_ii_lsthead_t **head, *oldhead = 0;
10282	_ii_lstinfo_t **node, *oldnode = 0;
10283	uint64_t hash;
10284	int found;
10285
10286	ASSERT(key && *key);
10287	ASSERT(ip && lst);
10288
10289	hash = nsc_strhash(*key);
10290
10291	mutex_enter(mutex);
10292	for (head = lst; *head; head = &((*head)->lst_next)) {
10293		if (((*head)->lst_hash == hash) &&
10294		    strncmp(*key, (*head)->lst_name, DSW_NAMELEN) == 0)
10295			break;
10296	}
10297	if (!*head) {
10298		/* no such link (!) */
10299		mutex_exit(mutex);
10300		return (0);
10301	}
10302
10303	found = 0;
10304	for (node = &(*head)->lst_start; *node; node = &((*node)->lst_next)) {
10305		if (ip == (*node)->lst_ip) {
10306			oldnode = *node;
10307			*node = (*node)->lst_next;
10308			kmem_free(oldnode, sizeof (_ii_lstinfo_t));
10309			found = 1;
10310			break;
10311		}
10312	}
10313
10314	ASSERT(found);
10315
10316	if (!found) {
10317		mutex_exit(mutex);
10318		return (0);
10319	}
10320
10321	/* did we just delete the last set in this resource group? */
10322	if (!(*head)->lst_start) {
10323		oldhead = *head;
10324		*head = (*head)->lst_next;
10325		kmem_free(oldhead, sizeof (_ii_lsthead_t));
10326	}
10327	mutex_exit(mutex);
10328
10329	*key = NULL;
10330
10331	return (0);
10332}
10333
/*
 * Name/function table for the "Pinned" and "Unpinned" callbacks,
 * _ii_pinned and _ii_unpinned.  The table ends with an all-zero entry.
 * NOTE(review): where this table is handed to nsctl is not visible in
 * this part of the file - confirm at the point of use.
 */
static nsc_def_t _ii_fd_def[] = {
	"Pinned",	(uintptr_t)_ii_pinned,		0,
	"Unpinned",	(uintptr_t)_ii_unpinned,	0,
	0,		0,				0
};
10339
10340
/*
 * Name/function table of the II entry points whose "Open" entry is
 * _ii_openc, which adds NSC_CACHE to the open flags.  All other entries
 * are the same functions as in _ii_ior_def.  "Provide" has no function
 * and the table ends with an all-zero entry.
 */
static nsc_def_t _ii_io_def[] = {
	"Open",		(uintptr_t)_ii_openc,		0,
	"Close",	(uintptr_t)_ii_close,		0,
	"Attach",	(uintptr_t)_ii_attach,		0,
	"Detach",	(uintptr_t)_ii_detach,		0,
	"AllocHandle",	(uintptr_t)_ii_alloc_handle,	0,
	"FreeHandle",	(uintptr_t)_ii_free_handle,	0,
	"AllocBuf",	(uintptr_t)_ii_alloc_buf,	0,
	"FreeBuf",	(uintptr_t)_ii_free_buf,	0,
	"GetPinned",	(uintptr_t)_ii_get_pinned,	0,
	"Discard",	(uintptr_t)_ii_discard_pinned,	0,
	"PartSize",	(uintptr_t)_ii_partsize,	0,
	"MaxFbas",	(uintptr_t)_ii_maxfbas,	0,
	"Read",		(uintptr_t)_ii_read,		0,
	"Write",	(uintptr_t)_ii_write,		0,
	"Zero",		(uintptr_t)_ii_zero,		0,
	"Uncommit",	(uintptr_t)_ii_uncommit,	0,
	"TrackSize",	(uintptr_t)_ii_trksize,	0,
	"Provide",	0,				0,
	0,		0,				0
};
10362
/*
 * Name/function table of the II entry points whose "Open" entry is
 * _ii_openr, which adds NSC_DEVICE to the open flags.  All other entries
 * are the same functions as in _ii_io_def.  "Provide" has no function
 * and the table ends with an all-zero entry.
 */
static nsc_def_t _ii_ior_def[] = {
	"Open",		(uintptr_t)_ii_openr,		0,
	"Close",	(uintptr_t)_ii_close,		0,
	"Attach",	(uintptr_t)_ii_attach,		0,
	"Detach",	(uintptr_t)_ii_detach,		0,
	"AllocHandle",	(uintptr_t)_ii_alloc_handle,	0,
	"FreeHandle",	(uintptr_t)_ii_free_handle,	0,
	"AllocBuf",	(uintptr_t)_ii_alloc_buf,	0,
	"FreeBuf",	(uintptr_t)_ii_free_buf,	0,
	"GetPinned",	(uintptr_t)_ii_get_pinned,	0,
	"Discard",	(uintptr_t)_ii_discard_pinned,	0,
	"PartSize",	(uintptr_t)_ii_partsize,	0,
	"MaxFbas",	(uintptr_t)_ii_maxfbas,	0,
	"Read",		(uintptr_t)_ii_read,		0,
	"Write",	(uintptr_t)_ii_write,		0,
	"Zero",		(uintptr_t)_ii_zero,		0,
	"Uncommit",	(uintptr_t)_ii_uncommit,	0,
	"TrackSize",	(uintptr_t)_ii_trksize,	0,
	"Provide",	0,				0,
	0,		0,				0
};
10384