scsi_vhci.c revision 4851:5e98cf4c2164
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25#pragma ident	"%Z%%M%	%I%	%E% SMI"
26
27/*
28 * Multiplexed I/O SCSI vHCI implementation
29 */
30
31#include <sys/conf.h>
32#include <sys/file.h>
33#include <sys/ddi.h>
34#include <sys/sunddi.h>
35#include <sys/scsi/scsi.h>
36#include <sys/scsi/impl/scsi_reset_notify.h>
37#include <sys/sunmdi.h>
38#include <sys/mdi_impldefs.h>
39#include <sys/scsi/adapters/scsi_vhci.h>
40#include <sys/disp.h>
41#include <sys/byteorder.h>
42
43extern uintptr_t scsi_callback_id;
44extern ddi_dma_attr_t scsi_alloc_attr;
45
46#ifdef	DEBUG
47int	vhci_debug = VHCI_DEBUG_DEFAULT_VAL;
48#endif
49
50/* retry count for the vhci_do_prout command when a not ready is returned */
51int vhci_prout_not_ready_retry = 180;
52
53/*
54 * These values are defined to support the internal retry of
55 * SCSI packets for better sense code handling.
56 */
57#define	VHCI_CMD_CMPLT	0
58#define	VHCI_CMD_RETRY	1
59#define	VHCI_CMD_ERROR	-1
60
61#define	PROPFLAGS (DDI_PROP_DONTPASS | DDI_PROP_NOTPROM)
62#define	VHCI_SCSI_PERR		0x47
63#define	VHCI_PGR_ILLEGALOP	-2
64#define	VHCI_NUM_UPDATE_TASKQ	8
65#define	VHCI_STD_INQ_SIZE	128
66
67/*
68 * Version Macros
69 */
70#define	VHCI_NAME_VERSION	"SCSI VHCI Driver %I%"
71char		vhci_version_name[] = VHCI_NAME_VERSION;
72
73int		vhci_first_time = 0;
74clock_t		vhci_to_ticks = 0;
75int		vhci_init_wait_timeout = VHCI_INIT_WAIT_TIMEOUT;
76kcondvar_t	vhci_cv;
77kmutex_t	vhci_global_mutex;
78void		*vhci_softstate = NULL; /* for soft state */
79
80/*
81 * Delay in microseconds before retrying the reserve command
82 */
83int		vhci_reserve_delay = 100000;
84static int	vhci_path_quiesce_timeout = 60;
85static uchar_t	zero_key[MHIOC_RESV_KEY_SIZE];
86
87/* uscsi delay for a TRAN_BUSY */
88static int vhci_uscsi_delay = 100000;
89static int vhci_uscsi_retry_count = 180;
90/* uscsi_restart_sense timeout id in case it needs to get canceled */
91static timeout_id_t vhci_restart_timeid = 0;
92
93/*
94 * Bidirectional map of 'target-port' to port id <pid> for support of
95 * iostat(1M) '-Xx' and '-Yx' output.
96 */
97static kmutex_t		vhci_targetmap_mutex;
98static uint_t		vhci_targetmap_pid = 1;
99static mod_hash_t	*vhci_targetmap_bypid;	/* <pid> -> 'target-port' */
100static mod_hash_t	*vhci_targetmap_byport;	/* 'target-port' -> <pid> */
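
/*
 * Illustrative sketch, not part of the driver (the example_* name is
 * hypothetical): a bidirectional map like the one above stays coherent
 * by entering every 'target-port' string into both hashes, in both
 * directions, under vhci_targetmap_mutex.  A real version must insert
 * a stable copy of the string, since the hashes store only the pointer.
 */
static uint_t
example_targetmap_lookup_or_insert(char *tgt_port)
{
	mod_hash_val_t	hv;
	uint_t		pid;

	mutex_enter(&vhci_targetmap_mutex);
	if (mod_hash_find(vhci_targetmap_byport,
	    (mod_hash_key_t)tgt_port, &hv) == 0) {
		pid = (uint_t)(uintptr_t)hv;	/* port already mapped */
	} else {
		pid = vhci_targetmap_pid++;	/* assign the next pid */
		(void) mod_hash_insert(vhci_targetmap_byport,
		    (mod_hash_key_t)tgt_port,
		    (mod_hash_val_t)(uintptr_t)pid);
		(void) mod_hash_insert(vhci_targetmap_bypid,
		    (mod_hash_key_t)(uintptr_t)pid,
		    (mod_hash_val_t)tgt_port);
	}
	mutex_exit(&vhci_targetmap_mutex);
	return (pid);
}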
101
102/*
103 * functions exported by scsi_vhci struct cb_ops
104 */
105static int vhci_open(dev_t *, int, int, cred_t *);
106static int vhci_close(dev_t, int, int, cred_t *);
107static int vhci_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
108
109/*
110 * functions exported by scsi_vhci struct dev_ops
111 */
112static int vhci_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
113static int vhci_attach(dev_info_t *, ddi_attach_cmd_t);
114static int vhci_detach(dev_info_t *, ddi_detach_cmd_t);
115
116/*
117 * functions exported by scsi_vhci scsi_hba_tran_t transport table
118 */
119static int vhci_scsi_tgt_init(dev_info_t *, dev_info_t *,
120    scsi_hba_tran_t *, struct scsi_device *);
121static void vhci_scsi_tgt_free(dev_info_t *, dev_info_t *, scsi_hba_tran_t *,
122    struct scsi_device *);
123static int vhci_pgr_register_start(scsi_vhci_lun_t *, struct scsi_pkt *);
124static int vhci_scsi_start(struct scsi_address *, struct scsi_pkt *);
125static int vhci_scsi_abort(struct scsi_address *, struct scsi_pkt *);
126static int vhci_scsi_reset(struct scsi_address *, int);
127static int vhci_scsi_reset_target(struct scsi_address *, int level,
128    uint8_t select_path);
129static int vhci_scsi_reset_bus(struct scsi_address *);
130static int vhci_scsi_getcap(struct scsi_address *, char *, int);
131static int vhci_scsi_setcap(struct scsi_address *, char *, int, int);
132static int vhci_commoncap(struct scsi_address *, char *, int, int, int);
133static int vhci_pHCI_cap(struct scsi_address *ap, char *cap, int val, int whom,
134    mdi_pathinfo_t *pip);
135static struct scsi_pkt *vhci_scsi_init_pkt(struct scsi_address *,
136    struct scsi_pkt *, struct buf *, int, int, int, int, int (*)(), caddr_t);
137static void vhci_scsi_destroy_pkt(struct scsi_address *, struct scsi_pkt *);
138static void vhci_scsi_dmafree(struct scsi_address *, struct scsi_pkt *);
139static void vhci_scsi_sync_pkt(struct scsi_address *, struct scsi_pkt *);
140static int vhci_scsi_reset_notify(struct scsi_address *, int, void (*)(caddr_t),
141    caddr_t);
142static int vhci_scsi_get_bus_addr(struct scsi_device *, char *, int);
143static int vhci_scsi_get_name(struct scsi_device *, char *, int);
144static int vhci_scsi_bus_power(dev_info_t *, void *, pm_bus_power_op_t,
145    void *, void *);
146static int vhci_scsi_bus_config(dev_info_t *, uint_t, ddi_bus_config_op_t,
147    void *, dev_info_t **);
148
149/*
150 * functions registered with the mpxio framework via mdi_vhci_ops_t
151 */
152static int vhci_pathinfo_init(dev_info_t *, mdi_pathinfo_t *, int);
153static int vhci_pathinfo_uninit(dev_info_t *, mdi_pathinfo_t *, int);
154static int vhci_pathinfo_state_change(dev_info_t *, mdi_pathinfo_t *,
155    mdi_pathinfo_state_t, uint32_t, int);
156static int vhci_pathinfo_online(dev_info_t *, mdi_pathinfo_t *, int);
157static int vhci_pathinfo_offline(dev_info_t *, mdi_pathinfo_t *, int);
158static int vhci_failover(dev_info_t *, dev_info_t *, int);
159static void vhci_client_attached(dev_info_t *);
160
161static int vhci_ctl(dev_t, int, intptr_t, int, cred_t *, int *);
162static int vhci_devctl(dev_t, int, intptr_t, int, cred_t *, int *);
163static int vhci_ioc_get_phci_path(sv_iocdata_t *, caddr_t, int, caddr_t);
164static int vhci_ioc_get_client_path(sv_iocdata_t *, caddr_t, int, caddr_t);
165static int vhci_ioc_get_paddr(sv_iocdata_t *, caddr_t, int, caddr_t);
166static int vhci_ioc_send_client_path(caddr_t, sv_iocdata_t *, int, caddr_t);
167static void vhci_ioc_devi_to_path(dev_info_t *, caddr_t);
168static int vhci_get_phci_path_list(dev_info_t *, sv_path_info_t *, uint_t);
169static int vhci_get_client_path_list(dev_info_t *, sv_path_info_t *, uint_t);
170static int vhci_get_iocdata(const void *, sv_iocdata_t *, int, caddr_t);
171static int vhci_get_iocswitchdata(const void *, sv_switch_to_cntlr_iocdata_t *,
172    int, caddr_t);
173static int vhci_ioc_alloc_pathinfo(sv_path_info_t **, sv_path_info_t **,
174    uint_t, sv_iocdata_t *, int, caddr_t);
175static void vhci_ioc_free_pathinfo(sv_path_info_t *, sv_path_info_t *, uint_t);
176static int vhci_ioc_send_pathinfo(sv_path_info_t *, sv_path_info_t *, uint_t,
177    sv_iocdata_t *, int, caddr_t);
178static int vhci_handle_ext_fo(struct scsi_pkt *, int);
179static int vhci_efo_watch_cb(caddr_t, struct scsi_watch_result *);
180static int vhci_quiesce_lun(struct scsi_vhci_lun *);
181static int vhci_pgr_validate_and_register(scsi_vhci_priv_t *);
182static void vhci_dispatch_scsi_start(void *);
183static void vhci_efo_done(void *);
184static void vhci_initiate_auto_failback(void *);
185static void vhci_update_pHCI_pkt(struct vhci_pkt *, struct scsi_pkt *);
186static int vhci_update_pathinfo(struct scsi_device *, mdi_pathinfo_t *,
187    struct scsi_failover_ops *, scsi_vhci_lun_t *, struct scsi_vhci *);
188static void vhci_kstat_create_pathinfo(mdi_pathinfo_t *);
189static int vhci_quiesce_paths(dev_info_t *, dev_info_t *,
190    scsi_vhci_lun_t *, char *, char *);
191
192static char *vhci_devnm_to_guid(char *);
193static int vhci_bind_transport(struct scsi_address *, struct vhci_pkt *,
194    int, int (*func)(caddr_t));
195static void vhci_intr(struct scsi_pkt *);
196static int vhci_do_prout(scsi_vhci_priv_t *);
197static void vhci_run_cmd(void *);
198static int vhci_do_prin(struct vhci_pkt **);
199static struct scsi_pkt *vhci_create_retry_pkt(struct vhci_pkt *);
200static struct vhci_pkt *vhci_sync_retry_pkt(struct vhci_pkt *);
201static struct scsi_vhci_lun *vhci_lun_lookup(dev_info_t *);
202static struct scsi_vhci_lun *vhci_lun_lookup_alloc(dev_info_t *, char *, int *);
203static void vhci_lun_free(dev_info_t *);
204static int vhci_recovery_reset(scsi_vhci_lun_t *, struct scsi_address *,
205    uint8_t, uint8_t);
206void vhci_update_pathstates(void *);
207
208#ifdef DEBUG
209static void vhci_print_prin_keys(vhci_prin_readkeys_t *, int);
210#endif
211static void vhci_print_prout_keys(scsi_vhci_lun_t *, char *);
212static void vhci_uscsi_iodone(struct scsi_pkt *pkt);
213
214/*
215 * MP-API related functions
216 */
217extern int vhci_mpapi_init(struct scsi_vhci *);
218extern void vhci_mpapi_add_dev_prod(struct scsi_vhci *, char *);
219extern int vhci_mpapi_ctl(dev_t, int, intptr_t, int, cred_t *, int *);
220extern void vhci_update_mpapi_data(struct scsi_vhci *,
221    scsi_vhci_lun_t *, mdi_pathinfo_t *);
222extern void* vhci_get_mpapi_item(struct scsi_vhci *, mpapi_list_header_t *,
223    uint8_t, void*);
224extern void vhci_mpapi_set_path_state(dev_info_t *, mdi_pathinfo_t *, int);
225extern int vhci_mpapi_update_tpg_acc_state_for_lu(struct scsi_vhci *,
226    scsi_vhci_lun_t *);
227
228/* Special export to MP-API of tpgs non-'fops' entry point */
229int (*tpgs_set_target_groups)(struct scsi_address *, int, int);
230
231#define	VHCI_DMA_MAX_XFER_CAP	0xffffffffULL
232
233#define	VHCI_MAX_PGR_RETRIES	3
234
235/*
236 * Macros for the device-type mpxio options
237 */
238#define	LOAD_BALANCE_OPTIONS		"load-balance-options"
239#define	LOGICAL_BLOCK_REGION_SIZE	"region-size"
240#define	MPXIO_OPTIONS_LIST		"device-type-mpxio-options-list"
241#define	DEVICE_TYPE_STR			"device-type"
242#define	isdigit(ch)			((ch) >= '0' && (ch) <= '9')
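
/*
 * For illustration only: the option names above are consumed from
 * scsi_vhci.conf entries shaped roughly as follows (the device type and
 * option values are made-up examples):
 *
 *	device-type-mpxio-options-list =
 *	    "device-type=SUN     T300", "load-balance-options=lb-opts";
 *	lb-opts = "load-balance=logical-block", "region-size=18";
 */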
243
244static struct cb_ops vhci_cb_ops = {
245	vhci_open,			/* open */
246	vhci_close,			/* close */
247	nodev,				/* strategy */
248	nodev,				/* print */
249	nodev,				/* dump */
250	nodev,				/* read */
251	nodev,				/* write */
252	vhci_ioctl,			/* ioctl */
253	nodev,				/* devmap */
254	nodev,				/* mmap */
255	nodev,				/* segmap */
256	nochpoll,			/* chpoll */
257	ddi_prop_op,			/* cb_prop_op */
258	0,				/* streamtab */
259	D_NEW | D_MP,			/* cb_flag */
260	CB_REV,				/* rev */
261	nodev,				/* aread */
262	nodev				/* awrite */
263};
264
265static struct dev_ops vhci_ops = {
266	DEVO_REV,
267	0,
268	vhci_getinfo,
269	nulldev,		/* identify */
270	nulldev,		/* probe */
271	vhci_attach,		/* attach and detach are mandatory */
272	vhci_detach,
273	nodev,			/* reset */
274	&vhci_cb_ops,		/* cb_ops */
275	NULL,			/* bus_ops */
276	NULL,			/* power */
277};
278
279extern struct mod_ops mod_driverops;
280
281static struct modldrv modldrv = {
282	&mod_driverops,
283	vhci_version_name,	/* module name */
284	&vhci_ops
285};
286
287static struct modlinkage modlinkage = {
288	MODREV_1,
289	&modldrv,
290	NULL
291};
292
293static mdi_vhci_ops_t vhci_opinfo = {
294	MDI_VHCI_OPS_REV,
295	vhci_pathinfo_init,		/* Pathinfo node init callback	*/
296	vhci_pathinfo_uninit,		/* Pathinfo uninit callback	*/
297	vhci_pathinfo_state_change,	/* Pathinfo node state change	*/
298	vhci_failover,			/* failover callback		*/
299	vhci_client_attached		/* client attached callback	*/
300};
301
302/*
303 * The scsi_failover table defines an ordered set of 'fops' modules supported
304 * by scsi_vhci.  Currently, this table is initialized from the 'ddi-forceload'
305 * property specified in scsi_vhci.conf.
306 */
307struct scsi_failover {
308	ddi_modhandle_t			sf_mod;
309	struct scsi_failover_ops	*sf_sfo;
310} *scsi_failover_table;
311uint_t	scsi_nfailover;
312
313int
314_init(void)
315{
316	int	rval;
317
318	/*
319	 * Allocate soft state and prepare to do ddi_soft_state_zalloc()
320	 * before registering with the transport.
321	 */
322	if ((rval = ddi_soft_state_init(&vhci_softstate,
323	    sizeof (struct scsi_vhci), 1)) != 0) {
324		VHCI_DEBUG(1, (CE_NOTE, NULL,
325		    "!_init:soft state init failed\n"));
326		return (rval);
327	}
328
329	if ((rval = scsi_hba_init(&modlinkage)) != 0) {
330		VHCI_DEBUG(1, (CE_NOTE, NULL,
331		    "!_init: scsi hba init failed\n"));
332		ddi_soft_state_fini(&vhci_softstate);
333		return (rval);
334	}
335
336	mutex_init(&vhci_global_mutex, NULL, MUTEX_DRIVER, NULL);
337	cv_init(&vhci_cv, NULL, CV_DRIVER, NULL);
338
339	mutex_init(&vhci_targetmap_mutex, NULL, MUTEX_DRIVER, NULL);
340	vhci_targetmap_byport = mod_hash_create_strhash(
341	    "vhci_targetmap_byport", 256, mod_hash_null_valdtor);
342	vhci_targetmap_bypid = mod_hash_create_idhash(
343	    "vhci_targetmap_bypid", 256, mod_hash_null_valdtor);
344
345	if ((rval = mod_install(&modlinkage)) != 0) {
346		VHCI_DEBUG(1, (CE_NOTE, NULL, "!_init: mod_install failed\n"));
347		if (vhci_targetmap_bypid)
348			mod_hash_destroy_idhash(vhci_targetmap_bypid);
349		if (vhci_targetmap_byport)
350			mod_hash_destroy_strhash(vhci_targetmap_byport);
351		mutex_destroy(&vhci_targetmap_mutex);
352		cv_destroy(&vhci_cv);
353		mutex_destroy(&vhci_global_mutex);
354		scsi_hba_fini(&modlinkage);
355		ddi_soft_state_fini(&vhci_softstate);
356	}
357	return (rval);
358}
359
360
361/*
362 * the system is done with us as a driver, so clean up
363 */
364int
365_fini(void)
366{
367	int rval;
368
369	/*
370	 * don't start cleaning up until we know that the module remove
371	 * has worked  -- if this works, then we know that each instance
372	 * has successfully been DDI_DETACHed
373	 */
374	if ((rval = mod_remove(&modlinkage)) != 0) {
375		VHCI_DEBUG(4, (CE_NOTE, NULL, "!_fini: mod_remove failed\n"));
376		return (rval);
377	}
378
379	if (vhci_targetmap_bypid)
380		mod_hash_destroy_idhash(vhci_targetmap_bypid);
381	if (vhci_targetmap_byport)
382		mod_hash_destroy_strhash(vhci_targetmap_byport);
383	mutex_destroy(&vhci_targetmap_mutex);
384	cv_destroy(&vhci_cv);
385	mutex_destroy(&vhci_global_mutex);
386	scsi_hba_fini(&modlinkage);
387	ddi_soft_state_fini(&vhci_softstate);
388
389	return (rval);
390}
391
392int
393_info(struct modinfo *modinfop)
394{
395	return (mod_info(&modlinkage, modinfop));
396}
397
398/*
399 * Lookup scsi_failover by "short name" of failover module.
400 */
401struct scsi_failover_ops *
402vhci_failover_ops_by_name(char *name)
403{
404	struct scsi_failover	*sf;
405
406	for (sf = scsi_failover_table; sf->sf_mod; sf++) {
407		if (sf->sf_sfo == NULL)
408			continue;
409		if (strcmp(sf->sf_sfo->sfo_name, name) == 0)
410			return (sf->sf_sfo);
411	}
412	return (NULL);
413}
414
415/*
416 * Load all scsi_failover_ops 'fops' modules.
417 */
418static void
419vhci_failover_modopen(struct scsi_vhci *vhci)
420{
421	char			**module;
422	int			i;
423	struct scsi_failover	*sf;
424	char			**dt;
425	int			e;
426
427	if (scsi_failover_table)
428		return;
429
430	/* Get the list of modules from scsi_vhci.conf */
431	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY,
432	    vhci->vhci_dip, DDI_PROP_DONTPASS, "ddi-forceload",
433	    &module, &scsi_nfailover) != DDI_PROP_SUCCESS) {
434		cmn_err(CE_WARN, "scsi_vhci: "
435		    "scsi_vhci.conf is missing 'ddi-forceload'");
436		return;
437	}
438	if (scsi_nfailover == 0) {
439		cmn_err(CE_WARN, "scsi_vhci: "
440		    "scsi_vhci.conf has empty 'ddi-forceload'");
441		return;
442	}
443
444	/* allocate failover table based on number of modules */
445	scsi_failover_table = (struct scsi_failover *)
446	    kmem_zalloc(sizeof (struct scsi_failover) * (scsi_nfailover + 1),
447	    KM_SLEEP);
448
449	/* loop over modules specified in scsi_vhci.conf and open each module */
450	for (i = 0, sf = scsi_failover_table; i < scsi_nfailover; i++) {
451		if (module[i] == NULL)
452			continue;
453
454		sf->sf_mod = ddi_modopen(module[i], KRTLD_MODE_FIRST, &e);
455		if (sf->sf_mod == NULL) {
456			/*
457			 * A module returns EEXIST if other software is
458			 * supporting the intended function: for example
459			 * the scsi_vhci_f_sum_emc module returns EEXIST
460	 * from _init if EMC PowerPath software is installed.
461			 */
462			if (e != EEXIST)
463				cmn_err(CE_WARN, "scsi_vhci: unable to open "
464				    "module '%s', error %d", module[i], e);
465			continue;
466		}
467		sf->sf_sfo = ddi_modsym(sf->sf_mod,
468		    "scsi_vhci_failover_ops", &e);
469		if (sf->sf_sfo == NULL) {
470			cmn_err(CE_WARN, "scsi_vhci: "
471			    "unable to import 'scsi_failover_ops' from '%s', "
472			    "error %d", module[i], e);
473			(void) ddi_modclose(sf->sf_mod);
474			sf->sf_mod = NULL;
475			continue;
476		}
477
478		/* register vid/pid of devices supported with mpapi */
479		for (dt = sf->sf_sfo->sfo_devices; *dt; dt++)
480			vhci_mpapi_add_dev_prod(vhci, *dt);
481
482		/*
483		 * Special processing for SFO_NAME_TPGS module, which contains
484		 * the `tpgs_set_target_groups` implementation needed by the
485		 * MP-API code.
486		 */
487		if (strcmp(sf->sf_sfo->sfo_name, SFO_NAME_TPGS) == 0) {
488			tpgs_set_target_groups =
489			    (int (*)(struct scsi_address *, int, int))
490			    ddi_modsym(sf->sf_mod, "std_set_target_groups", &e);
491			if (tpgs_set_target_groups == NULL) {
492				cmn_err(CE_WARN, "scsi_vhci: "
493				    "unable to import 'std_set_target_groups' "
494				    "from '%s', error %d", module[i], e);
495			}
496		}
497
498		sf++;
499	}
500
501	/* verify that at least the "well-known" modules were there */
502	if (vhci_failover_ops_by_name(SFO_NAME_SYM) == NULL)
503		cmn_err(CE_WARN, "scsi_vhci: well-known module \""
504		    SFO_NAME_SYM "\" not defined in scsi_vhci.conf's "
505		    "'ddi-forceload'");
506	if (vhci_failover_ops_by_name(SFO_NAME_TPGS) == NULL)
507		cmn_err(CE_WARN, "scsi_vhci: well-known module \""
508		    SFO_NAME_TPGS "\" not defined in scsi_vhci.conf's "
509		    "'ddi-forceload'");
510
511	/* call sfo_init for modules that need it */
512	for (sf = scsi_failover_table; sf->sf_mod; sf++) {
513		if (sf->sf_sfo && sf->sf_sfo->sfo_init)
514			(*sf->sf_sfo->sfo_init)();
515	}
516}
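
/*
 * For illustration only: the 'ddi-forceload' property read above is a
 * string array in scsi_vhci.conf; a typical entry looks roughly like
 * this (the module list is an example):
 *
 *	ddi-forceload =
 *	    "misc/scsi_vhci/scsi_vhci_f_asym_sun",
 *	    "misc/scsi_vhci/scsi_vhci_f_sym",
 *	    "misc/scsi_vhci/scsi_vhci_f_tpgs";
 *
 * Each entry names a failover module to ddi_modopen(); the last two
 * correspond to the "well-known" modules verified above.
 */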
517
518/*
519 * unload all loaded scsi_failover_ops modules
520 */
521static void
522vhci_failover_modclose()
523{
524	struct scsi_failover	*sf;
525
526	for (sf = scsi_failover_table; sf->sf_mod; sf++) {
527		if ((sf->sf_mod == NULL) || (sf->sf_sfo == NULL))
528			continue;
529		(void) ddi_modclose(sf->sf_mod);
530		sf->sf_mod = NULL;
531		sf->sf_sfo = NULL;
532	}
533
534	if (scsi_failover_table && scsi_nfailover)
535		kmem_free(scsi_failover_table,
536		    sizeof (struct scsi_failover) * (scsi_nfailover + 1));
537	scsi_failover_table = NULL;
538	scsi_nfailover = 0;
539}
540
541/* ARGSUSED */
542static int
543vhci_open(dev_t *devp, int flag, int otype, cred_t *credp)
544{
545	struct scsi_vhci	*vhci;
546
547	if (otype != OTYP_CHR) {
548		return (EINVAL);
549	}
550
551	vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(*devp)));
552	if (vhci == NULL) {
553		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_open: failed ENXIO\n"));
554		return (ENXIO);
555	}
556
557	mutex_enter(&vhci->vhci_mutex);
558	if ((flag & FEXCL) && (vhci->vhci_state & VHCI_STATE_OPEN)) {
559		mutex_exit(&vhci->vhci_mutex);
560		vhci_log(CE_NOTE, vhci->vhci_dip,
561		    "!vhci%d: Already open\n", getminor(*devp));
562		return (EBUSY);
563	}
564
565	vhci->vhci_state |= VHCI_STATE_OPEN;
566	mutex_exit(&vhci->vhci_mutex);
567	return (0);
568}
569
570
571/* ARGSUSED */
572static int
573vhci_close(dev_t dev, int flag, int otype, cred_t *credp)
574{
575	struct scsi_vhci	*vhci;
576
577	if (otype != OTYP_CHR) {
578		return (EINVAL);
579	}
580
581	vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(dev)));
582	if (vhci == NULL) {
583		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_close: failed ENXIO\n"));
584		return (ENXIO);
585	}
586
587	mutex_enter(&vhci->vhci_mutex);
588	vhci->vhci_state &= ~VHCI_STATE_OPEN;
589	mutex_exit(&vhci->vhci_mutex);
590
591	return (0);
592}
593
594/* ARGSUSED */
595static int
596vhci_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
597	cred_t *credp, int *rval)
598{
599	if (IS_DEVCTL(cmd)) {
600		return (vhci_devctl(dev, cmd, data, mode, credp, rval));
601	} else if (cmd == MP_CMD) {
602		return (vhci_mpapi_ctl(dev, cmd, data, mode, credp, rval));
603	} else {
604		return (vhci_ctl(dev, cmd, data, mode, credp, rval));
605	}
606}
607
608/*
609 * attach the module
610 */
611static int
612vhci_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
613{
614	int			rval = DDI_FAILURE;
615	int			scsi_hba_attached = 0;
616	int			vhci_attached = 0;
617	int			mutex_initted = 0;
618	int			instance;
619	struct scsi_vhci	*vhci;
620	scsi_hba_tran_t		*tran;
621	char			cache_name_buf[64];
622	char			*data;
623
624	VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_attach: cmd=0x%x\n", cmd));
625
626	instance = ddi_get_instance(dip);
627
628	switch (cmd) {
629	case DDI_ATTACH:
630		break;
631
632	case DDI_RESUME:
633	case DDI_PM_RESUME:
634		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_attach: resume not yet "
635		    "implemented\n"));
636		return (rval);
637
638	default:
639		VHCI_DEBUG(1, (CE_NOTE, NULL,
640		    "!vhci_attach: unknown ddi command\n"));
641		return (rval);
642	}
643
644	/*
645	 * Allocate vhci data structure.
646	 */
647	if (ddi_soft_state_zalloc(vhci_softstate, instance) != DDI_SUCCESS) {
648		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
649		    "soft state alloc failed\n"));
650		return (DDI_FAILURE);
651	}
652
653	if ((vhci = ddi_get_soft_state(vhci_softstate, instance)) == NULL) {
654		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
655		    "bad soft state\n"));
656		ddi_soft_state_free(vhci_softstate, instance);
657		return (DDI_FAILURE);
658	}
659
660	/* Allocate packet cache */
661	(void) snprintf(cache_name_buf, sizeof (cache_name_buf),
662	    "vhci%d_cache", instance);
663
664	mutex_init(&vhci->vhci_mutex, NULL, MUTEX_DRIVER, NULL);
665	mutex_initted++;
666
667	/*
668	 * Allocate a transport structure
669	 */
670	tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP);
671	ASSERT(tran != NULL);
672
673	vhci->vhci_tran		= tran;
674	vhci->vhci_dip		= dip;
675	vhci->vhci_instance	= instance;
676
677	tran->tran_hba_private	= vhci;
678	tran->tran_tgt_private	= NULL;
679	tran->tran_tgt_init	= vhci_scsi_tgt_init;
680	tran->tran_tgt_probe	= NULL;
681	tran->tran_tgt_free	= vhci_scsi_tgt_free;
682
683	tran->tran_start	= vhci_scsi_start;
684	tran->tran_abort	= vhci_scsi_abort;
685	tran->tran_reset	= vhci_scsi_reset;
686	tran->tran_getcap	= vhci_scsi_getcap;
687	tran->tran_setcap	= vhci_scsi_setcap;
688	tran->tran_init_pkt	= vhci_scsi_init_pkt;
689	tran->tran_destroy_pkt	= vhci_scsi_destroy_pkt;
690	tran->tran_dmafree	= vhci_scsi_dmafree;
691	tran->tran_sync_pkt	= vhci_scsi_sync_pkt;
692	tran->tran_reset_notify = vhci_scsi_reset_notify;
693
694	tran->tran_get_bus_addr	= vhci_scsi_get_bus_addr;
695	tran->tran_get_name	= vhci_scsi_get_name;
696	tran->tran_bus_reset	= NULL;
697	tran->tran_quiesce	= NULL;
698	tran->tran_unquiesce	= NULL;
699
700	/*
701	 * register event notification routines with scsa
702	 */
703	tran->tran_get_eventcookie = NULL;
704	tran->tran_add_eventcall = NULL;
705	tran->tran_remove_eventcall = NULL;
706	tran->tran_post_event = NULL;
707
708	tran->tran_bus_power = vhci_scsi_bus_power;
709
710	tran->tran_bus_config = vhci_scsi_bus_config;
711
712	/*
713	 * Attach this instance with the mpxio framework
714	 */
715	if (mdi_vhci_register(MDI_HCI_CLASS_SCSI, dip, &vhci_opinfo, 0)
716	    != MDI_SUCCESS) {
717		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
718		    "mdi_vhci_register failed\n"));
719		goto attach_fail;
720	}
721	vhci_attached++;
722
723	/*
724	 * Attach this instance of the hba.
725	 *
726	 * Regarding dma attributes: Since scsi_vhci is a virtual scsi HBA
727	 * driver, it has nothing to do with DMA. However, when calling
728	 * scsi_hba_attach_setup() we need to pass something valid in the
729	 * dma attributes parameter. So we just use scsi_alloc_attr.
730	 * SCSA itself seems to care only about the dma_attr_minxfer and
731	 * dma_attr_burstsizes fields of the dma attributes structure,
732	 * and expects those fields to be non-zero.
733	 */
734	if (scsi_hba_attach_setup(dip, &scsi_alloc_attr, tran,
735	    SCSI_HBA_TRAN_CLONE) != DDI_SUCCESS) {
736		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
737		    "hba attach failed\n"));
738		goto attach_fail;
739	}
740	scsi_hba_attached++;
741
742	if (ddi_create_minor_node(dip, "devctl", S_IFCHR,
743	    INST2DEVCTL(instance), DDI_NT_SCSI_NEXUS, 0) != DDI_SUCCESS) {
744		VHCI_DEBUG(1, (CE_NOTE, dip, "!vhci_attach:"
745		    " ddi_create_minor_node failed\n"));
746		goto attach_fail;
747	}
748
749	/*
750	 * Set pm-want-child-notification property for
751	 * power management of the phci and client
752	 */
753	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
754	    "pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) {
755		cmn_err(CE_WARN,
756		    "%s%d failed to create pm-want-child-notification? prop",
757		    ddi_driver_name(dip), ddi_get_instance(dip));
758		goto attach_fail;
759	}
760
761	vhci->vhci_taskq = taskq_create("vhci_taskq", 1, MINCLSYSPRI, 1, 4, 0);
762	vhci->vhci_update_pathstates_taskq =
763	    taskq_create("vhci_update_pathstates", VHCI_NUM_UPDATE_TASKQ,
764	    MINCLSYSPRI, 1, 4, 0);
765	ASSERT(vhci->vhci_taskq);
766	ASSERT(vhci->vhci_update_pathstates_taskq);
767
768	/*
769	 * Set appropriate configuration flags based on options set in
770	 * conf file.
771	 */
772	vhci->vhci_conf_flags = 0;
773	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, PROPFLAGS,
774	    "auto-failback", &data) == DDI_SUCCESS) {
775		if (strcmp(data, "enable") == 0)
776			vhci->vhci_conf_flags |= VHCI_CONF_FLAGS_AUTO_FAILBACK;
777		ddi_prop_free(data);
778	}
779
780	if (!(vhci->vhci_conf_flags & VHCI_CONF_FLAGS_AUTO_FAILBACK))
781		vhci_log(CE_NOTE, dip, "!Auto-failback capability "
782		    "disabled through scsi_vhci.conf file.");
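
	/*
	 * For illustration only: the property checked above comes from a
	 * scsi_vhci.conf line of the form (example value):
	 *
	 *	auto-failback="enable";
	 */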
783
784	/*
785	 * Allocate an mpapi private structure
786	 */
787	vhci->mp_priv = kmem_zalloc(sizeof (mpapi_priv_t), KM_SLEEP);
788	if (vhci_mpapi_init(vhci) != 0) {
789		VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_attach: "
790		    "vhci_mpapi_init() failed"));
791	}
792
793	vhci_failover_modopen(vhci);		/* load failover modules */
794
795	ddi_report_dev(dip);
796	return (DDI_SUCCESS);
797
798attach_fail:
799	if (vhci_attached)
800		(void) mdi_vhci_unregister(dip, 0);
801
802	if (scsi_hba_attached)
803		(void) scsi_hba_detach(dip);
804
805	if (vhci->vhci_tran)
806		scsi_hba_tran_free(vhci->vhci_tran);
807
808	if (mutex_initted) {
809		mutex_destroy(&vhci->vhci_mutex);
810	}
811
812	ddi_soft_state_free(vhci_softstate, instance);
813	return (DDI_FAILURE);
814}
815
816
817/*ARGSUSED*/
818static int
819vhci_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
820{
821	int			instance = ddi_get_instance(dip);
822	scsi_hba_tran_t		*tran;
823	struct scsi_vhci	*vhci;
824
825	VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_detach: cmd=0x%x\n", cmd));
826
827	if ((tran = ddi_get_driver_private(dip)) == NULL)
828		return (DDI_FAILURE);
829
830	vhci = TRAN2HBAPRIVATE(tran);
831	if (!vhci) {
832		return (DDI_FAILURE);
833	}
834
835	switch (cmd) {
836	case DDI_DETACH:
837		break;
838
839	case DDI_SUSPEND:
840	case DDI_PM_SUSPEND:
841		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_detach: suspend/pm not yet "
842		    "implemented\n"));
843		return (DDI_FAILURE);
844
845	default:
846		VHCI_DEBUG(1, (CE_NOTE, NULL,
847		    "!vhci_detach: unknown ddi command\n"));
848		return (DDI_FAILURE);
849	}
850
851	(void) mdi_vhci_unregister(dip, 0);
852	(void) scsi_hba_detach(dip);
853	scsi_hba_tran_free(tran);
854
855	if (ddi_prop_remove(DDI_DEV_T_NONE, dip,
856	    "pm-want-child-notification?") != DDI_PROP_SUCCESS) {
857		cmn_err(CE_WARN,
858		    "%s%d unable to remove prop pm-want-child-notification?",
859		    ddi_driver_name(dip), ddi_get_instance(dip));
860	}
861	if (vhci_restart_timeid != 0) {
862		(void) untimeout(vhci_restart_timeid);
863	}
864	vhci_restart_timeid = 0;
865
866	mutex_destroy(&vhci->vhci_mutex);
867	vhci->vhci_dip = NULL;
868	vhci->vhci_tran = NULL;
869	taskq_destroy(vhci->vhci_taskq);
870	taskq_destroy(vhci->vhci_update_pathstates_taskq);
871	ddi_remove_minor_node(dip, NULL);
872	ddi_soft_state_free(vhci_softstate, instance);
873
874	vhci_failover_modclose();		/* unload failover modules */
875	return (DDI_SUCCESS);
876}
877
878/*
879 * vhci_getinfo()
880 * Given the device number, return the devinfo pointer or the
881 * instance number.
882 * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
883 */
884
885/*ARGSUSED*/
886static int
887vhci_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
888{
889	struct scsi_vhci	*vhcip;
890	int			instance = MINOR2INST(getminor((dev_t)arg));
891
892	switch (cmd) {
893	case DDI_INFO_DEVT2DEVINFO:
894		vhcip = ddi_get_soft_state(vhci_softstate, instance);
895		if (vhcip != NULL)
896			*result = vhcip->vhci_dip;
897		else {
898			*result = NULL;
899			return (DDI_FAILURE);
900		}
901		break;
902
903	case DDI_INFO_DEVT2INSTANCE:
904		*result = (void *)(uintptr_t)instance;
905		break;
906
907	default:
908		return (DDI_FAILURE);
909	}
910
911	return (DDI_SUCCESS);
912}
913
914
915/*ARGSUSED*/
916static int
917vhci_scsi_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
918	scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
919{
920	char			*guid;
921	scsi_vhci_lun_t		*vlun;
922	struct scsi_vhci	*vhci;
923	clock_t			from_ticks;
924	mdi_pathinfo_t		*pip;
925	int			rval;
926
927	ASSERT(hba_dip != NULL);
928	ASSERT(tgt_dip != NULL);
929
930	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(hba_dip));
931	ASSERT(vhci != NULL);
932
933	VHCI_DEBUG(4, (CE_NOTE, hba_dip,
934	    "!tgt_init: called for %s (instance %d)\n",
935	    ddi_driver_name(tgt_dip), ddi_get_instance(tgt_dip)));
936
937	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
938	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
939		/*
940		 * This must be the .conf node.  The ssd node under
941		 * fp already inserts a delay, so we just return from here.
942		 * We rely on this delay to have all dips be posted to
943		 * the ndi hotplug thread's newdev list.  This is
944	 * necessary for the deferred attach mechanism to work,
945	 * and for opens done soon after boot to succeed.
946		 */
947		VHCI_DEBUG(4, (CE_WARN, hba_dip, "tgt_init: lun guid "
948		    "property failed"));
949		return (DDI_NOT_WELL_FORMED);
950	}
951
952	vlun = vhci_lun_lookup(tgt_dip);
953
954	mutex_enter(&vhci_global_mutex);
955
956	from_ticks = ddi_get_lbolt();
957	if (vhci_to_ticks == 0) {
958		vhci_to_ticks = from_ticks +
959		    drv_usectohz(vhci_init_wait_timeout);
960	}
961
962#ifdef DEBUG
963	if (vlun) {
964		VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
965		    "vhci_scsi_tgt_init: guid %s : found vlun 0x%p "
966		    "from_ticks %lx to_ticks %lx",
967		    guid, (void *)vlun, from_ticks, vhci_to_ticks));
968	} else {
969		VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
970		    "vhci_scsi_tgt_init: guid %s : vlun not found "
971		    "from_ticks %lx to_ticks %lx", guid, from_ticks,
972		    vhci_to_ticks));
973	}
974#endif
975
976	rval = mdi_select_path(tgt_dip, NULL,
977	    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH), NULL, &pip);
978	if (rval == MDI_SUCCESS) {
979		mdi_rele_path(pip);
980	}
981
982	/*
983	 * Wait for the following conditions:
984	 *	1. no vlun available yet
985	 *	2. no path established
986	 *	3. timer did not expire
987	 */
988	while ((vlun == NULL) || (mdi_client_get_path_count(tgt_dip) == 0) ||
989	    (rval != MDI_SUCCESS)) {
990		if (vlun && vlun->svl_not_supported) {
991			VHCI_DEBUG(1, (CE_WARN, hba_dip, "tgt_init: "
992			    "vlun 0x%p lun guid %s not supported!",
993			    (void *)vlun, guid));
994			mutex_exit(&vhci_global_mutex);
995			ddi_prop_free(guid);
996			return (DDI_NOT_WELL_FORMED);
997		}
998		if ((vhci_first_time == 0) && (from_ticks >= vhci_to_ticks)) {
999			vhci_first_time = 1;
1000		}
1001		if (vhci_first_time == 1) {
1002			VHCI_DEBUG(1, (CE_WARN, hba_dip, "vhci_scsi_tgt_init: "
1003			    "no wait for %s. from_tick %lx, to_tick %lx",
1004			    guid, from_ticks, vhci_to_ticks));
1005			mutex_exit(&vhci_global_mutex);
1006			ddi_prop_free(guid);
1007			return (DDI_NOT_WELL_FORMED);
1008		}
1009
1010		if (cv_timedwait(&vhci_cv,
1011		    &vhci_global_mutex, vhci_to_ticks) == -1) {
1012			/* Timed out */
1013#ifdef DEBUG
1014			if (vlun == NULL) {
1015				VHCI_DEBUG(1, (CE_WARN, hba_dip,
1016				    "tgt_init: no vlun for %s!", guid));
1017			} else if (mdi_client_get_path_count(tgt_dip) == 0) {
1018				VHCI_DEBUG(1, (CE_WARN, hba_dip,
1019				    "tgt_init: client path count is "
1020				    "zero for %s!", guid));
1021			} else {
1022				VHCI_DEBUG(1, (CE_WARN, hba_dip,
1023				    "tgt_init: client path not "
1024				    "available yet for %s!", guid));
1025			}
1026#endif /* DEBUG */
1027			mutex_exit(&vhci_global_mutex);
1028			ddi_prop_free(guid);
1029			return (DDI_NOT_WELL_FORMED);
1030		}
1031		vlun = vhci_lun_lookup(tgt_dip);
1032		rval = mdi_select_path(tgt_dip, NULL,
1033		    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
1034		    NULL, &pip);
1035		if (rval == MDI_SUCCESS) {
1036			mdi_rele_path(pip);
1037		}
1038		from_ticks = ddi_get_lbolt();
1039	}
1040	mutex_exit(&vhci_global_mutex);
1041
1042	ASSERT(vlun != NULL);
1043	ddi_prop_free(guid);
1044	hba_tran->tran_tgt_private = vlun;
1045
1046	return (DDI_SUCCESS);
1047}
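
/*
 * Illustrative sketch, not part of the driver (the example_* name is
 * hypothetical): the bounded wait loop above follows the standard
 * cv_timedwait() pattern, which in isolation looks like this.
 */
static int
example_wait_until(kcondvar_t *cvp, kmutex_t *mp, volatile int *readyp,
    clock_t timeout_usec)
{
	/* cv_timedwait() takes an absolute deadline in lbolt ticks */
	clock_t	deadline = ddi_get_lbolt() + drv_usectohz(timeout_usec);

	mutex_enter(mp);
	while (*readyp == 0) {
		/* cv_timedwait() returns -1 once the deadline has passed */
		if (cv_timedwait(cvp, mp, deadline) == -1) {
			mutex_exit(mp);
			return (-1);		/* timed out */
		}
	}
	mutex_exit(mp);
	return (0);				/* condition met in time */
}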
1048
1049/*ARGSUSED*/
1050static void
1051vhci_scsi_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
1052	scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
1053{
1054}
1055
1056/*
1057 * a PGR register command has started; copy the info we need
1058 */
1059int
1060vhci_pgr_register_start(scsi_vhci_lun_t *vlun, struct scsi_pkt *pkt)
1061{
1062	struct vhci_pkt		*vpkt = TGTPKT2VHCIPKT(pkt);
1063	void			*addr;
1064
1065	if (!vpkt->vpkt_tgt_init_bp)
1066		return (TRAN_BADPKT);
1067
1068	addr = bp_mapin_common(vpkt->vpkt_tgt_init_bp,
1069	    (vpkt->vpkt_flags & CFLAG_NOWAIT) ? VM_NOSLEEP : VM_SLEEP);
1070	if (addr == NULL)
1071		return (TRAN_BUSY);
1072
1073	mutex_enter(&vlun->svl_mutex);
1074
1075	vhci_print_prout_keys(vlun, "v_pgr_reg_start: before bcopy:");
1076
1077	bcopy(addr, &vlun->svl_prout, sizeof (vhci_prout_t) -
1078	    (2 * MHIOC_RESV_KEY_SIZE * sizeof (char)));
1079	bcopy(pkt->pkt_cdbp, vlun->svl_cdb, sizeof (vlun->svl_cdb));
1080
1081	vhci_print_prout_keys(vlun, "v_pgr_reg_start: after bcopy:");
1082
1083	vlun->svl_time = pkt->pkt_time;
1084	vlun->svl_bcount = vpkt->vpkt_tgt_init_bp->b_bcount;
1085	vlun->svl_first_path = vpkt->vpkt_path;
1086	mutex_exit(&vlun->svl_mutex);
1087	return (0);
1088}
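
/*
 * Illustrative sketch, not used by the driver (the example_* type is
 * hypothetical): the data captured by the bcopy() above is the SPC-3
 * PERSISTENT RESERVE OUT parameter list, a fixed 24-byte block for the
 * basic service actions, laid out roughly as follows.  The trailing
 * 2 * MHIOC_RESV_KEY_SIZE bytes of vhci_prout_t excluded from the copy
 * are bookkeeping keys maintained by the driver itself.
 */
typedef struct example_prout_plist {
	uchar_t	res_key[MHIOC_RESV_KEY_SIZE];	/* reservation key */
	uchar_t	svc_key[MHIOC_RESV_KEY_SIZE];	/* service action resv key */
	uchar_t	scope_addr[4];			/* obsolete in SPC-3 */
	uchar_t	flags;				/* SPEC_I_PT, ALL_TG_PT, APTPL */
	uchar_t	reserved;
	uchar_t	extent_len[2];			/* obsolete in SPC-3 */
} example_prout_plist_t;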
1089
1090/*
1091 * Function name : vhci_scsi_start()
1092 *
1093 * Return Values : TRAN_FATAL_ERROR	- vhci has been shutdown
1094 *					  or other fatal failure
1095 *					  preventing packet transportation
1096 *		   TRAN_BUSY		- request queue is full
1097 *		   TRAN_ACCEPT		- pkt has been submitted to phci
1098 *					  (or is held in the waitQ)
1099 * Description	 : Implements SCSA's tran_start() entry point for
1100 *		   packet transport
1101 *
1102 */
1103static int
1104vhci_scsi_start(struct scsi_address *ap, struct scsi_pkt *pkt)
1105{
1106	int			rval = TRAN_ACCEPT;
1107	int			instance, held;
1108	struct scsi_vhci	*vhci = ADDR2VHCI(ap);
1109	struct scsi_vhci_lun	*vlun = ADDR2VLUN(ap);
1110	struct vhci_pkt		*vpkt = TGTPKT2VHCIPKT(pkt);
1111	int			flags = 0;
1112	scsi_vhci_priv_t	*svp;
1113	dev_info_t 		*cdip;
1114	client_lb_t		lbp;
1115	int			restore_lbp = 0;
1116	/* set if pkt is SCSI-II RESERVE cmd */
1117	int			pkt_reserve_cmd = 0;
1118	int			reserve_failed = 0;
1119
1120	ASSERT(vhci != NULL);
1121	ASSERT(vpkt != NULL);
1122	ASSERT(vpkt->vpkt_state != VHCI_PKT_ISSUED);
1123	cdip = ADDR2DIP(ap);
1124
1125	/*
1126	 * Block IOs if the LUN is held or quiesced.
1127	 */
1128	if ((VHCI_LUN_IS_HELD(vlun)) ||
1129	    ((vlun->svl_flags & VLUN_QUIESCED_FLG) == VLUN_QUIESCED_FLG)) {
1130		return (TRAN_BUSY);
1131	}
1132
1133	/*
1134	 * vhci_lun needs to be quiesced before SCSI-II RESERVE command
1135	 * can be issued.  This may require a cv_timedwait, which is
1136	 * is a RESERVE command, a taskq is dispatched to service it.
1137	 * That taskq calls vhci_scsi_start again, this time guaranteed
1138	 * not to be in an interrupt context.
1139	 * sure its not in an interrupt context.
1140	 */
1141	if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
1142	    (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
1143		if (!(vpkt->vpkt_state & VHCI_PKT_THRU_TASKQ)) {
1144			if (taskq_dispatch(vhci->vhci_taskq,
1145			    vhci_dispatch_scsi_start, (void *) vpkt,
1146			    KM_NOSLEEP)) {
1147				return (TRAN_ACCEPT);
1148			} else {
1149				return (TRAN_BUSY);
1150			}
1151		}
1152
1153		/*
1154		 * Here we ensure that SCSI-II RESERVE cmds are not serviced
1155		 * simultaneously for a lun.
1156		 */
1157		VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
1158		if (!held) {
1159			return (TRAN_BUSY);
1160		} else if ((vlun->svl_flags & VLUN_QUIESCED_FLG) ==
1161		    VLUN_QUIESCED_FLG) {
1162			VHCI_RELEASE_LUN(vlun);
1163			return (TRAN_BUSY);
1164		}
1165
1166		/*
1167		 * To ensure that no IOs occur for this LUN for the duration
1168		 * of this pkt, set the VLUN_QUIESCED_FLG.
1169		 * If this routine needs to exit on error, make sure that
1170		 * this flag is cleared.
1171		 */
1172		vlun->svl_flags |= VLUN_QUIESCED_FLG;
1173		pkt_reserve_cmd = 1;
1174
1175		/*
1176		 * If this is a SCSI-II RESERVE command, set the load balancing
1177		 * policy to ALTERNATE PATH to ensure that all subsequent
1178		 * IOs are routed on the same path.  This is because if commands
1179		 * are routed across multiple paths, then IOs on paths other than
1180		 * the one on which the RESERVE was executed will get a
1181		 * RESERVATION CONFLICT.
1182		 */
1183		lbp = mdi_get_lb_policy(cdip);
1184		if (lbp != LOAD_BALANCE_NONE) {
1185			if (vhci_quiesce_lun(vlun) != 1) {
1186				vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1187				VHCI_RELEASE_LUN(vlun);
1188				return (TRAN_FATAL_ERROR);
1189			}
1190			vlun->svl_lb_policy_save = lbp;
1191			if (mdi_set_lb_policy(cdip, LOAD_BALANCE_NONE) !=
1192			    MDI_SUCCESS) {
1193				vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1194				VHCI_RELEASE_LUN(vlun);
1195				return (TRAN_FATAL_ERROR);
1196			}
1197			restore_lbp = 1;
1198		}
1199		/*
1200		 * See comments for VLUN_RESERVE_ACTIVE_FLG in scsi_vhci.h.
1201		 * To narrow the window where a reserve command may be sent
1202		 * down an inactive path, the path states first need to be
1203		 * updated.  Before calling vhci_update_pathstates, reset
1204		 * VLUN_RESERVE_ACTIVE_FLG, just in case it was already set
1205		 * for this lun.  This prevents an unnecessary reset
1206		 * from being sent out.
1207		 */
1208		vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
1209		vhci_update_pathstates((void *)vlun);
1210	}
1211
1212	instance = ddi_get_instance(vhci->vhci_dip);
1213
1214	/*
1215	 * If the command is PRIN with a service action of zero (READ KEYS),
1216	 * the cmd is reading PR keys, which requires filtering on completion.
1217	 * Data cache sync must be guaranteed.
1218	 */
1219	if ((pkt->pkt_cdbp[0] == SCMD_PRIN) &&
1220	    (pkt->pkt_cdbp[1] == 0) &&
1221	    (vpkt->vpkt_org_vpkt == NULL)) {
1222		vpkt->vpkt_tgt_init_pkt_flags |= PKT_CONSISTENT;
1223	}
1224
1225	/*
1226	 * Do not defer bind for PKT_DMA_PARTIAL
1227	 */
1228	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1229
1230		/* This is a non pkt_dma_partial case */
1231		if ((rval = vhci_bind_transport(
1232		    ap, vpkt, vpkt->vpkt_tgt_init_pkt_flags, NULL_FUNC))
1233		    != TRAN_ACCEPT) {
1234			VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1235			    "!vhci%d %x: failed to bind transport: "
1236			    "vlun 0x%p pkt_reserved %x restore_lbp %x, "
1237			    "lbp %x", instance, rval, (void *)vlun,
1238			    pkt_reserve_cmd, restore_lbp, lbp));
1239			if (restore_lbp)
1240				(void) mdi_set_lb_policy(cdip, lbp);
1241			if (pkt_reserve_cmd)
1242				vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1243			return (rval);
1244		}
1245		VHCI_DEBUG(8, (CE_NOTE, NULL,
1246		    "vhci_scsi_start: v_b_t called 0x%p\n", (void *)vpkt));
1247	}
1248	ASSERT(vpkt->vpkt_hba_pkt != NULL);
1249	ASSERT(vpkt->vpkt_path != NULL);
1250
1251	/*
1252	 * This is the chance to adjust the pHCI's pkt and other information
1253	 * from the target driver's pkt.
1254	 */
1255	VHCI_DEBUG(8, (CE_NOTE, vhci->vhci_dip, "vhci_scsi_start vpkt %p\n",
1256	    (void *)vpkt));
1257	vhci_update_pHCI_pkt(vpkt, pkt);
1258
1259	if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
1260		if (vpkt->vpkt_path != vlun->svl_resrv_pip) {
1261			VHCI_DEBUG(1, (CE_WARN, vhci->vhci_dip,
1262			    "!vhci_bind: reserve flag set for vlun 0x%p, but, "
1263			    "pktpath 0x%p resrv path 0x%p differ. lb_policy %x",
1264			    (void *)vlun, (void *)vpkt->vpkt_path,
1265			    (void *)vlun->svl_resrv_pip,
1266			    mdi_get_lb_policy(cdip)));
1267			reserve_failed = 1;
1268		}
1269	}
1270
1271	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(
1272	    vpkt->vpkt_path);
1273	if (svp == NULL || reserve_failed) {
1274		if (pkt_reserve_cmd) {
1275			VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1276			    "!vhci_bind returned null svp vlun 0x%p",
1277			    (void *)vlun));
1278			vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1279			if (restore_lbp)
1280				(void) mdi_set_lb_policy(cdip, lbp);
1281		}
1282pkt_cleanup:
1283		if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1284			scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1285			vpkt->vpkt_hba_pkt = NULL;
1286			if (vpkt->vpkt_path) {
1287				mdi_rele_path(vpkt->vpkt_path);
1288				vpkt->vpkt_path = NULL;
1289			}
1290		}
1291		if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1292		    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1293		    ((pkt->pkt_cdbp[1] & 0x1f) ==
1294		    VHCI_PROUT_R_AND_IGNORE))) {
1295			sema_v(&vlun->svl_pgr_sema);
1296		}
1297		return (TRAN_BUSY);
1298	}
1299
1300	VHCI_INCR_PATH_CMDCOUNT(svp);
1301
1302	/*
1303	 * Ensure that no other IOs raced ahead while a RESERVE cmd was
1304	 * QUIESCING the same lun.
1305	 */
1306	if ((!pkt_reserve_cmd) &&
1307	    ((vlun->svl_flags & VLUN_QUIESCED_FLG) == VLUN_QUIESCED_FLG)) {
1308		VHCI_DECR_PATH_CMDCOUNT(svp);
1309		goto pkt_cleanup;
1310	}
1311
1312	if ((pkt->pkt_cdbp[0] == SCMD_PRIN) ||
1313	    (pkt->pkt_cdbp[0] == SCMD_PROUT)) {
1314		/*
1315		 * currently this thread only handles running PGR
1316		 * commands, so don't bother creating it unless
1317		 * something interesting is going to happen (like
1318		 * either a PGR out, or a PGR in with enough space
1319		 * to hold the keys that are getting returned)
1320		 */
1321		mutex_enter(&vlun->svl_mutex);
1322		if (((vlun->svl_flags & VLUN_TASK_D_ALIVE_FLG) == 0) &&
1323		    (pkt->pkt_cdbp[0] == SCMD_PROUT)) {
1324			vlun->svl_taskq = taskq_create("vlun_pgr_task_daemon",
1325			    1, MINCLSYSPRI, 1, 4, 0);
1326			vlun->svl_flags |= VLUN_TASK_D_ALIVE_FLG;
1327		}
1328		mutex_exit(&vlun->svl_mutex);
1329		if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1330		    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1331		    ((pkt->pkt_cdbp[1] & 0x1f) ==
1332		    VHCI_PROUT_R_AND_IGNORE))) {
1333			if (rval = vhci_pgr_register_start(vlun, pkt)) {
1334				/* an error */
1335				sema_v(&vlun->svl_pgr_sema);
1336				return (rval);
1337			}
1338		}
1339	}
1340
1341	/*
1342	 * SCSI-II RESERVE cmd is not expected in polled mode.
1343	 * If this changes it needs to be handled for the polled scenario.
1344	 */
1345	flags = vpkt->vpkt_hba_pkt->pkt_flags;
1346	rval = scsi_transport(vpkt->vpkt_hba_pkt);
1347	if (rval == TRAN_ACCEPT) {
1348		if (flags & FLAG_NOINTR) {
1349			struct scsi_pkt *tpkt = vpkt->vpkt_tgt_pkt;
1350			struct scsi_pkt *pkt = vpkt->vpkt_hba_pkt;
1351
1352			ASSERT(tpkt != NULL);
1353			*(tpkt->pkt_scbp) = *(pkt->pkt_scbp);
1354			tpkt->pkt_resid = pkt->pkt_resid;
1355			tpkt->pkt_state = pkt->pkt_state;
1356			tpkt->pkt_statistics = pkt->pkt_statistics;
1357			tpkt->pkt_reason = pkt->pkt_reason;
1358
1359			if ((*(pkt->pkt_scbp) == STATUS_CHECK) &&
1360			    (pkt->pkt_state & STATE_ARQ_DONE)) {
1361				bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
1362				    vpkt->vpkt_tgt_init_scblen);
1363			}
1364
1365			VHCI_DECR_PATH_CMDCOUNT(svp);
1366			if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1367				scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1368				vpkt->vpkt_hba_pkt = NULL;
1369				if (vpkt->vpkt_path) {
1370					mdi_rele_path(vpkt->vpkt_path);
1371					vpkt->vpkt_path = NULL;
1372				}
1373			}
1374			/*
1375			 * This path will not automatically retry pkts
1376			 * internally; therefore, vpkt_org_vpkt should
1377			 * never be set.
1378			 */
1379			ASSERT(vpkt->vpkt_org_vpkt == NULL);
1380			if (tpkt->pkt_comp) {
1381				(*tpkt->pkt_comp)(tpkt);
1382			}
1383		}
1384		return (rval);
1385	} else if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
1386	    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
1387	    ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
1388		/* the command exited with bad status */
1389		sema_v(&vlun->svl_pgr_sema);
1390	} else if (vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PRIN) {
1391		/* the command exited with bad status */
1392		sema_v(&vlun->svl_pgr_sema);
1393	} else if (pkt_reserve_cmd) {
1394		VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1395		    "!vhci_scsi_start: reserve failed vlun 0x%p",
1396		    (void *)vlun));
1397		vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
1398		if (restore_lbp)
1399			(void) mdi_set_lb_policy(cdip, lbp);
1400	}
1401
1402	ASSERT(vpkt->vpkt_hba_pkt != NULL);
1403	VHCI_DECR_PATH_CMDCOUNT(svp);
1404
1405	/* Do not destroy phci packet information for PKT_DMA_PARTIAL */
1406	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
1407		scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1408		vpkt->vpkt_hba_pkt = NULL;
1409		if (vpkt->vpkt_path) {
1410			MDI_PI_ERRSTAT(vpkt->vpkt_path, MDI_PI_TRANSERR);
1411			mdi_rele_path(vpkt->vpkt_path);
1412			vpkt->vpkt_path = NULL;
1413		}
1414	}
1415	return (TRAN_BUSY);
1416}
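
/*
 * Illustrative sketch, not part of the driver (the example_* name is
 * hypothetical): vhci_scsi_start() above is reached when a target driver
 * stacked on scsi_vhci transports a packet through the usual SCSA
 * sequence, e.g. a polled TEST UNIT READY like this.
 */
static int
example_polled_tur(struct scsi_address *ap)
{
	struct scsi_pkt	*pkt;
	int		err = 0;

	/* group 0 CDB, one byte of status, no data, no target private */
	pkt = scsi_init_pkt(ap, NULL, NULL, CDB_GROUP0, 1, 0, 0,
	    SLEEP_FUNC, NULL);
	if (pkt == NULL)
		return (ENOMEM);

	(void) scsi_setup_cdb((union scsi_cdb *)pkt->pkt_cdbp,
	    SCMD_TEST_UNIT_READY, 0, 0, 0);
	pkt->pkt_time = 60;		/* command timeout, in seconds */
	pkt->pkt_flags |= FLAG_NOINTR;	/* polled: completes before return */

	/* ends up in vhci_scsi_start(), FLAG_NOINTR branch */
	if (scsi_transport(pkt) != TRAN_ACCEPT ||
	    pkt->pkt_reason != CMD_CMPLT ||
	    (*pkt->pkt_scbp & STATUS_MASK) != STATUS_GOOD)
		err = EIO;

	scsi_destroy_pkt(pkt);
	return (err);
}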
1417
1418/*
1419 * Function name : vhci_scsi_reset()
1420 *
1421 * Return Values : 0 - reset failed
1422 *		   1 - reset succeeded
1423 */
1424
1425/* ARGSUSED */
1426static int
1427vhci_scsi_reset(struct scsi_address *ap, int level)
1428{
1429	int rval = 0;
1430
1431	cmn_err(CE_WARN, "!vhci_scsi_reset 0x%x", level);
1432	if ((level == RESET_TARGET) || (level == RESET_LUN)) {
1433		return (vhci_scsi_reset_target(ap, level, TRUE));
1434	} else if (level == RESET_ALL) {
1435		return (vhci_scsi_reset_bus(ap));
1436	}
1437
1438	return (rval);
1439}
1440
1441/*
1442 * vhci_recovery_reset:
1443 *	Issues a reset to the device
1444 * Input:
1445 *	vlun - vhci lun pointer of the device
1446 *	ap - address of the device
1447 *	select_path:
1448 *		If select_path is FALSE, then the address specified in ap is
1449 *		the path on which reset will be issued.
1450 *		If select_path is TRUE, then path is obtained by calling
1451 *		mdi_select_path.
1452 *
1453 *	recovery_depth:
1454 *		Caller can specify the level of reset.
1455 *		VHCI_DEPTH_LUN -
1456 *			Issues LUN RESET if device supports lun reset.
1457 *		VHCI_DEPTH_TARGET -
1458 *			If Lun Reset fails or the device does not support
1459 *			Lun Reset, issues TARGET RESET
1460 *		VHCI_DEPTH_ALL -
1461 *			If Lun Reset fails or the device does not support
1462 *			Lun Reset, issues TARGET RESET.
1463 *			If TARGET RESET does not succeed, issues Bus Reset.
1464 */
1465
1466static int
1467vhci_recovery_reset(scsi_vhci_lun_t *vlun, struct scsi_address *ap,
1468	uint8_t select_path, uint8_t recovery_depth)
1469{
1470	int	ret = 0;
1471
1472	ASSERT(ap != NULL);
1473
1474	if (vlun && vlun->svl_support_lun_reset == 1) {
1475		ret = vhci_scsi_reset_target(ap, RESET_LUN,
1476		    select_path);
1477	}
1478
1479	recovery_depth--;
1480
1481	if ((ret == 0) && recovery_depth) {
1482		ret = vhci_scsi_reset_target(ap, RESET_TARGET,
1483		    select_path);
1484		recovery_depth--;
1485	}
1486
1487	if ((ret == 0) && recovery_depth) {
1488		(void) scsi_reset(ap, RESET_ALL);
1489	}
1490
1491	return (ret);
1492}
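
/*
 * For illustration: sense-driven error recovery invokes the routine
 * above along these lines, trying a LUN reset first (when supported)
 * and escalating one level to a target reset if that fails:
 *
 *	(void) vhci_recovery_reset(vlun, &pkt->pkt_address,
 *	    TRUE, VHCI_DEPTH_TARGET);
 */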
1493
1494/*
1495 * Note: The scsi_address passed to this routine could be the scsi_address
1496 * for the virtual device or the physical device.  No assumptions should
1497 * be made in this routine about the ap structure; in particular, the
1498 * a_hba_tran->tran_tgt_private field of ap cannot be assumed to be the vhci.
1499 * Further note that the child dip would be the dip of the ssd node irrespective
1500 * of the scsi_address passed.
1501 */
1502
1503static int
1504vhci_scsi_reset_target(struct scsi_address *ap, int level, uint8_t select_path)
1505{
1506	dev_info_t		*vdip, *pdip, *cdip = ADDR2DIP(ap);
1507	mdi_pathinfo_t		*pip = NULL;
1508	mdi_pathinfo_t		*npip = NULL;
1509	int			rval = -1;
1510	scsi_vhci_priv_t	*svp = NULL;
1511	struct scsi_address	*pap = NULL;
1512	scsi_hba_tran_t		*hba = NULL;
1513	int			sps;
1514	struct scsi_vhci	*vhci = NULL;
1515
1516	if (select_path != TRUE) {
1517		ASSERT(ap != NULL);
1518		if (level == RESET_LUN) {
1519			hba = ap->a_hba_tran;
1520			ASSERT(hba != NULL);
1521			return ((*hba->tran_reset)(ap, RESET_LUN));
1522		}
1523		return (scsi_reset(ap, level));
1524	}
1525
1526	ASSERT(cdip != NULL);
1527	vdip = ddi_get_parent(cdip);
1528	ASSERT(vdip != NULL);
1529	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
1530	ASSERT(vhci != NULL);
1531
1532	rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH, NULL, &pip);
1533	if ((rval != MDI_SUCCESS) || (pip == NULL)) {
1534		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1535		    "Unable to get a path, dip 0x%p", (void *)cdip));
1536		return (0);
1537	}
1538again:
1539	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
1540	if (svp == NULL) {
1541		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1542		    "priv is NULL, pip 0x%p", (void *)pip));
1543		mdi_rele_path(pip);
1544		return (0);
1545	}
1546
1547	if (svp->svp_psd == NULL) {
1548		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_scsi_reset_target: "
1549		    "psd is NULL, pip 0x%p, svp 0x%p",
1550		    (void *)pip, (void *)svp));
1551		mdi_rele_path(pip);
1552		return (0);
1553	}
1554
1555	pap = &svp->svp_psd->sd_address;
1556	hba = pap->a_hba_tran;
1557
1558	ASSERT(pap != NULL);
1559	ASSERT(hba != NULL);
1560
1561	if (hba->tran_reset != NULL) {
1562		if ((*hba->tran_reset)(pap, level) == 0) {
1563			pdip = mdi_pi_get_phci(pip);
1564			vhci_log(CE_WARN, vdip, "!(%s%d):"
1565			    " path (%s%d), reset %d failed",
1566			    ddi_driver_name(cdip), ddi_get_instance(cdip),
1567			    ddi_driver_name(pdip), ddi_get_instance(pdip),
1568			    level);
1569
1570			/*
1571			 * Select the next path and issue the reset; repeat
1572			 * until all paths are exhausted
1573			 */
1574			sps = mdi_select_path(cdip, NULL,
1575			    MDI_SELECT_ONLINE_PATH, pip, &npip);
1576			if ((sps != MDI_SUCCESS) || (npip == NULL)) {
1577				mdi_rele_path(pip);
1578				return (0);
1579			}
1580			mdi_rele_path(pip);
1581			pip = npip;
1582			goto again;
1583		}
1584		mdi_rele_path(pip);
1585		mutex_enter(&vhci->vhci_mutex);
1586		scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
1587		    &vhci->vhci_reset_notify_listf);
1588		mutex_exit(&vhci->vhci_mutex);
1589		VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_scsi_reset_target: "
1590		    "reset %d sent down pip:%p for cdip:%p\n", level,
1591		    (void *)pip, (void *)cdip));
1592		return (1);
1593	}
1594	mdi_rele_path(pip);
1595	return (0);
1596}
1597
1598
1599/* ARGSUSED */
1600static int
1601vhci_scsi_reset_bus(struct scsi_address *ap)
1602{
1603	return (1);
1604}
1605
1606
1607/*
1608 * called by vhci_scsi_getcap and vhci_scsi_setcap to get and set (respectively)
1609 * SCSI capabilities
1610 */
1611/* ARGSUSED */
1612static int
1613vhci_commoncap(struct scsi_address *ap, char *cap,
1614    int val, int tgtonly, int doset)
1615{
1616	struct scsi_vhci		*vhci = ADDR2VHCI(ap);
1617	struct scsi_vhci_lun		*vlun = ADDR2VLUN(ap);
1618	int			cidx;
1619	int			rval = 0;
1620
1621	if (cap == (char *)0) {
1622		VHCI_DEBUG(3, (CE_WARN, vhci->vhci_dip,
1623		    "!vhci_commoncap: invalid arg"));
1624		return (rval);
1625	}
1626
1627	if ((cidx = scsi_hba_lookup_capstr(cap)) == -1) {
1628		return (UNDEFINED);
1629	}
1630
1631	/*
1632	 * Process setcap request.
1633	 */
1634	if (doset) {
1635		/*
1636		 * At present, we can only set binary (0/1) values
1637		 */
1638		switch (cidx) {
1639		case SCSI_CAP_ARQ:
1640			if (val == 0) {
1641				rval = 0;
1642			} else {
1643				rval = 1;
1644			}
1645			break;
1646
1647		case SCSI_CAP_LUN_RESET:
1648			if (tgtonly == 0) {
1649				VHCI_DEBUG(1, (CE_WARN, vhci->vhci_dip,
1650				    "scsi_vhci_setcap: "
1651				    "Returning error since whom = 0"));
1652				rval = -1;
1653				break;
1654			}
1655			/*
1656			 * Set the capability accordingly.
1657			 */
1658			mutex_enter(&vlun->svl_mutex);
1659			vlun->svl_support_lun_reset = val;
1660			rval = val;
1661			mutex_exit(&vlun->svl_mutex);
1662			break;
1663
1664		case SCSI_CAP_SECTOR_SIZE:
1665			mutex_enter(&vlun->svl_mutex);
1666			vlun->svl_sector_size = val;
1667			vlun->svl_setcap_done = 1;
1668			mutex_exit(&vlun->svl_mutex);
1669			(void) vhci_pHCI_cap(ap, cap, val, tgtonly, NULL);
1670
1671			/* Always return success */
1672			rval = 1;
1673			break;
1674
1675		default:
1676			VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1677			    "!vhci_setcap: unsupported %d", cidx));
1678			rval = UNDEFINED;
1679			break;
1680		}
1681
1682		VHCI_DEBUG(6, (CE_NOTE, vhci->vhci_dip,
1683		    "!set cap: cap=%s, val/tgtonly/doset/rval = "
1684		    "0x%x/0x%x/0x%x/%d\n",
1685		    cap, val, tgtonly, doset, rval));
1686
1687	} else {
1688		/*
1689		 * Process getcap request.
1690		 */
1691		switch (cidx) {
1692		case SCSI_CAP_DMA_MAX:
1693			rval = (int)VHCI_DMA_MAX_XFER_CAP;
1694			break;
1695
1696		case SCSI_CAP_INITIATOR_ID:
1697			rval = 0x00;
1698			break;
1699
1700		case SCSI_CAP_ARQ:
1701		case SCSI_CAP_RESET_NOTIFICATION:
1702		case SCSI_CAP_TAGGED_QING:
1703			rval = 1;
1704			break;
1705
1706		case SCSI_CAP_SCSI_VERSION:
1707			rval = 3;
1708			break;
1709
1710		case SCSI_CAP_INTERCONNECT_TYPE:
1711			rval = INTERCONNECT_FABRIC;
1712			break;
1713
1714		case SCSI_CAP_LUN_RESET:
1715			/*
1716			 * scsi_vhci will always return success for LUN reset.
1717			 * When a request for a LUN reset comes in
1718			 * through the scsi_reset entry point, an attempt
1719			 * will be made to do the reset through all the
1720			 * possible paths.
1721			 */
1722			mutex_enter(&vlun->svl_mutex);
1723			rval = vlun->svl_support_lun_reset;
1724			mutex_exit(&vlun->svl_mutex);
1725			VHCI_DEBUG(4, (CE_WARN, vhci->vhci_dip,
1726			    "scsi_vhci_getcap:"
1727			    "Getting the Lun reset capability %d", rval));
1728			break;
1729
1730		case SCSI_CAP_SECTOR_SIZE:
1731			mutex_enter(&vlun->svl_mutex);
1732			rval = vlun->svl_sector_size;
1733			mutex_exit(&vlun->svl_mutex);
1734			break;
1735
1736		default:
1737			VHCI_DEBUG(6, (CE_WARN, vhci->vhci_dip,
1738			    "!vhci_getcap: unsupported %d", cidx));
1739			rval = UNDEFINED;
1740			break;
1741		}
1742
1743		VHCI_DEBUG(6, (CE_NOTE, vhci->vhci_dip,
1744		    "!get cap: cap=%s, val/tgtonly/doset/rval = "
1745		    "0x%x/0x%x/0x%x/%d\n",
1746		    cap, val, tgtonly, doset, rval));
1747	}
1748	return (rval);
1749}
1750
1751
1752/*
1753 * Function name : vhci_scsi_getcap()
1754 *
1755 */
1756static int
1757vhci_scsi_getcap(struct scsi_address *ap, char *cap, int whom)
1758{
1759	return (vhci_commoncap(ap, cap, 0, whom, 0));
1760}
1761
1762static int
1763vhci_scsi_setcap(struct scsi_address *ap, char *cap, int value, int whom)
1764{
1765	return (vhci_commoncap(ap, cap, value, whom, 1));
1766}
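
/*
 * Illustrative sketch, not part of the driver (the example_* name is
 * hypothetical): a target driver above scsi_vhci reaches the getcap and
 * setcap entry points through the standard SCSA wrappers, e.g.:
 */
static void
example_query_lun_reset(struct scsi_address *ap)
{
	/* scsi_ifgetcap() returns the cap value, or -1 if undefined */
	if (scsi_ifgetcap(ap, "lun-reset", 1) != 1) {
		/* record LUN reset support via vhci_commoncap() */
		(void) scsi_ifsetcap(ap, "lun-reset", 1, 1);
	}
}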
1767
1768/*
1769 * Function name : vhci_scsi_abort()
1770 */
1771/* ARGSUSED */
1772static int
1773vhci_scsi_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
1774{
1775	return (0);
1776}
1777
1778/*
1779 * Function name : vhci_scsi_init_pkt
1780 *
1781 * Return Values : pointer to scsi_pkt, or NULL
1782 */
1783/* ARGSUSED */
1784static struct scsi_pkt *
1785vhci_scsi_init_pkt(struct scsi_address *ap, struct scsi_pkt *pkt,
1786	struct buf *bp, int cmdlen, int statuslen, int tgtlen,
1787	int flags, int (*callback)(caddr_t), caddr_t arg)
1788{
1789	struct scsi_vhci	*vhci = ADDR2VHCI(ap);
1790	struct vhci_pkt		*vpkt;
1791	int			rval;
1792	int			newpkt = 0;
1793	struct scsi_pkt		*pktp;
1794
1795
1796	if (pkt == NULL) {
1797		if (cmdlen > VHCI_SCSI_CDB_SIZE) {
1798			VHCI_DEBUG(1, (CE_NOTE, NULL,
1799			    "!init pkt: cdb size not supported\n"));
1800			return (NULL);
1801		}
1802
1803		pktp = scsi_hba_pkt_alloc(vhci->vhci_dip,
1804		    ap, cmdlen, statuslen, tgtlen, sizeof (*vpkt), callback,
1805		    arg);
1806
1807		if (pktp == NULL) {
1808			return (NULL);
1809		}
1810
1811		/* Get the vhci's private structure */
1812		vpkt = (struct vhci_pkt *)(pktp->pkt_ha_private);
1813		ASSERT(vpkt);
1814
1815		/* Save the target driver's packet */
1816		vpkt->vpkt_tgt_pkt = pktp;
1817
1818		/*
1819		 * Save pkt_tgt_init_pkt fields if deferred binding
1820		 * is needed or for other purposes.
1821		 */
1822		vpkt->vpkt_tgt_init_pkt_flags = flags;
1823		vpkt->vpkt_flags = (callback == NULL_FUNC) ? CFLAG_NOWAIT : 0;
1824		vpkt->vpkt_state = VHCI_PKT_IDLE;
1825		vpkt->vpkt_tgt_init_cdblen = cmdlen;
1826		vpkt->vpkt_tgt_init_scblen = statuslen;
1827		vpkt->vpkt_tgt_init_privlen = tgtlen;
1828		newpkt = 1;
1829	} else { /* pkt not NULL */
1830		vpkt = pkt->pkt_ha_private;
1831	}
1832
1833	VHCI_DEBUG(8, (CE_NOTE, NULL, "vhci_scsi_init_pkt "
1834	    "vpkt %p flags %x\n", (void *)vpkt, flags));
1835
1836	/* Clear any stale error flags */
1837	if (bp) {
1838		bioerror(bp, 0);
1839	}
1840
1841	vpkt->vpkt_tgt_init_bp = bp;
1842
1843	if (flags & PKT_DMA_PARTIAL) {
1844
1845		/*
1846		 * Immediate binding is needed.
1847		 * The target driver may not set this flag on the next
1848		 * invocation; vhci has to remember that it was set during
1849		 * the first invocation of vhci_scsi_init_pkt.
1850		 */
1851		vpkt->vpkt_flags |= CFLAG_DMA_PARTIAL;
1852	}
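	/*
	 * (E.g. a target driver may pass PKT_DMA_PARTIAL only on the first
	 * scsi_init_pkt call for a transfer and then re-call for each DMA
	 * window without it; CFLAG_DMA_PARTIAL preserves that first choice.)
	 */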
1853
1854	if (vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) {
1855
1856		/*
1857		 * Re-initialize some of the target driver packet state
1858		 * information.
1859		 */
1860		vpkt->vpkt_tgt_pkt->pkt_state = 0;
1861		vpkt->vpkt_tgt_pkt->pkt_statistics = 0;
1862		vpkt->vpkt_tgt_pkt->pkt_reason = 0;
1863
1864		/*
1865		 * Bind vpkt->vpkt_path for this I/O at init time.
1866		 * If an I/O error happens later, the target driver will
1867		 * clear this binding before re-initializing the I/O.
1868		 */
1869		VHCI_DEBUG(8, (CE_NOTE, NULL,
1870		    "vhci_scsi_init_pkt: calling v_b_t %p, newpkt %d\n",
1871		    (void *)vpkt, newpkt));
1872		if (pkt && vpkt->vpkt_hba_pkt) {
1873			VHCI_DEBUG(4, (CE_NOTE, NULL,
1874			    "v_s_i_p calling update_pHCI_pkt resid %ld\n",
1875			    pkt->pkt_resid));
1876			vhci_update_pHCI_pkt(vpkt, pkt);
1877		}
1878		if (callback == SLEEP_FUNC) {
1879			rval = vhci_bind_transport(
1880			    ap, vpkt, flags, callback);
1881		} else {
1882			rval = vhci_bind_transport(
1883			    ap, vpkt, flags, NULL_FUNC);
1884		}
1885		VHCI_DEBUG(8, (CE_NOTE, NULL,
1886		    "vhci_scsi_init_pkt: v_b_t called 0x%p rval 0x%x\n",
1887		    (void *)vpkt, rval));
1888		if (bp) {
1889			if (rval == TRAN_FATAL_ERROR) {
1890				/*
1891				 * No paths available. Could not bind
1892				 * any pHCI. Setting EFAULT as a way
1893				 * to indicate no DMA is mapped.
1894				 */
1895				bioerror(bp, EFAULT);
1896			} else {
1897				/*
1898				 * Do not indicate any pHCI errors to
1899				 * target driver otherwise.
1900				 */
1901				bioerror(bp, 0);
1902			}
1903		}
1904		if (rval != TRAN_ACCEPT) {
1905			VHCI_DEBUG(8, (CE_NOTE, NULL,
1906			    "vhci_scsi_init_pkt: "
1907			    "v_b_t failed 0x%p newpkt %x\n",
1908			    (void *)vpkt, newpkt));
1909			if (newpkt) {
1910				scsi_hba_pkt_free(ap,
1911				    vpkt->vpkt_tgt_pkt);
1912			}
1913			return (NULL);
1914		}
1915		ASSERT(vpkt->vpkt_hba_pkt != NULL);
1916		ASSERT(vpkt->vpkt_path != NULL);
1917
1918		/* Update the resid for the target driver */
1919		vpkt->vpkt_tgt_pkt->pkt_resid =
1920		    vpkt->vpkt_hba_pkt->pkt_resid;
1921	}
1922
1923	return (vpkt->vpkt_tgt_pkt);
1924}
1925
1926/*
1927 * Function name : vhci_scsi_destroy_pkt
1928 *
1929 * Return Values : none
1930 */
1931static void
1932vhci_scsi_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
1933{
1934	struct vhci_pkt		*vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
1935
1936	VHCI_DEBUG(8, (CE_NOTE, NULL,
1937	    "vhci_scsi_destroy_pkt: vpkt 0x%p\n", (void *)vpkt));
1938
1939	vpkt->vpkt_tgt_init_pkt_flags = 0;
1940	if (vpkt->vpkt_hba_pkt) {
1941		scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1942		vpkt->vpkt_hba_pkt = NULL;
1943	}
1944	if (vpkt->vpkt_path) {
1945		mdi_rele_path(vpkt->vpkt_path);
1946		vpkt->vpkt_path = NULL;
1947	}
1948
1949	ASSERT(vpkt->vpkt_state != VHCI_PKT_ISSUED);
1950	scsi_hba_pkt_free(ap, vpkt->vpkt_tgt_pkt);
1951}
1952
1953/*
1954 * Function name : vhci_scsi_dmafree()
1955 *
1956 * Return Values : none
1957 */
1958/*ARGSUSED*/
1959static void
1960vhci_scsi_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt)
1961{
1962	struct vhci_pkt	*vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
1963
1964	VHCI_DEBUG(6, (CE_NOTE, NULL,
1965	    "vhci_scsi_dmafree: vpkt 0x%p\n", (void *)vpkt));
1966
1967	ASSERT(vpkt != NULL);
1968	if (vpkt->vpkt_hba_pkt) {
1969		scsi_destroy_pkt(vpkt->vpkt_hba_pkt);
1970		vpkt->vpkt_hba_pkt = NULL;
1971	}
1972	if (vpkt->vpkt_path) {
1973		mdi_rele_path(vpkt->vpkt_path);
1974		vpkt->vpkt_path = NULL;
1975	}
1976}
1977
1978/*
1979 * Function name : vhci_scsi_sync_pkt()
1980 *
1981 * Return Values : none
1982 */
1983/*ARGSUSED*/
1984static void
1985vhci_scsi_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
1986{
1987	struct vhci_pkt	*vpkt = (struct vhci_pkt *)pkt->pkt_ha_private;
1988
1989	ASSERT(vpkt != NULL);
1990	if (vpkt->vpkt_hba_pkt) {
1991		scsi_sync_pkt(vpkt->vpkt_hba_pkt);
1992	}
1993}
1994
1995/*
1996 * routine for reset notification setup, to register or cancel.
1997 */
1998static int
1999vhci_scsi_reset_notify(struct scsi_address *ap, int flag,
2000	void (*callback)(caddr_t), caddr_t arg)
2001{
2002	struct scsi_vhci *vhci = ADDR2VHCI(ap);
2003	return (scsi_hba_reset_notify_setup(ap, flag, callback, arg,
2004	    &vhci->vhci_mutex, &vhci->vhci_reset_notify_listf));
2005}
2006
2007static int
2008vhci_scsi_get_name_bus_addr(struct scsi_device *sd,
2009    char *name, int len, int bus_addr)
2010{
2011	dev_info_t		*cdip;
2012	char			*guid;
2013	scsi_vhci_lun_t		*vlun;
2014
2015	ASSERT(sd != NULL);
2016	ASSERT(name != NULL);
2017
2018	cdip = sd->sd_dev;
2019
2020	ASSERT(cdip != NULL);
2021
2022	if (mdi_component_is_client(cdip, NULL) != MDI_SUCCESS) {
2023		name[0] = '\0';
2024		return (1);
2025	}
2026
2027	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, PROPFLAGS,
2028	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
2029		name[0] = '\0';
2030		return (1);
2031	}
2032
2033	vlun = ADDR2VLUN(&sd->sd_address);
2034	if (bus_addr && vlun && vlun->svl_fops_name) {
2035		/* report the guid and the name of the failover module */
2036		(void) snprintf(name, len, "g%s %s", guid, vlun->svl_fops_name);
2037	} else {
2038		/* report the guid */
2039		(void) snprintf(name, len, "g%s", guid);
2040	}
2041
2042	ddi_prop_free(guid);
2043	return (1);
2044}
2045
2046static int
2047vhci_scsi_get_bus_addr(struct scsi_device *sd, char *name, int len)
2048{
2049	return (vhci_scsi_get_name_bus_addr(sd, name, len, 1));
2050}
2051
2052static int
2053vhci_scsi_get_name(struct scsi_device *sd, char *name, int len)
2054{
2055	return (vhci_scsi_get_name_bus_addr(sd, name, len, 0));
2056}
2057
2058/*
2059 * Return a pointer to the guid part of the devnm.
2060 * devnm format is "nodename@busaddr", busaddr format is "gGUID".
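 * For example (hypothetical GUID), devnm "ssd@g600a0b800029e8d6" yields
 * a pointer to "600a0b800029e8d6".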
2061 */
2062static char *
2063vhci_devnm_to_guid(char *devnm)
2064{
2065	char *cp = devnm;
2066
2067	if (devnm == NULL)
2068		return (NULL);
2069
2070	while (*cp != '\0' && *cp != '@')
2071		cp++;
2072	if (*cp == '@' && *(cp + 1) == 'g')
2073		return (cp + 2);
2074	return (NULL);
2075}
2076
2077static int
2078vhci_bind_transport(struct scsi_address *ap, struct vhci_pkt *vpkt, int flags,
2079    int (*func)(caddr_t))
2080{
2081	struct scsi_vhci	*vhci = ADDR2VHCI(ap);
2082	dev_info_t		*cdip = ADDR2DIP(ap);
2083	mdi_pathinfo_t		*pip = NULL;
2084	mdi_pathinfo_t		*npip = NULL;
2085	scsi_vhci_priv_t	*svp = NULL;
2086	struct scsi_device	*psd = NULL;
2087	struct scsi_address	*address = NULL;
2088	struct scsi_pkt		*pkt = NULL;
2089	int			rval = -1;
2090	int			pgr_sema_held = 0;
2091	int			held;
2092	int			mps_flag = MDI_SELECT_ONLINE_PATH;
2093	struct scsi_vhci_lun	*vlun;
2094	time_t			tnow;
2095
2096	vlun = ADDR2VLUN(ap);
2097	ASSERT(vlun != NULL);
2098
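	/*
	 * The PERSISTENT RESERVE OUT service action lives in the low five
	 * bits of CDB byte 1 (SPC-3), hence the 0x1f mask before comparing
	 * against REGISTER and REGISTER AND IGNORE EXISTING KEY.
	 */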
2099	if ((vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PROUT) &&
2100	    (((vpkt->vpkt_tgt_pkt->pkt_cdbp[1] & 0x1f) ==
2101	    VHCI_PROUT_REGISTER) ||
2102	    ((vpkt->vpkt_tgt_pkt->pkt_cdbp[1] & 0x1f) ==
2103	    VHCI_PROUT_R_AND_IGNORE))) {
2104		if (!sema_tryp(&vlun->svl_pgr_sema))
2105			return (TRAN_BUSY);
2106		pgr_sema_held = 1;
2107		if (vlun->svl_first_path != NULL) {
2108			rval = mdi_select_path(cdip, NULL,
2109			    MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH,
2110			    NULL, &pip);
2111			if ((rval != MDI_SUCCESS) || (pip == NULL)) {
2112				VHCI_DEBUG(4, (CE_NOTE, NULL,
2113				    "vhci_bind_transport: path select fail\n"));
2114			} else {
2115				npip = pip;
2116				do {
2117					if (npip == vlun->svl_first_path) {
2118						VHCI_DEBUG(4, (CE_NOTE, NULL,
2119						    "vhci_bind_transport: "
2120						    "valid first path 0x%p\n",
2121						    (void *)
2122						    vlun->svl_first_path));
2123						pip = vlun->svl_first_path;
2124						goto bind_path;
2125					}
2126					pip = npip;
2127					rval = mdi_select_path(cdip, NULL,
2128					    MDI_SELECT_ONLINE_PATH |
2129					    MDI_SELECT_STANDBY_PATH,
2130					    pip, &npip);
2131					mdi_rele_path(pip);
2132				} while ((rval == MDI_SUCCESS) &&
2133				    (npip != NULL));
2134			}
2135		}
2136
2137		if (vlun->svl_first_path) {
2138			VHCI_DEBUG(4, (CE_NOTE, NULL,
2139			    "vhci_bind_transport: invalid first path 0x%p\n",
2140			    (void *)vlun->svl_first_path));
2141			vlun->svl_first_path = NULL;
2142		}
2143	} else if (vpkt->vpkt_tgt_pkt->pkt_cdbp[0] == SCMD_PRIN) {
2144		if ((vpkt->vpkt_state & VHCI_PKT_THRU_TASKQ) == 0) {
2145			if (!sema_tryp(&vlun->svl_pgr_sema))
2146				return (TRAN_BUSY);
2147		}
2148		pgr_sema_held = 1;
2149	}
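	/*
	 * At this point svl_pgr_sema, when held, serializes PGR commands on
	 * this LUN; it is released in vhci_intr(), vhci_run_cmd() or
	 * vhci_do_prin() once the command (and any internal replication or
	 * retry) completes, or on the error paths below.
	 */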
2150
2151	/*
2152	 * If the path is already bound (the PKT_DMA_PARTIAL case),
2153	 * try to use the same path.
2154	 */
2155	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) && vpkt->vpkt_path) {
2156		VHCI_DEBUG(4, (CE_NOTE, NULL,
2157		    "vhci_bind_transport: PKT_PARTIAL_DMA "
2158		    "vpkt 0x%p, path 0x%p\n",
2159		    (void *)vpkt, (void *)vpkt->vpkt_path));
2160		pip = vpkt->vpkt_path;
2161		goto bind_path;
2162	}
2163
2164	/*
2165	 * If a reservation is active, bind the transport directly to the pip
2166	 * with the reservation.
2167	 */
2168	if (vpkt->vpkt_hba_pkt == NULL) {
2169		if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
2170			if (MDI_PI_IS_ONLINE(vlun->svl_resrv_pip)) {
2171				pip = vlun->svl_resrv_pip;
2172				mdi_hold_path(pip);
2173				vlun->svl_waiting_for_activepath = 0;
2174				rval = MDI_SUCCESS;
2175				goto bind_path;
2176			} else {
2177				if (pgr_sema_held) {
2178					sema_v(&vlun->svl_pgr_sema);
2179				}
2180				return (TRAN_BUSY);
2181			}
2182		}
2183try_again:
2184		rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp, 0, NULL,
2185		    &pip);
2186		if (rval == MDI_BUSY) {
2187			if (pgr_sema_held) {
2188				sema_v(&vlun->svl_pgr_sema);
2189			}
2190			return (TRAN_BUSY);
2191		} else if (rval == MDI_DEVI_ONLINING) {
2192			/*
2193			 * if we are here then we are in the midst of
2194			 * an attach/probe of the client device.
2195			 * We attempt to bind to an ONLINE path if available;
2196			 * else it is OK to bind to a STANDBY path (instead
2197			 * of triggering a failover) because I/O associated
2198			 * with attach/probe (e.g. INQUIRY, block 0 read) is
2199			 * completed by targets even on passive paths.
2200			 * If no ONLINE paths are available, it is important
2201			 * to set svl_waiting_for_activepath for two
2202			 * reasons: (1) avoid sense analysis in the
2203			 * "external failure detection" codepath in
2204			 * vhci_intr().  Failure to do so will result in an
2205			 * infinite loop (unless an ONLINE path becomes
2206			 * available at some point) (2) avoid
2207			 * unnecessary failover (see "---Waiting For Active
2208			 * Path---" comment below).
2209			 */
2210			VHCI_DEBUG(1, (CE_NOTE, NULL, "!%p in onlining "
2211			    "state\n", (void *)cdip));
2212			pip = NULL;
2213			rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
2214			    mps_flag, NULL, &pip);
2215			if ((rval != MDI_SUCCESS) || (pip == NULL)) {
2216				if (vlun->svl_waiting_for_activepath == 0) {
2217					vlun->svl_waiting_for_activepath = 1;
2218					vlun->svl_wfa_time = ddi_get_time();
2219				}
2220				mps_flag |= MDI_SELECT_STANDBY_PATH;
2221				rval = mdi_select_path(cdip,
2222				    vpkt->vpkt_tgt_init_bp,
2223				    mps_flag, NULL, &pip);
2224				if ((rval != MDI_SUCCESS) || (pip == NULL)) {
2225					if (pgr_sema_held) {
2226						sema_v(&vlun->svl_pgr_sema);
2227					}
2228					return (TRAN_FATAL_ERROR);
2229				}
2230				goto bind_path;
2231			}
2232		} else if (rval == MDI_FAILURE) {
2233			if (pgr_sema_held) {
2234				sema_v(&vlun->svl_pgr_sema);
2235			}
2236			return (TRAN_FATAL_ERROR);
2237		}
2238
2239		if ((pip == NULL) || (rval == MDI_NOPATH)) {
2240			while (vlun->svl_waiting_for_activepath) {
2241				/*
2242				 * ---Waiting For Active Path---
2243				 * passive path; let's wait a little
2244				 * bit; hopefully an active path will
2245				 * show up, obviating the need for a
2246				 * failover.
2247				 * failover
2248				 */
2249				tnow = ddi_get_time();
2250				if (tnow - vlun->svl_wfa_time >= 60) {
2251					vlun->svl_waiting_for_activepath = 0;
2252				} else {
2253					drv_usecwait(1000);
2254					if (vlun->svl_waiting_for_activepath
2255					    == 0) {
2256						/*
2257						 * an active path has come
2258						 * online!
2259						 */
2260						goto try_again;
2261					}
2262				}
2263			}
2264			VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
2265			if (!held) {
2266				VHCI_DEBUG(4, (CE_NOTE, NULL,
2267				    "!Lun not held\n"));
2268				if (pgr_sema_held) {
2269					sema_v(&vlun->svl_pgr_sema);
2270				}
2271				return (TRAN_BUSY);
2272			}
2273			/*
2274			 * now that the LUN is stable, one last check
2275			 * to make sure no other changes sneaked in
2276			 * (like a path coming online or a
2277			 * failover initiated by another thread)
2278			 */
2279			pip = NULL;
2280			rval = mdi_select_path(cdip, vpkt->vpkt_tgt_init_bp,
2281			    0, NULL, &pip);
2282			if (pip != NULL) {
2283				VHCI_RELEASE_LUN(vlun);
2284				vlun->svl_waiting_for_activepath = 0;
2285				goto bind_path;
2286			}
2287
2288			/*
2289			 * Check if there is an ONLINE path OR a STANDBY path
2290			 * available. If none is available, do not attempt
2291			 * to do a failover, just return a fatal error at this
2292			 * point.
2293			 */
2294			npip = NULL;
2295			rval = mdi_select_path(cdip, NULL,
2296			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
2297			    NULL, &npip);
2298			if ((npip == NULL) || (rval != MDI_SUCCESS)) {
2299				/*
2300				 * No paths available, just return a FATAL error.
2301				 */
2302				VHCI_RELEASE_LUN(vlun);
2303				if (pgr_sema_held) {
2304					sema_v(&vlun->svl_pgr_sema);
2305				}
2306				return (TRAN_FATAL_ERROR);
2307			}
2308			mdi_rele_path(npip);
2309			VHCI_DEBUG(1, (CE_NOTE, NULL, "!invoking "
2310			    "mdi_failover\n"));
2311			rval = mdi_failover(vhci->vhci_dip, cdip,
2312			    MDI_FAILOVER_ASYNC);
2313			if (rval == MDI_FAILURE) {
2314				VHCI_RELEASE_LUN(vlun);
2315				if (pgr_sema_held) {
2316					sema_v(&vlun->svl_pgr_sema);
2317				}
2318				return (TRAN_FATAL_ERROR);
2319			} else if (rval == MDI_BUSY) {
2320				VHCI_RELEASE_LUN(vlun);
2321				if (pgr_sema_held) {
2322					sema_v(&vlun->svl_pgr_sema);
2323				}
2324				return (TRAN_BUSY);
2325			} else {
2326				if (pgr_sema_held) {
2327					sema_v(&vlun->svl_pgr_sema);
2328				}
2329				return (TRAN_BUSY);
2330			}
2331		}
2332		vlun->svl_waiting_for_activepath = 0;
2333bind_path:
2334		vpkt->vpkt_path = pip;
2335		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
2336		ASSERT(svp != NULL);
2337
2338		psd = svp->svp_psd;
2339		ASSERT(psd != NULL);
2340		address = &psd->sd_address;
2341	} else {
2342		pkt = vpkt->vpkt_hba_pkt;
2343		address = &pkt->pkt_address;
2344	}
2345
2346	/*
2347	 * For the PKT_DMA_PARTIAL case, call the pHCI's scsi_init_pkt
2348	 * whenever the target driver calls vhci_scsi_init_pkt.
2349	 */
2350	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) &&
2351	    vpkt->vpkt_path && vpkt->vpkt_hba_pkt) {
2352		VHCI_DEBUG(4, (CE_NOTE, NULL,
2353		    "vhci_bind_transport: PKT_PARTIAL_DMA "
2354		    "vpkt 0x%p, path 0x%p hba_pkt 0x%p\n",
2355		    (void *)vpkt, (void *)vpkt->vpkt_path, (void *)pkt));
2356		pkt = vpkt->vpkt_hba_pkt;
2357		address = &pkt->pkt_address;
2358	}
2359
2360	if (pkt == NULL || (vpkt->vpkt_flags & CFLAG_DMA_PARTIAL)) {
2361		pkt = scsi_init_pkt(address, pkt,
2362		    vpkt->vpkt_tgt_init_bp, vpkt->vpkt_tgt_init_cdblen,
2363		    vpkt->vpkt_tgt_init_scblen,
2364		    vpkt->vpkt_tgt_init_privlen, flags, func, NULL);
2365
2366		if (pkt == NULL) {
2367			VHCI_DEBUG(4, (CE_NOTE, NULL,
2368			    "!bind transport: 0x%p 0x%p 0x%p\n",
2369			    (void *)vhci, (void *)psd, (void *)vpkt));
2370			if ((vpkt->vpkt_hba_pkt == NULL) && vpkt->vpkt_path) {
2371				MDI_PI_ERRSTAT(vpkt->vpkt_path,
2372				    MDI_PI_TRANSERR);
2373				mdi_rele_path(vpkt->vpkt_path);
2374				vpkt->vpkt_path = NULL;
2375			}
2376			if (pgr_sema_held) {
2377				sema_v(&vlun->svl_pgr_sema);
2378			}
2379			/*
2380			 * Looks like a fatal error.
2381			 * Maybe the device disappeared underneath.
2382			 * Give the target driver another chance to
2383			 * retry and get another path.
2384			 */
2385			return (TRAN_BUSY);
2386		}
2387	}
2388
2389	pkt->pkt_private = vpkt;
2390	vpkt->vpkt_hba_pkt = pkt;
2391	return (TRAN_ACCEPT);
2392}
2393
2394
2395/*PRINTFLIKE3*/
2396void
2397vhci_log(int level, dev_info_t *dip, const char *fmt, ...)
2398{
2399	char		buf[256];
2400	va_list		ap;
2401
2402	va_start(ap, fmt);
2403	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
2404	va_end(ap);
2405
2406	scsi_log(dip, "scsi_vhci", level, buf);
2407}
2408
2409/* do a PGR out with the information we've saved away */
2410static int
2411vhci_do_prout(scsi_vhci_priv_t *svp)
2412{
2413
2414	struct scsi_pkt			*new_pkt;
2415	struct buf			*bp;
2416	scsi_vhci_lun_t			*vlun;
2417	int				rval, retry, nr_retry, ua_retry;
2418	struct scsi_extended_sense	*sns;
2419
2420	bp = getrbuf(KM_SLEEP);
2421	bp->b_flags = B_WRITE;
2422	bp->b_resid = 0;
2423
2424	VHCI_INCR_PATH_CMDCOUNT(svp);
2425	vlun = svp->svp_svl;
2426
2427	new_pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
2428	    CDB_GROUP1, sizeof (struct scsi_arq_status), 0, 0,
2429	    SLEEP_FUNC, NULL);
2430	if (new_pkt == NULL) {
2431		VHCI_DECR_PATH_CMDCOUNT(svp);
2432		freerbuf(bp);
2433		cmn_err(CE_WARN, "!vhci_do_prout: scsi_init_pkt failed");
2434		return (0);
2435	}
2436	mutex_enter(&vlun->svl_mutex);
2437	bp->b_un.b_addr = (caddr_t)&vlun->svl_prout;
2438	bp->b_bcount = vlun->svl_bcount;
2439	bcopy(vlun->svl_cdb, new_pkt->pkt_cdbp,
2440	    sizeof (vlun->svl_cdb));
2441	new_pkt->pkt_time = vlun->svl_time;
2442	mutex_exit(&vlun->svl_mutex);
2443	new_pkt->pkt_flags = FLAG_NOINTR;
2444
2445	ua_retry = nr_retry = retry = 0;
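	/*
	 * Retry loop: a NOT READY sense is retried up to
	 * vhci_prout_not_ready_retry times with a one second delay per
	 * attempt, while any other retryable sense (e.g. UNIT ATTENTION)
	 * is capped at VHCI_MAX_PGR_RETRIES.
	 */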
2446again:
2447	rval = vhci_do_scsi_cmd(new_pkt);
2448	if (rval != 1) {
2449		if ((new_pkt->pkt_reason == CMD_CMPLT) &&
2450		    (SCBP_C(new_pkt) == STATUS_CHECK) &&
2451		    (new_pkt->pkt_state & STATE_ARQ_DONE)) {
2452			sns = &(((struct scsi_arq_status *)(uintptr_t)
2453			    (new_pkt->pkt_scbp))->sts_sensedata);
2454			if ((sns->es_key == KEY_UNIT_ATTENTION) ||
2455			    (sns->es_key == KEY_NOT_READY)) {
2456				int max_retry;
2457				struct scsi_failover_ops *fops;
2458				fops = vlun->svl_fops;
2459				rval = (*fops->sfo_analyze_sense)
2460				    (svp->svp_psd, sns,
2461				    vlun->svl_fops_ctpriv);
2462				if (rval == SCSI_SENSE_NOT_READY) {
2463					max_retry = vhci_prout_not_ready_retry;
2464					retry = nr_retry++;
2465					delay(1*drv_usectohz(1000000));
2466				} else {
2467					/* chk for state change and update */
2468					if (rval == SCSI_SENSE_STATE_CHANGED) {
2469						int held;
2470						VHCI_HOLD_LUN(vlun,
2471						    VH_NOSLEEP, held);
2472						if (!held) {
2473							rval = TRAN_BUSY;
2474						} else {
2475							/* chk for alua first */
2476							vhci_update_pathstates(
2477							    (void *)vlun);
2478						}
2479					}
2480					retry = ua_retry++;
2481					max_retry = VHCI_MAX_PGR_RETRIES;
2482				}
2483				if (retry < max_retry) {
2484					VHCI_DEBUG(4, (CE_WARN, NULL,
2485					    "!vhci_do_prout retry 0x%x "
2486					    "(0x%x 0x%x 0x%x)",
2487					    SCBP_C(new_pkt),
2488					    new_pkt->pkt_cdbp[0],
2489					    new_pkt->pkt_cdbp[1],
2490					    new_pkt->pkt_cdbp[2]));
2491					goto again;
2492				}
2493				rval = 0;
2494				VHCI_DEBUG(4, (CE_WARN, NULL,
2495				    "!vhci_do_prout 0x%x "
2496				    "(0x%x 0x%x 0x%x)",
2497				    SCBP_C(new_pkt),
2498				    new_pkt->pkt_cdbp[0],
2499				    new_pkt->pkt_cdbp[1],
2500				    new_pkt->pkt_cdbp[2]));
2501			} else if (sns->es_key == KEY_ILLEGAL_REQUEST)
2502				rval = VHCI_PGR_ILLEGALOP;
2503		}
2504	} else {
2505		rval = 1;
2506	}
2507	scsi_destroy_pkt(new_pkt);
2508	VHCI_DECR_PATH_CMDCOUNT(svp);
2509	freerbuf(bp);
2510	return (rval);
2511}
2512
2513static void
2514vhci_run_cmd(void *arg)
2515{
2516	struct scsi_pkt		*pkt = (struct scsi_pkt *)arg;
2517	struct scsi_pkt		*tpkt;
2518	scsi_vhci_priv_t	*svp;
2519	mdi_pathinfo_t		*pip, *npip;
2520	scsi_vhci_lun_t		*vlun;
2521	dev_info_t		*cdip;
2522	scsi_vhci_priv_t	*nsvp;
2523	int			fail = 0;
2524	int			rval;
2525	struct vhci_pkt		*vpkt;
2526	uchar_t			cdb_1;
2527	vhci_prout_t		*prout;
2528
2529	vpkt = (struct vhci_pkt *)pkt->pkt_private;
2530	tpkt = vpkt->vpkt_tgt_pkt;
2531	pip = vpkt->vpkt_path;
2532	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
2533	if (svp == NULL) {
2534		tpkt->pkt_reason = CMD_TRAN_ERR;
2535		tpkt->pkt_statistics = STAT_ABORTED;
2536		goto done;
2537	}
2538	vlun = svp->svp_svl;
2539	prout = &vlun->svl_prout;
2540	if (SCBP_C(pkt) != STATUS_GOOD)
2541		fail++;
2542	cdip = vlun->svl_dip;
2543	pip = npip = NULL;
2544	rval = mdi_select_path(cdip, NULL,
2545	    MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH, NULL, &npip);
2546	if ((rval != MDI_SUCCESS) || (npip == NULL)) {
2547		VHCI_DEBUG(4, (CE_NOTE, NULL,
2548		    "vhci_run_cmd: no path! 0x%p\n", (void *)svp));
2549		tpkt->pkt_reason = CMD_TRAN_ERR;
2550		tpkt->pkt_statistics = STAT_ABORTED;
2551		goto done;
2552	}
2553
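	/*
	 * Replicate the registration on the remaining paths: keep the upper
	 * bits of CDB byte 1 but swap the service action (its low five bits)
	 * to REGISTER AND IGNORE EXISTING KEY; the original service action
	 * is restored once every path has been tried.
	 */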
2554	cdb_1 = vlun->svl_cdb[1];
2555	vlun->svl_cdb[1] &= 0xe0;
2556	vlun->svl_cdb[1] |= VHCI_PROUT_R_AND_IGNORE;
2557
2558	do {
2559		nsvp = (scsi_vhci_priv_t *)
2560		    mdi_pi_get_vhci_private(npip);
2561		if (nsvp == NULL) {
2562			VHCI_DEBUG(4, (CE_NOTE, NULL,
2563			    "vhci_run_cmd: no "
2564			    "client priv! 0x%p offlined?\n",
2565			    (void *)npip));
2566			goto next_path;
2567		}
2568		if (vlun->svl_first_path == npip) {
2569			goto next_path;
2570		} else {
2571			if (vhci_do_prout(nsvp) != 1)
2572				fail++;
2573		}
2574next_path:
2575		pip = npip;
2576		rval = mdi_select_path(cdip, NULL,
2577		    MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
2578		    pip, &npip);
2579		mdi_rele_path(pip);
2580	} while ((rval == MDI_SUCCESS) && (npip != NULL));
2581
2582	vlun->svl_cdb[1] = cdb_1;
2583
2584	if (fail) {
2585		VHCI_DEBUG(4, (CE_WARN, NULL, "%s%d: key registration failed, "
2586		    "couldn't be replicated on all paths",
2587		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
2588		vhci_print_prout_keys(vlun, "vhci_run_cmd: ");
2589
2590		if (SCBP_C(pkt) != STATUS_GOOD) {
2591			tpkt->pkt_reason = CMD_TRAN_ERR;
2592			tpkt->pkt_statistics = STAT_ABORTED;
2593		}
2594	} else {
2595		vlun->svl_pgr_active = 1;
2596		vhci_print_prout_keys(vlun, "vhci_run_cmd: before bcopy:");
2597
2598		bcopy((const void *)prout->service_key,
2599		    (void *)prout->active_service_key, MHIOC_RESV_KEY_SIZE);
2600		bcopy((const void *)prout->res_key,
2601		    (void *)prout->active_res_key, MHIOC_RESV_KEY_SIZE);
2602
2603		vhci_print_prout_keys(vlun, "vhci_run_cmd: after bcopy:");
2604	}
2605done:
2606	if (SCBP_C(pkt) == STATUS_GOOD)
2607		vlun->svl_first_path = NULL;
2608
2609	if (svp)
2610		VHCI_DECR_PATH_CMDCOUNT(svp);
2611
2612	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
2613		scsi_destroy_pkt(pkt);
2614		vpkt->vpkt_hba_pkt = NULL;
2615		if (vpkt->vpkt_path) {
2616			mdi_rele_path(vpkt->vpkt_path);
2617			vpkt->vpkt_path = NULL;
2618		}
2619	}
2620
2621	sema_v(&vlun->svl_pgr_sema);
2622	/*
2623	 * The PROUT commands are not included in the automatic retry
2624	 * mechanism; therefore, vpkt_org_vpkt should never be set here.
2625	 */
2626	ASSERT(vpkt->vpkt_org_vpkt == NULL);
2627	if (tpkt->pkt_comp)
2628		(*tpkt->pkt_comp)(tpkt);
2629
2630}
2631
2632/*
2633 * Get the keys registered with this target.  Since we will have
2634 * registered the same key with multiple initiators, strip out
2635 * any duplicate keys.
2636 *
2637 * The key list used to filter the registered keys returned from
2638 * the device is chosen as follows.  If the
2639 * allocation length of the buffer was sufficient for the number of
2640 * parameter data bytes available to be returned by the device then the
2641 * key filtering will use the keylist returned from the original
2642 * request.  If the allocation length of the buffer was not sufficient,
2643 * then the filtering will use the keylist returned from the request
2644 * that is resent below.
2645 *
2646 * If the device returns an additional length field that is greater than
2647 * the allocation length of the buffer, then allocate a new buffer which
2648 * can accommodate the number of parameter data bytes available to be
2649 * returned.  Resend the scsi PRIN command, filter out the duplicate
2650 * keys and return as many of the unique keys found that was originally
2651 * requested and set the additional length field equal to the data bytes
2652 * of unique reservation keys available to be returned.
2653 *
2654 * If the device returns an additional length field that is less than or
2655 * equal to the allocation length of the buffer, then all the available
2656 * keys registered were returned by the device.  Filter out the
2657 * duplicate keys and return all of the unique keys found and set the
2658 * additional length field equal to the data bytes of the reservation
2659 * keys to be returned.
2660 */
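/*
 * Worked example (illustrative numbers): READ KEYS parameter data has an
 * 8-byte header (4-byte generation and 4-byte additional-length fields,
 * per hdr_len below) followed by 8-byte keys (MHIOC_RESV_KEY_SIZE).  If
 * the caller supplied a 24-byte buffer (room for two keys) but the device
 * reports an ADDITIONAL LENGTH of 64 (eight keys registered), the command
 * is reissued with a 72-byte buffer, the eight keys are filtered for
 * duplicates, and as many unique keys as fit are copied back into the
 * original 24-byte buffer.
 */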
2661static int
2662vhci_do_prin(struct vhci_pkt **vpkt)
2663{
2664	scsi_vhci_priv_t *svp = (scsi_vhci_priv_t *)
2665	    mdi_pi_get_vhci_private((*vpkt)->vpkt_path);
2666	vhci_prin_readkeys_t *prin;
2667	scsi_vhci_lun_t *vlun = svp->svp_svl;
2668	struct scsi_vhci *vhci =
2669	    ADDR2VHCI(&((*vpkt)->vpkt_tgt_pkt->pkt_address));
2670
2671	struct buf		*new_bp = NULL;
2672	struct scsi_pkt		*new_pkt = NULL;
2673	struct vhci_pkt		*new_vpkt = NULL;
2674	int			hdr_len = 0;
2675	int			rval = VHCI_CMD_CMPLT;
2676	uint32_t		prin_length = 0;
2677	uint32_t		svl_prin_length = 0;
2678
2679	prin = (vhci_prin_readkeys_t *)
2680	    bp_mapin_common((*vpkt)->vpkt_tgt_init_bp, VM_NOSLEEP);
2681
2682	if (prin != NULL) {
2683		prin_length = BE_32(prin->length);
2684	}
2685
2686	if (prin == NULL) {
2687		VHCI_DEBUG(5, (CE_WARN, NULL,
2688		    "vhci_do_prin: bp_mapin_common failed."));
2689		rval = VHCI_CMD_ERROR;
2690	} else {
2691		/*
2692		 * According to SPC-3r22, sec 4.3.4.6: "If the amount of
2693		 * information to be transferred exceeds the maximum value
2694		 * that the ALLOCATION LENGTH field is capable of specifying,
2695		 * the device server shall...terminate the command with CHECK
2696		 * CONDITION status".  The ALLOCATION LENGTH field of the
2697		 * PERSISTENT RESERVE IN command is 2 bytes. We should never
2698		 * get here with an ADDITIONAL LENGTH greater than 0xFFFF
2699		 * so if we do, then it is an error!
2700		 */
2701
2702		hdr_len = sizeof (prin->length) + sizeof (prin->generation);
2703
2704		if ((prin_length + hdr_len) > 0xFFFF) {
2705			VHCI_DEBUG(5, (CE_NOTE, NULL,
2706			    "vhci_do_prin: Device returned invalid "
2707			    "length 0x%x\n", prin_length));
2708			rval = VHCI_CMD_ERROR;
2709		}
2710	}
2711
2712	/*
2713	 * If prin->length is greater than the byte count allocated in the
2714	 * original buffer, then resend the request with enough buffer
2715	 * allocated to get all of the available registered keys.
2716	 */
2717	if (rval != VHCI_CMD_ERROR) {
2718		if (((*vpkt)->vpkt_tgt_init_bp->b_bcount - hdr_len) <
2719		    prin_length) {
2720			if ((*vpkt)->vpkt_org_vpkt == NULL) {
2721				new_pkt = vhci_create_retry_pkt(*vpkt);
2722				if (new_pkt != NULL) {
2723					new_vpkt = TGTPKT2VHCIPKT(new_pkt);
2724
2725					/*
2726					 * This is the buf whose buffer
2727					 * the device will return the prin
2728					 * readkeys data into.
2729					 */
2730					new_bp = scsi_alloc_consistent_buf(
2731					    &svp->svp_psd->sd_address,
2732					    NULL, (prin_length + hdr_len),
2733					    ((*vpkt)->vpkt_tgt_init_bp->
2734					    b_flags & (B_READ | B_WRITE)),
2735					    NULL_FUNC, NULL);
2736					if (new_bp != NULL) {
2737						if (new_bp->b_un.b_addr !=
2738						    NULL) {
2739
2740							new_bp->b_bcount =
2741							    prin_length +
2742							    hdr_len;
2743
2744							new_pkt->pkt_cdbp[7] =
2745							    (uchar_t)(new_bp->
2746							    b_bcount >> 8);
2747							new_pkt->pkt_cdbp[8] =
2748							    (uchar_t)new_bp->
2749							    b_bcount;
2750
2751							rval = VHCI_CMD_RETRY;
2752						} else {
2753							rval = VHCI_CMD_ERROR;
2754						}
2755					} else {
2756						rval = VHCI_CMD_ERROR;
2757					}
2758				} else {
2759					rval = VHCI_CMD_ERROR;
2760				}
2761			} else {
2762				rval = VHCI_CMD_ERROR;
2763			}
2764		}
2765	}
2766
2767	if (rval == VHCI_CMD_RETRY) {
2768		new_vpkt->vpkt_tgt_init_bp = new_bp;
2769
2770		/*
2771		 * Release the old path because it does not matter which path
2772		 * this command is sent down.  This allows the normal bind
2773		 * transport mechanism to be used.
2774		 */
2775		if ((*vpkt)->vpkt_path != NULL) {
2776			mdi_rele_path((*vpkt)->vpkt_path);
2777			(*vpkt)->vpkt_path = NULL;
2778		}
2779
2780		/*
2781		 * Dispatch the retry command
2782		 */
2783		if (taskq_dispatch(vhci->vhci_taskq, vhci_dispatch_scsi_start,
2784		    (void *) new_vpkt, KM_NOSLEEP) == NULL) {
2785			rval = VHCI_CMD_ERROR;
2786		} else {
2787			/*
2788			 * If we return VHCI_CMD_RETRY, that means the caller
2789			 * is going to bail and wait for the reissued command
2790			 * to complete.  In that case, we need to decrement
2791			 * the path command count right now.  In any other
2792			 * case, it'll be decremented by the caller.
2793			 */
2794			VHCI_DECR_PATH_CMDCOUNT(svp);
2795		}
2796	}
2797
2798	if ((rval != VHCI_CMD_ERROR) && (rval != VHCI_CMD_RETRY)) {
2799		int new, old;
2800		int data_len = 0;
2801
2802		data_len = prin_length / MHIOC_RESV_KEY_SIZE;
2803		VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_do_prin: %d keys read\n",
2804		    data_len));
2805
2806#ifdef DEBUG
2807		VHCI_DEBUG(5, (CE_NOTE, NULL, "vhci_do_prin: from storage\n"));
2808		if (vhci_debug == 5)
2809			vhci_print_prin_keys(prin, data_len);
2810		VHCI_DEBUG(5, (CE_NOTE, NULL,
2811		    "vhci_do_prin: MPxIO old keys:\n"));
2812		if (vhci_debug == 5)
2813			vhci_print_prin_keys(&vlun->svl_prin, data_len);
2814#endif
2815
2816		/*
2817		 * Filter out all duplicate keys returned from the device.
2818		 * We know that we use a different key for every host, so we
2819		 * can simply strip out duplicates. Otherwise we would need to
2820		 * do more bookkeeping to figure out which keys to strip out.
2821		 */
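		/*
		 * E.g. a key list of {A, A, B, A, C} from the device is
		 * compacted to {A, B, C} with new == 3.
		 */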
2822
2823		new = 0;
2824
2825		if (data_len > 0) {
2826			vlun->svl_prin.keylist[0] = prin->keylist[0];
2827			new++;
2828		}
2829
2830		for (old = 1; old < data_len; old++) {
2831			int j;
2832			int match = 0;
2833			for (j = 0; j < new; j++) {
2834				if (bcmp(&prin->keylist[old],
2835				    &vlun->svl_prin.keylist[j],
2836				    sizeof (mhioc_resv_key_t)) == 0) {
2837					match = 1;
2838					break;
2839				}
2840			}
2841			if (!match) {
2842				vlun->svl_prin.keylist[new] =
2843				    prin->keylist[old];
2844				new++;
2845			}
2846		}
2847
2848		vlun->svl_prin.generation = prin->generation;
2849		svl_prin_length = new * MHIOC_RESV_KEY_SIZE;
2850		vlun->svl_prin.length = BE_32(svl_prin_length);
2851
2852		/*
2853		 * If we arrived at this point after issuing a retry, make sure
2854		 * that we put everything back the way it originally was so
2855		 * that the target driver can complete the command correctly.
2856		 */
2857		if ((*vpkt)->vpkt_org_vpkt != NULL) {
2858			new_bp = (*vpkt)->vpkt_tgt_init_bp;
2859
2860			scsi_free_consistent_buf(new_bp);
2861
2862			*vpkt = vhci_sync_retry_pkt(*vpkt);
2863
2864			/*
2865			 * Make sure the original buffer is mapped into kernel
2866			 * space before we try to copy the filtered keys into
2867			 * it.
2868			 */
2869			prin = (vhci_prin_readkeys_t *)bp_mapin_common(
2870			    (*vpkt)->vpkt_tgt_init_bp, VM_NOSLEEP);
2871		}
2872
2873		/*
2874		 * Now copy the desired number of prin keys into the original
2875		 * target buffer.
2876		 */
2877		if (svl_prin_length <=
2878		    ((*vpkt)->vpkt_tgt_init_bp->b_bcount - hdr_len)) {
2879			/*
2880			 * It is safe to return all of the available unique
2881			 * keys
2882			 */
2883			bcopy(&vlun->svl_prin, prin, svl_prin_length + hdr_len);
2884		} else {
2885			/*
2886			 * Not all of the available keys were requested by the
2887			 * original command.
2888			 */
2889			bcopy(&vlun->svl_prin, prin,
2890			    (*vpkt)->vpkt_tgt_init_bp->b_bcount);
2891		}
2892#ifdef DEBUG
2893		VHCI_DEBUG(5, (CE_NOTE, NULL,
2894		    "vhci_do_prin: To Application:\n"));
2895		if (vhci_debug == 5)
2896			vhci_print_prin_keys(prin, new);
2897		VHCI_DEBUG(5, (CE_NOTE, NULL,
2898		    "vhci_do_prin: MPxIO new keys:\n"));
2899		if (vhci_debug == 5)
2900			vhci_print_prin_keys(&vlun->svl_prin, new);
2901#endif
2902	}
2903
2904	if (rval == VHCI_CMD_ERROR) {
2905		/*
2906		 * If we arrived at this point after issuing a
2907		 * retry, make sure that we put everything back
2908		 * the way it originally was so that ssd can
2909		 * complete the command correctly.
2910		 */
2911
2912		if ((*vpkt)->vpkt_org_vpkt != NULL) {
2913			new_bp = (*vpkt)->vpkt_tgt_init_bp;
2914			if (new_bp != NULL) {
2915				scsi_free_consistent_buf(new_bp);
2916			}
2917
2918			new_vpkt = *vpkt;
2919			*vpkt = (*vpkt)->vpkt_org_vpkt;
2920
2921			vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
2922			    new_vpkt->vpkt_tgt_pkt);
2923		}
2924
2925		/*
2926		 * Mark this command completion as having an error so that
2927		 * ssd will retry the command.
2928		 */
2929
2930		(*vpkt)->vpkt_tgt_pkt->pkt_reason = CMD_ABORTED;
2931		(*vpkt)->vpkt_tgt_pkt->pkt_statistics |= STAT_ABORTED;
2932
2933		rval = VHCI_CMD_CMPLT;
2934	}
2935
2936	/*
2937	 * Make sure that the semaphore is only released once.
2938	 */
2939	if (rval == VHCI_CMD_CMPLT) {
2940		sema_v(&vlun->svl_pgr_sema);
2941	}
2942
2943	return (rval);
2944}
2945
2946static void
2947vhci_intr(struct scsi_pkt *pkt)
2948{
2949	struct vhci_pkt		*vpkt = (struct vhci_pkt *)pkt->pkt_private;
2950	struct scsi_pkt		*tpkt;
2951	scsi_vhci_priv_t	*svp;
2952	scsi_vhci_lun_t		*vlun;
2953	int			rval, held;
2954	struct scsi_failover_ops	*fops;
2955	struct scsi_extended_sense	*sns;
2956	mdi_pathinfo_t		*lpath;
2957	static char		*timeout_err = "Command Timeout";
2958	static char		*parity_err = "Parity Error";
2959	char			*err_str = NULL;
2960	dev_info_t		*vdip, *cdip, *pdip;
2961	char			*cpath, *dpath;
2962
2963	ASSERT(vpkt != NULL);
2964	tpkt = vpkt->vpkt_tgt_pkt;
2965	ASSERT(tpkt != NULL);
2966	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
2967	ASSERT(svp != NULL);
2968	vlun = svp->svp_svl;
2969	ASSERT(vlun != NULL);
2970	lpath = vpkt->vpkt_path;
2971
2972	/*
2973	 * sync up the target driver's pkt with the pkt that
2974	 * we actually used
2975	 */
2976	*(tpkt->pkt_scbp) = *(pkt->pkt_scbp);
2977	tpkt->pkt_resid = pkt->pkt_resid;
2978	tpkt->pkt_state = pkt->pkt_state;
2979	tpkt->pkt_statistics = pkt->pkt_statistics;
2980	tpkt->pkt_reason = pkt->pkt_reason;
2981
2982	if ((pkt->pkt_cdbp[0] == SCMD_PROUT) &&
2983	    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
2984	    ((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_R_AND_IGNORE))) {
2985		if ((SCBP_C(pkt) != STATUS_GOOD) ||
2986		    (pkt->pkt_reason != CMD_CMPLT)) {
2987			sema_v(&vlun->svl_pgr_sema);
2988		}
2989	} else if (pkt->pkt_cdbp[0] == SCMD_PRIN) {
2990		if (pkt->pkt_reason != CMD_CMPLT ||
2991		    (SCBP_C(pkt) != STATUS_GOOD)) {
2992			sema_v(&vlun->svl_pgr_sema);
2993		}
2994	}
2995
2996	switch (pkt->pkt_reason) {
2997	case CMD_CMPLT:
2998		/*
2999		 * cmd completed successfully, check for scsi errors
3000		 */
3001		switch (*(pkt->pkt_scbp)) {
3002		case STATUS_CHECK:
3003			if (pkt->pkt_state & STATE_ARQ_DONE) {
3004				sns = &(((struct scsi_arq_status *)(uintptr_t)
3005				    (pkt->pkt_scbp))->sts_sensedata);
3006				fops = vlun->svl_fops;
3007				ASSERT(fops != NULL);
3008				VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_intr: "
3009				    "Received sns key %x  esc %x  escq %x\n",
3010				    sns->es_key, sns->es_add_code,
3011				    sns->es_qual_code));
3012
3013				if (vlun->svl_waiting_for_activepath == 1) {
3014					/*
3015					 * if we are here it means we are
3016					 * in the midst of a probe/attach
3017					 * through a passive path; this
3018					 * case is exempt from sense analysis
3019					 * for detection of ext. failover
3020					 * because that would unnecessarily
3021					 * increase attach time.
3022					 */
3023					bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3024					    vpkt->vpkt_tgt_init_scblen);
3025					break;
3026				}
3027				if (sns->es_add_code == VHCI_SCSI_PERR) {
3028					/*
3029					 * parity error
3030					 */
3031					err_str = parity_err;
3032					bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3033					    vpkt->vpkt_tgt_init_scblen);
3034					break;
3035				}
3036				rval = (*fops->sfo_analyze_sense)
3037				    (svp->svp_psd, sns, vlun->svl_fops_ctpriv);
3038				if ((rval == SCSI_SENSE_NOFAILOVER) ||
3039				    (rval == SCSI_SENSE_UNKNOWN) ||
3040				    (rval == SCSI_SENSE_NOT_READY)) {
3041					bcopy(pkt->pkt_scbp, tpkt->pkt_scbp,
3042					    vpkt->vpkt_tgt_init_scblen);
3043					break;
3044				} else if (rval == SCSI_SENSE_STATE_CHANGED) {
3045					struct scsi_vhci	*vhci;
3046					vhci = ADDR2VHCI(&tpkt->pkt_address);
3047					VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
3048					if (!held) {
3049						/*
3050						 * looks like some other thread
3051						 * has already detected this
3052						 * condition
3053						 */
3054						tpkt->pkt_state &=
3055						    ~STATE_ARQ_DONE;
3056						*(tpkt->pkt_scbp) =
3057						    STATUS_BUSY;
3058						break;
3059					}
3060					(void) taskq_dispatch(
3061					    vhci->vhci_update_pathstates_taskq,
3062					    vhci_update_pathstates,
3063					    (void *)vlun, KM_SLEEP);
3064				} else {
3065					/*
3066					 * externally initiated failover
3067					 * has occurred or is in progress
3068					 */
3069					VHCI_HOLD_LUN(vlun, VH_NOSLEEP, held);
3070					if (!held) {
3071						/*
3072						 * looks like some other thread
3073						 * has already detected this
3074						 * condition
3075						 */
3076						tpkt->pkt_state &=
3077						    ~STATE_ARQ_DONE;
3078						*(tpkt->pkt_scbp) =
3079						    STATUS_BUSY;
3080						break;
3081					} else {
3082						rval = vhci_handle_ext_fo
3083						    (pkt, rval);
3084						if (rval == BUSY_RETURN) {
3085							tpkt->pkt_state &=
3086							    ~STATE_ARQ_DONE;
3087							*(tpkt->pkt_scbp) =
3088							    STATUS_BUSY;
3089							break;
3090						}
3091						bcopy(pkt->pkt_scbp,
3092						    tpkt->pkt_scbp,
3093						    vpkt->vpkt_tgt_init_scblen);
3094						break;
3095					}
3096				}
3097			}
3098			break;
3099
3100		/*
3101		 * If this is a good SCSI-II RELEASE cmd completion then restore
3102		 * the load balancing policy and reset VLUN_RESERVE_ACTIVE_FLG.
3103		 * If this is a good SCSI-II RESERVE cmd completion then set
3104		 * VLUN_RESERVE_ACTIVE_FLG.
3105		 */
3106		case STATUS_GOOD:
3107			if ((pkt->pkt_cdbp[0] == SCMD_RELEASE) ||
3108			    (pkt->pkt_cdbp[0] == SCMD_RELEASE_G1)) {
3109				(void) mdi_set_lb_policy(vlun->svl_dip,
3110				    vlun->svl_lb_policy_save);
3111				vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
3112				VHCI_DEBUG(1, (CE_WARN, NULL,
3113				    "!vhci_intr: vlun 0x%p release path 0x%p",
3114				    (void *)vlun, (void *)vpkt->vpkt_path));
3115			}
3116
3117			if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3118			    (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3119				vlun->svl_flags |= VLUN_RESERVE_ACTIVE_FLG;
3120				vlun->svl_resrv_pip = vpkt->vpkt_path;
3121				VHCI_DEBUG(1, (CE_WARN, NULL,
3122				    "!vhci_intr: vlun 0x%p reserved path 0x%p",
3123				    (void *)vlun, (void *)vpkt->vpkt_path));
3124			}
3125			break;
3126
3127		case STATUS_RESERVATION_CONFLICT:
3128			VHCI_DEBUG(1, (CE_WARN, NULL,
3129			    "!vhci_intr: vlun 0x%p "
3130			    "reserve conflict on path 0x%p",
3131			    (void *)vlun, (void *)vpkt->vpkt_path));
3132			/* FALLTHROUGH */
3133		default:
3134			break;
3135		}
3136
3137		/*
3138		 * Update I/O completion statistics for the path
3139		 */
3140		mdi_pi_kstat_iosupdate(vpkt->vpkt_path, vpkt->vpkt_tgt_init_bp);
3141
3142		/*
3143		 * Command completed successfully, release the dma binding and
3144		 * destroy the transport side of the packet.
3145		 */
3146		if ((pkt->pkt_cdbp[0] ==  SCMD_PROUT) &&
3147		    (((pkt->pkt_cdbp[1] & 0x1f) == VHCI_PROUT_REGISTER) ||
3148		    ((pkt->pkt_cdbp[1] & 0x1f) ==
3149		    VHCI_PROUT_R_AND_IGNORE))) {
3150			if (SCBP_C(pkt) == STATUS_GOOD) {
3151				ASSERT(vlun->svl_taskq);
3152				svp->svp_last_pkt_reason = pkt->pkt_reason;
3153				(void) taskq_dispatch(vlun->svl_taskq,
3154				    vhci_run_cmd, pkt, KM_SLEEP);
3155				return;
3156			}
3157		}
3158		if ((SCBP_C(pkt) == STATUS_GOOD) &&
3159		    (pkt->pkt_cdbp[0] == SCMD_PRIN) &&
3160		    vpkt->vpkt_tgt_init_bp) {
3161			/*
3162			 * If the action (value in byte 1 of the cdb) is zero,
3163			 * we're reading keys, and that's the only condition
3164			 * where we need to be concerned with filtering keys
3165			 * and potential retries.  Otherwise, we simply signal
3166			 * the semaphore and move on.
3167			 */
3168			if (pkt->pkt_cdbp[1] == 0) {
3169				/*
3170				 * If this is the completion of an internal
3171				 * retry then we need to make sure that the
3172				 * pkt and tpkt pointers are readjusted so
3173				 * the calls to scsi_destroy_pkt and pkt_comp
3174				 * below work correctly.
3175				 */
3176				if (vpkt->vpkt_org_vpkt != NULL) {
3177					pkt = vpkt->vpkt_org_vpkt->vpkt_hba_pkt;
3178					tpkt = vpkt->vpkt_org_vpkt->
3179					    vpkt_tgt_pkt;
3180
3181					/*
3182					 * If this command was issued through
3183					 * the taskq then we need to clear
3184					 * this flag for proper processing in
3185					 * the case of a retry from the target
3186					 * driver.
3187					 */
3188					vpkt->vpkt_state &=
3189					    ~VHCI_PKT_THRU_TASKQ;
3190				}
3191
3192				/*
3193				 * if vhci_do_prin returns VHCI_CMD_CMPLT then
3194				 * vpkt will contain the address of the
3195				 * original vpkt
3196				 */
3197				if (vhci_do_prin(&vpkt) == VHCI_CMD_RETRY) {
3198					/*
3199					 * The command has been resent to get
3200					 * all the keys from the device.  Don't
3201					 * complete the command with ssd until
3202					 * the retry completes.
3203					 */
3204					return;
3205				}
3206			} else {
3207				sema_v(&vlun->svl_pgr_sema);
3208			}
3209		}
3210
3211		break;
3212
3213	case CMD_TIMEOUT:
3214		if ((pkt->pkt_statistics &
3215		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
3216
3217			VHCI_DEBUG(1, (CE_NOTE, NULL,
3218			    "!scsi vhci timeout invoked\n"));
3219
3220			(void) vhci_recovery_reset(vlun, &pkt->pkt_address,
3221			    FALSE, VHCI_DEPTH_ALL);
3222		}
3223		MDI_PI_ERRSTAT(lpath, MDI_PI_TRANSERR);
3224		tpkt->pkt_statistics |= STAT_ABORTED;
3225		err_str = timeout_err;
3226		break;
3227
3228	case CMD_TRAN_ERR:
3229		/*
3230		 * This status is returned if the transport has sent the cmd
3231		 * down the link to the target and then some error occurs.
3232	 * In the case of a SCSI-II RESERVE cmd, we don't know if the
3233	 * reservation has been accepted by the target or not, so we need
3234		 * to clear the reservation.
3235		 */
3236		if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3237		    (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3238			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_intr received"
3239			    " cmd_tran_err for scsi-2 reserve cmd\n"));
3240			if (!vhci_recovery_reset(vlun, &pkt->pkt_address,
3241			    TRUE, VHCI_DEPTH_TARGET)) {
3242				VHCI_DEBUG(1, (CE_WARN, NULL,
3243				    "!vhci_intr cmd_tran_err reset failed!"));
3244			}
3245		}
3246		break;
3247
3248	case CMD_DEV_GONE:
3249		tpkt->pkt_reason = CMD_CMPLT;
3250		tpkt->pkt_state = STATE_GOT_BUS |
3251		    STATE_GOT_TARGET | STATE_SENT_CMD |
3252		    STATE_GOT_STATUS;
3253		*(tpkt->pkt_scbp) = STATUS_BUSY;
3254		break;
3255
3256	default:
3257		break;
3258	}
3259
3260	/*
3261 * Once a SCSI-II RESERVE cmd has been serviced by the lower layers,
3262 * clear the flag so the lun is not QUIESCED any longer.
3263	 * Also clear the VHCI_PKT_THRU_TASKQ flag, to ensure that if this pkt
3264	 * is retried, a taskq shall again be dispatched to service it.  Else
3265	 * it may lead to a system hang if the retry is within interrupt
3266	 * context.
3267	 */
3268	if ((pkt->pkt_cdbp[0] == SCMD_RESERVE) ||
3269	    (pkt->pkt_cdbp[0] == SCMD_RESERVE_G1)) {
3270		vlun->svl_flags &= ~VLUN_QUIESCED_FLG;
3271		vpkt->vpkt_state &= ~VHCI_PKT_THRU_TASKQ;
3272	}
3273
3274	/*
3275	 * vpkt_org_vpkt should always be NULL here if the retry command
3276	 * has been successfully processed.  If vpkt_org_vpkt != NULL at
3277	 * this point, it is an error so restore the original vpkt and
3278	 * return an error to the target driver so it can retry the
3279	 * command as appropriate.
3280	 */
3281	if (vpkt->vpkt_org_vpkt != NULL) {
3282		struct vhci_pkt *new_vpkt = vpkt;
3283		vpkt = vpkt->vpkt_org_vpkt;
3284
3285		vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
3286		    new_vpkt->vpkt_tgt_pkt);
3287
3288		/*
3289		 * Mark this command completion as having an error so that
3290		 * ssd will retry the command.
3291		 */
3292		vpkt->vpkt_tgt_pkt->pkt_reason = CMD_ABORTED;
3293		vpkt->vpkt_tgt_pkt->pkt_statistics |= STAT_ABORTED;
3294
3295		pkt = vpkt->vpkt_hba_pkt;
3296		tpkt = vpkt->vpkt_tgt_pkt;
3297	}
3298
3299	if ((err_str != NULL) && (pkt->pkt_reason !=
3300	    svp->svp_last_pkt_reason)) {
3301		cdip = vlun->svl_dip;
3302		pdip = mdi_pi_get_phci(vpkt->vpkt_path);
3303		vdip = ddi_get_parent(cdip);
3304		cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3305		dpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3306		vhci_log(CE_WARN, vdip, "!%s (%s%d): %s on path %s (%s%d)",
3307		    ddi_pathname(cdip, cpath), ddi_driver_name(cdip),
3308		    ddi_get_instance(cdip), err_str,
3309		    ddi_pathname(pdip, dpath), ddi_driver_name(pdip),
3310		    ddi_get_instance(pdip));
3311		kmem_free(cpath, MAXPATHLEN);
3312		kmem_free(dpath, MAXPATHLEN);
3313	}
3314	svp->svp_last_pkt_reason = pkt->pkt_reason;
3315	VHCI_DECR_PATH_CMDCOUNT(svp);
3316
3317	/*
3318	 * For the PKT_DMA_PARTIAL case, vhci should not free the path.
3319	 * Target driver will call into vhci_scsi_dmafree or
3320	 * destroy pkt to release this path.
3321	 */
3322	if ((vpkt->vpkt_flags & CFLAG_DMA_PARTIAL) == 0) {
3323		scsi_destroy_pkt(pkt);
3324		vpkt->vpkt_hba_pkt = NULL;
3325		if (vpkt->vpkt_path) {
3326			mdi_rele_path(vpkt->vpkt_path);
3327			vpkt->vpkt_path = NULL;
3328		}
3329	}
3330
3331	if (tpkt->pkt_comp) {
3332		(*tpkt->pkt_comp)(tpkt);
3333	}
3334}
3335
3336/*
3337 * two possibilities: (1) failover has completed
3338 * or (2) is in progress; update our path states for
3339 * the former case; for the latter case,
3340 * initiate a scsi_watch request to
3341 * determine when failover completes - vlun is HELD
3342 * until failover completes; BUSY is returned to upper
3343 * layer in both cases
3344 */
3345static int
3346vhci_handle_ext_fo(struct scsi_pkt *pkt, int fostat)
3347{
3348	struct vhci_pkt		*vpkt = (struct vhci_pkt *)pkt->pkt_private;
3349	struct scsi_pkt		*tpkt;
3350	scsi_vhci_priv_t	*svp;
3351	scsi_vhci_lun_t		*vlun;
3352	struct scsi_vhci	*vhci;
3353	scsi_vhci_swarg_t	*swarg;
3354	char			*path;
3355
3356	ASSERT(vpkt != NULL);
3357	tpkt = vpkt->vpkt_tgt_pkt;
3358	ASSERT(tpkt != NULL);
3359	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(vpkt->vpkt_path);
3360	ASSERT(svp != NULL);
3361	vlun = svp->svp_svl;
3362	ASSERT(vlun != NULL);
3363	ASSERT(VHCI_LUN_IS_HELD(vlun));
3364
3365	vhci = ADDR2VHCI(&tpkt->pkt_address);
3366
3367	if (fostat == SCSI_SENSE_INACTIVE) {
3368		VHCI_DEBUG(1, (CE_NOTE, NULL, "!Failover "
3369		    "detected for %s; updating path states...\n",
3370		    vlun->svl_lun_wwn));
3371		/*
3372		 * set the vlun flag to indicate to the task that the target
3373		 * port group needs updating
3374		 */
3375		vlun->svl_flags |= VLUN_UPDATE_TPG;
3376		(void) taskq_dispatch(vhci->vhci_update_pathstates_taskq,
3377		    vhci_update_pathstates, (void *)vlun, KM_SLEEP);
3378	} else {
3379		path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3380		vhci_log(CE_NOTE, ddi_get_parent(vlun->svl_dip),
3381		    "!%s (%s%d): Waiting for externally initiated failover "
3382		    "to complete", ddi_pathname(vlun->svl_dip, path),
3383		    ddi_driver_name(vlun->svl_dip),
3384		    ddi_get_instance(vlun->svl_dip));
3385		kmem_free(path, MAXPATHLEN);
3386		swarg = kmem_alloc(sizeof (*swarg), KM_NOSLEEP);
3387		if (swarg == NULL) {
3388			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_handle_ext_fo: "
3389			    "request packet allocation for %s failed....\n",
3390			    vlun->svl_lun_wwn));
3391			VHCI_RELEASE_LUN(vlun);
3392			return (PKT_RETURN);
3393		}
3394		swarg->svs_svp = svp;
3395		swarg->svs_tos = ddi_get_time();
3396		swarg->svs_pi = vpkt->vpkt_path;
3397		swarg->svs_release_lun = 0;
3398		swarg->svs_done = 0;
3399		/*
3400		 * place a hold on the path...we don't want it to
3401		 * vanish while scsi_watch is in progress
3402		 */
3403		mdi_hold_path(vpkt->vpkt_path);
3404		svp->svp_sw_token = scsi_watch_request_submit(svp->svp_psd,
3405		    VHCI_FOWATCH_INTERVAL, SENSE_LENGTH, vhci_efo_watch_cb,
3406		    (caddr_t)swarg);
3407	}
3408	return (BUSY_RETURN);
3409}
3410
3411/*
3412 * vhci_efo_watch_cb:
3413 *	Callback from scsi_watch request to check the failover status.
3414 *	Completion is either due to successful failover or timeout.
3415 *	Upon successful completion, vhci_update_pathstates is called.
3416 *	For timeout condition, vhci_efo_done is called.
3417 *	Always returns 0 to scsi_watch to keep retrying till vhci_efo_done
3418 *	terminates this request properly in a separate thread.
3419 */
3420
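/*
 * The scsi_watch request submitted in vhci_handle_ext_fo polls the device
 * every VHCI_FOWATCH_INTERVAL and hands each result to this callback, which
 * keeps returning 0 until either the failover settles (and a path-state
 * update is dispatched) or VHCI_EXTFO_TIMEOUT elapses, measured from
 * swarg->svs_tos.
 */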
3421static int
3422vhci_efo_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
3423{
3424	struct scsi_status		*statusp = resultp->statusp;
3425	struct scsi_extended_sense	*sensep = resultp->sensep;
3426	struct scsi_pkt			*pkt = resultp->pkt;
3427	scsi_vhci_swarg_t		*swarg;
3428	scsi_vhci_priv_t		*svp;
3429	scsi_vhci_lun_t			*vlun;
3430	struct scsi_vhci		*vhci;
3431	dev_info_t			*vdip;
3432	int				rval, updt_paths;
3433
3434	swarg = (scsi_vhci_swarg_t *)(uintptr_t)arg;
3435	svp = swarg->svs_svp;
3436	if (swarg->svs_done) {
3437		/*
3438		 * Already completed failover or timed out.
3439		 * Waiting for vhci_efo_done to terminate this scsi_watch.
3440		 */
3441		return (0);
3442	}
3443
3444	ASSERT(svp != NULL);
3445	vlun = svp->svp_svl;
3446	ASSERT(vlun != NULL);
3447	ASSERT(VHCI_LUN_IS_HELD(vlun));
3448	vlun->svl_efo_update_path = 0;
3449	vdip = ddi_get_parent(vlun->svl_dip);
3450	vhci = ddi_get_soft_state(vhci_softstate,
3451	    ddi_get_instance(vdip));
3452
3453	updt_paths = 0;
3454
3455	if (pkt->pkt_reason != CMD_CMPLT) {
3456		if ((ddi_get_time() - swarg->svs_tos) >= VHCI_EXTFO_TIMEOUT) {
3457			swarg->svs_release_lun = 1;
3458			goto done;
3459		}
3460		return (0);
3461	}
3462	if (*((unsigned char *)statusp) == STATUS_CHECK) {
3463		rval = (*(vlun->svl_fops->sfo_analyze_sense))
3464		    (svp->svp_psd, sensep, vlun->svl_fops_ctpriv);
3465		switch (rval) {
3466			/*
3467			 * Only update path states if the path is definitely
3468			 * inactive, or no failover occurred.  For all other
3469			 * check conditions continue pinging.  An unexpected
3470			 * check condition shouldn't cause pinging to complete
3471			 * prematurely.
3472			 */
3473			case SCSI_SENSE_INACTIVE:
3474			case SCSI_SENSE_NOFAILOVER:
3475				updt_paths = 1;
3476				break;
3477			default:
3478				if ((ddi_get_time() - swarg->svs_tos)
3479				    >= VHCI_EXTFO_TIMEOUT) {
3480					swarg->svs_release_lun = 1;
3481					goto done;
3482				}
3483				return (0);
3484		}
3485	} else if (*((unsigned char *)statusp) ==
3486	    STATUS_RESERVATION_CONFLICT) {
3487		updt_paths = 1;
3488	} else if ((*((unsigned char *)statusp)) &
3489	    (STATUS_BUSY | STATUS_QFULL)) {
3490		return (0);
3491	}
3492	if ((*((unsigned char *)statusp) == STATUS_GOOD) ||
3493	    (updt_paths == 1)) {
3494		/*
3495		 * we got here because we had detected an
3496		 * externally initiated failover; things
3497		 * have settled down now, so let's
3498		 * start up a task to update the
3499		 * path states and target port group
3500		 */
3501		vlun->svl_efo_update_path = 1;
3502		swarg->svs_done = 1;
3503		vlun->svl_swarg = swarg;
3504		vlun->svl_flags |= VLUN_UPDATE_TPG;
3505		(void) taskq_dispatch(vhci->vhci_update_pathstates_taskq,
3506		    vhci_update_pathstates, (void *)vlun,
3507		    KM_SLEEP);
3508		return (0);
3509	}
3510	if ((ddi_get_time() - swarg->svs_tos) >= VHCI_EXTFO_TIMEOUT) {
3511		swarg->svs_release_lun = 1;
3512		goto done;
3513	}
3514	return (0);
3515done:
3516	swarg->svs_done = 1;
3517	(void) taskq_dispatch(vhci->vhci_taskq,
3518	    vhci_efo_done, (void *)swarg, KM_SLEEP);
3519	return (0);
3520}
3521
3522/*
3523 * vhci_efo_done:
3524 *	Cleanly terminates scsi_watch and frees up resources.
3525 *	Called as a taskq function from vhci_efo_watch_cb on the EFO timeout
3526 *	condition, or by vhci_update_pathstates invoked during externally
3527 *	initiated failover completion.
3528 */
3529static void
3530vhci_efo_done(void *arg)
3531{
3532	scsi_vhci_lun_t			*vlun;
3533	scsi_vhci_swarg_t		*swarg = (scsi_vhci_swarg_t *)arg;
3534	scsi_vhci_priv_t		*svp = swarg->svs_svp;
3535	ASSERT(svp);
3536
3537	vlun = svp->svp_svl;
3538	ASSERT(vlun);
3539
3540	/* Wait for clean termination of scsi_watch */
3541	(void) scsi_watch_request_terminate(svp->svp_sw_token,
3542	    SCSI_WATCH_TERMINATE_WAIT);
3543	svp->svp_sw_token = NULL;
3544
3545	/* release path and free up resources to indicate failover completion */
3546	mdi_rele_path(swarg->svs_pi);
3547	if (swarg->svs_release_lun) {
3548		VHCI_RELEASE_LUN(vlun);
3549	}
3550	kmem_free((void *)swarg, sizeof (*swarg));
3551}
3552
3553/*
3554 * Update the path states
3555 * vlun should be HELD when this is invoked.
3556 * Calls vhci_efo_done to clean up resources allocated for EFO.
3557 */
3558void
3559vhci_update_pathstates(void *arg)
3560{
3561	mdi_pathinfo_t			*pip, *npip;
3562	dev_info_t			*dip, *pdip;
3563	struct scsi_failover_ops	*fo;
3564	struct scsi_vhci_priv		*svp;
3565	struct scsi_device		*psd;
3566	struct scsi_path_opinfo		opinfo;
3567	char				*pclass, *tptr;
3568	struct scsi_vhci_lun		*vlun = (struct scsi_vhci_lun *)arg;
3569	int				sps; /* mdi_select_path() status */
3570	char				*cpath, *dpath;
3571	struct scsi_vhci		*vhci;
3572	struct scsi_pkt			*pkt;
3573	struct buf			*bp;
3574	int				reserve_conflict = 0;
3575
3576	ASSERT(VHCI_LUN_IS_HELD(vlun));
3577	dip  = vlun->svl_dip;
3578	pip = npip = NULL;
3579
3580	vhci = ddi_get_soft_state(vhci_softstate,
3581	    ddi_get_instance(ddi_get_parent(dip)));
3582
3583	sps = mdi_select_path(dip, NULL, (MDI_SELECT_ONLINE_PATH |
3584	    MDI_SELECT_STANDBY_PATH), NULL, &npip);
3585	if ((npip == NULL) || (sps != MDI_SUCCESS)) {
3586		goto done;
3587	}
3588
3589	fo = vlun->svl_fops;
3590	do {
3591		pip = npip;
3592		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
3593		psd = svp->svp_psd;
3594		if ((*fo->sfo_path_get_opinfo)(psd, &opinfo,
3595		    vlun->svl_fops_ctpriv) != 0) {
3596			sps = mdi_select_path(dip, NULL,
3597			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
3598			    pip, &npip);
3599			mdi_rele_path(pip);
3600			continue;
3601		}
3602
3603		if (mdi_prop_lookup_string(pip, "path-class", &pclass) !=
3604		    MDI_SUCCESS) {
3605			VHCI_DEBUG(1, (CE_NOTE, NULL,
3606			    "!vhci_update_pathstates: prop lookup failed for "
3607			    "path 0x%p\n", (void *)pip));
3608			sps = mdi_select_path(dip, NULL,
3609			    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
3610			    pip, &npip);
3611			mdi_rele_path(pip);
3612			continue;
3613		}
3614
3615		/*
3616		 * Need to update the "path-class" property
3617		 * value in the device tree if different
3618		 * from the existing value.
3619		 */
3620		if (strcmp(pclass, opinfo.opinfo_path_attr) != 0) {
3621			(void) mdi_prop_update_string(pip, "path-class",
3622			    opinfo.opinfo_path_attr);
3623		}
3624
		/*
		 * Only change the state if needed, i.e., don't call
		 * mdi_pi_set_state to ONLINE a path if it's already
		 * ONLINE. Same for STANDBY paths.
		 */
3630
3631		if ((opinfo.opinfo_path_state == SCSI_PATH_ACTIVE ||
3632		    opinfo.opinfo_path_state == SCSI_PATH_ACTIVE_NONOPT)) {
3633			if (!(MDI_PI_IS_ONLINE(pip))) {
3634				VHCI_DEBUG(1, (CE_NOTE, NULL,
3635				    "!vhci_update_pathstates: marking path"
3636				    " 0x%p as ONLINE\n", (void *)pip));
3637				pdip = mdi_pi_get_phci(pip);
3638				cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3639				dpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3640				vhci_log(CE_NOTE, ddi_get_parent(dip), "!%s"
3641				    " (%s%d): path %s (%s%d) target address %s"
3642				    " is now ONLINE because of"
3643				    " an externally initiated failover",
3644				    ddi_pathname(dip, cpath),
3645				    ddi_driver_name(dip),
3646				    ddi_get_instance(dip),
3647				    ddi_pathname(pdip, dpath),
3648				    ddi_driver_name(pdip),
3649				    ddi_get_instance(pdip),
3650				    mdi_pi_get_addr(pip));
3651				kmem_free(cpath, MAXPATHLEN);
3652				kmem_free(dpath, MAXPATHLEN);
3653				mdi_pi_set_state(pip,
3654				    MDI_PATHINFO_STATE_ONLINE);
3655				mdi_pi_set_preferred(pip,
3656				    opinfo.opinfo_preferred);
3657				tptr = kmem_alloc(strlen
3658				    (opinfo.opinfo_path_attr)+1, KM_SLEEP);
3659				(void) strlcpy(tptr, opinfo.opinfo_path_attr,
3660				    (strlen(opinfo.opinfo_path_attr)+1));
3661				mutex_enter(&vlun->svl_mutex);
3662				if (vlun->svl_active_pclass != NULL) {
3663					kmem_free(vlun->svl_active_pclass,
3664					    strlen(vlun->svl_active_pclass)+1);
3665				}
3666				vlun->svl_active_pclass = tptr;
3667				if (vlun->svl_waiting_for_activepath) {
3668					vlun->svl_waiting_for_activepath = 0;
3669				}
3670				mutex_exit(&vlun->svl_mutex);
3671				/* Check for Reservation Conflict */
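				/*
				 * The probe below issues a small read down
				 * the newly ONLINE path; a status of
				 * STATUS_RESERVATION_CONFLICT means another
				 * initiator still holds a SCSI-2 reservation
				 * on this LUN.
				 */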
3672				bp = scsi_alloc_consistent_buf(
3673				    &svp->svp_psd->sd_address,
3674				    (struct buf *)NULL, DEV_BSIZE, B_READ,
3675				    NULL, NULL);
3676				if (!bp) {
3677					VHCI_DEBUG(1, (CE_NOTE, NULL,
3678					    "vhci_update_pathstates: "
3679					    "!No resources (buf)\n"));
3680					mdi_rele_path(pip);
3681					goto done;
3682				}
3683				pkt = scsi_init_pkt(&svp->svp_psd->sd_address,
3684				    NULL, bp, CDB_GROUP1,
3685				    sizeof (struct scsi_arq_status), 0,
3686				    PKT_CONSISTENT, NULL, NULL);
3687				if (pkt) {
3688					(void) scsi_setup_cdb((union scsi_cdb *)
3689					    (uintptr_t)pkt->pkt_cdbp,
3690					    SCMD_READ, 1, 1, 0);
3691					pkt->pkt_time = 3*30;
3692					pkt->pkt_flags = FLAG_NOINTR;
3693					if ((scsi_transport(pkt) ==
3694					    TRAN_ACCEPT) && (pkt->pkt_reason
3695					    == CMD_CMPLT) && (SCBP_C(pkt) ==
3696					    STATUS_RESERVATION_CONFLICT)) {
3697						reserve_conflict = 1;
3698					}
3699					scsi_destroy_pkt(pkt);
3700				}
3701				scsi_free_consistent_buf(bp);
3702			} else if (MDI_PI_IS_ONLINE(pip)) {
3703				if (strcmp(pclass, opinfo.opinfo_path_attr)
3704				    != 0) {
3705					mdi_pi_set_preferred(pip,
3706					    opinfo.opinfo_preferred);
3707					mutex_enter(&vlun->svl_mutex);
3708					if (vlun->svl_active_pclass == NULL ||
3709					    strcmp(opinfo.opinfo_path_attr,
3710					    vlun->svl_active_pclass) != 0) {
3711						mutex_exit(&vlun->svl_mutex);
3712						tptr = kmem_alloc(strlen
3713						    (opinfo.opinfo_path_attr)+1,
3714						    KM_SLEEP);
3715						(void) strlcpy(tptr,
3716						    opinfo.opinfo_path_attr,
3717						    (strlen
3718						    (opinfo.opinfo_path_attr)
3719						    +1));
3720						mutex_enter(&vlun->svl_mutex);
3721					} else {
3722						/*
3723						 * No need to update
3724						 * svl_active_pclass
3725						 */
3726						tptr = NULL;
3727						mutex_exit(&vlun->svl_mutex);
3728					}
3729					if (tptr) {
3730						if (vlun->svl_active_pclass
3731						    != NULL) {
3732							kmem_free(vlun->
3733							    svl_active_pclass,
3734							    strlen(vlun->
3735							    svl_active_pclass)
3736							    +1);
3737						}
3738						vlun->svl_active_pclass = tptr;
3739						mutex_exit(&vlun->svl_mutex);
3740					}
3741				}
3742			}
3743		} else if ((opinfo.opinfo_path_state == SCSI_PATH_INACTIVE) &&
3744		    !(MDI_PI_IS_STANDBY(pip))) {
3745			VHCI_DEBUG(1, (CE_NOTE, NULL,
3746			    "!vhci_update_pathstates: marking path"
3747			    " 0x%p as STANDBY\n", (void *)pip));
3748			pdip = mdi_pi_get_phci(pip);
3749			cpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3750			dpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3751			vhci_log(CE_NOTE, ddi_get_parent(dip), "!%s"
3752			    " (%s%d): path %s (%s%d) target address %s"
3753			    " is now STANDBY because of"
3754			    " an externally initiated failover",
3755			    ddi_pathname(dip, cpath),
3756			    ddi_driver_name(dip),
3757			    ddi_get_instance(dip),
3758			    ddi_pathname(pdip, dpath),
3759			    ddi_driver_name(pdip),
3760			    ddi_get_instance(pdip),
3761			    mdi_pi_get_addr(pip));
3762			kmem_free(cpath, MAXPATHLEN);
3763			kmem_free(dpath, MAXPATHLEN);
3764			mdi_pi_set_state(pip,
3765			    MDI_PATHINFO_STATE_STANDBY);
3766			mdi_pi_set_preferred(pip,
3767			    opinfo.opinfo_preferred);
3768			mutex_enter(&vlun->svl_mutex);
3769			if (vlun->svl_active_pclass != NULL) {
3770				if (strcmp(vlun->svl_active_pclass,
3771				    opinfo.opinfo_path_attr) == 0) {
3772					kmem_free(vlun->
3773					    svl_active_pclass,
3774					    strlen(vlun->
3775					    svl_active_pclass)+1);
3776					vlun->svl_active_pclass = NULL;
3777				}
3778			}
3779			mutex_exit(&vlun->svl_mutex);
3780		}
3781		(void) mdi_prop_free(pclass);
3782		sps = mdi_select_path(dip, NULL,
3783		    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
3784		    pip, &npip);
3785		mdi_rele_path(pip);
3786
3787	} while ((npip != NULL) && (sps == MDI_SUCCESS));
3788
	/*
	 * Check to see if this vlun has an active SCSI-II RESERVE. If so,
	 * clear the reservation by sending a reset, so the host doesn't
	 * receive a reservation conflict.
	 * Reset VLUN_RESERVE_ACTIVE_FLG for this vlun, and explicitly
	 * notify ssd of the reset.
	 */
3796	if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
3797		if (reserve_conflict && (vlun->svl_xlf_capable == 0)) {
3798			(void) vhci_recovery_reset(vlun,
3799			    &svp->svp_psd->sd_address, FALSE,
3800			    VHCI_DEPTH_TARGET);
3801		}
3802		vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
3803		mutex_enter(&vhci->vhci_mutex);
3804		scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
3805		    &vhci->vhci_reset_notify_listf);
3806		mutex_exit(&vhci->vhci_mutex);
3807	}
3808	if (vlun->svl_flags & VLUN_UPDATE_TPG) {
3809		/*
3810		 * Update the AccessState of related MP-API TPGs
3811		 */
3812		(void) vhci_mpapi_update_tpg_acc_state_for_lu(vhci, vlun);
3813		vlun->svl_flags &= ~VLUN_UPDATE_TPG;
3814	}
3815done:
3816	if (vlun->svl_efo_update_path) {
3817		vlun->svl_efo_update_path = 0;
3818		vhci_efo_done(vlun->svl_swarg);
3819		vlun->svl_swarg = 0;
3820	}
3821	VHCI_RELEASE_LUN(vlun);
3822}
3823
3824/* ARGSUSED */
3825static int
3826vhci_pathinfo_init(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
3827{
3828	scsi_hba_tran_t		*hba = NULL;
3829	struct scsi_device	*psd = NULL;
3830	scsi_vhci_lun_t		*vlun = NULL;
3831	dev_info_t		*pdip = NULL;
3832	dev_info_t		*tgt_dip;
3833	struct scsi_vhci	*vhci;
3834	char			*guid;
3835	scsi_vhci_priv_t	*svp = NULL;
3836	int			rval = MDI_FAILURE;
3837	int			vlun_alloced = 0;
3838
3839	ASSERT(vdip != NULL);
3840	ASSERT(pip != NULL);
3841
3842	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
3843	ASSERT(vhci != NULL);
3844
3845	pdip = mdi_pi_get_phci(pip);
3846	ASSERT(pdip != NULL);
3847
3848	hba = ddi_get_driver_private(pdip);
3849	ASSERT(hba != NULL);
3850
3851	tgt_dip = mdi_pi_get_client(pip);
3852	ASSERT(tgt_dip != NULL);
3853
3854	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
3855	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
3856		VHCI_DEBUG(1, (CE_WARN, NULL,
3857		    "vhci_pathinfo_init: lun guid property failed"));
3858		goto failure;
3859	}
3860
3861	vlun = vhci_lun_lookup_alloc(tgt_dip, guid, &vlun_alloced);
3862	ddi_prop_free(guid);
3863
3864	vlun->svl_dip = tgt_dip;
3865
3866	svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
3867	svp->svp_svl = vlun;
3868
3869	vlun->svl_lb_policy_save = mdi_get_lb_policy(tgt_dip);
3870	mutex_init(&svp->svp_mutex, NULL, MUTEX_DRIVER, NULL);
3871	cv_init(&svp->svp_cv, NULL, CV_DRIVER, NULL);
3872
3873	psd = kmem_zalloc(sizeof (*psd), KM_SLEEP);
3874	mutex_init(&psd->sd_mutex, NULL, MUTEX_DRIVER, NULL);
3875
	/*
	 * Clone the transport structure if requested.
	 * Self-enumerating HBAs always need to use cloning.
	 */
3880
3881	if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
3882		scsi_hba_tran_t	*clone =
3883		    kmem_alloc(sizeof (scsi_hba_tran_t), KM_SLEEP);
3884		bcopy(hba, clone, sizeof (scsi_hba_tran_t));
3885		hba = clone;
3886		hba->tran_sd = psd;
3887	} else {
3888		ASSERT(hba->tran_sd == NULL);
3889	}
3890	psd->sd_dev = tgt_dip;
3891	psd->sd_address.a_hba_tran = hba;
3892	psd->sd_private = (caddr_t)pip;
3893	svp->svp_psd = psd;
3894	mdi_pi_set_vhci_private(pip, (caddr_t)svp);
3895
3896	/*
3897	 * call hba's target init entry point if it exists
3898	 */
3899	if (hba->tran_tgt_init != NULL) {
3900		if ((rval = (*hba->tran_tgt_init)(pdip, tgt_dip,
3901		    hba, psd)) != DDI_SUCCESS) {
3902			VHCI_DEBUG(1, (CE_WARN, pdip,
3903			    "!vhci_pathinfo_init: tran_tgt_init failed for "
3904			    "path=0x%p rval=%x", (void *)pip, rval));
3905			goto failure;
3906		}
3907	}
3908
3909	svp->svp_new_path = 1;
3910
3911	psd->sd_inq = NULL;
3912
3913	VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_pathinfo_init: path:%p\n",
3914	    (void *)pip));
3915	return (MDI_SUCCESS);
3916
3917failure:
3918	if (psd) {
3919		mutex_destroy(&psd->sd_mutex);
3920		kmem_free(psd, sizeof (*psd));
3921	}
3922	if (svp) {
3923		mdi_pi_set_vhci_private(pip, NULL);
3924		mutex_destroy(&svp->svp_mutex);
3925		cv_destroy(&svp->svp_cv);
3926		kmem_free(svp, sizeof (*svp));
3927	}
3928	if (hba && hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE)
3929		kmem_free(hba, sizeof (scsi_hba_tran_t));
3930
3931	if (vlun_alloced)
3932		vhci_lun_free(tgt_dip);
3933
3934	return (rval);
3935}
3936
3937/* ARGSUSED */
3938static int
3939vhci_pathinfo_uninit(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
3940{
3941	scsi_hba_tran_t		*hba = NULL;
3942	struct scsi_device	*psd = NULL;
3943	dev_info_t		*pdip = NULL;
3944	dev_info_t		*cdip = NULL;
3945	scsi_vhci_priv_t	*svp = NULL;
3946
3947	ASSERT(vdip != NULL);
3948	ASSERT(pip != NULL);
3949
3950	pdip = mdi_pi_get_phci(pip);
3951	ASSERT(pdip != NULL);
3952
3953	cdip = mdi_pi_get_client(pip);
3954	ASSERT(cdip != NULL);
3955
3956	hba = ddi_get_driver_private(pdip);
3957	ASSERT(hba != NULL);
3958
3959	vhci_mpapi_set_path_state(vdip, pip, MP_DRVR_PATH_STATE_REMOVED);
3960	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
3961	if (svp == NULL) {
3962		/* path already freed. Nothing to do. */
3963		return (MDI_SUCCESS);
3964	}
3965
3966	psd = svp->svp_psd;
3967	ASSERT(psd != NULL);
3968
3969	if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
3970		hba = psd->sd_address.a_hba_tran;
3971		ASSERT(hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE);
3972		ASSERT(hba->tran_sd == psd);
3973	} else {
3974		ASSERT(hba->tran_sd == NULL);
3975	}
3976
3977	if (hba->tran_tgt_free != NULL) {
3978		(*hba->tran_tgt_free) (pdip, cdip, hba, psd);
3979	}
3980	mutex_destroy(&psd->sd_mutex);
3981	if (hba->tran_hba_flags & SCSI_HBA_TRAN_CLONE) {
3982		kmem_free(hba, sizeof (*hba));
3983	}
3984
3985	mdi_pi_set_vhci_private(pip, NULL);
3986	kmem_free((caddr_t)psd, sizeof (*psd));
3987
3988	mutex_destroy(&svp->svp_mutex);
3989	cv_destroy(&svp->svp_cv);
3990	kmem_free((caddr_t)svp, sizeof (*svp));
3991
3992	/*
3993	 * If this is the last path to the client,
3994	 * then free up the vlun as well.
3995	 */
3996	if (mdi_client_get_path_count(cdip) == 1) {
3997		vhci_lun_free(cdip);
3998	}
3999
4000	VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_pathinfo_uninit: path=0x%p\n",
4001	    (void *)pip));
4002	return (MDI_SUCCESS);
4003}
4004
4005/* ARGSUSED */
4006static int
4007vhci_pathinfo_state_change(dev_info_t *vdip, mdi_pathinfo_t *pip,
4008    mdi_pathinfo_state_t state, uint32_t ext_state, int flags)
4009{
4010	int			rval = MDI_SUCCESS;
4011	scsi_vhci_priv_t	*svp;
4012	scsi_vhci_lun_t		*vlun;
4013	int			held;
4014	int			op = (flags & 0xf00) >> 8;
4015	struct scsi_vhci	*vhci;
4016
4017	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
4018
4019	if (flags & MDI_EXT_STATE_CHANGE) {
		/*
		 * We do not want to issue any commands down the path if the
		 * sync flag is set; the lower layers might not be ready to
		 * accept any I/O commands.
		 */
4025		if (op == DRIVER_DISABLE)
4026			return (MDI_SUCCESS);
4027
4028		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
4029		if (svp == NULL) {
4030			return (MDI_FAILURE);
4031		}
4032		vlun = svp->svp_svl;
4033
4034		if (flags & MDI_BEFORE_STATE_CHANGE) {
4035			/*
4036			 * Hold the LUN.
4037			 */
4038			VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
4039			if (flags & MDI_DISABLE_OP)  {
4040				/*
4041				 * Issue scsi reset if it happens to be
4042				 * reserved path.
4043				 */
4044				if (vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
					/*
					 * If a reservation is pending on
					 * this path, don't mark the
					 * path busy.
					 */
4050					if (op == DRIVER_DISABLE_TRANSIENT) {
4051						VHCI_DEBUG(1, (CE_NOTE, NULL,
4052						    "!vhci_pathinfo"
4053						    "_state_change (pip:%p): "
4054						    " reservation: fail busy\n",
4055						    (void *)pip));
4056						return (MDI_FAILURE);
4057					}
4058					if (pip == vlun->svl_resrv_pip) {
4059						if (vhci_recovery_reset(
4060						    svp->svp_svl,
4061						    &svp->svp_psd->sd_address,
4062						    TRUE,
4063						    VHCI_DEPTH_TARGET) == 0) {
4064							VHCI_DEBUG(1,
4065							    (CE_NOTE, NULL,
4066							    "!vhci_pathinfo"
4067							    "_state_change "
4068							    " (pip:%p): "
4069							    "reset failed, "
4070							    "give up!\n",
4071							    (void *)pip));
4072						}
4073						vlun->svl_flags &=
4074						    ~VLUN_RESERVE_ACTIVE_FLG;
4075					}
4076				}
4077			} else if (flags & MDI_ENABLE_OP)  {
4078				if (((vhci->vhci_conf_flags &
4079				    VHCI_CONF_FLAGS_AUTO_FAILBACK) ==
4080				    VHCI_CONF_FLAGS_AUTO_FAILBACK) &&
4081				    MDI_PI_IS_USER_DISABLE(pip) &&
4082				    MDI_PI_IS_STANDBY(pip)) {
4083					struct scsi_failover_ops	*fo;
4084					char *best_pclass, *pclass = NULL;
4085					int  best_class, rv;
4086					/*
4087					 * Failback if enabling a standby path
4088					 * and it is the primary class or
4089					 * preferred class
4090					 */
4091					best_class = mdi_pi_get_preferred(pip);
4092					if (best_class == 0) {
4093						/*
4094						 * if not preferred - compare
4095						 * path-class with class
4096						 */
4097						fo = vlun->svl_fops;
4098						(*fo->sfo_pathclass_next)(NULL,
4099						    &best_pclass,
4100						    vlun->svl_fops_ctpriv);
4101						pclass = NULL;
4102						rv = mdi_prop_lookup_string(pip,
4103						    "path-class", &pclass);
4104						if (rv != MDI_SUCCESS ||
4105						    pclass == NULL) {
4106							vhci_log(CE_NOTE, vdip,
4107							    "!path-class "
4108							    " lookup "
4109							    "failed. rv: %d"
4110							    "class: %p", rv,
4111							    (void *)pclass);
4112						} else if (strncmp(pclass,
4113						    best_pclass,
4114						    strlen(best_pclass)) == 0) {
4115							best_class = 1;
4116						}
4117						if (rv == MDI_SUCCESS &&
4118						    pclass != NULL) {
4119							rv = mdi_prop_free(
4120							    pclass);
4121							if (rv !=
4122							    DDI_PROP_SUCCESS) {
4123								vhci_log(
4124								    CE_NOTE,
4125								    vdip,
4126								    "!path-"
4127								    "class"
4128								    " free"
4129								    " failed"
4130								    " rv: %d"
4131								    " class: "
4132								    "%p",
4133								    rv,
4134								    (void *)
4135								    pclass);
4136							}
4137						}
4138					}
4139					if (best_class == 1) {
4140						VHCI_DEBUG(1, (CE_NOTE, NULL,
4141						    "preferred path: %p "
4142						    "USER_DISABLE->USER_ENABLE "
4143						    "transition for lun %s\n",
4144						    (void *)pip,
4145						    vlun->svl_lun_wwn));
4146						(void) taskq_dispatch(
4147						    vhci->vhci_taskq,
4148						    vhci_initiate_auto_failback,
4149						    (void *) vlun, KM_SLEEP);
4150					}
4151				}
4152				/*
4153				 * if PGR is active, revalidate key and
4154				 * register on this path also, if key is
4155				 * still valid
4156				 */
4157				sema_p(&vlun->svl_pgr_sema);
4158				if (vlun->svl_pgr_active)
4159					(void)
4160					    vhci_pgr_validate_and_register(svp);
4161				sema_v(&vlun->svl_pgr_sema);
4162				/*
4163				 * Inform target driver about any
4164				 * reservations to be reinstated if target
4165				 * has dropped reservation during the busy
4166				 * period.
4167				 */
4168				mutex_enter(&vhci->vhci_mutex);
4169				scsi_hba_reset_notify_callback(
4170				    &vhci->vhci_mutex,
4171				    &vhci->vhci_reset_notify_listf);
4172				mutex_exit(&vhci->vhci_mutex);
4173			}
4174		}
4175		if (flags & MDI_AFTER_STATE_CHANGE) {
4176			if (flags & MDI_ENABLE_OP)  {
4177				mutex_enter(&vhci_global_mutex);
4178				cv_broadcast(&vhci_cv);
4179				mutex_exit(&vhci_global_mutex);
4180			}
4181			if (vlun->svl_setcap_done) {
4182				(void) vhci_pHCI_cap(&svp->svp_psd->sd_address,
4183				    "sector-size", vlun->svl_sector_size,
4184				    1, pip);
4185			}
4186
4187			/*
4188			 * Release the LUN
4189			 */
4190			VHCI_RELEASE_LUN(vlun);
4191
			/*
			 * Path transition is complete.
			 * Run the callback to tell the target driver to
			 * retry, preventing I/O starvation.
			 */
4197			if (scsi_callback_id != 0) {
4198				ddi_run_callback(&scsi_callback_id);
4199			}
4200		}
4201	} else {
4202		switch (state) {
4203		case MDI_PATHINFO_STATE_ONLINE:
4204			rval = vhci_pathinfo_online(vdip, pip, flags);
4205			break;
4206
4207		case MDI_PATHINFO_STATE_OFFLINE:
4208			rval = vhci_pathinfo_offline(vdip, pip, flags);
4209			break;
4210
4211		default:
4212			break;
4213		}
		/*
		 * Path transition is complete.
		 * Run the callback to tell the target driver to
		 * retry, preventing I/O starvation.
		 */
4219		if ((rval == MDI_SUCCESS) && (scsi_callback_id != 0)) {
4220			ddi_run_callback(&scsi_callback_id);
4221		}
4222		return (rval);
4223	}
4224
4225	return (MDI_SUCCESS);
4226}
4227
/*
 * Parse the mpxio load balancing options. The datanameptr
 * will point to a string containing the load-balance-options value.
 * The load-balance-options value will be a property that
 * defines the load-balance algorithm and any arguments to that
 * algorithm.
 * For example:
 * device-type-mpxio-options-list=
 * "device-type=SUN    SENA", "load-balance-options=logical-block-options"
 * "device-type=SUN     SE6920", "round-robin-options";
 * logical-block-options="load-balance=logical-block", "region-size=15";
 * round-robin-options="load-balance=round-robin";
 *
 * If load-balance is not defined, the load-balance algorithm defaults
 * to the global setting. Default values are assigned to the arguments
 * (region-size=18), and any argument that is not recognized is ignored.
 */
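/*
 * A minimal sketch of the buffer the parser below walks, assuming the
 * "logical-block-options" example above: ddi_getlongprop() hands back
 * the string-list property as a single buffer of concatenated
 * NUL-terminated strings, i.e.
 *
 *	load-balance=logical-block\0region-size=15\0
 *
 * so the loop advances by strlen(next_entry) + 1 for each entry.
 */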
4246static void
4247vhci_parse_mpxio_lb_options(dev_info_t *dip, dev_info_t *cdip,
4248	caddr_t datanameptr)
4249{
4250	char			*dataptr, *next_entry;
4251	caddr_t			config_list	= NULL;
4252	int			config_list_len = 0, list_len = 0;
4253	int			region_size = -1;
4254	client_lb_t		load_balance;
4255
4256	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, datanameptr,
4257	    (caddr_t)&config_list, &config_list_len) != DDI_PROP_SUCCESS) {
4258		return;
4259	}
4260
4261	list_len = config_list_len;
4262	next_entry = config_list;
4263	while (config_list_len > 0) {
4264		dataptr = next_entry;
4265
4266		if (strncmp(mdi_load_balance, dataptr,
4267		    strlen(mdi_load_balance)) == 0) {
4268			/* get the load-balance scheme */
4269			dataptr += strlen(mdi_load_balance) + 1;
4270			if (strcmp(dataptr, LOAD_BALANCE_PROP_RR) == 0) {
4271				(void) mdi_set_lb_policy(cdip, LOAD_BALANCE_RR);
4272				load_balance = LOAD_BALANCE_RR;
4273			} else if (strcmp(dataptr,
4274			    LOAD_BALANCE_PROP_LBA) == 0) {
4275				(void) mdi_set_lb_policy(cdip,
4276				    LOAD_BALANCE_LBA);
4277				load_balance = LOAD_BALANCE_LBA;
4278			} else if (strcmp(dataptr,
4279			    LOAD_BALANCE_PROP_NONE) == 0) {
4280				(void) mdi_set_lb_policy(cdip,
4281				    LOAD_BALANCE_NONE);
4282				load_balance = LOAD_BALANCE_NONE;
4283			}
4284		} else if (strncmp(dataptr, LOGICAL_BLOCK_REGION_SIZE,
4285		    strlen(LOGICAL_BLOCK_REGION_SIZE)) == 0) {
4286			int	i = 0;
4287			char	*ptr;
4288			char	*tmp;
4289
4290			tmp = dataptr + (strlen(LOGICAL_BLOCK_REGION_SIZE) + 1);
4291			/* check for numeric value */
4292			for (ptr = tmp; i < strlen(tmp); i++, ptr++) {
4293				if (!isdigit(*ptr)) {
4294					cmn_err(CE_WARN,
4295					    "Illegal region size: %s."
4296					    " Setting to default value: %d",
4297					    tmp,
4298					    LOAD_BALANCE_DEFAULT_REGION_SIZE);
4299					region_size =
4300					    LOAD_BALANCE_DEFAULT_REGION_SIZE;
4301					break;
4302				}
4303			}
4304			if (i >= strlen(tmp)) {
4305				region_size = stoi(&tmp);
4306			}
4307			(void) mdi_set_lb_region_size(cdip, region_size);
4308		}
4309		config_list_len -= (strlen(next_entry) + 1);
4310		next_entry += strlen(next_entry) + 1;
4311	}
4312#ifdef DEBUG
4313	if ((region_size >= 0) && (load_balance != LOAD_BALANCE_LBA)) {
4314		VHCI_DEBUG(1, (CE_NOTE, dip,
4315		    "!vhci_parse_mpxio_lb_options: region-size: %d"
4316		    "only valid for load-balance=logical-block\n",
4317		    region_size));
4318	}
4319#endif
4320	if ((region_size == -1) && (load_balance == LOAD_BALANCE_LBA)) {
4321		VHCI_DEBUG(1, (CE_NOTE, dip,
4322		    "!vhci_parse_mpxio_lb_options: No region-size"
4323		    " defined load-balance=logical-block."
4324		    " Default to: %d\n", LOAD_BALANCE_DEFAULT_REGION_SIZE));
4325		(void) mdi_set_lb_region_size(cdip,
4326		    LOAD_BALANCE_DEFAULT_REGION_SIZE);
4327	}
4328	if (list_len > 0) {
4329		kmem_free(config_list, list_len);
4330	}
4331}
4332
/*
 * Parse the device-type-mpxio-options-list looking for the key of
 * "load-balance-options". If found, parse the load balancing options.
 * See the comment for vhci_get_device_type_mpxio_options() for the
 * format of the device-type-mpxio-options-list.
 */
4339static void
4340vhci_parse_mpxio_options(dev_info_t *dip, dev_info_t *cdip,
4341		caddr_t datanameptr, int list_len)
4342{
4343	char		*dataptr;
4344	int		len;
4345
4346	/*
4347	 * get the data list
4348	 */
4349	dataptr = datanameptr;
4350	len = 0;
4351	while (len < list_len &&
4352	    strncmp(dataptr, DEVICE_TYPE_STR, strlen(DEVICE_TYPE_STR))
4353	    != 0) {
4354		if (strncmp(dataptr, LOAD_BALANCE_OPTIONS,
4355		    strlen(LOAD_BALANCE_OPTIONS)) == 0) {
4356			len += strlen(LOAD_BALANCE_OPTIONS) + 1;
4357			dataptr += strlen(LOAD_BALANCE_OPTIONS) + 1;
4358			vhci_parse_mpxio_lb_options(dip, cdip, dataptr);
4359		}
4360		len += strlen(dataptr) + 1;
4361		dataptr += strlen(dataptr) + 1;
4362	}
4363}
4364
/*
 * Check the inquiry string returned from the device against the
 * device-type entries.
 * Check for the existence of the device-type-mpxio-options-list and,
 * if found, parse the list checking for a match between the device-type
 * value and the inquiry string returned from the device. If a match
 * is found, parse the mpxio options list. The format of the
 * device-type-mpxio-options-list is:
 * device-type-mpxio-options-list=
 * "device-type=SUN    SENA", "load-balance-options=logical-block-options"
 * "device-type=SUN     SE6920", "round-robin-options";
 * logical-block-options="load-balance=logical-block", "region-size=15";
 * round-robin-options="load-balance=round-robin";
 */
4378void
4379vhci_get_device_type_mpxio_options(dev_info_t *dip, dev_info_t *cdip,
4380	struct scsi_device *devp)
4381{
4382
4383	caddr_t			config_list	= NULL;
4384	caddr_t			vidptr, datanameptr;
4385	int			vidlen, dupletlen = 0;
4386	int			config_list_len = 0, len;
4387	struct scsi_inquiry	*inq = devp->sd_inq;
4388
	/*
	 * Look up the device-type-mpxio-options-list and walk through
	 * the list, comparing the vendor id from the earlier inquiry
	 * command with the vids in the list. If there is a match, look
	 * up the mpxio-options value.
	 */
4395	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4396	    MPXIO_OPTIONS_LIST,
4397	    (caddr_t)&config_list, &config_list_len) == DDI_PROP_SUCCESS) {
4398
4399		/*
4400		 * Compare vids in each duplet - if it matches,
4401		 * parse the mpxio options list.
4402		 */
4403		for (len = config_list_len, vidptr = config_list; len > 0;
4404		    len -= dupletlen) {
4405
4406			dupletlen = 0;
4407
4408			if (strlen(vidptr) != 0 &&
4409			    strncmp(vidptr, DEVICE_TYPE_STR,
4410			    strlen(DEVICE_TYPE_STR)) == 0) {
4411				/* point to next duplet */
4412				datanameptr = vidptr + strlen(vidptr) + 1;
4413				/* add len of this duplet */
4414				dupletlen += strlen(vidptr) + 1;
4415				/* get to device type */
4416				vidptr += strlen(DEVICE_TYPE_STR) + 1;
4417				vidlen = strlen(vidptr);
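				/*
				 * Note: inq_vid (8 bytes) is immediately
				 * followed by inq_pid in struct scsi_inquiry,
				 * so the bcmp below matches the "<VID><PID>"
				 * string as a prefix of the combined VID+PID
				 * inquiry data.
				 */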
4418				if ((vidlen != 0) &&
4419				    bcmp(inq->inq_vid, vidptr, vidlen) == 0) {
4420					vhci_parse_mpxio_options(dip, cdip,
4421					    datanameptr, len - dupletlen);
4422					break;
4423				}
4424				/* get to next duplet */
4425				vidptr += strlen(vidptr) + 1;
4426			}
4427			/* get to the next device-type */
4428			while (len - dupletlen > 0 &&
4429			    strlen(vidptr) != 0 &&
4430			    strncmp(vidptr, DEVICE_TYPE_STR,
4431			    strlen(DEVICE_TYPE_STR)) != 0) {
4432				dupletlen += strlen(vidptr) + 1;
4433				vidptr += strlen(vidptr) + 1;
4434			}
4435		}
4436		if (config_list_len > 0) {
4437			kmem_free(config_list, config_list_len);
4438		}
4439	}
4440}
4441
4442static int
4443vhci_update_pathinfo(struct scsi_device *psd,  mdi_pathinfo_t *pip,
4444	struct scsi_failover_ops *fo,
4445	scsi_vhci_lun_t		*vlun,
4446	struct scsi_vhci	*vhci)
4447{
4448	struct scsi_path_opinfo		opinfo;
4449	char				*pclass, *best_pclass;
4450
4451	if ((*fo->sfo_path_get_opinfo)(psd, &opinfo,
4452	    vlun->svl_fops_ctpriv) != 0) {
4453		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_update_pathinfo: "
4454		    "Failed to get operation info for path:%p\n", (void *)pip));
4455		return (MDI_FAILURE);
4456	}
4457	/* set the xlf capable flag in the vlun for future use */
4458	vlun->svl_xlf_capable = opinfo.opinfo_xlf_capable;
4459	(void) mdi_prop_update_string(pip, "path-class",
4460	    opinfo.opinfo_path_attr);
4461
4462	pclass = opinfo.opinfo_path_attr;
4463	if (opinfo.opinfo_path_state == SCSI_PATH_ACTIVE) {
4464		mutex_enter(&vlun->svl_mutex);
4465		if (vlun->svl_active_pclass != NULL) {
4466			if (strcmp(vlun->svl_active_pclass, pclass) != 0) {
4467				mutex_exit(&vlun->svl_mutex);
4468				/*
4469				 * Externally initiated failover has happened;
4470				 * force the path state to be STANDBY/ONLINE,
4471				 * next IO will trigger failover and thus
4472				 * sync-up the pathstates.  Reason we don't
4473				 * sync-up immediately by invoking
4474				 * vhci_update_pathstates() is because it
4475				 * needs a VHCI_HOLD_LUN() and we don't
4476				 * want to block here.
4477				 *
4478				 * Further, if the device is an ALUA device,
4479				 * then failure to exactly match 'pclass' and
4480				 * 'svl_active_pclass'(as is the case here)
4481				 * indicates that the currently active path
4482				 * is a 'non-optimized' path - which means
4483				 * that 'svl_active_pclass' needs to be
4484				 * replaced with opinfo.opinfo_path_state
4485				 * value.
4486				 */
4487
4488				if (SCSI_FAILOVER_IS_TPGS(vlun->svl_fops)) {
4489					char	*tptr;
4490
					/*
					 * The device is ALUA compliant. The
					 * state needs to be changed to ONLINE
					 * rather than STANDBY, which is what
					 * is typically done for an asymmetric
					 * device that is not ALUA compliant.
					 */
4498					mdi_pi_set_state(pip,
4499					    MDI_PATHINFO_STATE_ONLINE);
4500					tptr = kmem_alloc(strlen
4501					    (opinfo.opinfo_path_attr)+1,
4502					    KM_SLEEP);
4503					(void) strlcpy(tptr,
4504					    opinfo.opinfo_path_attr,
4505					    (strlen(opinfo.opinfo_path_attr)
4506					    +1));
4507					mutex_enter(&vlun->svl_mutex);
4508					kmem_free(vlun->svl_active_pclass,
4509					    strlen(vlun->svl_active_pclass)+1);
4510					vlun->svl_active_pclass = tptr;
4511					mutex_exit(&vlun->svl_mutex);
4512				} else {
4513					/*
4514					 * Non ALUA device case.
4515					 */
4516					mdi_pi_set_state(pip,
4517					    MDI_PATHINFO_STATE_STANDBY);
4518				}
4519				vlun->svl_fo_support = opinfo.opinfo_mode;
4520				mdi_pi_set_preferred(pip,
4521				    opinfo.opinfo_preferred);
4522				return (MDI_SUCCESS);
4523			}
4524		} else {
4525			char	*tptr;
4526
			/*
			 * Let's release the mutex before we try to
			 * allocate, since the allocation may sleep.
			 */
4532			mutex_exit(&vlun->svl_mutex);
4533			tptr = kmem_alloc(strlen(pclass)+1, KM_SLEEP);
4534			(void) strlcpy(tptr, pclass, (strlen(pclass)+1));
4535			mutex_enter(&vlun->svl_mutex);
4536			vlun->svl_active_pclass = tptr;
4537		}
4538		mutex_exit(&vlun->svl_mutex);
4539		mdi_pi_set_state(pip, MDI_PATHINFO_STATE_ONLINE);
4540		vlun->svl_waiting_for_activepath = 0;
4541	} else if (opinfo.opinfo_path_state == SCSI_PATH_ACTIVE_NONOPT) {
4542		mutex_enter(&vlun->svl_mutex);
4543		if (vlun->svl_active_pclass == NULL) {
4544			char	*tptr;
4545
4546			mutex_exit(&vlun->svl_mutex);
4547			tptr = kmem_alloc(strlen(pclass)+1, KM_SLEEP);
4548			(void) strlcpy(tptr, pclass, (strlen(pclass)+1));
4549			mutex_enter(&vlun->svl_mutex);
4550			vlun->svl_active_pclass = tptr;
4551		}
4552		mutex_exit(&vlun->svl_mutex);
4553		mdi_pi_set_state(pip, MDI_PATHINFO_STATE_ONLINE);
4554		vlun->svl_waiting_for_activepath = 0;
4555	} else if (opinfo.opinfo_path_state == SCSI_PATH_INACTIVE) {
4556		mutex_enter(&vlun->svl_mutex);
4557		if (vlun->svl_active_pclass != NULL) {
4558			if (strcmp(vlun->svl_active_pclass, pclass) == 0) {
4559				mutex_exit(&vlun->svl_mutex);
4560				/*
4561				 * externally initiated failover has happened;
4562				 * force state to ONLINE (see comment above)
4563				 */
4564				mdi_pi_set_state(pip,
4565				    MDI_PATHINFO_STATE_ONLINE);
4566				vlun->svl_fo_support = opinfo.opinfo_mode;
4567				mdi_pi_set_preferred(pip,
4568				    opinfo.opinfo_preferred);
4569				return (MDI_SUCCESS);
4570			}
4571		}
4572		mutex_exit(&vlun->svl_mutex);
4573		mdi_pi_set_state(pip, MDI_PATHINFO_STATE_STANDBY);
4574
		/*
		 * Initiate auto-failback, if enabled, for the path if the
		 * path-state is transitioning from OFFLINE->STANDBY and the
		 * pathclass is the preferred pathclass for this storage.
		 * NOTE: In the case where opinfo_path_state is
		 * SCSI_PATH_ACTIVE (above), where the pi state is set to
		 * STANDBY, we don't initiate auto-failback, as the next IO
		 * shall take care of this. See comment above.
		 */
4584		(*fo->sfo_pathclass_next)(NULL, &best_pclass,
4585		    vlun->svl_fops_ctpriv);
4586		if (((vhci->vhci_conf_flags & VHCI_CONF_FLAGS_AUTO_FAILBACK) ==
4587		    VHCI_CONF_FLAGS_AUTO_FAILBACK) &&
4588		    ((strcmp(pclass, best_pclass) == 0) ||
4589		    mdi_pi_get_preferred(pip) == 1) &&
4590		    ((MDI_PI_OLD_STATE(pip) == MDI_PATHINFO_STATE_OFFLINE)||
4591		    (MDI_PI_OLD_STATE(pip) == MDI_PATHINFO_STATE_INIT))) {
4592			VHCI_DEBUG(1, (CE_NOTE, NULL, "%s pathclass path: %p"
4593			    " OFFLINE->STANDBY transition for lun %s\n",
4594			    best_pclass, (void *)pip, vlun->svl_lun_wwn));
4595			(void) taskq_dispatch(vhci->vhci_taskq,
4596			    vhci_initiate_auto_failback, (void *) vlun,
4597			    KM_SLEEP);
4598		}
4599	}
4600	vlun->svl_fo_support = opinfo.opinfo_mode;
4601	mdi_pi_set_preferred(pip, opinfo.opinfo_preferred);
4602
4603	VHCI_DEBUG(8, (CE_NOTE, NULL, "vhci_update_pathinfo: opinfo_rev = %x,"
4604	    " opinfo_path_state = %x opinfo_preferred = %x, opinfo_mode = %x\n",
4605	    opinfo.opinfo_rev, opinfo.opinfo_path_state,
4606	    opinfo.opinfo_preferred, opinfo.opinfo_mode));
4607
4608	return (MDI_SUCCESS);
4609}
4610
/*
 * Form the kstat name and call mdi_pi_kstat_create()
 */
4614void
4615vhci_kstat_create_pathinfo(mdi_pathinfo_t *pip)
4616{
4617	dev_info_t	*tgt_dip;
4618	dev_info_t	*pdip;
4619	char		*guid;
4620	char		*target_port, *target_port_dup;
4621	char		ks_name[KSTAT_STRLEN];
4622	uint_t		pid;
4623	int		by_id;
4624	mod_hash_val_t	hv;
4625
4626
4627	/* return if we have already allocated kstats */
4628	if (mdi_pi_kstat_exists(pip))
4629		return;
4630
	/*
	 * We need instance numbers to create a kstat name; return if we
	 * don't have instance numbers assigned yet.
	 */
4635	tgt_dip = mdi_pi_get_client(pip);
4636	pdip = mdi_pi_get_phci(pip);
4637	if ((ddi_get_instance(tgt_dip) == -1) || (ddi_get_instance(pdip) == -1))
4638		return;
4639
4640	/*
4641	 * A path oriented kstat has a ks_name of the form:
4642	 *
4643	 * <client-driver><instance>.t<pid>.<pHCI-driver><instance>
4644	 *
4645	 * We maintain a bidirectional 'target-port' to <pid> map,
4646	 * called targetmap. All pathinfo nodes with the same
4647	 * 'target-port' map to the same <pid>. The iostat(1M) code,
4648	 * when parsing a path oriented kstat name, uses the <pid> as
4649	 * a SCSI_VHCI_GET_TARGET_LONGNAME ioctl argument in order
4650	 * to get the 'target-port'. For KSTAT_FLAG_PERSISTENT kstats,
4651	 * this ioctl needs to translate a <pid> to a 'target-port'
4652	 * even after all pathinfo nodes associated with the
4653	 * 'target-port' have been destroyed. This is needed to support
4654	 * consistent first-iteration activity-since-boot iostat(1M)
4655	 * output. Because of this requirement, the mapping can't be
4656	 * based on pathinfo information in a devinfo snapshot.
4657	 */
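	/*
	 * For example (instance numbers hypothetical): a path from client
	 * ssd instance 1 through pHCI fp instance 0, mapped to <pid> 4,
	 * gets the ks_name "ssd1.t4.fp0".
	 */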
4658
4659	/* determine 'target-port' */
4660	if (mdi_prop_lookup_string(pip,
4661	    "target-port", &target_port) == MDI_SUCCESS) {
4662		target_port_dup = i_ddi_strdup(target_port, KM_SLEEP);
4663		(void) mdi_prop_free(target_port);
4664		by_id = 1;
4665	} else {
4666		/*
4667		 * If the pHCI did not set up 'target-port' on this
4668		 * pathinfo node, assume that our client is the only
4669		 * one with paths to the device by using the guid
4670		 * value as the 'target-port'. Since no other client
4671		 * will have the same guid, no other client will use
4672		 * the same <pid>.  NOTE: a client with an instance
4673		 * number always has a guid.
4674		 */
4675		(void) ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip,
4676		    PROPFLAGS, MDI_CLIENT_GUID_PROP, &guid);
4677		target_port_dup = i_ddi_strdup(guid, KM_SLEEP);
4678		ddi_prop_free(guid);
4679
4680		/*
4681		 * For this type of mapping we don't want the
		 * <pid> -> 'target-port' mapping to be made.  This
4683		 * will cause the SCSI_VHCI_GET_TARGET_LONGNAME ioctl
4684		 * to fail, and the iostat(1M) long '-n' output will
4685		 * still use the <pid>.  We do this because we just
4686		 * made up the 'target-port' using the guid, and we
4687		 * don't want to expose that fact in iostat output.
4688		 */
4689		by_id = 0;
4690	}
4691
4692	/* find/establish <pid> given 'target-port' */
4693	mutex_enter(&vhci_targetmap_mutex);
4694	if (mod_hash_find(vhci_targetmap_byport,
4695	    (mod_hash_key_t)target_port_dup, &hv) == 0) {
4696		pid = (int)(intptr_t)hv;	/* mapping exists */
4697	} else {
4698		pid = vhci_targetmap_pid++;	/* new mapping */
4699
4700		(void) mod_hash_insert(vhci_targetmap_byport,
4701		    (mod_hash_key_t)target_port_dup,
4702		    (mod_hash_val_t)(intptr_t)pid);
4703		if (by_id) {
4704			(void) mod_hash_insert(vhci_targetmap_bypid,
4705			    (mod_hash_key_t)(uintptr_t)pid,
4706			    (mod_hash_val_t)(uintptr_t)target_port_dup);
4707		}
4708		target_port_dup = NULL;		/* owned by hash */
4709	}
4710	mutex_exit(&vhci_targetmap_mutex);
4711
4712	/* form kstat name */
4713	(void) snprintf(ks_name, KSTAT_STRLEN, "%s%d.t%d.%s%d",
4714	    ddi_driver_name(tgt_dip), ddi_get_instance(tgt_dip),
4715	    pid, ddi_driver_name(pdip), ddi_get_instance(pdip));
4716
4717	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_path_online: path:%p "
4718	    "kstat %s: pid %x <-> port %s\n", (void *)pip,
4719	    ks_name, pid, target_port_dup));
4720	if (target_port_dup)
4721		kmem_free(target_port_dup, strlen(target_port_dup) + 1);
4722
4723	/* call mdi to create kstats with the name we built */
4724	(void) mdi_pi_kstat_create(pip, ks_name);
4725}
4726
4727/* ARGSUSED */
4728static int
4729vhci_pathinfo_online(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
4730{
4731	scsi_hba_tran_t			*hba = NULL;
4732	struct scsi_device		*psd = NULL;
4733	scsi_vhci_lun_t			*vlun = NULL;
4734	dev_info_t			*pdip = NULL;
4735	dev_info_t			*tgt_dip;
4736	struct scsi_vhci		*vhci;
4737	char				*guid;
4738	struct scsi_failover		*sf;
4739	struct scsi_failover_ops	*sfo;
4740	char				*override;
4741	scsi_vhci_priv_t		*svp = NULL;
4742	struct buf			*bp;
4743	struct scsi_address		*ap;
4744	struct scsi_pkt			*pkt;
4745	int				rval = MDI_FAILURE;
4746	uint_t				inq_size = VHCI_STD_INQ_SIZE;
4747	mpapi_item_list_t		*list_ptr;
4748	mpapi_lu_data_t			*ld;
4749
4750	ASSERT(vdip != NULL);
4751	ASSERT(pip != NULL);
4752
4753	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
4754	ASSERT(vhci != NULL);
4755
4756	pdip = mdi_pi_get_phci(pip);
4757	hba = ddi_get_driver_private(pdip);
4758	ASSERT(hba != NULL);
4759
4760	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
4761	ASSERT(svp != NULL);
4762
4763	tgt_dip = mdi_pi_get_client(pip);
4764	ASSERT(tgt_dip != NULL);
4765	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip, PROPFLAGS,
4766	    MDI_CLIENT_GUID_PROP, &guid) != DDI_SUCCESS) {
4767		VHCI_DEBUG(1, (CE_WARN, NULL, "vhci_path_online: lun guid "
4768		    "property failed"));
4769		goto failure;
4770	}
4771
4772	vlun = vhci_lun_lookup(tgt_dip);
4773	ASSERT(vlun != NULL);
4774
4775	ddi_prop_free(guid);
4776
4777	vlun->svl_dip = mdi_pi_get_client(pip);
4778	ASSERT(vlun->svl_dip != NULL);
4779
4780	psd = svp->svp_psd;
4781	ASSERT(psd != NULL);
4782
	/*
	 * For the INQUIRY response buffer size, we use VHCI_STD_INQ_SIZE
	 * (128 bytes) instead of SUN_INQSIZE (48 bytes), which is used in
	 * the sd layer. This is because we could get the Vendor specific
	 * parameters (present from byte 97 onwards), which are required to
	 * process Vendor specific data based on array type.
	 * This INQUIRY buffer is freed in vhci_pathinfo_offline but NEVER
	 * in a different layer like sd/phci transport. In other words, vhci
	 * maintains its own copy of scsi_device and scsi_inquiry data on a
	 * per-path basis.
	 */
4794	if (psd->sd_inq == NULL) {
4795		psd->sd_inq = (struct scsi_inquiry *)
4796		    kmem_zalloc(inq_size, KM_SLEEP);
4797	}
4798
4799	tgt_dip = psd->sd_dev;
4800	ASSERT(tgt_dip != NULL);
4801
4802	/*
4803	 * do inquiry to pass into probe routine; this
4804	 * will avoid each probe routine doing scsi inquiry
4805	 */
4806	bp = getrbuf(KM_SLEEP);
4807	bp->b_un.b_addr = (caddr_t)psd->sd_inq;
4808	bp->b_flags = B_READ;
4809	bp->b_bcount = inq_size;
4810	bp->b_resid = 0;
4811
4812	ap = &psd->sd_address;
4813	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP0,
4814	    sizeof (struct scsi_arq_status), 0, 0, SLEEP_FUNC, NULL);
4815	if (pkt == NULL) {
4816		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_pathinfo_online: "
4817		    "Inquiry init_pkt failed :%p\n", (void *)pip));
4818		rval = MDI_FAILURE;
4819		goto failure;
4820	}
4821	pkt->pkt_cdbp[0] = SCMD_INQUIRY;
4822	pkt->pkt_cdbp[4] = (uchar_t)inq_size;
4823	pkt->pkt_time = 60;
4824
4825	rval = vhci_do_scsi_cmd(pkt);
4826	scsi_destroy_pkt(pkt);
4827	freerbuf(bp);
4828	if (rval == 0) {
4829		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_pathinfo_online: "
4830		    "Failover Inquiry failed path:%p rval:%x\n",
4831		    (void *)pip, rval));
4832		rval = MDI_FAILURE;
4833		goto failure;
4834	}
4835
4836	/*
4837	 * Determine if device is supported under scsi_vhci, and select
4838	 * failover module.
4839	 *
	 * See if there is a scsi_vhci.conf file override for this device's
4841	 * VID/PID. The following values can be returned:
4842	 *
4843	 * NULL		If the NULL is returned then there is no scsi_vhci.conf
4844	 *		override.  For NULL, we determine the failover_ops for
4845	 *		this device by checking the sfo_device_probe entry
4846	 *		point for each 'fops' module, in order.
4847	 *
4848	 *		NOTE: Correct operation may depend on module ordering
4849	 *		of 'specific' (failover modules that are completely
4850	 *		VID/PID table based) to 'generic' (failover modules
4851	 *		that based on T10 standards like TPGS).  Currently,
4852	 *		the value of 'ddi-forceload' in scsi_vhci.conf is used
4853	 *		to establish the module list and probe order.
4854	 *
4855	 * "NONE"	If value "NONE" is returned then there is a
4856	 *		scsi_vhci.conf VID/PID override to indicate the device
4857	 *		should not be supported under scsi_vhci (even if there
4858	 *		is an 'fops' module supporting the device).
4859	 *
4860	 * "<other>"	If another value is returned then that value is the
4861	 *		name of the 'fops' module that should be used.
4862	 */
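	/*
	 * For illustration only, a hypothetical scsi_vhci.conf override
	 * entry (VID padded to 8 bytes, followed by PID) might look like:
	 *
	 *	scsi-vhci-failover-override =
	 *		"ACME    SuperDisk",	"NONE";
	 */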
4863	sfo = NULL;	/* "NONE" */
4864	override = scsi_get_device_type_string(
4865	    "scsi-vhci-failover-override", vdip, psd);
4866
4867	if (override == NULL) {
4868		/* NULL: default: select based on sfo_device_probe results */
4869		for (sf = scsi_failover_table; sf->sf_mod; sf++) {
4870			if ((sf->sf_sfo == NULL) ||
4871			    ((*sf->sf_sfo->sfo_device_probe) (psd,
4872			    psd->sd_inq, &vlun->svl_fops_ctpriv) ==
4873			    SFO_DEVICE_PROBE_PHCI))
4874				continue;
4875
4876			/* found failover module, supported under scsi_vhci */
4877			sfo = sf->sf_sfo;
4878			vlun->svl_fops_name =
4879			    i_ddi_strdup(sfo->sfo_name, KM_SLEEP);
4880			break;
4881		}
4882	} else if (strcmp(override, "NONE") && strcmp(override, "none")) {
4883		/* !"NONE": select based on driver.conf specified name */
4884		for (sf = scsi_failover_table, sfo = NULL; sf->sf_mod; sf++) {
4885			if ((sf->sf_sfo == NULL) ||
4886			    (sf->sf_sfo->sfo_name == NULL) ||
4887			    strcmp(override, sf->sf_sfo->sfo_name))
4888				continue;
4889
4890			/* found failover module, supported under scsi_vhci */
4891			sfo = sf->sf_sfo;
4892			vlun->svl_fops_name = kmem_alloc(strlen("conf ") +
4893			    strlen(sfo->sfo_name) + 1, KM_SLEEP);
4894			(void) sprintf(vlun->svl_fops_name, "conf %s",
4895			    sfo->sfo_name);
4896			break;
4897		}
4898	}
4899	if (override)
4900		kmem_free(override, strlen(override) + 1);
4901
4902	if (sfo == NULL) {
4903		/* no failover module - device not supported */
4904		VHCI_DEBUG(1, (CE_NOTE, vhci->vhci_dip,
4905		    "!vhci_pathinfo_online: dev (path 0x%p) not "
4906		    "supported\n", (void *)pip));
4907		vlun->svl_not_supported = 1;
4908		rval = MDI_NOT_SUPPORTED;
4909		goto done;
4910	}
4911
4912	/* failover supported for device - save failover_ops in vlun */
4913	vlun->svl_fops = sfo;
4914
4915	/*
4916	 * Obtain the device-type based mpxio options as specified in
4917	 * scsi_vhci.conf file.
4918	 *
4919	 * NOTE: currently, the end result is a call to
4920	 * mdi_set_lb_region_size().
4921	 */
4922	vhci_get_device_type_mpxio_options(vdip, tgt_dip, psd);
4923
4924	/*
4925	 * if PGR is active, revalidate key and register on this path also,
4926	 * if key is still valid
4927	 */
4928	sema_p(&vlun->svl_pgr_sema);
4929	if (vlun->svl_pgr_active) {
4930		rval = vhci_pgr_validate_and_register(svp);
4931		if (rval != 1) {
4932			rval = MDI_FAILURE;
4933			sema_v(&vlun->svl_pgr_sema);
4934			goto failure;
4935		}
4936	}
4937	sema_v(&vlun->svl_pgr_sema);
4938
4939	if (svp->svp_new_path) {
4940		/*
4941		 * Last chance to perform any cleanup operations on this
4942		 * new path before making this path completely online.
4943		 */
4944		svp->svp_new_path = 0;
4945
		/*
		 * If scsi_vhci knows the lun is already RESERVE'd,
		 * then skip issuing the RELEASE on the new path.
		 */
4950		if ((vlun->svl_flags & VLUN_RESERVE_ACTIVE_FLG) == 0) {
4951			/*
4952			 * Issue SCSI-2 RELEASE only for the first time on
4953			 * a new path just in case the host rebooted and
4954			 * a reservation is still pending on this path.
4955			 * IBM Shark storage does not clear RESERVE upon
4956			 * host reboot.
4957			 */
4958			ap = &psd->sd_address;
4959			pkt = scsi_init_pkt(ap, NULL, NULL, CDB_GROUP0,
4960			    sizeof (struct scsi_arq_status), 0, 0,
4961			    SLEEP_FUNC, NULL);
4962			if (pkt == NULL) {
4963				VHCI_DEBUG(1, (CE_NOTE, NULL,
4964				    "!vhci_pathinfo_online: "
4965				    "Release init_pkt failed :%p\n",
4966				    (void *)pip));
4967				rval = MDI_FAILURE;
4968				goto failure;
4969			}
4970			pkt->pkt_cdbp[0] = SCMD_RELEASE;
4971			pkt->pkt_time = 60;
4972
4973			VHCI_DEBUG(1, (CE_NOTE, NULL,
4974			    "!vhci_path_online: path:%p "
4975			    "Issued SCSI-2 RELEASE\n", (void *)pip));
4976
4977			/* Ignore the return value */
4978			(void) vhci_do_scsi_cmd(pkt);
4979			scsi_destroy_pkt(pkt);
4980		}
4981	}
4982
4983	rval = vhci_update_pathinfo(psd, pip, sfo, vlun, vhci);
4984	if (rval == MDI_FAILURE) {
4985		goto failure;
4986	}
4987
4988	/* Initialize MP-API data */
4989	vhci_update_mpapi_data(vhci, vlun, pip);
4990
4991	/*
4992	 * MP-API also needs the Inquiry data to be maintained in the
4993	 * mp_vendor_prop_t structure, so find the lun and update its
4994	 * structure with this data.
4995	 */
4996	list_ptr = (mpapi_item_list_t *)vhci_get_mpapi_item(vhci, NULL,
4997	    MP_OBJECT_TYPE_MULTIPATH_LU, (void *)vlun);
4998	ld = (mpapi_lu_data_t *)list_ptr->item->idata;
4999	if (ld != NULL) {
5000		bcopy(psd->sd_inq->inq_vid, ld->prop.prodInfo.vendor, 8);
5001		bcopy(psd->sd_inq->inq_pid, ld->prop.prodInfo.product, 16);
5002		bcopy(psd->sd_inq->inq_revision, ld->prop.prodInfo.revision, 4);
5003	} else {
5004		VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_pathinfo_online: "
5005		    "mpapi_lu_data_t is NULL"));
5006	}
5007
5008	/* create kstats for path */
5009	vhci_kstat_create_pathinfo(pip);
5010
5011done:
5012	mutex_enter(&vhci_global_mutex);
5013	cv_broadcast(&vhci_cv);
5014	mutex_exit(&vhci_global_mutex);
5015
5016	if (vlun->svl_setcap_done) {
5017		(void) vhci_pHCI_cap(ap, "sector-size",
5018		    vlun->svl_sector_size, 1, pip);
5019	}
5020
5021	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_path_online: path:%p\n",
5022	    (void *)pip));
5023
5024failure:
5025	if ((rval != MDI_SUCCESS) && psd->sd_inq) {
5026		kmem_free((caddr_t)psd->sd_inq, inq_size);
5027		psd->sd_inq = (struct scsi_inquiry *)NULL;
5028	}
5029	return (rval);
5030}
5031
5032/*
5033 * path offline handler.  Release all bindings that will not be
5034 * released by the normal packet transport/completion code path.
5035 * Since we don't (presently) keep any bindings alive outside of
5036 * the in-transport packets (which will be released on completion)
5037 * there is not much to do here.
5038 */
5039/* ARGSUSED */
5040static int
5041vhci_pathinfo_offline(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags)
5042{
5043	scsi_hba_tran_t		*hba = NULL;
5044	struct scsi_device	*psd = NULL;
5045	dev_info_t		*pdip = NULL;
5046	dev_info_t		*cdip = NULL;
5047	scsi_vhci_priv_t	*svp = NULL;
5048	uint_t			inq_size = VHCI_STD_INQ_SIZE;
5049
5050	ASSERT(vdip != NULL);
5051	ASSERT(pip != NULL);
5052
5053	pdip = mdi_pi_get_phci(pip);
5054	ASSERT(pdip != NULL);
5055	if (pdip == NULL) {
5056		VHCI_DEBUG(1, (CE_WARN, vdip, "Invalid path 0x%p: NULL "
5057		    "phci dip", (void *)pip));
5058		return (MDI_FAILURE);
5059	}
5060
5061	cdip = mdi_pi_get_client(pip);
5062	ASSERT(cdip != NULL);
5063	if (cdip == NULL) {
5064		VHCI_DEBUG(1, (CE_WARN, vdip, "Invalid path 0x%p: NULL "
5065		    "client dip", (void *)pip));
5066		return (MDI_FAILURE);
5067	}
5068
5069	hba = ddi_get_driver_private(pdip);
5070	ASSERT(hba != NULL);
5071
5072	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
5073	if (svp == NULL) {
5074		/*
5075		 * mdi_pathinfo node in INIT state can have vHCI private
5076		 * information set to null
5077		 */
5078		VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5079		    "svp is NULL for pip 0x%p\n", (void *)pip));
5080		return (MDI_SUCCESS);
5081	}
5082
5083	psd = svp->svp_psd;
5084	ASSERT(psd != NULL);
5085
5086	mutex_enter(&svp->svp_mutex);
5087
5088	VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5089	    "%d cmds pending on path: 0x%p\n", svp->svp_cmds, (void *)pip));
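	/*
	 * Quiesce the path: wait up to vhci_path_quiesce_timeout seconds
	 * for the commands outstanding on this path to drain before
	 * transitioning it OFFLINE.
	 */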
5090	while (svp->svp_cmds != 0) {
5091		if (cv_timedwait(&svp->svp_cv, &svp->svp_mutex,
5092		    ddi_get_lbolt() +
5093		    drv_usectohz(vhci_path_quiesce_timeout * 1000000)) == -1) {
			/*
			 * The timeout was reached without the condition
			 * being signaled.
			 */
5098			VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5099			    "Timeout reached on path 0x%p without the cond\n",
5100			    (void *)pip));
5101			VHCI_DEBUG(1, (CE_NOTE, vdip, "!vhci_pathinfo_offline: "
5102			    "%d cmds still pending on path: 0x%p\n",
5103			    svp->svp_cmds, (void *)pip));
5104			break;
5105		}
5106	}
5107	mutex_exit(&svp->svp_mutex);
5108
	/*
	 * Check to see if this vlun has an active SCSI-II RESERVE and this
	 * is the pip for the path that has been reserved.
	 * If so, clear the reservation by sending a reset, so the host will
	 * not get a reservation conflict. Reset the flag
	 * VLUN_RESERVE_ACTIVE_FLG for this lun. Also, a reset notify is sent
	 * to the target driver just in case the POR check condition is
	 * cleared by some other layer in the stack.
	 */
5118	if (svp->svp_svl->svl_flags & VLUN_RESERVE_ACTIVE_FLG) {
5119		if (pip == svp->svp_svl->svl_resrv_pip) {
5120			if (vhci_recovery_reset(svp->svp_svl,
5121			    &svp->svp_psd->sd_address, TRUE,
5122			    VHCI_DEPTH_TARGET) == 0) {
5123				VHCI_DEBUG(1, (CE_NOTE, NULL,
5124				    "!vhci_pathinfo_offline (pip:%p):"
5125				    "reset failed, retrying\n", (void *)pip));
5126				delay(1*drv_usectohz(1000000));
5127				if (vhci_recovery_reset(svp->svp_svl,
5128				    &svp->svp_psd->sd_address, TRUE,
5129				    VHCI_DEPTH_TARGET) == 0) {
5130					VHCI_DEBUG(1, (CE_NOTE, NULL,
5131					    "!vhci_pathinfo_offline "
5132					    "(pip:%p): reset failed, "
5133					    "giving up!\n", (void *)pip));
5134				}
5135			}
5136			svp->svp_svl->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
5137		}
5138	}
5139
5140	mdi_pi_set_state(pip, MDI_PATHINFO_STATE_OFFLINE);
5141	if (psd->sd_inq) {
5142		kmem_free((caddr_t)psd->sd_inq, inq_size);
5143		psd->sd_inq = (struct scsi_inquiry *)NULL;
5144	}
5145	vhci_mpapi_set_path_state(vdip, pip, MP_DRVR_PATH_STATE_REMOVED);
5146
5147	VHCI_DEBUG(1, (CE_NOTE, NULL,
5148	    "!vhci_pathinfo_offline: offlined path 0x%p\n", (void *)pip));
5149	return (MDI_SUCCESS);
5150}
5151
5152
5153/*
5154 * routine for SCSI VHCI IOCTL implementation.
5155 */
5156/* ARGSUSED */
5157static int
5158vhci_ctl(dev_t dev, int cmd, intptr_t data, int mode, cred_t *credp, int *rval)
5159{
5160	struct scsi_vhci		*vhci;
5161	dev_info_t			*vdip;
5162	mdi_pathinfo_t			*pip;
5163	int				instance, held;
5164	int				retval = 0;
5165	caddr_t				phci_path = NULL, client_path = NULL;
5166	caddr_t				paddr = NULL;
5167	sv_iocdata_t			ioc;
5168	sv_iocdata_t			*pioc = &ioc;
5169	sv_switch_to_cntlr_iocdata_t	iocsc;
5170	sv_switch_to_cntlr_iocdata_t	*piocsc = &iocsc;
5171	caddr_t				s;
5172	scsi_vhci_lun_t			*vlun;
5173	struct scsi_failover_ops	*fo;
5174	char				*pclass;
5175
5176	/* Check for validity of vhci structure */
5177	vhci = ddi_get_soft_state(vhci_softstate, MINOR2INST(getminor(dev)));
5178	if (vhci == NULL) {
5179		return (ENXIO);
5180	}
5181
5182	mutex_enter(&vhci->vhci_mutex);
5183	if ((vhci->vhci_state & VHCI_STATE_OPEN) == 0) {
5184		mutex_exit(&vhci->vhci_mutex);
5185		return (ENXIO);
5186	}
5187	mutex_exit(&vhci->vhci_mutex);
5188
5189	/* Get the vhci dip */
5190	vdip = vhci->vhci_dip;
5191	ASSERT(vdip != NULL);
5192	instance = ddi_get_instance(vdip);
5193
5194	/* Allocate memory for getting parameters from userland */
5195	phci_path	= kmem_zalloc(MAXPATHLEN, KM_SLEEP);
5196	client_path	= kmem_zalloc(MAXPATHLEN, KM_SLEEP);
5197	paddr		= kmem_zalloc(MAXNAMELEN, KM_SLEEP);
5198
5199	/*
5200	 * Set a local variable indicating the ioctl name. Used for
5201	 * printing debug strings.
5202	 */
5203	switch (cmd) {
5204	case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5205		s = "GET_CLIENT_MULTIPATH_INFO";
5206		break;
5207
5208	case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5209		s = "GET_PHCI_MULTIPATH_INFO";
5210		break;
5211
5212	case SCSI_VHCI_GET_CLIENT_NAME:
5213		s = "GET_CLIENT_NAME";
5214		break;
5215
5216	case SCSI_VHCI_PATH_ONLINE:
5217		s = "PATH_ONLINE";
5218		break;
5219
5220	case SCSI_VHCI_PATH_OFFLINE:
5221		s = "PATH_OFFLINE";
5222		break;
5223
5224	case SCSI_VHCI_PATH_STANDBY:
5225		s = "PATH_STANDBY";
5226		break;
5227
5228	case SCSI_VHCI_PATH_TEST:
5229		s = "PATH_TEST";
5230		break;
5231
5232	case SCSI_VHCI_SWITCH_TO_CNTLR:
5233		s = "SWITCH_TO_CNTLR";
5234		break;
5235	case SCSI_VHCI_PATH_DISABLE:
5236		s = "PATH_DISABLE";
5237		break;
5238	case SCSI_VHCI_PATH_ENABLE:
5239		s = "PATH_ENABLE";
5240		break;
5241
5242	case SCSI_VHCI_GET_TARGET_LONGNAME:
5243		s = "GET_TARGET_LONGNAME";
5244		break;
5245
5246#ifdef	DEBUG
5247	case SCSI_VHCI_CONFIGURE_PHCI:
5248		s = "CONFIGURE_PHCI";
5249		break;
5250
5251	case SCSI_VHCI_UNCONFIGURE_PHCI:
5252		s = "UNCONFIGURE_PHCI";
5253		break;
5254#endif
5255
5256	default:
5257		s = "Unknown";
5258		vhci_log(CE_NOTE, vdip,
5259		    "!vhci%d: ioctl %x (unsupported ioctl)", instance, cmd);
5260		retval = ENOTSUP;
5261		break;
5262	}
5263	if (retval != 0) {
5264		goto end;
5265	}
5266
5267	VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci%d: ioctl <%s>", instance, s));
5268
5269	/*
5270	 * Get IOCTL parameters from userland
5271	 */
5272	switch (cmd) {
5273	case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5274	case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5275	case SCSI_VHCI_GET_CLIENT_NAME:
5276	case SCSI_VHCI_PATH_ONLINE:
5277	case SCSI_VHCI_PATH_OFFLINE:
5278	case SCSI_VHCI_PATH_STANDBY:
5279	case SCSI_VHCI_PATH_TEST:
5280	case SCSI_VHCI_PATH_DISABLE:
5281	case SCSI_VHCI_PATH_ENABLE:
5282	case SCSI_VHCI_GET_TARGET_LONGNAME:
5283#ifdef	DEBUG
5284	case SCSI_VHCI_CONFIGURE_PHCI:
5285	case SCSI_VHCI_UNCONFIGURE_PHCI:
5286#endif
5287		retval = vhci_get_iocdata((const void *)data, pioc, mode, s);
5288		break;
5289
5290	case SCSI_VHCI_SWITCH_TO_CNTLR:
5291		retval = vhci_get_iocswitchdata((const void *)data, piocsc,
5292		    mode, s);
5293		break;
5294	}
5295	if (retval != 0) {
5296		goto end;
5297	}
5298
5299
5300	/*
5301	 * Process the IOCTL
5302	 */
5303	switch (cmd) {
5304	case SCSI_VHCI_GET_CLIENT_MULTIPATH_INFO:
5305	{
5306		uint_t		num_paths;	/* Num paths to client dev */
5307		sv_path_info_t	*upibuf = NULL;	/* To keep userland values */
5308		sv_path_info_t	*kpibuf = NULL; /* Kernel data for ioctls */
5309		dev_info_t	*cdip;		/* Client device dip */
5310
5311		if (pioc->ret_elem == NULL) {
5312			retval = EINVAL;
5313			break;
5314		}
5315
5316		/* Get client device path from user land */
5317		if (vhci_ioc_get_client_path(pioc, client_path, mode, s)) {
5318			retval = EFAULT;
5319			break;
5320		}
5321
5322		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5323		    "client <%s>", s, client_path));
5324
5325		/* Get number of paths to this client device */
5326		if ((cdip = mdi_client_path2devinfo(vdip, client_path))
5327		    == NULL) {
5328			retval = ENXIO;
5329			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5330			    "client dip doesn't exist. invalid path <%s>",
5331			    s, client_path));
5332			break;
5333		}
5334		num_paths = mdi_client_get_path_count(cdip);
5335
5336		if (ddi_copyout(&num_paths, pioc->ret_elem,
5337		    sizeof (num_paths), mode)) {
5338			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5339			    "num_paths copyout failed", s));
5340			retval = EFAULT;
5341			break;
5342		}
5343
5344		/* If the user just wanted num_paths, then return */
5345		if (pioc->buf_elem == 0 || pioc->ret_buf == NULL ||
5346		    num_paths == 0) {
5347			break;
5348		}
5349
5350		/* Clamp num_paths to what can be sent to userland */
5351		if (num_paths > pioc->buf_elem) {
5352			num_paths = pioc->buf_elem;
5353		}
5354
5355		/* Allocate memory and get userland pointers */
5356		if (vhci_ioc_alloc_pathinfo(&upibuf, &kpibuf, num_paths,
5357		    pioc, mode, s) != 0) {
5358			retval = EFAULT;
5359			break;
5360		}
5361		ASSERT(upibuf != NULL);
5362		ASSERT(kpibuf != NULL);
5363
5364		/*
5365		 * Get the path information and send it to userland.
5366		 */
5367		if (vhci_get_client_path_list(cdip, kpibuf, num_paths)
5368		    != MDI_SUCCESS) {
5369			retval = ENXIO;
5370			vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5371			break;
5372		}
5373
5374		if (vhci_ioc_send_pathinfo(upibuf, kpibuf, num_paths,
5375		    pioc, mode, s)) {
5376			retval = EFAULT;
5377			vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5378			break;
5379		}
5380
5381		/* Free the memory allocated for path information */
5382		vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5383		break;
5384	}
5385
5386	case SCSI_VHCI_GET_PHCI_MULTIPATH_INFO:
5387	{
5388		uint_t		num_paths;	/* Num paths to client dev */
5389		sv_path_info_t	*upibuf = NULL;	/* To keep userland values */
5390		sv_path_info_t	*kpibuf = NULL; /* Kernel data for ioctls */
5391		dev_info_t	*pdip;		/* PHCI device dip */
5392
5393		if (pioc->ret_elem == NULL) {
5394			retval = EINVAL;
5395			break;
5396		}
5397
5398		/* Get PHCI device path from user land */
5399		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5400			retval = EFAULT;
5401			break;
5402		}
5403
5404		VHCI_DEBUG(6, (CE_WARN, vdip,
5405		    "!vhci_ioctl: ioctl <%s> phci <%s>", s, phci_path));
5406
5407		/* Get number of devices associated with this PHCI device */
5408		if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5409			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5410			    "phci dip doesn't exist. invalid path <%s>",
5411			    s, phci_path));
5412			retval = ENXIO;
5413			break;
5414		}
5415
5416		num_paths = mdi_phci_get_path_count(pdip);
5417
5418		if (ddi_copyout(&num_paths, pioc->ret_elem,
5419		    sizeof (num_paths), mode)) {
5420			VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5421			    "num_paths copyout failed", s));
5422			retval = EFAULT;
5423			break;
5424		}
5425
5426		/* If the user just wanted num_paths, then return */
5427		if (pioc->buf_elem == 0 || pioc->ret_buf == NULL ||
5428		    num_paths == 0) {
5429			break;
5430		}
5431
5432		/* Clamp num_paths to what can be sent to userland */
5433		if (num_paths > pioc->buf_elem) {
5434			num_paths = pioc->buf_elem;
5435		}
5436
5437		/* Allocate memory and get userland pointers */
5438		if (vhci_ioc_alloc_pathinfo(&upibuf, &kpibuf, num_paths,
5439		    pioc, mode, s) != 0) {
5440			retval = EFAULT;
5441			break;
5442		}
5443		ASSERT(upibuf != NULL);
5444		ASSERT(kpibuf != NULL);
5445
5446		/*
5447		 * Get the path information and send it to userland.
5448		 */
5449		if (vhci_get_phci_path_list(pdip, kpibuf, num_paths)
5450		    != MDI_SUCCESS) {
5451			retval = ENXIO;
5452			vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5453			break;
5454		}
5455
5456		if (vhci_ioc_send_pathinfo(upibuf, kpibuf, num_paths,
5457		    pioc, mode, s)) {
5458			retval = EFAULT;
5459			vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5460			break;
5461		}
5462
5463		/* Free the memory allocated for path information */
5464		vhci_ioc_free_pathinfo(upibuf, kpibuf, num_paths);
5465		break;
5466	}
5467
5468	case SCSI_VHCI_GET_CLIENT_NAME:
5469	{
5470		dev_info_t		*cdip, *pdip;
5471
5472		/* Get PHCI path and device address from user land */
5473		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s) ||
5474		    vhci_ioc_get_paddr(pioc, paddr, mode, s)) {
5475			retval = EFAULT;
5476			break;
5477		}
5478
5479		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5480		    "phci <%s>, paddr <%s>", s, phci_path, paddr));
5481
5482		/* Get the PHCI dip */
5483		if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5484			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5485			    "phci dip doesn't exist. invalid path <%s>",
5486			    s, phci_path));
5487			retval = ENXIO;
5488			break;
5489		}
5490
5491		if ((pip = mdi_pi_find(pdip, NULL, paddr)) == NULL) {
5492			VHCI_DEBUG(1, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5493			    "pathinfo doesn't exist. invalid device addr", s));
5494			retval = ENXIO;
5495			break;
5496		}
5497
5498		/* Get the client device pathname and send to userland */
5499		cdip = mdi_pi_get_client(pip);
5500		vhci_ioc_devi_to_path(cdip, client_path);
5501
5502		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5503		    "client <%s>", s, client_path));
5504
5505		if (vhci_ioc_send_client_path(client_path, pioc, mode, s)) {
5506			retval = EFAULT;
5507			break;
5508		}
5509		break;
5510	}
5511
5512	case SCSI_VHCI_PATH_ONLINE:
5513	case SCSI_VHCI_PATH_OFFLINE:
5514	case SCSI_VHCI_PATH_STANDBY:
5515	case SCSI_VHCI_PATH_TEST:
5516	{
5517		dev_info_t		*pdip;	/* PHCI dip */
5518
5519		/* Get PHCI path and device address from user land */
5520		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s) ||
5521		    vhci_ioc_get_paddr(pioc, paddr, mode, s)) {
5522			retval = EFAULT;
5523			break;
5524		}
5525
5526		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5527		    "phci <%s>, paddr <%s>", s, phci_path, paddr));
5528
5529		/* Get the PHCI dip */
5530		if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5531			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5532			    "phci dip doesn't exist. invalid path <%s>",
5533			    s, phci_path));
5534			retval = ENXIO;
5535			break;
5536		}
5537
5538		if ((pip = mdi_pi_find(pdip, NULL, paddr)) == NULL) {
5539			VHCI_DEBUG(1, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5540			    "pathinfo doesn't exist. invalid device addr", s));
5541			retval = ENXIO;
5542			break;
5543		}
5544
5545		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5546		    "Calling MDI function to change device state", s));
5547
5548		switch (cmd) {
5549		case SCSI_VHCI_PATH_ONLINE:
5550			retval = mdi_pi_online(pip, 0);
5551			break;
5552
5553		case SCSI_VHCI_PATH_OFFLINE:
5554			retval = mdi_pi_offline(pip, 0);
5555			break;
5556
5557		case SCSI_VHCI_PATH_STANDBY:
5558			retval = mdi_pi_standby(pip, 0);
5559			break;
5560
5561		case SCSI_VHCI_PATH_TEST:
5562			break;
5563		}
5564		break;
5565	}
5566
5567	case SCSI_VHCI_SWITCH_TO_CNTLR:
5568	{
5569		dev_info_t *cdip;
5570		struct scsi_device *devp;
5571
5572		/* Get the client device pathname */
5573		if (ddi_copyin(piocsc->client, client_path,
5574		    MAXPATHLEN, mode)) {
5575			VHCI_DEBUG(2, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5576			    "client_path copyin failed", s));
5577			retval = EFAULT;
5578			break;
5579		}
5580
5581		/* Get the path class to which user wants to switch */
5582		if (ddi_copyin(piocsc->class, paddr, MAXNAMELEN, mode)) {
5583			VHCI_DEBUG(2, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5584			    "controller_class copyin failed", s));
5585			retval = EFAULT;
5586			break;
5587		}
5588
5589		/* Perform validity checks */
5590		if ((cdip = mdi_client_path2devinfo(vdip,
5591		    client_path)) == NULL) {
5592			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5593			    "client dip doesn't exist. invalid path <%s>",
5594			    s, client_path));
5595			retval = ENXIO;
5596			break;
5597		}
5598
5599		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: Calling MDI func "
5600		    "to switch controller"));
5601		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: client <%s> "
5602		    "class <%s>", client_path, paddr));
5603
5604		if (strcmp(paddr, PCLASS_PRIMARY) &&
5605		    strcmp(paddr, PCLASS_SECONDARY)) {
5606			VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5607			    "invalid path class <%s>", s, paddr));
5608			retval = ENXIO;
5609			break;
5610		}
5611
5612		devp = ddi_get_driver_private(cdip);
5613		if (devp == NULL) {
5614			VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5615			    "invalid scsi device <%s>", s, client_path));
5616			retval = ENXIO;
5617			break;
5618		}
5619		vlun = ADDR2VLUN(&devp->sd_address);
5620		ASSERT(vlun);
5621
5622		/*
5623		 * Check whether the device has only one pclass, PRIMARY.
5624		 * If so, this device doesn't support failover.  It is
5625		 * assumed that a device with a single pclass is PRIMARY,
5626		 * as that is the case today.  If this changes and other
5627		 * symmetric devices are supported with a different pclass,
5628		 * this IOCTL will have to be overhauled anyway, as the only
5629		 * arguments it currently accepts are PRIMARY and SECONDARY.
5630		 */
5631		fo = vlun->svl_fops;
5632		if ((*fo->sfo_pathclass_next)(PCLASS_PRIMARY, &pclass,
5633		    vlun->svl_fops_ctpriv)) {
5634			retval = ENOTSUP;
5635			break;
5636		}
5637
5638		VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
5639		mutex_enter(&vlun->svl_mutex);
5640		if (vlun->svl_active_pclass != NULL) {
5641			if (strcmp(vlun->svl_active_pclass, paddr) == 0) {
5642				mutex_exit(&vlun->svl_mutex);
5643				retval = EALREADY;
5644				VHCI_RELEASE_LUN(vlun);
5645				break;
5646			}
5647		}
5648		mutex_exit(&vlun->svl_mutex);
5649		/* Call the MDI function to cause a switchover */
5650		retval = mdi_failover(vdip, cdip, MDI_FAILOVER_SYNC);
5651		if (retval == MDI_SUCCESS) {
5652			retval = 0;
5653		} else if (retval == MDI_BUSY) {
5654			retval = EBUSY;
5655		} else {
5656			retval = EIO;
5657		}
5658		VHCI_RELEASE_LUN(vlun);
5659		break;
5660	}
5661
5662	case SCSI_VHCI_PATH_ENABLE:
5663	case SCSI_VHCI_PATH_DISABLE:
5664	{
5665		dev_info_t	*cdip, *pdip;
5666
5667		/*
5668		 * Get client device path from user land
5669		 */
5670		if (vhci_ioc_get_client_path(pioc, client_path, mode, s)) {
5671			retval = EFAULT;
5672			break;
5673		}
5674
5675		/*
5676		 * Get Phci device path from user land
5677		 */
5678		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5679			retval = EFAULT;
5680			break;
5681		}
5682
5683		/*
5684		 * Get the devinfo for the Phci.
5685		 */
5686		if ((pdip = mdi_phci_path2devinfo(vdip, phci_path)) == NULL) {
5687			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5688			    "phci dip doesn't exist. invalid path <%s>",
5689			    s, phci_path));
5690			retval = ENXIO;
5691			break;
5692		}
5693
5694		/*
5695		 * If the client path is set to /scsi_vhci then we need
5696		 * to do the operation on all the clients so set cdip to NULL.
5697		 * Else, try to get the client dip.
5698		 */
5699		if (strcmp(client_path, "/scsi_vhci") == 0) {
5700			cdip = NULL;
5701		} else {
5702			if ((cdip = mdi_client_path2devinfo(vdip,
5703			    client_path)) == NULL) {
5704				retval = ENXIO;
5705				VHCI_DEBUG(1, (CE_WARN, NULL,
5706				    "!vhci_ioctl: ioctl <%s> client dip "
5707				    "doesn't exist. invalid path <%s>",
5708				    s, client_path));
5709				break;
5710			}
5711		}
5712
5713		if (cmd == SCSI_VHCI_PATH_ENABLE)
5714			retval = mdi_pi_enable(cdip, pdip, USER_DISABLE);
5715		else
5716			retval = mdi_pi_disable(cdip, pdip, USER_DISABLE);
5717
5718		break;
5719	}
5720
5721	case SCSI_VHCI_GET_TARGET_LONGNAME:
5722	{
5723		uint_t		pid = pioc->buf_elem;
5724		char		*target_port;
5725		mod_hash_val_t	hv;
5726
5727		/* targetmap lookup of 'target-port' by <pid> */
5728		if (mod_hash_find(vhci_targetmap_bypid,
5729		    (mod_hash_key_t)(uintptr_t)pid, &hv) != 0) {
5730			/*
5731			 * NOTE: failure to find the mapping is OK for guid
5732			 * based 'target-port' values.
5733			 */
5734			VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5735			    "targetport mapping doesn't exist: pid %d",
5736			    s, pid));
5737			retval = ENXIO;
5738			break;
5739		}
5740
5741		/* copyout 'target-port' result */
5742		target_port = (char *)hv;
5743		if (copyoutstr(target_port, pioc->addr, MAXNAMELEN, NULL)) {
5744			VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5745			    "targetport copyout failed: len: %d",
5746			    s, (int)strlen(target_port)));
5747			retval = EFAULT;
5748		}
5749		break;
5750	}
5751
5752#ifdef	DEBUG
5753	case SCSI_VHCI_CONFIGURE_PHCI:
5754	{
5755		dev_info_t		*pdip;
5756
5757		/* Get PHCI path and device address from user land */
5758		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5759			retval = EFAULT;
5760			break;
5761		}
5762
5763		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5764		    "phci <%s>", s, phci_path));
5765
5766		/* Get the PHCI dip */
5767		if ((pdip = e_ddi_hold_devi_by_path(phci_path, 0)) == NULL) {
5768			VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5769			    "phci dip doesn't exist. invalid path <%s>",
5770			    s, phci_path));
5771			retval = ENXIO;
5772			break;
5773		}
5774
5775		if (ndi_devi_config(pdip,
5776		    NDI_DEVFS_CLEAN|NDI_DEVI_PERSIST) != NDI_SUCCESS) {
5777			retval = EIO;
5778		}
5779
5780		ddi_release_devi(pdip);
5781		break;
5782	}
5783
5784	case SCSI_VHCI_UNCONFIGURE_PHCI:
5785	{
5786		dev_info_t		*pdip;
5787
5788		/* Get PHCI path and device address from user land */
5789		if (vhci_ioc_get_phci_path(pioc, phci_path, mode, s)) {
5790			retval = EFAULT;
5791			break;
5792		}
5793
5794		VHCI_DEBUG(6, (CE_WARN, vdip, "!vhci_ioctl: ioctl <%s> "
5795		    "phci <%s>", s, phci_path));
5796
5797		/* Get the PHCI dip */
5798		if ((pdip = e_ddi_hold_devi_by_path(phci_path, 0)) == NULL) {
5799			VHCI_DEBUG(3, (CE_WARN, NULL, "!vhci_ioctl: ioctl <%s> "
5800			    "phci dip doesn't exist. invalid path <%s>",
5801			    s, phci_path));
5802			retval = ENXIO;
5803			break;
5804		}
5805
5806		if (ndi_devi_unconfig(pdip,
5807		    NDI_DEVI_REMOVE|NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
5808			retval = EBUSY;
5809		}
5810
5811		ddi_release_devi(pdip);
5812		break;
5813	}
5814#endif
5815	}
5816
5817end:
5818	/* Free the memory allocated above */
5819	if (phci_path != NULL) {
5820		kmem_free(phci_path, MAXPATHLEN);
5821	}
5822	if (client_path != NULL) {
5823		kmem_free(client_path, MAXPATHLEN);
5824	}
5825	if (paddr != NULL) {
5826		kmem_free(paddr, MAXNAMELEN);
5827	}
5828	return (retval);
5829}
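
/*
 * Illustrative userland sketch (not part of this driver): driving
 * SCSI_VHCI_PATH_ONLINE through vhci_ioctl() above.  The devctl minor
 * node path is an assumption for illustration; the driver copies in
 * MAXPATHLEN/MAXNAMELEN bytes, so full-sized buffers are passed.
 */
#if 0
#include <sys/scsi/adapters/scsi_vhci.h>
#include <sys/param.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
set_path_online(const char *phci, const char *paddr)
{
	sv_iocdata_t	ioc;
	char		phci_buf[MAXPATHLEN];
	char		addr_buf[MAXNAMELEN];
	int		fd, rv;

	/* hypothetical scsi_vhci devctl node path */
	if ((fd = open("/devices/scsi_vhci:devctl", O_RDWR)) < 0)
		return (-1);

	(void) memset(&ioc, 0, sizeof (ioc));
	(void) strlcpy(phci_buf, phci, sizeof (phci_buf));
	(void) strlcpy(addr_buf, paddr, sizeof (addr_buf));
	ioc.phci = phci_buf;	/* read by vhci_ioc_get_phci_path() */
	ioc.addr = addr_buf;	/* read by vhci_ioc_get_paddr() */

	rv = ioctl(fd, SCSI_VHCI_PATH_ONLINE, &ioc);
	(void) close(fd);
	return (rv);
}
#endif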
5830
5831/*
5832 * devctl IOCTL support for client device DR
5833 */
5834/* ARGSUSED */
5835int
5836vhci_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
5837    int *rvalp)
5838{
5839	dev_info_t *self;
5840	dev_info_t *child;
5841	scsi_hba_tran_t *hba;
5842	struct devctl_iocdata *dcp;
5843	struct scsi_vhci *vhci;
5844	int rv = 0;
5845	int retval = 0;
5846	scsi_vhci_priv_t *svp;
5847	mdi_pathinfo_t  *pip;
5848
5849	if ((vhci = ddi_get_soft_state(vhci_softstate,
5850	    MINOR2INST(getminor(dev)))) == NULL)
5851		return (ENXIO);
5852
5853	/*
5854	 * check if :devctl minor device has been opened
5855	 */
5856	mutex_enter(&vhci->vhci_mutex);
5857	if ((vhci->vhci_state & VHCI_STATE_OPEN) == 0) {
5858		mutex_exit(&vhci->vhci_mutex);
5859		return (ENXIO);
5860	}
5861	mutex_exit(&vhci->vhci_mutex);
5862
5863	self = vhci->vhci_dip;
5864	hba = ddi_get_driver_private(self);
5865	if (hba == NULL)
5866		return (ENXIO);
5867
5868	/*
5869	 * We can use the generic implementation for these ioctls
5870	 */
5871	switch (cmd) {
5872	case DEVCTL_DEVICE_GETSTATE:
5873	case DEVCTL_DEVICE_ONLINE:
5874	case DEVCTL_DEVICE_OFFLINE:
5875	case DEVCTL_DEVICE_REMOVE:
5876	case DEVCTL_BUS_GETSTATE:
5877		return (ndi_devctl_ioctl(self, cmd, arg, mode, 0));
5878	}
5879
5880	/*
5881	 * read devctl ioctl data
5882	 */
5883	if (ndi_dc_allochdl((void *)arg, &dcp) != NDI_SUCCESS)
5884		return (EFAULT);
5885
5886	switch (cmd) {
5887
5888	case DEVCTL_DEVICE_RESET:
5889		/*
5890		 * lookup and hold child device
5891		 */
5892		if ((child = ndi_devi_find(self, ndi_dc_getname(dcp),
5893		    ndi_dc_getaddr(dcp))) == NULL) {
5894			rv = ENXIO;
5895			break;
5896		}
5897		retval = mdi_select_path(child, NULL,
5898		    (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH),
5899		    NULL, &pip);
5900		if ((retval != MDI_SUCCESS) || (pip == NULL)) {
5901			VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioctl: "
5902			    "Unable to get a path, dip 0x%p", (void *)child));
5903			rv = ENXIO;
5904			break;
5905		}
5906		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
5907		if (vhci_recovery_reset(svp->svp_svl,
5908		    &svp->svp_psd->sd_address, TRUE,
5909		    VHCI_DEPTH_TARGET) == 0) {
5910			VHCI_DEBUG(1, (CE_NOTE, NULL,
5911			    "!vhci_ioctl(pip:%p): "
5912			    "reset failed\n", (void *)pip));
5913			rv = ENXIO;
5914		}
5915		mdi_rele_path(pip);
5916		break;
5917
5918	case DEVCTL_BUS_QUIESCE:
5919	case DEVCTL_BUS_UNQUIESCE:
5920	case DEVCTL_BUS_RESET:
5921	case DEVCTL_BUS_RESETALL:
5922#ifdef	DEBUG
5923	case DEVCTL_BUS_CONFIGURE:
5924	case DEVCTL_BUS_UNCONFIGURE:
5925#endif
5926		rv = ENOTSUP;
5927		break;
5928
5929	default:
5930		rv = ENOTTY;
5931	} /* end of outer switch */
5932
5933	ndi_dc_freehdl(dcp);
5934	return (rv);
5935}
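
/*
 * Illustrative userland sketch (an assumption, not code from this
 * driver): DEVCTL_DEVICE_RESET above is what libdevice's
 * devctl_device_reset(3DEVICE) issues against a client device node.
 */
#if 0
#include <libdevice.h>

static int
reset_vhci_client(char *client_devfs_path)
{
	devctl_hdl_t	hdl;
	int		rv;

	if ((hdl = devctl_device_acquire(client_devfs_path, 0)) == NULL)
		return (-1);
	rv = devctl_device_reset(hdl);	/* handled by vhci_devctl() above */
	devctl_release(hdl);
	return (rv);
}
#endif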
5936
5937/*
5938 * Routine to get the PHCI pathname from ioctl structures in userland
5939 */
5940/* ARGSUSED */
5941static int
5942vhci_ioc_get_phci_path(sv_iocdata_t *pioc, caddr_t phci_path,
5943	int mode, caddr_t s)
5944{
5945	int retval = 0;
5946
5947	if (ddi_copyin(pioc->phci, phci_path, MAXPATHLEN, mode)) {
5948		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_phci: ioctl <%s> "
5949		    "phci_path copyin failed", s));
5950		retval = EFAULT;
5951	}
5952	return (retval);
5953
5954}
5955
5956
5957/*
5958 * Routine to get the Client device pathname from ioctl structures in userland
5959 */
5960/* ARGSUSED */
5961static int
5962vhci_ioc_get_client_path(sv_iocdata_t *pioc, caddr_t client_path,
5963	int mode, caddr_t s)
5964{
5965	int retval = 0;
5966
5967	if (ddi_copyin(pioc->client, client_path, MAXPATHLEN, mode)) {
5968		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_client: "
5969		    "ioctl <%s> client_path copyin failed", s));
5970		retval = EFAULT;
5971	}
5972	return (retval);
5973}
5974
5975
5976/*
5977 * Routine to get physical device address from ioctl structure in userland
5978 */
5979/* ARGSUSED */
5980static int
5981vhci_ioc_get_paddr(sv_iocdata_t *pioc, caddr_t paddr, int mode, caddr_t s)
5982{
5983	int retval = 0;
5984
5985	if (ddi_copyin(pioc->addr, paddr, MAXNAMELEN, mode)) {
5986		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_get_paddr: "
5987		    "ioctl <%s> device addr copyin failed", s));
5988		retval = EFAULT;
5989	}
5990	return (retval);
5991}
5992
5993
5994/*
5995 * Routine to send client device pathname to userland.
5996 */
5997/* ARGSUSED */
5998static int
5999vhci_ioc_send_client_path(caddr_t client_path, sv_iocdata_t *pioc,
6000	int mode, caddr_t s)
6001{
6002	int retval = 0;
6003
6004	if (ddi_copyout(client_path, pioc->client, MAXPATHLEN, mode)) {
6005		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_ioc_send_client: "
6006		    "ioctl <%s> client_path copyout failed", s));
6007		retval = EFAULT;
6008	}
6009	return (retval);
6010}
6011
6012
6013/*
6014 * Routine to translate a dev_info pointer (dip) to a device pathname.
6015 */
6016static void
6017vhci_ioc_devi_to_path(dev_info_t *dip, caddr_t path)
6018{
6019	(void) ddi_pathname(dip, path);
6020}
6021
6022
6023/*
6024 * vhci_get_phci_path_list:
6025 *		get information about devices associated with a
6026 *		given PHCI device.
6027 *
6028 * Return Values:
6029 *		MDI_SUCCESS; path information is returned in 'pibuf'
6030 */
6031int
6032vhci_get_phci_path_list(dev_info_t *pdip, sv_path_info_t *pibuf,
6033	uint_t num_elems)
6034{
6035	uint_t			count, done;
6036	mdi_pathinfo_t		*pip;
6037	sv_path_info_t		*ret_pip;
6038	int			status;
6039	size_t			prop_size;
6040	int			circular;
6041
6042	/*
6043	 * Get the PHCI structure and retrieve the path information
6044	 * from the GUID hash table.
6045	 */
6046
6047	ret_pip = pibuf;
6048	count = 0;
6049
6050	ndi_devi_enter(pdip, &circular);
6051
6052	done = (count >= num_elems);
6053	pip = mdi_get_next_client_path(pdip, NULL);
6054	while (pip && !done) {
6055		mdi_pi_lock(pip);
6056		(void) ddi_pathname(mdi_pi_get_phci(pip),
6057		    ret_pip->device.ret_phci);
6058		(void) strcpy(ret_pip->ret_addr, mdi_pi_get_addr(pip));
6059		(void) mdi_pi_get_state2(pip, &ret_pip->ret_state,
6060		    &ret_pip->ret_ext_state);
6061
6062		status = mdi_prop_size(pip, &prop_size);
6063		if (status == MDI_SUCCESS && ret_pip->ret_prop.ret_buf_size) {
6064			*ret_pip->ret_prop.ret_buf_size = (uint_t)prop_size;
6065		}
6066
6067#ifdef DEBUG
6068		if (status != MDI_SUCCESS) {
6069			VHCI_DEBUG(2, (CE_WARN, NULL,
6070			    "!vhci_get_phci_path_list: "
6071			    "phci <%s>, prop size failure 0x%x",
6072			    ret_pip->device.ret_phci, status));
6073		}
6074#endif /* DEBUG */
6075
6076
6077		if (status == MDI_SUCCESS && ret_pip->ret_prop.buf &&
6078		    prop_size && ret_pip->ret_prop.buf_size >= prop_size) {
6079			status = mdi_prop_pack(pip,
6080			    &ret_pip->ret_prop.buf,
6081			    ret_pip->ret_prop.buf_size);
6082
6083#ifdef DEBUG
6084			if (status != MDI_SUCCESS) {
6085				VHCI_DEBUG(2, (CE_WARN, NULL,
6086				    "!vhci_get_phci_path_list: "
6087				    "phci <%s>, prop pack failure 0x%x",
6088				    ret_pip->device.ret_phci, status));
6089			}
6090#endif /* DEBUG */
6091		}
6092
6093		mdi_pi_unlock(pip);
6094		pip = mdi_get_next_client_path(pdip, pip);
6095		ret_pip++;
6096		count++;
6097		done = (count >= num_elems);
6098	}
6099
6100	ndi_devi_exit(pdip, circular);
6101
6102	return (MDI_SUCCESS);
6103}
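
/*
 * Illustrative userland sketch (not part of this driver): the two-call
 * pattern used with SCSI_VHCI_GET_PHCI_MULTIPATH_INFO.  A first call
 * with buf_elem == 0 returns only the path count through ret_elem; a
 * second call would pass ret_buf/buf_elem sized from that count.  'fd'
 * is assumed to be an open descriptor on the scsi_vhci devctl node.
 */
#if 0
static int
get_phci_path_count(int fd, const char *phci, uint_t *countp)
{
	sv_iocdata_t	ioc;
	char		phci_buf[MAXPATHLEN];

	(void) memset(&ioc, 0, sizeof (ioc));
	(void) strlcpy(phci_buf, phci, sizeof (phci_buf));
	ioc.phci = phci_buf;
	ioc.ret_elem = countp;	/* num_paths is copied out here */
	ioc.buf_elem = 0;	/* count only, no per-path data */
	return (ioctl(fd, SCSI_VHCI_GET_PHCI_MULTIPATH_INFO, &ioc));
}
#endif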
6104
6105
6106/*
6107 * vhci_get_client_path_list:
6108 *		get information about various paths associated with a
6109 *		given client device.
6110 *
6111 * Return Values:
6112 *		MDI_SUCCESS; path information is returned in 'pibuf'
6113 */
6114int
6115vhci_get_client_path_list(dev_info_t *cdip, sv_path_info_t *pibuf,
6116	uint_t num_elems)
6117{
6118	uint_t			count, done;
6119	mdi_pathinfo_t		*pip;
6120	sv_path_info_t		*ret_pip;
6121	int			status;
6122	size_t			prop_size;
6123	int			circular;
6124
6125	ret_pip = pibuf;
6126	count = 0;
6127
6128	ndi_devi_enter(cdip, &circular);
6129
6130	done = (count >= num_elems);
6131	pip = mdi_get_next_phci_path(cdip, NULL);
6132	while (pip && !done) {
6133		mdi_pi_lock(pip);
6134		(void) ddi_pathname(mdi_pi_get_phci(pip),
6135		    ret_pip->device.ret_phci);
6136		(void) strcpy(ret_pip->ret_addr, mdi_pi_get_addr(pip));
6137		(void) mdi_pi_get_state2(pip, &ret_pip->ret_state,
6138		    &ret_pip->ret_ext_state);
6139
6140		status = mdi_prop_size(pip, &prop_size);
6141		if (status == MDI_SUCCESS && ret_pip->ret_prop.ret_buf_size) {
6142			*ret_pip->ret_prop.ret_buf_size = (uint_t)prop_size;
6143		}
6144
6145#ifdef DEBUG
6146		if (status != MDI_SUCCESS) {
6147			VHCI_DEBUG(2, (CE_WARN, NULL,
6148			    "!vhci_get_client_path_list: "
6149			    "phci <%s>, prop size failure 0x%x",
6150			    ret_pip->device.ret_phci, status));
6151		}
6152#endif /* DEBUG */
6153
6154
6155		if (status == MDI_SUCCESS && ret_pip->ret_prop.buf &&
6156		    prop_size && ret_pip->ret_prop.buf_size >= prop_size) {
6157			status = mdi_prop_pack(pip,
6158			    &ret_pip->ret_prop.buf,
6159			    ret_pip->ret_prop.buf_size);
6160
6161#ifdef DEBUG
6162			if (status != MDI_SUCCESS) {
6163				VHCI_DEBUG(2, (CE_WARN, NULL,
6164				    "!vhci_get_client_path_list: "
6165				    "phci <%s>, prop pack failure 0x%x",
6166				    ret_pip->device.ret_phci, status));
6167			}
6168#endif /* DEBUG */
6169		}
6170
6171		mdi_pi_unlock(pip);
6172		pip = mdi_get_next_phci_path(cdip, pip);
6173		ret_pip++;
6174		count++;
6175		done = (count >= num_elems);
6176	}
6177
6178	ndi_devi_exit(cdip, circular);
6179
6180	return (MDI_SUCCESS);
6181}
6182
6183
6184/*
6185 * Routine to get ioctl argument structure from userland.
6186 */
6187/* ARGSUSED */
6188static int
6189vhci_get_iocdata(const void *data, sv_iocdata_t *pioc, int mode, caddr_t s)
6190{
6191	int	retval = 0;
6192
6193#ifdef  _MULTI_DATAMODEL
6194	switch (ddi_model_convert_from(mode & FMODELS)) {
6195	case DDI_MODEL_ILP32:
6196	{
6197		sv_iocdata32_t	ioc32;
6198
6199		if (ddi_copyin(data, &ioc32, sizeof (ioc32), mode)) {
6200			retval = EFAULT;
6201			break;
6202		}
6203		pioc->client	= (caddr_t)(uintptr_t)ioc32.client;
6204		pioc->phci	= (caddr_t)(uintptr_t)ioc32.phci;
6205		pioc->addr	= (caddr_t)(uintptr_t)ioc32.addr;
6206		pioc->buf_elem	= (uint_t)ioc32.buf_elem;
6207		pioc->ret_buf	= (sv_path_info_t *)(uintptr_t)ioc32.ret_buf;
6208		pioc->ret_elem	= (uint_t *)(uintptr_t)ioc32.ret_elem;
6209		break;
6210	}
6211
6212	case DDI_MODEL_NONE:
6213		if (ddi_copyin(data, pioc, sizeof (*pioc), mode)) {
6214			retval = EFAULT;
6215			break;
6216		}
6217		break;
6218	}
6219#else   /* _MULTI_DATAMODEL */
6220	if (ddi_copyin(data, pioc, sizeof (*pioc), mode)) {
6221		retval = EFAULT;
6222	}
6223#endif  /* _MULTI_DATAMODEL */
6224
6225#ifdef DEBUG
6226	if (retval) {
6227		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: cmd <%s> "
6228		    "iocdata copyin failed", s));
6229	}
6230#endif
6231
6232	return (retval);
6233}
6234
6235
6236/*
6237 * Routine to get the ioctl argument for ioctl causing controller switchover.
6238 */
6239/* ARGSUSED */
6240static int
6241vhci_get_iocswitchdata(const void *data, sv_switch_to_cntlr_iocdata_t *piocsc,
6242    int mode, caddr_t s)
6243{
6244	int	retval = 0;
6245
6246#ifdef  _MULTI_DATAMODEL
6247	switch (ddi_model_convert_from(mode & FMODELS)) {
6248	case DDI_MODEL_ILP32:
6249	{
6250		sv_switch_to_cntlr_iocdata32_t	ioc32;
6251
6252		if (ddi_copyin(data, &ioc32, sizeof (ioc32), mode)) {
6253			retval = EFAULT;
6254			break;
6255		}
6256		piocsc->client	= (caddr_t)(uintptr_t)ioc32.client;
6257		piocsc->class	= (caddr_t)(uintptr_t)ioc32.class;
6258		break;
6259	}
6260
6261	case DDI_MODEL_NONE:
6262		if (ddi_copyin(data, piocsc, sizeof (*piocsc), mode)) {
6263			retval = EFAULT;
6264		}
6265		break;
6266	}
6267#else   /* _MULTI_DATAMODEL */
6268	if (ddi_copyin(data, piocsc, sizeof (*piocsc), mode)) {
6269		retval = EFAULT;
6270	}
6271#endif  /* _MULTI_DATAMODEL */
6272
6273#ifdef DEBUG
6274	if (retval) {
6275		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: cmd <%s> "
6276		    "switch_to_cntlr_iocdata copyin failed", s));
6277	}
6278#endif
6279
6280	return (retval);
6281}
6282
6283
6284/*
6285 * Routine to allocate memory for the path information structures.
6286 * It allocates two chunks of memory - one for keeping userland
6287 * pointers/values for path information and path properties, the second
6288 * for allocating kernel buffers for the path properties.  These path
6289 * properties are finally copied out to userland.
6290 */
6291/* ARGSUSED */
6292static int
6293vhci_ioc_alloc_pathinfo(sv_path_info_t **upibuf, sv_path_info_t **kpibuf,
6294    uint_t num_paths, sv_iocdata_t *pioc, int mode, caddr_t s)
6295{
6296	sv_path_info_t	*pi;
6297	uint_t		bufsize;
6298	int		retval = 0;
6299	int		index;
6300
6301	/* Allocate memory */
6302	*upibuf = (sv_path_info_t *)
6303	    kmem_zalloc(sizeof (sv_path_info_t) * num_paths, KM_SLEEP);
6304	ASSERT(*upibuf != NULL);
6305	*kpibuf = (sv_path_info_t *)
6306	    kmem_zalloc(sizeof (sv_path_info_t) * num_paths, KM_SLEEP);
6307	ASSERT(*kpibuf != NULL);
6308
6309	/*
6310	 * Get the path info structure from the user space.
6311	 * We are interested in the following fields:
6312	 *	- user size of buffer for per path properties.
6313	 *	- user address of buffer for path info properties.
6314	 *	- user pointer for returning actual buffer size
6315	 * Keep these fields in the 'upibuf' structures.
6316	 * Allocate buffer for per path info properties in kernel
6317	 * structure ('kpibuf').
6318	 * Size of these buffers will be equal to the size of buffers
6319	 * in the user space.
6320	 */
6321#ifdef  _MULTI_DATAMODEL
6322	switch (ddi_model_convert_from(mode & FMODELS)) {
6323	case DDI_MODEL_ILP32:
6324	{
6325		sv_path_info32_t	*src;
6326		sv_path_info32_t	pi32;
6327
6328		src  = (sv_path_info32_t *)pioc->ret_buf;
6329		pi = (sv_path_info_t *)*upibuf;
6330		for (index = 0; index < num_paths; index++, src++, pi++) {
6331			if (ddi_copyin(src, &pi32, sizeof (pi32), mode)) {
6332				retval = EFAULT;
6333				break;
6334			}
6335
6336			pi->ret_prop.buf_size	=
6337			    (uint_t)pi32.ret_prop.buf_size;
6338			pi->ret_prop.ret_buf_size =
6339			    (uint_t *)(uintptr_t)pi32.ret_prop.ret_buf_size;
6340			pi->ret_prop.buf	=
6341			    (caddr_t)(uintptr_t)pi32.ret_prop.buf;
6342		}
6343		break;
6344	}
6345
6346	case DDI_MODEL_NONE:
6347		if (ddi_copyin(pioc->ret_buf, *upibuf,
6348		    sizeof (sv_path_info_t) * num_paths, mode)) {
6349			retval = EFAULT;
6350		}
6351		break;
6352	}
6353#else   /* _MULTI_DATAMODEL */
6354	if (ddi_copyin(pioc->ret_buf, *upibuf,
6355	    sizeof (sv_path_info_t) * num_paths, mode)) {
6356		retval = EFAULT;
6357	}
6358#endif  /* _MULTI_DATAMODEL */
6359
6360	if (retval != 0) {
6361		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_alloc_path_info: "
6362		    "ioctl <%s> normal: path_info copyin failed", s));
6363		kmem_free(*upibuf, sizeof (sv_path_info_t) * num_paths);
6364		kmem_free(*kpibuf, sizeof (sv_path_info_t) * num_paths);
6365		*upibuf = NULL;
6366		*kpibuf = NULL;
6367		return (retval);
6368	}
6369
6370	/*
6371	 * Allocate memory for per path properties.
6372	 */
6373	for (index = 0, pi = *kpibuf; index < num_paths; index++, pi++) {
6374		bufsize = (*upibuf)[index].ret_prop.buf_size;
6375
6376		if (bufsize && bufsize <= SV_PROP_MAX_BUF_SIZE) {
6377			pi->ret_prop.buf_size = bufsize;
6378			pi->ret_prop.buf = (caddr_t)
6379			    kmem_zalloc(bufsize, KM_SLEEP);
6380			ASSERT(pi->ret_prop.buf != NULL);
6381		} else {
6382			pi->ret_prop.buf_size = 0;
6383			pi->ret_prop.buf = NULL;
6384		}
6385
6386		if ((*upibuf)[index].ret_prop.ret_buf_size != NULL) {
6387			pi->ret_prop.ret_buf_size = (uint_t *)kmem_zalloc(
6388			    sizeof (*pi->ret_prop.ret_buf_size), KM_SLEEP);
6389			ASSERT(pi->ret_prop.ret_buf_size != NULL);
6390		} else {
6391			pi->ret_prop.ret_buf_size = NULL;
6392		}
6393	}
6394
6395	return (0);
6396}
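
/*
 * Illustrative userland sketch (an assumption, not code from this
 * driver): preparing the sv_path_info_t array whose ret_prop fields
 * vhci_ioc_alloc_pathinfo() reads back in.  Each element supplies a
 * property buffer, its size, and a location for the actual size.
 */
#if 0
static void
prep_path_info(sv_path_info_t *pi, uint_t num_paths,
    caddr_t *prop_bufs, uint_t prop_buf_size, uint_t *prop_sizes)
{
	uint_t	i;

	for (i = 0; i < num_paths; i++) {
		pi[i].ret_prop.buf = prop_bufs[i];
		pi[i].ret_prop.buf_size = prop_buf_size;
		pi[i].ret_prop.ret_buf_size = &prop_sizes[i];
	}
}
#endif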
6397
6398
6399/*
6400 * Routine to free memory for the path information structures.
6401 * This is the memory which was allocated earlier.
6402 */
6403/* ARGSUSED */
6404static void
6405vhci_ioc_free_pathinfo(sv_path_info_t *upibuf, sv_path_info_t *kpibuf,
6406    uint_t num_paths)
6407{
6408	sv_path_info_t	*pi;
6409	int		index;
6410
6411	/* Free memory for per path properties */
6412	for (index = 0, pi = kpibuf; index < num_paths; index++, pi++) {
6413		if (pi->ret_prop.ret_buf_size != NULL) {
6414			kmem_free(pi->ret_prop.ret_buf_size,
6415			    sizeof (*pi->ret_prop.ret_buf_size));
6416		}
6417
6418		if (pi->ret_prop.buf != NULL) {
6419			kmem_free(pi->ret_prop.buf, pi->ret_prop.buf_size);
6420		}
6421	}
6422
6423	/* Free memory for path info structures */
6424	kmem_free(upibuf, sizeof (sv_path_info_t) * num_paths);
6425	kmem_free(kpibuf, sizeof (sv_path_info_t) * num_paths);
6426}
6427
6428
6429/*
6430 * Routine to copy path information and path properties to userland.
6431 */
6432/* ARGSUSED */
6433static int
6434vhci_ioc_send_pathinfo(sv_path_info_t *upibuf, sv_path_info_t *kpibuf,
6435    uint_t num_paths, sv_iocdata_t *pioc, int mode, caddr_t s)
6436{
6437	int			retval = 0, index;
6438	sv_path_info_t		*upi_ptr;
6439	sv_path_info32_t	*upi32_ptr;
6440
6441#ifdef  _MULTI_DATAMODEL
6442	switch (ddi_model_convert_from(mode & FMODELS)) {
6443	case DDI_MODEL_ILP32:
6444		goto copy_32bit;
6445
6446	case DDI_MODEL_NONE:
6447		goto copy_normal;
6448	}
6449#else   /* _MULTI_DATAMODEL */
6450
6451	goto copy_normal;
6452
6453#endif  /* _MULTI_DATAMODEL */
6454
6455copy_normal:
6456
6457	/*
6458	 * Copy path information and path properties to user land.
6459	 * Pointer fields inside the path property structure were
6460	 * saved in the 'upibuf' structure earlier.
6461	 */
6462	upi_ptr = pioc->ret_buf;
6463	for (index = 0; index < num_paths; index++) {
6464		if (ddi_copyout(kpibuf[index].device.ret_ct,
6465		    upi_ptr[index].device.ret_ct, MAXPATHLEN, mode)) {
6466			retval = EFAULT;
6467			break;
6468		}
6469
6470		if (ddi_copyout(kpibuf[index].ret_addr,
6471		    upi_ptr[index].ret_addr, MAXNAMELEN, mode)) {
6472			retval = EFAULT;
6473			break;
6474		}
6475
6476		if (ddi_copyout(&kpibuf[index].ret_state,
6477		    &upi_ptr[index].ret_state, sizeof (kpibuf[index].ret_state),
6478		    mode)) {
6479			retval = EFAULT;
6480			break;
6481		}
6482
6483		if (ddi_copyout(&kpibuf[index].ret_ext_state,
6484		    &upi_ptr[index].ret_ext_state,
6485		    sizeof (kpibuf[index].ret_ext_state), mode)) {
6486			retval = EFAULT;
6487			break;
6488		}
6489
6490		if ((kpibuf[index].ret_prop.ret_buf_size != NULL) &&
6491		    ddi_copyout(kpibuf[index].ret_prop.ret_buf_size,
6492		    upibuf[index].ret_prop.ret_buf_size,
6493		    sizeof (*upibuf[index].ret_prop.ret_buf_size), mode)) {
6494			retval = EFAULT;
6495			break;
6496		}
6497
6498		if ((kpibuf[index].ret_prop.buf != NULL) &&
6499		    ddi_copyout(kpibuf[index].ret_prop.buf,
6500		    upibuf[index].ret_prop.buf,
6501		    upibuf[index].ret_prop.buf_size, mode)) {
6502			retval = EFAULT;
6503			break;
6504		}
6505	}
6506
6507#ifdef DEBUG
6508	if (retval) {
6509		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: ioctl <%s> "
6510		    "normal: path_info copyout failed", s));
6511	}
6512#endif
6513
6514	return (retval);
6515
6516copy_32bit:
6517	/*
6518	 * Copy path information and path properties to user land.
6519	 * Pointer fields inside the path property structure were
6520	 * saved in the 'upibuf' structure earlier.
6521	 */
6522	upi32_ptr = (sv_path_info32_t *)pioc->ret_buf;
6523	for (index = 0; index < num_paths; index++) {
6524		if (ddi_copyout(kpibuf[index].device.ret_ct,
6525		    upi32_ptr[index].device.ret_ct, MAXPATHLEN, mode)) {
6526			retval = EFAULT;
6527			break;
6528		}
6529
6530		if (ddi_copyout(kpibuf[index].ret_addr,
6531		    upi32_ptr[index].ret_addr, MAXNAMELEN, mode)) {
6532			retval = EFAULT;
6533			break;
6534		}
6535
6536		if (ddi_copyout(&kpibuf[index].ret_state,
6537		    &upi32_ptr[index].ret_state,
6538		    sizeof (kpibuf[index].ret_state), mode)) {
6539			retval = EFAULT;
6540			break;
6541		}
6542
6543		if (ddi_copyout(&kpibuf[index].ret_ext_state,
6544		    &upi32_ptr[index].ret_ext_state,
6545		    sizeof (kpibuf[index].ret_ext_state), mode)) {
6546			retval = EFAULT;
6547			break;
6548		}
6549		if ((kpibuf[index].ret_prop.ret_buf_size != NULL) &&
6550		    ddi_copyout(kpibuf[index].ret_prop.ret_buf_size,
6551		    upibuf[index].ret_prop.ret_buf_size,
6552		    sizeof (*upibuf[index].ret_prop.ret_buf_size), mode)) {
6553			retval = EFAULT;
6554			break;
6555		}
6556
6557		if ((kpibuf[index].ret_prop.buf != NULL) &&
6558		    ddi_copyout(kpibuf[index].ret_prop.buf,
6559		    upibuf[index].ret_prop.buf,
6560		    upibuf[index].ret_prop.buf_size, mode)) {
6561			retval = EFAULT;
6562			break;
6563		}
6564	}
6565
6566#ifdef DEBUG
6567	if (retval) {
6568		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_get_ioc: ioctl <%s> "
6569		    "32bit: path_info copyout failed", s));
6570	}
6571#endif
6572
6573	return (retval);
6574}
6575
6576
6577/*
6578 * vhci_failover()
6579 * This routine expects VHCI_HOLD_LUN before being invoked.  It can be invoked
6580 * as MDI_FAILOVER_ASYNC or MDI_FAILOVER_SYNC.  For asynchronous failovers
6581 * this routine will VHCI_RELEASE_LUN on exit.  For synchronous failovers
6582 * it is the caller's responsibility to release the lun.
6583 */
6584
6585/* ARGSUSED */
6586static int
6587vhci_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
6588{
6589	char			*guid;
6590	scsi_vhci_lun_t		*vlun = NULL;
6591	struct scsi_vhci	*vhci;
6592	mdi_pathinfo_t		*pip, *npip;
6593	char			*s_pclass, *pclass1, *pclass2, *pclass;
6594	char			active_pclass_copy[255], *active_pclass_ptr;
6595	char			*ptr1, *ptr2;
6596	mdi_pathinfo_state_t	pi_state;
6597	uint32_t		pi_ext_state;
6598	scsi_vhci_priv_t	*svp;
6599	struct scsi_device	*sd;
6600	struct scsi_failover_ops	*sfo;
6601	int			sps; /* mdi_select_path() status */
6602	int			activation_done = 0;
6603	int			rval, retval = MDI_FAILURE;
6604	int			reserve_pending, check_condition, UA_condition;
6605	struct scsi_pkt		*pkt;
6606	struct buf		*bp;
6607
6608	vhci = ddi_get_soft_state(vhci_softstate, ddi_get_instance(vdip));
6609	sd = ddi_get_driver_private(cdip);
6610	vlun = ADDR2VLUN(&sd->sd_address);
6611	ASSERT(vlun != 0);
6612	ASSERT(VHCI_LUN_IS_HELD(vlun));
6613	guid = vlun->svl_lun_wwn;
6614	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(1): guid %s\n", guid));
6615	vhci_log(CE_NOTE, vdip, "!Initiating failover for device %s "
6616	    "(GUID %s)", ddi_node_name(cdip), guid);
6617
6618	/*
6619	 * Let's maintain a local copy of vlun->svl_active_pclass
6620	 * for the rest of the processing.  Accessing the field
6621	 * directly in the loop below breaks the loop logic,
6622	 * especially when the field gets updated by other threads
6623	 * (updating path status, etc.), and causes the 'paths are not
6624	 * currently available' condition to be declared prematurely.
6625	 */
6626	mutex_enter(&vlun->svl_mutex);
6627	if (vlun->svl_active_pclass != NULL) {
6628		(void) strlcpy(active_pclass_copy, vlun->svl_active_pclass,
6629		    sizeof (active_pclass_copy));
6630		active_pclass_ptr = &active_pclass_copy[0];
6631		mutex_exit(&vlun->svl_mutex);
6632		if (vhci_quiesce_paths(vdip, cdip, vlun, guid,
6633		    active_pclass_ptr) != 0) {
6634			retval = MDI_FAILURE;
6635		}
6636	} else {
6637		/*
6638		 * This can happen only when the available path discovered
6639		 * to the device is a STANDBY path.
6640		 */
6641		mutex_exit(&vlun->svl_mutex);
6642		active_pclass_copy[0] = '\0';
6643		active_pclass_ptr = NULL;
6644	}
6645
6646	sfo = vlun->svl_fops;
6647	ASSERT(sfo != NULL);
6648	pclass1 = s_pclass = active_pclass_ptr;
6649	VHCI_DEBUG(1, (CE_NOTE, NULL, "!(%s)failing over from %s\n", guid,
6650	    (s_pclass == NULL ? "<none>" : s_pclass)));
6651
6652next_pathclass:
6653
6654	rval = (*sfo->sfo_pathclass_next)(pclass1, &pclass2,
6655	    vlun->svl_fops_ctpriv);
6656	if (rval == ENOENT) {
6657		if (s_pclass == NULL) {
6658			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(4)(%s): "
6659			    "failed, no more pathclasses\n", guid));
6660			goto done;
6661		} else {
6662			(*sfo->sfo_pathclass_next)(NULL, &pclass2,
6663			    vlun->svl_fops_ctpriv);
6664		}
6665	} else if (rval == EINVAL) {
6666		vhci_log(CE_NOTE, vdip, "!Failover operation failed for "
6667		    "device %s (GUID %s): Invalid path-class %s",
6668		    ddi_node_name(cdip), guid,
6669		    ((pclass1 == NULL) ? "<none>" : pclass1));
6670		goto done;
6671	}
6672	if ((s_pclass != NULL) && (strcmp(pclass2, s_pclass) == 0)) {
6673		/*
6674		 * paths are not currently available
6675		 */
6676		vhci_log(CE_NOTE, vdip, "!Failover path currently unavailable"
6677		    " for device %s (GUID %s)",
6678		    ddi_node_name(cdip), guid);
6679		goto done;
6680	}
6681	pip = npip = NULL;
6682	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(5.2)(%s): considering "
6683	    "%s as failover destination\n", guid, pclass2));
6684	sps = mdi_select_path(cdip, NULL, MDI_SELECT_STANDBY_PATH, NULL, &npip);
6685	if ((npip == NULL) || (sps != MDI_SUCCESS)) {
6686		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(%s): no "
6687		    "STANDBY paths found (status:%x)!\n", guid, sps));
6688		pclass1 = pclass2;
6689		goto next_pathclass;
6690	}
6691	do {
6692		pclass = NULL;
6693		if ((mdi_prop_lookup_string(npip, "path-class",
6694		    &pclass) != MDI_SUCCESS) || (strcmp(pclass2,
6695		    pclass) != 0)) {
6696			VHCI_DEBUG(1, (CE_NOTE, NULL,
6697			    "!vhci_failover(5.5)(%s): skipping path "
6698			    "%p(%s)...\n", guid, (void *)npip, pclass));
6699			pip = npip;
6700			sps = mdi_select_path(cdip, NULL,
6701			    MDI_SELECT_STANDBY_PATH, pip, &npip);
6702			mdi_rele_path(pip);
6703			(void) mdi_prop_free(pclass);
6704			continue;
6705		}
6706		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(npip);
6707
6708		/*
6709		 * Issue a READ at a non-zero block on this STANDBY path.
6710		 * Purple returns
6711		 * 1. RESERVATION_CONFLICT if a reservation is pending
6712		 * 2. POR check condition if a reset happened.
6713		 * 3. failover check conditions if one is already in progress.
6714		 */
6715		reserve_pending = 0;
6716		check_condition = 0;
6717		UA_condition = 0;
6718
6719		bp = scsi_alloc_consistent_buf(&svp->svp_psd->sd_address,
6720		    (struct buf *)NULL, DEV_BSIZE, B_READ, NULL, NULL);
6721		if (!bp) {
6722			VHCI_DEBUG(1, (CE_NOTE, NULL,
6723			    "vhci_failover !No resources (buf)\n"));
6724			mdi_rele_path(npip);
6725			goto done;
6726		}
6727		pkt = scsi_init_pkt(&svp->svp_psd->sd_address, NULL, bp,
6728		    CDB_GROUP1, sizeof (struct scsi_arq_status), 0,
6729		    PKT_CONSISTENT, NULL, NULL);
6730		if (pkt) {
6731			(void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)
6732			    pkt->pkt_cdbp, SCMD_READ, 1, 1, 0);
6733			pkt->pkt_flags = FLAG_NOINTR;
6734check_path_again:
6735			pkt->pkt_time = 3*30;
6736			if (scsi_transport(pkt) == TRAN_ACCEPT) {
6737				switch (pkt->pkt_reason) {
6738				case CMD_CMPLT:
6739					switch (SCBP_C(pkt)) {
6740					case STATUS_GOOD:
6741						/* Already failed over */
6742						activation_done = 1;
6743						break;
6744					case STATUS_RESERVATION_CONFLICT:
6745						reserve_pending = 1;
6746						break;
6747					case STATUS_CHECK:
6748						check_condition = 1;
6749						break;
6750					}
6751				}
6752			}
6753			if (check_condition &&
6754			    (pkt->pkt_state & STATE_ARQ_DONE)) {
6755				struct scsi_extended_sense *sns =
6756				    &(((struct scsi_arq_status *)(uintptr_t)
6757				    (pkt->pkt_scbp))->sts_sensedata);
6758				if (sns->es_key == KEY_UNIT_ATTENTION &&
6759				    sns->es_add_code == 0x29) {
6760					/* Already failed over */
6761					VHCI_DEBUG(1, (CE_NOTE, NULL,
6762					    "!vhci_failover(7)(%s): "
6763					    "path 0x%p POR UA condition\n",
6764					    guid, (void *)npip));
6765					if (UA_condition == 0) {
6766						UA_condition = 1;
6767						goto check_path_again;
6768					}
6769				} else {
6770					activation_done = 0;
6771					VHCI_DEBUG(1, (CE_NOTE, NULL,
6772					    "!vhci_failover(%s): path 0x%p "
6773					    "unhandled chkcond %x %x %x\n",
6774					    guid, (void *)npip, sns->es_key,
6775					    sns->es_add_code,
6776					    sns->es_qual_code));
6777				}
6778			}
6779			scsi_destroy_pkt(pkt);
6780		}
6781		scsi_free_consistent_buf(bp);
6782
6783		if (activation_done) {
6784			mdi_rele_path(npip);
6785			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(7)(%s): "
6786			    "path 0x%p already failedover\n", guid,
6787			    (void *)npip));
6788			break;
6789		}
6790		if (reserve_pending && (vlun->svl_xlf_capable == 0)) {
6791			(void) vhci_recovery_reset(vlun,
6792			    &svp->svp_psd->sd_address,
6793			    FALSE, VHCI_DEPTH_ALL);
6794		}
6795		VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(6)(%s): "
6796		    "activating path 0x%p(psd:%p)\n", guid, (void *)npip,
6797		    (void *)svp->svp_psd));
6798		if ((*sfo->sfo_path_activate)(svp->svp_psd, pclass2,
6799		    vlun->svl_fops_ctpriv) == 0) {
6800			activation_done = 1;
6801			mdi_rele_path(npip);
6802			VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(7)(%s): "
6803			    "path 0x%p successfully activated\n", guid,
6804			    (void *)npip));
6805			break;
6806		}
6807		pip = npip;
6808		sps = mdi_select_path(cdip, NULL, MDI_SELECT_STANDBY_PATH,
6809		    pip, &npip);
6810		mdi_rele_path(pip);
6811	} while ((npip != NULL) && (sps == MDI_SUCCESS));
6812	if (activation_done == 0) {
6813		pclass1 = pclass2;
6814		goto next_pathclass;
6815	}
6816
6817	/*
6818	 * If we are here, we have succeeded in activating path npip of
6819	 * pathclass pclass2; let us validate all paths of pclass2 by
6820	 * "ping"-ing each one and marking the good ones ONLINE.
6821	 * Also, set the state of the paths belonging to the previously
6822	 * active pathclass to STANDBY.
6823	 */
6824	pip = npip = NULL;
6825	sps = mdi_select_path(cdip, NULL, (MDI_SELECT_ONLINE_PATH |
6826	    MDI_SELECT_STANDBY_PATH | MDI_SELECT_USER_DISABLE_PATH),
6827	    NULL, &npip);
6828	if (npip == NULL || sps != MDI_SUCCESS) {
6829		VHCI_DEBUG(1, (CE_NOTE, NULL, "!Failover operation failed for "
6830		    "device %s (GUID %s): paths may be busy\n",
6831		    ddi_node_name(cdip), guid));
6832		goto done;
6833	}
6834	do {
6835		(void) mdi_pi_get_state2(npip, &pi_state, &pi_ext_state);
6836		if (mdi_prop_lookup_string(npip, "path-class", &pclass)
6837		    != MDI_SUCCESS) {
6838			pip = npip;
6839			sps = mdi_select_path(cdip, NULL,
6840			    (MDI_SELECT_ONLINE_PATH |
6841			    MDI_SELECT_STANDBY_PATH |
6842			    MDI_SELECT_USER_DISABLE_PATH),
6843			    pip, &npip);
6844			mdi_rele_path(pip);
6845			continue;
6846		}
6847		if (strcmp(pclass, pclass2) == 0) {
6848			if (pi_state == MDI_PATHINFO_STATE_STANDBY) {
6849				svp = (scsi_vhci_priv_t *)
6850				    mdi_pi_get_vhci_private(npip);
6851				VHCI_DEBUG(1, (CE_NOTE, NULL,
6852				    "!vhci_failover(8)(%s): "
6853				    "pinging path 0x%p\n",
6854				    guid, (void *)npip));
6855				if ((*sfo->sfo_path_ping)(svp->svp_psd,
6856				    vlun->svl_fops_ctpriv) == 1) {
6857					mdi_pi_set_state(npip,
6858					    MDI_PATHINFO_STATE_ONLINE);
6859					VHCI_DEBUG(1, (CE_NOTE, NULL,
6860					    "!vhci_failover(9)(%s): "
6861					    "path 0x%p ping successful, "
6862					    "marked online\n", guid,
6863					    (void *)npip));
6864					MDI_PI_ERRSTAT(npip, MDI_PI_FAILTO);
6865				}
6866			}
6867		} else if ((s_pclass != NULL) && (strcmp(pclass, s_pclass)
6868		    == 0)) {
6869			if (pi_state == MDI_PATHINFO_STATE_ONLINE) {
6870				mdi_pi_set_state(npip,
6871				    MDI_PATHINFO_STATE_STANDBY);
6872				VHCI_DEBUG(1, (CE_NOTE, NULL,
6873				    "!vhci_failover(10)(%s): path 0x%p marked "
6874				    "STANDBY\n", guid, (void *)npip));
6875				MDI_PI_ERRSTAT(npip, MDI_PI_FAILFROM);
6876			}
6877		}
6878		(void) mdi_prop_free(pclass);
6879		pip = npip;
6880		sps = mdi_select_path(cdip, NULL, (MDI_SELECT_ONLINE_PATH |
6881		    MDI_SELECT_STANDBY_PATH|MDI_SELECT_USER_DISABLE_PATH),
6882		    pip, &npip);
6883		mdi_rele_path(pip);
6884	} while ((npip != NULL) && (sps == MDI_SUCCESS));
6885
6886	/*
6887	 * Update the AccessState of related MP-API TPGs
6888	 */
6889	(void) vhci_mpapi_update_tpg_acc_state_for_lu(vhci, vlun);
6890
6891	vhci_log(CE_NOTE, vdip, "!Failover operation completed successfully "
6892	    "for device %s (GUID %s): failed over from %s to %s",
6893	    ddi_node_name(cdip), guid, ((s_pclass == NULL) ? "<none>" :
6894	    s_pclass), pclass2);
6895	ptr1 = kmem_alloc(strlen(pclass2)+1, KM_SLEEP);
6896	(void) strlcpy(ptr1, pclass2, (strlen(pclass2)+1));
6897	mutex_enter(&vlun->svl_mutex);
6898	ptr2 = vlun->svl_active_pclass;
6899	vlun->svl_active_pclass = ptr1;
6900	mutex_exit(&vlun->svl_mutex);
6901	if (ptr2) {
6902		kmem_free(ptr2, strlen(ptr2)+1);
6903	}
6904	mutex_enter(&vhci->vhci_mutex);
6905	scsi_hba_reset_notify_callback(&vhci->vhci_mutex,
6906	    &vhci->vhci_reset_notify_listf);
6907	/* All reservations are cleared upon these resets. */
6908	vlun->svl_flags &= ~VLUN_RESERVE_ACTIVE_FLG;
6909	mutex_exit(&vhci->vhci_mutex);
6910	VHCI_DEBUG(1, (CE_NOTE, NULL, "!vhci_failover(11): DONE! Active "
6911	    "pathclass for %s is now %s\n", guid, pclass2));
6912	retval = MDI_SUCCESS;
6913
6914done:
6915	if (flags == MDI_FAILOVER_ASYNC) {
6916		VHCI_RELEASE_LUN(vlun);
6917		VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_failover(12): DONE! "
6918		    "releasing lun, as failover was ASYNC\n"));
6919	} else {
6920		VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_failover(12): DONE! "
6921		    "NOT releasing lun, as failover was SYNC\n"));
6922	}
6923	return (retval);
6924}
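
/*
 * Illustrative sketch (not part of this driver): the hold/release
 * contract observed by a synchronous caller of the failover path,
 * mirroring the SCSI_VHCI_SWITCH_TO_CNTLR handling in vhci_ioctl()
 * above.  For MDI_FAILOVER_ASYNC, vhci_failover() itself releases
 * the lun on exit instead.
 */
#if 0
	int	held;

	VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
	if (mdi_failover(vdip, cdip, MDI_FAILOVER_SYNC) != MDI_SUCCESS) {
		/* failover failed or was busy */
	}
	VHCI_RELEASE_LUN(vlun);		/* SYNC: caller releases */
#endif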
6925
6926/*
6927 * vhci_client_attached is called after the successful attach of a
6928 * client devinfo node.
6929 */
6930static void
6931vhci_client_attached(dev_info_t *cdip)
6932{
6933	mdi_pathinfo_t	*pip;
6934	int		circular;
6935
6936	/*
6937	 * At this point the client has attached and it's instance number is
6938	 * At this point the client has attached and its instance number is
6939	 * is possible for paths to go online prior to client attach, in which
6940	 * case the call to vhci_kstat_create_pathinfo in vhci_pathinfo_online
6941	 * was a noop.
6942	 */
6943	ndi_devi_enter(cdip, &circular);
6944	for (pip = mdi_get_next_phci_path(cdip, NULL); pip;
6945	    pip = mdi_get_next_phci_path(cdip, pip))
6946		vhci_kstat_create_pathinfo(pip);
6947	ndi_devi_exit(cdip, circular);
6948}
6949
6950/*
6951 * quiesce all of the online paths
6952 */
6953static int
6954vhci_quiesce_paths(dev_info_t *vdip, dev_info_t *cdip, scsi_vhci_lun_t *vlun,
6955	char *guid, char *active_pclass_ptr)
6956{
6957	scsi_vhci_priv_t	*svp;
6958	char			*s_pclass = NULL;
6959	mdi_pathinfo_t		*npip, *pip;
6960	int			sps;
6961
6962	/* quiesce currently active paths */
6963	s_pclass = NULL;
6964	pip = npip = NULL;
6965	sps = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH, NULL, &npip);
6966	if ((npip == NULL) || (sps != MDI_SUCCESS)) {
6967		return (1);
6968	}
6969	do {
6970		if (mdi_prop_lookup_string(npip, "path-class",
6971		    &s_pclass) != MDI_SUCCESS) {
6972			mdi_rele_path(npip);
6973			vhci_log(CE_NOTE, vdip, "!Failover operation failed "
6974			    "for device %s (GUID %s) due to an internal "
6975			    "error", ddi_node_name(cdip), guid);
6976			return (1);
6977		}
6978		if (strcmp(s_pclass, active_pclass_ptr) == 0) {
6979			/*
6980			 * Quiesce the path.  Free s_pclass since
6981			 * we don't need it anymore.
6982			 */
6983			VHCI_DEBUG(1, (CE_NOTE, NULL,
6984			    "!vhci_failover(2)(%s): failing over "
6985			    "from %s; quiescing path %p\n",
6986			    guid, s_pclass, (void *)npip));
6987			(void) mdi_prop_free(s_pclass);
6988			svp = (scsi_vhci_priv_t *)
6989			    mdi_pi_get_vhci_private(npip);
6990			if (svp == NULL) {
6991				VHCI_DEBUG(1, (CE_NOTE, NULL,
6992				    "!vhci_failover(2.5)(%s): no "
6993				    "client priv! %p offlined?\n",
6994				    guid, (void *)npip));
6995				pip = npip;
6996				sps = mdi_select_path(cdip, NULL,
6997				    MDI_SELECT_ONLINE_PATH, pip, &npip);
6998				mdi_rele_path(pip);
6999				continue;
7000			}
7001			if (scsi_abort(&svp->svp_psd->sd_address, NULL)
7002			    == 0) {
7003				(void) vhci_recovery_reset(vlun,
7004				    &svp->svp_psd->sd_address, FALSE,
7005				    VHCI_DEPTH_TARGET);
7006			}
7007			mutex_enter(&svp->svp_mutex);
7008			if (svp->svp_cmds == 0) {
7009				VHCI_DEBUG(1, (CE_NOTE, NULL,
7010				    "!vhci_failover(3)(%s):"
7011				    "quiesced path %p\n", guid, (void *)npip));
7012			} else {
7013				while (svp->svp_cmds != 0) {
7014					cv_wait(&svp->svp_cv, &svp->svp_mutex);
7015					VHCI_DEBUG(1, (CE_NOTE, NULL,
7016					    "!vhci_failover(3.cv)(%s):"
7017					    "quiesced path %p\n", guid,
7018					    (void *)npip));
7019				}
7020			}
7021			mutex_exit(&svp->svp_mutex);
7022		} else {
7023			/*
7024			 * make sure we free up the memory
7025			 */
7026			(void) mdi_prop_free(s_pclass);
7027		}
7028		pip = npip;
7029		sps = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH,
7030		    pip, &npip);
7031		mdi_rele_path(pip);
7032	} while ((npip != NULL) && (sps == MDI_SUCCESS));
7033	return (0);
7034}
7035
7036static struct scsi_vhci_lun *
7037vhci_lun_lookup(dev_info_t *tgt_dip)
7038{
7039	return ((struct scsi_vhci_lun *)
7040	    mdi_client_get_vhci_private(tgt_dip));
7041}
7042
7043static struct scsi_vhci_lun *
7044vhci_lun_lookup_alloc(dev_info_t *tgt_dip, char *guid, int *didalloc)
7045{
7046	struct scsi_vhci_lun *svl;
7047
7048	if (svl = vhci_lun_lookup(tgt_dip)) {
7049		return (svl);
7050	}
7051
7052	svl = kmem_zalloc(sizeof (*svl), KM_SLEEP);
7053	svl->svl_lun_wwn = kmem_zalloc(strlen(guid)+1, KM_SLEEP);
7054	(void) strcpy(svl->svl_lun_wwn,  guid);
7055	mutex_init(&svl->svl_mutex, NULL, MUTEX_DRIVER, NULL);
7056	cv_init(&svl->svl_cv, NULL, CV_DRIVER, NULL);
7057	sema_init(&svl->svl_pgr_sema, 1, NULL, SEMA_DRIVER, NULL);
7058	svl->svl_waiting_for_activepath = 1;
7059	svl->svl_sector_size = 1;
7060	mdi_client_set_vhci_private(tgt_dip, svl);
7061	*didalloc = 1;
7062	VHCI_DEBUG(1, (CE_NOTE, NULL,
7063	    "vhci_lun_lookup_alloc: guid %s vlun 0x%p\n",
7064	    guid, (void *)svl));
7065	return (svl);
7066}
7067
7068static void
7069vhci_lun_free(dev_info_t *tgt_dip)
7070{
7071	struct scsi_vhci_lun *dvlp;
7072	char *guid;
7073
7074	dvlp = (struct scsi_vhci_lun *)
7075	    mdi_client_get_vhci_private(tgt_dip);
7076	ASSERT(dvlp != NULL);
7077
7078	mdi_client_set_vhci_private(tgt_dip, NULL);
7079
7080	guid = dvlp->svl_lun_wwn;
7081	ASSERT(guid != NULL);
7082	VHCI_DEBUG(4, (CE_NOTE, NULL, "!vhci_lun_free: %s\n", guid));
7083
7084	mutex_enter(&dvlp->svl_mutex);
7085	if (dvlp->svl_active_pclass != NULL) {
7086		kmem_free(dvlp->svl_active_pclass,
7087		    strlen(dvlp->svl_active_pclass)+1);
7088	}
7089	dvlp->svl_active_pclass = NULL;
7090	mutex_exit(&dvlp->svl_mutex);
7091
7092	if (dvlp->svl_lun_wwn != NULL) {
7093		kmem_free(dvlp->svl_lun_wwn, strlen(dvlp->svl_lun_wwn)+1);
7094	}
7095	dvlp->svl_lun_wwn = NULL;
7096
7097	if (dvlp->svl_fops_name) {
7098		kmem_free(dvlp->svl_fops_name, strlen(dvlp->svl_fops_name)+1);
7099	}
7100	dvlp->svl_fops_name = NULL;
7101
7102	if (dvlp->svl_flags & VLUN_TASK_D_ALIVE_FLG)
7103		taskq_destroy(dvlp->svl_taskq);
7104
7105	mutex_destroy(&dvlp->svl_mutex);
7106	cv_destroy(&dvlp->svl_cv);
7107	sema_destroy(&dvlp->svl_pgr_sema);
7108	kmem_free(dvlp, sizeof (*dvlp));
7109}
7110
7111
7112int
7113vhci_do_scsi_cmd(struct scsi_pkt *pkt)
7114{
7115	int	err = 0;
7116	int	retry_cnt = 0;
7117	struct scsi_extended_sense	*sns;
7118
7119retry:
7120	err = scsi_poll(pkt);
7121	if (err) {
7122		if (pkt->pkt_cdbp[0] == SCMD_RELEASE) {
7123			if (SCBP_C(pkt) == STATUS_RESERVATION_CONFLICT) {
7124				VHCI_DEBUG(1, (CE_NOTE, NULL,
7125				    "!v_s_do_s_c: RELEASE conflict\n"));
7126				return (0);
7127			}
7128		}
7129		if (retry_cnt++ < 3) {
7130			VHCI_DEBUG(1, (CE_WARN, NULL,
7131			    "!v_s_do_s_c:retry packet 0x%p "
7132			    "status 0x%x reason %s",
7133			    (void *)pkt, SCBP_C(pkt),
7134			    scsi_rname(pkt->pkt_reason)));
7135			if ((pkt->pkt_reason == CMD_CMPLT) &&
7136			    (SCBP_C(pkt) == STATUS_CHECK) &&
7137			    (pkt->pkt_state & STATE_ARQ_DONE)) {
7138				sns = &(((struct scsi_arq_status *)(uintptr_t)
7139				    (pkt->pkt_scbp))->sts_sensedata);
7140				VHCI_DEBUG(1, (CE_WARN, NULL,
7141				    "!v_s_do_s_c:retry "
7142				    "packet 0x%p  sense data %s", (void *)pkt,
7143				    scsi_sname(sns->es_key)));
7144			}
7145			goto retry;
7146		}
7147		VHCI_DEBUG(1, (CE_WARN, NULL,
7148		    "!v_s_do_s_c: failed transport 0x%p 0x%x",
7149		    (void *)pkt, SCBP_C(pkt)));
7150		return (0);
7151	}
7152
7153	switch (pkt->pkt_reason) {
7154		case CMD_TIMEOUT:
7155			VHCI_DEBUG(1, (CE_WARN, NULL, "!pkt timed "
7156			    "out (pkt 0x%p)", (void *)pkt));
7157			return (0);
7158		case CMD_CMPLT:
7159			switch (SCBP_C(pkt)) {
7160				case STATUS_GOOD:
7161					break;
7162				case STATUS_CHECK:
7163					if (pkt->pkt_state & STATE_ARQ_DONE) {
7164						sns = &(((
7165						    struct scsi_arq_status *)
7166						    (uintptr_t)
7167						    (pkt->pkt_scbp))->
7168						    sts_sensedata);
7169						if ((sns->es_key ==
7170						    KEY_UNIT_ATTENTION) ||
7171						    (sns->es_key ==
7172						    KEY_NOT_READY)) {
7173							/*
7174							 * clear unit attn.
7175							 */
7176
7177							VHCI_DEBUG(1,
7178							    (CE_WARN, NULL,
7179							    "!v_s_do_s_c: "
7180							    "retry "
7181							    "packet 0x%p sense "
7182							    "data %s",
7183							    (void *)pkt,
7184							    scsi_sname
7185							    (sns->es_key)));
7186							goto retry;
7187						}
7188						VHCI_DEBUG(4, (CE_WARN, NULL,
7189						    "!ARQ while "
7190						    "transporting "
7191						    "(pkt 0x%p)",
7192						    (void *)pkt));
7193						return (0);
7194					}
7195					return (0);
7196				default:
7197					VHCI_DEBUG(1, (CE_WARN, NULL,
7198					    "!Bad status returned "
7199					    "(pkt 0x%p, status %x)",
7200					    (void *)pkt, SCBP_C(pkt)));
7201					return (0);
7202			}
7203			break;
7204		case CMD_INCOMPLETE:
7205		case CMD_RESET:
7206		case CMD_ABORTED:
7207		case CMD_TRAN_ERR:
7208			if (retry_cnt++ < 1) {
7209				VHCI_DEBUG(1, (CE_WARN, NULL,
7210				    "!v_s_do_s_c: retry packet 0x%p %s",
7211				    (void *)pkt, scsi_rname(pkt->pkt_reason)));
7212				goto retry;
7213			}
7214			/* FALLTHROUGH */
7215		default:
7216			VHCI_DEBUG(1, (CE_WARN, NULL, "!pkt did not "
7217			    "complete successfully (pkt 0x%p,"
7218			    "reason %x)", (void *)pkt, pkt->pkt_reason));
7219			return (0);
7220	}
7221	return (1);
7222}
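
/*
 * Illustrative sketch (not part of this driver): a typical polled
 * caller of vhci_do_scsi_cmd(), following the packet setup pattern
 * used in vhci_failover() above.  'ap' is an assumed scsi_address.
 */
#if 0
	struct buf	*bp;
	struct scsi_pkt	*pkt;
	int		ok = 0;

	bp = scsi_alloc_consistent_buf(ap, NULL, DEV_BSIZE, B_READ,
	    NULL, NULL);
	if (bp != NULL) {
		pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP1,
		    sizeof (struct scsi_arq_status), 0, PKT_CONSISTENT,
		    NULL, NULL);
		if (pkt != NULL) {
			(void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)
			    pkt->pkt_cdbp, SCMD_READ, 1, 1, 0);
			pkt->pkt_time = 60;
			ok = vhci_do_scsi_cmd(pkt);	/* 1 on success */
			scsi_destroy_pkt(pkt);
		}
		scsi_free_consistent_buf(bp);
	}
#endif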
7223
7224static int
7225vhci_quiesce_lun(struct scsi_vhci_lun *vlun)
7226{
7227	mdi_pathinfo_t		*pip, *spip;
7228	dev_info_t		*cdip;
7229	struct scsi_vhci_priv	*svp;
7230	mdi_pathinfo_state_t	pstate;
7231	uint32_t		p_ext_state;
7232	int			circular;
7233
7234	cdip = vlun->svl_dip;
7235	pip = spip = NULL;
7236	ndi_devi_enter(cdip, &circular);
7237	pip = mdi_get_next_phci_path(cdip, NULL);
7238	while (pip != NULL) {
7239		(void) mdi_pi_get_state2(pip, &pstate, &p_ext_state);
7240		if (pstate != MDI_PATHINFO_STATE_ONLINE) {
7241			spip = pip;
7242			pip = mdi_get_next_phci_path(cdip, spip);
7243			continue;
7244		}
7245		mdi_hold_path(pip);
7246		ndi_devi_exit(cdip, circular);
7247		svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
7248		mutex_enter(&svp->svp_mutex);
7249		while (svp->svp_cmds != 0) {
7250			if (cv_timedwait(&svp->svp_cv, &svp->svp_mutex,
7251			    ddi_get_lbolt() + drv_usectohz
7252			    (vhci_path_quiesce_timeout * 1000000)) == -1) {
7253				mutex_exit(&svp->svp_mutex);
7254				mdi_rele_path(pip);
7255				VHCI_DEBUG(1, (CE_WARN, NULL,
7256				    "Quiesce of lun was not successful, "
7257				    "vlun: 0x%p.", (void *)vlun));
7258				return (0);
7259			}
7260		}
7261		mutex_exit(&svp->svp_mutex);
7262		ndi_devi_enter(cdip, &circular);
7263		spip = pip;
7264		pip = mdi_get_next_phci_path(cdip, spip);
7265		mdi_rele_path(spip);
7266	}
7267	ndi_devi_exit(cdip, circular);
7268	return (1);
7269}
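
/*
 * A sketch of the path-walk idiom used in vhci_quiesce_lun() above
 * (the visit_path() callback is hypothetical).  The devinfo lock taken
 * by ndi_devi_enter() must not be held across a blocking wait, so each
 * path is pinned with mdi_hold_path() before the lock is dropped, and
 * the walk resumes from the held path after re-entering:
 *
 *	ndi_devi_enter(cdip, &circ);
 *	pip = mdi_get_next_phci_path(cdip, NULL);
 *	while (pip != NULL) {
 *		mdi_hold_path(pip);
 *		ndi_devi_exit(cdip, circ);
 *		visit_path(pip);		(may block)
 *		ndi_devi_enter(cdip, &circ);
 *		spip = pip;
 *		pip = mdi_get_next_phci_path(cdip, spip);
 *		mdi_rele_path(spip);
 *	}
 *	ndi_devi_exit(cdip, circ);
 */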
7270
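/*
 * Validate and register the persistent reservation key for a path
 * that has come (back) online while PGR is active on the LUN:
 *
 *	1. validate the key on some other online/standby path;
 *	2. if valid, force-register (Register_And_Ignore) the key on
 *	   the new path;
 *	3. re-validate the key on a pre-existing path, since another
 *	   host may have cleared it in the meantime;
 *	4. on failure, back out by registering a key of zero.
 *
 * Returns 1 on success, 0 on failure.
 */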
7271static int
7272vhci_pgr_validate_and_register(scsi_vhci_priv_t *svp)
7273{
7274	scsi_vhci_lun_t		*vlun;
7275	vhci_prout_t		*prout;
7276	int			rval, success;
7277	mdi_pathinfo_t		*pip, *npip;
7278	scsi_vhci_priv_t	*osvp;
7279	dev_info_t		*cdip;
7280	uchar_t			cdb_1;
7281	uchar_t			temp_res_key[MHIOC_RESV_KEY_SIZE];
7282
7284	/*
7285	 * see if there are any other paths available; if none,
7286	 * then there is nothing to do.
7287	 */
7288	cdip = svp->svp_svl->svl_dip;
7289	rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH |
7290	    MDI_SELECT_STANDBY_PATH, NULL, &pip);
7291	if ((rval != MDI_SUCCESS) || (pip == NULL)) {
7292		VHCI_DEBUG(4, (CE_NOTE, NULL,
7293		    "%s%d: vhci_pgr_validate_and_register: first path\n",
7294		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
7295		return (1);
7296	}
7297
7298	vlun = svp->svp_svl;
7299	prout = &vlun->svl_prout;
7300	ASSERT(vlun->svl_pgr_active != 0);
7301
7302	/*
7303	 * While this path was busy/offlined, some other host might have
7304	 * cleared this key.  Validate the key on some other path first;
7305	 * if that fails, return failure.
7306	 */
7307
7308	npip = pip;
7309	pip = NULL;
7310	success = 0;
7311
7312	/* Save the res key */
7313	bcopy((const void *)prout->res_key,
7314	    (void *)temp_res_key, MHIOC_RESV_KEY_SIZE);
7315
7316	/*
7317	 * The CDB from the application may specify Register_And_Ignore.
7318	 * Rather than validating the key, such a cdb would force-register
7319	 * it, so convert it to a normal cdb for validation and be sure
7320	 * to restore the original cdb afterwards.
7321	 */
7322	cdb_1 = vlun->svl_cdb[1];
7323	vlun->svl_cdb[1] &= 0xe0;
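	/*
	 * (The PROUT service action occupies the low five bits of CDB
	 * byte 1, so masking with 0xe0 clears the service action field
	 * and turns any service action into a plain Register.)
	 */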
7324
7325	do {
7326		osvp = (scsi_vhci_priv_t *)
7327		    mdi_pi_get_vhci_private(npip);
7328		if (osvp == NULL) {
7329			VHCI_DEBUG(4, (CE_NOTE, NULL,
7330			    "vhci_pgr_validate_and_register: no "
7331			    "client priv! 0x%p offlined?\n",
7332			    (void *)npip));
7333			goto next_path_1;
7334		}
7335
7336		if (osvp == svp) {
7337			VHCI_DEBUG(4, (CE_NOTE, NULL,
7338			    "vhci_pgr_validate_and_register: same svp 0x%p"
7339			    " npip 0x%p vlun 0x%p\n",
7340			    (void *)svp, (void *)npip, (void *)vlun));
7341			goto next_path_1;
7342		}
7343
7344		VHCI_DEBUG(4, (CE_NOTE, NULL,
7345		    "vhci_pgr_validate_and_register: First validate on"
7346		    " osvp 0x%p being done. vlun 0x%p thread 0x%p Before bcopy"
7347		    " cdb1 %x\n", (void *)osvp, (void *)vlun,
7348		    (void *)curthread, vlun->svl_cdb[1]));
7349		vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy:");
7350
7351		bcopy((const void *)prout->service_key,
7352		    (void *)prout->res_key, MHIOC_RESV_KEY_SIZE);
7353
7354		VHCI_DEBUG(4, (CE_WARN, NULL, "vlun 0x%p After bcopy",
7355		    (void *)vlun));
7356		vhci_print_prout_keys(vlun, "v_pgr_val_reg: after bcopy: ");
7357
7358		rval = vhci_do_prout(osvp);
7359		if (rval == 1) {
7360			VHCI_DEBUG(4, (CE_NOTE, NULL,
7361			    "%s%d: vhci_pgr_validate_and_register: key"
7362			    " validated thread 0x%p\n", ddi_driver_name(cdip),
7363			    ddi_get_instance(cdip), (void *)curthread));
7364			pip = npip;
7365			success = 1;
7366			break;
7367		} else {
7368			VHCI_DEBUG(4, (CE_NOTE, NULL,
7369			    "vhci_pgr_validate_and_register: First validation"
7370			    " on osvp 0x%p failed %x\n", (void *)osvp, rval));
7371			vhci_print_prout_keys(vlun, "v_pgr_val_reg: failed:");
7372		}
7373
7374		/*
7375		 * Try other paths
7376		 */
7377next_path_1:
7378		pip = npip;
7379		rval = mdi_select_path(cdip, NULL,
7380		    MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
7381		    pip, &npip);
7382		mdi_rele_path(pip);
7383	} while ((rval == MDI_SUCCESS) && (npip != NULL));
7384
7386	/* Be sure to restore original cdb */
7387	vlun->svl_cdb[1] = cdb_1;
7388
7389	/* Restore the res_key */
7390	bcopy((const void *)temp_res_key,
7391	    (void *)prout->res_key, MHIOC_RESV_KEY_SIZE);
7392
7393	/*
7394	 * If the key could not be validated on any other path, this is the
7395	 * first registration; return success so the online can continue.
7396	 */
7397	if (success == 0) {
7398		return (1);
7399	}
7400
7401	ASSERT(pip != NULL);
7402
7403	/*
7404	 * Force register on new path
7405	 */
7406	cdb_1 = vlun->svl_cdb[1];		/* store the cdb */
7407
7408	vlun->svl_cdb[1] &= 0xe0;
7409	vlun->svl_cdb[1] |= VHCI_PROUT_R_AND_IGNORE;
7410
7411	vhci_print_prout_keys(vlun, "v_pgr_val_reg: keys before bcopy: ");
7412
7413	bcopy((const void *)prout->active_service_key,
7414	    (void *)prout->service_key, MHIOC_RESV_KEY_SIZE);
7415	bcopy((const void *)prout->active_res_key,
7416	    (void *)prout->res_key, MHIOC_RESV_KEY_SIZE);
7417
7418	vhci_print_prout_keys(vlun, "v_pgr_val_reg:keys after bcopy: ");
7419
7420	rval = vhci_do_prout(svp);
7421	vlun->svl_cdb[1] = cdb_1;		/* restore the cdb */
7422	if (rval != 1) {
7423		VHCI_DEBUG(4, (CE_NOTE, NULL,
7424		    "vhci_pgr_validate_and_register: register on new"
7425		    " path 0x%p svp 0x%p failed %x\n",
7426		    (void *)pip, (void *)svp, rval));
7427		vhci_print_prout_keys(vlun, "v_pgr_val_reg: reg failed: ");
7428		mdi_rele_path(pip);
7429		return (0);
7430	}
7431
7432	if (bcmp(prout->service_key, zero_key, MHIOC_RESV_KEY_SIZE) == 0) {
7433		VHCI_DEBUG(4, (CE_NOTE, NULL,
7434		    "vhci_pgr_validate_and_register: zero service key\n"));
7435		mdi_rele_path(pip);
7436		return (rval);
7437	}
7438
7439	/*
7440	 * While the key was being force-registered, some other host might
7441	 * have cleared it.  Re-validate the key on another pre-existing
7442	 * path before declaring success.
7443	 */
7444	npip = pip;
7445	pip = NULL;
7446
7447	/*
7448	 * As above, the CDB from the application may specify Register and
7449	 * Ignore, which would force-register the key rather than validate
7450	 * it.  Convert it to a normal cdb for validation and be sure to
7451	 * restore the original cdb afterwards.
7452	 */
7453	cdb_1 = vlun->svl_cdb[1];
7454	vlun->svl_cdb[1] &= 0xe0;
7455	success = 0;
7456
7457	do {
7458		osvp = (scsi_vhci_priv_t *)
7459		    mdi_pi_get_vhci_private(npip);
7460		if (osvp == NULL) {
7461			VHCI_DEBUG(4, (CE_NOTE, NULL,
7462			    "vhci_pgr_validate_and_register: no "
7463			    "client priv! 0x%p offlined?\n",
7464			    (void *)npip));
7465			goto next_path_2;
7466		}
7467
7468		if (osvp == svp) {
7469			VHCI_DEBUG(4, (CE_NOTE, NULL,
7470			    "vhci_pgr_validate_and_register: same osvp 0x%p"
7471			    " npip 0x%p vlun 0x%p\n",
7472			    (void *)svp, (void *)npip, (void *)vlun));
7473			goto next_path_2;
7474		}
7475
7476		VHCI_DEBUG(4, (CE_NOTE, NULL,
7477		    "vhci_pgr_validate_and_register: Re-validation on"
7478		    " osvp 0x%p being done. vlun 0x%p Before bcopy cdb1 %x\n",
7479		    (void *)osvp, (void *)vlun, vlun->svl_cdb[1]));
7480		vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy: ");
7481
7482		bcopy((const void *)prout->service_key,
7483		    (void *)prout->res_key, MHIOC_RESV_KEY_SIZE);
7484
7485		vhci_print_prout_keys(vlun, "v_pgr_val_reg: after bcopy: ");
7486
7487		rval = vhci_do_prout(osvp);
7488		if (rval == 1) {
7489			VHCI_DEBUG(4, (CE_NOTE, NULL,
7490			    "%s%d: vhci_pgr_validate_and_register: key"
7491			    " validated thread 0x%p\n", ddi_driver_name(cdip),
7492			    ddi_get_instance(cdip), (void *)curthread));
7493			pip = npip;
7494			success = 1;
7495			break;
7496		} else {
7497			VHCI_DEBUG(4, (CE_NOTE, NULL,
7498			    "vhci_pgr_validate_and_register: Re-validation on"
7499			    " osvp 0x%p failed %x\n", (void *)osvp, rval));
7500			vhci_print_prout_keys(vlun,
7501			    "v_pgr_val_reg: reval failed: ");
7502		}
7503
7504		/*
7505		 * Try other paths
7506		 */
7507next_path_2:
7508		pip = npip;
7509		rval = mdi_select_path(cdip, NULL,
7510		    MDI_SELECT_ONLINE_PATH|MDI_SELECT_STANDBY_PATH,
7511		    pip, &npip);
7512		mdi_rele_path(pip);
7513	} while ((rval == MDI_SUCCESS) && (npip != NULL));
7514
7515	/* Be sure to restore original cdb */
7516	vlun->svl_cdb[1] = cdb_1;
7517
7518	if (success == 1) {
7519		/* Successfully validated registration */
7520		mdi_rele_path(pip);
7521		return (1);
7522	}
7523
7524	VHCI_DEBUG(4, (CE_WARN, NULL, "key validation failed"));
7525
7526	/*
7527	 * key invalid, back out by registering key value of 0
7528	 */
7529	VHCI_DEBUG(4, (CE_NOTE, NULL,
7530	    "vhci_pgr_validate_and_register: backout on"
7531	    " svp 0x%p being done\n", (void *)svp));
7532	vhci_print_prout_keys(vlun, "v_pgr_val_reg: before bcopy: ");
7533
7534	bcopy((const void *)prout->service_key, (void *)prout->res_key,
7535	    MHIOC_RESV_KEY_SIZE);
7536	bzero((void *)prout->service_key, MHIOC_RESV_KEY_SIZE);
7537
7538	vhci_print_prout_keys(vlun, "v_pgr_val_reg: after bcopy: ");
7539
7540	/*
7541	 * Get a new path
7542	 */
7543	rval = mdi_select_path(cdip, NULL, MDI_SELECT_ONLINE_PATH |
7544	    MDI_SELECT_STANDBY_PATH, NULL, &pip);
7545	if ((rval != MDI_SUCCESS) || (pip == NULL)) {
7546		VHCI_DEBUG(4, (CE_NOTE, NULL,
7547		    "%s%d: vhci_pgr_validate_and_register: no valid pip\n",
7548		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
7549		return (0);
7550	}
7551
7552	if ((rval = vhci_do_prout(svp)) != 1) {
7553		VHCI_DEBUG(4, (CE_NOTE, NULL,
7554		    "vhci_pgr_validate_and_register: backout on"
7555		    " svp 0x%p failed\n", (void *)svp));
7556		vhci_print_prout_keys(vlun, "backout failed");
7557
7558		VHCI_DEBUG(4, (CE_WARN, NULL,
7559		    "%s%d: vhci_pgr_validate_and_register: key"
7560		    " validation and backout failed", ddi_driver_name(cdip),
7561		    ddi_get_instance(cdip)));
7562		if (rval == VHCI_PGR_ILLEGALOP) {
7563			VHCI_DEBUG(4, (CE_WARN, NULL,
7564			    "%s%d: vhci_pgr_validate_and_register: key"
7565			    " already cleared", ddi_driver_name(cdip),
7566			    ddi_get_instance(cdip)));
7567			rval = 1;
7568		} else
7569			rval = 0;
7570	} else {
7571		VHCI_DEBUG(4, (CE_NOTE, NULL,
7572		    "%s%d: vhci_pgr_validate_and_register: key"
7573		    " validation failed, key backed out\n",
7574		    ddi_driver_name(cdip), ddi_get_instance(cdip)));
7575		vhci_print_prout_keys(vlun, "v_pgr_val_reg: key backed out: ");
7576	}
7577	mdi_rele_path(pip);
7578
7579	return (rval);
7580}
7581
7582/*
7583 * taskq routine to dispatch a scsi cmd to vhci_scsi_start.  This ensures
7584 * that vhci_scsi_start is not called in interrupt context.
7585 * Since the upper layer already got TRAN_ACCEPT when the command was
7586 * dispatched, we must complete the command here if anything goes wrong.
7587 */
7588static void
7589vhci_dispatch_scsi_start(void *arg)
7590{
7591	struct vhci_pkt *vpkt = (struct vhci_pkt *)arg;
7592	struct scsi_pkt *tpkt = vpkt->vpkt_tgt_pkt;
7593	int			rval = TRAN_BUSY;
7594
7595	VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_dispatch_scsi_start: sending"
7596	    " scsi-2 reserve for 0x%p\n",
7597	    (void *)ADDR2DIP(&(vpkt->vpkt_tgt_pkt->pkt_address))));
7598
7599	/*
7600	 * To prevent the taskq from being called recursively we set the
7601	 * VHCI_PKT_THRU_TASKQ bit in vpkt_state.
7602	 */
7603	vpkt->vpkt_state |= VHCI_PKT_THRU_TASKQ;
7604
7605	/*
7606	 * Wait for the transport to become ready to send packets; if it
7607	 * times out, it returns something other than TRAN_BUSY.  The
7608	 * vhci_reserve_delay may need tuning for other transports and
7609	 * is therefore a global.  Using delay() is safe here because
7610	 * this routine is called from taskq dispatch and not from
7611	 * interrupt context.
7612	 */
7613	while ((rval = vhci_scsi_start(&(vpkt->vpkt_tgt_pkt->pkt_address),
7614	    vpkt->vpkt_tgt_pkt)) == TRAN_BUSY) {
7615		delay(drv_usectohz(vhci_reserve_delay));
7616	}
7617
7618	switch (rval) {
7619	case TRAN_ACCEPT:
7620		return;
7621
7622	default:
7623		/*
7624		 * This pkt shall be retried, and to ensure another taskq
7625		 * is dispatched for it, clear the VHCI_PKT_THRU_TASKQ
7626		 * flag.
7627		 */
7628		vpkt->vpkt_state &= ~VHCI_PKT_THRU_TASKQ;
7629
7630		/* Ensure that the pkt is retried without a reset */
7631		tpkt->pkt_reason = CMD_ABORTED;
7632		tpkt->pkt_statistics |= STAT_ABORTED;
7633		VHCI_DEBUG(1, (CE_WARN, NULL, "!vhci_dispatch_scsi_start: "
7634		    "TRAN_rval %d returned for dip 0x%p", rval,
7635		    (void *)ADDR2DIP(&(vpkt->vpkt_tgt_pkt->pkt_address))));
7636		break;
7637	}
7638
7639	/*
7640	 * vpkt_org_vpkt should always be NULL here if the retry command
7641	 * has been successfully dispatched.  If vpkt_org_vpkt != NULL at
7642	 * this point, it is an error so restore the original vpkt and
7643	 * return an error to the target driver so it can retry the
7644	 * command as appropriate.
7645	 */
7646	if (vpkt->vpkt_org_vpkt != NULL) {
7647		struct vhci_pkt		*new_vpkt = vpkt;
7648		scsi_vhci_priv_t	*svp = (scsi_vhci_priv_t *)
7649		    mdi_pi_get_vhci_private(vpkt->vpkt_path);
7650
7651		vpkt = vpkt->vpkt_org_vpkt;
7652
7653		vpkt->vpkt_tgt_pkt->pkt_reason = tpkt->pkt_reason;
7654		vpkt->vpkt_tgt_pkt->pkt_statistics = tpkt->pkt_statistics;
7655
7656		vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
7657		    new_vpkt->vpkt_tgt_pkt);
7658
7659		tpkt = vpkt->vpkt_tgt_pkt;
7660	}
7661
7662	if (tpkt->pkt_comp) {
7663		(*tpkt->pkt_comp)(tpkt);
7664	}
7665}
7666
7667static void
7668vhci_initiate_auto_failback(void *arg)
7669{
7670	struct scsi_vhci_lun	*vlun = (struct scsi_vhci_lun *)arg;
7671	dev_info_t		*vdip, *cdip;
7672	int			held;
7673
7674	cdip = vlun->svl_dip;
7675	vdip = ddi_get_parent(cdip);
7676
7677	VHCI_HOLD_LUN(vlun, VH_SLEEP, held);
7678
7679	/*
7680	 * Perform a final check that the active path class is indeed not
7681	 * the preferred path class.  In the time since the auto failback
7682	 * was dispatched, an external failover could have been detected
7683	 * (some other host could have detected this condition and
7684	 * triggered the auto failback already).  In such a case, going
7685	 * ahead with the failover would negate the whole purpose of
7686	 * auto failback.
7687	 */
7688	mutex_enter(&vlun->svl_mutex);
7689	if (vlun->svl_active_pclass != NULL) {
7690		char				*best_pclass;
7691		struct scsi_failover_ops	*fo;
7692
7693		fo = vlun->svl_fops;
7694
7695		(*fo->sfo_pathclass_next)(NULL, &best_pclass,
7696		    vlun->svl_fops_ctpriv);
7697		if (strcmp(vlun->svl_active_pclass, best_pclass) == 0) {
7698			mutex_exit(&vlun->svl_mutex);
7699			VHCI_RELEASE_LUN(vlun);
7700			VHCI_DEBUG(1, (CE_NOTE, NULL, "Not initiating "
7701			    "auto failback for %s as %s pathclass already "
7702			    "active.\n", vlun->svl_lun_wwn, best_pclass));
7703			return;
7704		}
7705	}
7706	mutex_exit(&vlun->svl_mutex);
7707	if (mdi_failover(vdip, vlun->svl_dip, MDI_FAILOVER_SYNC)
7708	    == MDI_SUCCESS) {
7709		vhci_log(CE_NOTE, vdip, "!Auto failback operation "
7710		    "succeeded for device %s (GUID %s)",
7711		    ddi_node_name(cdip), vlun->svl_lun_wwn);
7712	} else {
7713		vhci_log(CE_NOTE, vdip, "!Auto failback operation "
7714		    "failed for device %s (GUID %s)",
7715		    ddi_node_name(cdip), vlun->svl_lun_wwn);
7716	}
7717	VHCI_RELEASE_LUN(vlun);
7718}
7719
7720#ifdef DEBUG
7721static void
7722vhci_print_prin_keys(vhci_prin_readkeys_t *prin, int numkeys)
7723{
7724	int index = 0;
7725	uchar_t buf[100];
7726
7727	VHCI_DEBUG(5, (CE_NOTE, NULL, "num keys %d\n", numkeys));
7728
7729	while (index < numkeys) {
7730		bcopy(&prin->keylist[index], buf, MHIOC_RESV_KEY_SIZE);
7731		VHCI_DEBUG(5, (CE_NOTE, NULL,
7732		    "%02x%02x%02x%02x%02x%02x%02x%02x\t",
7733		    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
7734		    buf[7]));
7735		index++;
7736	}
7737}
7738#endif
7739
7740static void
7741vhci_print_prout_keys(scsi_vhci_lun_t *vlun, char *msg)
7742{
7743	int			i;
7744	vhci_prout_t		*prout;
7745	char			buf1[4*MHIOC_RESV_KEY_SIZE + 1];
7746	char			buf2[4*MHIOC_RESV_KEY_SIZE + 1];
7747	char			buf3[4*MHIOC_RESV_KEY_SIZE + 1];
7748	char			buf4[4*MHIOC_RESV_KEY_SIZE + 1];
7749
7750	prout = &vlun->svl_prout;
7751
7752	for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
7753		(void) sprintf(&buf1[4*i], "[%02x]", prout->res_key[i]);
7754	for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
7755		(void) sprintf(&buf2[(4*i)], "[%02x]", prout->service_key[i]);
7756	for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
7757		(void) sprintf(&buf3[4*i], "[%02x]", prout->active_res_key[i]);
7758	for (i = 0; i < MHIOC_RESV_KEY_SIZE; i++)
7759		(void) sprintf(&buf4[4*i], "[%02x]",
7760		    prout->active_service_key[i]);
7761
7762	/* Print everything in one call; otherwise the output can interleave. */
7763	VHCI_DEBUG(5, (CE_CONT, NULL, "%s vlun 0x%p, thread 0x%p\n"
7764	    "res_key           : %s\n"
7765	    "service_key       : %s\n"
7766	    "active_res_key    : %s\n"
7767	    "active_service_key: %s\n",
7768	    msg, (void *)vlun, (void *)curthread, buf1, buf2, buf3, buf4));
7769}
7770
7771/*
7772 * Called from vhci_scsi_start to update the pHCI pkt with target packet.
7773 */
7774static void
7775vhci_update_pHCI_pkt(struct vhci_pkt *vpkt, struct scsi_pkt *pkt)
7776{
7778	ASSERT(vpkt->vpkt_hba_pkt);
7779
7780	vpkt->vpkt_hba_pkt->pkt_flags = pkt->pkt_flags;
7781	vpkt->vpkt_hba_pkt->pkt_flags |= FLAG_NOQUEUE;
7782
7783	if ((vpkt->vpkt_hba_pkt->pkt_flags & FLAG_NOINTR) ||
7784	    MDI_PI_IS_SUSPENDED(vpkt->vpkt_path)) {
7785		/*
7786		 * Polled Command is requested or HBA is in
7787		 * suspended state
7788		 */
7789		vpkt->vpkt_hba_pkt->pkt_flags |= FLAG_NOINTR;
7790		vpkt->vpkt_hba_pkt->pkt_comp = NULL;
7791	} else {
7792		vpkt->vpkt_hba_pkt->pkt_comp = vhci_intr;
7793	}
7794	vpkt->vpkt_hba_pkt->pkt_time = pkt->pkt_time;
7795	bcopy(pkt->pkt_cdbp, vpkt->vpkt_hba_pkt->pkt_cdbp,
7796	    vpkt->vpkt_tgt_init_cdblen);
7797	vpkt->vpkt_hba_pkt->pkt_resid = pkt->pkt_resid;
7798
7799	/* Re-initialize the following pHCI packet state information */
7800	vpkt->vpkt_hba_pkt->pkt_state = 0;
7801	vpkt->vpkt_hba_pkt->pkt_statistics = 0;
7802	vpkt->vpkt_hba_pkt->pkt_reason = 0;
7803}
7804
7805static int
7806vhci_scsi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
7807    void *arg, void *result)
7808{
7809	int ret = DDI_SUCCESS;
7810
7811	/*
7812	 * Generic processing in MPxIO framework
7813	 */
7814	ret = mdi_bus_power(parent, impl_arg, op, arg, result);
7815
7816	switch (ret) {
7817	case MDI_SUCCESS:
7818		ret = DDI_SUCCESS;
7819		break;
7820	case MDI_FAILURE:
7821		ret = DDI_FAILURE;
7822		break;
7823	default:
7824		break;
7825	}
7826
7827	return (ret);
7828}
7829
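/*
 * Set a SCSI capability on the pHCI(s) backing a client device.  When
 * called with a specific pathinfo node ('pip' non-NULL) the capability
 * is set only on that path; otherwise it is set on every online and
 * standby path in turn.
 */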
7830static int
7831vhci_pHCI_cap(struct scsi_address *ap, char *cap, int val, int whom,
7832    mdi_pathinfo_t *pip)
7833{
7834	dev_info_t		*cdip;
7835	mdi_pathinfo_t		*npip = NULL;
7836	scsi_vhci_priv_t	*svp = NULL;
7837	struct scsi_address	*pap = NULL;
7838	scsi_hba_tran_t		*hba = NULL;
7839	int			sps;
7840	int			mps_flag;
7841	int			rval = 0;
7842
7843	mps_flag = (MDI_SELECT_ONLINE_PATH | MDI_SELECT_STANDBY_PATH);
7844	if (pip) {
7845		/*
7846		 * If the call is from vhci_pathinfo_state_change,
7847		 * then this path was busy and is becoming ready to accept IO.
7848		 */
7849		ASSERT(ap != NULL);
7850		hba = ap->a_hba_tran;
7851		ASSERT(hba != NULL);
7852		rval = scsi_ifsetcap(ap, cap, val, whom);
7853
7854		VHCI_DEBUG(2, (CE_NOTE, NULL,
7855		    "!vhci_pHCI_cap: only on path %p, ap %p, rval %x\n",
7856		    (void *)pip, (void *)ap, rval));
7857
7858		return (rval);
7859	}
7860
7861	/*
7862	 * Set capability on all the pHCIs.
7863	 * If any path is busy, then the capability would be set by
7864	 * vhci_pathinfo_state_change.
7865	 */
7866
7867	cdip = ADDR2DIP(ap);
7868	ASSERT(cdip != NULL);
7869	sps = mdi_select_path(cdip, NULL, mps_flag, NULL, &pip);
7870	if ((sps != MDI_SUCCESS) || (pip == NULL)) {
7871		VHCI_DEBUG(2, (CE_WARN, NULL,
7872		    "!vhci_pHCI_cap: Unable to get a path, dip 0x%p",
7873		    (void *)cdip));
7874		return (0);
7875	}
7876
7877again:
7878	svp = (scsi_vhci_priv_t *)mdi_pi_get_vhci_private(pip);
7879	if (svp == NULL) {
7880		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_pHCI_cap: "
7881		    "priv is NULL, pip 0x%p", (void *)pip));
7882		mdi_rele_path(pip);
7883		return (rval);
7884	}
7885
7886	if (svp->svp_psd == NULL) {
7887		VHCI_DEBUG(2, (CE_WARN, NULL, "!vhci_pHCI_cap: "
7888		    "psd is NULL, pip 0x%p, svp 0x%p",
7889		    (void *)pip, (void *)svp));
7890		mdi_rele_path(pip);
7891		return (rval);
7892	}
7893
7894	pap = &svp->svp_psd->sd_address;
7895	ASSERT(pap != NULL);
7896	hba = pap->a_hba_tran;
7897	ASSERT(hba != NULL);
7898
7899	if (hba->tran_setcap != NULL) {
7900		rval = scsi_ifsetcap(pap, cap, val, whom);
7901
7902		VHCI_DEBUG(2, (CE_NOTE, NULL,
7903		    "!vhci_pHCI_cap: path %p, ap %p, rval %x\n",
7904		    (void *)pip, (void *)ap, rval));
7905
7906		/*
7907		 * Select next path and issue the setcap, repeat
7908		 * until all paths are exhausted
7909		 */
7910		sps = mdi_select_path(cdip, NULL, mps_flag, pip, &npip);
7911		if ((sps != MDI_SUCCESS) || (npip == NULL)) {
7912			mdi_rele_path(pip);
7913			return (1);
7914		}
7915		mdi_rele_path(pip);
7916		pip = npip;
7917		goto again;
7918	}
7919	mdi_rele_path(pip);
7920	return (rval);
7921}
7922
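/*
 * Bus config entry point.  For BUS_CONFIG_ONE and BUS_UNCONFIG_ONE the
 * device name in 'arg' is mapped to its GUID so the MDI framework can
 * locate the corresponding client node.
 */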
7923static int
7924vhci_scsi_bus_config(dev_info_t *pdip, uint_t flags, ddi_bus_config_op_t op,
7925    void *arg, dev_info_t **child)
7926{
7927	char *guid;
7928
7929	if (op == BUS_CONFIG_ONE || op == BUS_UNCONFIG_ONE)
7930		guid = vhci_devnm_to_guid((char *)arg);
7931	else
7932		guid = NULL;
7933
7934	if (mdi_vhci_bus_config(pdip, flags, op, arg, child, guid)
7935	    == MDI_SUCCESS)
7936		return (NDI_SUCCESS);
7937	else
7938		return (NDI_FAILURE);
7939}
7940
7941/*
7942 * Take the original vhci_pkt and create a duplicate of it for
7943 * resending, as though it originated in ssd.
7944 */
7945static struct scsi_pkt *
7946vhci_create_retry_pkt(struct vhci_pkt *vpkt)
7947{
7948	struct vhci_pkt *new_vpkt = NULL;
7949	struct scsi_pkt	*pkt = NULL;
7950
7951	scsi_vhci_priv_t *svp = (scsi_vhci_priv_t *)
7952	    mdi_pi_get_vhci_private(vpkt->vpkt_path);
7953
7954	/*
7955	 * Ensure consistent data at completion time by setting PKT_CONSISTENT
7956	 */
7957	pkt = vhci_scsi_init_pkt(&svp->svp_psd->sd_address, pkt,
7958	    vpkt->vpkt_tgt_init_bp, vpkt->vpkt_tgt_init_cdblen,
7959	    vpkt->vpkt_tgt_init_scblen, vpkt->vpkt_tgt_init_privlen,
7960	    PKT_CONSISTENT,
7961	    NULL_FUNC, NULL);
7962	if (pkt != NULL) {
7963		new_vpkt = TGTPKT2VHCIPKT(pkt);
7964
7965		pkt->pkt_address = vpkt->vpkt_tgt_pkt->pkt_address;
7966		pkt->pkt_flags = vpkt->vpkt_tgt_pkt->pkt_flags;
7967		pkt->pkt_time = vpkt->vpkt_tgt_pkt->pkt_time;
7968		pkt->pkt_comp = vpkt->vpkt_tgt_pkt->pkt_comp;
7969
7970		pkt->pkt_resid = 0;
7971		pkt->pkt_statistics = 0;
7972		pkt->pkt_reason = 0;
7973
7974		bcopy(vpkt->vpkt_tgt_pkt->pkt_cdbp,
7975		    pkt->pkt_cdbp, vpkt->vpkt_tgt_init_cdblen);
7976
7977		/*
7978		 * Save a pointer to the original vhci_pkt
7979		 */
7980		new_vpkt->vpkt_org_vpkt = vpkt;
7981	}
7982
7983	return (pkt);
7984}
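
/*
 * Usage sketch (informal): a retry pkt created above is linked back to
 * the original through vpkt_org_vpkt, and on successful completion the
 * result is folded back into the original:
 *
 *	pkt = vhci_create_retry_pkt(vpkt);
 *	if (pkt != NULL) {
 *		... transport pkt; when it completes successfully ...
 *		vpkt = vhci_sync_retry_pkt(TGTPKT2VHCIPKT(pkt));
 *	}
 */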
7985
7986/*
7987 * Copy the successful completion information from the hba packet into
7988 * the original target pkt from the upper layer.  Returns the original
7989 * vpkt and destroys the new vpkt from the internal retry.
7990 */
7991static struct vhci_pkt *
7992vhci_sync_retry_pkt(struct vhci_pkt *vpkt)
7993{
7994	struct vhci_pkt		*ret_vpkt = NULL;
7995	struct scsi_pkt		*tpkt = NULL;
7996	struct scsi_pkt		*hba_pkt = NULL;
7997	scsi_vhci_priv_t	*svp = (scsi_vhci_priv_t *)
7998	    mdi_pi_get_vhci_private(vpkt->vpkt_path);
7999
8000	ASSERT(vpkt->vpkt_org_vpkt != NULL);
8001	VHCI_DEBUG(0, (CE_NOTE, NULL, "vhci_sync_retry_pkt: Retry pkt "
8002	    "completed successfully!\n"));
8003
8004	ret_vpkt = vpkt->vpkt_org_vpkt;
8005	tpkt = ret_vpkt->vpkt_tgt_pkt;
8006	hba_pkt = vpkt->vpkt_hba_pkt;
8007
8008	/*
8009	 * Copy the good status into the target driver's packet
8010	 */
8011	*(tpkt->pkt_scbp) = *(hba_pkt->pkt_scbp);
8012	tpkt->pkt_resid = hba_pkt->pkt_resid;
8013	tpkt->pkt_state = hba_pkt->pkt_state;
8014	tpkt->pkt_statistics = hba_pkt->pkt_statistics;
8015	tpkt->pkt_reason = hba_pkt->pkt_reason;
8016
8017	/*
8018	 * Destroy the internally created vpkt for the retry
8019	 */
8020	vhci_scsi_destroy_pkt(&svp->svp_psd->sd_address,
8021	    vpkt->vpkt_tgt_pkt);
8022
8023	return (ret_vpkt);
8024}
8025
8026/* restart the request sense request */
8027static void
8028vhci_uscsi_restart_sense(void *arg)
8029{
8030	struct buf 	*rqbp;
8031	struct buf 	*bp;
8032	struct scsi_pkt *rqpkt = (struct scsi_pkt *)arg;
8033	mp_uscsi_cmd_t 	*mp_uscmdp;
8034
8035	VHCI_DEBUG(4, (CE_WARN, NULL,
8036	    "vhci_uscsi_restart_sense: enter: rqpkt: %p", (void *)rqpkt));
8037
8038	if (scsi_transport(rqpkt) != TRAN_ACCEPT) {
8039		/* if it fails we need to wake up the original command */
8040		mp_uscmdp = rqpkt->pkt_private;
8041		ASSERT(mp_uscmdp != NULL);
8042		bp = mp_uscmdp->cmdbp;
8043		rqbp = mp_uscmdp->rqbp;
8044		scsi_free_consistent_buf(rqbp);
8045		scsi_destroy_pkt(rqpkt);
8046		bp->b_resid = bp->b_bcount;
8047		bioerror(bp, EIO);
8048		biodone(bp);
8049	}
8050}
8051
8052/*
8053 * auto-rqsense is not enabled so we have to retrieve the request sense
8054 * manually.
8055 */
8056static int
8057vhci_uscsi_send_sense(struct scsi_pkt *pkt, mp_uscsi_cmd_t *mp_uscmdp)
8058{
8059	struct buf 		*rqbp, *cmdbp;
8060	struct scsi_pkt 	*rqpkt;
8061	int			rval = 0;
8062
8063	cmdbp = mp_uscmdp->cmdbp;
8064	ASSERT(cmdbp != NULL);
8065
8066	VHCI_DEBUG(4, (CE_WARN, NULL,
8067	    "vhci_uscsi_send_sense: enter: bp: %p pkt: %p scmd: %p",
8068	    (void *)cmdbp, (void *)pkt, (void *)mp_uscmdp));
8069	/* set up the packet information and cdb */
8070	if ((rqbp = scsi_alloc_consistent_buf(mp_uscmdp->ap, NULL,
8071	    SENSE_LENGTH, B_READ, NULL, NULL)) == NULL) {
8072		return (-1);
8073	}
8074
8075	if ((rqpkt = scsi_init_pkt(mp_uscmdp->ap, NULL, rqbp,
8076	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, NULL, NULL)) == NULL) {
8077		scsi_free_consistent_buf(rqbp);
8078		return (-1);
8079	}
8080
8081	(void) scsi_setup_cdb((union scsi_cdb *)(intptr_t)rqpkt->pkt_cdbp,
8082	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
8083
8084	mp_uscmdp->rqbp = rqbp;
8085	rqbp->b_private = mp_uscmdp;
8086	rqpkt->pkt_flags |= FLAG_SENSING;
8087	rqpkt->pkt_time = 60;
8088	rqpkt->pkt_comp = vhci_uscsi_iodone;
8089	rqpkt->pkt_private = mp_uscmdp;
8090
8091	/* transport the request sense packet */
8092	switch (scsi_transport(rqpkt)) {
8093	case TRAN_ACCEPT:
8094		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
8095		    "transport accepted."));
8096		break;
8097	case TRAN_BUSY:
8098		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
8099		    "transport busy, setting timeout."));
8100		vhci_restart_timeid = timeout(vhci_uscsi_restart_sense, rqpkt,
8101		    (drv_usectohz(5 * 1000000)));
8102		break;
8103	default:
8104		VHCI_DEBUG(1, (CE_NOTE, NULL, "vhci_uscsi_send_sense: "
8105		    "transport failed"));
8106		scsi_free_consistent_buf(rqbp);
8107		scsi_destroy_pkt(rqpkt);
8108		rval = -1;
8109	}
8110
8111	return (rval);
8112}
8113
8114/*
8115 * done routine for the mpapi uscsi command - this behaves as though
8116 * FLAG_DIAGNOSE is set, meaning there are no retries except for a
8117 * manual request sense.
8118 */
8119void
8120vhci_uscsi_iodone(struct scsi_pkt *pkt)
8121{
8122	struct buf 			*bp;
8123	mp_uscsi_cmd_t 			*mp_uscmdp;
8124	struct uscsi_cmd 		*uscmdp;
8125	struct scsi_arq_status 		*arqstat;
8126	int 				err;
8127
8128	mp_uscmdp = (mp_uscsi_cmd_t *)pkt->pkt_private;
8129	uscmdp = mp_uscmdp->uscmdp;
8130	bp = mp_uscmdp->cmdbp;
8131	ASSERT(bp != NULL);
8132	VHCI_DEBUG(4, (CE_WARN, NULL,
8133	    "vhci_uscsi_iodone: enter: bp: %p pkt: %p scmd: %p",
8134	    (void *)bp, (void *)pkt, (void *)mp_uscmdp));
8135	/* Save the status and the residual into the uscsi_cmd struct */
8136	uscmdp->uscsi_status = ((*(pkt)->pkt_scbp) & STATUS_MASK);
8137	uscmdp->uscsi_resid = bp->b_resid;
8138
8139	/* return immediately on a completely successful command */
8140	if (pkt->pkt_reason == CMD_CMPLT &&
8141	    SCBP_C(pkt) == 0 && ((pkt->pkt_flags & FLAG_SENSING) == 0) &&
8142	    pkt->pkt_resid == 0) {
8143		mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
8144		scsi_destroy_pkt(pkt);
8145		biodone(bp);
8146		return;
8147	}
8148	VHCI_DEBUG(4, (CE_NOTE, NULL, "iodone: reason=0x%x "
8149	    " pkt_resid=%ld pkt_state: 0x%x b_count: %ld b_resid: %ld",
8150	    pkt->pkt_reason, pkt->pkt_resid,
8151	    pkt->pkt_state, bp->b_bcount, bp->b_resid));
8152
8153	err = EIO;
8154
8155	arqstat = (struct scsi_arq_status *)(intptr_t)(pkt->pkt_scbp);
8156	if (pkt->pkt_reason != CMD_CMPLT) {
8157		/*
8158		 * The command did not complete.
8159		 */
8160		VHCI_DEBUG(4, (CE_NOTE, NULL,
8161		    "vhci_uscsi_iodone: command did not complete."
8162		    " reason: %x flag: %x", pkt->pkt_reason, pkt->pkt_flags));
8163		if (pkt->pkt_flags & FLAG_SENSING) {
8164			MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
8165		} else if (pkt->pkt_reason == CMD_TIMEOUT) {
8166			MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_HARDERR);
8167			err = ETIMEDOUT;
8168		}
8169	} else if (pkt->pkt_state & STATE_ARQ_DONE && mp_uscmdp->arq_enabled) {
8170		/*
8171		 * The auto-rqsense happened, and the packet has a filled-in
8172		 * scsi_arq_status structure, pointed to by pkt_scbp.
8173		 */
8174		VHCI_DEBUG(4, (CE_NOTE, NULL,
8175		    "vhci_uscsi_iodone: received auto-requested sense"));
8176		if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
8177			/* get the amount of data to copy into rqbuf */
8178			int rqlen = SENSE_LENGTH - arqstat->sts_rqpkt_resid;
8179			rqlen = min(((int)uscmdp->uscsi_rqlen), rqlen);
8180			uscmdp->uscsi_rqresid = uscmdp->uscsi_rqlen - rqlen;
8181			uscmdp->uscsi_rqstatus =
8182			    *((char *)&arqstat->sts_rqpkt_status);
8183			if (uscmdp->uscsi_rqbuf && uscmdp->uscsi_rqlen &&
8184			    rqlen != 0) {
8185				bcopy(&(arqstat->sts_sensedata),
8186				    uscmdp->uscsi_rqbuf, rqlen);
8187			}
8188			mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
8189			VHCI_DEBUG(4, (CE_NOTE, NULL,
8190			    "vhci_uscsi_iodone: ARQ "
8191			    "uscsi_rqstatus=0x%x uscsi_rqresid=%d rqlen: %d "
8192			    "xfer: %d rqpkt_resid: %d\n",
8193			    uscmdp->uscsi_rqstatus, uscmdp->uscsi_rqresid,
8194			    uscmdp->uscsi_rqlen, rqlen,
8195			    arqstat->sts_rqpkt_resid));
8196		}
8197	} else if (pkt->pkt_flags & FLAG_SENSING) {
8198		struct buf *rqbp;
8199		struct scsi_status *rqstatus;
8200
8201		rqstatus = (struct scsi_status *)pkt->pkt_scbp;
8202		/* a manual request sense was done - get the information */
8203		if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
8204			int rqlen = SENSE_LENGTH - pkt->pkt_resid;
8205
8206			rqbp = mp_uscmdp->rqbp;
8207			/* get the amount of data to copy into rqbuf */
8208			rqlen = min(((int)uscmdp->uscsi_rqlen), rqlen);
8209			uscmdp->uscsi_rqresid = uscmdp->uscsi_rqlen - rqlen;
8210			uscmdp->uscsi_rqstatus = *((char *)rqstatus);
8211			if (uscmdp->uscsi_rqlen && uscmdp->uscsi_rqbuf) {
8212				bcopy(rqbp->b_un.b_addr, uscmdp->uscsi_rqbuf,
8213				    rqlen);
8214			}
8215			MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
8216			scsi_free_consistent_buf(rqbp);
8217		}
8218		VHCI_DEBUG(4, (CE_NOTE, NULL, "vhci_uscsi_iodone: FLAG_SENSING "
8219		    "uscsi_rqstatus=0x%x uscsi_rqresid=%d\n",
8220		    uscmdp->uscsi_rqstatus, uscmdp->uscsi_rqresid));
8221	} else {
8222		struct scsi_status *status =
8223		    (struct scsi_status *)pkt->pkt_scbp;
8224		/*
8225		 * Command completed and we're not getting sense. Check for
8226		 * errors and decide what to do next.
8227		 */
8228		VHCI_DEBUG(4, (CE_NOTE, NULL,
8229		    "vhci_uscsi_iodone: command appears complete: reason: %x",
8230		    pkt->pkt_reason));
8231		if (status->sts_chk) {
8232			/* need to manually get the request sense */
8233			if (vhci_uscsi_send_sense(pkt, mp_uscmdp) == 0) {
8234				scsi_destroy_pkt(pkt);
8235				return;
8236			}
8237		} else {
8238			VHCI_DEBUG(4, (CE_NOTE, NULL,
8239			    "vhci_uscsi_iodone: appears complete"));
8240			err = 0;
8241			mdi_pi_kstat_iosupdate(mp_uscmdp->pip, bp);
8242			if (pkt->pkt_resid) {
8243				bp->b_resid += pkt->pkt_resid;
8244			}
8245		}
8246	}
8247
8248	if (err) {
8249		if (bp->b_resid == 0)
8250			bp->b_resid = bp->b_bcount;
8251		bioerror(bp, err);
8252		bp->b_flags |= B_ERROR;
8253	}
8254
8255	scsi_destroy_pkt(pkt);
8256	biodone(bp);
8257
8258	VHCI_DEBUG(4, (CE_WARN, NULL, "vhci_uscsi_iodone: exit"));
8259}
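
/*
 * Worked example of the sense-copy arithmetic above, assuming a
 * SENSE_LENGTH of 20 bytes: if the sense transfer left a residual of
 * 2, then 18 valid sense bytes arrived; with a caller-supplied
 * uscsi_rqlen of 32, rqlen = min(32, 18) = 18 bytes are copied and
 * uscsi_rqresid = 32 - 18 = 14.
 */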
8260
8261/*
8262 * start routine for the mpapi uscsi command
8263 */
8264int
8265vhci_uscsi_iostart(struct buf *bp)
8266{
8267	struct scsi_pkt 	*pkt;
8268	struct uscsi_cmd	*uscmdp;
8269	mp_uscsi_cmd_t 		*mp_uscmdp;
8270	int			stat_size, rval;
8271	int			retry = 0;
8272
8273	ASSERT(bp->b_private != NULL);
8274
8275	mp_uscmdp = (mp_uscsi_cmd_t *)bp->b_private;
8276	uscmdp = mp_uscmdp->uscmdp;
8277	if (uscmdp->uscsi_flags & USCSI_RQENABLE) {
8278		stat_size = SENSE_LENGTH;
8279	} else {
8280		stat_size = 1;
8281	}
8282
8283	pkt = scsi_init_pkt(mp_uscmdp->ap, NULL, bp, uscmdp->uscsi_cdblen,
8284	    stat_size, 0, 0, SLEEP_FUNC, NULL);
8285	if (pkt == NULL) {
8286		VHCI_DEBUG(4, (CE_NOTE, NULL,
8287		    "vhci_uscsi_iostart: rval: EINVAL"));
8288		bp->b_resid = bp->b_bcount;
8289		uscmdp->uscsi_resid = bp->b_bcount;
8290		bioerror(bp, EINVAL);
8291		biodone(bp);
8292		return (EINVAL);
8293	}
8294
8295	pkt->pkt_time = uscmdp->uscsi_timeout;
8296	bcopy(uscmdp->uscsi_cdb, pkt->pkt_cdbp, (size_t)uscmdp->uscsi_cdblen);
8297	pkt->pkt_comp = vhci_uscsi_iodone;
8298	pkt->pkt_private = mp_uscmdp;
8299	if (uscmdp->uscsi_flags & USCSI_SILENT)
8300		pkt->pkt_flags |= FLAG_SILENT;
8301	if (uscmdp->uscsi_flags & USCSI_ISOLATE)
8302		pkt->pkt_flags |= FLAG_ISOLATE;
8303	if (uscmdp->uscsi_flags & USCSI_DIAGNOSE)
8304		pkt->pkt_flags |= FLAG_DIAGNOSE;
8305	if (uscmdp->uscsi_flags & USCSI_RENEGOT) {
8306		pkt->pkt_flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
8307	}
8308	VHCI_DEBUG(4, (CE_WARN, NULL,
8309	    "vhci_uscsi_iostart: ap: %p pkt: %p pcdbp: %p uscmdp: %p"
8310	    " ucdbp: %p pcdblen: %d bp: %p count: %ld pip: %p"
8311	    " stat_size: %d",
8312	    (void *)mp_uscmdp->ap, (void *)pkt, (void *)pkt->pkt_cdbp,
8313	    (void *)uscmdp, (void *)uscmdp->uscsi_cdb, pkt->pkt_cdblen,
8314	    (void *)bp, bp->b_bcount, (void *)mp_uscmdp->pip, stat_size));
8315
8316	while (((rval = scsi_transport(pkt)) == TRAN_BUSY) &&
8317	    retry < vhci_uscsi_retry_count) {
8318		delay(drv_usectohz(vhci_uscsi_delay));
8319		retry++;
8320	}
8321	if (retry >= vhci_uscsi_retry_count) {
8322		VHCI_DEBUG(4, (CE_NOTE, NULL,
8323		    "vhci_uscsi_iostart: tran_busy - retry: %d", retry));
8324	}
8325	switch (rval) {
8326	case TRAN_ACCEPT:
8327		rval =  0;
8328		break;
8329
8330	default:
8331		VHCI_DEBUG(4, (CE_NOTE, NULL,
8332		    "vhci_uscsi_iostart: rval: %d count: %ld res: %ld",
8333		    rval, bp->b_bcount, bp->b_resid));
8334		bp->b_resid = bp->b_bcount;
8335		uscmdp->uscsi_resid = bp->b_bcount;
8336		bioerror(bp, EIO);
8337		scsi_destroy_pkt(pkt);
8338		biodone(bp);
8339		rval = EIO;
8340		MDI_PI_ERRSTAT(mp_uscmdp->pip, MDI_PI_TRANSERR);
8341		break;
8342	}
8343	VHCI_DEBUG(4, (CE_NOTE, NULL,
8344	    "vhci_uscsi_iostart: exit: rval: %d", rval));
8345	return (rval);
8346}
8347
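/*
 * Caller-side sketch (informal; assumes only the mp_uscsi_cmd_t fields
 * used above): the buf's b_private carries the command context into
 * vhci_uscsi_iostart(), and completion is reported through biodone().
 *
 *	mp_uscmdp->uscmdp = uscmdp;
 *	mp_uscmdp->cmdbp = bp;
 *	bp->b_private = mp_uscmdp;
 *	(void) vhci_uscsi_iostart(bp);
 */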