asym_sun.c revision 4851:5e98cf4c2164
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25#pragma ident	"%Z%%M%	%I%	%E% SMI"
26
27/*
28 * Implementation of "scsi_vhci_f_asym_sun" asymmetric failover_ops.
29 *
30 * Note : f_asym_sun method is the same as the one originally used by SUN's
31 * T3 (Purple) device.
32 */
33
34#include <sys/conf.h>
35#include <sys/file.h>
36#include <sys/ddi.h>
37#include <sys/sunddi.h>
38#include <sys/scsi/scsi.h>
39#include <sys/scsi/adapters/scsi_vhci.h>
40
/*
 * Supported device table entries.
 *
 * Each entry is a fixed-width string laid out as an 8-character vendor id
 * (VID) followed by a 16-character product id (PID), both blank padded, as
 * shown by the column ruler below.  purple_device_probe() walks this table
 * until the terminating NULL and compares each entry, over its full length,
 * against the inquiry data starting at inq_vid.
 */
char	*purple_dev_table[] = {
/*	"                  111111" */
/*	"012345670123456789012345" */
/*	"|-VID--||-----PID------|" */

	"SUN     T300            ",
	"SUN     T4              ",
	NULL,
};
51
/*
 * Failover module plumbing.
 *
 * NOTE(review): SCSI_FAILOVER_OP is defined outside this file.  It presumably
 * ties the "purple" prefixed routines and purple_dev_table in this file to
 * the failover module named "f_asym_sun" -- confirm against scsi_vhci.h.
 */
SCSI_FAILOVER_OP("f_asym_sun", purple, "%I%");
54
/*
 * Retry delays, in microseconds (they are handed to drv_usecwait()).
 */
#define	PURPLE_FO_CMD_RETRY_DELAY	1000000 /* 1 second */
#define	PURPLE_FO_RETRY_DELAY		2000000 /* 2 seconds */
/*
 * max time for failover to complete is 3 minutes.  Compute
 * number of retries accordingly, to ensure we wait for at least
 * 3 minutes.
 *
 * The whole expansion is parenthesized so that the macro evaluates to the
 * same value no matter which operators surround it at the point of use.
 */
#define	PURPLE_FO_MAX_RETRIES	((3*60*1000000)/PURPLE_FO_RETRY_DELAY)

/*
 * max number of retries for purple failover to complete where the ping
 * command is failing due to transport errors or commands being rejected by
 * purple.
 * PURPLE_FO_MAX_RETRIES takes into account the case where the command
 * completes (CMD_CMPLT) but purple takes time to complete the failover.
 */
#define	PURPLE_FO_MAX_CMD_RETRIES	3

/*
 * T3 specific additional sense code (ASC) / qualifier (ASCQ) values that
 * are reported together with a NOT READY sense key.
 */
#define	T3_SCSI_ASC_FO_IN_PROGRESS	0x90
#define	T3_SCSI_ASCQ_PATH_ACT2INACT	0x00
#define	T3_SCSI_ASCQ_PATH_INACT2ACT	0x01
#define	T3_SCSI_ASC_PATH_INACTIVE	0x04
#define	T3_SCSI_ASCQ_PATH_INACTIVE	0x88
78
/*
 * Forward declaration: purple_device_probe() calls purple_get_fo_mode()
 * ahead of its definition further down in this file.
 */
static void purple_get_fo_mode(struct scsi_device *sd,
		int *mode, int *ownership, int *xlf_capable);
81
82/* ARGSUSED */
83static int
84purple_device_probe(struct scsi_device *sd, struct scsi_inquiry *stdinq,
85void **ctpriv)
86{
87	char	**dt;
88	int	xlf = 0, mode = 0, ownership = 0;
89
90	VHCI_DEBUG(6, (CE_NOTE, NULL, "purple_device_probe: vidpid %s\n",
91	    stdinq->inq_vid));
92
93	for (dt = purple_dev_table; *dt; dt++) {
94		if (strncmp(stdinq->inq_vid, *dt, strlen(*dt)))
95			continue;
96
97		/* match */
98		purple_get_fo_mode(sd, &mode, &ownership, &xlf);
99		if (mode == SCSI_EXPLICIT_FAILOVER)
100			return (SFO_DEVICE_PROBE_VHCI);
101		else
102			return (SFO_DEVICE_PROBE_PHCI);
103	}
104	return (SFO_DEVICE_PROBE_PHCI);
105}
106
/*
 * purple_device_unprobe()
 *
 * Placeholder: currently does nothing with either argument.
 */
/* ARGSUSED */
static void
purple_device_unprobe(struct scsi_device *sd, void *ctpriv)
{
	/* Intentionally empty; reserved for future use. */
}
115
116/* ARGSUSED */
117static void
118purple_get_fo_mode(struct scsi_device *sd, int *mode,
119int *ownership, int *xlf_capable)
120{
121	char		inqbuf[0xff], *ptr, *end;
122	int		retval = 0;
123	struct buf	*bp;
124	struct scsi_pkt	*pkt;
125	struct scsi_address	*ap;
126
127	*mode = *ownership = *xlf_capable = 0;
128	bp = getrbuf(KM_NOSLEEP);
129	if (bp == NULL)
130		return;
131	bp->b_un.b_addr = inqbuf;
132	bp->b_flags = B_READ;
133	bp->b_bcount = 0xff;
134	bp->b_resid = 0;
135
136	ap = &sd->sd_address;
137	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP0,
138	    sizeof (struct scsi_arq_status), 0, 0, NULL, NULL);
139	if (pkt == NULL) {
140		freerbuf(bp);
141		return;
142	}
143
144	pkt->pkt_cdbp[0] = SCMD_INQUIRY;
145	pkt->pkt_cdbp[1] = 0x1;
146	pkt->pkt_cdbp[2] = 0x83;
147	pkt->pkt_cdbp[4] = 0xff;
148	pkt->pkt_time = 90;
149
150	retval = vhci_do_scsi_cmd(pkt);
151	scsi_destroy_pkt(pkt);
152	freerbuf(bp);
153	if (retval == 0) {
154		VHCI_DEBUG(4, (CE_NOTE, NULL, "!(sd:%p)failed to get mode"
155		    " and ownership info\n", (void *)sd));
156		return;
157	}
158
159	ptr = inqbuf;
160	ptr += 4; /* identification descriptor 0 */
161	end = inqbuf + 4 + inqbuf[3];
162	while (((ptr[1] & 0x0f) != 0xf) && (ptr < end))
163		ptr += ptr[3] + 4;  /* next identification descriptor */
164	if (ptr >= end) {
165		VHCI_DEBUG(4, (CE_NOTE, NULL, "!(sd:%p)p_g_m_a_o:assuming"
166		    " implicit mode\n", (void *)sd));
167		*mode = SCSI_IMPLICIT_FAILOVER;
168		*ownership = 0;
169		return;
170	}
171	ptr += 4; /* Port Failover Identifier */
172	*mode = ptr[0];
173	if ((ptr[1] & 0x3) == 0x01)
174		*ownership = 0;
175	else if ((ptr[1] & 0x3) == 0x00)
176		*ownership = 1;
177	if (ptr[1] & 0x4) {
178		*xlf_capable = 1;
179	} else {
180		*xlf_capable = 0;
181	}
182}
183
184static int
185purple_activate_explicit(struct scsi_device *sd, int xlf_capable)
186{
187	char			cdb[CDB_GROUP1];
188	struct scsi_address	*ap;
189	struct scsi_pkt		*pkt;
190	int			retval;
191
192	bzero(cdb, CDB_GROUP1);
193
194	ap = &sd->sd_address;
195	pkt = scsi_init_pkt(ap, NULL, NULL, CDB_GROUP1,
196	    sizeof (struct scsi_arq_status), 0, 0, NULL, NULL);
197	if (pkt == NULL)
198		return (0);
199
200	pkt->pkt_cdbp[0] = 0xD0;
201	if (xlf_capable) {
202		/*
203		 * Bit 2/1: 1/0: implicitly drop any reservation
204		 * Bit 0: Grab bit - 1 means an explicit failover will be
205		 * triggered
206		 */
207		pkt->pkt_cdbp[1] = 0x05;
208	} else {
209		pkt->pkt_cdbp[1] = 0x01; /* no reservation check, "grab" lun */
210	}
211
212	retval = vhci_do_scsi_cmd(pkt);
213	scsi_destroy_pkt(pkt);
214
215	return (retval);
216}
217
218/* ARGSUSED */
219static int
220purple_path_activate(struct scsi_device *sd, char *pathclass,
221void *ctpriv)
222{
223	struct buf		*bp;
224	struct scsi_pkt		*pkt;
225	struct scsi_address	*ap;
226	int			err, retry_cnt, retry_cmd_cnt;
227	int			mode, ownership, retval, xlf;
228	struct scsi_extended_sense	*sns;
229
230	ap = &sd->sd_address;
231
232	mode = ownership = 0;
233
234	purple_get_fo_mode(sd, &mode, &ownership, &xlf);
235	if (ownership == 1) {
236		VHCI_DEBUG(4, (CE_NOTE, NULL, "!path already active for 0x%p\n",
237		    (void *)sd));
238		return (0);
239	}
240
241	if (mode != SCSI_IMPLICIT_FAILOVER) {
242		VHCI_DEBUG(4, (CE_NOTE, NULL,
243		    "!mode is EXPLICIT for 0x%p xlf %x\n",
244		    (void *)sd, xlf));
245		retval = purple_activate_explicit(sd, xlf);
246		if (retval == 0) {
247			VHCI_DEBUG(4, (CE_NOTE, NULL,
248			    "!(sd:%p)purple_path_activate failed(1)\n",
249			    (void *)sd));
250			return (1);
251		}
252	} else {
253		VHCI_DEBUG(4, (CE_NOTE, NULL, "!mode is IMPLICIT for 0x%p\n",
254		    (void *)sd));
255	}
256
257	bp = scsi_alloc_consistent_buf(ap, (struct buf *)NULL, DEV_BSIZE,
258	    B_READ, NULL, NULL);
259	if (!bp) {
260		cmn_err(CE_WARN, "!No resources (buf) to initiate T3 path "
261		    "activation");
262		return (1);
263	}
264
265	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP1,
266	    sizeof (struct scsi_arq_status), 0, PKT_CONSISTENT, NULL, NULL);
267	if (!pkt) {
268		cmn_err(CE_WARN, "!Packet alloc failure during T3 "
269		    "path activation");
270		scsi_free_consistent_buf(bp);
271		return (1);
272	}
273
274	(void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)pkt->pkt_cdbp,
275	    SCMD_READ, 1, 1, 0);
276	pkt->pkt_time = 3*30;
277	pkt->pkt_flags |= FLAG_NOINTR;
278
279	retry_cnt = 0;
280	retry_cmd_cnt = 0;
281retry:
282	err = scsi_transport(pkt);
283	if (err != TRAN_ACCEPT) {
284		/*
285		 * Retry TRAN_BUSY till PURPLE_FO_MAX_RETRIES is exhausted.
286		 * All other errors are fatal and should not be retried.
287		 */
288		if ((err == TRAN_BUSY) &&
289		    (retry_cnt++ < PURPLE_FO_MAX_RETRIES)) {
290			drv_usecwait(PURPLE_FO_RETRY_DELAY);
291			goto retry;
292		}
293		cmn_err(CE_WARN, "T3 failover failed, "
294		    "couldn't transport packet");
295		scsi_destroy_pkt(pkt);
296		scsi_free_consistent_buf(bp);
297		return (1);
298	}
299
300	switch (pkt->pkt_reason) {
301		case CMD_TIMEOUT:
302			cmn_err(CE_WARN, "!T3 failover failed: timed out ");
303			scsi_destroy_pkt(pkt);
304			scsi_free_consistent_buf(bp);
305			return (1);
306		case CMD_CMPLT:
307			/*
308			 * Re-initialize retry_cmd_cnt. Allow transport and
309			 * cmd errors to go through a full retry count when
310			 * these are encountered.  This way TRAN/CMD errors
311			 * retry count is not exhausted due to CMD_CMPLTs
312			 * delay for a T3 fo to finish. This allows the system
313			 * to brave a hick-up on the link at any given time,
314			 * while waiting for the fo to complete.
315			 */
316			retry_cmd_cnt = 0;
317			if (pkt->pkt_state & STATE_ARQ_DONE) {
318				sns = &(((struct scsi_arq_status *)(uintptr_t)
319				    (pkt->pkt_scbp))->sts_sensedata);
320				if (sns->es_key == KEY_UNIT_ATTENTION) {
321					/*
322					 * swallow unit attention
323					 */
324					goto retry;
325				} else if ((sns->es_key == KEY_NOT_READY) &&
326				    (sns->es_add_code ==
327				    T3_SCSI_ASC_FO_IN_PROGRESS) &&
328				    (sns->es_qual_code ==
329				    T3_SCSI_ASCQ_PATH_INACT2ACT)) {
330					if (retry_cnt++ >=
331					    PURPLE_FO_MAX_RETRIES) {
332						cmn_err(CE_WARN, "!T3 failover"
333						    " failed: timed out "
334						    "waiting for path to "
335						    "become active");
336						scsi_destroy_pkt(pkt);
337						scsi_free_consistent_buf(bp);
338						return (1);
339					}
340					VHCI_DEBUG(6, (CE_NOTE, NULL,
341					    "!(sd:%p)lun becoming active...\n",
342					    (void *)sd));
343					drv_usecwait(PURPLE_FO_RETRY_DELAY);
344					goto retry;
345				}
346				cmn_err(CE_NOTE, "!T3 failover failed;"
347				    " sense key:%x, ASC: %x, "
348				    "ASCQ:%x", sns->es_key,
349				    sns->es_add_code, sns->es_qual_code);
350				scsi_destroy_pkt(pkt);
351				scsi_free_consistent_buf(bp);
352				return (1);
353			}
354			switch (SCBP_C(pkt)) {
355				case STATUS_GOOD:
356					break;
357				case STATUS_CHECK:
358					VHCI_DEBUG(4, (CE_WARN, NULL,
359					    "!(sd:%p)T3:"
360					    " cont allegiance during purple "
361					    "activation", (void *)sd));
362					scsi_destroy_pkt(pkt);
363					scsi_free_consistent_buf(bp);
364					return (1);
365				case STATUS_QFULL:
366					VHCI_DEBUG(6, (CE_NOTE, NULL, "QFULL "
367					    "status returned during purple "
368					    "path activation for 0x%p\n",
369					    (void *)sd));
370					drv_usecwait(5000);
371					goto retry;
372				case STATUS_BUSY:
373					VHCI_DEBUG(6, (CE_NOTE, NULL, "BUSY "
374					    "status returned during purple "
375					    "path activation for 0x%p\n",
376					    (void *)sd));
377					drv_usecwait(5000);
378					goto retry;
379				default:
380					VHCI_DEBUG(4, (CE_WARN, NULL,
381					    "!(sd:%p) Bad status "
382					    "returned during purple "
383					    "activation (pkt 0x%p, "
384					    "status %x)",
385					    (void *)sd, (void *)pkt,
386					    SCBP_C(pkt)));
387					scsi_destroy_pkt(pkt);
388					scsi_free_consistent_buf(bp);
389					return (1);
390			}
391			break;
392		case CMD_INCOMPLETE:
393		case CMD_RESET:
394		case CMD_ABORTED:
395		case CMD_TRAN_ERR:
396			/*
397			 * Increased the number of retries when these error
398			 * cases are encountered.  Also added a 1 sec wait
399			 * before retrying.
400			 */
401			if (retry_cmd_cnt++ < PURPLE_FO_MAX_CMD_RETRIES) {
402				drv_usecwait(PURPLE_FO_CMD_RETRY_DELAY);
403				VHCI_DEBUG(4, (CE_WARN, NULL,
404				    "!Retrying T3 path activation due to "
405				    "pkt reason:%x, retry cnt:%d",
406				    pkt->pkt_reason, retry_cmd_cnt));
407				goto retry;
408			}
409			/* FALLTHROUGH */
410		default:
411			cmn_err(CE_WARN, "!T3 path activation did not "
412			    "complete successfully,"
413			    "(pkt reason %x)", pkt->pkt_reason);
414			scsi_destroy_pkt(pkt);
415			scsi_free_consistent_buf(bp);
416			return (1);
417	}
418
419	VHCI_DEBUG(4, (CE_NOTE, NULL, "!T3 path activation success\n"));
420	scsi_destroy_pkt(pkt);
421	scsi_free_consistent_buf(bp);
422	return (0);
423}
424
/*
 * purple_path_deactivate()
 *
 * Nothing is sent to the device to deactivate a path; this always
 * reports success (0) and ignores all of its arguments.
 */
/* ARGSUSED */
static int
purple_path_deactivate(struct scsi_device *sd, char *pathclass,
void *ctpriv)
{
	return (0);
}
431
432/* ARGSUSED */
433static int
434purple_path_get_opinfo(struct scsi_device *sd, struct scsi_path_opinfo
435*opinfo, void *ctpriv)
436{
437	struct scsi_inquiry	*inq;
438	struct buf		*bp;
439	struct scsi_pkt		*pkt;
440	struct scsi_address	*ap;
441	int			retval, mode, ownership, xlf;
442
443	ap = &sd->sd_address;
444
445	bp = scsi_alloc_consistent_buf(ap, (struct buf *)NULL, SUN_INQSIZE,
446	    B_READ, NULL, NULL);
447	if (!bp)
448		return (1);
449	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP0,
450	    sizeof (struct scsi_arq_status), 0, PKT_CONSISTENT, NULL, NULL);
451	if (!pkt) {
452		scsi_free_consistent_buf(bp);
453		return (1);
454	}
455	(void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)pkt->pkt_cdbp,
456	    SCMD_INQUIRY, 0, SUN_INQSIZE, 0);
457	pkt->pkt_time = 60;
458
459	retval = vhci_do_scsi_cmd(pkt);
460	if (retval == 0) {
461		scsi_destroy_pkt(pkt);
462		scsi_free_consistent_buf(bp);
463		return (1);
464	}
465
466	inq = (struct scsi_inquiry *)bp->b_un.b_addr;
467
468	opinfo->opinfo_rev = OPINFO_REV;
469
470	/*
471	 * Ignore to check inquiry dual port bit.
472	 * T3 can return this bit as 0 when one of its controller goes down.
473	 * Instead relying on inquiry port bit only.
474	 */
475	if (inq->inq_port == 0) {
476		(void) strcpy(opinfo->opinfo_path_attr, "primary");
477	} else {
478		(void) strcpy(opinfo->opinfo_path_attr, "secondary");
479	}
480
481	scsi_destroy_pkt(pkt);
482	scsi_free_consistent_buf(bp);
483
484	purple_get_fo_mode(sd, &mode, &ownership, &xlf);
485
486	if (ownership == 1)
487		opinfo->opinfo_path_state = SCSI_PATH_ACTIVE;
488	else
489		opinfo->opinfo_path_state = SCSI_PATH_INACTIVE;
490	opinfo->opinfo_xlf_capable = xlf;
491	opinfo->opinfo_pswtch_best = 30;
492	opinfo->opinfo_pswtch_worst = 3*30;
493	opinfo->opinfo_mode = (uint16_t)mode;
494	opinfo->opinfo_preferred = 1;
495
496	return (0);
497}
498
/*
 * purple_path_ping()
 *
 * Placeholder: no command is issued; 1 is always returned and both
 * arguments are ignored.
 */
/* ARGSUSED */
static int
purple_path_ping(struct scsi_device *sd, void *ctpriv)
{
	/* Reserved for future use. */
	return (1);
}
507
508/* ARGSUSED */
509static int
510purple_analyze_sense(struct scsi_device *sd, struct scsi_extended_sense
511*sense, void *ctpriv)
512{
513	if (sense->es_key == KEY_NOT_READY) {
514		if (sense->es_add_code == T3_SCSI_ASC_FO_IN_PROGRESS) {
515			if (sense->es_qual_code == T3_SCSI_ASCQ_PATH_INACT2ACT)
516				return (SCSI_SENSE_INACT2ACT);
517			else if (sense->es_qual_code ==
518			    T3_SCSI_ASCQ_PATH_ACT2INACT)
519				return (SCSI_SENSE_ACT2INACT);
520		} else if ((sense->es_add_code == T3_SCSI_ASC_PATH_INACTIVE) &&
521		    (sense->es_qual_code == T3_SCSI_ASCQ_PATH_INACTIVE)) {
522			return (SCSI_SENSE_INACTIVE);
523		}
524	}
525
526	/*
527	 * At this point sense data may be for power-on-reset UNIT ATTN or
528	 * hardware errors, vendor unique sense data etc.  For all these cases
529	 * return SCSI_SENSE_UNKNOWN.
530	 */
531	VHCI_DEBUG(6, (CE_NOTE, NULL, "!T3 analyze sense UNKNOWN:"
532	    " sense key:%x, ASC: %x, ASCQ:%x\n", sense->es_key,
533	    sense->es_add_code, sense->es_qual_code));
534	return (SCSI_SENSE_UNKNOWN);
535}
536
537/* ARGSUSED */
538static int
539purple_pathclass_next(char *cur, char **nxt, void *ctpriv)
540{
541	if (cur == NULL) {
542		*nxt = PCLASS_PRIMARY;
543		return (0);
544	} else if (strcmp(cur, PCLASS_PRIMARY) == 0) {
545		*nxt = PCLASS_SECONDARY;
546		return (0);
547	} else if (strcmp(cur, PCLASS_SECONDARY) == 0) {
548		return (ENOENT);
549	} else {
550		return (EINVAL);
551	}
552}
553