/* sd.c revision 8863:94039d51dda4 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>
#include <sys/cmlb.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>

#include <sys/fm/protocol.h>

/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver"
char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
#else /* !__fibre */
#define	SD_MODULE_NAME	"SCSI Disk Driver"
char _depends_on[]	= "misc/scsi misc/cmlb";
#endif /* !__fibre */

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default,
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides, things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 *
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
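
/*
 * A minimal sketch of the policy described above (an illustration only,
 * not the driver's shipping code; the actual check is made against the
 * HBA at attach time):
 *
 *	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
 *	case INTERCONNECT_SSA:
 *	case INTERCONNECT_FIBRE:
 *	case INTERCONNECT_FABRIC:
 *		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
 *		break;
 *	default:
 *		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
 *		break;
 *	}
 */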

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * The driver name is unfortunately prefixed to some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes
#define	sd_tgops			ssd_tgops

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#define	sd_ssc_init			ssd_ssc_init
#define	sd_ssc_send			ssd_ssc_send
#define	sd_ssc_fini			ssd_ssc_fini
#define	sd_ssc_assessment		ssd_ssc_assessment
#define	sd_ssc_post			ssd_ssc_post
#define	sd_ssc_print			ssd_ssc_print
#define	sd_ssc_ereport_post		ssd_ssc_ereport_post
#define	sd_ssc_set_info			ssd_ssc_set_info
#define	sd_ssc_extract_info		ssd_ssc_extract_info

#endif
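
/*
 * Example (illustrative): with the renames above in effect, compiling
 * this same source with -D__fibre produces the ssd module, whose
 * globals resolve as ssd_state, ssd_io_time, and so on, allowing the
 * sd and ssd modules to be loaded at the same time without symbol
 * collisions.
 */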

#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
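
/*
 * Example (illustrative): because these are ordinary module globals,
 * they can be preset from /etc/system; the bit values shown here are
 * placeholders -- see sddef.h for the real masks:
 *
 *	set sd:sd_component_mask = 0xffffffff
 *	set sd:sd_level_mask = 0x7
 *
 * (use the "ssd:" prefix when the fibre module is the one of interest)
 */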

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in-progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;

/*
 * Structs and globals for recording attached lun information.
 * This maintains a chain; each node in the chain represents a SCSI controller.
 * The structure records the number of luns attached to each target connected
 * to the controller.
 * For parallel SCSI devices only.
 */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;
	dev_info_t			*pdip;
	int				nlun[NTARGETS_WIDE];
};

/*
 * Flag to indicate the lun is attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))
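
/*
 * A minimal traversal sketch (an illustration, not shipping code; the
 * real lookup lives in sd_scsi_get_target_lun_count() below): find the
 * node for a given controller and read the per-target lun count while
 * holding sd_scsi_target_lun_mutex.
 */
static int
sd_example_tgt_lun_count(dev_info_t *pdip, int target)
{
	struct sd_scsi_hba_tgt_lun *cp;
	int count = -1;

	ASSERT((target >= 0) && (target < NTARGETS_WIDE));

	mutex_enter(&sd_scsi_target_lun_mutex);
	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			/* Found the controller's node; read its count. */
			count = cp->nlun[target];
			break;
		}
	}
	mutex_exit(&sd_scsi_target_lun_mutex);
	return (count);
}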

/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
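
/*
 * A minimal consult-the-cache sketch (an illustration only; the real
 * logic is in sd_scsi_probe_with_cache() below): reuse a cached
 * "no response" result for a target rather than re-issuing a slow
 * probe, otherwise probe and remember the result.
 *
 *	if (cp->cache[tgt] == SCSIPROBE_NORESP)
 *		rval = SCSIPROBE_NORESP;
 *	else
 *		rval = cp->cache[tgt] = scsi_probe(devp, waitfn);
 */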


/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0,
	1
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
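
/*
 * Usage caution (added for clarity): like most function-like macros,
 * SD_TOUPPER evaluates its argument more than once, so the argument
 * must be free of side effects:
 *
 *	SD_TOUPPER(*p++)	- wrong: p may be incremented repeatedly
 *	SD_TOUPPER(*p); p++;	- safe
 */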

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or, perhaps
 * something else as defined by the flags at a future time.)  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1818",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000i",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     STK6580_6780", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
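
/*
 * A minimal matching sketch (an illustration; the real walk is done by
 * sd_process_sdconf_table() and sd_sdconf_id_match() below, and the
 * member names used here are assumptions based on sd_disk_config_t):
 *
 *	for (i = 0; i < sd_disk_table_size; i++) {
 *		if (sd_sdconf_id_match(un, sd_disk_table[i].device_id,
 *		    strlen(sd_disk_table[i].device_id)) == SD_SUCCESS) {
 *			sd_set_vers1_properties(un, sd_disk_table[i].flags,
 *			    sd_disk_table[i].properties);
 *			break;
 *		}
 *	}
 */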



#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
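
/*
 * Example (illustrative): given a raw VPD page returned by INQUIRY in
 * vpd_buf, these offsets pick out the page code and the payload length:
 *
 *	page_code = vpd_buf[VPD_MODE_PAGE];
 *	page_len  = vpd_buf[VPD_PAGE_LENGTH];
 */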

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
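
/*
 * Usage sketch (illustrative; SD_STATE_SUSPENDED is one of the unit
 * states defined in sddef.h):
 *
 *	New_state(un, SD_STATE_SUSPENDED);	(previous state saved)
 *	...
 *	Restore_state(un);			(saved state restored)
 */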
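/*
 * Each row below (see struct sd_cdbinfo in sddef.h) gives, per CDB
 * group: the CDB size define, the opcode group bits, the largest
 * addressable LBA, and the largest transfer count expressible in that
 * CDB format.
 */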
static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
#define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
#define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
#define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties

#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_get_nv_sup			ssd_get_nv_sup
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_target_change_task		ssd_target_change_task
#define	sd_log_lun_expansion_event	ssd_log_lun_expansion_event
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_nvpair_str_decode		ssd_nvpair_str_decode
#define	sd_strtok_r			ssd_strtok_r
#define	sd_set_properties		ssd_set_properties
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi
#define	sd_tg_rdwr			ssd_tg_rdwr
#define	sd_tg_getinfo			ssd_tg_getinfo

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

/*
 * Attached luns on target for parallel scsi
 */
static void sd_scsi_target_lun_init(void);
static void sd_scsi_target_lun_fini(void);
static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);

static int	sd_spin_up_unit(sd_ssc_t *ssc);

/*
 * Use sd_ssc_init to establish an sd_ssc_t struct.
 * Use sd_ssc_send to send a uscsi internal command.
 * Use sd_ssc_fini to free the sd_ssc_t struct.
 */
static sd_ssc_t *sd_ssc_init(struct sd_lun *un);
static int sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd,
    int flag, enum uio_seg dataspace, int path_flag);
static void sd_ssc_fini(sd_ssc_t *ssc);
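
/*
 * Typical internal-command sequence (a sketch; the flag, space, path,
 * and assessment arguments shown are illustrative assumptions):
 *
 *	sd_ssc_t *ssc = sd_ssc_init(un);
 *	rval = sd_ssc_send(ssc, &ucmd, FKIOCTL, UIO_SYSSPACE,
 *	    SD_PATH_DIRECT);
 *	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
 *	sd_ssc_fini(ssc);
 */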

/*
 * Use sd_ssc_assessment to set the correct type of assessment.
 * Use sd_ssc_post to post an ereport and a system log entry:
 *       sd_ssc_post calls sd_ssc_print to print the system log, and
 *       sd_ssc_post calls sd_ssc_ereport_post to post the ereport.
 */
static void sd_ssc_assessment(sd_ssc_t *ssc,
    enum sd_type_assessment tp_assess);

static void sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess);
static void sd_ssc_print(sd_ssc_t *ssc, int sd_severity);
static void sd_ssc_ereport_post(sd_ssc_t *ssc,
    enum sd_driver_assessment drv_assess);

/*
 * Use sd_ssc_set_info to mark an un-decodable-data error.
 * Use sd_ssc_extract_info to transfer information from internal
 *       data structures to sd_ssc_t.
 */
static void sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp,
    const char *fmt, ...);
static void sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un,
    struct scsi_pkt *pktp, struct buf *bp, struct sd_xbuf *xp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
    enum uio_seg dataspace, int path_flag);

#ifdef _LP64
static void	sd_enable_descr_sense(sd_ssc_t *ssc);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(sd_ssc_t *ssc);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str);
static char *sd_strtok_r(char *string, const char *sepset, char **lasts);
static void sd_set_properties(struct sd_lun *un, char *name, char *value);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);

static void sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi,
    int reservation_flag);
static int  sd_get_devid(sd_ssc_t *ssc);
static ddi_devid_t sd_create_devid(sd_ssc_t *ssc);
static int  sd_write_deviceid(sd_ssc_t *ssc);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(sd_ssc_t *ssc);

static void sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1
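
/*
 * Example (illustrative): enable the write cache while leaving the
 * read cache setting untouched:
 *
 *	(void) sd_cache_control(ssc, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 */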
1238
1239static int   sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag);
1240static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);
1241static void  sd_get_nv_sup(sd_ssc_t *ssc);
1242static dev_t sd_make_device(dev_info_t *devi);
1243
1244static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1245	uint64_t capacity);
1246
1247/*
1248 * Driver entry point functions.
1249 */
1250static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1251static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1252static int  sd_ready_and_valid(sd_ssc_t *ssc, int part);
1253
1254static void sdmin(struct buf *bp);
1255static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1256static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1257static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1258static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1259
1260static int sdstrategy(struct buf *bp);
1261static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1262
1263/*
1264 * Function prototypes for layering functions in the iostart chain.
1265 */
1266static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1267	struct buf *bp);
1268static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1269	struct buf *bp);
1270static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1271static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1272	struct buf *bp);
1273static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1274static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1275
1276/*
1277 * Function prototypes for layering functions in the iodone chain.
1278 */
1279static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1280static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1281static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1282	struct buf *bp);
1283static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1284	struct buf *bp);
1285static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1286static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1287	struct buf *bp);
1288static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1289
1290/*
1291 * Prototypes for functions to support buf(9S) based IO.
1292 */
1293static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1294static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1295static void sd_destroypkt_for_buf(struct buf *);
1296static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1297	struct buf *bp, int flags,
1298	int (*callback)(caddr_t), caddr_t callback_arg,
1299	diskaddr_t lba, uint32_t blockcount);
1300static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1301	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1302
1303/*
1304 * Prototypes for functions to support USCSI IO.
1305 */
1306static int sd_uscsi_strategy(struct buf *bp);
1307static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1308static void sd_destroypkt_for_uscsi(struct buf *);
1309
1310static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1311	uchar_t chain_type, void *pktinfop);
1312
1313static int  sd_pm_entry(struct sd_lun *un);
1314static void sd_pm_exit(struct sd_lun *un);
1315
1316static void sd_pm_idletimeout_handler(void *arg);
1317
1318/*
1319 * sd_core internal functions (used at the sd_core_io layer).
1320 */
1321static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1322static void sdintr(struct scsi_pkt *pktp);
1323static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1324
1325static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1326	enum uio_seg dataspace, int path_flag);
1327
1328static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1329	daddr_t blkno, int (*func)(struct buf *));
1330static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1331	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1332static void sd_bioclone_free(struct buf *bp);
1333static void sd_shadow_buf_free(struct buf *bp);
1334
1335static void sd_print_transport_rejected_message(struct sd_lun *un,
1336	struct sd_xbuf *xp, int code);
1337static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1338    void *arg, int code);
1339static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1340    void *arg, int code);
1341static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1342    void *arg, int code);
1343
1344static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1345	int retry_check_flag,
1346	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1347		int c),
1348	void *user_arg, int failure_code,  clock_t retry_delay,
1349	void (*statp)(kstat_io_t *));
1350
1351static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1352	clock_t retry_delay, void (*statp)(kstat_io_t *));
1353
1354static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1355	struct scsi_pkt *pktp);
1356static void sd_start_retry_command(void *arg);
1357static void sd_start_direct_priority_command(void *arg);
1358static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1359	int errcode);
1360static void sd_return_failed_command_no_restart(struct sd_lun *un,
1361	struct buf *bp, int errcode);
1362static void sd_return_command(struct sd_lun *un, struct buf *bp);
1363static void sd_sync_with_callback(struct sd_lun *un);
1364static int sdrunout(caddr_t arg);
1365
1366static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1367static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1368
1369static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1370static void sd_restore_throttle(void *arg);
1371
1372static void sd_init_cdb_limits(struct sd_lun *un);
1373
1374static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1375	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1376
1377/*
1378 * Error handling functions
1379 */
1380static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1381	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1382static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1383	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1384static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1385	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1386static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1387	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1388
1389static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1390	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1391static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1392	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1393static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1394	struct sd_xbuf *xp, size_t actual_len);
1395static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1396	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1397
1398static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1399	void *arg, int code);
1400
1401static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1402	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1403static void sd_sense_key_recoverable_error(struct sd_lun *un,
1404	uint8_t *sense_datap,
1405	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1406static void sd_sense_key_not_ready(struct sd_lun *un,
1407	uint8_t *sense_datap,
1408	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1409static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1410	uint8_t *sense_datap,
1411	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1412static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1413	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1414static void sd_sense_key_unit_attention(struct sd_lun *un,
1415	uint8_t *sense_datap,
1416	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1417static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1418	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1419static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1420	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1421static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1422	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1423static void sd_sense_key_default(struct sd_lun *un,
1424	uint8_t *sense_datap,
1425	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1426
1427static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1428	void *arg, int flag);
1429
1430static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1431	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1432static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1433	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1434static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1435	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1436static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1437	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1438static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1439	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1440static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1441	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1442static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1443	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1444static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1445	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1446
1447static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1448
1449static void sd_start_stop_unit_callback(void *arg);
1450static void sd_start_stop_unit_task(void *arg);
1451
1452static void sd_taskq_create(void);
1453static void sd_taskq_delete(void);
1454static void sd_target_change_task(void *arg);
1455static void sd_log_lun_expansion_event(struct sd_lun *un, int km_flag);
1456static void sd_media_change_task(void *arg);
1457
1458static int sd_handle_mchange(struct sd_lun *un);
1459static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
1460static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
1461	uint32_t *lbap, int path_flag);
1462static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
1463	uint32_t *lbap, int path_flag);
1464static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int flag,
1465	int path_flag);
1466static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
1467	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1468static int sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag);
1469static int sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc,
1470	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1471static int sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc,
1472	uchar_t usr_cmd, uchar_t *usr_bufp);
1473static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1474	struct dk_callback *dkc);
1475static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1476static int sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc,
1477	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1478	uchar_t *bufaddr, uint_t buflen, int path_flag);
1479static int sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
1480	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1481	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1482static int sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize,
1483	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1484static int sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize,
1485	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1486static int sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
1487	size_t buflen, daddr_t start_block, int path_flag);
1488#define	sd_send_scsi_READ(ssc, bufaddr, buflen, start_block, path_flag)	\
1489	sd_send_scsi_RDWR(ssc, SCMD_READ, bufaddr, buflen, start_block, \
1490	path_flag)
1491#define	sd_send_scsi_WRITE(ssc, bufaddr, buflen, start_block, path_flag)\
1492	sd_send_scsi_RDWR(ssc, SCMD_WRITE, bufaddr, buflen, start_block,\
1493	path_flag)
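
/*
 * Illustrative usage of the convenience macros above (a sketch only;
 * "blkno" and "xferlen" are hypothetical variables):
 *
 *	status = sd_send_scsi_READ(ssc, bufaddr, xferlen, blkno,
 *	    SD_PATH_DIRECT);
 */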
1494
1495static int sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr,
1496	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1497	uint16_t param_ptr, int path_flag);
1498
1499static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1500static void sd_free_rqs(struct sd_lun *un);
1501
1502static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1503	uchar_t *data, int len, int fmt);
1504static void sd_panic_for_res_conflict(struct sd_lun *un);
1505
1506/*
1507 * Disk Ioctl Function Prototypes
1508 */
1509static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1510static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1511static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1512
1513/*
1514 * Multi-host Ioctl Prototypes
1515 */
1516static int sd_check_mhd(dev_t dev, int interval);
1517static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1518static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1519static char *sd_sname(uchar_t status);
1520static void sd_mhd_resvd_recover(void *arg);
1521static void sd_resv_reclaim_thread();
1522static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1523static int sd_reserve_release(dev_t dev, int cmd);
1524static void sd_rmv_resv_reclaim_req(dev_t dev);
1525static void sd_mhd_reset_notify_cb(caddr_t arg);
1526static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1527	mhioc_inkeys_t *usrp, int flag);
1528static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1529	mhioc_inresvs_t *usrp, int flag);
1530static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1531static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1532static int sd_mhdioc_release(dev_t dev);
1533static int sd_mhdioc_register_devid(dev_t dev);
1534static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1535static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1536
1537/*
1538 * SCSI removable prototypes
1539 */
1540static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1541static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1542static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1543static int sr_pause_resume(dev_t dev, int mode);
1544static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1545static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1546static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1547static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1548static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1549static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1550static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1551static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1552static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1553static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1554static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1555static int sr_sector_mode(dev_t dev, uint32_t blksize);
1556static int sr_eject(dev_t dev);
1557static void sr_ejected(register struct sd_lun *un);
1558static int sr_check_wp(dev_t dev);
1559static int sd_check_media(dev_t dev, enum dkio_state state);
1560static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1561static void sd_delayed_cv_broadcast(void *arg);
1562static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1563static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1564
1565static int sd_log_page_supported(sd_ssc_t *ssc, int log_page);
1566
1567/*
1568 * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1569 */
1570static void sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag);
1571static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1572static void sd_wm_cache_destructor(void *wm, void *un);
1573static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1574	daddr_t endb, ushort_t typ);
1575static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1576	daddr_t endb);
1577static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1578static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1579static void sd_read_modify_write_task(void *arg);
1580static int
1581sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1582	struct buf **bpp);
1583
1584
1585/*
1586 * Function prototypes for failfast support.
1587 */
1588static void sd_failfast_flushq(struct sd_lun *un);
1589static int sd_failfast_flushq_callback(struct buf *bp);
1590
1591/*
1592 * Function prototypes to check for lsi devices
1593 */
1594static void sd_is_lsi(struct sd_lun *un);
1595
1596/*
1597 * Function prototypes for partial DMA support
1598 */
1599static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1600		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1601
1602
1603/* Function prototypes for cmlb */
1604static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1605    diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1606
1607static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1608
1609/*
1610 * Constants for failfast support:
1611 *
1612 * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1613 * failfast processing being performed.
1614 *
1615 * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1616 * failfast processing on all bufs with B_FAILFAST set.
1617 */
1618
1619#define	SD_FAILFAST_INACTIVE		0
1620#define	SD_FAILFAST_ACTIVE		1
1621
1622/*
1623 * Bitmask to control behavior of buf(9S) flushes when a transition to
1624 * the failfast state occurs. Optional bits include:
1625 *
1626 * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1627 * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1628 * be flushed.
1629 *
1630 * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1631 * driver, in addition to the regular wait queue. This includes the xbuf
1632 * queues. When clear, only the driver's wait queue will be flushed.
1633 */
1634#define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1635#define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1636
1637/*
1638 * The default behavior is to only flush bufs that have B_FAILFAST set, but
1639 * to flush all queues within the driver.
1640 */
1641static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
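
/*
 * For example (illustrative only), flushing ALL bufs on ALL queues at
 * a failfast transition could be requested at boot time by setting
 * both bits in /etc/system:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 */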
1642
1643
1644/*
1645 * SD Testing Fault Injection
1646 */
1647#ifdef SD_FAULT_INJECTION
1648static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1649static void sd_faultinjection(struct scsi_pkt *pktp);
1650static void sd_injection_log(char *buf, struct sd_lun *un);
1651#endif
1652
1653/*
1654 * Device driver ops vector
1655 */
1656static struct cb_ops sd_cb_ops = {
1657	sdopen,			/* open */
1658	sdclose,		/* close */
1659	sdstrategy,		/* strategy */
1660	nodev,			/* print */
1661	sddump,			/* dump */
1662	sdread,			/* read */
1663	sdwrite,		/* write */
1664	sdioctl,		/* ioctl */
1665	nodev,			/* devmap */
1666	nodev,			/* mmap */
1667	nodev,			/* segmap */
1668	nochpoll,		/* poll */
1669	sd_prop_op,		/* cb_prop_op */
1670	0,			/* streamtab  */
1671	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1672	CB_REV,			/* cb_rev */
1673	sdaread, 		/* async I/O read entry point */
1674	sdawrite		/* async I/O write entry point */
1675};
1676
1677struct dev_ops sd_ops = {
1678	DEVO_REV,		/* devo_rev, */
1679	0,			/* refcnt  */
1680	sdinfo,			/* info */
1681	nulldev,		/* identify */
1682	sdprobe,		/* probe */
1683	sdattach,		/* attach */
1684	sddetach,		/* detach */
1685	nodev,			/* reset */
1686	&sd_cb_ops,		/* driver operations */
1687	NULL,			/* bus operations */
1688	sdpower,		/* power */
1689	ddi_quiesce_not_needed,		/* quiesce */
1690};
1691
1692/*
1693 * This is the loadable module wrapper.
1694 */
1695#include <sys/modctl.h>
1696
1697#ifndef XPV_HVM_DRIVER
1698static struct modldrv modldrv = {
1699	&mod_driverops,		/* Type of module. This one is a driver */
1700	SD_MODULE_NAME,		/* Module name. */
1701	&sd_ops			/* driver ops */
1702};
1703
1704static struct modlinkage modlinkage = {
1705	MODREV_1, &modldrv, NULL
1706};
1707
1708#else /* XPV_HVM_DRIVER */
1709static struct modlmisc modlmisc = {
1710	&mod_miscops,		/* Type of module. This one is a misc */
1711	"HVM " SD_MODULE_NAME,		/* Module name. */
1712};
1713
1714static struct modlinkage modlinkage = {
1715	MODREV_1, &modlmisc, NULL
1716};
1717
1718#endif /* XPV_HVM_DRIVER */
1719
1720static cmlb_tg_ops_t sd_tgops = {
1721	TG_DK_OPS_VERSION_1,
1722	sd_tg_rdwr,
1723	sd_tg_getinfo
1724};
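
/*
 * This ops vector is registered with the common disk-label module when
 * an instance attaches.  The call below is an illustrative sketch; the
 * actual invocation in the attach path carries additional arguments:
 *
 *	cmlb_attach(devi, &sd_tgops, ..., un->un_cmlbhandle, ...);
 */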
1725
1726static struct scsi_asq_key_strings sd_additional_codes[] = {
1727	0x81, 0, "Logical Unit is Reserved",
1728	0x85, 0, "Audio Address Not Valid",
1729	0xb6, 0, "Media Load Mechanism Failed",
1730	0xB9, 0, "Audio Play Operation Aborted",
1731	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1732	0x53, 2, "Medium removal prevented",
1733	0x6f, 0, "Authentication failed during key exchange",
1734	0x6f, 1, "Key not present",
1735	0x6f, 2, "Key not established",
1736	0x6f, 3, "Read without proper authentication",
1737	0x6f, 4, "Mismatched region to this logical unit",
1738	0x6f, 5, "Region reset count error",
1739	0xffff, 0x0, NULL
1740};
1741
1742
1743/*
1744 * Struct for passing printing information for sense data messages
1745 */
1746struct sd_sense_info {
1747	int	ssi_severity;
1748	int	ssi_pfa_flag;
1749};
1750
1751/*
1752 * Table of function pointers for iostart-side routines. Separate "chains"
1753 * of layered function calls are formed by placing the function pointers
1754 * sequentially in the desired order. Functions are called according to an
1755 * incrementing table index ordering. The last function in each chain must
1756 * be sd_core_iostart(). The corresponding iodone-side routines are expected
1757 * in the sd_iodone_chain[] array.
1758 *
1759 * Note: It may seem more natural to organize both the iostart and iodone
1760 * functions together, into an array of structures (or some similar
1761 * organization) with a common index, rather than two separate arrays which
1762 * must be maintained in synchronization. The purpose of this division is
1763 * to achieve improved performance: individual arrays allows for more
1764 * effective cache line utilization on certain platforms.
1765 */
1766
1767typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1768
1769
1770static sd_chain_t sd_iostart_chain[] = {
1771
1772	/* Chain for buf IO for disk drive targets (PM enabled) */
1773	sd_mapblockaddr_iostart,	/* Index: 0 */
1774	sd_pm_iostart,			/* Index: 1 */
1775	sd_core_iostart,		/* Index: 2 */
1776
1777	/* Chain for buf IO for disk drive targets (PM disabled) */
1778	sd_mapblockaddr_iostart,	/* Index: 3 */
1779	sd_core_iostart,		/* Index: 4 */
1780
1781	/* Chain for buf IO for removable-media targets (PM enabled) */
1782	sd_mapblockaddr_iostart,	/* Index: 5 */
1783	sd_mapblocksize_iostart,	/* Index: 6 */
1784	sd_pm_iostart,			/* Index: 7 */
1785	sd_core_iostart,		/* Index: 8 */
1786
1787	/* Chain for buf IO for removable-media targets (PM disabled) */
1788	sd_mapblockaddr_iostart,	/* Index: 9 */
1789	sd_mapblocksize_iostart,	/* Index: 10 */
1790	sd_core_iostart,		/* Index: 11 */
1791
1792	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1793	sd_mapblockaddr_iostart,	/* Index: 12 */
1794	sd_checksum_iostart,		/* Index: 13 */
1795	sd_pm_iostart,			/* Index: 14 */
1796	sd_core_iostart,		/* Index: 15 */
1797
1798	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1799	sd_mapblockaddr_iostart,	/* Index: 16 */
1800	sd_checksum_iostart,		/* Index: 17 */
1801	sd_core_iostart,		/* Index: 18 */
1802
1803	/* Chain for USCSI commands (all targets) */
1804	sd_pm_iostart,			/* Index: 19 */
1805	sd_core_iostart,		/* Index: 20 */
1806
1807	/* Chain for checksumming USCSI commands (all targets) */
1808	sd_checksum_uscsi_iostart,	/* Index: 21 */
1809	sd_pm_iostart,			/* Index: 22 */
1810	sd_core_iostart,		/* Index: 23 */
1811
1812	/* Chain for "direct" USCSI commands (all targets) */
1813	sd_core_iostart,		/* Index: 24 */
1814
1815	/* Chain for "direct priority" USCSI commands (all targets) */
1816	sd_core_iostart,		/* Index: 25 */
1817};
1818
1819/*
1820 * Macros to locate the first function of each iostart chain in the
1821 * sd_iostart_chain[] array. These are located by the index in the array.
1822 */
1823#define	SD_CHAIN_DISK_IOSTART			0
1824#define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1825#define	SD_CHAIN_RMMEDIA_IOSTART		5
1826#define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1827#define	SD_CHAIN_CHKSUM_IOSTART			12
1828#define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1829#define	SD_CHAIN_USCSI_CMD_IOSTART		19
1830#define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1831#define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1832#define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1833
1834
1835/*
1836 * Table of function pointers for the iodone-side routines for the driver-
1837 * internal layering mechanism.  The calling sequence for iodone routines
1838 * uses a decrementing table index, so the last routine called in a chain
1839 * must be at the lowest array index location for that chain.  The last
1840 * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1841 * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1842 * of the functions in an iodone side chain must correspond to the ordering
1843 * of the iostart routines for that chain.  Note that there is no iodone
1844 * side routine that corresponds to sd_core_iostart(), so there is no
1845 * entry in the table for this.
1846 */
1847
1848static sd_chain_t sd_iodone_chain[] = {
1849
1850	/* Chain for buf IO for disk drive targets (PM enabled) */
1851	sd_buf_iodone,			/* Index: 0 */
1852	sd_mapblockaddr_iodone,		/* Index: 1 */
1853	sd_pm_iodone,			/* Index: 2 */
1854
1855	/* Chain for buf IO for disk drive targets (PM disabled) */
1856	sd_buf_iodone,			/* Index: 3 */
1857	sd_mapblockaddr_iodone,		/* Index: 4 */
1858
1859	/* Chain for buf IO for removable-media targets (PM enabled) */
1860	sd_buf_iodone,			/* Index: 5 */
1861	sd_mapblockaddr_iodone,		/* Index: 6 */
1862	sd_mapblocksize_iodone,		/* Index: 7 */
1863	sd_pm_iodone,			/* Index: 8 */
1864
1865	/* Chain for buf IO for removable-media targets (PM disabled) */
1866	sd_buf_iodone,			/* Index: 9 */
1867	sd_mapblockaddr_iodone,		/* Index: 10 */
1868	sd_mapblocksize_iodone,		/* Index: 11 */
1869
1870	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1871	sd_buf_iodone,			/* Index: 12 */
1872	sd_mapblockaddr_iodone,		/* Index: 13 */
1873	sd_checksum_iodone,		/* Index: 14 */
1874	sd_pm_iodone,			/* Index: 15 */
1875
1876	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1877	sd_buf_iodone,			/* Index: 16 */
1878	sd_mapblockaddr_iodone,		/* Index: 17 */
1879	sd_checksum_iodone,		/* Index: 18 */
1880
1881	/* Chain for USCSI commands (non-checksum targets) */
1882	sd_uscsi_iodone,		/* Index: 19 */
1883	sd_pm_iodone,			/* Index: 20 */
1884
1885	/* Chain for USCSI commands (checksum targets) */
1886	sd_uscsi_iodone,		/* Index: 21 */
1887	sd_checksum_uscsi_iodone,	/* Index: 22 */
1888	sd_pm_iodone,			/* Index: 23 */
1889
1890	/* Chain for "direct" USCSI commands (all targets) */
1891	sd_uscsi_iodone,		/* Index: 24 */
1892
1893	/* Chain for "direct priority" USCSI commands (all targets) */
1894	sd_uscsi_iodone,		/* Index: 25 */
1895};
1896
1897
1898/*
1899 * Macros to locate the "first" function in the sd_iodone_chain[] array for
1900 * each iodone-side chain. These are located by the array index, but as the
1901 * iodone side functions are called in a decrementing-index order, the
1902 * highest index number in each chain must be specified (as these correspond
1903 * to the first function in the iodone chain that will be called by the core
1904 * at IO completion time).
1905 */
1906
1907#define	SD_CHAIN_DISK_IODONE			2
1908#define	SD_CHAIN_DISK_IODONE_NO_PM		4
1909#define	SD_CHAIN_RMMEDIA_IODONE			8
1910#define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1911#define	SD_CHAIN_CHKSUM_IODONE			15
1912#define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1913#define	SD_CHAIN_USCSI_CMD_IODONE		20
1914#define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
1915#define	SD_CHAIN_DIRECT_CMD_IODONE		24
1916#define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1917
1918
1919
1920
1921/*
1922 * Array to map a layering chain index to the appropriate initpkt routine.
1923 * The redundant entries are present so that the index used for accessing
1924 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1925 * with this table as well.
1926 */
1927typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1928
1929static sd_initpkt_t	sd_initpkt_map[] = {
1930
1931	/* Chain for buf IO for disk drive targets (PM enabled) */
1932	sd_initpkt_for_buf,		/* Index: 0 */
1933	sd_initpkt_for_buf,		/* Index: 1 */
1934	sd_initpkt_for_buf,		/* Index: 2 */
1935
1936	/* Chain for buf IO for disk drive targets (PM disabled) */
1937	sd_initpkt_for_buf,		/* Index: 3 */
1938	sd_initpkt_for_buf,		/* Index: 4 */
1939
1940	/* Chain for buf IO for removable-media targets (PM enabled) */
1941	sd_initpkt_for_buf,		/* Index: 5 */
1942	sd_initpkt_for_buf,		/* Index: 6 */
1943	sd_initpkt_for_buf,		/* Index: 7 */
1944	sd_initpkt_for_buf,		/* Index: 8 */
1945
1946	/* Chain for buf IO for removable-media targets (PM disabled) */
1947	sd_initpkt_for_buf,		/* Index: 9 */
1948	sd_initpkt_for_buf,		/* Index: 10 */
1949	sd_initpkt_for_buf,		/* Index: 11 */
1950
1951	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1952	sd_initpkt_for_buf,		/* Index: 12 */
1953	sd_initpkt_for_buf,		/* Index: 13 */
1954	sd_initpkt_for_buf,		/* Index: 14 */
1955	sd_initpkt_for_buf,		/* Index: 15 */
1956
1957	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1958	sd_initpkt_for_buf,		/* Index: 16 */
1959	sd_initpkt_for_buf,		/* Index: 17 */
1960	sd_initpkt_for_buf,		/* Index: 18 */
1961
1962	/* Chain for USCSI commands (non-checksum targets) */
1963	sd_initpkt_for_uscsi,		/* Index: 19 */
1964	sd_initpkt_for_uscsi,		/* Index: 20 */
1965
1966	/* Chain for USCSI commands (checksum targets) */
1967	sd_initpkt_for_uscsi,		/* Index: 21 */
1968	sd_initpkt_for_uscsi,		/* Index: 22 */
1969	sd_initpkt_for_uscsi,		/* Index: 23 */
1970
1971	/* Chain for "direct" USCSI commands (all targets) */
1972	sd_initpkt_for_uscsi,		/* Index: 24 */
1973
1974	/* Chain for "direct priority" USCSI commands (all targets) */
1975	sd_initpkt_for_uscsi,		/* Index: 25 */
1976
1977};
1978
1979
1980/*
1981 * Array to map a layering chain index to the appropriate destroypkt routine.
1982 * The redundant entries are present so that the index used for accessing
1983 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1984 * with this table as well.
1985 */
1986typedef void (*sd_destroypkt_t)(struct buf *);
1987
1988static sd_destroypkt_t	sd_destroypkt_map[] = {
1989
1990	/* Chain for buf IO for disk drive targets (PM enabled) */
1991	sd_destroypkt_for_buf,		/* Index: 0 */
1992	sd_destroypkt_for_buf,		/* Index: 1 */
1993	sd_destroypkt_for_buf,		/* Index: 2 */
1994
1995	/* Chain for buf IO for disk drive targets (PM disabled) */
1996	sd_destroypkt_for_buf,		/* Index: 3 */
1997	sd_destroypkt_for_buf,		/* Index: 4 */
1998
1999	/* Chain for buf IO for removable-media targets (PM enabled) */
2000	sd_destroypkt_for_buf,		/* Index: 5 */
2001	sd_destroypkt_for_buf,		/* Index: 6 */
2002	sd_destroypkt_for_buf,		/* Index: 7 */
2003	sd_destroypkt_for_buf,		/* Index: 8 */
2004
2005	/* Chain for buf IO for removable-media targets (PM disabled) */
2006	sd_destroypkt_for_buf,		/* Index: 9 */
2007	sd_destroypkt_for_buf,		/* Index: 10 */
2008	sd_destroypkt_for_buf,		/* Index: 11 */
2009
2010	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2011	sd_destroypkt_for_buf,		/* Index: 12 */
2012	sd_destroypkt_for_buf,		/* Index: 13 */
2013	sd_destroypkt_for_buf,		/* Index: 14 */
2014	sd_destroypkt_for_buf,		/* Index: 15 */
2015
2016	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2017	sd_destroypkt_for_buf,		/* Index: 16 */
2018	sd_destroypkt_for_buf,		/* Index: 17 */
2019	sd_destroypkt_for_buf,		/* Index: 18 */
2020
2021	/* Chain for USCSI commands (non-checksum targets) */
2022	sd_destroypkt_for_uscsi,	/* Index: 19 */
2023	sd_destroypkt_for_uscsi,	/* Index: 20 */
2024
2025	/* Chain for USCSI commands (checksum targets) */
2026	sd_destroypkt_for_uscsi,	/* Index: 21 */
2027	sd_destroypkt_for_uscsi,	/* Index: 22 */
2028	sd_destroypkt_for_uscsi,	/* Index: 23 */
2029
2030	/* Chain for "direct" USCSI commands (all targets) */
2031	sd_destroypkt_for_uscsi,	/* Index: 24 */
2032
2033	/* Chain for "direct priority" USCSI commands (all targets) */
2034	sd_destroypkt_for_uscsi,	/* Index: 25 */
2035
2036};
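
/*
 * Illustrative sketch (an assumption, not a verbatim call site): the
 * core code dispatches through the two maps above using the chain
 * indexes recorded in the xbuf, e.g.:
 *
 *	rval = (*sd_initpkt_map[xp->xb_chain_iostart])(bp, &pktp);
 *	...
 *	(*sd_destroypkt_map[xp->xb_chain_iodone])(bp);
 */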
2037
2038
2039
2040/*
2041 * Array to map a layering chain index to the appropriate chain "type".
2042 * The chain type indicates a specific property/usage of the chain.
2043 * The redundant entries are present so that the index used for accessing
2044 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2045 * with this table as well.
2046 */
2047
2048#define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2049#define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2050#define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2051#define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2052#define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2053						/* (for error recovery) */
2054
2055static int sd_chain_type_map[] = {
2056
2057	/* Chain for buf IO for disk drive targets (PM enabled) */
2058	SD_CHAIN_BUFIO,			/* Index: 0 */
2059	SD_CHAIN_BUFIO,			/* Index: 1 */
2060	SD_CHAIN_BUFIO,			/* Index: 2 */
2061
2062	/* Chain for buf IO for disk drive targets (PM disabled) */
2063	SD_CHAIN_BUFIO,			/* Index: 3 */
2064	SD_CHAIN_BUFIO,			/* Index: 4 */
2065
2066	/* Chain for buf IO for removable-media targets (PM enabled) */
2067	SD_CHAIN_BUFIO,			/* Index: 5 */
2068	SD_CHAIN_BUFIO,			/* Index: 6 */
2069	SD_CHAIN_BUFIO,			/* Index: 7 */
2070	SD_CHAIN_BUFIO,			/* Index: 8 */
2071
2072	/* Chain for buf IO for removable-media targets (PM disabled) */
2073	SD_CHAIN_BUFIO,			/* Index: 9 */
2074	SD_CHAIN_BUFIO,			/* Index: 10 */
2075	SD_CHAIN_BUFIO,			/* Index: 11 */
2076
2077	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2078	SD_CHAIN_BUFIO,			/* Index: 12 */
2079	SD_CHAIN_BUFIO,			/* Index: 13 */
2080	SD_CHAIN_BUFIO,			/* Index: 14 */
2081	SD_CHAIN_BUFIO,			/* Index: 15 */
2082
2083	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2084	SD_CHAIN_BUFIO,			/* Index: 16 */
2085	SD_CHAIN_BUFIO,			/* Index: 17 */
2086	SD_CHAIN_BUFIO,			/* Index: 18 */
2087
2088	/* Chain for USCSI commands (non-checksum targets) */
2089	SD_CHAIN_USCSI,			/* Index: 19 */
2090	SD_CHAIN_USCSI,			/* Index: 20 */
2091
2092	/* Chain for USCSI commands (checksum targets) */
2093	SD_CHAIN_USCSI,			/* Index: 21 */
2094	SD_CHAIN_USCSI,			/* Index: 22 */
2095	SD_CHAIN_USCSI,			/* Index: 23 */
2096
2097	/* Chain for "direct" USCSI commands (all targets) */
2098	SD_CHAIN_DIRECT,		/* Index: 24 */
2099
2100	/* Chain for "direct priority" USCSI commands (all targets) */
2101	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2102};
2103
2104
2105/* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2106#define	SD_IS_BUFIO(xp)			\
2107	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2108
2109/* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2110#define	SD_IS_DIRECT_PRIORITY(xp)	\
2111	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2112
2113
2114
2115/*
2116 * Struct, array, and macros to map a specific chain to the appropriate
2117 * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2118 *
2119 * The sd_chain_index_map[] array is used at attach time to set the various
2120 * un_xxx_chain type members of the sd_lun softstate to the specific layering
2121 * chain to be used with the instance. This allows different instances to use
2122 * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2123 * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2124 * values at sd_xbuf init time, this allows (1) layering chains may be changed
2125 * dynamically & without the use of locking; and (2) a layer may update the
2126 * xb_chain_io[start|done] member in a given xbuf with its current index value,
2127 * to allow for deferred processing of an IO within the same chain from a
2128 * different execution context.
2129 */
2130
2131struct sd_chain_index {
2132	int	sci_iostart_index;
2133	int	sci_iodone_index;
2134};
2135
2136static struct sd_chain_index	sd_chain_index_map[] = {
2137	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2138	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2139	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2140	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2141	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2142	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2143	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2144	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2145	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2146	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2147};
2148
2149
2150/*
2151 * The following are indexes into the sd_chain_index_map[] array.
2152 */
2153
2154/* un->un_buf_chain_type must be set to one of these */
2155#define	SD_CHAIN_INFO_DISK		0
2156#define	SD_CHAIN_INFO_DISK_NO_PM	1
2157#define	SD_CHAIN_INFO_RMMEDIA		2
2158#define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2159#define	SD_CHAIN_INFO_CHKSUM		4
2160#define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2161
2162/* un->un_uscsi_chain_type must be set to one of these */
2163#define	SD_CHAIN_INFO_USCSI_CMD		6
2164#define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2165/* USCSI with PM disabled is the same as DIRECT */
2166#define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2167
2168/* un->un_direct_chain_type must be set to one of these */
2169#define	SD_CHAIN_INFO_DIRECT_CMD	8
2170
2171/* un->un_priority_chain_type must be set to one of these */
2172#define	SD_CHAIN_INFO_PRIORITY_CMD	9
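
/*
 * Illustrative sketch (an assumption, not a verbatim excerpt): at
 * attach time an instance selects its chains, and each sd_xbuf is then
 * initialized from the map, e.g.:
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
 *	...
 *	xp->xb_chain_iostart =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iostart_index;
 *	xp->xb_chain_iodone =
 *	    sd_chain_index_map[un->un_buf_chain_type].sci_iodone_index;
 */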
2173
2174/* size for devid inquiries */
2175#define	MAX_INQUIRY_SIZE		0xF0
2176
2177/*
2178 * Macros used by functions to pass a given buf(9S) struct along to the
2179 * next function in the layering chain for further processing.
2180 *
2181 * In the following macros, passing more than three arguments to the called
2182 * routines causes the optimizer for the SPARC compiler to stop doing tail
2183 * call elimination, which results in significant performance degradation.
2184 */
2185#define	SD_BEGIN_IOSTART(index, un, bp)	\
2186	((*(sd_iostart_chain[index]))(index, un, bp))
2187
2188#define	SD_BEGIN_IODONE(index, un, bp)	\
2189	((*(sd_iodone_chain[index]))(index, un, bp))
2190
2191#define	SD_NEXT_IOSTART(index, un, bp)				\
2192	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2193
2194#define	SD_NEXT_IODONE(index, un, bp)				\
2195	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
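
/*
 * Illustrative sketch of a layering routine built on the macros above.
 * This is NOT part of the driver: the function names are hypothetical
 * and the code is compiled out.  A pass-through layer forwards the buf
 * down the iostart chain, and passes completion back up the iodone
 * chain.  Note that each call uses exactly three arguments, per the
 * tail-call-elimination note above.
 */
#ifdef	SD_CHAIN_EXAMPLE
static void
sd_example_passthru_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	/* layer-specific preprocessing of bp would go here */
	SD_NEXT_IOSTART(index, un, bp);
}

static void
sd_example_passthru_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* layer-specific completion processing of bp would go here */
	SD_NEXT_IODONE(index, un, bp);
}
#endif	/* SD_CHAIN_EXAMPLE */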
2196
2197/*
2198 *    Function: _init
2199 *
2200 * Description: This is the driver _init(9E) entry point.
2201 *
2202 * Return Code: Returns the value from mod_install(9F) or
2203 *		ddi_soft_state_init(9F) as appropriate.
2204 *
2205 *     Context: Called when driver module loaded.
2206 */
2207
2208int
2209_init(void)
2210{
2211	int	err;
2212
2213	/* establish driver name from module name */
2214	sd_label = (char *)mod_modname(&modlinkage);
2215
2216#ifndef XPV_HVM_DRIVER
2217	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2218	    SD_MAXUNIT);
2219	if (err != 0) {
2220		return (err);
2221	}
2222
2223#else /* XPV_HVM_DRIVER */
2224	/* Remove the leading "hvm_" from the module name */
2225	ASSERT(strncmp(sd_label, "hvm_", strlen("hvm_")) == 0);
2226	sd_label += strlen("hvm_");
2227
2228#endif /* XPV_HVM_DRIVER */
2229
2230	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2231	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2232	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2233
2234	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2235	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2236	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2237
2238	/*
2239	 * it's OK to init here even for fibre devices
2240	 */
2241	sd_scsi_probe_cache_init();
2242
2243	sd_scsi_target_lun_init();
2244
2245	/*
2246	 * Creating taskq before mod_install ensures that all callers (threads)
2247	 * that enter the module after a successful mod_install encounter
2248	 * a valid taskq.
2249	 */
2250	sd_taskq_create();
2251
2252	err = mod_install(&modlinkage);
2253	if (err != 0) {
2254		/* delete taskq if install fails */
2255		sd_taskq_delete();
2256
2257		mutex_destroy(&sd_detach_mutex);
2258		mutex_destroy(&sd_log_mutex);
2259		mutex_destroy(&sd_label_mutex);
2260
2261		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2262		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2263		cv_destroy(&sd_tr.srq_inprocess_cv);
2264
2265		sd_scsi_probe_cache_fini();
2266
2267		sd_scsi_target_lun_fini();
2268
2269#ifndef XPV_HVM_DRIVER
2270		ddi_soft_state_fini(&sd_state);
2271#endif /* !XPV_HVM_DRIVER */
2272		return (err);
2273	}
2274
2275	return (err);
2276}
2277
2278
2279/*
2280 *    Function: _fini
2281 *
2282 * Description: This is the driver _fini(9E) entry point.
2283 *
2284 * Return Code: Returns the value from mod_remove(9F)
2285 *
2286 *     Context: Called when driver module is unloaded.
2287 */
2288
2289int
2290_fini(void)
2291{
2292	int err;
2293
2294	if ((err = mod_remove(&modlinkage)) != 0) {
2295		return (err);
2296	}
2297
2298	sd_taskq_delete();
2299
2300	mutex_destroy(&sd_detach_mutex);
2301	mutex_destroy(&sd_log_mutex);
2302	mutex_destroy(&sd_label_mutex);
2303	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2304
2305	sd_scsi_probe_cache_fini();
2306
2307	sd_scsi_target_lun_fini();
2308
2309	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2310	cv_destroy(&sd_tr.srq_inprocess_cv);
2311
2312#ifndef XPV_HVM_DRIVER
2313	ddi_soft_state_fini(&sd_state);
2314#endif /* !XPV_HVM_DRIVER */
2315
2316	return (err);
2317}
2318
2319
2320/*
2321 *    Function: _info
2322 *
2323 * Description: This is the driver _info(9E) entry point.
2324 *
2325 *   Arguments: modinfop - pointer to the driver modinfo structure
2326 *
2327 * Return Code: Returns the value from mod_info(9F).
2328 *
2329 *     Context: Kernel thread context
2330 */
2331
2332int
2333_info(struct modinfo *modinfop)
2334{
2335	return (mod_info(&modlinkage, modinfop));
2336}
2337
2338
2339/*
2340 * The following routines implement the driver message logging facility.
2341 * They provide component- and level- based debug output filtering.
2342 * Output may also be restricted to messages for a single instance by
2343 * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2344 * to NULL, then messages for all instances are printed.
2345 *
2346 * These routines have been cloned from each other due to the language
2347 * constraints of macros and variable argument list processing.
2348 */
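
/*
 * For example (illustrative), the filtering globals are patchable and
 * may be set at boot time via /etc/system:
 *
 *	set sd:sd_component_mask = 0xffffffff
 *	set sd:sd_level_mask = 0xffffffff
 *
 * which enables output for all components at all levels.  Since
 * sd_debug_un holds a soft state pointer, it is normally patched on a
 * live system (e.g. with a kernel debugger) rather than in /etc/system.
 */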
2349
2350
2351/*
2352 *    Function: sd_log_err
2353 *
2354 * Description: This routine is called by the SD_ERROR macro for debug
2355 *		logging of error conditions.
2356 *
2357 *   Arguments: comp - driver component being logged
2358 *		un   - pointer to driver soft state (unit) structure
2359 *		fmt  - error string and format to be logged
2360 */
2361
2362static void
2363sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2364{
2365	va_list		ap;
2366	dev_info_t	*dev;
2367
2368	ASSERT(un != NULL);
2369	dev = SD_DEVINFO(un);
2370	ASSERT(dev != NULL);
2371
2372	/*
2373	 * Filter messages based on the global component and level masks.
2374	 * Also print if un matches the value of sd_debug_un, or if
2375	 * sd_debug_un is set to NULL.
2376	 */
2377	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2378	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2379		mutex_enter(&sd_log_mutex);
2380		va_start(ap, fmt);
2381		(void) vsprintf(sd_log_buf, fmt, ap);
2382		va_end(ap);
2383		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2384		mutex_exit(&sd_log_mutex);
2385	}
2386#ifdef SD_FAULT_INJECTION
2387	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2388	if (un->sd_injection_mask & comp) {
2389		mutex_enter(&sd_log_mutex);
2390		va_start(ap, fmt);
2391		(void) vsprintf(sd_log_buf, fmt, ap);
2392		va_end(ap);
2393		sd_injection_log(sd_log_buf, un);
2394		mutex_exit(&sd_log_mutex);
2395	}
2396#endif
2397}
2398
2399
2400/*
2401 *    Function: sd_log_info
2402 *
2403 * Description: This routine is called by the SD_INFO macro for debug
2404 *		logging of general purpose informational conditions.
2405 *
2406 *   Arguments: comp - driver component being logged
2407 *		dev  - pointer to driver info structure
2408 *		fmt  - info string and format to be logged
2409 */
2410
2411static void
2412sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2413{
2414	va_list		ap;
2415	dev_info_t	*dev;
2416
2417	ASSERT(un != NULL);
2418	dev = SD_DEVINFO(un);
2419	ASSERT(dev != NULL);
2420
2421	/*
2422	 * Filter messages based on the global component and level masks.
2423	 * Also print if un matches the value of sd_debug_un, or if
2424	 * sd_debug_un is set to NULL.
2425	 */
2426	if ((sd_component_mask & component) &&
2427	    (sd_level_mask & SD_LOGMASK_INFO) &&
2428	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2429		mutex_enter(&sd_log_mutex);
2430		va_start(ap, fmt);
2431		(void) vsprintf(sd_log_buf, fmt, ap);
2432		va_end(ap);
2433		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2434		mutex_exit(&sd_log_mutex);
2435	}
2436#ifdef SD_FAULT_INJECTION
2437	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2438	if (un->sd_injection_mask & component) {
2439		mutex_enter(&sd_log_mutex);
2440		va_start(ap, fmt);
2441		(void) vsprintf(sd_log_buf, fmt, ap);
2442		va_end(ap);
2443		sd_injection_log(sd_log_buf, un);
2444		mutex_exit(&sd_log_mutex);
2445	}
2446#endif
2447}
2448
2449
2450/*
2451 *    Function: sd_log_trace
2452 *
2453 * Description: This routine is called by the SD_TRACE macro for debug
2454 *		logging of trace conditions (i.e. function entry/exit).
2455 *
2456 *   Arguments: component - driver component being logged
2457 *		un   - pointer to driver soft state (unit) structure
2458 *		fmt  - trace string and format to be logged
2459 */
2460
2461static void
2462sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2463{
2464	va_list		ap;
2465	dev_info_t	*dev;
2466
2467	ASSERT(un != NULL);
2468	dev = SD_DEVINFO(un);
2469	ASSERT(dev != NULL);
2470
2471	/*
2472	 * Filter messages based on the global component and level masks.
2473	 * Also print if un matches the value of sd_debug_un, or if
2474	 * sd_debug_un is set to NULL.
2475	 */
2476	if ((sd_component_mask & component) &&
2477	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2478	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2479		mutex_enter(&sd_log_mutex);
2480		va_start(ap, fmt);
2481		(void) vsprintf(sd_log_buf, fmt, ap);
2482		va_end(ap);
2483		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2484		mutex_exit(&sd_log_mutex);
2485	}
2486#ifdef SD_FAULT_INJECTION
2487	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2488	if (un->sd_injection_mask & component) {
2489		mutex_enter(&sd_log_mutex);
2490		va_start(ap, fmt);
2491		(void) vsprintf(sd_log_buf, fmt, ap);
2492		va_end(ap);
2493		sd_injection_log(sd_log_buf, un);
2494		mutex_exit(&sd_log_mutex);
2495	}
2496#endif
2497}
2498
2499
2500/*
2501 *    Function: sdprobe
2502 *
2503 * Description: This is the driver probe(9e) entry point function.
2504 *
2505 *   Arguments: devi - opaque device info handle
2506 *
2507 * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2508 *              DDI_PROBE_FAILURE: If the probe failed.
2509 *              DDI_PROBE_PARTIAL: If the instance is not present now,
2510 *				   but may be present in the future.
2511 */
2512
2513static int
2514sdprobe(dev_info_t *devi)
2515{
2516	struct scsi_device	*devp;
2517	int			rval;
2518#ifndef XPV_HVM_DRIVER
2519	int			instance = ddi_get_instance(devi);
2520#endif /* !XPV_HVM_DRIVER */
2521
2522	/*
2523	 * if it wasn't for pln, sdprobe could actually be nulldev
2524	 * in the "__fibre" case.
2525	 */
2526	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2527		return (DDI_PROBE_DONTCARE);
2528	}
2529
2530	devp = ddi_get_driver_private(devi);
2531
2532	if (devp == NULL) {
2533		/* Oops... nexus driver is misconfigured... */
2534		return (DDI_PROBE_FAILURE);
2535	}
2536
2537#ifndef XPV_HVM_DRIVER
2538	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2539		return (DDI_PROBE_PARTIAL);
2540	}
2541#endif /* !XPV_HVM_DRIVER */
2542
2543	/*
2544	 * Call the SCSA utility probe routine to see if we actually
2545	 * have a target at this SCSI nexus.
2546	 */
2547	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2548	case SCSIPROBE_EXISTS:
2549		switch (devp->sd_inq->inq_dtype) {
2550		case DTYPE_DIRECT:
2551			rval = DDI_PROBE_SUCCESS;
2552			break;
2553		case DTYPE_RODIRECT:
2554			/* CDs etc. Can be removable media */
2555			rval = DDI_PROBE_SUCCESS;
2556			break;
2557		case DTYPE_OPTICAL:
2558			/*
2559			 * Rewritable optical drive (e.g. HP115AA).
2560			 * Can also be removable media
2561			 */
2562
2563			/*
2564			 * Do not attempt to bind to  DTYPE_OPTICAL if
2565			 * pre solaris 9 sparc sd behavior is required
2566			 *
2567			 * If first time through and sd_dtype_optical_bind
2568			 * has not been set in /etc/system check properties
2569			 */
2570
2571			if (sd_dtype_optical_bind  < 0) {
2572				sd_dtype_optical_bind = ddi_prop_get_int
2573				    (DDI_DEV_T_ANY, devi, 0,
2574				    "optical-device-bind", 1);
2575			}
2576
2577			if (sd_dtype_optical_bind == 0) {
2578				rval = DDI_PROBE_FAILURE;
2579			} else {
2580				rval = DDI_PROBE_SUCCESS;
2581			}
2582			break;
2583
2584		case DTYPE_NOTPRESENT:
2585		default:
2586			rval = DDI_PROBE_FAILURE;
2587			break;
2588		}
2589		break;
2590	default:
2591		rval = DDI_PROBE_PARTIAL;
2592		break;
2593	}
2594
2595	/*
2596	 * This routine checks for resource allocation prior to freeing,
2597	 * so it will take care of the "smart probing" case where a
2598	 * scsi_probe() may or may not have been issued and will *not*
2599	 * free previously-freed resources.
2600	 */
2601	scsi_unprobe(devp);
2602	return (rval);
2603}
2604
2605
2606/*
2607 *    Function: sdinfo
2608 *
2609 * Description: This is the driver getinfo(9e) entry point function.
2610 * 		Given the device number, return the devinfo pointer from
2611 *		the scsi_device structure or the instance number
2612 *		associated with the dev_t.
2613 *
2614 *   Arguments: dip     - pointer to device info structure
2615 *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2616 *			  DDI_INFO_DEVT2INSTANCE)
2617 *		arg     - driver dev_t
2618 *		resultp - user buffer for request response
2619 *
2620 * Return Code: DDI_SUCCESS
2621 *              DDI_FAILURE
2622 */
2623/* ARGSUSED */
2624static int
2625sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2626{
2627	struct sd_lun	*un;
2628	dev_t		dev;
2629	int		instance;
2630	int		error;
2631
2632	switch (infocmd) {
2633	case DDI_INFO_DEVT2DEVINFO:
2634		dev = (dev_t)arg;
2635		instance = SDUNIT(dev);
2636		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2637			return (DDI_FAILURE);
2638		}
2639		*result = (void *) SD_DEVINFO(un);
2640		error = DDI_SUCCESS;
2641		break;
2642	case DDI_INFO_DEVT2INSTANCE:
2643		dev = (dev_t)arg;
2644		instance = SDUNIT(dev);
2645		*result = (void *)(uintptr_t)instance;
2646		error = DDI_SUCCESS;
2647		break;
2648	default:
2649		error = DDI_FAILURE;
2650	}
2651	return (error);
2652}
2653
2654/*
2655 *    Function: sd_prop_op
2656 *
2657 * Description: This is the driver prop_op(9e) entry point function.
2658 *		Return the number of blocks for the partition in question
2659 *		or forward the request to the property facilities.
2660 *
2661 *   Arguments: dev       - device number
2662 *		dip       - pointer to device info structure
2663 *		prop_op   - property operator
2664 *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2665 *		name      - pointer to property name
2666 *		valuep    - pointer or address of the user buffer
2667 *		lengthp   - property length
2668 *
2669 * Return Code: DDI_PROP_SUCCESS
2670 *              DDI_PROP_NOT_FOUND
2671 *              DDI_PROP_UNDEFINED
2672 *              DDI_PROP_NO_MEMORY
2673 *              DDI_PROP_BUF_TOO_SMALL
2674 */
2675
2676static int
2677sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2678	char *name, caddr_t valuep, int *lengthp)
2679{
2680	struct sd_lun	*un;
2681
2682	if ((un = ddi_get_soft_state(sd_state, ddi_get_instance(dip))) == NULL)
2683		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2684		    name, valuep, lengthp));
2685
2686	return (cmlb_prop_op(un->un_cmlbhandle,
2687	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
2688	    SDPART(dev), (void *)SD_PATH_DIRECT));
2689}
2690
2691/*
2692 * The following functions are for smart probing:
2693 * sd_scsi_probe_cache_init()
2694 * sd_scsi_probe_cache_fini()
2695 * sd_scsi_clear_probe_cache()
2696 * sd_scsi_probe_with_cache()
2697 */
2698
2699/*
2700 *    Function: sd_scsi_probe_cache_init
2701 *
2702 * Description: Initializes the probe response cache mutex and head pointer.
2703 *
2704 *     Context: Kernel thread context
2705 */
2706
2707static void
2708sd_scsi_probe_cache_init(void)
2709{
2710	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2711	sd_scsi_probe_cache_head = NULL;
2712}
2713
2714
2715/*
2716 *    Function: sd_scsi_probe_cache_fini
2717 *
2718 * Description: Frees all resources associated with the probe response cache.
2719 *
2720 *     Context: Kernel thread context
2721 */
2722
2723static void
2724sd_scsi_probe_cache_fini(void)
2725{
2726	struct sd_scsi_probe_cache *cp;
2727	struct sd_scsi_probe_cache *ncp;
2728
2729	/* Clean up our smart probing linked list */
2730	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2731		ncp = cp->next;
2732		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2733	}
2734	sd_scsi_probe_cache_head = NULL;
2735	mutex_destroy(&sd_scsi_probe_cache_mutex);
2736}
2737
2738
2739/*
2740 *    Function: sd_scsi_clear_probe_cache
2741 *
2742 * Description: This routine clears the probe response cache. This is
2743 *		done when open() returns ENXIO so that when deferred
2744 *		attach is attempted (possibly after a device has been
2745 *		turned on) we will retry the probe. Since we don't know
2746 *		which target we failed to open, we just clear the
2747 *		entire cache.
2748 *
2749 *     Context: Kernel thread context
2750 */
2751
2752static void
2753sd_scsi_clear_probe_cache(void)
2754{
2755	struct sd_scsi_probe_cache	*cp;
2756	int				i;
2757
2758	mutex_enter(&sd_scsi_probe_cache_mutex);
2759	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2760		/*
2761		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2762		 * force probing to be performed the next time
2763		 * sd_scsi_probe_with_cache is called.
2764		 */
2765		for (i = 0; i < NTARGETS_WIDE; i++) {
2766			cp->cache[i] = SCSIPROBE_EXISTS;
2767		}
2768	}
2769	mutex_exit(&sd_scsi_probe_cache_mutex);
2770}
2771
2772
2773/*
2774 *    Function: sd_scsi_probe_with_cache
2775 *
2776 * Description: This routine implements support for a scsi device probe
2777 *		with cache. The driver maintains a cache of the target
2778 *		responses to scsi probes. If we get no response from a
2779 *		target during a probe inquiry, we remember that, and we
2780 *		avoid additional calls to scsi_probe on non-zero LUNs
2781 *		on the same target until the cache is cleared. By doing
2782 *		so we avoid the 1/4 sec selection timeout for nonzero
2783 *		LUNs. lun0 of a target is always probed.
2784 *
2785 *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2786 *              waitfunc - indicates what the allocator routines should
2787 *			   do when resources are not available. This value
2788 *			   is passed on to scsi_probe() when that routine
2789 *			   is called.
2790 *
2791 * Return Code: SCSIPROBE_NORESP if a NORESP is in the probe response cache;
2792 *		otherwise the value returned by scsi_probe(9F).
2793 *
2794 *     Context: Kernel thread context
2795 */
2796
2797static int
2798sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2799{
2800	struct sd_scsi_probe_cache	*cp;
2801	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2802	int		lun, tgt;
2803
2804	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2805	    SCSI_ADDR_PROP_LUN, 0);
2806	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2807	    SCSI_ADDR_PROP_TARGET, -1);
2808
2809	/* Make sure caching enabled and target in range */
2810	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2811		/* do it the old way (no cache) */
2812		return (scsi_probe(devp, waitfn));
2813	}
2814
2815	mutex_enter(&sd_scsi_probe_cache_mutex);
2816
2817	/* Find the cache for this scsi bus instance */
2818	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2819		if (cp->pdip == pdip) {
2820			break;
2821		}
2822	}
2823
2824	/* If we can't find a cache for this pdip, create one */
2825	if (cp == NULL) {
2826		int i;
2827
2828		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2829		    KM_SLEEP);
2830		cp->pdip = pdip;
2831		cp->next = sd_scsi_probe_cache_head;
2832		sd_scsi_probe_cache_head = cp;
2833		for (i = 0; i < NTARGETS_WIDE; i++) {
2834			cp->cache[i] = SCSIPROBE_EXISTS;
2835		}
2836	}
2837
2838	mutex_exit(&sd_scsi_probe_cache_mutex);
2839
2840	/* Recompute the cache for this target if LUN zero */
2841	if (lun == 0) {
2842		cp->cache[tgt] = SCSIPROBE_EXISTS;
2843	}
2844
2845	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2846	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2847		return (SCSIPROBE_NORESP);
2848	}
2849
2850	/* Do the actual probe; save & return the result */
2851	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2852}
2853
2854
2855/*
2856 *    Function: sd_scsi_target_lun_init
2857 *
2858 * Description: Initializes the attached lun chain mutex and head pointer.
2859 *
2860 *     Context: Kernel thread context
2861 */
2862
2863static void
2864sd_scsi_target_lun_init(void)
2865{
2866	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2867	sd_scsi_target_lun_head = NULL;
2868}
2869
2870
2871/*
2872 *    Function: sd_scsi_target_lun_fini
2873 *
2874 * Description: Frees all resources associated with the attached lun
2875 *              chain
2876 *
2877 *     Context: Kernel thread context
2878 */
2879
2880static void
2881sd_scsi_target_lun_fini(void)
2882{
2883	struct sd_scsi_hba_tgt_lun	*cp;
2884	struct sd_scsi_hba_tgt_lun	*ncp;
2885
2886	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2887		ncp = cp->next;
2888		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2889	}
2890	sd_scsi_target_lun_head = NULL;
2891	mutex_destroy(&sd_scsi_target_lun_mutex);
2892}
2893
2894
2895/*
2896 *    Function: sd_scsi_get_target_lun_count
2897 *
2898 * Description: This routine will check in the attached lun chain to see
2899 * 		how many luns are attached on the required SCSI controller
2900 * 		and target. Currently, some capabilities such as tagged
2901 *		queueing are supported by the HBA on a per-target basis, so
2902 *		all luns on a target have the same capabilities. Based on
2903 * 		this assumption, sd should only set these capabilities once
2904 *		per target. This function is called when sd needs to decide
2905 *		how many luns are already attached on a target.
2906 *
2907 *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2908 *			  controller device.
2909 *              target	- The target ID on the controller's SCSI bus.
2910 *
2911 * Return Code: The number of luns attached on the required target and
2912 *		controller.
2913 *		-1 if target ID is not in parallel SCSI scope or the given
2914 * 		dip is not in the chain.
2915 *
2916 *     Context: Kernel thread context
2917 */
2918
2919static int
2920sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2921{
2922	struct sd_scsi_hba_tgt_lun	*cp;
2923
2924	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2925		return (-1);
2926	}
2927
2928	mutex_enter(&sd_scsi_target_lun_mutex);
2929
2930	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2931		if (cp->pdip == dip) {
2932			break;
2933		}
2934	}
2935
2936	mutex_exit(&sd_scsi_target_lun_mutex);
2937
2938	if (cp == NULL) {
2939		return (-1);
2940	}
2941
2942	return (cp->nlun[target]);
2943}
2944
2945
2946/*
2947 *    Function: sd_scsi_update_lun_on_target
2948 *
2949 * Description: This routine is used to update the attached lun chain when a
2950 *		lun is attached or detached on a target.
2951 *
2952 *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2953 *                        controller device.
2954 *              target  - The target ID on the controller's SCSI bus.
2955 *		flag	- Indicate the lun is attached or detached.
2956 *
2957 *     Context: Kernel thread context
2958 */
2959
2960static void
2961sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
2962{
2963	struct sd_scsi_hba_tgt_lun	*cp;
2964
2965	mutex_enter(&sd_scsi_target_lun_mutex);
2966
2967	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2968		if (cp->pdip == dip) {
2969			break;
2970		}
2971	}
2972
2973	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
2974		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
2975		    KM_SLEEP);
2976		cp->pdip = dip;
2977		cp->next = sd_scsi_target_lun_head;
2978		sd_scsi_target_lun_head = cp;
2979	}
2980
2981	mutex_exit(&sd_scsi_target_lun_mutex);
2982
2983	if (cp != NULL) {
2984		if (flag == SD_SCSI_LUN_ATTACH) {
2985			cp->nlun[target]++;
2986		} else {
2987			cp->nlun[target]--;
2988		}
2989	}
2990}
2991
2992
2993/*
2994 *    Function: sd_spin_up_unit
2995 *
2996 * Description: Issues the following commands to spin up the device:
2997 *		START STOP UNIT and INQUIRY.
2998 *
2999 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3000 *                      structure for this target.
3001 *
3002 * Return Code: 0 - success
3003 *		EIO - failure
3004 *		EACCES - reservation conflict
3005 *
3006 *     Context: Kernel thread context
3007 */
3008
3009static int
3010sd_spin_up_unit(sd_ssc_t *ssc)
3011{
3012	size_t	resid		= 0;
3013	int	has_conflict	= FALSE;
3014	uchar_t *bufaddr;
3015	int 	status;
3016	struct sd_lun	*un;
3017
3018	ASSERT(ssc != NULL);
3019	un = ssc->ssc_un;
3020	ASSERT(un != NULL);
3021
3022	/*
3023	 * Send a throwaway START UNIT command.
3024	 *
3025	 * If we fail on this, we don't care presently what precisely
3026	 * is wrong.  EMC's arrays will also fail this with a check
3027	 * condition (0x2/0x4/0x3) if the device is "inactive," but
3028	 * we don't want to fail the attach because it may become
3029	 * "active" later.
3030	 */
3031	status = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_START,
3032	    SD_PATH_DIRECT);
3033
3034	if (status != 0) {
3035		if (status == EACCES)
3036			has_conflict = TRUE;
3037		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3038	}
3039
3040	/*
3041	 * Send another INQUIRY command to the target. This is necessary for
3042	 * non-removable media direct access devices because their INQUIRY data
3043	 * may not be fully qualified until they are spun up (perhaps via the
3044	 * START command above).  (Note: This seems to be needed for some
3045	 * legacy devices only.)  The INQUIRY command should succeed even if a
3046	 * Reservation Conflict is present.
3047	 */
3048	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
3049
3050	if (sd_send_scsi_INQUIRY(ssc, bufaddr, SUN_INQSIZE, 0, 0, &resid)
3051	    != 0) {
3052		kmem_free(bufaddr, SUN_INQSIZE);
3053		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
3054		return (EIO);
3055	}
3056
3057	/*
3058	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
3059	 * Note that this routine does not return a failure here even if the
3060	 * INQUIRY command did not return any data.  This is a legacy behavior.
3061	 */
3062	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
3063		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
3064	}
3065
3066	kmem_free(bufaddr, SUN_INQSIZE);
3067
3068	/* If we hit a reservation conflict above, tell the caller. */
3069	if (has_conflict == TRUE) {
3070		return (EACCES);
3071	}
3072
3073	return (0);
3074}
3075
3076#ifdef _LP64
3077/*
3078 *    Function: sd_enable_descr_sense
3079 *
3080 * Description: This routine attempts to select descriptor sense format
3081 *		using the Control mode page.  Devices that support 64 bit
3082 *		LBAs (for >2TB luns) should also implement descriptor
3083 *		sense data so we will call this function whenever we see
3084 *		a lun larger than 2TB.  If for some reason the device
3085 *		supports 64 bit LBAs but doesn't support descriptor sense
3086 *		presumably the mode select will fail.  Everything will
3087 *		continue to work normally except that we will not get
3088 *		complete sense data for commands that fail with an LBA
3089 *		larger than 32 bits.
3090 *
3091 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3092 *                      structure for this target.
3093 *
3094 *     Context: Kernel thread context only
3095 */
3096
3097static void
3098sd_enable_descr_sense(sd_ssc_t *ssc)
3099{
3100	uchar_t			*header;
3101	struct mode_control_scsi3 *ctrl_bufp;
3102	size_t			buflen;
3103	size_t			bd_len;
3104	int			status;
3105	struct sd_lun		*un;
3106
3107	ASSERT(ssc != NULL);
3108	un = ssc->ssc_un;
3109	ASSERT(un != NULL);
3110
3111	/*
3112	 * Read MODE SENSE page 0xA, Control Mode Page
3113	 */
3114	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3115	    sizeof (struct mode_control_scsi3);
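	/*
	 * (Descriptive note) The buffer length above mirrors the MODE
	 * SENSE data layout: a mode header, then an optional block
	 * descriptor (ATAPI devices return none), then the Control
	 * mode page itself.
	 */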
3116	header = kmem_zalloc(buflen, KM_SLEEP);
3117
3118	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
3119	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT);
3120
3121	if (status != 0) {
3122		SD_ERROR(SD_LOG_COMMON, un,
3123		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3124		goto eds_exit;
3125	}
3126
3127	/*
3128	 * Determine size of Block Descriptors in order to locate
3129	 * the mode page data. ATAPI devices return 0; SCSI devices
3130	 * should return MODE_BLK_DESC_LENGTH.
3131	 */
3132	bd_len  = ((struct mode_header *)header)->bdesc_length;
3133
3134	/* Clear the mode data length field for MODE SELECT */
3135	((struct mode_header *)header)->length = 0;
3136
3137	ctrl_bufp = (struct mode_control_scsi3 *)
3138	    (header + MODE_HEADER_LENGTH + bd_len);
3139
3140	/*
3141	 * If the page length is smaller than the expected value,
3142	 * the target device doesn't support D_SENSE. Bail out here.
3143	 */
3144	if (ctrl_bufp->mode_page.length <
3145	    sizeof (struct mode_control_scsi3) - 2) {
3146		SD_ERROR(SD_LOG_COMMON, un,
3147		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3148		goto eds_exit;
3149	}
3150
3151	/*
3152	 * Clear PS bit for MODE SELECT
3153	 */
3154	ctrl_bufp->mode_page.ps = 0;
3155
3156	/*
3157	 * Set D_SENSE to enable descriptor sense format.
3158	 */
3159	ctrl_bufp->d_sense = 1;
3160
3161	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3162
3163	/*
3164	 * Use MODE SELECT to commit the change to the D_SENSE bit
3165	 */
3166	status = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
3167	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT);
3168
3169	if (status != 0) {
3170		SD_INFO(SD_LOG_COMMON, un,
3171		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3172	} else {
3173		kmem_free(header, buflen);
3174		return;
3175	}
3176
3177eds_exit:
3178	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3179	kmem_free(header, buflen);
3180}
3181
3182/*
3183 *    Function: sd_reenable_dsense_task
3184 *
3185 * Description: Re-enable descriptor sense after device or bus reset
3186 *
3187 *     Context: Executes in a taskq() thread context
3188 */
3189static void
3190sd_reenable_dsense_task(void *arg)
3191{
3192	struct	sd_lun	*un = arg;
3193	sd_ssc_t	*ssc;
3194
3195	ASSERT(un != NULL);
3196
3197	ssc = sd_ssc_init(un);
3198	sd_enable_descr_sense(ssc);
3199	sd_ssc_fini(ssc);
3200}
3201#endif /* _LP64 */
3202
3203/*
3204 *    Function: sd_set_mmc_caps
3205 *
3206 * Description: This routine determines if the device is MMC compliant and if
3207 *		the device supports CDDA via a mode sense of the CDVD
3208 *		capabilities mode page. Also checks if the device is a
3209 *		writable DVD-RAM device.
3210 *
3211 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3212 *                      structure for this target.
3213 *
3214 *     Context: Kernel thread context only
3215 */
3216
3217static void
3218sd_set_mmc_caps(sd_ssc_t *ssc)
3219{
3220	struct mode_header_grp2		*sense_mhp;
3221	uchar_t				*sense_page;
3222	caddr_t				buf;
3223	int				bd_len;
3224	int				status;
3225	struct uscsi_cmd		com;
3226	int				rtn;
3227	uchar_t				*out_data_rw, *out_data_hd;
3228	uchar_t				*rqbuf_rw, *rqbuf_hd;
3229	struct sd_lun			*un;
3230
3231	ASSERT(ssc != NULL);
3232	un = ssc->ssc_un;
3233	ASSERT(un != NULL);
3234
3235	/*
3236	 * The flags set in this function are: MMC compliant, DVD-RAM
3237	 * writable device, and CDDA support. Each is initialized to
3238	 * FALSE and set to TRUE only if the capability is detected.
3239	 */
3240	un->un_f_mmc_cap = FALSE;
3241	un->un_f_dvdram_writable_device = FALSE;
3242	un->un_f_cfg_cdda = FALSE;
3243
3244	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3245	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
3246	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3247
3248	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3249
3250	if (status != 0) {
3251		/* command failed; just return */
3252		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3253		return;
3254	}
3255	/*
3256	 * If the mode sense request for the CDROM CAPABILITIES
3257	 * page (0x2A) succeeds, the device is assumed to be MMC.
3258	 */
3259	un->un_f_mmc_cap = TRUE;
3260
3261	/* Get to the page data */
3262	sense_mhp = (struct mode_header_grp2 *)buf;
3263	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3264	    sense_mhp->bdesc_length_lo;
3265	if (bd_len > MODE_BLK_DESC_LENGTH) {
3266		/*
3267		 * We did not get back the expected block descriptor
3268		 * length so we cannot determine if the device supports
3269		 * CDDA. However, we still indicate the device is MMC
3270		 * according to the successful response to the page
3271		 * 0x2A mode sense request.
3272		 */
3273		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3274		    "sd_set_mmc_caps: Mode Sense returned "
3275		    "invalid block descriptor length\n");
3276		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3277		return;
3278	}
3279
3280	/* See if read CDDA is supported */
3281	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3282	    bd_len);
3283	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
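	/*
	 * (Descriptive note) Per the MMC CD/DVD capabilities page
	 * layout, byte 5 bit 0 of the page indicates CD-DA command
	 * support, and byte 3 bit 5 (the 0x20 tested below) indicates
	 * DVD-RAM write support.
	 */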
3284
3285	/* See if writing DVD RAM is supported. */
3286	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3287	if (un->un_f_dvdram_writable_device == TRUE) {
3288		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3289		return;
3290	}
3291
3292	/*
3293	 * If the device presents DVD or CD capabilities in the mode
3294	 * page, we can return here since an RRD will not have
3295	 * these capabilities.
3296	 */
3297	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3298		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3299		return;
3300	}
3301	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3302
3303	/*
3304	 * If un->un_f_dvdram_writable_device is still FALSE,
3305	 * check for a Removable Rigid Disk (RRD).  An RRD
3306	 * device is identified by the features RANDOM_WRITABLE and
3307	 * HARDWARE_DEFECT_MANAGEMENT.
3308	 */
3309	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3310	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3311
3312	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3313	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3314	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3315
3316	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3317
3318	if (rtn != 0) {
3319		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3320		kmem_free(rqbuf_rw, SENSE_LENGTH);
3321		return;
3322	}
3323
3324	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3325	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3326
3327	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3328	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3329	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3330
3331	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3332
3333	if (rtn == 0) {
3334		/*
3335		 * We have good information, check for random writable
3336		 * and hardware defect features.
3337		 */
3338		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3339		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3340			un->un_f_dvdram_writable_device = TRUE;
3341		}
3342	}
3343
3344	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3345	kmem_free(rqbuf_rw, SENSE_LENGTH);
3346	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3347	kmem_free(rqbuf_hd, SENSE_LENGTH);
3348}
3349
3350/*
3351 *    Function: sd_check_for_writable_cd
3352 *
3353 * Description: This routine determines if the media in the device is
3354 *		writable or not. It uses the get configuration command (0x46)
3355 *		to determine if the media is writable.
3356 *
3357 *   Arguments: ssc - ssc contains pointer to driver soft state (unit)
 *                      structure for this target.
3358 *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3359 *                           chain and the normal command waitq, or
3360 *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3361 *                           "direct" chain and bypass the normal command
3362 *                           waitq.
3363 *
3364 *     Context: Never called at interrupt context.
3365 */
3366
3367static void
3368sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag)
3369{
3370	struct uscsi_cmd		com;
3371	uchar_t				*out_data;
3372	uchar_t				*rqbuf;
3373	int				rtn;
3374	uchar_t				*out_data_rw, *out_data_hd;
3375	uchar_t				*rqbuf_rw, *rqbuf_hd;
3376	struct mode_header_grp2		*sense_mhp;
3377	uchar_t				*sense_page;
3378	caddr_t				buf;
3379	int				bd_len;
3380	int				status;
3381	struct sd_lun			*un;
3382
3383	ASSERT(ssc != NULL);
3384	un = ssc->ssc_un;
3385	ASSERT(un != NULL);
3386	ASSERT(mutex_owned(SD_MUTEX(un)));
3387
3388	/*
3389	 * Initialize writable media to FALSE; it is set to TRUE only if
3390	 * the configuration info below tells us the media is writable.
3391	 */
3392	un->un_f_mmc_writable_media = FALSE;
3393	mutex_exit(SD_MUTEX(un));
3394
3395	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3396	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3397
3398	rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, SENSE_LENGTH,
3399	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3400
3401	if (rtn != 0)
3402		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3403
3404	mutex_enter(SD_MUTEX(un));
3405	if (rtn == 0) {
3406		/*
3407		 * We have good information, check for writable DVD.
3408		 */
3409		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
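		/*
		 * (Descriptive note) Bytes 6-7 of the GET CONFIGURATION
		 * header hold the device's current profile; per MMC,
		 * profile 0x0012 is DVD-RAM, hence the 0x12 check below
		 * for writable media.
		 */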
3410			un->un_f_mmc_writable_media = TRUE;
3411			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3412			kmem_free(rqbuf, SENSE_LENGTH);
3413			return;
3414		}
3415	}
3416
3417	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3418	kmem_free(rqbuf, SENSE_LENGTH);
3419
3420	/*
3421	 * Determine if this is a RRD type device.
3422	 */
3423	mutex_exit(SD_MUTEX(un));
3424	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3425	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
3426	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3427
3428	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3429
3430	mutex_enter(SD_MUTEX(un));
3431	if (status != 0) {
3432		/* command failed; just return */
3433		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3434		return;
3435	}
3436
3437	/* Get to the page data */
3438	sense_mhp = (struct mode_header_grp2 *)buf;
3439	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3440	if (bd_len > MODE_BLK_DESC_LENGTH) {
3441		/*
3442		 * We did not get back the expected block descriptor length so
3443		 * we cannot check the mode page.
3444		 */
3445		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3446		    "sd_check_for_writable_cd: Mode Sense returned "
3447		    "invalid block descriptor length\n");
3448		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3449		return;
3450	}
3451
3452	/*
3453	 * If the device presents DVD or CD capabilities in the mode
3454	 * page, we can return here since an RRD device will not have
3455	 * these capabilities.
3456	 */
3457	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3458	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3459		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3460		return;
3461	}
3462	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3463
3464	/*
3465	 * If un->un_f_mmc_writable_media is still FALSE,
3466	 * check for RRD type media.  An RRD device is identified
3467	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3468	 */
3469	mutex_exit(SD_MUTEX(un));
3470	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3471	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3472
3473	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3474	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3475	    RANDOM_WRITABLE, path_flag);
3476
3477	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3478	if (rtn != 0) {
3479		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3480		kmem_free(rqbuf_rw, SENSE_LENGTH);
3481		mutex_enter(SD_MUTEX(un));
3482		return;
3483	}
3484
3485	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3486	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3487
3488	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3489	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3490	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3491
3492	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3493	mutex_enter(SD_MUTEX(un));
3494	if (rtn == 0) {
3495		/*
3496		 * We have good information, check for random writable
3497		 * and hardware defect features as current.
3498		 */
3499		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3500		    (out_data_rw[10] & 0x1) &&
3501		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3502		    (out_data_hd[10] & 0x1)) {
3503			un->un_f_mmc_writable_media = TRUE;
3504		}
3505	}
3506
3507	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3508	kmem_free(rqbuf_rw, SENSE_LENGTH);
3509	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3510	kmem_free(rqbuf_hd, SENSE_LENGTH);
3511}
3512
3513/*
3514 *    Function: sd_read_unit_properties
3515 *
3516 * Description: The following implements a property lookup mechanism.
3517 *		Properties for particular disks (keyed on vendor, model
3518 *		and rev numbers) are sought in the sd.conf file via
3519 *		sd_process_sdconf_file(), and if not found there, are
3520 *		looked for in a list hardcoded in this driver via
3521 *		sd_process_sdconf_table(). Once located, the properties
3522 *		are used to update the driver unit structure.
3523 *
3524 *   Arguments: un - driver soft state (unit) structure
3525 */
3526
3527static void
3528sd_read_unit_properties(struct sd_lun *un)
3529{
3530	/*
3531	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3532	 * the "sd-config-list" property (from the sd.conf file) or if
3533	 * there was not a match for the inquiry vid/pid. If this event
3534	 * occurs, the static driver configuration table is searched for
3535	 * a match.
3536	 */
3537	ASSERT(un != NULL);
3538	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3539		sd_process_sdconf_table(un);
3540	}
3541
3542	/* check for LSI device */
3543	sd_is_lsi(un);
3544
3545
3546}
3547
3548
3549/*
3550 *    Function: sd_process_sdconf_file
3551 *
3552 * Description: Use ddi_prop_lookup(9F) to obtain the properties from the
3553 *		driver's config file (ie, sd.conf) and update the driver
3554 *		soft state structure accordingly.
3555 *
3556 *   Arguments: un - driver soft state (unit) structure
3557 *
3558 * Return Code: SD_SUCCESS - The properties were successfully set according
3559 *			     to the driver configuration file.
3560 *		SD_FAILURE - The driver config list was not obtained or
3561 *			     there was no vid/pid match. This indicates that
3562 *			     the static config table should be used.
3563 *
3564 * The config file has a property, "sd-config-list". Currently we support
3565 * two kinds of formats. For both formats, the value of this property
3566 * is a list of duplets:
3567 *
3568 *  sd-config-list=
3569 *	<duplet>,
3570 *	[,<duplet>]*;
3571 *
3572 * In the improved format, each duplet has the form
3573 *
3574 *     <duplet>:= "<vid+pid>","<tunable-list>"
3575 *
3576 * and
3577 *
3578 *     <tunable-list>:=   <tunable> [, <tunable> ]*;
3579 *     <tunable> =        <name> : <value>
3580 *
3581 * The <vid+pid> is the string that is returned by the target device on a
3582 * SCSI inquiry command, the <tunable-list> contains one or more tunables
3583 * to apply to all target devices with the specified <vid+pid>.
3584 *
3585 * Each <tunable> is a "<name> : <value>" pair.
3586 *
3587 * For the old format, the structure of each duplet is as follows:
3588 *
3589 *  <duplet>:= "<vid+pid>","<data-property-name_list>"
3590 *
3591 * The first entry of the duplet is the device ID string (the concatenated
3592 * vid & pid; not to be confused with a device_id).  This is defined in
3593 * the same way as in the sd_disk_table.
3594 *
3595 * The second part of the duplet is a string that identifies a
3596 * data-property-name-list. The data-property-name-list is defined as
3597 * follows:
3598 *
3599 *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3600 *
3601 * The syntax of <data-property-name> depends on the <version> field.
3602 *
3603 * If version = SD_CONF_VERSION_1 we have the following syntax:
3604 *
3605 * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3606 *
3607 * where the prop0 value will be used to set prop0 if bit0 is set in
3608 * the flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3609 *
3610 */
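/*
 * As an illustration only (the vid/pid strings and values below are
 * hypothetical, not entries from any shipped sd.conf or the static
 * table), a sd-config-list using both formats might look like:
 *
 *  sd-config-list=
 *	"ACME    SUPERDISK", "retries-busy:6, disksort:false",
 *	"ACME    OLDDISK",   "acme-old-data";
 *
 *  acme-old-data= 1,0x1,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0;
 *
 * The first duplet uses the improved <name>:<value> format and is
 * handled by sd_nvpair_str_decode(). The second uses the old format:
 * "acme-old-data" names an integer list whose first value is the
 * version (SD_CONF_VERSION_1), whose second is the flag word, and
 * whose remaining values are prop0..propN. With a flag word of 0x1,
 * only prop0 (30 here) is consumed, for whichever tunable the bit-0
 * SD_CONF_BSET_* flag selects.
 */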
3611
3612static int
3613sd_process_sdconf_file(struct sd_lun *un)
3614{
3615	char	**config_list = NULL;
3616	uint_t	nelements;
3617	char	*vidptr;
3618	int	vidlen;
3619	char	*dnlist_ptr;
3620	char	*dataname_ptr;
3621	char	*dataname_lasts;
3622	int	*data_list = NULL;
3623	uint_t	data_list_len;
3624	int	rval = SD_FAILURE;
3625	int	i;
3626
3627	ASSERT(un != NULL);
3628
3629	/* Obtain the configuration list associated with the .conf file */
3630	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, SD_DEVINFO(un),
3631	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, sd_config_list,
3632	    &config_list, &nelements) != DDI_PROP_SUCCESS) {
3633		return (SD_FAILURE);
3634	}
3635
3636	/*
3637	 * Compare vids in each duplet to the inquiry vid - if a match is
3638	 * made, get the data value and update the soft state structure
3639	 * accordingly.
3640	 *
3641	 * Each duplet should appear as a pair of strings; if it does not,
3642	 * return SD_FAILURE.
3643	 */
3644	if (nelements & 1) {
3645		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3646		    "sd-config-list should show as pairs of strings.\n");
3647		if (config_list)
3648			ddi_prop_free(config_list);
3649		return (SD_FAILURE);
3650	}
3651
3652	for (i = 0; i < nelements; i += 2) {
3653		/*
3654		 * Note: The assumption here is that each vid entry is on
3655		 * a unique line from its associated duplet.
3656		 */
3657		vidptr = config_list[i];
3658		vidlen = (int)strlen(vidptr);
3659		if ((vidlen == 0) ||
3660		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3661			continue;
3662		}
3663
3664		/*
3665		 * dnlist contains 1 or more blank separated
3666		 * data-property-name entries
3667		 */
3668		dnlist_ptr = config_list[i + 1];
3669
3670		if (strchr(dnlist_ptr, ':') != NULL) {
3671			/*
3672			 * Decode the improved format sd-config-list.
3673			 */
3674			sd_nvpair_str_decode(un, dnlist_ptr);
3675		} else {
3676			/*
3677			 * The old format sd-config-list, loop through all
3678			 * data-property-name entries in the
3679			 * data-property-name-list
3680			 * setting the properties for each.
3681			 */
3682			for (dataname_ptr = sd_strtok_r(dnlist_ptr, " \t",
3683			    &dataname_lasts); dataname_ptr != NULL;
3684			    dataname_ptr = sd_strtok_r(NULL, " \t",
3685			    &dataname_lasts)) {
3686				int version;
3687
3688				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3689				    "sd_process_sdconf_file: disk:%s, "
3690				    "data:%s\n", vidptr, dataname_ptr);
3691
3692				/* Get the data list */
3693				if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY,
3694				    SD_DEVINFO(un), 0, dataname_ptr, &data_list,
3695				    &data_list_len) != DDI_PROP_SUCCESS) {
3696					SD_INFO(SD_LOG_ATTACH_DETACH, un,
3697					    "sd_process_sdconf_file: data "
3698					    "property (%s) has no value\n",
3699					    dataname_ptr);
3700					continue;
3701				}
3702
3703				version = data_list[0];
3704
3705				if (version == SD_CONF_VERSION_1) {
3706					sd_tunables values;
3707
3708					/* Set the properties */
3709					if (sd_chk_vers1_data(un, data_list[1],
3710					    &data_list[2], data_list_len,
3711					    dataname_ptr) == SD_SUCCESS) {
3712						sd_get_tunables_from_conf(un,
3713						    data_list[1], &data_list[2],
3714						    &values);
3715						sd_set_vers1_properties(un,
3716						    data_list[1], &values);
3717						rval = SD_SUCCESS;
3718					} else {
3719						rval = SD_FAILURE;
3720					}
3721				} else {
3722					scsi_log(SD_DEVINFO(un), sd_label,
3723					    CE_WARN, "data property %s version "
3724					    "0x%x is invalid.",
3725					    dataname_ptr, version);
3726					rval = SD_FAILURE;
3727				}
3728				if (data_list)
3729					ddi_prop_free(data_list);
3730			}
3731		}
3732	}
3733
3734	/* free up the memory allocated by ddi_prop_lookup_string_array(). */
3735	if (config_list) {
3736		ddi_prop_free(config_list);
3737	}
3738
3739	return (rval);
3740}
3741
3742/*
3743 *    Function: sd_nvpair_str_decode()
3744 *
3745 * Description: Parse the improved format sd-config-list to get
3746 *    each entry of tunable, which includes a name-value pair.
3747 *    Then call sd_set_properties() to set the property.
3748 *
3749 *   Arguments: un - driver soft state (unit) structure
3750 *    nvpair_str - the tunable list
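 *
 *    For example (hypothetical input), a nvpair_str of
 *    "retries-busy : 6, disksort : false" decodes to the pairs
 *    ("retries-busy", "6") and ("disksort", "false"), each of which
 *    is passed in turn to sd_set_properties().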
3751 */
3752static void
3753sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str)
3754{
3755	char	*nv, *name, *value, *token;
3756	char	*nv_lasts, *v_lasts, *x_lasts;
3757
3758	for (nv = sd_strtok_r(nvpair_str, ",", &nv_lasts); nv != NULL;
3759	    nv = sd_strtok_r(NULL, ",", &nv_lasts)) {
3760		token = sd_strtok_r(nv, ":", &v_lasts);
3761		name  = sd_strtok_r(token, " \t", &x_lasts);
3762		token = sd_strtok_r(NULL, ":", &v_lasts);
3763		value = sd_strtok_r(token, " \t", &x_lasts);
3764		if (name == NULL || value == NULL) {
3765			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3766			    "sd_nvpair_str_decode: "
3767			    "name or value is not valid!\n");
3768		} else {
3769			sd_set_properties(un, name, value);
3770		}
3771	}
3772}
3773
3774/*
3775 *    Function: sd_strtok_r()
3776 *
3777 * Description: This function uses strpbrk and strspn to break
3778 *    a string into tokens across sequential calls. It returns
3779 *    NULL when no non-separator characters remain. Pass NULL as
3780 *    the first argument on subsequent calls.
3781 */
3782static char *
3783sd_strtok_r(char *string, const char *sepset, char **lasts)
3784{
3785	char	*q, *r;
3786
3787	/* First or subsequent call */
3788	if (string == NULL)
3789		string = *lasts;
3790
3791	if (string == NULL)
3792		return (NULL);
3793
3794	/* Skip leading separators */
3795	q = string + strspn(string, sepset);
3796
3797	if (*q == '\0')
3798		return (NULL);
3799
3800	if ((r = strpbrk(q, sepset)) == NULL)
3801		*lasts = NULL;
3802	else {
3803		*r = '\0';
3804		*lasts = r + 1;
3805	}
3806	return (q);
3807}
3808
3809/*
3810 *    Function: sd_set_properties()
3811 *
3812 * Description: Set device properties based on the improved
3813 *    format sd-config-list.
3814 *
3815 *   Arguments: un - driver soft state (unit) structure
3816 *    name  - supported tunable name
3817 *    value - tunable value
3818 */
3819static void
3820sd_set_properties(struct sd_lun *un, char *name, char *value)
3821{
3822	char	*endptr = NULL;
3823	long	val = 0;
3824
3825	if (strcasecmp(name, "cache-nonvolatile") == 0) {
3826		if (strcasecmp(value, "true") == 0) {
3827			un->un_f_suppress_cache_flush = TRUE;
3828		} else if (strcasecmp(value, "false") == 0) {
3829			un->un_f_suppress_cache_flush = FALSE;
3830		} else {
3831			goto value_invalid;
3832		}
3833		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3834		    "suppress_cache_flush flag set to %d\n",
3835		    un->un_f_suppress_cache_flush);
3836		return;
3837	}
3838
3839	if (strcasecmp(name, "controller-type") == 0) {
3840		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3841			un->un_ctype = val;
3842		} else {
3843			goto value_invalid;
3844		}
3845		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3846		    "ctype set to %d\n", un->un_ctype);
3847		return;
3848	}
3849
3850	if (strcasecmp(name, "delay-busy") == 0) {
3851		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3852			un->un_busy_timeout = drv_usectohz(val / 1000);
3853		} else {
3854			goto value_invalid;
3855		}
3856		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3857		    "busy_timeout set to %d\n", un->un_busy_timeout);
3858		return;
3859	}
3860
3861	if (strcasecmp(name, "disksort") == 0) {
3862		if (strcasecmp(value, "true") == 0) {
3863			un->un_f_disksort_disabled = FALSE;
3864		} else if (strcasecmp(value, "false") == 0) {
3865			un->un_f_disksort_disabled = TRUE;
3866		} else {
3867			goto value_invalid;
3868		}
3869		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3870		    "disksort disabled flag set to %d\n",
3871		    un->un_f_disksort_disabled);
3872		return;
3873	}
3874
3875	if (strcasecmp(name, "timeout-releasereservation") == 0) {
3876		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3877			un->un_reserve_release_time = val;
3878		} else {
3879			goto value_invalid;
3880		}
3881		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3882		    "reservation release timeout set to %d\n",
3883		    un->un_reserve_release_time);
3884		return;
3885	}
3886
3887	if (strcasecmp(name, "reset-lun") == 0) {
3888		if (strcasecmp(value, "true") == 0) {
3889			un->un_f_lun_reset_enabled = TRUE;
3890		} else if (strcasecmp(value, "false") == 0) {
3891			un->un_f_lun_reset_enabled = FALSE;
3892		} else {
3893			goto value_invalid;
3894		}
3895		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3896		    "lun reset enabled flag set to %d\n",
3897		    un->un_f_lun_reset_enabled);
3898		return;
3899	}
3900
3901	if (strcasecmp(name, "retries-busy") == 0) {
3902		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3903			un->un_busy_retry_count = val;
3904		} else {
3905			goto value_invalid;
3906		}
3907		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3908		    "busy retry count set to %d\n", un->un_busy_retry_count);
3909		return;
3910	}
3911
3912	if (strcasecmp(name, "retries-timeout") == 0) {
3913		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3914			un->un_retry_count = val;
3915		} else {
3916			goto value_invalid;
3917		}
3918		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3919		    "timeout retry count set to %d\n", un->un_retry_count);
3920		return;
3921	}
3922
3923	if (strcasecmp(name, "retries-notready") == 0) {
3924		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3925			un->un_notready_retry_count = val;
3926		} else {
3927			goto value_invalid;
3928		}
3929		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3930		    "notready retry count set to %d\n",
3931		    un->un_notready_retry_count);
3932		return;
3933	}
3934
3935	if (strcasecmp(name, "retries-reset") == 0) {
3936		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3937			un->un_reset_retry_count = val;
3938		} else {
3939			goto value_invalid;
3940		}
3941		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3942		    "reset retry count set to %d\n",
3943		    un->un_reset_retry_count);
3944		return;
3945	}
3946
3947	if (strcasecmp(name, "throttle-max") == 0) {
3948		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3949			un->un_saved_throttle = un->un_throttle = val;
3950		} else {
3951			goto value_invalid;
3952		}
3953		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3954		    "throttle set to %d\n", un->un_throttle);
3955	}
3956
3957	if (strcasecmp(name, "throttle-min") == 0) {
3958		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
3959			un->un_min_throttle = val;
3960		} else {
3961			goto value_invalid;
3962		}
3963		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3964		    "min throttle set to %d\n", un->un_min_throttle);
3965	}
3966
3967	/*
3968	 * Validate the throttle values.
3969	 * If any of the numbers are invalid, set everything to defaults.
3970	 */
3971	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
3972	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
3973	    (un->un_min_throttle > un->un_throttle)) {
3974		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
3975		un->un_min_throttle = sd_min_throttle;
3976	}
3977	return;
3978
3979value_invalid:
3980	SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
3981	    "value of prop %s is invalid\n", name);
3982}
3983
3984/*
3985 *    Function: sd_get_tunables_from_conf()
3986 *
3987 *
3988 *    This function reads the data list from the sd.conf file and
3989 *    pulls out the entries that carry numeric values, placing each
3990 *    value in the appropriate sd_tunables member.
3991 *    Since the order of the data list members varies across
3992 *    platforms, this function reads them from the data list in a
3993 *    platform-specific order and places them into the sd_tunables
3994 *    members that are consistent across all platforms.
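 *
 *    For example (hypothetical values), with flags = 0x5 the loop below
 *    consumes data_list[0] and data_list[2] (bits 0 and 2), skips
 *    data_list[1] via the "case 0" branch, and stops once the mask
 *    exceeds the flag word.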
3995 */
3996static void
3997sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3998    sd_tunables *values)
3999{
4000	int i;
4001	int mask;
4002
4003	bzero(values, sizeof (sd_tunables));
4004
4005	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4006
4007		mask = 1 << i;
4008		if (mask > flags) {
4009			break;
4010		}
4011
4012		switch (mask & flags) {
4013		case 0:	/* This mask bit not set in flags */
4014			continue;
4015		case SD_CONF_BSET_THROTTLE:
4016			values->sdt_throttle = data_list[i];
4017			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4018			    "sd_get_tunables_from_conf: throttle = %d\n",
4019			    values->sdt_throttle);
4020			break;
4021		case SD_CONF_BSET_CTYPE:
4022			values->sdt_ctype = data_list[i];
4023			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4024			    "sd_get_tunables_from_conf: ctype = %d\n",
4025			    values->sdt_ctype);
4026			break;
4027		case SD_CONF_BSET_NRR_COUNT:
4028			values->sdt_not_rdy_retries = data_list[i];
4029			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4030			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
4031			    values->sdt_not_rdy_retries);
4032			break;
4033		case SD_CONF_BSET_BSY_RETRY_COUNT:
4034			values->sdt_busy_retries = data_list[i];
4035			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4036			    "sd_get_tunables_from_conf: busy_retries = %d\n",
4037			    values->sdt_busy_retries);
4038			break;
4039		case SD_CONF_BSET_RST_RETRIES:
4040			values->sdt_reset_retries = data_list[i];
4041			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4042			    "sd_get_tunables_from_conf: reset_retries = %d\n",
4043			    values->sdt_reset_retries);
4044			break;
4045		case SD_CONF_BSET_RSV_REL_TIME:
4046			values->sdt_reserv_rel_time = data_list[i];
4047			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4048			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
4049			    values->sdt_reserv_rel_time);
4050			break;
4051		case SD_CONF_BSET_MIN_THROTTLE:
4052			values->sdt_min_throttle = data_list[i];
4053			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4054			    "sd_get_tunables_from_conf: min_throttle = %d\n",
4055			    values->sdt_min_throttle);
4056			break;
4057		case SD_CONF_BSET_DISKSORT_DISABLED:
4058			values->sdt_disk_sort_dis = data_list[i];
4059			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4060			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
4061			    values->sdt_disk_sort_dis);
4062			break;
4063		case SD_CONF_BSET_LUN_RESET_ENABLED:
4064			values->sdt_lun_reset_enable = data_list[i];
4065			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4066			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
4067			    "\n", values->sdt_lun_reset_enable);
4068			break;
4069		case SD_CONF_BSET_CACHE_IS_NV:
4070			values->sdt_suppress_cache_flush = data_list[i];
4071			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4072			    "sd_get_tunables_from_conf: "
4073			    "suppress_cache_flush = %d"
4074			    "\n", values->sdt_suppress_cache_flush);
4075			break;
4076		}
4077	}
4078}
4079
4080/*
4081 *    Function: sd_process_sdconf_table
4082 *
4083 * Description: Search the static configuration table for a match on the
4084 *		inquiry vid/pid and update the driver soft state structure
4085 *		according to the table property values for the device.
4086 *
4087 *		The form of a configuration table entry is:
4088 *		  <vid+pid>,<flags>,<property-data>
4089 *		  "SEAGATE ST42400N",1,0x40000,
4090 *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
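 *
 *		In this entry the flag word 0x40000 has only bit 18 set,
 *		so only the nineteenth property value (the trailing 1) is
 *		applied; which tunable bit 18 selects is defined by the
 *		SD_CONF_BSET_* values in sddef.h.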
4091 *
4092 *   Arguments: un - driver soft state (unit) structure
4093 */
4094
4095static void
4096sd_process_sdconf_table(struct sd_lun *un)
4097{
4098	char	*id = NULL;
4099	int	table_index;
4100	int	idlen;
4101
4102	ASSERT(un != NULL);
4103	for (table_index = 0; table_index < sd_disk_table_size;
4104	    table_index++) {
4105		id = sd_disk_table[table_index].device_id;
4106		idlen = strlen(id);
4107		if (idlen == 0) {
4108			continue;
4109		}
4110
4111		/*
4112		 * The static configuration table currently does not
4113		 * implement version 10 properties. Additionally,
4114		 * multiple data-property-name entries are not
4115		 * implemented in the static configuration table.
4116		 */
4117		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4118			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4119			    "sd_process_sdconf_table: disk %s\n", id);
4120			sd_set_vers1_properties(un,
4121			    sd_disk_table[table_index].flags,
4122			    sd_disk_table[table_index].properties);
4123			break;
4124		}
4125	}
4126}
4127
4128
4129/*
4130 *    Function: sd_sdconf_id_match
4131 *
4132 * Description: This local function implements a case-insensitive vid/pid
4133 *		comparison as well as the boundary cases of wild card and
4134 *		multiple blanks.
4135 *
4136 *		Note: An implicit assumption made here is that the scsi
4137 *		inquiry structure will always keep the vid, pid and
4138 *		revision strings in consecutive sequence, so they can be
4139 *		read as a single string. If this assumption is not the
4140 *		case, a separate string, to be used for the check, needs
4141 *		to be built with these strings concatenated.
4142 *
4143 *   Arguments: un - driver soft state (unit) structure
4144 *		id - table or config file vid/pid
4145 *		idlen  - length of the vid/pid (bytes)
4146 *
4147 * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4148 *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4149 */
4150
4151static int
4152sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
4153{
4154	struct scsi_inquiry	*sd_inq;
4155	int 			rval = SD_SUCCESS;
4156
4157	ASSERT(un != NULL);
4158	sd_inq = un->un_sd->sd_inq;
4159	ASSERT(id != NULL);
4160
4161	/*
4162	 * We use the inq_vid as a pointer to a buffer containing the
4163	 * vid and pid and use the entire vid/pid length of the table
4164	 * entry for the comparison. This works because the inq_pid
4165	 * data member follows inq_vid in the scsi_inquiry structure.
4166	 */
4167	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
4168		/*
4169		 * The user id string is compared to the inquiry vid/pid
4170		 * using a case insensitive comparison and ignoring
4171		 * multiple spaces.
4172		 */
4173		rval = sd_blank_cmp(un, id, idlen);
4174		if (rval != SD_SUCCESS) {
4175			/*
4176			 * User id strings that start and end with a "*"
4177			 * are a special case. These do not have a
4178			 * specific vendor, and the product string can
4179			 * appear anywhere in the 16 byte PID portion of
4180			 * the inquiry data. This is a simple strstr()
4181			 * type search for the user id in the inquiry data.
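			 * For example (hypothetical id), "*ST31200*"
			 * would match any device whose 16-byte inq_pid
			 * contains "ST31200" at any offset.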
4182			 */
4183			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
4184				char	*pidptr = &id[1];
4185				int	i;
4186				int	j;
4187				int	pidstrlen = idlen - 2;
4188				j = sizeof (SD_INQUIRY(un)->inq_pid) -
4189				    pidstrlen;
4190
4191				if (j < 0) {
4192					return (SD_FAILURE);
4193				}
4194				for (i = 0; i <= j; i++) {
4195					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
4196					    pidptr, pidstrlen) == 0) {
4197						rval = SD_SUCCESS;
4198						break;
4199					}
4200				}
4201			}
4202		}
4203	}
4204	return (rval);
4205}
4206
4207
4208/*
4209 *    Function: sd_blank_cmp
4210 *
4211 * Description: If the id string starts and ends with a space, treat
4212 *		multiple consecutive spaces as equivalent to a single
4213 *		space. For example, this causes a sd_disk_table entry
4214 *		of " NEC CDROM " to match a device's id string of
4215 *		"NEC       CDROM".
4216 *
4217 *		Note: The success exit condition for this routine is that
4218 *		the table-entry pointer has reached '\0' and the remaining
4219 *		inquiry length count is zero. This happens if the inquiry
4220 *		string returned by the device is padded with spaces to be
4221 *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
4222 *		SCSI spec states that the inquiry string is to be padded with
4223 *		spaces.
4224 *
4225 *   Arguments: un - driver soft state (unit) structure
4226 *		id - table or config file vid/pid
4227 *		idlen  - length of the vid/pid (bytes)
4228 *
4229 * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4230 *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4231 */
4232
4233static int
4234sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
4235{
4236	char		*p1;
4237	char		*p2;
4238	int		cnt;
4239	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
4240	    sizeof (SD_INQUIRY(un)->inq_pid);
4241
4242	ASSERT(un != NULL);
4243	p2 = un->un_sd->sd_inq->inq_vid;
4244	ASSERT(id != NULL);
4245	p1 = id;
4246
4247	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
4248		/*
4249		 * Note: string p1 is terminated by a NUL but string p2
4250		 * isn't.  The end of p2 is determined by cnt.
4251		 */
4252		for (;;) {
4253			/* skip over any extra blanks in both strings */
4254			while ((*p1 != '\0') && (*p1 == ' ')) {
4255				p1++;
4256			}
4257			while ((cnt != 0) && (*p2 == ' ')) {
4258				p2++;
4259				cnt--;
4260			}
4261
4262			/* compare the two strings */
4263			if ((cnt == 0) ||
4264			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
4265				break;
4266			}
4267			while ((cnt > 0) &&
4268			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
4269				p1++;
4270				p2++;
4271				cnt--;
4272			}
4273		}
4274	}
4275
4276	/* return SD_SUCCESS if both strings match */
4277	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
4278}
4279
4280
4281/*
4282 *    Function: sd_chk_vers1_data
4283 *
4284 * Description: Verify the version 1 device properties provided by the
4285 *		user via the configuration file
4286 *
4287 *   Arguments: un	     - driver soft state (unit) structure
4288 *		flags	     - integer mask indicating properties to be set
4289 *		prop_list    - integer list of property values
4290 *		list_len     - number of the elements
4291 *
4292 * Return Code: SD_SUCCESS - Indicates the user provided data is valid
4293 *		SD_FAILURE - Indicates the user provided data is invalid
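 *
 * For example (hypothetical values), flags = 0x9 sets bits 0 and 3, so
 * the highest-bit scan below yields index = 4 and list_len must be at
 * least 6: one version word, one flag word, and props up to prop3.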
4294 */
4295
4296static int
4297sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
4298    int list_len, char *dataname_ptr)
4299{
4300	int i;
4301	int mask = 1;
4302	int index = 0;
4303
4304	ASSERT(un != NULL);
4305
4306	/* Check for a NULL property name and list */
4307	if (dataname_ptr == NULL) {
4308		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4309		    "sd_chk_vers1_data: NULL data property name.");
4310		return (SD_FAILURE);
4311	}
4312	if (prop_list == NULL) {
4313		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4314		    "sd_chk_vers1_data: %s NULL data property list.",
4315		    dataname_ptr);
4316		return (SD_FAILURE);
4317	}
4318
4319	/* Display a warning if undefined bits are set in the flags */
4320	if (flags & ~SD_CONF_BIT_MASK) {
4321		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4322		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
4323		    "Properties not set.",
4324		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
4325		return (SD_FAILURE);
4326	}
4327
4328	/*
4329	 * Verify the length of the list by identifying the highest bit set
4330	 * in the flags and validating that the property list has a length
4331	 * up to the index of this bit.
4332	 */
4333	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4334		mask = 1 << i;
4335		if (flags & mask) {
4336			index = i + 1;
4337		}
4338	}
4339	if (list_len < (index + 2)) {
4340		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4341		    "sd_chk_vers1_data: "
4342		    "Data property list %s size is incorrect. "
4343		    "Properties not set.", dataname_ptr);
4344		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
4345		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
4346		return (SD_FAILURE);
4347	}
4348	return (SD_SUCCESS);
4349}
4350
4351
4352/*
4353 *    Function: sd_set_vers1_properties
4354 *
4355 * Description: Set version 1 device properties based on a property list
4356 *		retrieved from the driver configuration file or static
4357 *		configuration table. Version 1 properties have the format:
4358 *
4359 * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
4360 *
4361 *		where the prop0 value will be used to set prop0 if bit0
4362 *		is set in the flags
4363 *
4364 *   Arguments: un	     - driver soft state (unit) structure
4365 *		flags	     - integer mask indicating properties to be set
4366 *		prop_list    - integer list of property values
4367 */
4368
4369static void
4370sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4371{
4372	ASSERT(un != NULL);
4373
4374	/*
4375	 * Set the flag to indicate cache is to be disabled. An attempt
4376	 * to disable the cache via sd_cache_control() will be made
4377	 * later during attach once the basic initialization is complete.
4378	 */
4379	if (flags & SD_CONF_BSET_NOCACHE) {
4380		un->un_f_opt_disable_cache = TRUE;
4381		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4382		    "sd_set_vers1_properties: caching disabled flag set\n");
4383	}
4384
4385	/* CD-specific configuration parameters */
4386	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4387		un->un_f_cfg_playmsf_bcd = TRUE;
4388		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4389		    "sd_set_vers1_properties: playmsf_bcd set\n");
4390	}
4391	if (flags & SD_CONF_BSET_READSUB_BCD) {
4392		un->un_f_cfg_readsub_bcd = TRUE;
4393		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4394		    "sd_set_vers1_properties: readsub_bcd set\n");
4395	}
4396	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4397		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4398		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4399		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4400	}
4401	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4402		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4403		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4404		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4405	}
4406	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4407		un->un_f_cfg_no_read_header = TRUE;
4408		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4409		    "sd_set_vers1_properties: no_read_header set\n");
4410	}
4411	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4412		un->un_f_cfg_read_cd_xd4 = TRUE;
4413		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4414		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4415	}
4416
4417	/* Support for devices which do not have valid/unique serial numbers */
4418	if (flags & SD_CONF_BSET_FAB_DEVID) {
4419		un->un_f_opt_fab_devid = TRUE;
4420		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4421		    "sd_set_vers1_properties: fab_devid bit set\n");
4422	}
4423
4424	/* Support for user throttle configuration */
4425	if (flags & SD_CONF_BSET_THROTTLE) {
4426		ASSERT(prop_list != NULL);
4427		un->un_saved_throttle = un->un_throttle =
4428		    prop_list->sdt_throttle;
4429		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4430		    "sd_set_vers1_properties: throttle set to %d\n",
4431		    prop_list->sdt_throttle);
4432	}
4433
4434	/* Set the per disk retry count according to the conf file or table. */
4435	if (flags & SD_CONF_BSET_NRR_COUNT) {
4436		ASSERT(prop_list != NULL);
4437		if (prop_list->sdt_not_rdy_retries) {
4438			un->un_notready_retry_count =
4439			    prop_list->sdt_not_rdy_retries;
4440			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4441			    "sd_set_vers1_properties: not ready retry count"
4442			    " set to %d\n", un->un_notready_retry_count);
4443		}
4444	}
4445
4446	/* The controller type is reported for generic disk driver ioctls */
4447	if (flags & SD_CONF_BSET_CTYPE) {
4448		ASSERT(prop_list != NULL);
4449		switch (prop_list->sdt_ctype) {
4450		case CTYPE_CDROM:
4451			un->un_ctype = prop_list->sdt_ctype;
4452			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4453			    "sd_set_vers1_properties: ctype set to "
4454			    "CTYPE_CDROM\n");
4455			break;
4456		case CTYPE_CCS:
4457			un->un_ctype = prop_list->sdt_ctype;
4458			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4459			    "sd_set_vers1_properties: ctype set to "
4460			    "CTYPE_CCS\n");
4461			break;
4462		case CTYPE_ROD:		/* RW optical */
4463			un->un_ctype = prop_list->sdt_ctype;
4464			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4465			    "sd_set_vers1_properties: ctype set to "
4466			    "CTYPE_ROD\n");
4467			break;
4468		default:
4469			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4470			    "sd_set_vers1_properties: Could not set "
4471			    "invalid ctype value (%d)",
4472			    prop_list->sdt_ctype);
4473		}
4474	}
4475
4476	/* Purple failover timeout */
4477	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4478		ASSERT(prop_list != NULL);
4479		un->un_busy_retry_count =
4480		    prop_list->sdt_busy_retries;
4481		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4482		    "sd_set_vers1_properties: "
4483		    "busy retry count set to %d\n",
4484		    un->un_busy_retry_count);
4485	}
4486
4487	/* Purple reset retry count */
4488	if (flags & SD_CONF_BSET_RST_RETRIES) {
4489		ASSERT(prop_list != NULL);
4490		un->un_reset_retry_count =
4491		    prop_list->sdt_reset_retries;
4492		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4493		    "sd_set_vers1_properties: "
4494		    "reset retry count set to %d\n",
4495		    un->un_reset_retry_count);
4496	}
4497
4498	/* Purple reservation release timeout */
4499	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4500		ASSERT(prop_list != NULL);
4501		un->un_reserve_release_time =
4502		    prop_list->sdt_reserv_rel_time;
4503		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4504		    "sd_set_vers1_properties: "
4505		    "reservation release timeout set to %d\n",
4506		    un->un_reserve_release_time);
4507	}
4508
4509	/*
4510	 * Flag telling the driver to verify that no commands are pending
4511	 * for a device before issuing a Test Unit Ready. This is a workaround
4512	 * for a firmware bug in some Seagate eliteI drives.
4513	 */
4514	if (flags & SD_CONF_BSET_TUR_CHECK) {
4515		un->un_f_cfg_tur_check = TRUE;
4516		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4517		    "sd_set_vers1_properties: tur queue check set\n");
4518	}
4519
4520	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4521		un->un_min_throttle = prop_list->sdt_min_throttle;
4522		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4523		    "sd_set_vers1_properties: min throttle set to %d\n",
4524		    un->un_min_throttle);
4525	}
4526
4527	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4528		un->un_f_disksort_disabled =
4529		    (prop_list->sdt_disk_sort_dis != 0) ?
4530		    TRUE : FALSE;
4531		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4532		    "sd_set_vers1_properties: disksort disabled "
4533		    "flag set to %d\n",
4534		    prop_list->sdt_disk_sort_dis);
4535	}
4536
4537	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4538		un->un_f_lun_reset_enabled =
4539		    (prop_list->sdt_lun_reset_enable != 0) ?
4540		    TRUE : FALSE;
4541		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4542		    "sd_set_vers1_properties: lun reset enabled "
4543		    "flag set to %d\n",
4544		    prop_list->sdt_lun_reset_enable);
4545	}
4546
4547	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4548		un->un_f_suppress_cache_flush =
4549		    (prop_list->sdt_suppress_cache_flush != 0) ?
4550		    TRUE : FALSE;
4551		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4552		    "sd_set_vers1_properties: suppress_cache_flush "
4553		    "flag set to %d\n",
4554		    prop_list->sdt_suppress_cache_flush);
4555	}
4556
4557	/*
4558	 * Validate the throttle values.
4559	 * If any of the numbers are invalid, set everything to defaults.
4560	 */
4561	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4562	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4563	    (un->un_min_throttle > un->un_throttle)) {
4564		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4565		un->un_min_throttle = sd_min_throttle;
4566	}
4567}
4568
4569/*
4570 *   Function: sd_is_lsi()
4571 *
4572 *   Description: Check for LSI devices by stepping through the static
4573 *	device table to match vid/pid.
4574 *
4575 *   Args: un - ptr to sd_lun
4576 *
4577 *   Notes:  When creating a new LSI property, the new property must
4578 *		also be added to the checks in this function.
4579 */
4580static void
4581sd_is_lsi(struct sd_lun *un)
4582{
4583	char	*id = NULL;
4584	int	table_index;
4585	int	idlen;
4586	void	*prop;
4587
4588	ASSERT(un != NULL);
4589	for (table_index = 0; table_index < sd_disk_table_size;
4590	    table_index++) {
4591		id = sd_disk_table[table_index].device_id;
4592		idlen = strlen(id);
4593		if (idlen == 0) {
4594			continue;
4595		}
4596
4597		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4598			prop = sd_disk_table[table_index].properties;
4599			if (prop == &lsi_properties ||
4600			    prop == &lsi_oem_properties ||
4601			    prop == &lsi_properties_scsi ||
4602			    prop == &symbios_properties) {
4603				un->un_f_cfg_is_lsi = TRUE;
4604			}
4605			break;
4606		}
4607	}
4608}
4609
4610/*
4611 *    Function: sd_get_physical_geometry
4612 *
4613 * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4614 *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4615 *		target, and use this information to initialize the physical
4616 *		geometry cache specified by pgeom_p.
4617 *
4618 *		MODE SENSE is an optional command, so failure in this case
4619 *		does not necessarily denote an error. We want to use the
4620 *		MODE SENSE commands to derive the physical geometry of the
4621 *		device, but if either command fails, the logical geometry is
4622 *		used as the fallback for disk label geometry in cmlb.
4623 *
4624 *		This requires that un->un_blockcount and un->un_tgt_blocksize
4625 *		have already been initialized for the current target and
4626 *		that the current values be passed as args so that we don't
4627 *		end up ever trying to use -1 as a valid value. This could
4628 *		happen if either value is reset while we're not holding
4629 *		the mutex.
4630 *
4631 *   Arguments: un - driver soft state (unit) structure
 *		pgeom_p - physical geometry cache to initialize
 *		capacity - disk capacity in blocks; lbasize - block size in bytes
4632 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4633 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4634 *			to use the USCSI "direct" chain and bypass the normal
4635 *			command waitq.
4636 *
4637 *     Context: Kernel thread only (can sleep).
4638 */
4639
4640static int
4641sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4642	diskaddr_t capacity, int lbasize, int path_flag)
4643{
4644	struct	mode_format	*page3p;
4645	struct	mode_geometry	*page4p;
4646	struct	mode_header	*headerp;
4647	int	sector_size;
4648	int	nsect;
4649	int	nhead;
4650	int	ncyl;
4651	int	intrlv;
4652	int	spc;
4653	diskaddr_t	modesense_capacity;
4654	int	rpm;
4655	int	bd_len;
4656	int	mode_header_length;
4657	uchar_t	*p3bufp;
4658	uchar_t	*p4bufp;
4659	int	cdbsize;
4660	int 	ret = EIO;
4661	sd_ssc_t *ssc;
4662	int	status;
4663
4664	ASSERT(un != NULL);
4665
4666	if (lbasize == 0) {
4667		if (ISCD(un)) {
4668			lbasize = 2048;
4669		} else {
4670			lbasize = un->un_sys_blocksize;
4671		}
4672	}
4673	pgeom_p->g_secsize = (unsigned short)lbasize;
4674
4675	/*
4676	 * If the unit is a cd/dvd drive MODE SENSE page three
4677	 * and MODE SENSE page four are reserved (see SBC spec
4678	 * and MMC spec). To prevent soft errors just return
4679	 * using the default LBA size.
4680	 */
4681	if (ISCD(un))
4682		return (ret);
4683
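	/*
	 * (Descriptive note) ATAPI devices do not implement the 6-byte
	 * MODE SENSE, so a 10-byte CDB (CDB_GROUP2, i.e. MODE SENSE(10))
	 * is used instead.
	 */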
4684	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4685
4686	/*
4687	 * Retrieve MODE SENSE page 3 - Format Device Page
4688	 */
4689	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4690	ssc = sd_ssc_init(un);
4691	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p3bufp,
4692	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag);
4693	if (status != 0) {
4694		SD_ERROR(SD_LOG_COMMON, un,
4695		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4696		goto page3_exit;
4697	}
4698
4699	/*
4700	 * Determine size of Block Descriptors in order to locate the mode
4701	 * page data.  ATAPI devices return 0; SCSI devices should return
4702	 * MODE_BLK_DESC_LENGTH.
4703	 */
4704	headerp = (struct mode_header *)p3bufp;
4705	if (un->un_f_cfg_is_atapi == TRUE) {
4706		struct mode_header_grp2 *mhp =
4707		    (struct mode_header_grp2 *)headerp;
4708		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4709		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4710	} else {
4711		mode_header_length = MODE_HEADER_LENGTH;
4712		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4713	}
4714
4715	if (bd_len > MODE_BLK_DESC_LENGTH) {
4716		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4717		    "sd_get_physical_geometry: received unexpected bd_len "
4718		    "of %d, page3\n", bd_len);
4719		status = EIO;
4720		goto page3_exit;
4721	}
4722
4723	page3p = (struct mode_format *)
4724	    ((caddr_t)headerp + mode_header_length + bd_len);
4725
4726	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4727		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4728		    "sd_get_physical_geometry: mode sense pg3 code mismatch "
4729		    "%d\n", page3p->mode_page.code);
4730		status = EIO;
4731		goto page3_exit;
4732	}
4733
4734	/*
4735	 * Use this physical geometry data only if BOTH MODE SENSE commands
4736	 * complete successfully; otherwise, revert to the logical geometry.
4737	 * So, we need to save everything in temporary variables.
4738	 */
4739	sector_size = BE_16(page3p->data_bytes_sect);
4740
4741	/*
4742	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4743	 */
4744	if (sector_size == 0) {
4745		sector_size = un->un_sys_blocksize;
4746	} else {
4747		sector_size &= ~(un->un_sys_blocksize - 1);
4748	}
4749
4750	nsect  = BE_16(page3p->sect_track);
4751	intrlv = BE_16(page3p->interleave);
4752
4753	SD_INFO(SD_LOG_COMMON, un,
4754	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4755	SD_INFO(SD_LOG_COMMON, un,
4756	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4757	    page3p->mode_page.code, nsect, sector_size);
4758	SD_INFO(SD_LOG_COMMON, un,
4759	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4760	    BE_16(page3p->track_skew),
4761	    BE_16(page3p->cylinder_skew));
4762
4763	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
4764
4765	/*
4766	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4767	 */
4768	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4769	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p4bufp,
4770	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag);
4771	if (status != 0) {
4772		SD_ERROR(SD_LOG_COMMON, un,
4773		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4774		goto page4_exit;
4775	}
4776
4777	/*
4778	 * Determine size of Block Descriptors in order to locate the mode
4779	 * page data.  ATAPI devices return 0; SCSI devices should return
4780	 * MODE_BLK_DESC_LENGTH.
4781	 */
4782	headerp = (struct mode_header *)p4bufp;
4783	if (un->un_f_cfg_is_atapi == TRUE) {
4784		struct mode_header_grp2 *mhp =
4785		    (struct mode_header_grp2 *)headerp;
4786		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4787	} else {
4788		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4789	}
4790
4791	if (bd_len > MODE_BLK_DESC_LENGTH) {
4792		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4793		    "sd_get_physical_geometry: received unexpected bd_len of "
4794		    "%d, page4\n", bd_len);
4795		status = EIO;
4796		goto page4_exit;
4797	}
4798
4799	page4p = (struct mode_geometry *)
4800	    ((caddr_t)headerp + mode_header_length + bd_len);
4801
4802	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4803		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4804		    "sd_get_physical_geometry: mode sense pg4 code mismatch "
4805		    "%d\n", page4p->mode_page.code);
4806		status = EIO;
4807		goto page4_exit;
4808	}
4809
4810	/*
4811	 * Stash the data now, after we know that both commands completed.
4812	 */
4813
4814
4815	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4816	spc   = nhead * nsect;
4817	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4818	rpm   = BE_16(page4p->rpm);
4819
4820	modesense_capacity = spc * ncyl;
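	/*
	 * For example (hypothetical values): nhead = 255 and nsect = 63
	 * give spc = 16065 blocks per cylinder, so ncyl = 1024 yields a
	 * modesense_capacity of 16450560 blocks.
	 */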
4821
4822	SD_INFO(SD_LOG_COMMON, un,
4823	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4824	SD_INFO(SD_LOG_COMMON, un,
4825	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4826	SD_INFO(SD_LOG_COMMON, un,
4827	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4828	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4829	    (void *)pgeom_p, capacity);
4830
4831	/*
4832	 * Compensate if the drive's geometry is not rectangular, i.e.,
4833	 * the product of C * H * S returned by MODE SENSE >= that returned
4834	 * by read capacity. This is an idiosyncrasy of the original x86
4835	 * disk subsystem.
4836	 */
4837	if (modesense_capacity >= capacity) {
4838		SD_INFO(SD_LOG_COMMON, un,
4839		    "sd_get_physical_geometry: adjusting acyl; "
4840		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4841		    (modesense_capacity - capacity + spc - 1) / spc);
4842		if (sector_size != 0) {
4843			/* 1243403: NEC D38x7 drives don't support sec size */
4844			pgeom_p->g_secsize = (unsigned short)sector_size;
4845		}
4846		pgeom_p->g_nsect    = (unsigned short)nsect;
4847		pgeom_p->g_nhead    = (unsigned short)nhead;
4848		pgeom_p->g_capacity = capacity;
4849		pgeom_p->g_acyl	    =
4850		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4851		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4852	}
4853
4854	pgeom_p->g_rpm    = (unsigned short)rpm;
4855	pgeom_p->g_intrlv = (unsigned short)intrlv;
4856	ret = 0;
4857
4858	SD_INFO(SD_LOG_COMMON, un,
4859	    "sd_get_physical_geometry: mode sense geometry:\n");
4860	SD_INFO(SD_LOG_COMMON, un,
4861	    "   nsect: %d; sector size: %d; interlv: %d\n",
4862	    nsect, sector_size, intrlv);
4863	SD_INFO(SD_LOG_COMMON, un,
4864	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4865	    nhead, ncyl, rpm, modesense_capacity);
4866	SD_INFO(SD_LOG_COMMON, un,
4867	    "sd_get_physical_geometry: (cached)\n");
4868	SD_INFO(SD_LOG_COMMON, un,
4869	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4870	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
4871	    pgeom_p->g_nhead, pgeom_p->g_nsect);
4872	SD_INFO(SD_LOG_COMMON, un,
4873	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
4874	    pgeom_p->g_secsize, pgeom_p->g_capacity,
4875	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
4876	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
4877
4878page4_exit:
4879	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
4880
4881page3_exit:
4882	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
4883
4884	if (status != 0) {
4885		if (status == EIO) {
4886			/*
4887			 * Some disks do not support MODE SENSE(6); we
4888			 * should ignore this kind of error (sense key
4889			 * 0x5, ILLEGAL REQUEST).
4890			 */
4891			uint8_t *sensep;
4892			int senlen;
4893
4894			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
4895			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
4896			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
4897
4898			if (senlen > 0 &&
4899			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
4900				sd_ssc_assessment(ssc,
4901				    SD_FMT_IGNORE_COMPROMISE);
4902			} else {
4903				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
4904			}
4905		} else {
4906			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
4907		}
4908	}
4909	sd_ssc_fini(ssc);
4910	return (ret);
4911}
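
/*
 * Illustrative sketch (not part of the driver): the alternate-cylinder
 * adjustment performed above on pgeom_p, reduced to a standalone helper.
 * The struct and function names here are hypothetical.
 */
#include <sys/types.h>

struct geom_sketch {
	uint64_t	g_capacity;	/* capacity from READ CAPACITY */
	unsigned short	g_nhead;
	unsigned short	g_nsect;
	unsigned short	g_acyl;		/* alternate (hidden) cylinders */
	unsigned short	g_ncyl;		/* usable cylinders */
};

/*
 * Hide the excess of the MODE SENSE C*H*S capacity over the READ
 * CAPACITY value in "alternate" cylinders, rounding up so that
 * g_ncyl * (nhead * nsect) never exceeds the real capacity.
 */
static void
adjust_alt_cylinders(struct geom_sketch *g, uint64_t modesense_capacity,
    uint_t ncyl)
{
	uint_t spc = (uint_t)g->g_nhead * g->g_nsect;

	if (spc == 0 || modesense_capacity < g->g_capacity)
		return;		/* geometry already fits; nothing to hide */

	g->g_acyl = (unsigned short)
	    ((modesense_capacity - g->g_capacity + spc - 1) / spc);
	g->g_ncyl = (unsigned short)(ncyl - g->g_acyl);
}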
4912
4913/*
4914 *    Function: sd_get_virtual_geometry
4915 *
4916 * Description: Ask the controller to tell us about the target device.
4917 *
4918 *   Arguments: un - pointer to softstate
4919 *		lgeom_p - logical geometry cache to be filled in
4920 *		capacity - capacity in #blocks; lbasize - block size in bytes
4921 *
4922 *     Context: Kernel thread only
4923 */
4924
4925static int
4926sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
4927    diskaddr_t capacity, int lbasize)
4928{
4929	uint_t	geombuf;
4930	int	spc;
4931
4932	ASSERT(un != NULL);
4933
4934	/* Set sector size, and total number of sectors */
4935	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
4936	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
4937
4938	/* Let the HBA tell us its geometry */
4939	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
4940
4941	/* A value of -1 indicates an undefined "geometry" property */
4942	if (geombuf == (-1)) {
4943		return (EINVAL);
4944	}
4945
4946	/* Initialize the logical geometry cache. */
4947	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
4948	lgeom_p->g_nsect   = geombuf & 0xffff;
4949	lgeom_p->g_secsize = un->un_sys_blocksize;
4950
4951	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
4952
4953	/*
4954	 * Note: The driver originally converted the capacity value from
4955	 * target blocks to system blocks. However, the capacity value passed
4956	 * to this routine is already in terms of system blocks (this scaling
4957	 * is done when the READ CAPACITY command is issued and processed).
4958	 * This 'error' may have gone undetected because the usage of g_ncyl
4959	 * (which is based upon g_capacity) is very limited within the driver.
4960	 */
4961	lgeom_p->g_capacity = capacity;
4962
4963	/*
4964	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value;
4965	 * the HBA may return zero values if the device has been removed.
4966	 */
4967	if (spc == 0) {
4968		lgeom_p->g_ncyl = 0;
4969	} else {
4970		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
4971	}
4972	lgeom_p->g_acyl = 0;
4973
4974	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
4975	return (0);
4976
4977}
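
/*
 * Illustrative sketch (not part of the driver): the "geometry"
 * capability word decoded above packs heads into the upper 16 bits and
 * sectors per track into the lower 16 bits.  A matching pack/unpack
 * pair, with hypothetical names:
 */
#include <sys/types.h>

static uint32_t
pack_geometry(uint16_t nhead, uint16_t nsect)
{
	return (((uint32_t)nhead << 16) | nsect);
}

static void
unpack_geometry(uint32_t geombuf, uint16_t *nhead, uint16_t *nsect)
{
	*nhead = (uint16_t)((geombuf >> 16) & 0xffff);
	*nsect = (uint16_t)(geombuf & 0xffff);
}
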
4978/*
4979 *    Function: sd_update_block_info
4980 *
4981 * Description: Record the new target block size and capacity in the
4982 *		soft state, marking each valid when non-zero.
4983 *
4984 *   Arguments: un: unit struct.
4985 *		lbasize: new target sector size
4986 *		capacity: new target capacity, i.e. block count
4987 *
4988 *     Context: Kernel thread context
4989 */
4990
4991static void
4992sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
4993{
4994	if (lbasize != 0) {
4995		un->un_tgt_blocksize = lbasize;
4996		un->un_f_tgt_blocksize_is_valid	= TRUE;
4997	}
4998
4999	if (capacity != 0) {
5000		un->un_blockcount		= capacity;
5001		un->un_f_blockcount_is_valid	= TRUE;
5002	}
5003}
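
/*
 * Illustrative sketch (not part of the driver): for a power-of-two
 * sector size like those stored above, byte counts convert to block
 * counts with a shift instead of a division.  A standalone derivation
 * of such a shift; the name is hypothetical.
 */
#include <sys/types.h>

static int
blocksize_shift(uint32_t lbasize)
{
	int shift = 0;

	/* Only power-of-two sizes have an exact byte-to-block shift. */
	if (lbasize == 0 || (lbasize & (lbasize - 1)) != 0)
		return (-1);
	while ((1U << shift) < lbasize)
		shift++;
	return (shift);		/* e.g. 512 -> 9, 4096 -> 12 */
}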
5004
5005
5006/*
5007 *    Function: sd_register_devid
5008 *
5009 * Description: This routine will obtain the device id information from the
5010 *		target, obtain the serial number, and register the device
5011 *		id with the ddi framework.
5012 *
5013 *   Arguments: devi - the system's dev_info_t for the device.
5014 *		ssc - sd_ssc_t handle holding the soft state (unit)
5015 *		reservation_flag - indicates if a reservation conflict
5016 *		occurred during attach
5017 *
5018 *     Context: Kernel Thread
5019 */
5020static void
5021sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi, int reservation_flag)
5022{
5023	int		rval		= 0;
5024	uchar_t		*inq80		= NULL;
5025	size_t		inq80_len	= MAX_INQUIRY_SIZE;
5026	size_t		inq80_resid	= 0;
5027	uchar_t		*inq83		= NULL;
5028	size_t		inq83_len	= MAX_INQUIRY_SIZE;
5029	size_t		inq83_resid	= 0;
5030	int		dlen, len;
5031	char		*sn;
5032	struct sd_lun	*un;
5033
5034	ASSERT(ssc != NULL);
5035	un = ssc->ssc_un;
5036	ASSERT(un != NULL);
5037	ASSERT(mutex_owned(SD_MUTEX(un)));
5038	ASSERT((SD_DEVINFO(un)) == devi);
5039
5040	/*
5041	 * If transport has already registered a devid for this target
5042	 * then that takes precedence over the driver's determination
5043	 * of the devid.
5044	 */
5045	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
5046		ASSERT(un->un_devid);
5047		return; /* use devid registered by the transport */
5048	}
5049
5050	/*
5051	 * This is the case of antiquated Sun disk drives that have the
5052	 * FAB_DEVID property set in the disk_table.  These drives
5053	 * manage their devids by storing them in the last 2 available sectors
5054	 * on the drive and have them fabricated by the ddi layer by calling
5055	 * ddi_devid_init and passing the DEVID_FAB flag.
5056	 */
5057	if (un->un_f_opt_fab_devid == TRUE) {
5058		/*
5059		 * Depending on EINVAL isn't reliable, since a reserved disk
5060		 * may result in invalid geometry, so check to make sure a
5061		 * reservation conflict did not occur during attach.
5062		 */
5063		if ((sd_get_devid(ssc) == EINVAL) &&
5064		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
5065			/*
5066			 * The devid is invalid AND there is no reservation
5067			 * conflict.  Fabricate a new devid.
5068			 */
5069			(void) sd_create_devid(ssc);
5070		}
5071
5072		/* Register the devid if it exists */
5073		if (un->un_devid != NULL) {
5074			(void) ddi_devid_register(SD_DEVINFO(un),
5075			    un->un_devid);
5076			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5077			    "sd_register_devid: Devid Fabricated\n");
5078		}
5079		return;
5080	}
5081
5082	/*
5083	 * We check the availability of the World Wide Name (0x83) and Unit
5084	 * Serial Number (0x80) pages in sd_check_vpd_page_support() and, using
5085	 * the un_vpd_page_mask built from the results, decide how to get the
5086	 * WWN.  If 0x83 is available, that is the best choice; our next choice
5087	 * is 0x80.  If neither is available, we munge the devid from the device
5088	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
5089	 * to fabricate a devid for non-Sun qualified disks.
5090	 */
5091	if (sd_check_vpd_page_support(ssc) == 0) {
5092		/* collect page 80 data if available */
5093		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
5094
5095			mutex_exit(SD_MUTEX(un));
5096			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
5097
5098			rval = sd_send_scsi_INQUIRY(ssc, inq80, inq80_len,
5099			    0x01, 0x80, &inq80_resid);
5100
5101			if (rval != 0) {
5102				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5103				kmem_free(inq80, inq80_len);
5104				inq80 = NULL;
5105				inq80_len = 0;
5106			} else if (ddi_prop_exists(
5107			    DDI_DEV_T_NONE, SD_DEVINFO(un),
5108			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
5109			    INQUIRY_SERIAL_NO) == 0) {
5110				/*
5111				 * If we don't already have a serial number
5112				 * property, do a quick verify of the data
5113				 * returned and define the property.
5114				 */
5115				dlen = inq80_len - inq80_resid;
5116				len = (size_t)inq80[3];
5117				if ((dlen >= 4) && ((len + 4) <= dlen)) {
5118					/*
5119					 * Ensure sn termination, skip leading
5120					 * blanks, and create property
5121					 * 'inquiry-serial-no'.
5122					 */
5123					sn = (char *)&inq80[4];
5124					sn[len] = 0;
5125					while (*sn && (*sn == ' '))
5126						sn++;
5127					if (*sn) {
5128						(void) ddi_prop_update_string(
5129						    DDI_DEV_T_NONE,
5130						    SD_DEVINFO(un),
5131						    INQUIRY_SERIAL_NO, sn);
5132					}
5133				}
5134			}
5135			mutex_enter(SD_MUTEX(un));
5136		}
5137
5138		/* collect page 83 data if available */
5139		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
5140			mutex_exit(SD_MUTEX(un));
5141			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
5142
5143			rval = sd_send_scsi_INQUIRY(ssc, inq83, inq83_len,
5144			    0x01, 0x83, &inq83_resid);
5145
5146			if (rval != 0) {
5147				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5148				kmem_free(inq83, inq83_len);
5149				inq83 = NULL;
5150				inq83_len = 0;
5151			}
5152			mutex_enter(SD_MUTEX(un));
5153		}
5154	}
5155
5156	/* encode best devid possible based on data available */
5157	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
5158	    (char *)ddi_driver_name(SD_DEVINFO(un)),
5159	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
5160	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
5161	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
5162
5163		/* devid successfully encoded, register devid */
5164		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
5165
5166	} else {
5167		/*
5168		 * Unable to encode a devid based on the data available.
5169		 * This is not a Sun qualified disk.  Older Sun disk
5170		 * drives that have the SD_FAB_DEVID property
5171		 * set in the disk_table and non-Sun qualified
5172		 * disks are treated in the same manner.  These
5173		 * drives manage their devids by storing them in
5174		 * the last 2 available sectors on the drive and
5175		 * have them fabricated by the ddi layer by
5176		 * calling ddi_devid_init and passing the
5177		 * DEVID_FAB flag.
5178		 * Create a fabricated devid only if one does
5179		 * not already exist.
5180		 */
5181		if (sd_get_devid(ssc) == EINVAL) {
5182			(void) sd_create_devid(ssc);
5183		}
5184		un->un_f_opt_fab_devid = TRUE;
5185
5186		/* Register the devid if it exists */
5187		if (un->un_devid != NULL) {
5188			(void) ddi_devid_register(SD_DEVINFO(un),
5189			    un->un_devid);
5190			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5191			    "sd_register_devid: devid fabricated using "
5192			    "ddi framework\n");
5193		}
5194	}
5195
5196	/* clean up resources */
5197	if (inq80 != NULL) {
5198		kmem_free(inq80, inq80_len);
5199	}
5200	if (inq83 != NULL) {
5201		kmem_free(inq83, inq83_len);
5202	}
5203}
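
/*
 * Illustrative sketch (not part of the driver): the INQUIRY page 0x80
 * handling above NUL-terminates the returned serial number and skips
 * leading blanks before publishing it as 'inquiry-serial-no'.  Reduced
 * to a standalone helper with a hypothetical name, assuming the SPC
 * Unit Serial Number page layout (length at byte 3, data from byte 4):
 */
#include <sys/types.h>

static char *
trim_inquiry_serial(uchar_t *buf, size_t buflen)
{
	size_t	len;
	char	*sn;

	if (buflen < 5)
		return (NULL);
	len = buf[3];			/* serial-number length */
	if (len + 5 > buflen)		/* room for the NUL we append */
		return (NULL);
	sn = (char *)&buf[4];
	sn[len] = '\0';
	while (*sn == ' ')		/* skip leading blanks */
		sn++;
	return (*sn != '\0' ? sn : NULL);
}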
5204
5205
5206
5207/*
5208 *    Function: sd_get_devid
5209 *
5210 * Description: This routine will return 0 if a valid device id has been
5211 *		obtained from the target and stored in the soft state. If a
5212 *		valid device id has not been previously read and stored, a
5213 *		read attempt will be made.
5214 *
5215 *   Arguments: ssc - sd_ssc_t handle holding the soft state (unit)
5216 *
5217 * Return Code: 0 if we successfully get the device id
5218 * Return Code: 0 if we successfully get the device id; EINVAL otherwise
5219 *     Context: Kernel Thread
5220 */
5221
5222static int
5223sd_get_devid(sd_ssc_t *ssc)
5224{
5225	struct dk_devid		*dkdevid;
5226	ddi_devid_t		tmpid;
5227	uint_t			*ip;
5228	size_t			sz;
5229	diskaddr_t		blk;
5230	int			status;
5231	int			chksum;
5232	int			i;
5233	size_t			buffer_size;
5234	struct sd_lun		*un;
5235
5236	ASSERT(ssc != NULL);
5237	un = ssc->ssc_un;
5238	ASSERT(un != NULL);
5239	ASSERT(mutex_owned(SD_MUTEX(un)));
5240
5241	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
5242	    un);
5243
5244	if (un->un_devid != NULL) {
5245		return (0);
5246	}
5247
5248	mutex_exit(SD_MUTEX(un));
5249	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5250	    (void *)SD_PATH_DIRECT) != 0) {
5251		mutex_enter(SD_MUTEX(un));
5252		return (EINVAL);
5253	}
5254
5255	/*
5256	 * Read and verify device id, stored in the reserved cylinders at the
5257	 * end of the disk. Backup label is on the odd sectors of the last
5258	 * end of the disk. The backup label is on the odd sectors of the last
5259	 * track of the last cylinder; the device id is on a track of the
5260	 * next-to-last cylinder.
5261	mutex_enter(SD_MUTEX(un));
5262	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
5263	mutex_exit(SD_MUTEX(un));
5264	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
5265	status = sd_send_scsi_READ(ssc, dkdevid, buffer_size, blk,
5266	    SD_PATH_DIRECT);
5267
5268	if (status != 0) {
5269		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5270		goto error;
5271	}
5272
5273	/* Validate the revision */
5274	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
5275	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
5276		status = EINVAL;
5277		goto error;
5278	}
5279
5280	/* Calculate the checksum */
5281	chksum = 0;
5282	ip = (uint_t *)dkdevid;
5283	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
5284	    i++) {
5285		chksum ^= ip[i];
5286	}
5287
5288	/* Compare the checksums */
5289	if (DKD_GETCHKSUM(dkdevid) != chksum) {
5290		status = EINVAL;
5291		goto error;
5292	}
5293
5294	/* Validate the device id */
5295	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
5296		status = EINVAL;
5297		goto error;
5298	}
5299
5300	/*
5301	 * Store the device id in the driver soft state
5302	 */
5303	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
5304	tmpid = kmem_alloc(sz, KM_SLEEP);
5305
5306	mutex_enter(SD_MUTEX(un));
5307
5308	un->un_devid = tmpid;
5309	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
5310
5311	kmem_free(dkdevid, buffer_size);
5312
5313	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
5314
5315	return (status);
5316error:
5317	mutex_enter(SD_MUTEX(un));
5318	kmem_free(dkdevid, buffer_size);
5319	return (status);
5320}
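
/*
 * Illustrative sketch (not part of the driver): sd_get_devid() above
 * and sd_write_deviceid() below compute the same XOR checksum over the
 * devid sector, excluding the trailing word that holds the checksum
 * itself.  A shared standalone helper could look like this; the name
 * is hypothetical and blksize is assumed to be a non-zero multiple of
 * sizeof (uint_t):
 */
#include <sys/types.h>

static uint_t
devid_sector_checksum(const void *buf, size_t blksize)
{
	const uint_t	*ip = buf;
	size_t		nwords = (blksize - sizeof (uint_t)) /
	    sizeof (uint_t);
	uint_t		chksum = 0;
	size_t		i;

	for (i = 0; i < nwords; i++)
		chksum ^= ip[i];
	return (chksum);
}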
5321
5322
5323/*
5324 *    Function: sd_create_devid
5325 *
5326 * Description: This routine will fabricate the device id and write it
5327 *		to the disk.
5328 *
5329 *   Arguments: ssc - sd_ssc_t handle holding the soft state (unit)
5330 *
5331 * Return Code: the fabricated device id, or NULL if fabrication fails
5332 *
5333 *     Context: Kernel Thread
5334 */
5335
5336static ddi_devid_t
5337sd_create_devid(sd_ssc_t *ssc)
5338{
5339	struct sd_lun	*un;
5340
5341	ASSERT(ssc != NULL);
5342	un = ssc->ssc_un;
5343	ASSERT(un != NULL);
5344
5345	/* Fabricate the devid */
5346	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
5347	    == DDI_FAILURE) {
5348		return (NULL);
5349	}
5350
5351	/* Write the devid to disk */
5352	if (sd_write_deviceid(ssc) != 0) {
5353		ddi_devid_free(un->un_devid);
5354		un->un_devid = NULL;
5355	}
5356
5357	return (un->un_devid);
5358}
5359
5360
5361/*
5362 *    Function: sd_write_deviceid
5363 *
5364 * Description: This routine will write the device id to the disk
5365 *		reserved sector.
5366 *
5367 *   Arguments: ssc - sd_ssc_t handle holding the soft state (unit)
5368 *
5369 * Return Code: -1 if the devid block cannot be located
5370 *		value returned by sd_send_scsi_WRITE otherwise
5371 *
5372 *     Context: Kernel Thread
5373 */
5374
5375static int
5376sd_write_deviceid(sd_ssc_t *ssc)
5377{
5378	struct dk_devid		*dkdevid;
5379	diskaddr_t		blk;
5380	uint_t			*ip, chksum;
5381	int			status;
5382	int			i;
5383	struct sd_lun		*un;
5384
5385	ASSERT(ssc != NULL);
5386	un = ssc->ssc_un;
5387	ASSERT(un != NULL);
5388	ASSERT(mutex_owned(SD_MUTEX(un)));
5389
5390	mutex_exit(SD_MUTEX(un));
5391	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5392	    (void *)SD_PATH_DIRECT) != 0) {
5393		mutex_enter(SD_MUTEX(un));
5394		return (-1);
5395	}
5396
5397
5398	/* Allocate the buffer */
5399	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
5400
5401	/* Fill in the revision */
5402	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
5403	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
5404
5405	/* Copy in the device id */
5406	mutex_enter(SD_MUTEX(un));
5407	bcopy(un->un_devid, &dkdevid->dkd_devid,
5408	    ddi_devid_sizeof(un->un_devid));
5409	mutex_exit(SD_MUTEX(un));
5410
5411	/* Calculate the checksum */
5412	chksum = 0;
5413	ip = (uint_t *)dkdevid;
5414	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
5415	    i++) {
5416		chksum ^= ip[i];
5417	}
5418
5419	/* Fill-in checksum */
5420	DKD_FORMCHKSUM(chksum, dkdevid);
5421
5422	/* Write the reserved sector */
5423	status = sd_send_scsi_WRITE(ssc, dkdevid, un->un_sys_blocksize, blk,
5424	    SD_PATH_DIRECT);
5425	if (status != 0)
5426		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5427
5428	kmem_free(dkdevid, un->un_sys_blocksize);
5429
5430	mutex_enter(SD_MUTEX(un));
5431	return (status);
5432}
5433
5434
5435/*
5436 *    Function: sd_check_vpd_page_support
5437 *
5438 * Description: This routine sends an inquiry command with the EVPD bit set and
5439 *		a page code of 0x00 to the device. It is used to determine which
5440 *		vital product pages are available to find the devid. We are
5441 *		looking for pages 0x83 or 0x80.  If we return -1, the
5442 *		device does not support that command.
5443 *
5444 *   Arguments: ssc - sd_ssc_t handle holding the soft state (unit)
5445 *
5446 * Return Code: 0 - success
5447 *		-1 - device does not support VPD pages
5448 *
5449 *     Context: This routine can sleep.
5450 */
5451
5452static int
5453sd_check_vpd_page_support(sd_ssc_t *ssc)
5454{
5455	uchar_t	*page_list	= NULL;
5456	uchar_t	page_length	= 0xff;	/* Use max possible length */
5457	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
5458	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
5459	int    	rval		= 0;
5460	int	counter;
5461	struct sd_lun		*un;
5462
5463	ASSERT(ssc != NULL);
5464	un = ssc->ssc_un;
5465	ASSERT(un != NULL);
5466	ASSERT(mutex_owned(SD_MUTEX(un)));
5467
5468	mutex_exit(SD_MUTEX(un));
5469
5470	/*
5471	 * We'll set the page length to the maximum to save figuring it out
5472	 * with an additional call.
5473	 */
5474	page_list =  kmem_zalloc(page_length, KM_SLEEP);
5475
5476	rval = sd_send_scsi_INQUIRY(ssc, page_list, page_length, evpd,
5477	    page_code, NULL);
5478
5479	if (rval != 0)
5480		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5481
5482	mutex_enter(SD_MUTEX(un));
5483
5484	/*
5485	 * Now we must validate that the device accepted the command, as some
5486	 * drives do not support it.  If the drive does support it, we will
5487	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
5488	 * not, we return -1.
5489	 */
5490	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
5491		/* Loop to find one of the 2 pages we need */
5492		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
5493
5494		/*
5495		 * Pages are returned in ascending order, and 0x83 is what we
5496		 * are hoping for.
5497		 */
5498		while ((page_list[counter] <= 0x86) &&
5499		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5500		    VPD_HEAD_OFFSET))) {
5501			/*
5502			 * page_list[VPD_PAGE_LENGTH] (byte 3) counts the
5503			 * page codes that follow the 4-byte header.
5504			 */
5505
5506			switch (page_list[counter]) {
5507			case 0x00:
5508				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5509				break;
5510			case 0x80:
5511				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5512				break;
5513			case 0x81:
5514				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5515				break;
5516			case 0x82:
5517				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5518				break;
5519			case 0x83:
5520				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5521				break;
5522			case 0x86:
5523				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
5524				break;
5525			}
5526			counter++;
5527		}
5528
5529	} else {
5530		rval = -1;
5531
5532		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5533		    "sd_check_vpd_page_support: This drive does not implement "
5534		    "VPD pages.\n");
5535	}
5536
5537	kmem_free(page_list, page_length);
5538
5539	return (rval);
5540}
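
/*
 * Illustrative sketch (not part of the driver): the Supported VPD
 * Pages response parsed above carries the page code in byte 1, the
 * number of page codes that follow in byte 3, and the ascending list
 * itself from byte 4 on.  A minimal standalone membership test, with
 * hypothetical names:
 */
#include <sys/types.h>

static int
vpd_page_supported(const uchar_t *page_list, size_t buflen, uchar_t wanted)
{
	size_t	i, npages;

	if (buflen < 4 || page_list[1] != 0x00)
		return (0);	/* not a Supported VPD Pages response */
	npages = page_list[3];
	for (i = 0; i < npages && (4 + i) < buflen; i++) {
		if (page_list[4 + i] == wanted)
			return (1);
	}
	return (0);
}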
5541
5542
5543/*
5544 *    Function: sd_setup_pm
5545 *
5546 * Description: Initialize Power Management on the device
5547 *
5548 *     Context: Kernel Thread
5549 */
5550
5551static void
5552sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi)
5553{
5554	uint_t		log_page_size;
5555	uchar_t		*log_page_data;
5556	int		rval = 0;
5557	struct sd_lun	*un;
5558
5559	ASSERT(ssc != NULL);
5560	un = ssc->ssc_un;
5561	ASSERT(un != NULL);
5562
5563	/*
5564	 * Since we are called from attach, holding a mutex for
5565	 * un is unnecessary. Because some of the routines called
5566	 * from here require SD_MUTEX to not be held, assert this
5567	 * right up front.
5568	 */
5569	ASSERT(!mutex_owned(SD_MUTEX(un)));
5570	/*
5571	 * Since the sd device does not have the 'reg' property,
5572	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5573	 * The following code is to tell cpr that this device
5574	 * DOES need to be suspended and resumed.
5575	 */
5576	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5577	    "pm-hardware-state", "needs-suspend-resume");
5578
5579	/*
5580	 * This complies with the new power management framework
5581	 * for certain desktop machines. Create the pm_components
5582	 * property as a string array property.
5583	 */
5584	if (un->un_f_pm_supported) {
5585		/*
5586		 * Not all devices have a motor; try it first.
5587		 * Some devices may return ILLEGAL REQUEST, and some
5588		 * will hang.
5589		 * The following START STOP UNIT is used to check whether
5590		 * the target device has a motor.
5591		 */
5592		un->un_f_start_stop_supported = TRUE;
5593		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_START,
5594		    SD_PATH_DIRECT);
5595
5596		if (rval != 0) {
5597			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5598			un->un_f_start_stop_supported = FALSE;
5599		}
5600
5601		/*
5602		 * Create the pm properties anyway; otherwise the parent
5603		 * can't go to sleep.
5604		 */
5605		(void) sd_create_pm_components(devi, un);
5606		un->un_f_pm_is_enabled = TRUE;
5607		return;
5608	}
5609
5610	if (!un->un_f_log_sense_supported) {
5611		un->un_power_level = SD_SPINDLE_ON;
5612		un->un_f_pm_is_enabled = FALSE;
5613		return;
5614	}
5615
5616	rval = sd_log_page_supported(ssc, START_STOP_CYCLE_PAGE);
5617
5618#ifdef	SDDEBUG
5619	if (sd_force_pm_supported) {
5620		/* Force a successful result */
5621		rval = 1;
5622	}
5623#endif
5624
5625	/*
5626	 * If the start-stop cycle counter log page is not supported
5627	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
5628	 * then we should not create the pm_components property.
5629	 */
5630	if (rval == -1) {
5631		/*
5632		 * Error.
5633		 * Reading log sense failed, most likely this is
5634		 * an older drive that does not support log sense.
5635		 * If this fails auto-pm is not supported.
5636		 */
5637		un->un_power_level = SD_SPINDLE_ON;
5638		un->un_f_pm_is_enabled = FALSE;
5639
5640	} else if (rval == 0) {
5641		/*
5642		 * Page not found.
5643		 * The start stop cycle counter is implemented as page
5644		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
5645		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5646		 */
5647		if (sd_log_page_supported(ssc, START_STOP_CYCLE_VU_PAGE) == 1) {
5648			/*
5649			 * Page found, use this one.
5650			 */
5651			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5652			un->un_f_pm_is_enabled = TRUE;
5653		} else {
5654			/*
5655			 * Error or page not found.
5656			 * auto-pm is not supported for this device.
5657			 */
5658			un->un_power_level = SD_SPINDLE_ON;
5659			un->un_f_pm_is_enabled = FALSE;
5660		}
5661	} else {
5662		/*
5663		 * Page found, use it.
5664		 */
5665		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5666		un->un_f_pm_is_enabled = TRUE;
5667	}
5668
5669
5670	if (un->un_f_pm_is_enabled == TRUE) {
5671		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5672		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5673
5674		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
5675		    log_page_size, un->un_start_stop_cycle_page,
5676		    0x01, 0, SD_PATH_DIRECT);
5677
5678		if (rval != 0) {
5679			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5680		}
5681
5682#ifdef	SDDEBUG
5683		if (sd_force_pm_supported) {
5684			/* Force a successful result */
5685			rval = 0;
5686		}
5687#endif
5688
5689		/*
5690		 * If the LOG SENSE for the start/stop cycle counter page
5691		 * succeeds, then power management is supported and we can
5692		 * enable auto-pm.
5693		 */
5694		if (rval == 0)  {
5695			(void) sd_create_pm_components(devi, un);
5696		} else {
5697			un->un_power_level = SD_SPINDLE_ON;
5698			un->un_f_pm_is_enabled = FALSE;
5699		}
5700
5701		kmem_free(log_page_data, log_page_size);
5702	}
5703}
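
/*
 * Illustrative sketch (not part of the driver): the page selection in
 * sd_setup_pm() above boils down to "probe the standard page 0xE
 * first; on 'page not found' fall back to the vendor-unique page 0x31;
 * otherwise disable auto-pm".  With a hypothetical probe callback
 * returning 1 (page found), 0 (page not found) or -1 (LOG SENSE
 * failed):
 */
static int
pick_start_stop_page(int (*probe)(int page), int *page_out)
{
	int rval = probe(0x0E);		/* START_STOP_CYCLE_PAGE */

	if (rval == 1) {
		*page_out = 0x0E;
		return (1);
	}
	if (rval == 0 && probe(0x31) == 1) {
		*page_out = 0x31;	/* START_STOP_CYCLE_VU_PAGE */
		return (1);
	}
	return (0);	/* LOG SENSE failed or page absent: no auto-pm */
}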
5704
5705
5706/*
5707 *    Function: sd_create_pm_components
5708 *
5709 * Description: Initialize PM property.
5710 *
5711 *     Context: Kernel thread context
5712 */
5713
5714static void
5715sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
5716{
5717	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
5718
5719	ASSERT(!mutex_owned(SD_MUTEX(un)));
5720
5721	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
5722	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
5723		/*
5724		 * When components are initially created they are idle,
5725		 * power up any non-removables.
5726		 * Note: the return value of pm_raise_power can't be used
5727		 * for determining if PM should be enabled for this device.
5728		 * Even if you check the return values and remove this
5729		 * property created above, the PM framework will not honor the
5730		 * change after the first call to pm_raise_power. Hence,
5731		 * removal of that property does not help if pm_raise_power
5732		 * fails. In the case of removable media, the start/stop
5733		 * will fail if the media is not present.
5734		 */
5735		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
5736		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
5737			mutex_enter(SD_MUTEX(un));
5738			un->un_power_level = SD_SPINDLE_ON;
5739			mutex_enter(&un->un_pm_mutex);
5740			/* Set to on and not busy. */
5741			un->un_pm_count = 0;
5742		} else {
5743			mutex_enter(SD_MUTEX(un));
5744			un->un_power_level = SD_SPINDLE_OFF;
5745			mutex_enter(&un->un_pm_mutex);
5746			/* Set to off. */
5747			un->un_pm_count = -1;
5748		}
5749		mutex_exit(&un->un_pm_mutex);
5750		mutex_exit(SD_MUTEX(un));
5751	} else {
5752		un->un_power_level = SD_SPINDLE_ON;
5753		un->un_f_pm_is_enabled = FALSE;
5754	}
5755}
5756
5757
5758/*
5759 *    Function: sd_ddi_suspend
5760 *
5761 * Description: Performs system power-down operations. This includes
5762 *		setting the drive state to indicate it is suspended, so
5763 *		that no new commands will be accepted. Also, wait for
5764 *		all commands that are in transport or queued to a timer
5765 *		for retry to complete. All timeout threads are cancelled.
5766 *
5767 * Return Code: DDI_FAILURE or DDI_SUCCESS
5768 *
5769 *     Context: Kernel thread context
5770 */
5771
5772static int
5773sd_ddi_suspend(dev_info_t *devi)
5774{
5775	struct	sd_lun	*un;
5776	clock_t		wait_cmds_complete;
5777
5778	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
5779	if (un == NULL) {
5780		return (DDI_FAILURE);
5781	}
5782
5783	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
5784
5785	mutex_enter(SD_MUTEX(un));
5786
5787	/* Return success if the device is already suspended. */
5788	if (un->un_state == SD_STATE_SUSPENDED) {
5789		mutex_exit(SD_MUTEX(un));
5790		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5791		    "device already suspended, exiting\n");
5792		return (DDI_SUCCESS);
5793	}
5794
5795	/* Return failure if the device is being used by HA */
5796	if (un->un_resvd_status &
5797	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
5798		mutex_exit(SD_MUTEX(un));
5799		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5800		    "device in use by HA, exiting\n");
5801		return (DDI_FAILURE);
5802	}
5803
5804	/*
5805	 * Return failure if the device is in a resource wait
5806	 * or power changing state.
5807	 */
5808	if ((un->un_state == SD_STATE_RWAIT) ||
5809	    (un->un_state == SD_STATE_PM_CHANGING)) {
5810		mutex_exit(SD_MUTEX(un));
5811		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
5812		    "device in resource wait state, exiting\n");
5813		return (DDI_FAILURE);
5814	}
5815
5816
5817	un->un_save_state = un->un_last_state;
5818	New_state(un, SD_STATE_SUSPENDED);
5819
5820	/*
5821	 * Wait for all commands that are in transport or queued to a timer
5822	 * for retry to complete.
5823	 *
5824	 * While waiting, no new commands will be accepted or sent because of
5825	 * the new state we set above.
5826	 *
5827	 * Wait till current operation has completed. If we are in the resource
5828	 * wait state (with an intr outstanding) then we need to wait till the
5829	 * intr completes and starts the next cmd. We want to wait for
5830	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
5831	 */
5832	wait_cmds_complete = ddi_get_lbolt() +
5833	    (sd_wait_cmds_complete * drv_usectohz(1000000));
5834
5835	while (un->un_ncmds_in_transport != 0) {
5836		/*
5837		 * Fail if commands do not finish in the specified time.
5838		 */
5839		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
5840		    wait_cmds_complete) == -1) {
5841			/*
5842			 * Undo the state changes made above. Everything
5843			 * must go back to its original value.
5844			 */
5845			Restore_state(un);
5846			un->un_last_state = un->un_save_state;
5847			/* Wake up any threads that might be waiting. */
5848			cv_broadcast(&un->un_suspend_cv);
5849			mutex_exit(SD_MUTEX(un));
5850			SD_ERROR(SD_LOG_IO_PM, un,
5851			    "sd_ddi_suspend: failed due to outstanding cmds\n");
5852			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
5853			return (DDI_FAILURE);
5854		}
5855	}
5856
5857	/*
5858	 * Cancel SCSI watch thread and timeouts, if any are active
5859	 */
5860
5861	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
5862		opaque_t temp_token = un->un_swr_token;
5863		mutex_exit(SD_MUTEX(un));
5864		scsi_watch_suspend(temp_token);
5865		mutex_enter(SD_MUTEX(un));
5866	}
5867
5868	if (un->un_reset_throttle_timeid != NULL) {
5869		timeout_id_t temp_id = un->un_reset_throttle_timeid;
5870		un->un_reset_throttle_timeid = NULL;
5871		mutex_exit(SD_MUTEX(un));
5872		(void) untimeout(temp_id);
5873		mutex_enter(SD_MUTEX(un));
5874	}
5875
5876	if (un->un_dcvb_timeid != NULL) {
5877		timeout_id_t temp_id = un->un_dcvb_timeid;
5878		un->un_dcvb_timeid = NULL;
5879		mutex_exit(SD_MUTEX(un));
5880		(void) untimeout(temp_id);
5881		mutex_enter(SD_MUTEX(un));
5882	}
5883
5884	mutex_enter(&un->un_pm_mutex);
5885	if (un->un_pm_timeid != NULL) {
5886		timeout_id_t temp_id = un->un_pm_timeid;
5887		un->un_pm_timeid = NULL;
5888		mutex_exit(&un->un_pm_mutex);
5889		mutex_exit(SD_MUTEX(un));
5890		(void) untimeout(temp_id);
5891		mutex_enter(SD_MUTEX(un));
5892	} else {
5893		mutex_exit(&un->un_pm_mutex);
5894	}
5895
5896	if (un->un_retry_timeid != NULL) {
5897		timeout_id_t temp_id = un->un_retry_timeid;
5898		un->un_retry_timeid = NULL;
5899		mutex_exit(SD_MUTEX(un));
5900		(void) untimeout(temp_id);
5901		mutex_enter(SD_MUTEX(un));
5902
5903		if (un->un_retry_bp != NULL) {
5904			un->un_retry_bp->av_forw = un->un_waitq_headp;
5905			un->un_waitq_headp = un->un_retry_bp;
5906			if (un->un_waitq_tailp == NULL) {
5907				un->un_waitq_tailp = un->un_retry_bp;
5908			}
5909			un->un_retry_bp = NULL;
5910			un->un_retry_statp = NULL;
5911		}
5912	}
5913
5914	if (un->un_direct_priority_timeid != NULL) {
5915		timeout_id_t temp_id = un->un_direct_priority_timeid;
5916		un->un_direct_priority_timeid = NULL;
5917		mutex_exit(SD_MUTEX(un));
5918		(void) untimeout(temp_id);
5919		mutex_enter(SD_MUTEX(un));
5920	}
5921
5922	if (un->un_f_is_fibre == TRUE) {
5923		/*
5924		 * Remove callbacks for insert and remove events
5925		 */
5926		if (un->un_insert_event != NULL) {
5927			mutex_exit(SD_MUTEX(un));
5928			(void) ddi_remove_event_handler(un->un_insert_cb_id);
5929			mutex_enter(SD_MUTEX(un));
5930			un->un_insert_event = NULL;
5931		}
5932
5933		if (un->un_remove_event != NULL) {
5934			mutex_exit(SD_MUTEX(un));
5935			(void) ddi_remove_event_handler(un->un_remove_cb_id);
5936			mutex_enter(SD_MUTEX(un));
5937			un->un_remove_event = NULL;
5938		}
5939	}
5940
5941	mutex_exit(SD_MUTEX(un));
5942
5943	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
5944
5945	return (DDI_SUCCESS);
5946}
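
/*
 * Illustrative sketch (not part of the driver): the drain loop in
 * sd_ddi_suspend() above computes one absolute deadline in lbolt ticks
 * so that every cv_timedwait(9F) wakeup shares the same overall
 * timeout.  The shape of that pattern as a standalone helper; kernel
 * DDI context is assumed, all names other than the DDI calls are
 * hypothetical, and the caller is assumed to hold *mp.
 */
#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

static int
drain_with_deadline(kmutex_t *mp, kcondvar_t *cvp, int *ncmds, int seconds)
{
	clock_t deadline = ddi_get_lbolt() +
	    seconds * drv_usectohz(1000000);

	while (*ncmds != 0) {
		/* cv_timedwait() returns -1 once the deadline passes. */
		if (cv_timedwait(cvp, mp, deadline) == -1)
			return (-1);	/* did not drain in time */
	}
	return (0);
}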
5947
5948
5949/*
5950 *    Function: sd_ddi_pm_suspend
5951 *
5952 * Description: Set the drive state to low power.
5953 *		Someone else is required to actually change the drive
5954 *		power level.
5955 *
5956 *   Arguments: un - driver soft state (unit) structure
5957 *
5958 * Return Code: DDI_FAILURE or DDI_SUCCESS
5959 *
5960 *     Context: Kernel thread context
5961 */
5962
5963static int
5964sd_ddi_pm_suspend(struct sd_lun *un)
5965{
5966	ASSERT(un != NULL);
5967	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
5968
5969	ASSERT(!mutex_owned(SD_MUTEX(un)));
5970	mutex_enter(SD_MUTEX(un));
5971
5972	/*
5973	 * Exit if power management is not enabled for this device, or if
5974	 * the device is being used by HA.
5975	 */
5976	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
5977	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
5978		mutex_exit(SD_MUTEX(un));
5979		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
5980		return (DDI_SUCCESS);
5981	}
5982
5983	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
5984	    un->un_ncmds_in_driver);
5985
5986	/*
5987	 * See if the device is not busy, i.e.:
5988	 *    - we have no commands in the driver for this device
5989	 *    - not waiting for resources
5990	 */
5991	if ((un->un_ncmds_in_driver == 0) &&
5992	    (un->un_state != SD_STATE_RWAIT)) {
5993		/*
5994		 * The device is not busy, so it is OK to go to low power state.
5995		 * Indicate low power, but rely on someone else to actually
5996		 * change it.
5997		 */
5998		mutex_enter(&un->un_pm_mutex);
5999		un->un_pm_count = -1;
6000		mutex_exit(&un->un_pm_mutex);
6001		un->un_power_level = SD_SPINDLE_OFF;
6002	}
6003
6004	mutex_exit(SD_MUTEX(un));
6005
6006	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6007
6008	return (DDI_SUCCESS);
6009}
6010
6011
6012/*
6013 *    Function: sd_ddi_resume
6014 *
6015 * Description: Performs system power-up operations.
6016 *
6017 * Return Code: DDI_SUCCESS
6018 *		DDI_FAILURE
6019 *
6020 *     Context: Kernel thread context
6021 */
6022
6023static int
6024sd_ddi_resume(dev_info_t *devi)
6025{
6026	struct	sd_lun	*un;
6027
6028	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6029	if (un == NULL) {
6030		return (DDI_FAILURE);
6031	}
6032
6033	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6034
6035	mutex_enter(SD_MUTEX(un));
6036	Restore_state(un);
6037
6038	/*
6039	 * Restore the state which was saved, to give the
6040	 * right state in un_last_state.
6041	 */
6042	un->un_last_state = un->un_save_state;
6043	/*
6044	 * Note: throttle comes back at full.
6045	 * Also note: this MUST be done before calling pm_raise_power
6046	 * otherwise the system can get hung in biowait. The scenario where
6047	 * this'll happen is under cpr suspend. Writing of the system
6048	 * state goes through sddump, which writes 0 to un_throttle. If
6049	 * writing the system state then fails, example if the partition is
6050	 * writing the system state then fails, for example if the partition is
6051	 * from the saved value until after calling pm_raise_power then
6052	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6053	 * in biowait.
6054	 */
6055	un->un_throttle = un->un_saved_throttle;
6056
6057	/*
6058	 * The chance of failure is very rare, as the only command issued in
6059	 * the power entry point is START, when transitioning from 0->1 or
6060	 * unknown->1. Put the device in the SPINDLE ON state irrespective of
6061	 * the state at which suspend was done. Ignore the return value, as
6062	 * the resume should not be failed. In the case of removable media the
6063	 * media need not be inserted, and hence there is a chance that raise
6064	 * power will fail with media not present.
6065	 */
6066	if (un->un_f_attach_spinup) {
6067		mutex_exit(SD_MUTEX(un));
6068		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
6069		mutex_enter(SD_MUTEX(un));
6070	}
6071
6072	/*
6073	 * Don't broadcast to the suspend cv and therefore possibly
6074	 * start I/O until after power has been restored.
6075	 */
6076	cv_broadcast(&un->un_suspend_cv);
6077	cv_broadcast(&un->un_state_cv);
6078
6079	/* restart thread */
6080	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6081		scsi_watch_resume(un->un_swr_token);
6082	}
6083
6084#if (defined(__fibre))
6085	if (un->un_f_is_fibre == TRUE) {
6086		/*
6087		 * Add callbacks for insert and remove events
6088		 */
6089		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6090			sd_init_event_callbacks(un);
6091		}
6092	}
6093#endif
6094
6095	/*
6096	 * Transport any pending commands to the target.
6097	 *
6098	 * If this is a low-activity device commands in queue will have to wait
6099	 * until new commands come in, which may take a while. Also, we
6100	 * specifically don't check un_ncmds_in_transport because we know that
6101	 * there really are no commands in progress after the unit was
6102	 * suspended and we could have reached the throttle level, been
6103	 * suspended, and have no new commands coming in for awhile. Highly
6104	 * suspended, and have no new commands coming in for a while. Highly
6105	 */
6106	ddi_xbuf_dispatch(un->un_xbuf_attr);
6107
6108	sd_start_cmds(un, NULL);
6109	mutex_exit(SD_MUTEX(un));
6110
6111	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6112
6113	return (DDI_SUCCESS);
6114}
6115
6116
6117/*
6118 *    Function: sd_ddi_pm_resume
6119 *
6120 * Description: Set the drive state to powered on.
6121 *		Someone else is required to actually change the drive
6122 *		power level.
6123 *
6124 *   Arguments: un - driver soft state (unit) structure
6125 *
6126 * Return Code: DDI_SUCCESS
6127 *
6128 *     Context: Kernel thread context
6129 */
6130
6131static int
6132sd_ddi_pm_resume(struct sd_lun *un)
6133{
6134	ASSERT(un != NULL);
6135
6136	ASSERT(!mutex_owned(SD_MUTEX(un)));
6137	mutex_enter(SD_MUTEX(un));
6138	un->un_power_level = SD_SPINDLE_ON;
6139
6140	ASSERT(!mutex_owned(&un->un_pm_mutex));
6141	mutex_enter(&un->un_pm_mutex);
6142	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
6143		un->un_pm_count++;
6144		ASSERT(un->un_pm_count == 0);
6145		/*
6146		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
6147		 * un_suspend_cv is for a system resume, not a power management
6148		 * device resume. (4297749)
6149		 *	 cv_broadcast(&un->un_suspend_cv);
6150		 */
6151	}
6152	mutex_exit(&un->un_pm_mutex);
6153	mutex_exit(SD_MUTEX(un));
6154
6155	return (DDI_SUCCESS);
6156}
6157
6158
6159/*
6160 *    Function: sd_pm_idletimeout_handler
6161 *
6162 * Description: A timer routine that's active only while a device is busy.
6163 *		The purpose is to extend slightly the pm framework's busy
6164 *		view of the device to prevent busy/idle thrashing for
6165 *		back-to-back commands. Do this by comparing the current time
6166 *		to the time at which the last command completed and when the
6167 *		difference is greater than sd_pm_idletime, call
6168 *		pm_idle_component. In addition to indicating idle to the pm
6169 *		framework, update the chain type to again use the internal pm
6170 *		layers of the driver.
6171 *
6172 *   Arguments: arg - driver soft state (unit) structure
6173 *
6174 *     Context: Executes in a timeout(9F) thread context
6175 */
6176
6177static void
6178sd_pm_idletimeout_handler(void *arg)
6179{
6180	struct sd_lun *un = arg;
6181
6182	time_t	now;
6183
6184	mutex_enter(&sd_detach_mutex);
6185	if (un->un_detach_count != 0) {
6186		/* Abort if the instance is detaching */
6187		mutex_exit(&sd_detach_mutex);
6188		return;
6189	}
6190	mutex_exit(&sd_detach_mutex);
6191
6192	now = ddi_get_time();
6193	/*
6194	 * Grab both mutexes, in the proper order, since we're accessing
6195	 * both PM and softstate variables.
6196	 */
6197	mutex_enter(SD_MUTEX(un));
6198	mutex_enter(&un->un_pm_mutex);
6199	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
6200	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
6201		/*
6202		 * Update the chain types.
6203		 * This takes effect on the next new command received.
6204		 */
6205		if (un->un_f_non_devbsize_supported) {
6206			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6207		} else {
6208			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6209		}
6210		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
6211
6212		SD_TRACE(SD_LOG_IO_PM, un,
6213		    "sd_pm_idletimeout_handler: idling device\n");
6214		(void) pm_idle_component(SD_DEVINFO(un), 0);
6215		un->un_pm_idle_timeid = NULL;
6216	} else {
6217		un->un_pm_idle_timeid =
6218		    timeout(sd_pm_idletimeout_handler, un,
6219		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
6220	}
6221	mutex_exit(&un->un_pm_mutex);
6222	mutex_exit(SD_MUTEX(un));
6223}
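
/*
 * Illustrative sketch (not part of the driver): the handler above is a
 * self-rearming timeout.  While its condition is unmet it reschedules
 * itself, and it clears its own id only on the final firing so that a
 * detach path can still untimeout(9F) a live id.  Skeleton of the
 * pattern; kernel DDI context is assumed and my_state_t is
 * hypothetical.
 */
#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

typedef struct my_state {
	kmutex_t	ms_lock;
	timeout_id_t	ms_timeid;
	int		ms_busy;
} my_state_t;

static void
poll_handler(void *arg)
{
	my_state_t *sp = arg;

	mutex_enter(&sp->ms_lock);
	if (sp->ms_busy) {
		/* Not idle yet: re-arm ourselves for another 300 ms. */
		sp->ms_timeid = timeout(poll_handler, sp,
		    drv_usectohz(300000));
	} else {
		/* Final firing: clear the id; no firing is pending now. */
		sp->ms_timeid = NULL;
	}
	mutex_exit(&sp->ms_lock);
}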
6224
6225
6226/*
6227 *    Function: sd_pm_timeout_handler
6228 *
6229 * Description: Callback to tell framework we are idle.
6230 *
6231 *     Context: timeout(9F) thread context.
6232 */
6233
6234static void
6235sd_pm_timeout_handler(void *arg)
6236{
6237	struct sd_lun *un = arg;
6238
6239	(void) pm_idle_component(SD_DEVINFO(un), 0);
6240	mutex_enter(&un->un_pm_mutex);
6241	un->un_pm_timeid = NULL;
6242	mutex_exit(&un->un_pm_mutex);
6243}
6244
6245
6246/*
6247 *    Function: sdpower
6248 *
6249 * Description: PM entry point.
6250 *
6251 * Return Code: DDI_SUCCESS
6252 *		DDI_FAILURE
6253 *
6254 *     Context: Kernel thread context
6255 */
6256
6257static int
6258sdpower(dev_info_t *devi, int component, int level)
6259{
6260	struct sd_lun	*un;
6261	int		instance;
6262	int		rval = DDI_SUCCESS;
6263	uint_t		i, log_page_size, maxcycles, ncycles;
6264	uchar_t		*log_page_data;
6265	int		log_sense_page;
6266	int		medium_present;
6267	time_t		intvlp;
6268	dev_t		dev;
6269	struct pm_trans_data	sd_pm_tran_data;
6270	uchar_t		save_state;
6271	int		sval;
6272	uchar_t		state_before_pm;
6273	int		got_semaphore_here;
6274	sd_ssc_t	*ssc;
6275
6276	instance = ddi_get_instance(devi);
6277
6278	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
6279	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
6280	    component != 0) {
6281		return (DDI_FAILURE);
6282	}
6283
6284	dev = sd_make_device(SD_DEVINFO(un));
6285	ssc = sd_ssc_init(un);
6286
6287	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
6288
6289	/*
6290	 * Must synchronize power down with close.
6291	 * Attempt to decrement/acquire the open/close semaphore,
6292	 * but do NOT wait on it. If it's not greater than zero,
6293	 * i.e. it can't be decremented without waiting, then
6294	 * someone else, either open or close, already has it
6295	 * and the try returns 0. Use that knowledge here to determine
6296	 * if it's OK to change the device power level.
6297	 * Also, only increment it on exit if it was decremented, ie. gotten,
6298	 * here.
6299	 */
6300	got_semaphore_here = sema_tryp(&un->un_semoclose);
6301
6302	mutex_enter(SD_MUTEX(un));
6303
6304	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
6305	    un->un_ncmds_in_driver);
6306
6307	/*
6308	 * If un_ncmds_in_driver is non-zero it indicates commands are
6309	 * already being processed in the driver, or if the semaphore was
6310	 * not gotten here it indicates an open or close is being processed.
6311	 * At the same time somebody is requesting to go to low power, which
6312	 * can't happen; therefore we need to return failure.
6313	 */
6314	if ((level == SD_SPINDLE_OFF) &&
6315	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
6316		mutex_exit(SD_MUTEX(un));
6317
6318		if (got_semaphore_here != 0) {
6319			sema_v(&un->un_semoclose);
6320		}
6321		SD_TRACE(SD_LOG_IO_PM, un,
6322		    "sdpower: exit, device has queued cmds.\n");
6323
6324		goto sdpower_failed;
6325	}
6326
6327	/*
6328	 * If the state is OFFLINE, the disk is completely dead; in our case
6329	 * we would have to turn the disk on or off by sending commands, and
6330	 * of course those will fail anyway, so just return from here.
6331	 *
6332	 * Power changes to a device that's OFFLINE or SUSPENDED
6333	 * are not allowed.
6334	 */
6335	if ((un->un_state == SD_STATE_OFFLINE) ||
6336	    (un->un_state == SD_STATE_SUSPENDED)) {
6337		mutex_exit(SD_MUTEX(un));
6338
6339		if (got_semaphore_here != 0) {
6340			sema_v(&un->un_semoclose);
6341		}
6342		SD_TRACE(SD_LOG_IO_PM, un,
6343		    "sdpower: exit, device is off-line.\n");
6344
6345		goto sdpower_failed;
6346	}
6347
6348	/*
6349	 * Change the device's state to indicate its power level
6350	 * is being changed. Do this to prevent a power off in the
6351	 * middle of commands, which is especially bad on devices
6352	 * that are really powered off instead of just spun down.
6353	 */
6354	state_before_pm = un->un_state;
6355	un->un_state = SD_STATE_PM_CHANGING;
6356
6357	mutex_exit(SD_MUTEX(un));
6358
6359	/*
6360	 * If "pm-capable" property is set to TRUE by HBA drivers,
6361	 * bypass the following checking; otherwise, check the log
6362	 * sense information for this device.
6363	 */
6364	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
6365		/*
6366		 * Get the log sense information to understand whether the
6367		 * power cycle counts have gone beyond the threshold.
6368		 */
6369		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6370		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6371
6372		mutex_enter(SD_MUTEX(un));
6373		log_sense_page = un->un_start_stop_cycle_page;
6374		mutex_exit(SD_MUTEX(un));
6375
6376		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6377		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
6378
6379		if (rval != 0) {
6380			if (rval == EIO)
6381				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6382			else
6383				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6384		}
6385
6386#ifdef	SDDEBUG
6387		if (sd_force_pm_supported) {
6388			/* Force a successful result */
6389			rval = 0;
6390		}
6391#endif
6392		if (rval != 0) {
6393			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
6394			    "Log Sense Failed\n");
6395
6396			kmem_free(log_page_data, log_page_size);
6397			/* Cannot support power management on those drives */
6398
6399			if (got_semaphore_here != 0) {
6400				sema_v(&un->un_semoclose);
6401			}
6402			/*
6403			 * On exit put the state back to its original value
6404			 * and broadcast to anyone waiting for the power
6405			 * change completion.
6406			 */
6407			mutex_enter(SD_MUTEX(un));
6408			un->un_state = state_before_pm;
6409			cv_broadcast(&un->un_suspend_cv);
6410			mutex_exit(SD_MUTEX(un));
6411			SD_TRACE(SD_LOG_IO_PM, un,
6412			    "sdpower: exit, Log Sense Failed.\n");
6413
6414			goto sdpower_failed;
6415		}
6416
6417		/*
6418		 * From the page data, convert the essential information to
6419		 * pm_trans_data
6420		 */
6421		maxcycles =
6422		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
6423		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
6424
6425		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
6426
6427		ncycles =
6428		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
6429		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
6430
6431		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
6432
6433		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
6434			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
6435			    log_page_data[8+i];
6436		}
6437
6438		kmem_free(log_page_data, log_page_size);
6439
6440		/*
6441		 * Call pm_trans_check routine to get the Ok from
6442		 * the global policy
6443		 */
6444
6445		sd_pm_tran_data.format = DC_SCSI_FORMAT;
6446		sd_pm_tran_data.un.scsi_cycles.flag = 0;
6447
6448		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
6449#ifdef	SDDEBUG
6450		if (sd_force_pm_supported) {
6451			/* Force a successful result */
6452			rval = 1;
6453		}
6454#endif
6455		switch (rval) {
6456		case 0:
6457			 * Not OK to power cycle, or an error in the
6458			 * parameters passed; the advised time to next
6459			 * consider a power cycle is returned in intvlp.
6460			 * Based on that we are supposed to pretend we are
6461			 * busy so that the pm framework will never call our
6462			 * power entry point. To that end, install a timeout
6463			 * handler and wait for the recommended time to
6464			 * elapse so that power management works again.
6465			 * can be effective again.
6466			 *
6467			 * To effect this behavior, call pm_busy_component to
6468			 * indicate to the framework this device is busy.
6469			 * By not adjusting un_pm_count the rest of PM in
6470			 * the driver will function normally, and independent
6471			 * the driver will function normally, independent of
6472			 * this; but because the framework is told the device
6473			 * is busy it won't attempt powering down until it gets
6474			 * Note: sd_pm_entry can't be called here to do this
6475			 * because sdpower may have been called as a result
6476			 * of a call to pm_raise_power from within sd_pm_entry.
6477			 *
6478			 * If a timeout handler is already active then
6479			 * don't install another.
6480			 */
6481			mutex_enter(&un->un_pm_mutex);
6482			if (un->un_pm_timeid == NULL) {
6483				un->un_pm_timeid =
6484				    timeout(sd_pm_timeout_handler,
6485				    un, intvlp * drv_usectohz(1000000));
6486				mutex_exit(&un->un_pm_mutex);
6487				(void) pm_busy_component(SD_DEVINFO(un), 0);
6488			} else {
6489				mutex_exit(&un->un_pm_mutex);
6490			}
6491			if (got_semaphore_here != 0) {
6492				sema_v(&un->un_semoclose);
6493			}
6494			/*
6495			 * On exit put the state back to its original value
6496			 * and broadcast to anyone waiting for the power
6497			 * change completion.
6498			 */
6499			mutex_enter(SD_MUTEX(un));
6500			un->un_state = state_before_pm;
6501			cv_broadcast(&un->un_suspend_cv);
6502			mutex_exit(SD_MUTEX(un));
6503
6504			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
6505			    "trans check Failed, not ok to power cycle.\n");
6506
6507			goto sdpower_failed;
6508		case -1:
6509			if (got_semaphore_here != 0) {
6510				sema_v(&un->un_semoclose);
6511			}
6512			/*
6513			 * On exit put the state back to its original value
6514			 * and broadcast to anyone waiting for the power
6515			 * change completion.
6516			 */
6517			mutex_enter(SD_MUTEX(un));
6518			un->un_state = state_before_pm;
6519			cv_broadcast(&un->un_suspend_cv);
6520			mutex_exit(SD_MUTEX(un));
6521			SD_TRACE(SD_LOG_IO_PM, un,
6522			    "sdpower: exit, trans check command Failed.\n");
6523
6524			goto sdpower_failed;
6525		}
6526	}
6527
6528	if (level == SD_SPINDLE_OFF) {
6529		/*
6530		 * Save the last state... if the STOP fails we need it
6531		 * for restoring.
6532		 */
6533		mutex_enter(SD_MUTEX(un));
6534		save_state = un->un_last_state;
6535		/*
6536		 * There must not be any commands being processed
6537		 * in the driver when we get here. Power to the
6538		 * device is potentially going off.
6539		 */
6540		ASSERT(un->un_ncmds_in_driver == 0);
6541		mutex_exit(SD_MUTEX(un));
6542
6543		/*
6544		 * For now suspend the device completely before the spindle is
6545		 * turned off
6546		 */
6547		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
6548			if (got_semaphore_here != 0) {
6549				sema_v(&un->un_semoclose);
6550			}
6551			/*
6552			 * On exit put the state back to its original value
6553			 * and broadcast to anyone waiting for the power
6554			 * change completion.
6555			 */
6556			mutex_enter(SD_MUTEX(un));
6557			un->un_state = state_before_pm;
6558			cv_broadcast(&un->un_suspend_cv);
6559			mutex_exit(SD_MUTEX(un));
6560			SD_TRACE(SD_LOG_IO_PM, un,
6561			    "sdpower: exit, PM suspend Failed.\n");
6562
6563			goto sdpower_failed;
6564		}
6565	}
6566
6567	/*
6568	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
6569	 * close, or strategy. Dump no longer uses this routine; it uses its
6570	 * own code so it can be done in polled mode.
6571	 */
6572
6573	medium_present = TRUE;
6574
6575	/*
6576	 * When powering up, issue a TUR in case the device is at unit
6577	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6578	 * a deadlock on un_pm_busy_cv will occur.
6579	 */
6580	if (level == SD_SPINDLE_ON) {
6581		sval = sd_send_scsi_TEST_UNIT_READY(ssc,
6582		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6583		if (sval != 0)
6584			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6585	}
6586
6587	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6588	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6589
6590	sval = sd_send_scsi_START_STOP_UNIT(ssc,
6591	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
6592	    SD_PATH_DIRECT);
6593	if (sval != 0) {
6594		if (sval == EIO)
6595			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6596		else
6597			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6598	}
6599
6600	/* Command failed, check for media present. */
6601	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6602		medium_present = FALSE;
6603	}
6604
6605	/*
6606	 * The conditions of interest here are:
6607	 *   if a spindle off with media present fails,
6608	 *	then restore the state and return an error.
6609	 *   else if a spindle on fails,
6610	 *	then return an error (there's no state to restore).
6611	 * In all other cases we setup for the new state
6612	 * and return success.
6613	 */
6614	switch (level) {
6615	case SD_SPINDLE_OFF:
6616		if ((medium_present == TRUE) && (sval != 0)) {
6617			/* The stop command from above failed */
6618			rval = DDI_FAILURE;
6619			/*
6620			 * The stop command failed, and we have media
6621			 * present. Put the level back by calling
6622			 * sd_ddi_pm_resume() and set the state back to
6623			 * its previous value.
6624			 */
6625			(void) sd_ddi_pm_resume(un);
6626			mutex_enter(SD_MUTEX(un));
6627			un->un_last_state = save_state;
6628			mutex_exit(SD_MUTEX(un));
6629			break;
6630		}
6631		/*
6632		 * The stop command from above succeeded.
6633		 */
6634		if (un->un_f_monitor_media_state) {
6635			/*
6636			 * Terminate watch thread in case of removable media
6637			 * devices going into low power state. This is as per
6638			 * the requirements of the pm framework; otherwise commands
6639			 * will be generated for the device (through watch
6640			 * thread), even when the device is in low power state.
6641			 */
6642			mutex_enter(SD_MUTEX(un));
6643			un->un_f_watcht_stopped = FALSE;
6644			if (un->un_swr_token != NULL) {
6645				opaque_t temp_token = un->un_swr_token;
6646				un->un_f_watcht_stopped = TRUE;
6647				un->un_swr_token = NULL;
6648				mutex_exit(SD_MUTEX(un));
6649				(void) scsi_watch_request_terminate(temp_token,
6650				    SCSI_WATCH_TERMINATE_ALL_WAIT);
6651			} else {
6652				mutex_exit(SD_MUTEX(un));
6653			}
6654		}
6655		break;
6656
6657	default:	/* The level requested is spindle on... */
6658		/*
6659		 * Legacy behavior: return success on a failed spinup
6660		 * if there is no media in the drive.
6661		 * Do this by looking at medium_present here.
6662		 */
6663		if ((sval != 0) && medium_present) {
6664			/* The start command from above failed */
6665			rval = DDI_FAILURE;
6666			break;
6667		}
6668		/*
6669		 * The start command from above succeeded.
6670		 * Resume the devices now that we have
6671		 * started the disks.
6672		 */
6673		(void) sd_ddi_pm_resume(un);
6674
6675		/*
6676		 * Resume the watch thread since it was suspended
6677		 * when the device went into low power mode.
6678		 */
6679		if (un->un_f_monitor_media_state) {
6680			mutex_enter(SD_MUTEX(un));
6681			if (un->un_f_watcht_stopped == TRUE) {
6682				opaque_t temp_token;
6683
6684				un->un_f_watcht_stopped = FALSE;
6685				mutex_exit(SD_MUTEX(un));
6686				temp_token = scsi_watch_request_submit(
6687				    SD_SCSI_DEVP(un),
6688				    sd_check_media_time,
6689				    SENSE_LENGTH, sd_media_watch_cb,
6690				    (caddr_t)dev);
6691				mutex_enter(SD_MUTEX(un));
6692				un->un_swr_token = temp_token;
6693			}
6694			mutex_exit(SD_MUTEX(un));
6695		}
6696	}
6697	if (got_semaphore_here != 0) {
6698		sema_v(&un->un_semoclose);
6699	}
6700	/*
6701	 * On exit put the state back to its original value
6702	 * and broadcast to anyone waiting for the power
6703	 * change completion.
6704	 */
6705	mutex_enter(SD_MUTEX(un));
6706	un->un_state = state_before_pm;
6707	cv_broadcast(&un->un_suspend_cv);
6708	mutex_exit(SD_MUTEX(un));
6709
6710	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
6711
6712	sd_ssc_fini(ssc);
6713	return (rval);
6714
6715sdpower_failed:
6716
6717	sd_ssc_fini(ssc);
6718	return (DDI_FAILURE);
6719}
6720
6721
6722
6723/*
6724 *    Function: sdattach
6725 *
6726 * Description: Driver's attach(9E) entry point function.
6727 *
6728 *   Arguments: devi - opaque device info handle
6729 *		cmd  - attach  type
6730 *
6731 * Return Code: DDI_SUCCESS
6732 *		DDI_FAILURE
6733 *
6734 *     Context: Kernel thread context
6735 */
6736
6737static int
6738sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
6739{
6740	switch (cmd) {
6741	case DDI_ATTACH:
6742		return (sd_unit_attach(devi));
6743	case DDI_RESUME:
6744		return (sd_ddi_resume(devi));
6745	default:
6746		break;
6747	}
6748	return (DDI_FAILURE);
6749}
6750
6751
6752/*
6753 *    Function: sddetach
6754 *
6755 * Description: Driver's detach(9E) entry point function.
6756 *
6757 *   Arguments: devi - opaque device info handle
6758 *		cmd  - detach  type
6759 *
6760 * Return Code: DDI_SUCCESS
6761 *		DDI_FAILURE
6762 *
6763 *     Context: Kernel thread context
6764 */
6765
6766static int
6767sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
6768{
6769	switch (cmd) {
6770	case DDI_DETACH:
6771		return (sd_unit_detach(devi));
6772	case DDI_SUSPEND:
6773		return (sd_ddi_suspend(devi));
6774	default:
6775		break;
6776	}
6777	return (DDI_FAILURE);
6778}
6779
6780
6781/*
6782 *     Function: sd_sync_with_callback
6783 *
6784 *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
6785 *		 state while the callback routine is active.
6786 *
6787 *    Arguments: un: softstate structure for the instance
6788 *
6789 *	Context: Kernel thread context
6790 */
6791
6792static void
6793sd_sync_with_callback(struct sd_lun *un)
6794{
6795	ASSERT(un != NULL);
6796
6797	mutex_enter(SD_MUTEX(un));
6798
6799	ASSERT(un->un_in_callback >= 0);
6800
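	/*
	 * Poll until the callback count drains to zero. SD_MUTEX is
	 * dropped between checks so the active callback can acquire it
	 * and decrement un_in_callback; delay(2) waits two clock ticks
	 * per iteration.
	 */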
6801	while (un->un_in_callback > 0) {
6802		mutex_exit(SD_MUTEX(un));
6803		delay(2);
6804		mutex_enter(SD_MUTEX(un));
6805	}
6806
6807	mutex_exit(SD_MUTEX(un));
6808}
6809
6810/*
6811 *    Function: sd_unit_attach
6812 *
6813 * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
6814 *		the soft state structure for the device and performs
6815 *		all necessary structure and device initializations.
6816 *
6817 *   Arguments: devi: the system's dev_info_t for the device.
6818 *
6819 * Return Code: DDI_SUCCESS if attach is successful.
6820 *		DDI_FAILURE if any part of the attach fails.
6821 *
6822 *     Context: Called at attach(9E) time for the DDI_ATTACH flag.
6823 *		Kernel thread context only.  Can sleep.
6824 */
6825
6826static int
6827sd_unit_attach(dev_info_t *devi)
6828{
6829	struct	scsi_device	*devp;
6830	struct	sd_lun		*un;
6831	char			*variantp;
6832	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
6833	int	instance;
6834	int	rval;
6835	int	wc_enabled;
6836	int	tgt;
6837	uint64_t	capacity;
6838	uint_t		lbasize = 0;
6839	dev_info_t	*pdip = ddi_get_parent(devi);
6840	int		offbyone = 0;
6841	int		geom_label_valid = 0;
6842	sd_ssc_t	*ssc;
6843	int		status;
6844	struct sd_fm_internal	*sfip = NULL;
6845	int		max_xfer_size;
6846
6847	/*
6848	 * Retrieve the target driver's private data area. This was set
6849	 * up by the HBA.
6850	 */
6851	devp = ddi_get_driver_private(devi);
6852
6853	/*
6854	 * Retrieve the target ID of the device.
6855	 */
6856	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6857	    SCSI_ADDR_PROP_TARGET, -1);
6858
6859	/*
6860	 * Since we have no idea what state things were left in by the last
6861	 * user of the device, set up some 'default' settings, i.e., turn 'em
6862	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
6863	 * Do this before the scsi_probe, which sends an inquiry.
6864	 * This is a fix for bug (4430280).
6865	 * Of special importance is wide-xfer. The drive could have been left
6866	 * in wide transfer mode by the last driver to communicate with it,
6867	 * including us. If that's the case, and if the following is not set
6868	 * up properly or we don't re-negotiate with the drive prior to
6869	 * transferring data to/from the drive, it causes bus parity errors,
6870	 * data overruns, and unexpected interrupts. This first occurred when
6871	 * the fix for bug (4378686) was made.
6872	 */
6873	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
6874	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
6875	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
6876
6877	/*
6878	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
6879	 * on a target. Setting it per lun instance actually sets the
6880	 * capability of this target, which affects those luns already
6881	 * attached on the same target. So during attach, we can disable
6882	 * this capability only when no other lun has been attached on this
6883	 * target. By doing this, we assume a target has the same tagged-qing
6884	 * capability for every lun. The condition can be removed when the HBA
6885	 * is changed to support per-lun based tagged-qing capability.
6886	 */
6887	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
6888		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
6889	}
6890
6891	/*
6892	 * Use scsi_probe() to issue an INQUIRY command to the device.
6893	 * This call will allocate and fill in the scsi_inquiry structure
6894	 * and point the sd_inq member of the scsi_device structure to it.
6895	 * If the attach succeeds, then this memory will not be de-allocated
6896	 * (via scsi_unprobe()) until the instance is detached.
6897	 */
6898	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
6899		goto probe_failed;
6900	}
6901
6902	/*
6903	 * Check the device type as specified in the inquiry data and
6904	 * claim it if it is of a type that we support.
6905	 */
6906	switch (devp->sd_inq->inq_dtype) {
6907	case DTYPE_DIRECT:
6908		break;
6909	case DTYPE_RODIRECT:
6910		break;
6911	case DTYPE_OPTICAL:
6912		break;
6913	case DTYPE_NOTPRESENT:
6914	default:
6915		/* Unsupported device type; fail the attach. */
6916		goto probe_failed;
6917	}
6918
6919	/*
6920	 * Allocate the soft state structure for this unit.
6921	 *
6922	 * We rely upon this memory being set to all zeroes by
6923	 * ddi_soft_state_zalloc().  We assume that any member of the
6924	 * soft state structure that is not explicitly initialized by
6925	 * this routine will have a value of zero.
6926	 */
6927	instance = ddi_get_instance(devp->sd_dev);
6928#ifndef XPV_HVM_DRIVER
6929	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
6930		goto probe_failed;
6931	}
6932#endif /* !XPV_HVM_DRIVER */
6933
6934	/*
6935	 * Retrieve a pointer to the newly-allocated soft state.
6936	 *
6937	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
6938	 * was successful, unless something has gone horribly wrong and the
6939	 * ddi's soft state internals are corrupt (in which case it is
6940	 * probably better to halt here than just fail the attach....)
6941	 */
6942	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
6943		panic("sd_unit_attach: NULL soft state on instance:0x%x",
6944		    instance);
6945		/*NOTREACHED*/
6946	}
6947
6948	/*
6949	 * Link the back ptr of the driver soft state to the scsi_device
6950	 * struct for this lun.
6951	 * Save a pointer to the softstate in the driver-private area of
6952	 * the scsi_device struct.
6953	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
6954	 * we first set un->un_sd below.
6955	 */
6956	un->un_sd = devp;
6957	devp->sd_private = (opaque_t)un;
6958
6959	/*
6960	 * The following must be after devp is stored in the soft state struct.
6961	 */
6962#ifdef SDDEBUG
6963	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
6964	    "%s_unit_attach: un:0x%p instance:%d\n",
6965	    ddi_driver_name(devi), un, instance);
6966#endif
6967
6968	/*
6969	 * Set up the device type and node type (for the minor nodes).
6970	 * By default we assume that the device can at least support the
6971	 * Common Command Set. Call it a CD-ROM if it reports itself
6972	 * as a RODIRECT device.
6973	 */
6974	switch (devp->sd_inq->inq_dtype) {
6975	case DTYPE_RODIRECT:
6976		un->un_node_type = DDI_NT_CD_CHAN;
6977		un->un_ctype	 = CTYPE_CDROM;
6978		break;
6979	case DTYPE_OPTICAL:
6980		un->un_node_type = DDI_NT_BLOCK_CHAN;
6981		un->un_ctype	 = CTYPE_ROD;
6982		break;
6983	default:
6984		un->un_node_type = DDI_NT_BLOCK_CHAN;
6985		un->un_ctype	 = CTYPE_CCS;
6986		break;
6987	}
6988
6989	/*
6990	 * Try to read the interconnect type from the HBA.
6991	 *
6992	 * Note: This driver is currently compiled as two binaries, a parallel
6993	 * scsi version (sd) and a fibre channel version (ssd). All functional
6994	 * differences are determined at compile time. In the future a single
6995	 * binary will be provided and the interconnect type will be used to
6996	 * differentiate between fibre and parallel scsi behaviors. At that time
6997	 * it will be necessary for all fibre channel HBAs to support this
6998	 * property.
6999	 *
7000	 * Set un_f_is_fibre to TRUE (default fibre).
7001	 */
7002	un->un_f_is_fibre = TRUE;
7003	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7004	case INTERCONNECT_SSA:
7005		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7006		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7007		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7008		break;
7009	case INTERCONNECT_PARALLEL:
7010		un->un_f_is_fibre = FALSE;
7011		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7012		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7013		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7014		break;
7015	case INTERCONNECT_SATA:
7016		un->un_f_is_fibre = FALSE;
7017		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7018		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7019		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7020		break;
7021	case INTERCONNECT_FIBRE:
7022		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7023		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7024		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7025		break;
7026	case INTERCONNECT_FABRIC:
7027		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7028		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7029		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7030		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7031		break;
7032	default:
7033#ifdef SD_DEFAULT_INTERCONNECT_TYPE
7034		/*
7035		 * The HBA does not support the "interconnect-type" property
7036		 * (or did not provide a recognized type).
7037		 *
7038		 * Note: This will be obsoleted when a single fibre channel
7039		 * and parallel scsi driver is delivered. In the meantime the
7040		 * interconnect type will be set to the platform default. If that
7041		 * type is not parallel SCSI, it means that we should be
7042		 * assuming "ssd" semantics. However, here this also means that
7043		 * the FC HBA is not supporting the "interconnect-type" property
7044		 * like we expect it to, so log this occurrence.
7045		 */
7046		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7047		if (!SD_IS_PARALLEL_SCSI(un)) {
7048			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7049			    "sd_unit_attach: un:0x%p Assuming "
7050			    "INTERCONNECT_FIBRE\n", un);
7051		} else {
7052			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7053			    "sd_unit_attach: un:0x%p Assuming "
7054			    "INTERCONNECT_PARALLEL\n", un);
7055			un->un_f_is_fibre = FALSE;
7056		}
7057#else
7058		/*
7059		 * Note: This code path will be used when a single fibre
7060		 * channel and parallel scsi driver is delivered. The default
7061		 * will be to assume that if a device does not support the
7062		 * "interconnect-type" property it is a parallel SCSI HBA and
7063		 * we will set the interconnect type for parallel scsi.
7064		 */
7065		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7066		un->un_f_is_fibre = FALSE;
7067#endif
7068		break;
7069	}
7070
7071	if (un->un_f_is_fibre == TRUE) {
7072		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7073		    SCSI_VERSION_3) {
7074			switch (un->un_interconnect_type) {
7075			case SD_INTERCONNECT_FIBRE:
7076			case SD_INTERCONNECT_SSA:
7077				un->un_node_type = DDI_NT_BLOCK_WWN;
7078				break;
7079			default:
7080				break;
7081			}
7082		}
7083	}
7084
7085	/*
7086	 * Initialize the Request Sense command for the target
7087	 */
7088	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7089		goto alloc_rqs_failed;
7090	}
7091
7092	/*
7093	 * Set un_retry_count to SD_RETRY_COUNT; this is ok for Sparc,
7094	 * which has separate binaries for sd and ssd.
7095	 *
7096	 * x86 has one binary, and un_retry_count is set based on the
7097	 * connection type. These hardcoded values will go away when
7098	 * Sparc uses one binary for sd and ssd; they need to match
7099	 * SD_RETRY_COUNT in sddef.h.
7100	 * The value used is based on the interconnect type:
7101	 * fibre = 3, parallel = 5.
7102	 */
7103#if defined(__i386) || defined(__amd64)
7104	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7105#else
7106	un->un_retry_count = SD_RETRY_COUNT;
7107#endif
7108
7109	/*
7110	 * Set the per disk retry count to the default number of retries
7111	 * for disks and CDROMs. This value can be overridden by the
7112	 * disk property list or an entry in sd.conf.
7113	 */
7114	un->un_notready_retry_count =
7115	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7116	    : DISK_NOT_READY_RETRY_COUNT(un);
7117
7118	/*
7119	 * Set the busy retry count to the default value of un_retry_count.
7120	 * This can be overridden by entries in sd.conf or the device
7121	 * config table.
7122	 */
7123	un->un_busy_retry_count = un->un_retry_count;
7124
7125	/*
7126	 * Init the reset threshold for retries.  This number determines
7127	 * how many retries must be performed before a reset can be issued
7128	 * (for certain error conditions). This can be overridden by entries
7129	 * in sd.conf or the device config table.
7130	 */
7131	un->un_reset_retry_count = (un->un_retry_count / 2);
7132
7133	/*
7134	 * Set the victim_retry_count to the default un_retry_count
7135	 */
7136	un->un_victim_retry_count = (2 * un->un_retry_count);
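
	/*
	 * For example, with un_retry_count == 5 (the parallel SCSI value
	 * noted above), this yields un_reset_retry_count == 2 and
	 * un_victim_retry_count == 10.
	 */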
7137
7138	/*
7139	 * Set the reservation release timeout to the default value of
7140	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7141	 * device config table.
7142	 */
7143	un->un_reserve_release_time = 5;
7144
7145	/*
7146	 * Set up the default maximum transfer size. Note that this may
7147	 * get updated later in the attach, when setting up default wide
7148	 * operations for disks.
7149	 */
7150#if defined(__i386) || defined(__amd64)
7151	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7152	un->un_partial_dma_supported = 1;
7153#else
7154	un->un_max_xfer_size = (uint_t)maxphys;
7155#endif
7156
7157	/*
7158	 * Get "allow bus device reset" property (defaults to "enabled" if
7159	 * the property was not defined). This is to disable bus resets for
7160	 * certain kinds of error recovery. Note: In the future when a run-time
7161	 * fibre check is available the soft state flag should default to
7162	 * enabled.
7163	 */
7164	if (un->un_f_is_fibre == TRUE) {
7165		un->un_f_allow_bus_device_reset = TRUE;
7166	} else {
7167		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7168		    "allow-bus-device-reset", 1) != 0) {
7169			un->un_f_allow_bus_device_reset = TRUE;
7170			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7171			    "sd_unit_attach: un:0x%p Bus device reset "
7172			    "enabled\n", un);
7173		} else {
7174			un->un_f_allow_bus_device_reset = FALSE;
7175			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7176			    "sd_unit_attach: un:0x%p Bus device reset "
7177			    "disabled\n", un);
7178		}
7179	}
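
	/*
	 * Illustrative .conf entry only (not a recommendation): bus
	 * device resets for a non-fibre device can be disabled with
	 *	allow-bus-device-reset=0;
	 * The property defaults to enabled when it is not defined.
	 */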
7180
7181	/*
7182	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7183	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7184	 *
7185	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7186	 * property. The new "variant" property with a value of "atapi" has been
7187	 * introduced so that future 'variants' of standard SCSI behavior (like
7188	 * atapi) could be specified by the underlying HBA drivers by supplying
7189	 * a new value for the "variant" property, instead of having to define a
7190	 * new property.
7191	 */
7192	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7193		un->un_f_cfg_is_atapi = TRUE;
7194		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7195		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7196	}
7197	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7198	    &variantp) == DDI_PROP_SUCCESS) {
7199		if (strcmp(variantp, "atapi") == 0) {
7200			un->un_f_cfg_is_atapi = TRUE;
7201			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7202			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7203		}
7204		ddi_prop_free(variantp);
7205	}
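
	/*
	 * Hypothetical sketch (not taken from any particular HBA): an HBA
	 * driver could advertise the variant on its child node with
	 *	(void) ndi_prop_update_string(DDI_DEV_T_NONE, child_dip,
	 *	    "variant", "atapi");
	 * where child_dip is the target's dev_info node, before this
	 * target driver attaches.
	 */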
7206
7207	un->un_cmd_timeout	= SD_IO_TIME;
7208
7209	un->un_busy_timeout  = SD_BSY_TIMEOUT;
7210
7211	/* Info on current states, statuses, etc. (Updated frequently) */
7212	un->un_state		= SD_STATE_NORMAL;
7213	un->un_last_state	= SD_STATE_NORMAL;
7214
7215	/* Control & status info for command throttling */
7216	un->un_throttle		= sd_max_throttle;
7217	un->un_saved_throttle	= sd_max_throttle;
7218	un->un_min_throttle	= sd_min_throttle;
7219
7220	if (un->un_f_is_fibre == TRUE) {
7221		un->un_f_use_adaptive_throttle = TRUE;
7222	} else {
7223		un->un_f_use_adaptive_throttle = FALSE;
7224	}
7225
7226	/* Removable media support. */
7227	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7228	un->un_mediastate		= DKIO_NONE;
7229	un->un_specified_mediastate	= DKIO_NONE;
7230
7231	/* CVs for suspend/resume (PM or DR) */
7232	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7233	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7234
7235	/* Power management support. */
7236	un->un_power_level = SD_SPINDLE_UNINIT;
7237
7238	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
7239	un->un_f_wcc_inprog = 0;
7240
7241	/*
7242	 * The open/close semaphore is used to serialize threads executing
7243	 * in the driver's open & close entry point routines for a given
7244	 * instance.
7245	 */
7246	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
7247
7248	/*
7249	 * The conf file entry and softstate variable are a forceful override,
7250	 * meaning a non-zero value must be entered to change the default.
7251	 */
7252	un->un_f_disksort_disabled = FALSE;
7253
7254	/*
7255	 * Retrieve the properties from the static driver table or the driver
7256	 * configuration file (.conf) for this unit and update the soft state
7257	 * for the device as needed for the indicated properties.
7258	 * Note: the property configuration needs to occur here as some of the
7259	 * following routines may have dependencies on soft state flags set
7260	 * as part of the driver property configuration.
7261	 */
7262	sd_read_unit_properties(un);
7263	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7264	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
7265
7266	/*
7267	 * A device is treated as hotpluggable only if it has the
7268	 * "hotpluggable" property. Otherwise, it is regarded as
7269	 * non-hotpluggable.
7270	 */
7271	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
7272	    -1) != -1) {
7273		un->un_f_is_hotpluggable = TRUE;
7274	}
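
	/*
	 * Note: only the existence of the property matters here; any
	 * value it carries (other than the -1 sentinel) marks the
	 * device as hotpluggable.
	 */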
7275
7276	/*
7277	 * Set the unit's attributes (flags) according to "hotpluggable" and
7278	 * the RMB bit in the INQUIRY data.
7279	 */
7280	sd_set_unit_attributes(un, devi);
7281
7282	/*
7283	 * By default, we mark the capacity, lbasize, and geometry
7284	 * as invalid. Only if we successfully read a valid capacity
7285	 * will we update the un_blockcount and un_tgt_blocksize with the
7286	 * valid values (the geometry will be validated later).
7287	 */
7288	un->un_f_blockcount_is_valid	= FALSE;
7289	un->un_f_tgt_blocksize_is_valid	= FALSE;
7290
7291	/*
7292	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
7293	 * otherwise.
7294	 */
7295	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
7296	un->un_blockcount = 0;
7297
7298	/*
7299	 * Set up the per-instance info needed to determine the correct
7300	 * CDBs and other info for issuing commands to the target.
7301	 */
7302	sd_init_cdb_limits(un);
7303
7304	/*
7305	 * Set up the IO chains to use, based upon the target type.
7306	 */
7307	if (un->un_f_non_devbsize_supported) {
7308		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7309	} else {
7310		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7311	}
7312	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7313	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
7314	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
7315
7316	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
7317	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
7318	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
7319	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
7320
7321
7322	if (ISCD(un)) {
7323		un->un_additional_codes = sd_additional_codes;
7324	} else {
7325		un->un_additional_codes = NULL;
7326	}
7327
7328	/*
7329	 * Create the kstats here so they can be available for attach-time
7330	 * routines that send commands to the unit (either polled or via
7331	 * sd_send_scsi_cmd).
7332	 *
7333	 * Note: This is a critical sequence that needs to be maintained:
7334	 *	1) Instantiate the kstats here, before any routines using the
7335	 *	   iopath (i.e. sd_send_scsi_cmd).
7336	 *	2) Instantiate and initialize the partition stats
7337	 *	   (sd_set_pstats).
7338	 *	3) Initialize the error stats (sd_set_errstats), following
7339	 *	   sd_validate_geometry(),sd_register_devid(),
7340	 *	   and sd_cache_control().
7341	 */
7342
7343	un->un_stats = kstat_create(sd_label, instance,
7344	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
7345	if (un->un_stats != NULL) {
7346		un->un_stats->ks_lock = SD_MUTEX(un);
7347		kstat_install(un->un_stats);
7348	}
7349	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7350	    "sd_unit_attach: un:0x%p un_stats created\n", un);
7351
7352	sd_create_errstats(un, instance);
7353	if (un->un_errstats == NULL) {
7354		goto create_errstats_failed;
7355	}
7356	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7357	    "sd_unit_attach: un:0x%p errstats created\n", un);
7358
7359	/*
7360	 * The following if/else code was relocated here from below as part
7361	 * of the fix for bug (4430280). However, with the default setup added
7362	 * on entry to this routine, it's no longer absolutely necessary for
7363	 * this to be before the call to sd_spin_up_unit.
7364	 */
7365	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
7366		int tq_trigger_flag = (((devp->sd_inq->inq_ansi == 4) ||
7367		    (devp->sd_inq->inq_ansi == 5)) &&
7368		    devp->sd_inq->inq_bque) || devp->sd_inq->inq_cmdque;
7369
7370		/*
7371		 * If tagged queueing is supported by the target
7372		 * and by the host adapter then we will enable it
7373		 */
7374		un->un_tagflags = 0;
7375		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) && tq_trigger_flag &&
7376		    (un->un_f_arq_enabled == TRUE)) {
7377			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
7378			    1, 1) == 1) {
7379				un->un_tagflags = FLAG_STAG;
7380				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7381				    "sd_unit_attach: un:0x%p tag queueing "
7382				    "enabled\n", un);
7383			} else if (scsi_ifgetcap(SD_ADDRESS(un),
7384			    "untagged-qing", 0) == 1) {
7385				un->un_f_opt_queueing = TRUE;
7386				un->un_saved_throttle = un->un_throttle =
7387				    min(un->un_throttle, 3);
7388			} else {
7389				un->un_f_opt_queueing = FALSE;
7390				un->un_saved_throttle = un->un_throttle = 1;
7391			}
7392		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
7393		    == 1) && (un->un_f_arq_enabled == TRUE)) {
7394			/* The Host Adapter supports internal queueing. */
7395			un->un_f_opt_queueing = TRUE;
7396			un->un_saved_throttle = un->un_throttle =
7397			    min(un->un_throttle, 3);
7398		} else {
7399			un->un_f_opt_queueing = FALSE;
7400			un->un_saved_throttle = un->un_throttle = 1;
7401			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7402			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
7403		}
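
		/*
		 * Summary of the above: tagged queueing => FLAG_STAG with
		 * the default throttle; untagged host-adapter queueing =>
		 * throttle capped at 3; neither => throttle forced to 1
		 * (one command outstanding at a time).
		 */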
7404
7405		/*
7406		 * Enable large transfers for SATA/SAS drives
7407		 */
7408		if (SD_IS_SERIAL(un)) {
7409			un->un_max_xfer_size =
7410			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7411			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7412			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7413			    "sd_unit_attach: un:0x%p max transfer "
7414			    "size=0x%x\n", un, un->un_max_xfer_size);
7415
7416		}
7417
7418		/* Setup or tear down default wide operations for disks */
7419
7420		/*
7421		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
7422		 * and "ssd_max_xfer_size" to exist simultaneously on the same
7423		 * system and be set to different values. In the future this
7424		 * code may need to be updated when the ssd module is
7425		 * obsoleted and removed from the system. (4299588)
7426		 */
7427		if (SD_IS_PARALLEL_SCSI(un) &&
7428		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
7429		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
7430			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7431			    1, 1) == 1) {
7432				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7433				    "sd_unit_attach: un:0x%p Wide Transfer "
7434				    "enabled\n", un);
7435			}
7436
7437			/*
7438			 * If tagged queuing has also been enabled, then
7439			 * enable large xfers
7440			 */
7441			if (un->un_saved_throttle == sd_max_throttle) {
7442				un->un_max_xfer_size =
7443				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7444				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7445				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7446				    "sd_unit_attach: un:0x%p max transfer "
7447				    "size=0x%x\n", un, un->un_max_xfer_size);
7448			}
7449		} else {
7450			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7451			    0, 1) == 1) {
7452				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7453				    "sd_unit_attach: un:0x%p "
7454				    "Wide Transfer disabled\n", un);
7455			}
7456		}
7457	} else {
7458		un->un_tagflags = FLAG_STAG;
7459		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
7460		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
7461	}
7462
7463	/*
7464	 * If this target supports LUN reset, try to enable it.
7465	 */
7466	if (un->un_f_lun_reset_enabled) {
7467		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
7468			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7469			    "un:0x%p lun_reset capability set\n", un);
7470		} else {
7471			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7472			    "un:0x%p lun-reset capability not set\n", un);
7473		}
7474	}
7475
7476	/*
7477	 * Adjust the maximum transfer size. This is to fix
7478	 * the problem of partial DMA support on SPARC. Some
7479	 * HBA drivers, like aac, have a very small dma_attr_maxxfer
7480	 * size, which requires partial DMA support on SPARC.
7481	 * In the future the SPARC pci nexus driver may solve
7482	 * the problem instead of this fix.
7483	 */
7484	max_xfer_size = scsi_ifgetcap(SD_ADDRESS(un), "dma-max", 1);
7485	if ((max_xfer_size > 0) && (max_xfer_size < un->un_max_xfer_size)) {
7486		/* We need DMA partial even on sparc to ensure sddump() works */
7487		un->un_max_xfer_size = max_xfer_size;
7488		if (un->un_partial_dma_supported == 0)
7489			un->un_partial_dma_supported = 1;
7490	}
7491	if (ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7492	    DDI_PROP_DONTPASS, "buf_break", 0) == 1) {
7493		if (ddi_xbuf_attr_setup_brk(un->un_xbuf_attr,
7494		    un->un_max_xfer_size) == 1) {
7495			un->un_buf_breakup_supported = 1;
7496			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7497			    "un:0x%p Buf breakup enabled\n", un);
7498		}
7499	}
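
	/*
	 * Illustrative only: a .conf entry of
	 *	buf_break=1;
	 * requests that large bufs be broken up at un_max_xfer_size
	 * boundaries through the xbuf layer.
	 */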
7500
7501	/*
7502	 * Set PKT_DMA_PARTIAL flag.
7503	 */
7504	if (un->un_partial_dma_supported == 1) {
7505		un->un_pkt_flags = PKT_DMA_PARTIAL;
7506	} else {
7507		un->un_pkt_flags = 0;
7508	}
7509
7510	/* Initialize sd_ssc_t for internal uscsi commands */
7511	ssc = sd_ssc_init(un);
7512	scsi_fm_init(devp);
7513
7514	/*
7515	 * Allocate memory for SCSI FMA stuffs.
7516	 */
7517	un->un_fm_private =
7518	    kmem_zalloc(sizeof (struct sd_fm_internal), KM_SLEEP);
7519	sfip = (struct sd_fm_internal *)un->un_fm_private;
7520	sfip->fm_ssc.ssc_uscsi_cmd = &sfip->fm_ucmd;
7521	sfip->fm_ssc.ssc_uscsi_info = &sfip->fm_uinfo;
7522	sfip->fm_ssc.ssc_un = un;
7523
7524	if (ISCD(un) ||
7525	    un->un_f_has_removable_media ||
7526	    devp->sd_fm_capable == DDI_FM_NOT_CAPABLE) {
7527		/*
7528		 * We don't touch CDROM or the DDI_FM_NOT_CAPABLE device.
7529		 * Their logging is unchanged.
7530		 */
7531		sfip->fm_log_level = SD_FM_LOG_NSUP;
7532	} else {
7533		/*
7534		 * If we get here, this is a non-CDROM, FM-capable
7535		 * device, and it will not keep the old scsi_log output
7536		 * in /var/adm/messages as before. However, the property
7537		 * "fm-scsi-log" will control whether the FM telemetry will
7538		 * be logged in /var/adm/messages.
7539		 */
7540		int fm_scsi_log;
7541		fm_scsi_log = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7542		    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "fm-scsi-log", 0);
7543
7544		if (fm_scsi_log)
7545			sfip->fm_log_level = SD_FM_LOG_EREPORT;
7546		else
7547			sfip->fm_log_level = SD_FM_LOG_SILENT;
7548	}
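
	/*
	 * Illustrative only: a .conf entry of
	 *	fm-scsi-log=1;
	 * selects SD_FM_LOG_EREPORT (FM telemetry also logged in
	 * /var/adm/messages); the default of 0 selects SD_FM_LOG_SILENT.
	 */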
7549
7550	/*
7551	 * At this point in the attach, we have enough info in the
7552	 * soft state to be able to issue commands to the target.
7553	 *
7554	 * All command paths used below MUST issue their commands as
7555	 * SD_PATH_DIRECT. This is important as intermediate layers
7556	 * are not all initialized yet (such as PM).
7557	 */
7558
7559	/*
7560	 * Send a TEST UNIT READY command to the device. This should clear
7561	 * any outstanding UNIT ATTENTION that may be present.
7562	 *
7563	 * Note: Don't check for success; just track if there is a reservation.
7564	 * This is a throwaway command to clear any unit attentions.
7565	 *
7566	 * Note: This MUST be the first command issued to the target during
7567	 * attach to ensure power on UNIT ATTENTIONS are cleared.
7568	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
7569	 * with attempts at spinning up a device with no media.
7570	 */
7571	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
7572	if (status != 0) {
7573		if (status == EACCES)
7574			reservation_flag = SD_TARGET_IS_RESERVED;
7575		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7576	}
7577
7578	/*
7579	 * If the device is NOT a removable media device, attempt to spin
7580	 * it up (using the START_STOP_UNIT command) and read its capacity
7581	 * (using the READ CAPACITY command).  Note, however, that either
7582	 * of these could fail and in some cases we would continue with
7583	 * the attach despite the failure (see below).
7584	 */
7585	if (un->un_f_descr_format_supported) {
7586
7587		switch (sd_spin_up_unit(ssc)) {
7588		case 0:
7589			/*
7590			 * Spin-up was successful; now try to read the
7591			 * capacity.  If successful then save the results
7592			 * and mark the capacity & lbasize as valid.
7593			 */
7594			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7595			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
7596
7597			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
7598			    &lbasize, SD_PATH_DIRECT);
7599
7600			switch (status) {
7601			case 0: {
7602				if (capacity > DK_MAX_BLOCKS) {
7603#ifdef _LP64
7604					if ((capacity + 1) >
7605					    SD_GROUP1_MAX_ADDRESS) {
7606						/*
7607						 * Enable descriptor format
7608						 * sense data so that we can
7609						 * get 64 bit sense data
7610						 * fields.
7611						 */
7612						sd_enable_descr_sense(ssc);
7613					}
7614#else
7615					/* 32-bit kernels can't handle this */
7616					scsi_log(SD_DEVINFO(un),
7617					    sd_label, CE_WARN,
7618					    "disk has %llu blocks, which "
7619					    "is too large for a 32-bit "
7620					    "kernel", capacity);
7621
7622#if defined(__i386) || defined(__amd64)
7623					/*
7624					 * A 1TB disk was treated as
7625					 * (1TB - 512B) in the past, so it
7626					 * might have a valid VTOC and Solaris
7627					 * partitions; we have to allow it to
7628					 * continue to work.
7629					 */
7630					if (capacity - 1 > DK_MAX_BLOCKS)
7631#endif
7632					goto spinup_failed;
7633#endif
7634				}
7635
7636				/*
7637				 * It is not necessary here to check whether
7638				 * the capacity of the device is bigger than
7639				 * what the max hba cdb can support, because
7640				 * sd_send_scsi_READ_CAPACITY retrieves the
7641				 * capacity by sending a USCSI command, which
7642				 * is constrained by the max hba cdb. In
7643				 * fact, sd_send_scsi_READ_CAPACITY returns
7644				 * EINVAL when a bigger cdb than the required
7645				 * cdb length would be needed. That case is
7646				 * handled in "case EINVAL" below.
7647				 */
7648
7649				/*
7650				 * The following relies on
7651				 * sd_send_scsi_READ_CAPACITY never
7652				 * returning 0 for capacity and/or lbasize.
7653				 */
7654				sd_update_block_info(un, lbasize, capacity);
7655
7656				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7657				    "sd_unit_attach: un:0x%p capacity = %ld "
7658				    "blocks; lbasize= %ld.\n", un,
7659				    un->un_blockcount, un->un_tgt_blocksize);
7660
7661				break;
7662			}
7663			case EINVAL:
7664				/*
7665				 * In the case where the max-cdb-length property
7666				 * is smaller than the required CDB length for
7667				 * a SCSI device, a target driver can fail to
7668				 * attach to that device.
7669				 */
7670				scsi_log(SD_DEVINFO(un),
7671				    sd_label, CE_WARN,
7672				    "disk capacity is too large "
7673				    "for current cdb length");
7674				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7675
7676				goto spinup_failed;
7677			case EACCES:
7678				/*
7679				 * Should never get here if the spin-up
7680				 * succeeded, but code it in anyway.
7681				 * From here, just continue with the attach...
7682				 */
7683				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7684				    "sd_unit_attach: un:0x%p "
7685				    "sd_send_scsi_READ_CAPACITY "
7686				    "returned reservation conflict\n", un);
7687				reservation_flag = SD_TARGET_IS_RESERVED;
7688				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7689				break;
7690			default:
7691				/*
7692				 * Likewise, should never get here if the
7693				 * spin-up succeeded. Just continue with
7694				 * the attach...
7695				 */
7696				if (status == EIO)
7697					sd_ssc_assessment(ssc,
7698					    SD_FMT_STATUS_CHECK);
7699				else
7700					sd_ssc_assessment(ssc,
7701					    SD_FMT_IGNORE);
7702				break;
7703			}
7704			break;
7705		case EACCES:
7706			/*
7707			 * Device is reserved by another host.  In this case
7708			 * we could not spin it up or read the capacity, but
7709			 * we continue with the attach anyway.
7710			 */
7711			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7712			    "sd_unit_attach: un:0x%p spin-up reservation "
7713			    "conflict.\n", un);
7714			reservation_flag = SD_TARGET_IS_RESERVED;
7715			break;
7716		default:
7717			/* Fail the attach if the spin-up failed. */
7718			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7719			    "sd_unit_attach: un:0x%p spin-up failed.", un);
7720			goto spinup_failed;
7721		}
7722
7723	}
7724
7725	/*
7726	 * Check to see if this is a MMC drive
7727	 */
7728	if (ISCD(un)) {
7729		sd_set_mmc_caps(ssc);
7730	}
7731
7732
7733	/*
7734	 * Add a zero-length attribute to tell the world we support
7735	 * kernel ioctls (for layered drivers)
7736	 */
7737	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7738	    DDI_KERNEL_IOCTL, NULL, 0);
7739
7740	/*
7741	 * Add a boolean property to tell the world we support
7742	 * the B_FAILFAST flag (for layered drivers)
7743	 */
7744	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
7745	    "ddi-failfast-supported", NULL, 0);
7746
7747	/*
7748	 * Initialize power management
7749	 */
7750	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
7751	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
7752	sd_setup_pm(ssc, devi);
7753	if (un->un_f_pm_is_enabled == FALSE) {
7754		/*
7755		 * For performance, point to a jump table that does
7756		 * not include pm.
7757		 * The direct and priority chains don't change with PM.
7758		 *
7759		 * Note: this is currently done based on individual device
7760		 * capabilities. When an interface for determining system
7761		 * power enabled state becomes available, or when additional
7762		 * layers are added to the command chain, these values will
7763		 * have to be re-evaluated for correctness.
7764		 */
7765		if (un->un_f_non_devbsize_supported) {
7766			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
7767		} else {
7768			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
7769		}
7770		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
7771	}
7772
7773	/*
7774	 * This property is set to 0 by HA software to avoid retries
7775	 * on a reserved disk. (The preferred property name is
7776	 * "retry-on-reservation-conflict") (1189689)
7777	 *
7778	 * Note: The use of a global here can have unintended consequences. A
7779	 * per-instance variable is preferable to match the capabilities of
7780	 * different underlying HBAs. (4402600)
7781	 */
7782	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
7783	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
7784	    sd_retry_on_reservation_conflict);
7785	if (sd_retry_on_reservation_conflict != 0) {
7786		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
7787		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
7788		    sd_retry_on_reservation_conflict);
7789	}
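
	/*
	 * Illustrative only: HA software typically defines
	 *	retry-on-reservation-conflict=0;
	 * so that I/O to a reserved disk fails fast instead of retrying.
	 */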
7790
7791	/* Set up options for QFULL handling. */
7792	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7793	    "qfull-retries", -1)) != -1) {
7794		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
7795		    rval, 1);
7796	}
7797	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7798	    "qfull-retry-interval", -1)) != -1) {
7799		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
7800		    rval, 1);
7801	}
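
	/*
	 * Illustrative .conf entries only (the values below are examples,
	 * not defaults):
	 *	qfull-retries=10;
	 *	qfull-retry-interval=100;
	 */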
7802
7803	/*
7804	 * This just prints a message that announces the existence of the
7805	 * device. The message is always printed in the system logfile, but
7806	 * only appears on the console if the system is booted with the
7807	 * -v (verbose) argument.
7808	 */
7809	ddi_report_dev(devi);
7810
7811	un->un_mediastate = DKIO_NONE;
7812
7813	cmlb_alloc_handle(&un->un_cmlbhandle);
7814
7815#if defined(__i386) || defined(__amd64)
7816	/*
7817	 * On x86, compensate for off-by-1 legacy error
7818	 */
7819	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
7820	    (lbasize == un->un_sys_blocksize))
7821		offbyone = CMLB_OFF_BY_ONE;
7822#endif
7823
7824	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
7825	    VOID2BOOLEAN(un->un_f_has_removable_media != 0),
7826	    VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
7827	    un->un_node_type, offbyone, un->un_cmlbhandle,
7828	    (void *)SD_PATH_DIRECT) != 0) {
7829		goto cmlb_attach_failed;
7830	}
7831
7832
7833	/*
7834	 * Read and validate the device's geometry (ie, disk label)
7835	 * A new unformatted drive will not have a valid geometry, but
7836	 * the driver needs to successfully attach to this device so
7837	 * the drive can be formatted via ioctls.
7838	 */
7839	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
7840	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
7841
7842	mutex_enter(SD_MUTEX(un));
7843
7844	/*
7845	 * Read and initialize the devid for the unit.
7846	 */
7847	if (un->un_f_devid_supported) {
7848		sd_register_devid(ssc, devi, reservation_flag);
7849	}
7850	mutex_exit(SD_MUTEX(un));
7851
7852#if (defined(__fibre))
7853	/*
7854	 * Register callbacks for fibre only.  You can't do this solely
7855	 * on the basis of the devid_type because this is hba-specific.
7856	 * We need to query our hba capabilities to find out whether to
7857	 * register or not.
7858	 */
7859	if (un->un_f_is_fibre) {
7860		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7861			sd_init_event_callbacks(un);
7862			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7863			    "sd_unit_attach: un:0x%p event callbacks inserted",
7864			    un);
7865		}
7866	}
7867#endif
7868
7869	if (un->un_f_opt_disable_cache == TRUE) {
7870		/*
7871		 * Disable both read cache and write cache.  This is
7872		 * the historic behavior of the keywords in the config file.
7873		 */
7874		if (sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
7875		    0) {
7876			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
7877			    "sd_unit_attach: un:0x%p Could not disable "
7878			    "caching", un);
7879			goto devid_failed;
7880		}
7881	}
7882
7883	/*
7884	 * Check the value of the WCE bit now and
7885	 * set un_f_write_cache_enabled accordingly.
7886	 */
7887	(void) sd_get_write_cache_enabled(ssc, &wc_enabled);
7888	mutex_enter(SD_MUTEX(un));
7889	un->un_f_write_cache_enabled = (wc_enabled != 0);
7890	mutex_exit(SD_MUTEX(un));
7891
7892	/*
7893	 * Check the value of the NV_SUP bit and set
7894	 * un_f_suppress_cache_flush accordingly.
7895	 */
7896	sd_get_nv_sup(ssc);
7897
7898	/*
7899	 * Find out what type of reservation this disk supports.
7900	 */
7901	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS, 0, NULL);
7902
7903	switch (status) {
7904	case 0:
7905		/*
7906		 * SCSI-3 reservations are supported.
7907		 */
7908		un->un_reservation_type = SD_SCSI3_RESERVATION;
7909		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7910		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
7911		break;
7912	case ENOTSUP:
7913		/*
7914		 * The PERSISTENT RESERVE IN command would not be recognized by
7915		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
7916		 */
7917		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7918		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
7919		un->un_reservation_type = SD_SCSI2_RESERVATION;
7920
7921		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7922		break;
7923	default:
7924		/*
7925		 * default to SCSI-3 reservations
7926		 */
7927		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7928		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
7929		un->un_reservation_type = SD_SCSI3_RESERVATION;
7930
7931		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7932		break;
7933	}
7934
7935	/*
7936	 * Set the pstat and error stat values here, so data obtained during the
7937	 * previous attach-time routines is available.
7938	 *
7939	 * Note: This is a critical sequence that needs to be maintained:
7940	 *	1) Instantiate the kstats before any routines using the iopath
7941	 *	   (i.e. sd_send_scsi_cmd).
7942	 *	2) Initialize the error stats (sd_set_errstats) and partition
7943	 *	   stats (sd_set_pstats) here, following
7944	 *	   cmlb_validate_geometry(), sd_register_devid(), and
7945	 *	   sd_cache_control().
7946	 */
7947
7948	if (un->un_f_pkstats_enabled && geom_label_valid) {
7949		sd_set_pstats(un);
7950		SD_TRACE(SD_LOG_IO_PARTITION, un,
7951		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
7952	}
7953
7954	sd_set_errstats(un);
7955	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7956	    "sd_unit_attach: un:0x%p errstats set\n", un);
7957
7958
7959	/*
7960	 * After successfully attaching an instance, we record the information
7961	 * of how many luns have been attached on the relevant target and
7962	 * controller for parallel SCSI. This information is used when sd tries
7963	 * to set the tagged queuing capability in the HBA.
7964	 */
7965	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
7966		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
7967	}
7968
7969	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7970	    "sd_unit_attach: un:0x%p exit success\n", un);
7971
7972	/* Uninitialize sd_ssc_t pointer */
7973	sd_ssc_fini(ssc);
7974
7975	return (DDI_SUCCESS);
7976
7977	/*
7978	 * An error occurred during the attach; clean up & return failure.
7979	 */
7980
7981devid_failed:
7982
7983setup_pm_failed:
7984	ddi_remove_minor_node(devi, NULL);
7985
7986cmlb_attach_failed:
7987	/*
7988	 * Cleanup from the scsi_ifsetcap() calls (4378686)
7989	 */
7990	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
7991	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
7992
7993	/*
7994	 * Refer to the comments on setting tagged-qing at the beginning of
7995	 * sd_unit_attach. We can only disable tagged queuing when there is
7996	 * no lun attached on the target.
7997	 */
7998	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7999		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8000	}
8001
8002	if (un->un_f_is_fibre == FALSE) {
8003		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8004	}
8005
8006spinup_failed:
8007
8008	/* Uninitialize sd_ssc_t pointer */
8009	sd_ssc_fini(ssc);
8010
8011	mutex_enter(SD_MUTEX(un));
8012
8013	/* Deallocate SCSI FMA memory spaces */
8014	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8015
8016	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8017	if (un->un_direct_priority_timeid != NULL) {
8018		timeout_id_t temp_id = un->un_direct_priority_timeid;
8019		un->un_direct_priority_timeid = NULL;
8020		mutex_exit(SD_MUTEX(un));
8021		(void) untimeout(temp_id);
8022		mutex_enter(SD_MUTEX(un));
8023	}
8024
8025	/* Cancel any pending start/stop timeouts */
8026	if (un->un_startstop_timeid != NULL) {
8027		timeout_id_t temp_id = un->un_startstop_timeid;
8028		un->un_startstop_timeid = NULL;
8029		mutex_exit(SD_MUTEX(un));
8030		(void) untimeout(temp_id);
8031		mutex_enter(SD_MUTEX(un));
8032	}
8033
8034	/* Cancel any pending reset-throttle timeouts */
8035	if (un->un_reset_throttle_timeid != NULL) {
8036		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8037		un->un_reset_throttle_timeid = NULL;
8038		mutex_exit(SD_MUTEX(un));
8039		(void) untimeout(temp_id);
8040		mutex_enter(SD_MUTEX(un));
8041	}
8042
8043	/* Cancel any pending retry timeouts */
8044	if (un->un_retry_timeid != NULL) {
8045		timeout_id_t temp_id = un->un_retry_timeid;
8046		un->un_retry_timeid = NULL;
8047		mutex_exit(SD_MUTEX(un));
8048		(void) untimeout(temp_id);
8049		mutex_enter(SD_MUTEX(un));
8050	}
8051
8052	/* Cancel any pending delayed cv broadcast timeouts */
8053	if (un->un_dcvb_timeid != NULL) {
8054		timeout_id_t temp_id = un->un_dcvb_timeid;
8055		un->un_dcvb_timeid = NULL;
8056		mutex_exit(SD_MUTEX(un));
8057		(void) untimeout(temp_id);
8058		mutex_enter(SD_MUTEX(un));
8059	}
8060
8061	mutex_exit(SD_MUTEX(un));
8062
8063	/* There should not be any in-progress I/O, so ASSERT this below */
8064	ASSERT(un->un_ncmds_in_transport == 0);
8065	ASSERT(un->un_ncmds_in_driver == 0);
8066
8067	/* Do not free the softstate if the callback routine is active */
8068	sd_sync_with_callback(un);
8069
8070	/*
8071	 * Partition stats apparently are not used with removables. These would
8072	 * not have been created during attach, so no need to clean them up...
8073	 */
8074	if (un->un_errstats != NULL) {
8075		kstat_delete(un->un_errstats);
8076		un->un_errstats = NULL;
8077	}
8078
8079create_errstats_failed:
8080
8081	if (un->un_stats != NULL) {
8082		kstat_delete(un->un_stats);
8083		un->un_stats = NULL;
8084	}
8085
8086	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8087	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8088
8089	ddi_prop_remove_all(devi);
8090	sema_destroy(&un->un_semoclose);
8091	cv_destroy(&un->un_state_cv);
8092
8093getrbuf_failed:
8094
8095	sd_free_rqs(un);
8096
8097alloc_rqs_failed:
8098
8099	devp->sd_private = NULL;
8100	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8101
8102get_softstate_failed:
8103	/*
8104	 * Note: the man pages are unclear as to whether or not doing a
8105	 * ddi_soft_state_free(sd_state, instance) is the right way to
8106	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8107	 * ddi_get_soft_state() fails.  The implication seems to be
8108	 * that the get_soft_state cannot fail if the zalloc succeeds.
8109	 */
8110#ifndef XPV_HVM_DRIVER
8111	ddi_soft_state_free(sd_state, instance);
8112#endif /* !XPV_HVM_DRIVER */
8113
8114probe_failed:
8115	scsi_unprobe(devp);
8116
8117	return (DDI_FAILURE);
8118}
8119
8120
8121/*
8122 *    Function: sd_unit_detach
8123 *
8124 * Description: Performs DDI_DETACH processing for sddetach().
8125 *
8126 * Return Code: DDI_SUCCESS
8127 *		DDI_FAILURE
8128 *
8129 *     Context: Kernel thread context
8130 */
8131
8132static int
8133sd_unit_detach(dev_info_t *devi)
8134{
8135	struct scsi_device	*devp;
8136	struct sd_lun		*un;
8137	int			i;
8138	int			tgt;
8139	dev_t			dev;
8140	dev_info_t		*pdip = ddi_get_parent(devi);
8141#ifndef XPV_HVM_DRIVER
8142	int			instance = ddi_get_instance(devi);
8143#endif /* !XPV_HVM_DRIVER */
8144
8145	mutex_enter(&sd_detach_mutex);
8146
8147	/*
8148	 * Fail the detach for any of the following:
8149	 *  - Unable to get the sd_lun struct for the instance
8150	 *  - A layered driver has an outstanding open on the instance
8151	 *  - Another thread is already detaching this instance
8152	 *  - Another thread is currently performing an open
8153	 */
8154	devp = ddi_get_driver_private(devi);
8155	if ((devp == NULL) ||
8156	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8157	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8158	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8159		mutex_exit(&sd_detach_mutex);
8160		return (DDI_FAILURE);
8161	}
8162
8163	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8164
8165	/*
8166	 * Mark this instance as currently in a detach, to inhibit any
8167	 * opens from a layered driver.
8168	 */
8169	un->un_detach_count++;
8170	mutex_exit(&sd_detach_mutex);
8171
8172	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8173	    SCSI_ADDR_PROP_TARGET, -1);
8174
8175	dev = sd_make_device(SD_DEVINFO(un));
8176
8177#ifndef lint
8178	_NOTE(COMPETING_THREADS_NOW);
8179#endif
8180
8181	mutex_enter(SD_MUTEX(un));
8182
8183	/*
8184	 * Fail the detach if there are any outstanding layered
8185	 * opens on this device.
8186	 */
8187	for (i = 0; i < NDKMAP; i++) {
8188		if (un->un_ocmap.lyropen[i] != 0) {
8189			goto err_notclosed;
8190		}
8191	}
8192
8193	/*
8194	 * Verify there are NO outstanding commands issued to this device.
8195	 * i.e., un_ncmds_in_transport == 0.
8196	 * It's possible to have outstanding commands through the physio
8197	 * code path, even though everything's closed.
8198	 */
8199	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8200	    (un->un_direct_priority_timeid != NULL) ||
8201	    (un->un_state == SD_STATE_RWAIT)) {
8202		mutex_exit(SD_MUTEX(un));
8203		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8204		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8205		goto err_stillbusy;
8206	}
8207
8208	/*
8209	 * If we have the device reserved, release the reservation.
8210	 */
8211	if ((un->un_resvd_status & SD_RESERVE) &&
8212	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8213		mutex_exit(SD_MUTEX(un));
8214		/*
8215		 * Note: sd_reserve_release sends a command to the device
8216		 * via the sd_ioctlcmd() path, and can sleep.
8217		 */
8218		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8219			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8220			    "sd_dr_detach: Cannot release reservation \n");
8221		}
8222	} else {
8223		mutex_exit(SD_MUTEX(un));
8224	}
8225
8226	/*
8227	 * Untimeout any reserve recover, throttle reset, restart unit
8228	 * and delayed broadcast timeout threads. Protect the timeout pointers
8229	 * from getting nulled by their callback functions.
8230	 */
8231	mutex_enter(SD_MUTEX(un));
8232	if (un->un_resvd_timeid != NULL) {
8233		timeout_id_t temp_id = un->un_resvd_timeid;
8234		un->un_resvd_timeid = NULL;
8235		mutex_exit(SD_MUTEX(un));
8236		(void) untimeout(temp_id);
8237		mutex_enter(SD_MUTEX(un));
8238	}
8239
8240	if (un->un_reset_throttle_timeid != NULL) {
8241		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8242		un->un_reset_throttle_timeid = NULL;
8243		mutex_exit(SD_MUTEX(un));
8244		(void) untimeout(temp_id);
8245		mutex_enter(SD_MUTEX(un));
8246	}
8247
8248	if (un->un_startstop_timeid != NULL) {
8249		timeout_id_t temp_id = un->un_startstop_timeid;
8250		un->un_startstop_timeid = NULL;
8251		mutex_exit(SD_MUTEX(un));
8252		(void) untimeout(temp_id);
8253		mutex_enter(SD_MUTEX(un));
8254	}
8255
8256	if (un->un_dcvb_timeid != NULL) {
8257		timeout_id_t temp_id = un->un_dcvb_timeid;
8258		un->un_dcvb_timeid = NULL;
8259		mutex_exit(SD_MUTEX(un));
8260		(void) untimeout(temp_id);
8261	} else {
8262		mutex_exit(SD_MUTEX(un));
8263	}
8264
8265	/* Remove any pending reservation reclaim requests for this device */
8266	sd_rmv_resv_reclaim_req(dev);
8267
8268	mutex_enter(SD_MUTEX(un));
8269
8270	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8271	if (un->un_direct_priority_timeid != NULL) {
8272		timeout_id_t temp_id = un->un_direct_priority_timeid;
8273		un->un_direct_priority_timeid = NULL;
8274		mutex_exit(SD_MUTEX(un));
8275		(void) untimeout(temp_id);
8276		mutex_enter(SD_MUTEX(un));
8277	}
8278
8279	/* Cancel any active multi-host disk watch thread requests */
8280	if (un->un_mhd_token != NULL) {
8281		mutex_exit(SD_MUTEX(un));
8282		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8283		if (scsi_watch_request_terminate(un->un_mhd_token,
8284		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8285			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8286			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8287			/*
8288			 * Note: We are returning here after having removed
8289			 * some driver timeouts above. This is consistent with
8290			 * the legacy implementation but perhaps the watch
8291			 * terminate call should be made with the wait flag set.
8292			 */
8293			goto err_stillbusy;
8294		}
8295		mutex_enter(SD_MUTEX(un));
8296		un->un_mhd_token = NULL;
8297	}
8298
8299	if (un->un_swr_token != NULL) {
8300		mutex_exit(SD_MUTEX(un));
8301		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8302		if (scsi_watch_request_terminate(un->un_swr_token,
8303		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8304			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8305			    "sd_dr_detach: Cannot cancel swr watch request\n");
8306			/*
8307			 * Note: We are returning here after having removed
8308			 * some driver timeouts above. This is consistent with
8309			 * the legacy implementation but perhaps the watch
8310			 * terminate call should be made with the wait flag set.
8311			 */
8312			goto err_stillbusy;
8313		}
8314		mutex_enter(SD_MUTEX(un));
8315		un->un_swr_token = NULL;
8316	}
8317
8318	mutex_exit(SD_MUTEX(un));
8319
8320	/*
8321	 * Clear any scsi_reset_notifies. We clear the reset notifies
8322	 * even if we have not registered one.
8323	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8324	 */
8325	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8326	    sd_mhd_reset_notify_cb, (caddr_t)un);
8327
8328	/*
8329	 * Protect the timeout pointers from getting nulled by
8330	 * their callback functions during the cancellation process;
8331	 * otherwise untimeout could be invoked with a NULL value.
8332	 */
8333	_NOTE(NO_COMPETING_THREADS_NOW);
8334
8335	mutex_enter(&un->un_pm_mutex);
8336	if (un->un_pm_idle_timeid != NULL) {
8337		timeout_id_t temp_id = un->un_pm_idle_timeid;
8338		un->un_pm_idle_timeid = NULL;
8339		mutex_exit(&un->un_pm_mutex);
8340
8341		/*
8342		 * Timeout is active; cancel it.
8343		 * Note that it'll never be active on a device
8344		 * that does not support PM; therefore we don't
8345		 * have to check before calling pm_idle_component.
8346		 */
8347		(void) untimeout(temp_id);
8348		(void) pm_idle_component(SD_DEVINFO(un), 0);
8349		mutex_enter(&un->un_pm_mutex);
8350	}
8351
8352	/*
8353	 * Check whether there is already a timeout scheduled for power
8354	 * management. If yes, then don't lower the power here; that's
8355	 * the timeout handler's job.
8356	 */
8357	if (un->un_pm_timeid != NULL) {
8358		timeout_id_t temp_id = un->un_pm_timeid;
8359		un->un_pm_timeid = NULL;
8360		mutex_exit(&un->un_pm_mutex);
8361		/*
8362		 * Timeout is active; cancel it.
8363		 * Note that it'll never be active on a device
8364		 * that does not support PM therefore we don't
8365		 * have to check before calling pm_idle_component.
8366		 */
8367		(void) untimeout(temp_id);
8368		(void) pm_idle_component(SD_DEVINFO(un), 0);
8369
8370	} else {
8371		mutex_exit(&un->un_pm_mutex);
8372		if ((un->un_f_pm_is_enabled == TRUE) &&
8373		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
8374		    DDI_SUCCESS)) {
8375			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8376		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8377			/*
8378			 * Fix for bug: 4297749, item # 13
8379			 * The above test now includes a check to see if PM is
8380			 * supported by this device before calling
8381			 * pm_lower_power().
8382			 * Note, the following is not dead code. The call to
8383			 * pm_lower_power above will generate a call back into
8384			 * our sdpower routine which might result in a timeout
8385			 * handler getting activated. Therefore the following
8386			 * code is valid and necessary.
8387			 */
8388			mutex_enter(&un->un_pm_mutex);
8389			if (un->un_pm_timeid != NULL) {
8390				timeout_id_t temp_id = un->un_pm_timeid;
8391				un->un_pm_timeid = NULL;
8392				mutex_exit(&un->un_pm_mutex);
8393				(void) untimeout(temp_id);
8394				(void) pm_idle_component(SD_DEVINFO(un), 0);
8395			} else {
8396				mutex_exit(&un->un_pm_mutex);
8397			}
8398		}
8399	}
8400
8401	/*
8402	 * Cleanup from the scsi_ifsetcap() calls (437868)
8403	 * Relocated here from above to be after the call to
8404	 * pm_lower_power, which was getting errors.
8405	 */
8406	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8407	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8408
8409	/*
8410	 * Currently, tagged queuing is supported per target by the HBA.
8411	 * Setting it on one lun instance actually sets the capability of the
8412	 * whole target in the HBA, which affects the luns already attached on
8413	 * the same target. So during detach, we disable this capability
8414	 * only when this is the last lun left on this target. By doing
8415	 * this, we assume a target has the same tagged queuing capability
8416	 * for every lun. The condition can be removed when the HBA is changed
8417	 * to support per-lun tagged queuing capability.
8418	 */
8419	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
8420		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8421	}
8422
8423	if (un->un_f_is_fibre == FALSE) {
8424		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8425	}
8426
8427	/*
8428	 * Remove any event callbacks, fibre only
8429	 */
8430	if (un->un_f_is_fibre == TRUE) {
8431		if ((un->un_insert_event != NULL) &&
8432		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
8433		    DDI_SUCCESS)) {
8434			/*
8435			 * Note: We are returning here after having done
8436			 * substantial cleanup above. This is consistent
8437			 * with the legacy implementation but this may not
8438			 * be the right thing to do.
8439			 */
8440			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8441			    "sd_dr_detach: Cannot cancel insert event\n");
8442			goto err_remove_event;
8443		}
8444		un->un_insert_event = NULL;
8445
8446		if ((un->un_remove_event != NULL) &&
8447		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
8448		    DDI_SUCCESS)) {
8449			/*
8450			 * Note: We are returning here after having done
8451			 * substantial cleanup above. This is consistent
8452			 * with the legacy implementation but this may not
8453			 * be the right thing to do.
8454			 */
8455			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8456			    "sd_dr_detach: Cannot cancel remove event\n");
8457			goto err_remove_event;
8458		}
8459		un->un_remove_event = NULL;
8460	}
8461
8462	/* Do not free the softstate if the callback routine is active */
8463	sd_sync_with_callback(un);
8464
8465	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
8466	cmlb_free_handle(&un->un_cmlbhandle);
8467
8468	/*
8469	 * Hold the detach mutex here, to make sure that no other threads ever
8470	 * can access a (partially) freed soft state structure.
8471	 */
8472	mutex_enter(&sd_detach_mutex);
8473
8474	/*
8475	 * Clean up the soft state struct.
8476	 * Cleanup is done in reverse order of allocs/inits.
8477	 * At this point there should be no competing threads anymore.
8478	 */
8479
8480	scsi_fm_fini(devp);
8481
8482	/*
8483	 * Deallocate memory for SCSI FMA.
8484	 */
8485	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8486
8487	/* Unregister and free device id. */
8488	ddi_devid_unregister(devi);
8489	if (un->un_devid) {
8490		ddi_devid_free(un->un_devid);
8491		un->un_devid = NULL;
8492	}
8493
8494	/*
8495	 * Destroy wmap cache if it exists.
8496	 */
8497	if (un->un_wm_cache != NULL) {
8498		kmem_cache_destroy(un->un_wm_cache);
8499		un->un_wm_cache = NULL;
8500	}
8501
8502	/*
8503	 * kstat cleanup is done in detach for all device types (4363169).
8504	 * We do not want to fail detach if the device kstats are not deleted
8505	 * since there is a confusion about the devo_refcnt for the device.
8506	 * We just delete the kstats and let detach complete successfully.
8507	 */
8508	if (un->un_stats != NULL) {
8509		kstat_delete(un->un_stats);
8510		un->un_stats = NULL;
8511	}
8512	if (un->un_errstats != NULL) {
8513		kstat_delete(un->un_errstats);
8514		un->un_errstats = NULL;
8515	}
8516
8517	/* Remove partition stats */
8518	if (un->un_f_pkstats_enabled) {
8519		for (i = 0; i < NSDMAP; i++) {
8520			if (un->un_pstats[i] != NULL) {
8521				kstat_delete(un->un_pstats[i]);
8522				un->un_pstats[i] = NULL;
8523			}
8524		}
8525	}
8526
8527	/* Remove xbuf registration */
8528	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8529	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8530
8531	/* Remove driver properties */
8532	ddi_prop_remove_all(devi);
8533
8534	mutex_destroy(&un->un_pm_mutex);
8535	cv_destroy(&un->un_pm_busy_cv);
8536
8537	cv_destroy(&un->un_wcc_cv);
8538
8539	/* Open/close semaphore */
8540	sema_destroy(&un->un_semoclose);
8541
8542	/* Removable media condvar. */
8543	cv_destroy(&un->un_state_cv);
8544
8545	/* Suspend/resume condvar. */
8546	cv_destroy(&un->un_suspend_cv);
8547	cv_destroy(&un->un_disk_busy_cv);
8548
8549	sd_free_rqs(un);
8550
8551	/* Free up soft state */
8552	devp->sd_private = NULL;
8553
8554	bzero(un, sizeof (struct sd_lun));
8555#ifndef XPV_HVM_DRIVER
8556	ddi_soft_state_free(sd_state, instance);
8557#endif /* !XPV_HVM_DRIVER */
8558
8559	mutex_exit(&sd_detach_mutex);
8560
8561	/* This frees up the INQUIRY data associated with the device. */
8562	scsi_unprobe(devp);
8563
8564	/*
8565	 * After successfully detaching an instance, we update the information
8566	 * of how many luns have been attached in the relative target and
8567	 * controller for parallel SCSI. This information is used when sd tries
8568	 * to set the tagged queuing capability in HBA.
8569	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
8570	 * check if the device is parallel SCSI. However, we don't need to
8571	 * check here because we've already checked during attach. No device
8572	 * that is not parallel SCSI is in the chain.
8573	 */
8574	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8575		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
8576	}
8577
8578	return (DDI_SUCCESS);
8579
8580err_notclosed:
8581	mutex_exit(SD_MUTEX(un));
8582
8583err_stillbusy:
8584	_NOTE(NO_COMPETING_THREADS_NOW);
8585
8586err_remove_event:
8587	mutex_enter(&sd_detach_mutex);
8588	un->un_detach_count--;
8589	mutex_exit(&sd_detach_mutex);
8590
8591	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
8592	return (DDI_FAILURE);
8593}
8594
8595
8596/*
8597 *    Function: sd_create_errstats
8598 *
8599 * Description: This routine instantiates the device error stats.
8600 *
8601 *		Note: During attach the stats are instantiated first so they are
8602 *		available for attach-time routines that utilize the driver
8603 *		iopath to send commands to the device. The stats are initialized
8604 *		separately so data obtained during some attach-time routines is
8605 *		available. (4362483)
8606 *
8607 *   Arguments: un - driver soft state (unit) structure
8608 *		instance - driver instance
8609 *
8610 *     Context: Kernel thread context
8611 */
8612
8613static void
8614sd_create_errstats(struct sd_lun *un, int instance)
8615{
8616	struct	sd_errstats	*stp;
8617	char	kstatmodule_err[KSTAT_STRLEN];
8618	char	kstatname[KSTAT_STRLEN];
8619	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
8620
8621	ASSERT(un != NULL);
8622
8623	if (un->un_errstats != NULL) {
8624		return;
8625	}
8626
8627	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
8628	    "%serr", sd_label);
8629	(void) snprintf(kstatname, sizeof (kstatname),
8630	    "%s%d,err", sd_label, instance);
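	/*
	 * For instance 0 this yields kstat module "sderr" and kstat name
	 * "sd0,err" (assuming sd_label is "sd").
	 */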
8631
8632	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
8633	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
8634
8635	if (un->un_errstats == NULL) {
8636		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8637		    "sd_create_errstats: Failed kstat_create\n");
8638		return;
8639	}
8640
8641	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8642	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
8643	    KSTAT_DATA_UINT32);
8644	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
8645	    KSTAT_DATA_UINT32);
8646	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
8647	    KSTAT_DATA_UINT32);
8648	kstat_named_init(&stp->sd_vid,		"Vendor",
8649	    KSTAT_DATA_CHAR);
8650	kstat_named_init(&stp->sd_pid,		"Product",
8651	    KSTAT_DATA_CHAR);
8652	kstat_named_init(&stp->sd_revision,	"Revision",
8653	    KSTAT_DATA_CHAR);
8654	kstat_named_init(&stp->sd_serial,	"Serial No",
8655	    KSTAT_DATA_CHAR);
8656	kstat_named_init(&stp->sd_capacity,	"Size",
8657	    KSTAT_DATA_ULONGLONG);
8658	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
8659	    KSTAT_DATA_UINT32);
8660	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
8661	    KSTAT_DATA_UINT32);
8662	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
8663	    KSTAT_DATA_UINT32);
8664	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
8665	    KSTAT_DATA_UINT32);
8666	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
8667	    KSTAT_DATA_UINT32);
8668	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
8669	    KSTAT_DATA_UINT32);
8670
8671	un->un_errstats->ks_private = un;
8672	un->un_errstats->ks_update  = nulldev;
8673
8674	kstat_install(un->un_errstats);
8675}
8676
8677
8678/*
8679 *    Function: sd_set_errstats
8680 *
8681 * Description: This routine sets the value of the vendor id, product id,
8682 *		revision, serial number, and capacity device error stats.
8683 *
8684 *		Note: During attach the stats are instantiated first so they are
8685 *		available for attach-time routines that utilize the driver
8686 *		iopath to send commands to the device. The stats are initialized
8687 *		separately so data obtained during some attach-time routines is
8688 *		available. (4362483)
8689 *
8690 *   Arguments: un - driver soft state (unit) structure
8691 *
8692 *     Context: Kernel thread context
8693 */
8694
8695static void
8696sd_set_errstats(struct sd_lun *un)
8697{
8698	struct	sd_errstats	*stp;
8699
8700	ASSERT(un != NULL);
8701	ASSERT(un->un_errstats != NULL);
8702	stp = (struct sd_errstats *)un->un_errstats->ks_data;
8703	ASSERT(stp != NULL);
8704	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
8705	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
8706	(void) strncpy(stp->sd_revision.value.c,
8707	    un->un_sd->sd_inq->inq_revision, 4);
8708
8709	/*
8710	 * All the errstats are persistent across detach/attach,
8711	 * so reset them all here to handle hot replacement of
8712	 * disk drives, except for Sun qualified drives whose
8713	 * serial number has not changed.
8714	 */
8715	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
8716	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8717	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
8718		stp->sd_softerrs.value.ui32 = 0;
8719		stp->sd_harderrs.value.ui32 = 0;
8720		stp->sd_transerrs.value.ui32 = 0;
8721		stp->sd_rq_media_err.value.ui32 = 0;
8722		stp->sd_rq_ntrdy_err.value.ui32 = 0;
8723		stp->sd_rq_nodev_err.value.ui32 = 0;
8724		stp->sd_rq_recov_err.value.ui32 = 0;
8725		stp->sd_rq_illrq_err.value.ui32 = 0;
8726		stp->sd_rq_pfa_err.value.ui32 = 0;
8727	}
8728
8729	/*
8730	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
8731	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
8732	 * (4376302))
8733	 */
8734	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
8735		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
8736		    sizeof (SD_INQUIRY(un)->inq_serial));
8737	}
8738
8739	if (un->un_f_blockcount_is_valid != TRUE) {
8740		/*
8741		 * Set capacity error stat to 0 for no media. This ensures
8742		 * a valid capacity is displayed in response to 'iostat -E'
8743		 * when no media is present in the device.
8744		 */
8745		stp->sd_capacity.value.ui64 = 0;
8746	} else {
8747		/*
8748		 * Multiply un_blockcount by un->un_sys_blocksize to get
8749		 * capacity.
8750		 *
8751		 * Note: for non-512 blocksize devices "un_blockcount" has been
8752		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
8753		 * (un_tgt_blocksize / un->un_sys_blocksize).
8754		 */
8755		stp->sd_capacity.value.ui64 = (uint64_t)
8756		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
8757	}
8758}
8759
8760
8761/*
8762 *    Function: sd_set_pstats
8763 *
8764 * Description: This routine instantiates and initializes the partition
8765 *              stats for each partition with more than zero blocks.
8766 *		(4363169)
8767 *
8768 *   Arguments: un - driver soft state (unit) structure
8769 *
8770 *     Context: Kernel thread context
8771 */
8772
8773static void
8774sd_set_pstats(struct sd_lun *un)
8775{
8776	char	kstatname[KSTAT_STRLEN];
8777	int	instance;
8778	int	i;
8779	diskaddr_t	nblks = 0;
8780	char	*partname = NULL;
8781
8782	ASSERT(un != NULL);
8783
8784	instance = ddi_get_instance(SD_DEVINFO(un));
8785
8786	/* Note:x86: is this a VTOC8/VTOC16 difference? */
8787	for (i = 0; i < NSDMAP; i++) {
8788
8789		if (cmlb_partinfo(un->un_cmlbhandle, i,
8790		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
8791			continue;
8792		mutex_enter(SD_MUTEX(un));
8793
8794		if ((un->un_pstats[i] == NULL) &&
8795		    (nblks != 0)) {
8796
8797			(void) snprintf(kstatname, sizeof (kstatname),
8798			    "%s%d,%s", sd_label, instance,
8799			    partname);
8800
8801			un->un_pstats[i] = kstat_create(sd_label,
8802			    instance, kstatname, "partition", KSTAT_TYPE_IO,
8803			    1, KSTAT_FLAG_PERSISTENT);
8804			if (un->un_pstats[i] != NULL) {
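				/*
				 * Use the unit mutex as the kstat lock so
				 * kstat reads are serialized against the
				 * driver's I/O-path stat updates.
				 */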
8805				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
8806				kstat_install(un->un_pstats[i]);
8807			}
8808		}
8809		mutex_exit(SD_MUTEX(un));
8810	}
8811}
8812
8813
8814#if (defined(__fibre))
8815/*
8816 *    Function: sd_init_event_callbacks
8817 *
8818 * Description: This routine initializes the insertion and removal event
8819 *		callbacks. (fibre only)
8820 *
8821 *   Arguments: un - driver soft state (unit) structure
8822 *
8823 *     Context: Kernel thread context
8824 */
8825
8826static void
8827sd_init_event_callbacks(struct sd_lun *un)
8828{
8829	ASSERT(un != NULL);
8830
8831	if ((un->un_insert_event == NULL) &&
8832	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
8833	    &un->un_insert_event) == DDI_SUCCESS)) {
8834		/*
8835		 * Add the callback for an insertion event
8836		 */
8837		(void) ddi_add_event_handler(SD_DEVINFO(un),
8838		    un->un_insert_event, sd_event_callback, (void *)un,
8839		    &(un->un_insert_cb_id));
8840	}
8841
8842	if ((un->un_remove_event == NULL) &&
8843	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
8844	    &un->un_remove_event) == DDI_SUCCESS)) {
8845		/*
8846		 * Add the callback for a removal event
8847		 */
8848		(void) ddi_add_event_handler(SD_DEVINFO(un),
8849		    un->un_remove_event, sd_event_callback, (void *)un,
8850		    &(un->un_remove_cb_id));
8851	}
8852}
8853
8854
8855/*
8856 *    Function: sd_event_callback
8857 *
8858 * Description: This routine handles insert/remove events (photon). The
8859 *		state is changed to OFFLINE, which can be used to suppress
8860 *		error messages. (fibre only)
8861 *
8862 *   Arguments: un - driver soft state (unit) structure
8863 *
8864 *     Context: Callout thread context
8865 */
8866/* ARGSUSED */
8867static void
8868sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
8869    void *bus_impldata)
8870{
8871	struct sd_lun *un = (struct sd_lun *)arg;
8872
8873	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
8874	if (event == un->un_insert_event) {
8875		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
8876		mutex_enter(SD_MUTEX(un));
8877		if (un->un_state == SD_STATE_OFFLINE) {
8878			if (un->un_last_state != SD_STATE_SUSPENDED) {
8879				un->un_state = un->un_last_state;
8880			} else {
8881				/*
8882				 * We have gone through SUSPEND/RESUME while
8883				 * we were offline. Restore the last state
8884				 */
8885				un->un_state = un->un_save_state;
8886			}
8887		}
8888		mutex_exit(SD_MUTEX(un));
8889
8890	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
8891	} else if (event == un->un_remove_event) {
8892		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
8893		mutex_enter(SD_MUTEX(un));
8894		/*
8895		 * We need to handle an event callback that occurs during
8896		 * the suspend operation, since we don't prevent it.
8897		 */
8898		if (un->un_state != SD_STATE_OFFLINE) {
8899			if (un->un_state != SD_STATE_SUSPENDED) {
8900				New_state(un, SD_STATE_OFFLINE);
8901			} else {
8902				un->un_last_state = SD_STATE_OFFLINE;
8903			}
8904		}
8905		mutex_exit(SD_MUTEX(un));
8906	} else {
8907		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
8908		    "!Unknown event\n");
8909	}
8910
8911}
8912#endif
8913
8914/*
8915 *    Function: sd_cache_control()
8916 *
8917 * Description: This routine is the driver entry point for setting
8918 *		read and write caching by modifying the WCE (write cache
8919 *		enable) and RCD (read cache disable) bits of mode
8920 *		page 8 (MODEPAGE_CACHING).
8921 *
8922 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
8923 *                      structure for this target.
8924 *		rcd_flag - flag for controlling the read cache
8925 *		wce_flag - flag for controlling the write cache
8926 *
8927 * Return Code: EIO
8928 *		code returned by sd_send_scsi_MODE_SENSE and
8929 *		sd_send_scsi_MODE_SELECT
8930 *
8931 *     Context: Kernel Thread
8932 */
8933
8934static int
8935sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag)
8936{
8937	struct mode_caching	*mode_caching_page;
8938	uchar_t			*header;
8939	size_t			buflen;
8940	int			hdrlen;
8941	int			bd_len;
8942	int			rval = 0;
8943	struct mode_header_grp2	*mhp;
8944	struct sd_lun		*un;
8945	int			status;
8946
8947	ASSERT(ssc != NULL);
8948	un = ssc->ssc_un;
8949	ASSERT(un != NULL);
8950
8951	/*
8952	 * Do a test unit ready, otherwise a mode sense may not work if this
8953	 * is the first command sent to the device after boot.
8954	 */
8955	status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
8956	if (status != 0)
8957		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8958
8959	if (un->un_f_cfg_is_atapi == TRUE) {
8960		hdrlen = MODE_HEADER_LENGTH_GRP2;
8961	} else {
8962		hdrlen = MODE_HEADER_LENGTH;
8963	}
8964
8965	/*
8966	 * Allocate memory for the retrieved mode page and its headers.  Set
8967	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
8968	 * we get all of the mode sense data; otherwise, the mode select
8969	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
8970	 */
8971	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
8972	    sizeof (struct mode_cache_scsi3);
8973
8974	header = kmem_zalloc(buflen, KM_SLEEP);
8975
8976	/* Get the information from the device. */
8977	if (un->un_f_cfg_is_atapi == TRUE) {
8978		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, header, buflen,
8979		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8980	} else {
8981		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
8982		    MODEPAGE_CACHING, SD_PATH_DIRECT);
8983	}
8984
8985	if (rval != 0) {
8986		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
8987		    "sd_cache_control: Mode Sense Failed\n");
8988		goto mode_sense_failed;
8989	}
8990
8991	/*
8992	 * Determine size of Block Descriptors in order to locate
8993	 * the mode page data. ATAPI devices return 0, SCSI devices
8994	 * should return MODE_BLK_DESC_LENGTH.
8995	 */
8996	if (un->un_f_cfg_is_atapi == TRUE) {
8997		mhp	= (struct mode_header_grp2 *)header;
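		/* Assemble the 16-bit big-endian block descriptor length. */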
8998		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
8999	} else {
9000		bd_len  = ((struct mode_header *)header)->bdesc_length;
9001	}
9002
9003	if (bd_len > MODE_BLK_DESC_LENGTH) {
9004		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
9005		    "sd_cache_control: Mode Sense returned invalid block "
9006		    "descriptor length\n");
9007		rval = EIO;
9008		goto mode_sense_failed;
9009	}
9010
9011	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9012	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9013		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
9014		    "sd_cache_control: Mode Sense caching page code mismatch "
9015		    "%d\n", mode_caching_page->mode_page.code);
9016		rval = EIO;
9017		goto mode_sense_failed;
9018	}
9019
9020	/* On success, change settings only if a requested flag differs from current. */
9021	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9022	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9023	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9024	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9025
9026		size_t sbuflen;
9027		uchar_t save_pg;
9028
9029		/*
9030		 * Construct select buffer length based on the
9031		 * length of the sense data returned.
9032		 */
9033		sbuflen =  hdrlen + MODE_BLK_DESC_LENGTH +
9034		    sizeof (struct mode_page) +
9035		    (int)mode_caching_page->mode_page.length;
9036
9037		/*
9038		 * Set the caching bits as requested.
9039		 */
9040		if (rcd_flag == SD_CACHE_ENABLE)
9041			mode_caching_page->rcd = 0;
9042		else if (rcd_flag == SD_CACHE_DISABLE)
9043			mode_caching_page->rcd = 1;
9044
9045		if (wce_flag == SD_CACHE_ENABLE)
9046			mode_caching_page->wce = 1;
9047		else if (wce_flag == SD_CACHE_DISABLE)
9048			mode_caching_page->wce = 0;
9049
9050		/*
9051		 * Save the page if the mode sense says the
9052		 * drive supports it.
9053		 */
9054		save_pg = mode_caching_page->mode_page.ps ?
9055		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9056
9057		/* Clear reserved bits before mode select. */
9058		mode_caching_page->mode_page.ps = 0;
9059
9060		/*
9061		 * Clear out mode header for mode select.
9062		 * The rest of the retrieved page will be reused.
9063		 */
9064		bzero(header, hdrlen);
9065
9066		if (un->un_f_cfg_is_atapi == TRUE) {
9067			mhp = (struct mode_header_grp2 *)header;
9068			mhp->bdesc_length_hi = bd_len >> 8;
9069			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9070		} else {
9071			((struct mode_header *)header)->bdesc_length = bd_len;
9072		}
9073
9074		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9075
9076		/* Issue mode select to change the cache settings */
9077		if (un->un_f_cfg_is_atapi == TRUE) {
9078			rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, header,
9079			    sbuflen, save_pg, SD_PATH_DIRECT);
9080		} else {
9081			rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
9082			    sbuflen, save_pg, SD_PATH_DIRECT);
9083		}
9084
9085	}
9086
9087
9088mode_sense_failed:
9089
9090	kmem_free(header, buflen);
9091
9092	if (rval != 0) {
9093		if (rval == EIO)
9094			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9095		else
9096			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9097	}
9098	return (rval);
9099}
9100
9101
9102/*
9103 *    Function: sd_get_write_cache_enabled()
9104 *
9105 * Description: This routine is the driver entry point for determining if
9106 *		write caching is enabled.  It examines the WCE (write cache
9107 *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9108 *
9109 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
9110 *                      structure for this target.
9111 *		is_enabled - pointer to int where write cache enabled state
9112 *		is returned (non-zero -> write cache enabled)
9113 *
9115 * Return Code: EIO
9116 *		code returned by sd_send_scsi_MODE_SENSE
9117 *
9118 *     Context: Kernel Thread
9119 *
9120 * NOTE: If an ioctl is added to disable write cache, this sequence should
9121 * be followed so that no locking is required for accesses to
9122 * un->un_f_write_cache_enabled:
9123 * 	do mode select to clear wce
9124 * 	do synchronize cache to flush cache
9125 * 	set un->un_f_write_cache_enabled = FALSE
9126 *
9127 * Conversely, an ioctl to enable the write cache should be done
9128 * in this order:
9129 * 	set un->un_f_write_cache_enabled = TRUE
9130 * 	do mode select to set wce
9131 */
9132
9133static int
9134sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled)
9135{
9136	struct mode_caching	*mode_caching_page;
9137	uchar_t			*header;
9138	size_t			buflen;
9139	int			hdrlen;
9140	int			bd_len;
9141	int			rval = 0;
9142	struct sd_lun		*un;
9143	int			status;
9144
9145	ASSERT(ssc != NULL);
9146	un = ssc->ssc_un;
9147	ASSERT(un != NULL);
9148	ASSERT(is_enabled != NULL);
9149
9150	/* in case of error, flag as enabled */
9151	*is_enabled = TRUE;
9152
9153	/*
9154	 * Do a test unit ready, otherwise a mode sense may not work if this
9155	 * is the first command sent to the device after boot.
9156	 */
9157	status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9158
9159	if (status != 0)
9160		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9161
9162	if (un->un_f_cfg_is_atapi == TRUE) {
9163		hdrlen = MODE_HEADER_LENGTH_GRP2;
9164	} else {
9165		hdrlen = MODE_HEADER_LENGTH;
9166	}
9167
9168	/*
9169	 * Allocate memory for the retrieved mode page and its headers.  Set
9170	 * a pointer to the page itself.
9171	 */
9172	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
9173	header = kmem_zalloc(buflen, KM_SLEEP);
9174
9175	/* Get the information from the device. */
9176	if (un->un_f_cfg_is_atapi == TRUE) {
9177		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, header, buflen,
9178		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9179	} else {
9180		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
9181		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9182	}
9183
9184	if (rval != 0) {
9185		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9186		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
9187		goto mode_sense_failed;
9188	}
9189
9190	/*
9191	 * Determine size of Block Descriptors in order to locate
9192	 * the mode page data. ATAPI devices return 0, SCSI devices
9193	 * should return MODE_BLK_DESC_LENGTH.
9194	 */
9195	if (un->un_f_cfg_is_atapi == TRUE) {
9196		struct mode_header_grp2	*mhp;
9197		mhp	= (struct mode_header_grp2 *)header;
9198		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9199	} else {
9200		bd_len  = ((struct mode_header *)header)->bdesc_length;
9201	}
9202
9203	if (bd_len > MODE_BLK_DESC_LENGTH) {
9204		/* FMA should make upset complain here */
9205		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
9206		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
9207		    "block descriptor length\n");
9208		rval = EIO;
9209		goto mode_sense_failed;
9210	}
9211
9212	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9213	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9214		/* FMA could make upset complain here */
9215		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
9216		    "sd_get_write_cache_enabled: Mode Sense caching page "
9217		    "code mismatch %d\n", mode_caching_page->mode_page.code);
9218		rval = EIO;
9219		goto mode_sense_failed;
9220	}
9221	*is_enabled = mode_caching_page->wce;
9222
9223mode_sense_failed:
9224	if (rval == 0) {
9225		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
9226	} else if (rval == EIO) {
9227		/*
9228		 * Some disks do not support MODE SENSE(6); we
9229		 * should ignore this kind of error (sense key
9230		 * 0x5 - ILLEGAL REQUEST).
9231		 */
9232		uint8_t *sensep;
9233		int senlen;
9234
9235		sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
9236		senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
9237		    ssc->ssc_uscsi_cmd->uscsi_rqresid);
9238
9239		if (senlen > 0 &&
9240		    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
9241			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
9242		} else {
9243			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9244		}
9245	} else {
9246		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9247	}
9248	kmem_free(header, buflen);
9249	return (rval);
9250}
9251
9252/*
9253 *    Function: sd_get_nv_sup()
9254 *
9255 * Description: This routine is the driver entry point for
9256 * determining whether non-volatile cache is supported. This
9257 * determination process works as follows:
9258 *
9259 * 1. sd first queries sd.conf to see whether the
9260 * suppress_cache_flush bit is set for this device.
9261 *
9262 * 2. if it is not set there, sd then queries the internal disk table.
9263 *
9264 * 3. if either sd.conf or the internal disk table specifies
9265 * that cache flush be suppressed, we don't bother checking
9266 * the NV_SUP bit.
9267 *
9268 * If the SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
9269 * the optional INQUIRY VPD page 0x86. If the device
9270 * supports VPD page 0x86, sd examines the NV_SUP
9271 * (non-volatile cache support) bit in the INQUIRY VPD page
9272 * 0x86:
9273 *   o If the NV_SUP bit is set, sd assumes the device has a
9274 *   non-volatile cache and sets
9275 *   un_f_sync_nv_supported to TRUE.
9276 *   o Otherwise the cache is not non-volatile, and
9277 *   un_f_sync_nv_supported is set to FALSE.
9278 *
9279 * Arguments: ssc - pointer to sd_ssc_t, which contains the soft state (un)
9280 *
9281 * Return Code: none
9282 *
9283 *     Context: Kernel Thread
9284 */
9285
9286static void
9287sd_get_nv_sup(sd_ssc_t *ssc)
9288{
9289	int		rval		= 0;
9290	uchar_t		*inq86		= NULL;
9291	size_t		inq86_len	= MAX_INQUIRY_SIZE;
9292	size_t		inq86_resid	= 0;
9293	struct		dk_callback *dkc;
9294	struct sd_lun	*un;
9295
9296	ASSERT(ssc != NULL);
9297	un = ssc->ssc_un;
9298	ASSERT(un != NULL);
9299
9300	mutex_enter(SD_MUTEX(un));
9301
9302	/*
9303	 * Be conservative on the device's support of
9304	 * SYNC_NV bit: un_f_sync_nv_supported is
9305	 * initialized to be false.
9306	 */
9307	un->un_f_sync_nv_supported = FALSE;
9308
9309	/*
9310	 * If either sd.conf or internal disk table
9311	 * specifies cache flush be suppressed, then
9312	 * we don't bother checking NV_SUP bit.
9313	 */
9314	if (un->un_f_suppress_cache_flush == TRUE) {
9315		mutex_exit(SD_MUTEX(un));
9316		return;
9317	}
9318
9319	if (sd_check_vpd_page_support(ssc) == 0 &&
9320	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
9321		mutex_exit(SD_MUTEX(un));
9322		/* collect page 86 data if available */
9323		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);
9324
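		/* EVPD (0x01) selects VPD page 0x86, Extended INQUIRY Data. */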
9325		rval = sd_send_scsi_INQUIRY(ssc, inq86, inq86_len,
9326		    0x01, 0x86, &inq86_resid);
9327
9328		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
9329			SD_TRACE(SD_LOG_COMMON, un,
9330			    "sd_get_nv_sup: successfully got "
9331			    "VPD page: %x "
9332			    "PAGE LENGTH: %x BYTE 6: %x\n",
9333			    inq86[1], inq86[3], inq86[6]);
9334
9335			mutex_enter(SD_MUTEX(un));
9336			/*
9337			 * check the value of NV_SUP bit: only if the device
9338			 * reports NV_SUP bit to be 1, the
9339			 * un_f_sync_nv_supported bit will be set to true.
9340			 */
9341			if (inq86[6] & SD_VPD_NV_SUP) {
9342				un->un_f_sync_nv_supported = TRUE;
9343			}
9344			mutex_exit(SD_MUTEX(un));
9345		} else if (rval != 0) {
9346			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9347		}
9348
9349		kmem_free(inq86, inq86_len);
9350	} else {
9351		mutex_exit(SD_MUTEX(un));
9352	}
9353
9354	/*
9355	 * Send a SYNCHRONIZE CACHE command to check whether the
9356	 * SYNC_NV bit is supported; at this point
9357	 * un_f_sync_nv_supported holds the correct value.
9358	 */
9359	mutex_enter(SD_MUTEX(un));
9360	if (un->un_f_sync_nv_supported) {
9361		mutex_exit(SD_MUTEX(un));
9362		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
9363		dkc->dkc_flag = FLUSH_VOLATILE;
9364		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
9365
9366		/*
9367		 * Send a TEST UNIT READY command to the device. This should
9368		 * clear any outstanding UNIT ATTENTION that may be present.
9369		 */
9370		rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
9371		if (rval != 0)
9372			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9373
9374		kmem_free(dkc, sizeof (struct dk_callback));
9375	} else {
9376		mutex_exit(SD_MUTEX(un));
9377	}
9378
9379	SD_TRACE(SD_LOG_COMMON, un, "sd_get_nv_sup: "
9380	    "un_f_suppress_cache_flush is set to %d\n",
9381	    un->un_f_suppress_cache_flush);
9382}
9383
9384/*
9385 *    Function: sd_make_device
9386 *
9387 * Description: Utility routine to return the Solaris device number from
9388 *		the data in the device's dev_info structure.
9389 *
9390 * Return Code: The Solaris device number
9391 *
9392 *     Context: Any
9393 */
9394
9395static dev_t
9396sd_make_device(dev_info_t *devi)
9397{
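	/*
	 * The instance number occupies the high bits of the minor number;
	 * the low SDUNIT_SHIFT bits select the partition (zero here, i.e.
	 * the dev_t for the whole unit). SDUNIT() and SDPART() do the
	 * reverse decoding.
	 */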
9398	return (makedevice(ddi_driver_major(devi),
9399	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9400}
9401
9402
9403/*
9404 *    Function: sd_pm_entry
9405 *
9406 * Description: Called at the start of a new command to manage power
9407 *		and busy status of a device. This includes determining whether
9408 *		the current power state of the device is sufficient for
9409 *		performing the command or whether it must be changed.
9410 *		The PM framework is notified appropriately.
9411 *		Only with a return status of DDI_SUCCESS will the
9412 *		Only with a return status of DDI_SUCCESS will the
9413 *		component be marked busy to the framework.
9414 *
9415 *		All callers of sd_pm_entry must check the return status
9416 *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
9417 *		of DDI_FAILURE indicates the device failed to power up.
9418 *		In this case un_pm_count has been adjusted so the result
9419 *		on exit is still powered down, i.e. count is less than 0.
9420 *
9421 * Return Code: DDI_SUCCESS or DDI_FAILURE
9422 *
9423 *     Context: Kernel thread context.
9424 */
9425
9426static int
9427sd_pm_entry(struct sd_lun *un)
9428{
9429	int return_status = DDI_SUCCESS;
9430
9431	ASSERT(!mutex_owned(SD_MUTEX(un)));
9432	ASSERT(!mutex_owned(&un->un_pm_mutex));
9433
9434	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9435
9436	if (un->un_f_pm_is_enabled == FALSE) {
9437		SD_TRACE(SD_LOG_IO_PM, un,
9438		    "sd_pm_entry: exiting, PM not enabled\n");
9439		return (return_status);
9440	}
9441
9442	/*
9443	 * Just increment a counter if PM is enabled. On the transition from
9444	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9445	 * the count with each IO and mark the device as idle when the count
9446	 * hits 0.
9447	 *
9448	 * If the count is less than 0 the device is powered down. If a powered
9449	 * down device is successfully powered up then the count must be
9450	 * incremented to reflect the power up. Note that it'll get incremented
9451	 * a second time to become busy.
9452	 *
9453	 * Because the following has the potential to change the device state
9454	 * and must release the un_pm_mutex to do so, only one thread can be
9455	 * allowed through at a time.
9456	 */
9457
9458	mutex_enter(&un->un_pm_mutex);
9459	while (un->un_pm_busy == TRUE) {
9460		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
9461	}
9462	un->un_pm_busy = TRUE;
9463
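	/*
	 * To summarize the logic below: un_pm_count < 0 means the device
	 * is powered down, 0 means powered up but idle, and > 0 counts
	 * commands currently holding the device busy.
	 */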
9464	if (un->un_pm_count < 1) {
9465
9466		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
9467
9468		/*
9469		 * Indicate we are now busy so the framework won't attempt to
9470		 * power down the device. This call will only fail if either
9471		 * we passed a bad component number or the device has no
9472		 * components. Neither of these should ever happen.
9473		 */
9474		mutex_exit(&un->un_pm_mutex);
9475		return_status = pm_busy_component(SD_DEVINFO(un), 0);
9476		ASSERT(return_status == DDI_SUCCESS);
9477
9478		mutex_enter(&un->un_pm_mutex);
9479
9480		if (un->un_pm_count < 0) {
9481			mutex_exit(&un->un_pm_mutex);
9482
9483			SD_TRACE(SD_LOG_IO_PM, un,
9484			    "sd_pm_entry: power up component\n");
9485
9486			/*
9487			 * pm_raise_power will cause sdpower to be called
9488			 * which brings the device power level to the
9489			 * desired state, ON in this case. If successful,
9490			 * un_pm_count and un_power_level will be updated
9491			 * appropriately.
9492			 */
9493			return_status = pm_raise_power(SD_DEVINFO(un), 0,
9494			    SD_SPINDLE_ON);
9495
9496			mutex_enter(&un->un_pm_mutex);
9497
9498			if (return_status != DDI_SUCCESS) {
9499				/*
9500				 * Power up failed.
9501				 * Idle the device and adjust the count
9502				 * so the result on exit is that we're
9503				 * still powered down, ie. count is less than 0.
9504				 */
9505				SD_TRACE(SD_LOG_IO_PM, un,
9506				    "sd_pm_entry: power up failed,"
9507				    " idle the component\n");
9508
9509				(void) pm_idle_component(SD_DEVINFO(un), 0);
9510				un->un_pm_count--;
9511			} else {
9512				/*
9513				 * Device is powered up, verify the
9514				 * count is non-negative.
9515				 * This is debug only.
9516				 */
9517				ASSERT(un->un_pm_count == 0);
9518			}
9519		}
9520
9521		if (return_status == DDI_SUCCESS) {
9522			/*
9523			 * For performance, now that the device has been tagged
9524			 * as busy, and it's known to be powered up, update the
9525			 * chain types to use jump tables that do not include
9526			 * pm. This significantly lowers the overhead and
9527			 * therefore improves performance.
9528			 */
9529
9530			mutex_exit(&un->un_pm_mutex);
9531			mutex_enter(SD_MUTEX(un));
9532			SD_TRACE(SD_LOG_IO_PM, un,
9533			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
9534			    un->un_uscsi_chain_type);
9535
9536			if (un->un_f_non_devbsize_supported) {
9537				un->un_buf_chain_type =
9538				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
9539			} else {
9540				un->un_buf_chain_type =
9541				    SD_CHAIN_INFO_DISK_NO_PM;
9542			}
9543			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
9544
9545			SD_TRACE(SD_LOG_IO_PM, un,
9546			    "             changed  uscsi_chain_type to   %d\n",
9547			    un->un_uscsi_chain_type);
9548			mutex_exit(SD_MUTEX(un));
9549			mutex_enter(&un->un_pm_mutex);
9550
9551			if (un->un_pm_idle_timeid == NULL) {
9552				/* 300 ms. */
9553				un->un_pm_idle_timeid =
9554				    timeout(sd_pm_idletimeout_handler, un,
9555				    (drv_usectohz((clock_t)300000)));
9556				/*
9557				 * Include an extra call to busy which keeps the
9558				 * Include an extra call to busy which keeps the
9559				 * device busy with respect to the PM layer
9560				 * get the extra idle call.
9561				 */
9562				(void) pm_busy_component(SD_DEVINFO(un), 0);
9563			}
9564		}
9565	}
9566	un->un_pm_busy = FALSE;
9567	/* Next... */
9568	cv_signal(&un->un_pm_busy_cv);
9569
9570	un->un_pm_count++;
9571
9572	SD_TRACE(SD_LOG_IO_PM, un,
9573	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
9574
9575	mutex_exit(&un->un_pm_mutex);
9576
9577	return (return_status);
9578}
9579
9580
9581/*
9582 *    Function: sd_pm_exit
9583 *
9584 * Description: Called at the completion of a command to manage busy
9585 *		status for the device. If the device becomes idle the
9586 *		PM framework is notified.
9587 *
9588 *     Context: Kernel thread context
9589 */
9590
9591static void
9592sd_pm_exit(struct sd_lun *un)
9593{
9594	ASSERT(!mutex_owned(SD_MUTEX(un)));
9595	ASSERT(!mutex_owned(&un->un_pm_mutex));
9596
9597	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
9598
9599	/*
9600	 * After attach the following flag is only read, so don't
9601	 * take the penalty of acquiring a mutex for it.
9602	 */
9603	if (un->un_f_pm_is_enabled == TRUE) {
9604
9605		mutex_enter(&un->un_pm_mutex);
9606		un->un_pm_count--;
9607
9608		SD_TRACE(SD_LOG_IO_PM, un,
9609		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
9610
9611		ASSERT(un->un_pm_count >= 0);
9612		if (un->un_pm_count == 0) {
9613			mutex_exit(&un->un_pm_mutex);
9614
9615			SD_TRACE(SD_LOG_IO_PM, un,
9616			    "sd_pm_exit: idle component\n");
9617
9618			(void) pm_idle_component(SD_DEVINFO(un), 0);
9619
9620		} else {
9621			mutex_exit(&un->un_pm_mutex);
9622		}
9623	}
9624
9625	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
9626}
9627
9628
9629/*
9630 *    Function: sdopen
9631 *
9632 * Description: Driver's open(9e) entry point function.
9633 *
9634 *   Arguments: dev_p   - pointer to device number
9635 *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
9636 *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9637 *		cred_p  - user credential pointer
9638 *
9639 * Return Code: EINVAL
9640 *		ENXIO
9641 *		EIO
9642 *		EROFS
9643 *		EBUSY
9644 *
9645 *     Context: Kernel thread context
9646 */
9647/* ARGSUSED */
9648static int
9649sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
9650{
9651	struct sd_lun	*un;
9652	int		nodelay;
9653	int		part;
9654	uint64_t	partmask;
9655	int		instance;
9656	dev_t		dev;
9657	int		rval = EIO;
9658	diskaddr_t	nblks = 0;
9659	diskaddr_t	label_cap;
9660
9661	/* Validate the open type */
9662	if (otyp >= OTYPCNT) {
9663		return (EINVAL);
9664	}
9665
9666	dev = *dev_p;
9667	instance = SDUNIT(dev);
9668	mutex_enter(&sd_detach_mutex);
9669
9670	/*
9671	 * Fail the open if there is no softstate for the instance, or
9672	 * if another thread somewhere is trying to detach the instance.
9673	 */
9674	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
9675	    (un->un_detach_count != 0)) {
9676		mutex_exit(&sd_detach_mutex);
9677		/*
9678		 * The probe cache only needs to be cleared when open (9e) fails
9679		 * with ENXIO (4238046).
9680		 */
9681		/*
9682		 * Unconditionally clearing the probe cache is ok with
9683		 * separate sd/ssd binaries; on the x86 platform, where
9684		 * both parallel and fibre are handled in one binary, it
9685		 * can be an issue.
9686		 */
9687		sd_scsi_clear_probe_cache();
9688		return (ENXIO);
9689	}
9690
9691	/*
9692	 * The un_layer_count is to prevent another thread in specfs from
9693	 * trying to detach the instance, which can happen when we are
9694	 * called from a higher-layer driver instead of thru specfs.
9695	 * called from a higher-layer driver instead of through specfs.
9696	 * This will not be needed when DDI provides a layered driver
9697	 * interface that allows specfs to know that an instance is in
9698	 * use by a layered driver and should not be detached.
9699	 * Note: the semantics for layered driver opens are exactly one
9700	 * close for every open.
9701	 */
9702	if (otyp == OTYP_LYR) {
9703		un->un_layer_count++;
9704	}
9705
9706	/*
9707	 * Keep a count of the current # of opens in progress. This is because
9708	 * some layered drivers try to call us as a regular open. This can
9709	 * cause problems that we cannot prevent; however, by keeping this count
9710	 * we can at least keep our open and detach routines from racing against
9711	 * each other under such conditions.
9712	 */
9713	un->un_opens_in_progress++;
9714	mutex_exit(&sd_detach_mutex);
9715
9716	nodelay  = (flag & (FNDELAY | FNONBLOCK));
9717	part	 = SDPART(dev);
9718	partmask = 1 << part;
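	/* One bit per partition in the regopen/exclopen bit masks. */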
9719
9720	/*
9721	 * We use a semaphore here in order to serialize
9722	 * open and close requests on the device.
9723	 */
9724	sema_p(&un->un_semoclose);
9725
9726	mutex_enter(SD_MUTEX(un));
9727
9728	/*
9729	 * All device accesses go thru sdstrategy() where we check
9730	 * on suspend status but there could be a scsi_poll command,
9731	 * which bypasses sdstrategy(), so we need to check pm
9732	 * status.
9733	 */
9734
9735	if (!nodelay) {
9736		while ((un->un_state == SD_STATE_SUSPENDED) ||
9737		    (un->un_state == SD_STATE_PM_CHANGING)) {
9738			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9739		}
9740
9741		mutex_exit(SD_MUTEX(un));
9742		if (sd_pm_entry(un) != DDI_SUCCESS) {
9743			rval = EIO;
9744			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
9745			    "sdopen: sd_pm_entry failed\n");
9746			goto open_failed_with_pm;
9747		}
9748		mutex_enter(SD_MUTEX(un));
9749	}
9750
9751	/* check for previous exclusive open */
9752	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
9753	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9754	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
9755	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
9756
9757	if (un->un_exclopen & (partmask)) {
9758		goto excl_open_fail;
9759	}
9760
9761	if (flag & FEXCL) {
9762		int i;
9763		if (un->un_ocmap.lyropen[part]) {
9764			goto excl_open_fail;
9765		}
9766		for (i = 0; i < (OTYPCNT - 1); i++) {
9767			if (un->un_ocmap.regopen[i] & (partmask)) {
9768				goto excl_open_fail;
9769			}
9770		}
9771	}
9772
9773	/*
9774	 * Check the write permission if this is a removable media device,
9775	 * NDELAY has not been set, and writable permission is requested.
9776	 *
9777	 * Note: If NDELAY was set and this is write-protected media the WRITE
9778	 * attempt will fail with EIO as part of the I/O processing. This is a
9779	 * more permissive implementation that allows the open to succeed and
9780	 * WRITE attempts to fail when appropriate.
9781	 */
9782	if (un->un_f_chk_wp_open) {
9783		if ((flag & FWRITE) && (!nodelay)) {
9784			mutex_exit(SD_MUTEX(un));
9785			/*
9786			 * Defer the check for write permission on writable
9787			 * DVD drives until sdstrategy; do not fail the open
9788			 * even if FWRITE is set, since the device can be
9789			 * writable depending upon the media, and the media
9790			 * can change after the call to open().
9791			 */
9792			if (un->un_f_dvdram_writable_device == FALSE) {
9793				if (ISCD(un) || sr_check_wp(dev)) {
9794					rval = EROFS;
9795					mutex_enter(SD_MUTEX(un));
9796					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9797					    "write to cd or write protected media\n");
9798					goto open_fail;
9799				}
9800			}
9801			mutex_enter(SD_MUTEX(un));
9802		}
9803	}
9804
9805	/*
9806	 * If opening in NDELAY/NONBLOCK mode, just return.
9807	 * Check if disk is ready and has a valid geometry later.
9808	 */
9809	if (!nodelay) {
9810		sd_ssc_t	*ssc;
9811
9812		mutex_exit(SD_MUTEX(un));
9813		ssc = sd_ssc_init(un);
9814		rval = sd_ready_and_valid(ssc, part);
9815		sd_ssc_fini(ssc);
9816		mutex_enter(SD_MUTEX(un));
9817		/*
9818		 * Fail if device is not ready or if the number of disk
9819		 * blocks is zero or negative for non-CD devices.
9820		 */
9821
9822		nblks = 0;
9823
9824		if (rval == SD_READY_VALID && (!ISCD(un))) {
9825			/* if cmlb_partinfo fails, nblks remains 0 */
9826			mutex_exit(SD_MUTEX(un));
9827			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
9828			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
9829			mutex_enter(SD_MUTEX(un));
9830		}
9831
9832		if ((rval != SD_READY_VALID) ||
9833		    (!ISCD(un) && nblks <= 0)) {
9834			rval = un->un_f_has_removable_media ? ENXIO : EIO;
9835			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9836			    "device not ready or invalid disk block value\n");
9837			goto open_fail;
9838		}
9839#if defined(__i386) || defined(__amd64)
9840	} else {
9841		uchar_t *cp;
9842		/*
9843		 * x86 requires special nodelay handling, so that p0 is
9844		 * always defined and accessible.
9845		 * Invalidate geometry only if device is not already open.
9846		 */
9847		cp = &un->un_ocmap.chkd[0];
9848		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
9849			if (*cp != (uchar_t)0) {
9850				break;
9851			}
9852			cp++;
9853		}
9854		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
9855			mutex_exit(SD_MUTEX(un));
9856			cmlb_invalidate(un->un_cmlbhandle,
9857			    (void *)SD_PATH_DIRECT);
9858			mutex_enter(SD_MUTEX(un));
9859		}
9860
9861#endif
9862	}
9863
9864	if (otyp == OTYP_LYR) {
9865		un->un_ocmap.lyropen[part]++;
9866	} else {
9867		un->un_ocmap.regopen[otyp] |= partmask;
9868	}
9869
9870	/* Set up open and exclusive open flags */
9871	if (flag & FEXCL) {
9872		un->un_exclopen |= (partmask);
9873	}
9874
9875	/*
9876	 * If the lun is EFI labeled and lun capacity is greater than the
9877	 * capacity contained in the label, log a sys-event to notify the
9878	 * interested module.
9879	 * To avoid an infinite loop of logging sys-event, we only log the
9880	 * event when the lun is not opened in NDELAY mode. The event handler
9881	 * should open the lun in NDELAY mode.
9882	 */
9883	if (!(flag & FNDELAY)) {
9884		mutex_exit(SD_MUTEX(un));
9885		if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
9886		    (void*)SD_PATH_DIRECT) == 0) {
9887			mutex_enter(SD_MUTEX(un));
9888			if (un->un_f_blockcount_is_valid &&
9889			    un->un_blockcount > label_cap) {
9890				mutex_exit(SD_MUTEX(un));
9891				sd_log_lun_expansion_event(un,
9892				    (nodelay ? KM_NOSLEEP : KM_SLEEP));
9893				mutex_enter(SD_MUTEX(un));
9894			}
9895		} else {
9896			mutex_enter(SD_MUTEX(un));
9897		}
9898	}
9899
9900	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
9901	    "open of part %d type %d\n", part, otyp);
9902
9903	mutex_exit(SD_MUTEX(un));
9904	if (!nodelay) {
9905		sd_pm_exit(un);
9906	}
9907
9908	sema_v(&un->un_semoclose);
9909
9910	mutex_enter(&sd_detach_mutex);
9911	un->un_opens_in_progress--;
9912	mutex_exit(&sd_detach_mutex);
9913
9914	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
9915	return (DDI_SUCCESS);
9916
9917excl_open_fail:
9918	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
9919	rval = EBUSY;
9920
9921open_fail:
9922	mutex_exit(SD_MUTEX(un));
9923
9924	/*
9925	 * On a failed open we must exit the pm management.
9926	 */
9927	if (!nodelay) {
9928		sd_pm_exit(un);
9929	}
9930open_failed_with_pm:
9931	sema_v(&un->un_semoclose);
9932
9933	mutex_enter(&sd_detach_mutex);
9934	un->un_opens_in_progress--;
9935	if (otyp == OTYP_LYR) {
9936		un->un_layer_count--;
9937	}
9938	mutex_exit(&sd_detach_mutex);
9939
9940	return (rval);
9941}
9942
9943
9944/*
9945 *    Function: sdclose
9946 *
9947 * Description: Driver's close(9e) entry point function.
9948 *
9949 *   Arguments: dev    - device number
9950 *		flag   - file status flag, informational only
9951 *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
9952 *		cred_p - user credential pointer
9953 *
9954 * Return Code: ENXIO
9955 *
9956 *     Context: Kernel thread context
9957 */
9958/* ARGSUSED */
9959static int
9960sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
9961{
9962	struct sd_lun	*un;
9963	uchar_t		*cp;
9964	int		part;
9965	int		nodelay;
9966	int		rval = 0;
9967
9968	/* Validate the open type */
9969	if (otyp >= OTYPCNT) {
9970		return (ENXIO);
9971	}
9972
9973	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
9974		return (ENXIO);
9975	}
9976
9977	part = SDPART(dev);
9978	nodelay = flag & (FNDELAY | FNONBLOCK);
9979
9980	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
9981	    "sdclose: close of part %d type %d\n", part, otyp);
9982
9983	/*
9984	 * We use a semaphore here in order to serialize
9985	 * open and close requests on the device.
9986	 */
9987	sema_p(&un->un_semoclose);
9988
9989	mutex_enter(SD_MUTEX(un));
9990
9991	/* Don't proceed if power is being changed. */
9992	while (un->un_state == SD_STATE_PM_CHANGING) {
9993		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
9994	}
9995
9996	if (un->un_exclopen & (1 << part)) {
9997		un->un_exclopen &= ~(1 << part);
9998	}
9999
10000	/* Update the open partition map */
10001	if (otyp == OTYP_LYR) {
10002		un->un_ocmap.lyropen[part] -= 1;
10003	} else {
10004		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10005	}
10006
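	/*
	 * Scan the open/close map; chkd overlays the lyropen and regopen
	 * counts, so if every byte is clear this was the last close.
	 */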
10007	cp = &un->un_ocmap.chkd[0];
10008	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10009		if (*cp != (uchar_t)0) {
10010			break;
10011		}
10012		cp++;
10013	}
10014
10015	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10016		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10017
10018		/*
10019		 * We avoid persistence upon the last close, and set
10020		 * the throttle back to the maximum.
10021		 */
10022		un->un_throttle = un->un_saved_throttle;
10023
10024		if (un->un_state == SD_STATE_OFFLINE) {
10025			if (un->un_f_is_fibre == FALSE) {
10026				scsi_log(SD_DEVINFO(un), sd_label,
10027				    CE_WARN, "offline\n");
10028			}
10029			mutex_exit(SD_MUTEX(un));
10030			cmlb_invalidate(un->un_cmlbhandle,
10031			    (void *)SD_PATH_DIRECT);
10032			mutex_enter(SD_MUTEX(un));
10033
10034		} else {
10035			/*
10036			 * Flush any outstanding writes in NVRAM cache.
10037			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10038			 * command; it may not work for non-Pluto devices.
10039			 * SYNCHRONIZE CACHE is not required for removables,
10040			 * except DVD-RAM drives.
10041			 *
10042			 * Also note: because SYNCHRONIZE CACHE is currently
10043			 * the only command issued here that requires the
10044			 * drive be powered up, only do the power up before
10045			 * sending the Sync Cache command. If additional
10046			 * commands are added which require a powered up
10047			 * drive, the following sequence may have to change.
10048			 *
10049			 * And finally, note that parallel SCSI on SPARC
10050			 * only issues a Sync Cache to DVD-RAM, a newly
10051			 * supported device.
10052			 */
10053#if defined(__i386) || defined(__amd64)
10054			if ((un->un_f_sync_cache_supported &&
10055			    un->un_f_sync_cache_required) ||
10056			    un->un_f_dvdram_writable_device == TRUE) {
10057#else
10058			if (un->un_f_dvdram_writable_device == TRUE) {
10059#endif
10060				mutex_exit(SD_MUTEX(un));
10061				if (sd_pm_entry(un) == DDI_SUCCESS) {
10062					rval =
10063					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10064					    NULL);
10065					/* ignore error if not supported */
10066					if (rval == ENOTSUP) {
10067						rval = 0;
10068					} else if (rval != 0) {
10069						rval = EIO;
10070					}
10071					sd_pm_exit(un);
10072				} else {
10073					rval = EIO;
10074				}
10075				mutex_enter(SD_MUTEX(un));
10076			}
10077
10078			/*
10079			 * For devices that support DOOR_LOCK, send an ALLOW
10080			 * MEDIA REMOVAL command, but don't get upset if it
10081			 * fails. We need to raise the power of the drive before
10082			 * we can call sd_send_scsi_DOORLOCK().
10083			 */
10084			if (un->un_f_doorlock_supported) {
10085				mutex_exit(SD_MUTEX(un));
10086				if (sd_pm_entry(un) == DDI_SUCCESS) {
10087					sd_ssc_t	*ssc;
10088
10089					ssc = sd_ssc_init(un);
10090					rval = sd_send_scsi_DOORLOCK(ssc,
10091					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10092					if (rval != 0)
10093						sd_ssc_assessment(ssc,
10094						    SD_FMT_IGNORE);
10095					sd_ssc_fini(ssc);
10096
10097					sd_pm_exit(un);
10098					if (ISCD(un) && (rval != 0) &&
10099					    (nodelay != 0)) {
10100						rval = ENXIO;
10101					}
10102				} else {
10103					rval = EIO;
10104				}
10105				mutex_enter(SD_MUTEX(un));
10106			}
10107
10108			/*
10109			 * If a device has removable media, invalidate all
10110			 * parameters related to media, such as geometry,
10111			 * blocksize, and blockcount.
10112			 */
10113			if (un->un_f_has_removable_media) {
10114				sr_ejected(un);
10115			}
10116
10117			/*
10118			 * Destroy the cache (if it exists) which was
10119			 * allocated for the write maps since this is
10120			 * the last close for this media.
10121			 */
10122			if (un->un_wm_cache) {
10123				/*
10124				 * Check if there are pending commands;
10125				 * if there are, give a warning and
10126				 * do not destroy the cache.
10127				 */
10128				if (un->un_ncmds_in_driver > 0) {
10129					scsi_log(SD_DEVINFO(un),
10130					    sd_label, CE_WARN,
10131					    "Unable to clean up memory "
10132					    "because of pending I/O\n");
10133				} else {
10134					kmem_cache_destroy(
10135					    un->un_wm_cache);
10136					un->un_wm_cache = NULL;
10137				}
10138			}
10139		}
10140	}
10141
10142	mutex_exit(SD_MUTEX(un));
10143	sema_v(&un->un_semoclose);
10144
10145	if (otyp == OTYP_LYR) {
10146		mutex_enter(&sd_detach_mutex);
10147		/*
10148		 * The detach routine may run when the layer count
10149		 * drops to zero.
10150		 */
10151		un->un_layer_count--;
10152		mutex_exit(&sd_detach_mutex);
10153	}
10154
10155	return (rval);
10156}
10157
10158
10159/*
10160 *    Function: sd_ready_and_valid
10161 *
10162 * Description: Test if device is ready and has a valid geometry.
10163 *
10164 *   Arguments: ssc  - sd_ssc_t, which contains the driver soft state (un)
10165 *		part - partition number to validate
10166 *
10167 * Return Code: SD_READY_VALID		ready and valid label
10168 *		SD_NOT_READY_VALID	not ready, no label
10169 *		SD_RESERVED_BY_OTHERS	reservation conflict
10170 *
10171 *     Context: Never called at interrupt context.
10172 */
10173
10174static int
10175sd_ready_and_valid(sd_ssc_t *ssc, int part)
10176{
10177	struct sd_errstats	*stp;
10178	uint64_t		capacity;
10179	uint_t			lbasize;
10180	int			rval = SD_READY_VALID;
10181	char			name_str[48];
10182	boolean_t		is_valid;
10183	struct sd_lun		*un;
10184	int			status;
10185
10186	ASSERT(ssc != NULL);
10187	un = ssc->ssc_un;
10188	ASSERT(un != NULL);
10189	ASSERT(!mutex_owned(SD_MUTEX(un)));
10190
10191	mutex_enter(SD_MUTEX(un));
10192	/*
10193	 * If a device has removable media, we must check if media is
10194	 * ready when checking if this device is ready and valid.
10195	 */
10196	if (un->un_f_has_removable_media) {
10197		mutex_exit(SD_MUTEX(un));
10198		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10199
10200		if (status != 0) {
10201			rval = SD_NOT_READY_VALID;
10202			mutex_enter(SD_MUTEX(un));
10203
10204			/* Ignore all failed status for removable media */
10205			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10206
10207			goto done;
10208		}
10209
10210		is_valid = SD_IS_VALID_LABEL(un);
10211		mutex_enter(SD_MUTEX(un));
10212		if (!is_valid ||
10213		    (un->un_f_blockcount_is_valid == FALSE) ||
10214		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10215
10216			/* Capacity has to be read on every open. */
10217			mutex_exit(SD_MUTEX(un));
10218			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
10219			    &lbasize, SD_PATH_DIRECT);
10220
10221			if (status != 0) {
10222				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10223
10224				cmlb_invalidate(un->un_cmlbhandle,
10225				    (void *)SD_PATH_DIRECT);
10226				mutex_enter(SD_MUTEX(un));
10227				rval = SD_NOT_READY_VALID;
10228
10229				goto done;
10230			} else {
10231				mutex_enter(SD_MUTEX(un));
10232				sd_update_block_info(un, lbasize, capacity);
10233			}
10234		}
10235
10236		/*
10237		 * Check if the media in the device is writable or not.
10238		 */
10239		if (!is_valid && ISCD(un)) {
10240			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
10241		}
10242
10243	} else {
10244		/*
10245		 * Do a test unit ready to clear any unit attention from non-cd
10246		 * devices.
10247		 */
10248		mutex_exit(SD_MUTEX(un));
10249
10250		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10251		if (status != 0) {
10252			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10253		}
10254
10255		mutex_enter(SD_MUTEX(un));
10256	}
10257
10258
10259	/*
10260	 * If this is a non-512-byte block device, allocate space for
10261	 * the wmap cache. This is being done here since this routine
10262	 * will be called every time the media is changed, and the
10263	 * block size is a function of the media rather than the device.
10264	 */
10265	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
10266		if (!(un->un_wm_cache)) {
10267			(void) snprintf(name_str, sizeof (name_str),
10268			    "%s%d_cache",
10269			    ddi_driver_name(SD_DEVINFO(un)),
10270			    ddi_get_instance(SD_DEVINFO(un)));
10271			un->un_wm_cache = kmem_cache_create(
10272			    name_str, sizeof (struct sd_w_map),
10273			    8, sd_wm_cache_constructor,
10274			    sd_wm_cache_destructor, NULL,
10275			    (void *)un, NULL, 0);
10276			if (!(un->un_wm_cache)) {
10277				rval = ENOMEM;
10278				goto done;
10279			}
10280		}
10281	}
10282
10283	if (un->un_state == SD_STATE_NORMAL) {
10284		/*
10285		 * If the target is not yet ready here (defined by a TUR
10286		 * failure), invalidate the geometry and print an 'offline'
10287		 * message. This is a legacy message, as the state of the
10288		 * target is not actually changed to SD_STATE_OFFLINE.
10289		 *
10290	 * If the TUR fails with EACCES (Reservation Conflict),
10291	 * SD_RESERVED_BY_OTHERS will be returned to indicate
10292	 * a reservation conflict. If the TUR fails for other
10293		 * reasons, SD_NOT_READY_VALID will be returned.
10294		 */
10295		int err;
10296
10297		mutex_exit(SD_MUTEX(un));
10298		err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10299		mutex_enter(SD_MUTEX(un));
10300
10301		if (err != 0) {
10302			mutex_exit(SD_MUTEX(un));
10303			cmlb_invalidate(un->un_cmlbhandle,
10304			    (void *)SD_PATH_DIRECT);
10305			mutex_enter(SD_MUTEX(un));
10306			if (err == EACCES) {
10307				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10308				    "reservation conflict\n");
10309				rval = SD_RESERVED_BY_OTHERS;
10310				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10311			} else {
10312				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10313				    "drive offline\n");
10314				rval = SD_NOT_READY_VALID;
10315				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
10316			}
10317			goto done;
10318		}
10319	}
10320
10321	if (un->un_f_format_in_progress == FALSE) {
10322		mutex_exit(SD_MUTEX(un));
10323
10324		(void) cmlb_validate(un->un_cmlbhandle, 0,
10325		    (void *)SD_PATH_DIRECT);
10326		if (cmlb_partinfo(un->un_cmlbhandle, part, NULL, NULL, NULL,
10327		    NULL, (void *) SD_PATH_DIRECT) != 0) {
10328			rval = SD_NOT_READY_VALID;
10329			mutex_enter(SD_MUTEX(un));
10330
10331			goto done;
10332		}
10333		if (un->un_f_pkstats_enabled) {
10334			sd_set_pstats(un);
10335			SD_TRACE(SD_LOG_IO_PARTITION, un,
10336			    "sd_ready_and_valid: un:0x%p pstats created and "
10337			    "set\n", un);
10338		}
10339		mutex_enter(SD_MUTEX(un));
10340	}
10341
10342	/*
10343	 * If this device supports the DOOR_LOCK command, try to send
10344	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
10345	 * if it fails. For a CD, however, it is an error.
10346	 */
10347	if (un->un_f_doorlock_supported) {
10348		mutex_exit(SD_MUTEX(un));
10349		status = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
10350		    SD_PATH_DIRECT);
10351
10352		if ((status != 0) && ISCD(un)) {
10353			rval = SD_NOT_READY_VALID;
10354			mutex_enter(SD_MUTEX(un));
10355
10356			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10357
10358			goto done;
10359		} else if (status != 0)
10360			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10361		mutex_enter(SD_MUTEX(un));
10362	}
10363
10364	/* The state has changed, inform the media watch routines */
10365	un->un_mediastate = DKIO_INSERTED;
10366	cv_broadcast(&un->un_state_cv);
10367	rval = SD_READY_VALID;
10368
10369done:
10370
10371	/*
10372	 * Initialize the capacity kstat value, if there was no media before
10373	 * (capacity kstat is 0) and media has now been inserted
10374	 * (un_blockcount > 0).
10375	 */
10376	if (un->un_errstats != NULL) {
10377		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10378		if ((stp->sd_capacity.value.ui64 == 0) &&
10379		    (un->un_f_blockcount_is_valid == TRUE)) {
10380			stp->sd_capacity.value.ui64 =
10381			    (uint64_t)((uint64_t)un->un_blockcount *
10382			    un->un_sys_blocksize);
10383		}
10384	}
10385
10386	mutex_exit(SD_MUTEX(un));
10387	return (rval);
10388}
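
/*
 * Illustrative sketch (comment only, not compiled): callers such as
 * sdread()/sdwrite() below wrap sd_ready_and_valid() in an sd_ssc_t
 * lifecycle, since the routine issues internal uscsi commands:
 *
 *	ssc = sd_ssc_init(un);
 *	if (sd_ready_and_valid(ssc, SDPART(dev)) != SD_READY_VALID)
 *		err = EIO;
 *	sd_ssc_fini(ssc);
 */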
10389
10390
10391/*
10392 *    Function: sdmin
10393 *
10394 * Description: Routine to limit the size of a data transfer. Used in
10395 *		conjunction with physio(9F).
10396 *
10397 *   Arguments: bp - pointer to the indicated buf(9S) struct.
10398 *
10399 *     Context: Kernel thread context.
10400 */
10401
10402static void
10403sdmin(struct buf *bp)
10404{
10405	struct sd_lun	*un;
10406	int		instance;
10407
10408	instance = SDUNIT(bp->b_edev);
10409
10410	un = ddi_get_soft_state(sd_state, instance);
10411	ASSERT(un != NULL);
10412
10413	/*
10414	 * We depend on DMA partial mapping or buf breakup to restrict
10415	 * IO size if either of them is enabled.
10416	 */
10417	if (un->un_partial_dma_supported ||
10418	    un->un_buf_breakup_supported) {
10419		return;
10420	}
10421
10422	if (bp->b_bcount > un->un_max_xfer_size) {
10423		bp->b_bcount = un->un_max_xfer_size;
10424	}
10425}
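
/*
 * A sketch of how sdmin() is used (comment only, not compiled): it is
 * handed to physio(9F) as the minphys routine, which applies it to
 * each buf before starting the transfer, e.g. in sdread() below:
 *
 *	err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
 *
 * A request larger than un_max_xfer_size is thus carved into multiple
 * transfers by physio(9F), unless DMA partial mapping or buf breakup
 * is enabled, in which case sdmin() leaves b_bcount untouched and the
 * lower layers perform the split.
 */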
10426
10427
10428/*
10429 *    Function: sdread
10430 *
10431 * Description: Driver's read(9e) entry point function.
10432 *
10433 *   Arguments: dev   - device number
10434 *		uio   - structure pointer describing where data is to be stored
10435 *			in user's space
10436 *		cred_p  - user credential pointer
10437 *
10438 * Return Code: ENXIO
10439 *		EIO
10440 *		EINVAL
10441 *		value returned by physio
10442 *
10443 *     Context: Kernel thread context.
10444 */
10445/* ARGSUSED */
10446static int
10447sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10448{
10449	struct sd_lun	*un = NULL;
10450	int		secmask;
10451	int		err = 0;
10452	sd_ssc_t	*ssc;
10453
10454	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10455		return (ENXIO);
10456	}
10457
10458	ASSERT(!mutex_owned(SD_MUTEX(un)));
10459
10460
10461	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10462		mutex_enter(SD_MUTEX(un));
10463		/*
10464		 * Because the call to sd_ready_and_valid will issue I/O, we
10465		 * must wait here if either the device is suspended or
10466		 * if its power level is changing.
10467		 */
10468		while ((un->un_state == SD_STATE_SUSPENDED) ||
10469		    (un->un_state == SD_STATE_PM_CHANGING)) {
10470			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10471		}
10472		un->un_ncmds_in_driver++;
10473		mutex_exit(SD_MUTEX(un));
10474
10475		/* Initialize sd_ssc_t for internal uscsi commands */
10476		ssc = sd_ssc_init(un);
10477		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10478			err = EIO;
10479		} else {
10480			err = 0;
10481		}
10482		sd_ssc_fini(ssc);
10483
10484		mutex_enter(SD_MUTEX(un));
10485		un->un_ncmds_in_driver--;
10486		ASSERT(un->un_ncmds_in_driver >= 0);
10487		mutex_exit(SD_MUTEX(un));
10488		if (err != 0)
10489			return (err);
10490	}
10491
10492	/*
10493	 * Read requests are restricted to multiples of the system block size.
10494	 */
10495	secmask = un->un_sys_blocksize - 1;
10496
10497	if (uio->uio_loffset & ((offset_t)(secmask))) {
10498		SD_ERROR(SD_LOG_READ_WRITE, un,
10499		    "sdread: file offset not modulo %d\n",
10500		    un->un_sys_blocksize);
10501		err = EINVAL;
10502	} else if (uio->uio_iov->iov_len & (secmask)) {
10503		SD_ERROR(SD_LOG_READ_WRITE, un,
10504		    "sdread: transfer length not modulo %d\n",
10505		    un->un_sys_blocksize);
10506		err = EINVAL;
10507	} else {
10508		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10509	}
10510
10511	return (err);
10512}
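
/*
 * A worked example of the alignment test above, assuming a 512-byte
 * un_sys_blocksize: secmask is 0x1FF, so an offset of 1024
 * (1024 & 0x1FF == 0) is accepted, while an offset of 1030
 * (1030 & 0x1FF == 6) is rejected with EINVAL. The same mask is
 * applied to the transfer length here and in sdwrite(), sdaread(),
 * and sdawrite() below.
 */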
10513
10514
10515/*
10516 *    Function: sdwrite
10517 *
10518 * Description: Driver's write(9e) entry point function.
10519 *
10520 *   Arguments: dev   - device number
10521 *		uio   - structure pointer describing where data is stored in
10522 *			user's space
10523 *		cred_p  - user credential pointer
10524 *
10525 * Return Code: ENXIO
10526 *		EIO
10527 *		EINVAL
10528 *		value returned by physio
10529 *
10530 *     Context: Kernel thread context.
10531 */
10532/* ARGSUSED */
10533static int
10534sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10535{
10536	struct sd_lun	*un = NULL;
10537	int		secmask;
10538	int		err = 0;
10539	sd_ssc_t	*ssc;
10540
10541	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10542		return (ENXIO);
10543	}
10544
10545	ASSERT(!mutex_owned(SD_MUTEX(un)));
10546
10547	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10548		mutex_enter(SD_MUTEX(un));
10549		/*
10550		 * Because the call to sd_ready_and_valid will issue I/O, we
10551		 * must wait here if either the device is suspended or
10552		 * if its power level is changing.
10553		 */
10554		while ((un->un_state == SD_STATE_SUSPENDED) ||
10555		    (un->un_state == SD_STATE_PM_CHANGING)) {
10556			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10557		}
10558		un->un_ncmds_in_driver++;
10559		mutex_exit(SD_MUTEX(un));
10560
10561		/* Initialize sd_ssc_t for internal uscsi commands */
10562		ssc = sd_ssc_init(un);
10563		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10564			err = EIO;
10565		} else {
10566			err = 0;
10567		}
10568		sd_ssc_fini(ssc);
10569
10570		mutex_enter(SD_MUTEX(un));
10571		un->un_ncmds_in_driver--;
10572		ASSERT(un->un_ncmds_in_driver >= 0);
10573		mutex_exit(SD_MUTEX(un));
10574		if (err != 0)
10575			return (err);
10576	}
10577
10578	/*
10579	 * Write requests are restricted to multiples of the system block size.
10580	 */
10581	secmask = un->un_sys_blocksize - 1;
10582
10583	if (uio->uio_loffset & ((offset_t)(secmask))) {
10584		SD_ERROR(SD_LOG_READ_WRITE, un,
10585		    "sdwrite: file offset not modulo %d\n",
10586		    un->un_sys_blocksize);
10587		err = EINVAL;
10588	} else if (uio->uio_iov->iov_len & (secmask)) {
10589		SD_ERROR(SD_LOG_READ_WRITE, un,
10590		    "sdwrite: transfer length not modulo %d\n",
10591		    un->un_sys_blocksize);
10592		err = EINVAL;
10593	} else {
10594		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10595	}
10596
10597	return (err);
10598}
10599
10600
10601/*
10602 *    Function: sdaread
10603 *
10604 * Description: Driver's aread(9e) entry point function.
10605 *
10606 *   Arguments: dev   - device number
10607 *		aio   - structure pointer describing where data is to be stored
10608 *		cred_p  - user credential pointer
10609 *
10610 * Return Code: ENXIO
10611 *		EIO
10612 *		EINVAL
10613 *		value returned by aphysio
10614 *
10615 *     Context: Kernel thread context.
10616 */
10617/* ARGSUSED */
10618static int
10619sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10620{
10621	struct sd_lun	*un = NULL;
10622	struct uio	*uio = aio->aio_uio;
10623	int		secmask;
10624	int		err = 0;
10625	sd_ssc_t	*ssc;
10626
10627	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10628		return (ENXIO);
10629	}
10630
10631	ASSERT(!mutex_owned(SD_MUTEX(un)));
10632
10633	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10634		mutex_enter(SD_MUTEX(un));
10635		/*
10636		 * Because the call to sd_ready_and_valid will issue I/O, we
10637		 * must wait here if either the device is suspended or
10638		 * if its power level is changing.
10639		 */
10640		while ((un->un_state == SD_STATE_SUSPENDED) ||
10641		    (un->un_state == SD_STATE_PM_CHANGING)) {
10642			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10643		}
10644		un->un_ncmds_in_driver++;
10645		mutex_exit(SD_MUTEX(un));
10646
10647		/* Initialize sd_ssc_t for internal uscsi commands */
10648		ssc = sd_ssc_init(un);
10649		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10650			err = EIO;
10651		} else {
10652			err = 0;
10653		}
10654		sd_ssc_fini(ssc);
10655
10656		mutex_enter(SD_MUTEX(un));
10657		un->un_ncmds_in_driver--;
10658		ASSERT(un->un_ncmds_in_driver >= 0);
10659		mutex_exit(SD_MUTEX(un));
10660		if (err != 0)
10661			return (err);
10662	}
10663
10664	/*
10665	 * Read requests are restricted to multiples of the system block size.
10666	 */
10667	secmask = un->un_sys_blocksize - 1;
10668
10669	if (uio->uio_loffset & ((offset_t)(secmask))) {
10670		SD_ERROR(SD_LOG_READ_WRITE, un,
10671		    "sdaread: file offset not modulo %d\n",
10672		    un->un_sys_blocksize);
10673		err = EINVAL;
10674	} else if (uio->uio_iov->iov_len & (secmask)) {
10675		SD_ERROR(SD_LOG_READ_WRITE, un,
10676		    "sdaread: transfer length not modulo %d\n",
10677		    un->un_sys_blocksize);
10678		err = EINVAL;
10679	} else {
10680		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
10681	}
10682
10683	return (err);
10684}
10685
10686
10687/*
10688 *    Function: sdawrite
10689 *
10690 * Description: Driver's awrite(9e) entry point function.
10691 *
10692 *   Arguments: dev   - device number
10693 *		aio   - structure pointer describing where data is stored
10694 *		cred_p  - user credential pointer
10695 *
10696 * Return Code: ENXIO
10697 *		EIO
10698 *		EINVAL
10699 *		value returned by aphysio
10700 *
10701 *     Context: Kernel thread context.
10702 */
10703/* ARGSUSED */
10704static int
10705sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10706{
10707	struct sd_lun	*un = NULL;
10708	struct uio	*uio = aio->aio_uio;
10709	int		secmask;
10710	int		err = 0;
10711	sd_ssc_t	*ssc;
10712
10713	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10714		return (ENXIO);
10715	}
10716
10717	ASSERT(!mutex_owned(SD_MUTEX(un)));
10718
10719	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10720		mutex_enter(SD_MUTEX(un));
10721		/*
10722		 * Because the call to sd_ready_and_valid will issue I/O, we
10723		 * must wait here if either the device is suspended or
10724		 * if its power level is changing.
10725		 */
10726		while ((un->un_state == SD_STATE_SUSPENDED) ||
10727		    (un->un_state == SD_STATE_PM_CHANGING)) {
10728			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10729		}
10730		un->un_ncmds_in_driver++;
10731		mutex_exit(SD_MUTEX(un));
10732
10733		/* Initialize sd_ssc_t for internal uscsi commands */
10734		ssc = sd_ssc_init(un);
10735		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10736			err = EIO;
10737		} else {
10738			err = 0;
10739		}
10740		sd_ssc_fini(ssc);
10741
10742		mutex_enter(SD_MUTEX(un));
10743		un->un_ncmds_in_driver--;
10744		ASSERT(un->un_ncmds_in_driver >= 0);
10745		mutex_exit(SD_MUTEX(un));
10746		if (err != 0)
10747			return (err);
10748	}
10749
10750	/*
10751	 * Write requests are restricted to multiples of the system block size.
10752	 */
10753	secmask = un->un_sys_blocksize - 1;
10754
10755	if (uio->uio_loffset & ((offset_t)(secmask))) {
10756		SD_ERROR(SD_LOG_READ_WRITE, un,
10757		    "sdawrite: file offset not modulo %d\n",
10758		    un->un_sys_blocksize);
10759		err = EINVAL;
10760	} else if (uio->uio_iov->iov_len & (secmask)) {
10761		SD_ERROR(SD_LOG_READ_WRITE, un,
10762		    "sdawrite: transfer length not modulo %d\n",
10763		    un->un_sys_blocksize);
10764		err = EINVAL;
10765	} else {
10766		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
10767	}
10768
10769	return (err);
10770}
10771
10772
10773
10774
10775
10776/*
10777 * Driver IO processing follows the following sequence:
10778 *
10779 *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
10780 *         |                |                     ^
10781 *         v                v                     |
10782 * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
10783 *         |                |                     |                   |
10784 *         v                |                     |                   |
10785 * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
10786 *         |                |                     ^                   ^
10787 *         v                v                     |                   |
10788 * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
10789 *         |                |                     |                   |
10790 *     +---+                |                     +------------+      +-------+
10791 *     |                    |                                  |              |
10792 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10793 *     |                    v                                  |              |
10794 *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
10795 *     |                    |                                  ^              |
10796 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10797 *     |                    v                                  |              |
10798 *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
10799 *     |                    |                                  ^              |
10800 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
10801 *     |                    v                                  |              |
10802 *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
10803 *     |                    |                                  ^              |
10804 *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
10805 *     |                    v                                  |              |
10806 *     |              sd_pm_iostart()                     sd_pm_iodone()      |
10807 *     |                    |                                  ^              |
10808 *     |                    |                                  |              |
10809 *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
10810 *                          |                           ^
10811 *                          v                           |
10812 *                   sd_core_iostart()                  |
10813 *                          |                           |
10814 *                          |                           +------>(*destroypkt)()
10815 *                          +-> sd_start_cmds() <-+     |           |
10816 *                          |                     |     |           v
10817 *                          |                     |     |  scsi_destroy_pkt(9F)
10818 *                          |                     |     |
10819 *                          +->(*initpkt)()       +- sdintr()
10820 *                          |  |                        |  |
10821 *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
10822 *                          |  +-> scsi_setup_cdb(9F)   |
10823 *                          |                           |
10824 *                          +--> scsi_transport(9F)     |
10825 *                                     |                |
10826 *                                     +----> SCSA ---->+
10827 *
10828 *
10829 * This code is based upon the following presumptions:
10830 *
10831 *   - iostart and iodone functions operate on buf(9S) structures. These
10832 *     functions perform the necessary operations on the buf(9S) and pass
10833 *     them along to the next function in the chain by using the macros
10834 *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
10835 *     (for iodone side functions).
10836 *
10837 *   - The iostart side functions may sleep. The iodone side functions
10838 *     are called under interrupt context and may NOT sleep. Therefore
10839 *     iodone side functions also may not call iostart side functions.
10840 *     (NOTE: iostart side functions should NOT sleep for memory, as
10841 *     this could result in deadlock.)
10842 *
10843 *   - An iostart side function may call its corresponding iodone side
10844 *     function directly (if necessary).
10845 *
10846 *   - In the event of an error, an iostart side function can return a buf(9S)
10847 *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
10848 *     b_error in the usual way of course).
10849 *
10850 *   - The taskq mechanism may be used by the iodone side functions to dispatch
10851 *     requests to the iostart side functions.  The iostart side functions in
10852 *     this case would be called under the context of a taskq thread, so it's
10853 *     OK for them to block/sleep/spin in this case.
10854 *
10855 *   - iostart side functions may allocate "shadow" buf(9S) structs and
10856 *     pass them along to the next function in the chain.  The corresponding
10857 *     iodone side functions must coalesce the "shadow" bufs and return
10858 *     the "original" buf to the next higher layer.
10859 *
10860 *   - The b_private field of the buf(9S) struct holds a pointer to
10861 *     an sd_xbuf struct, which contains information needed to
10862 *     construct the scsi_pkt for the command.
10863 *
10864 *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
10865 *     layer must acquire & release the SD_MUTEX(un) as needed.
10866 */
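
/*
 * Illustrative sketch (comment only, not compiled; sd_example_iostart
 * is a hypothetical layer name, not part of this driver): an
 * iostart-side function following the conventions above does its work
 * on the buf, then either passes it to the next layer in the chain or,
 * on error, returns it up the iodone side after setting B_ERROR and
 * b_error via bioerror(9F):
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		if (<some error condition>) {
 *			bioerror(bp, EIO);
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 */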
10867
10868
10869/*
10870 * Create taskq for all targets in the system. This is created at
10871 * _init(9E) and destroyed at _fini(9E).
10872 *
10873 * Note: here we set the minalloc to a reasonably high number to ensure that
10874 * we will have an adequate supply of task entries available at interrupt time.
10875 * This is used in conjunction with the TASKQ_PREPOPULATE flag in
10876 * sd_taskq_create().  Since we do not want to sleep for allocations at
10877 * interrupt time, set maxalloc equal to minalloc. That way we will just fail
10878 * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
10879 * requests at any one instant in time.
10880 */
10881#define	SD_TASKQ_NUMTHREADS	8
10882#define	SD_TASKQ_MINALLOC	256
10883#define	SD_TASKQ_MAXALLOC	256
10884
10885static taskq_t	*sd_tq = NULL;
10886_NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
10887
10888static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
10889static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
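
/*
 * A minimal dispatch sketch (comment only, not compiled; sd_some_task
 * is a hypothetical task function): because maxalloc == minalloc and
 * the entries are prepopulated, a dispatch from interrupt context can
 * use a no-sleep allocation and simply fail once the preallocated
 * entries are exhausted, rather than blocking:
 *
 *	if (taskq_dispatch(sd_tq, sd_some_task, un, KM_NOSLEEP) == 0) {
 *		<fail the command instead of sleeping for an entry>
 *	}
 */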
10890
10891/*
10892 * The following task queue is created for the write part of
10893 * read-modify-write on non-512 block size devices.
10894 * The number of threads is limited to 1 for now. This number was chosen
10895 * considering the fact that it currently applies only to DVD-RAM and
10896 * MO drives, for which performance is not the main criterion at this stage.
10897 * Note: it remains to be explored whether a single taskq could be used.
10898 */
10899#define	SD_WMR_TASKQ_NUMTHREADS	1
10900static taskq_t	*sd_wmr_tq = NULL;
10901_NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
10902
10903/*
10904 *    Function: sd_taskq_create
10905 *
10906 * Description: Create taskq thread(s) and preallocate task entries
10907 *
10908 * Return Code: None; the allocated taskqs are saved in sd_tq and sd_wmr_tq.
10909 *
10910 *     Context: Can sleep. Requires blockable context.
10911 *
10912 *       Notes: - The taskq() facility currently is NOT part of the DDI.
10913 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
10914 *		- taskq_create() will block for memory; it will also panic
10915 *		  if it cannot create the requested number of threads.
10916 *		- Currently taskq_create() creates threads that cannot be
10917 *		  swapped.
10918 *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
10919 *		  supply of taskq entries at interrupt time (i.e., so that
10920 *		  we do not have to sleep for memory).
10921 */
10922
10923static void
10924sd_taskq_create(void)
10925{
10926	char	taskq_name[TASKQ_NAMELEN];
10927
10928	ASSERT(sd_tq == NULL);
10929	ASSERT(sd_wmr_tq == NULL);
10930
10931	(void) snprintf(taskq_name, sizeof (taskq_name),
10932	    "%s_drv_taskq", sd_label);
10933	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
10934	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10935	    TASKQ_PREPOPULATE));
10936
10937	(void) snprintf(taskq_name, sizeof (taskq_name),
10938	    "%s_rmw_taskq", sd_label);
10939	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
10940	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
10941	    TASKQ_PREPOPULATE));
10942}
10943
10944
10945/*
10946 *    Function: sd_taskq_delete
10947 *
10948 * Description: Complementary cleanup routine for sd_taskq_create().
10949 *
10950 *     Context: Kernel thread context.
10951 */
10952
10953static void
10954sd_taskq_delete(void)
10955{
10956	ASSERT(sd_tq != NULL);
10957	ASSERT(sd_wmr_tq != NULL);
10958	taskq_destroy(sd_tq);
10959	taskq_destroy(sd_wmr_tq);
10960	sd_tq = NULL;
10961	sd_wmr_tq = NULL;
10962}
10963
10964
10965/*
10966 *    Function: sdstrategy
10967 *
10968 * Description: Driver's strategy (9E) entry point function.
10969 *
10970 *   Arguments: bp - pointer to buf(9S)
10971 *
10972 * Return Code: Always returns zero
10973 *
10974 *     Context: Kernel thread context.
10975 */
10976
10977static int
10978sdstrategy(struct buf *bp)
10979{
10980	struct sd_lun *un;
10981
10982	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
10983	if (un == NULL) {
10984		bioerror(bp, EIO);
10985		bp->b_resid = bp->b_bcount;
10986		biodone(bp);
10987		return (0);
10988	}
10989	/* As was done in the past, fail new cmds if the state is dumping. */
10990	if (un->un_state == SD_STATE_DUMPING) {
10991		bioerror(bp, ENXIO);
10992		bp->b_resid = bp->b_bcount;
10993		biodone(bp);
10994		return (0);
10995	}
10996
10997	ASSERT(!mutex_owned(SD_MUTEX(un)));
10998
10999	/*
11000	 * Commands may sneak in while we released the mutex in
11001	 * DDI_SUSPEND, so we should block new commands. However, old
11002	 * commands that are still in the driver at this point should
11003	 * still be allowed to drain.
11004	 */
11005	mutex_enter(SD_MUTEX(un));
11006	/*
11007	 * Must wait here if either the device is suspended or
11008	 * if its power level is changing.
11009	 */
11010	while ((un->un_state == SD_STATE_SUSPENDED) ||
11011	    (un->un_state == SD_STATE_PM_CHANGING)) {
11012		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11013	}
11014
11015	un->un_ncmds_in_driver++;
11016
11017	/*
11018	 * atapi: Since we are running the CD in PIO mode for now, we need to
11019	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11020	 * context under the HBA's init_pkt routine.
11021	 */
11022	if (un->un_f_cfg_is_atapi == TRUE) {
11023		mutex_exit(SD_MUTEX(un));
11024		bp_mapin(bp);
11025		mutex_enter(SD_MUTEX(un));
11026	}
11027	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11028	    un->un_ncmds_in_driver);
11029
11030	if (bp->b_flags & B_WRITE)
11031		un->un_f_sync_cache_required = TRUE;
11032
11033	mutex_exit(SD_MUTEX(un));
11034
11035	/*
11036	 * This will (eventually) allocate the sd_xbuf area and
11037	 * call sd_xbuf_strategy().  We just want to return the
11038	 * result of ddi_xbuf_qstrategy so that we have an
11039	 * optimized tail call which saves us a stack frame.
11040	 */
11041	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11042}
11043
11044
11045/*
11046 *    Function: sd_xbuf_strategy
11047 *
11048 * Description: Function for initiating IO operations via the
11049 *		ddi_xbuf_qstrategy() mechanism.
11050 *
11051 *     Context: Kernel thread context.
11052 */
11053
11054static void
11055sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11056{
11057	struct sd_lun *un = arg;
11058
11059	ASSERT(bp != NULL);
11060	ASSERT(xp != NULL);
11061	ASSERT(un != NULL);
11062	ASSERT(!mutex_owned(SD_MUTEX(un)));
11063
11064	/*
11065	 * Initialize the fields in the xbuf and save a pointer to the
11066	 * xbuf in bp->b_private.
11067	 */
11068	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11069
11070	/* Send the buf down the iostart chain */
11071	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11072}
11073
11074
11075/*
11076 *    Function: sd_xbuf_init
11077 *
11078 * Description: Prepare the given sd_xbuf struct for use.
11079 *
11080 *   Arguments: un - ptr to softstate
11081 *		bp - ptr to associated buf(9S)
11082 *		xp - ptr to associated sd_xbuf
11083 *		chain_type - IO chain type to use:
11084 *			SD_CHAIN_NULL
11085 *			SD_CHAIN_BUFIO
11086 *			SD_CHAIN_USCSI
11087 *			SD_CHAIN_DIRECT
11088 *			SD_CHAIN_DIRECT_PRIORITY
11089 *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11090 *			initialization; may be NULL if none.
11091 *
11092 *     Context: Kernel thread context
11093 */
11094
11095static void
11096sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11097	uchar_t chain_type, void *pktinfop)
11098{
11099	int index;
11100
11101	ASSERT(un != NULL);
11102	ASSERT(bp != NULL);
11103	ASSERT(xp != NULL);
11104
11105	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11106	    bp, chain_type);
11107
11108	xp->xb_un	= un;
11109	xp->xb_pktp	= NULL;
11110	xp->xb_pktinfo	= pktinfop;
11111	xp->xb_private	= bp->b_private;
11112	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11113
11114	/*
11115	 * Set up the iostart and iodone chain indexes in the xbuf, based
11116	 * upon the specified chain type to use.
11117	 */
11118	switch (chain_type) {
11119	case SD_CHAIN_NULL:
11120		/*
11121		 * Fall through to just use the values for the buf type, even
11122		 * though for the NULL chain these values will never be used.
11123		 */
11124		/* FALLTHRU */
11125	case SD_CHAIN_BUFIO:
11126		index = un->un_buf_chain_type;
11127		break;
11128	case SD_CHAIN_USCSI:
11129		index = un->un_uscsi_chain_type;
11130		break;
11131	case SD_CHAIN_DIRECT:
11132		index = un->un_direct_chain_type;
11133		break;
11134	case SD_CHAIN_DIRECT_PRIORITY:
11135		index = un->un_priority_chain_type;
11136		break;
11137	default:
11138		/* We're really broken if we ever get here... */
11139		panic("sd_xbuf_init: illegal chain type!");
11140		/*NOTREACHED*/
11141	}
11142
11143	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11144	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11145
11146	/*
11147	 * It might be a bit easier to simply bzero the entire xbuf above,
11148	 * but it turns out that since we init a fair number of members anyway,
11149	 * we save a fair number of cycles by doing explicit assignments of zero.
11150	 */
11151	xp->xb_pkt_flags	= 0;
11152	xp->xb_dma_resid	= 0;
11153	xp->xb_retry_count	= 0;
11154	xp->xb_victim_retry_count = 0;
11155	xp->xb_ua_retry_count	= 0;
11156	xp->xb_nr_retry_count	= 0;
11157	xp->xb_sense_bp		= NULL;
11158	xp->xb_sense_status	= 0;
11159	xp->xb_sense_state	= 0;
11160	xp->xb_sense_resid	= 0;
11161	xp->xb_ena		= 0;
11162
11163	bp->b_private	= xp;
11164	bp->b_flags	&= ~(B_DONE | B_ERROR);
11165	bp->b_resid	= 0;
11166	bp->av_forw	= NULL;
11167	bp->av_back	= NULL;
11168	bioerror(bp, 0);
11169
11170	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11171}
11172
11173
11174/*
11175 *    Function: sd_uscsi_strategy
11176 *
11177 * Description: Wrapper for calling into the USCSI chain via physio(9F)
11178 *
11179 *   Arguments: bp - buf struct ptr
11180 *
11181 * Return Code: Always returns 0
11182 *
11183 *     Context: Kernel thread context
11184 */
11185
11186static int
11187sd_uscsi_strategy(struct buf *bp)
11188{
11189	struct sd_lun		*un;
11190	struct sd_uscsi_info	*uip;
11191	struct sd_xbuf		*xp;
11192	uchar_t			chain_type;
11193	uchar_t			cmd;
11194
11195	ASSERT(bp != NULL);
11196
11197	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11198	if (un == NULL) {
11199		bioerror(bp, EIO);
11200		bp->b_resid = bp->b_bcount;
11201		biodone(bp);
11202		return (0);
11203	}
11204
11205	ASSERT(!mutex_owned(SD_MUTEX(un)));
11206
11207	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11208
11209	/*
11210	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private.
11211	 */
11212	ASSERT(bp->b_private != NULL);
11213	uip = (struct sd_uscsi_info *)bp->b_private;
11214	cmd = ((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_cdb[0];
11215
11216	mutex_enter(SD_MUTEX(un));
11217	/*
11218	 * atapi: Since we are running the CD in PIO mode for now, we need to
11219	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11220	 * context under the HBA's init_pkt routine.
11221	 */
11222	if (un->un_f_cfg_is_atapi == TRUE) {
11223		mutex_exit(SD_MUTEX(un));
11224		bp_mapin(bp);
11225		mutex_enter(SD_MUTEX(un));
11226	}
11227	un->un_ncmds_in_driver++;
11228	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11229	    un->un_ncmds_in_driver);
11230
11231	if ((bp->b_flags & B_WRITE) && (bp->b_bcount != 0) &&
11232	    (cmd != SCMD_MODE_SELECT) && (cmd != SCMD_MODE_SELECT_G1))
11233		un->un_f_sync_cache_required = TRUE;
11234
11235	mutex_exit(SD_MUTEX(un));
11236
11237	switch (uip->ui_flags) {
11238	case SD_PATH_DIRECT:
11239		chain_type = SD_CHAIN_DIRECT;
11240		break;
11241	case SD_PATH_DIRECT_PRIORITY:
11242		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11243		break;
11244	default:
11245		chain_type = SD_CHAIN_USCSI;
11246		break;
11247	}
11248
11249	/*
11250	 * We may allocate an extra buf for external USCSI commands. If the
11251	 * application asks for more than 20 bytes of sense data via USCSI,
11252	 * the SCSA layer will allocate a 252-byte sense buf for that command.
11253	 */
11254	if (((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_rqlen >
11255	    SENSE_LENGTH) {
11256		xp = kmem_zalloc(sizeof (struct sd_xbuf) - SENSE_LENGTH +
11257		    MAX_SENSE_LENGTH, KM_SLEEP);
11258	} else {
11259		xp = kmem_zalloc(sizeof (struct sd_xbuf), KM_SLEEP);
11260	}
11261
11262	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11263
11264	/* Use the index obtained within xbuf_init */
11265	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11266
11267	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11268
11269	return (0);
11270}
11271
11272/*
11273 *    Function: sd_send_scsi_cmd
11274 *
11275 * Description: Runs a USCSI command for user (when called thru sdioctl),
11276 *		or for the driver
11277 *
11278 *   Arguments: dev - the dev_t for the device
11279 *		incmd - ptr to a valid uscsi_cmd struct
11280 *		flag - bit flag, indicating open settings, 32/64 bit type
11281 *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11282 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11283 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11284 *			to use the USCSI "direct" chain and bypass the normal
11285 *			command waitq.
11286 *
11287 * Return Code: 0 -  successful completion of the given command
11288 *		EIO - scsi_uscsi_handle_command() failed
11289 *		ENXIO  - soft state not found for specified dev
11290 *		EINVAL
11291 *		EFAULT - copyin/copyout error
11292 *		return code of scsi_uscsi_handle_command():
11293 *			EIO
11294 *			ENXIO
11295 *			EACCES
11296 *
11297 *     Context: Waits for command to complete. Can sleep.
11298 */
11299
11300static int
11301sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
11302	enum uio_seg dataspace, int path_flag)
11303{
11304	struct sd_lun	*un;
11305	sd_ssc_t	*ssc;
11306	int		rval;
11307
11308	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11309	if (un == NULL) {
11310		return (ENXIO);
11311	}
11312
11313	/*
11314	 * Using sd_ssc_send to handle uscsi cmd
11315	 */
11316	ssc = sd_ssc_init(un);
11317	rval = sd_ssc_send(ssc, incmd, flag, dataspace, path_flag);
11318	sd_ssc_fini(ssc);
11319
11320	return (rval);
11321}
11322
11323/*
11324 *    Function: sd_ssc_init
11325 *
11326 * Description: Uscsi end-users call this function to initialize necessary
11327 *              fields, such as the uscsi_cmd and sd_uscsi_info structs.
11328 *
11329 *              The return value of sd_send_scsi_cmd will be treated as a
11330 *              fault in various conditions. Even if it is not zero, some
11331 *              callers may ignore the return value. That is to say, we
11332 *              cannot make an accurate assessment in sdintr, since a
11333 *              command failing in sdintr does not mean the caller of
11334 *              sd_send_scsi_cmd will treat it as a real failure.
11335 *
11336 *              To avoid printing too many error logs for a failed uscsi
11337 *              packet that the caller may not treat as a failure, the
11338 *              sd driver will keep silent when handling all uscsi commands.
11339 *
11340 *              During detach->attach and attach-open, for some types of
11341 *              problems, the driver should be providing information about
11342 *              the problem encountered. Devices use USCSI_SILENT, which
11343 *              suppresses all driver information. The result is that no
11344 *              information about the problem is available. Being
11345 *              completely silent during this time is inappropriate. The
11346 *              driver needs a more selective filter than USCSI_SILENT, so
11347 *              that information related to faults is provided.
11348 *
11349 *              To make an accurate assessment, the caller of
11350 *              sd_send_scsi_USCSI_CMD should take ownership and
11351 *              gather the information necessary to print error messages.
11352 *
11353 *              If we want to print the necessary info of a uscsi command,
11354 *              we need to keep the uscsi_cmd and sd_uscsi_info until we can
11355 *              make the assessment. We use sd_ssc_init to alloc the
11356 *              necessary structs for sending a uscsi command, and we are
11357 *              also responsible for freeing the memory by calling
11358 *              sd_ssc_fini.
11359 *
11360 *              The calling sequence will look like:
11361 *              sd_ssc_init->
11362 *
11363 *                  ...
11364 *
11365 *                  sd_send_scsi_USCSI_CMD->
11366 *                      sd_ssc_send-> - - - sdintr
11367 *                  ...
11368 *
11369 *                  if we think the return value should be treated as a
11370 *                  failure, we make the assessment here and print out
11371 *                  what is necessary by retrieving uscsi_cmd and sd_uscsi_info
11372 *
11373 *                  ...
11374 *
11375 *              sd_ssc_fini
11376 *
11377 *
11378 *   Arguments: un - pointer to driver soft state (unit) structure for this
11379 *                   target.
11380 *
11381 * Return code: sd_ssc_t - pointer to allocated sd_ssc_t struct, which
11382 *                         contains uscsi_cmd and sd_uscsi_info.
11383 *                  NULL - if memory cannot be allocated for the sd_ssc_t struct
11384 *
11385 *     Context: Kernel Thread.
11386 */
11387static sd_ssc_t *
11388sd_ssc_init(struct sd_lun *un)
11389{
11390	sd_ssc_t		*ssc;
11391	struct uscsi_cmd	*ucmdp;
11392	struct sd_uscsi_info	*uip;
11393
11394	ASSERT(un != NULL);
11395	ASSERT(!mutex_owned(SD_MUTEX(un)));
11396
11397	/*
11398	 * Allocate sd_ssc_t structure
11399	 */
11400	ssc = kmem_zalloc(sizeof (sd_ssc_t), KM_SLEEP);
11401
11402	/*
11403	 * Allocate uscsi_cmd by calling scsi_uscsi_alloc common routine
11404	 */
11405	ucmdp = scsi_uscsi_alloc();
11406
11407	/*
11408	 * Allocate sd_uscsi_info structure
11409	 */
11410	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11411
11412	ssc->ssc_uscsi_cmd = ucmdp;
11413	ssc->ssc_uscsi_info = uip;
11414	ssc->ssc_un = un;
11415
11416	return (ssc);
11417}
11418
11419/*
11420 * Function: sd_ssc_fini
11421 *
11422 * Description: To free the sd_ssc_t struct and everything hanging off it.
11423 *
11424 * Arguments: ssc - struct pointer of sd_ssc_t.
11425 */
11426static void
11427sd_ssc_fini(sd_ssc_t *ssc)
11428{
11429	scsi_uscsi_free(ssc->ssc_uscsi_cmd);
11430
11431	if (ssc->ssc_uscsi_info != NULL) {
11432		kmem_free(ssc->ssc_uscsi_info, sizeof (struct sd_uscsi_info));
11433		ssc->ssc_uscsi_info = NULL;
11434	}
11435
11436	kmem_free(ssc, sizeof (sd_ssc_t));
11437	ssc = NULL;
11438}
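
/*
 * Putting the pieces together, a typical internal caller follows the
 * sequence described above sd_ssc_init (a sketch, comment only; ucmd
 * stands for a caller-prepared struct uscsi_cmd), pairing every
 * sd_ssc_send() with an sd_ssc_assessment() before the ssc is freed
 * or reused:
 *
 *	ssc = sd_ssc_init(un);
 *	rval = sd_ssc_send(ssc, &ucmd, FKIOCTL, UIO_SYSSPACE,
 *	    SD_PATH_DIRECT);
 *	if (rval != 0)
 *		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
 *	else
 *		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
 *	sd_ssc_fini(ssc);
 */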
11439
11440/*
11441 * Function: sd_ssc_send
11442 *
11443 * Description: Runs a USCSI command for user when called through sdioctl,
11444 *              or for the driver.
11445 *
11446 *   Arguments: ssc - the sd_ssc_t struct, carrying the uscsi_cmd and
11447 *                    sd_uscsi_info structs.
11448 *		incmd - ptr to a valid uscsi_cmd struct
11449 *		flag - bit flag, indicating open settings, 32/64 bit type
11450 *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11451 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11452 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11453 *			to use the USCSI "direct" chain and bypass the normal
11454 *			command waitq.
11455 *
11456 * Return Code: 0 -  successful completion of the given command
11457 *		EIO - scsi_uscsi_handle_command() failed
11458 *		ENXIO  - soft state not found for specified dev
11459 *		EINVAL
11460 *		EFAULT - copyin/copyout error
11461 *		return code of scsi_uscsi_handle_command():
11462 *			EIO
11463 *			ENXIO
11464 *			EACCES
11465 *
11466 *     Context: Kernel Thread;
11467 *              Waits for command to complete. Can sleep.
11468 */
11469static int
11470sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd, int flag,
11471	enum uio_seg dataspace, int path_flag)
11472{
11473	struct sd_uscsi_info	*uip;
11474	struct uscsi_cmd	*uscmd;
11475	struct sd_lun		*un;
11476	dev_t			dev;
11477
11478	int	format = 0;
11479	int	rval;
11480
11481	ASSERT(ssc != NULL);
11482	un = ssc->ssc_un;
11483	ASSERT(un != NULL);
11484	uscmd = ssc->ssc_uscsi_cmd;
11485	ASSERT(uscmd != NULL);
11486	ASSERT(!mutex_owned(SD_MUTEX(un)));
11487	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
11488		/*
11489		 * If we enter here, it indicates that the previous uscsi
11490		 * command has not been processed by sd_ssc_assessment.
11491		 * This violates our rules of FMA telemetry processing.
11492		 * We should print out this message and the last undisposed
11493		 * uscsi command.
11494		 */
11495		if (uscmd->uscsi_cdb != NULL) {
11496			SD_INFO(SD_LOG_SDTEST, un,
11497			    "sd_ssc_send is missing the alternative "
11498			    "sd_ssc_assessment when running command 0x%x.\n",
11499			    uscmd->uscsi_cdb[0]);
11500		}
11501		/*
11502		 * Set the ssc_flags to SSC_FLAGS_UNKNOWN, which should be
11503		 * the initial status.
11504		 */
11505		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11506	}
11507
11508	/*
11509	 * We need to make sure sd_ssc_send is followed by sd_ssc_assessment
11510	 * to avoid missing FMA telemetry.
11511	 */
11512	ssc->ssc_flags |= SSC_FLAGS_NEED_ASSESSMENT;
11513
11514#ifdef SDDEBUG
11515	switch (dataspace) {
11516	case UIO_USERSPACE:
11517		SD_TRACE(SD_LOG_IO, un,
11518		    "sd_ssc_send: entry: un:0x%p UIO_USERSPACE\n", un);
11519		break;
11520	case UIO_SYSSPACE:
11521		SD_TRACE(SD_LOG_IO, un,
11522		    "sd_ssc_send: entry: un:0x%p UIO_SYSSPACE\n", un);
11523		break;
11524	default:
11525		SD_TRACE(SD_LOG_IO, un,
11526		    "sd_ssc_send: entry: un:0x%p UNEXPECTED SPACE\n", un);
11527		break;
11528	}
11529#endif
11530
11531	rval = scsi_uscsi_copyin((intptr_t)incmd, flag,
11532	    SD_ADDRESS(un), &uscmd);
11533	if (rval != 0) {
11534		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send: "
11535		    "scsi_uscsi_copyin failed\n");
11536		return (rval);
11537	}
11538
11539	if ((uscmd->uscsi_cdb != NULL) &&
11540	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
11541		mutex_enter(SD_MUTEX(un));
11542		un->un_f_format_in_progress = TRUE;
11543		mutex_exit(SD_MUTEX(un));
11544		format = 1;
11545	}
11546
11547	/*
11548	 * Fill in the sd_uscsi_info struct (allocated by sd_ssc_init) with
11549	 * the info needed by sd_initpkt_for_uscsi().  Then put the pointer
11550	 * into b_private in the buf for sd_initpkt_for_uscsi().  Note that
11551	 * since the buf is allocated for us in this path, we do not
11552	 * need to preserve the prior contents of b_private.
11553	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy().
11554	 */
11555	uip = ssc->ssc_uscsi_info;
11556	uip->ui_flags = path_flag;
11557	uip->ui_cmdp = uscmd;
11558
11559	/*
11560	 * Commands sent with priority are intended for error recovery
11561	 * situations, and do not have retries performed.
11562	 */
11563	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
11564		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
11565	}
11566	uscmd->uscsi_flags &= ~USCSI_NOINTR;
11567
11568	dev = SD_GET_DEV(un);
11569	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
11570	    sd_uscsi_strategy, NULL, uip);
11571
11572	/*
11573	 * Mark ssc_flags right after handle_cmd to record that
11574	 * the uscsi command has been sent.
11575	 */
11576	ssc->ssc_flags |= SSC_FLAGS_CMD_ISSUED;
11577
11578#ifdef SDDEBUG
11579	SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
11580	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
11581	    uscmd->uscsi_status, uscmd->uscsi_resid);
11582	if (uscmd->uscsi_bufaddr != NULL) {
11583		SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
11584		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
11585		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
11586		if (dataspace == UIO_SYSSPACE) {
11587			SD_DUMP_MEMORY(un, SD_LOG_IO,
11588			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
11589			    uscmd->uscsi_buflen, SD_LOG_HEX);
11590		}
11591	}
11592#endif
11593
11594	if (format == 1) {
11595		mutex_enter(SD_MUTEX(un));
11596		un->un_f_format_in_progress = FALSE;
11597		mutex_exit(SD_MUTEX(un));
11598	}
11599
11600	(void) scsi_uscsi_copyout((intptr_t)incmd, uscmd);
11601
11602	return (rval);
11603}
11604
11605/*
11606 *     Function: sd_ssc_print
11607 *
11608 * Description: Print information available to the console.
11609 *
11610 * Arguments: ssc - the sd_ssc_t struct, carrying the uscsi_cmd and
11611 *                    sd_uscsi_info structs.
11612 *            sd_severity - log level.
11613 *     Context: Kernel thread or interrupt context.
11614 */
11615static void
11616sd_ssc_print(sd_ssc_t *ssc, int sd_severity)
11617{
11618	struct uscsi_cmd	*ucmdp;
11619	struct scsi_device	*devp;
11620	dev_info_t 		*devinfo;
11621	uchar_t			*sensep;
11622	int			senlen;
11623	union scsi_cdb		*cdbp;
11624	uchar_t			com;
11625	extern struct scsi_key_strings scsi_cmds[];
11626
11627	ASSERT(ssc != NULL);
11628	ASSERT(ssc->ssc_un != NULL);
11629
11630	if (SD_FM_LOG(ssc->ssc_un) != SD_FM_LOG_EREPORT)
11631		return;
11632	ucmdp = ssc->ssc_uscsi_cmd;
11633	devp = SD_SCSI_DEVP(ssc->ssc_un);
11634	devinfo = SD_DEVINFO(ssc->ssc_un);
11635	ASSERT(ucmdp != NULL);
11636	ASSERT(devp != NULL);
11637	ASSERT(devinfo != NULL);
11638	sensep = (uint8_t *)ucmdp->uscsi_rqbuf;
11639	senlen = ucmdp->uscsi_rqlen - ucmdp->uscsi_rqresid;
11640	cdbp = (union scsi_cdb *)ucmdp->uscsi_cdb;
11641
11642	/* In certain cases (like DOORLOCK), the cdb could be NULL. */
11643	if (cdbp == NULL)
11644		return;
11645	/* We don't print log if no sense data available. */
11646	/* We don't print a log if no sense data is available. */
11647		sensep = NULL;
11648	com = cdbp->scc_cmd;
11649	scsi_generic_errmsg(devp, sd_label, sd_severity, 0, 0, com,
11650	    scsi_cmds, sensep, ssc->ssc_un->un_additional_codes, NULL);
11651}
11652
11653/*
11654 *     Function: sd_ssc_assessment
11655 *
11656 * Description: We use this function to make an assessment at the point
11657 *              where the SD driver may encounter a potential error.
11658 *
11659 * Arguments: ssc - the sd_ssc_t struct, carrying the uscsi_cmd and
11660 *                  sd_uscsi_info structs.
11661 *            tp_assess - a hint of strategy for ereport posting.
11662 *            Possible values of tp_assess include:
11663 *                SD_FMT_IGNORE - we don't post any ereport because we're
11664 *                sure that it is ok to ignore the underlying problems.
11665 *                SD_FMT_IGNORE_COMPROMISE - we don't post any ereport for now
11666 *                but it might not be correct to ignore the underlying
11667 *                hardware error.
11668 *                SD_FMT_STATUS_CHECK - we will post an ereport with the
11669 *                payload driver-assessment of value "fail" or
11670 *                "fatal" (depending on what information we have here). This
11671 *                assessment value is usually set when the SD driver thinks
11672 *                a potential error has occurred (typically, when the return
11673 *                value of the SCSI command is EIO).
11674 *                SD_FMT_STANDARD - we will post an ereport with the payload
11675 *                driver-assessment of value "info". This assessment value is
11676 *                set when the SCSI command returned successfully and with
11677 *                sense data sent back.
11678 *
11679 *     Context: Kernel thread.
11680 */
11681static void
11682sd_ssc_assessment(sd_ssc_t *ssc, enum sd_type_assessment tp_assess)
11683{
11684	int senlen = 0;
11685	struct uscsi_cmd *ucmdp = NULL;
11686	struct sd_lun *un;
11687
11688	ASSERT(ssc != NULL);
11689	un = ssc->ssc_un;
11690	ASSERT(un != NULL);
11691	ucmdp = ssc->ssc_uscsi_cmd;
11692	ASSERT(ucmdp != NULL);
11693
11694	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
11695		ssc->ssc_flags &= ~SSC_FLAGS_NEED_ASSESSMENT;
11696	} else {
11697		/*
11698		 * If we enter here, it indicates that we have a wrong
11699		 * calling sequence of sd_ssc_send and sd_ssc_assessment,
11700		 * both of which should be called in a pair to avoid
11701		 * loss of FMA telemetry.
11702		 */
11703		if (ucmdp->uscsi_cdb != NULL) {
11704			SD_INFO(SD_LOG_SDTEST, un,
11705			    "sd_ssc_assessment is missing the "
11706			    "alternative sd_ssc_send when running 0x%x, "
11707			    "or there are superfluous sd_ssc_assessment for "
11708			    "the same sd_ssc_send.\n",
11709			    ucmdp->uscsi_cdb[0]);
11710		}
11711		/*
11712		 * Set the ssc_flags to the initial value to avoid passing
11713		 * down dirty flags to the following sd_ssc_send function.
11714		 */
11715		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11716		return;
11717	}
11718
11719	/*
11720	 * Only handle an issued command which is waiting for assessment.
11721	 * A command which has not been issued will not have
11722	 * SSC_FLAGS_INVALID_DATA set, so it's OK to just return here.
11723	 */
11724	if (!(ssc->ssc_flags & SSC_FLAGS_CMD_ISSUED)) {
11725		sd_ssc_print(ssc, SCSI_ERR_INFO);
11726		return;
11727	} else {
11728		/*
11729		 * For an issued command, we should clear this flag in
11730		 * order to make the sd_ssc_t structure reusable across
11731		 * multiple uscsi commands.
11732		 */
11733		ssc->ssc_flags &= ~SSC_FLAGS_CMD_ISSUED;
11734	}
11735
11736	/*
11737	 * We will not deal with non-retryable (flag USCSI_DIAGNOSE set)
11738	 * commands here, and we should clear the ssc_flags before returning.
11739	 */
11740	if (ucmdp->uscsi_flags & USCSI_DIAGNOSE) {
11741		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11742		return;
11743	}
11744
11745	switch (tp_assess) {
11746	case SD_FMT_IGNORE:
11747	case SD_FMT_IGNORE_COMPROMISE:
11748		break;
11749	case SD_FMT_STATUS_CHECK:
11750		/*
11751		 * For a failed command (including a succeeded command
11752		 * with invalid data sent back).
11753		 */
11754		sd_ssc_post(ssc, SD_FM_DRV_FATAL);
11755		break;
11756	case SD_FMT_STANDARD:
11757		/*
11758		 * Always for succeeded commands, probably with sense
11759		 * data sent back.
11760		 * Limitation:
11761		 *	We can only handle a succeeded command with sense
11762		 *	data sent back when auto-request-sense is enabled.
11763		 */
11764		senlen = ssc->ssc_uscsi_cmd->uscsi_rqlen -
11765		    ssc->ssc_uscsi_cmd->uscsi_rqresid;
11766		if ((ssc->ssc_uscsi_info->ui_pkt_state & STATE_ARQ_DONE) &&
11767		    (un->un_f_arq_enabled == TRUE) &&
11768		    senlen > 0 &&
11769		    ssc->ssc_uscsi_cmd->uscsi_rqbuf != NULL) {
11770			sd_ssc_post(ssc, SD_FM_DRV_NOTICE);
11771		}
11772		break;
11773	default:
11774		/*
11775		 * Should not have any other type of assessment.
11776		 */
11777		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
11778		    "sd_ssc_assessment got wrong "
11779		    "sd_type_assessment %d.\n", tp_assess);
11780		break;
11781	}
11782	/*
11783	 * Clear the ssc_flags before returning.
11784	 */
11785	ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11786}
11787
11788/*
11789 *    Function: sd_ssc_post
11790 *
11791 * Description: 1. read the driver property to get fm-scsi-log flag.
11792 *              2. print log if fm_log_capable is non-zero.
11793 *              3. call sd_ssc_ereport_post to post ereport if possible.
11794 *
11795 *    Context: May be called from kernel thread or interrupt context.
11796 */
11797static void
11798sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess)
11799{
11800	struct sd_lun	*un;
11801	int		sd_severity;
11802
11803	ASSERT(ssc != NULL);
11804	un = ssc->ssc_un;
11805	ASSERT(un != NULL);
11806
11807	/*
11808	 * We may enter here from sd_ssc_assessment (for a USCSI command)
11809	 * or directly from sdintr context.
11810	 * We don't handle a non-disk drive (CD-ROM, removable media).
11811	 * Clear the ssc_flags before returning in case we've set
11812	 * SSC_FLAGS_INVALID_XXX, which should be skipped for a non-disk
11813	 * drive.
11814	 */
11815	if (ISCD(un) || un->un_f_has_removable_media) {
11816		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11817		return;
11818	}
11819
11820	switch (sd_assess) {
11821		case SD_FM_DRV_FATAL:
11822			sd_severity = SCSI_ERR_FATAL;
11823			break;
11824		case SD_FM_DRV_RECOVERY:
11825			sd_severity = SCSI_ERR_RECOVERED;
11826			break;
11827		case SD_FM_DRV_RETRY:
11828			sd_severity = SCSI_ERR_RETRYABLE;
11829			break;
11830		case SD_FM_DRV_NOTICE:
11831			sd_severity = SCSI_ERR_INFO;
11832			break;
11833		default:
11834			sd_severity = SCSI_ERR_UNKNOWN;
11835	}
11836	/* print log */
11837	sd_ssc_print(ssc, sd_severity);
11838
11839	/* always post ereport */
11840	sd_ssc_ereport_post(ssc, sd_assess);
11841}
11842
11843/*
11844 *    Function: sd_ssc_set_info
11845 * Description: Mark ssc_flags and set ssc_info, which will be the
11846 *              payload of the uderr ereport. This function will cause
11847 *              sd_ssc_ereport_post to post a uderr ereport only.
11848 *              Besides, when ssc_flags == SSC_FLAGS_INVALID_DATA (USCSI),
11849 *              Besides, when ssc_flags == SSC_FLAGS_INVALID_DATA(USCSI),
11850 *              the function will also call SD_ERROR or scsi_log for a
11851 *              CDROM/removable-media/DDI_FM_NOT_CAPABLE device.
11852 *
11853 * Arguments: ssc - the sd_ssc_t struct, carrying the uscsi_cmd and
11854 *                  sd_uscsi_info structs.
11855 *            ssc_flags - indicate the sub-category of a uderr.
11856 *            ssc_flags - indicates the sub-category of a uderr.
11857 *                   ssc_flags == SSC_FLAGS_INVALID_DATA, and its possible
11858 *                   values include:
11859 *                   > 0, SD_ERROR is used with comp as the driver logging
11860 *                   component;
11861 *                   = 0, scsi-log is used to log error telemetries;
11862 *                   < 0, no log available for this telemetry.
11863 *
11864 *    Context: Kernel thread or interrupt context
11865 */
11866static void
11867sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp, const char *fmt, ...)
11868{
11869	va_list	ap;
11870
11871	ASSERT(ssc != NULL);
11872	ASSERT(ssc->ssc_un != NULL);
11873
11874	ssc->ssc_flags |= ssc_flags;
11875	va_start(ap, fmt);
11876	(void) vsnprintf(ssc->ssc_info, sizeof (ssc->ssc_info), fmt, ap);
11877	va_end(ap);
11878
11879	/*
11880	 * If SSC_FLAGS_INVALID_DATA is set, it should be a uscsi command
11881	 * with invalid data sent back. For non-uscsi command, the
11882	 * following code will be bypassed.
11883	 */
11884	if (ssc_flags & SSC_FLAGS_INVALID_DATA) {
11885		if (SD_FM_LOG(ssc->ssc_un) == SD_FM_LOG_NSUP) {
11886			/*
11887			 * If the error belongs to a certain component and we
11888			 * do not want it to show up on the console, we
11889			 * use SD_ERROR; otherwise scsi_log is
11890			 * preferred.
11891			 */
11892			if (comp > 0) {
11893				SD_ERROR(comp, ssc->ssc_un, ssc->ssc_info);
11894			} else if (comp == 0) {
11895				scsi_log(SD_DEVINFO(ssc->ssc_un), sd_label,
11896				    CE_WARN, ssc->ssc_info);
11897			}
11898		}
11899	}
11900}
11901
11902/*
11903 *    Function: sd_buf_iodone
11904 *
11905 * Description: Frees the sd_xbuf & returns the buf to its originator.
11906 *
11907 *     Context: May be called from interrupt context.
11908 */
11909/* ARGSUSED */
11910static void
11911sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
11912{
11913	struct sd_xbuf *xp;
11914
11915	ASSERT(un != NULL);
11916	ASSERT(bp != NULL);
11917	ASSERT(!mutex_owned(SD_MUTEX(un)));
11918
11919	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
11920
11921	xp = SD_GET_XBUF(bp);
11922	ASSERT(xp != NULL);
11923
11924	/* xbuf is gone after this */
11925	if (ddi_xbuf_done(bp, un->un_xbuf_attr)) {
11926		mutex_enter(SD_MUTEX(un));
11927
11928		/*
11929		 * Record the time at which the command completed.
11930		 * This is used to determine whether the device has been
11931		 * idle long enough to be reported as idle to the PM
11932		 * framework, lowering overhead and therefore improving
11933		 * per-I/O performance.
11934		 */
11935		un->un_pm_idle_time = ddi_get_time();
11936
11937		un->un_ncmds_in_driver--;
11938		ASSERT(un->un_ncmds_in_driver >= 0);
11939		SD_INFO(SD_LOG_IO, un,
11940		    "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
11941		    un->un_ncmds_in_driver);
11942
11943		mutex_exit(SD_MUTEX(un));
11944	}
11945
11946	biodone(bp);				/* bp is gone after this */
11947
11948	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
11949}
11950
11951
11952/*
11953 *    Function: sd_uscsi_iodone
11954 *
11955 * Description: Frees the sd_xbuf & returns the buf to its originator.
11956 *
11957 *     Context: May be called from interrupt context.
11958 */
11959/* ARGSUSED */
11960static void
11961sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11962{
11963	struct sd_xbuf *xp;
11964
11965	ASSERT(un != NULL);
11966	ASSERT(bp != NULL);
11967
11968	xp = SD_GET_XBUF(bp);
11969	ASSERT(xp != NULL);
11970	ASSERT(!mutex_owned(SD_MUTEX(un)));
11971
11972	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
11973
11974	bp->b_private = xp->xb_private;
11975
11976	mutex_enter(SD_MUTEX(un));
11977
11978	/*
11979	 * Record the time at which the command completed.
11980	 * This is used to determine whether the device has been
11981	 * idle long enough to be reported as idle to the PM
11982	 * framework, lowering overhead and therefore improving
11983	 * per-I/O performance.
11984	 */
11985	un->un_pm_idle_time = ddi_get_time();
11986
11987	un->un_ncmds_in_driver--;
11988	ASSERT(un->un_ncmds_in_driver >= 0);
11989	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
11990	    un->un_ncmds_in_driver);
11991
11992	mutex_exit(SD_MUTEX(un));
11993
11994	if (((struct uscsi_cmd *)(xp->xb_pktinfo))->uscsi_rqlen >
11995	    SENSE_LENGTH) {
11996		kmem_free(xp, sizeof (struct sd_xbuf) - SENSE_LENGTH +
11997		    MAX_SENSE_LENGTH);
11998	} else {
11999		kmem_free(xp, sizeof (struct sd_xbuf));
12000	}
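
	/*
	 * Editorial note: the size passed to kmem_free() above must mirror
	 * the size used at allocation time.  The sd_xbuf embeds SENSE_LENGTH
	 * bytes of sense storage, so an xbuf allocated for a larger sense
	 * request is presumed to have been sized as
	 *
	 *	sizeof (struct sd_xbuf) - SENSE_LENGTH + MAX_SENSE_LENGTH
	 *
	 * and freeing with any other length would corrupt the kmem caches.
	 */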
12001
12002	biodone(bp);
12003
12004	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12005}
12006
12007
12008/*
12009 *    Function: sd_mapblockaddr_iostart
12010 *
12011 * Description: Verifies that the request lies within the partition
12012 *		limits for the indicated minor device.  Issues an "overrun"
12013 *		buf if the request would exceed the partition range.
12014 *		Converts the partition-relative block address to absolute.
12015 *
12016 *     Context: Can sleep
12017 *
12018 *      Issues: This follows what the old code did, in terms of accessing
12019 *		some of the partition info in the unit struct without holding
12020 *		the mutex.  This is a general issue: if the partition info
12021 *		can be altered while IO is in progress... as soon as we send
12022 *		a buf, its partitioning can be invalid before it gets to the
12023 *		device.  Probably the right fix is to move partitioning out
12024 *		of the driver entirely.
12025 */
12026
12027static void
12028sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12029{
12030	diskaddr_t	nblocks;	/* #blocks in the given partition */
12031	daddr_t	blocknum;	/* Block number specified by the buf */
12032	size_t	requested_nblocks;
12033	size_t	available_nblocks;
12034	int	partition;
12035	diskaddr_t	partition_offset;
12036	struct sd_xbuf *xp;
12037
12038	ASSERT(un != NULL);
12039	ASSERT(bp != NULL);
12040	ASSERT(!mutex_owned(SD_MUTEX(un)));
12041
12042	SD_TRACE(SD_LOG_IO_PARTITION, un,
12043	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12044
12045	xp = SD_GET_XBUF(bp);
12046	ASSERT(xp != NULL);
12047
12048	/*
12049	 * If the geometry is not indicated as valid, attempt to access
12050	 * the unit & verify the geometry/label. This can be the case for
12051	 * removable-media devices, or if the device was opened in
12052	 * NDELAY/NONBLOCK mode.
12053	 */
12054	partition = SDPART(bp->b_edev);
12055
12056	if (!SD_IS_VALID_LABEL(un)) {
12057		sd_ssc_t *ssc;
12058		/*
12059		 * Initialize sd_ssc_t for internal uscsi commands
12060		 * Initialize sd_ssc_t for internal uscsi commands.
12061		 * To avoid a potential performance issue, we allocate
12062		 * this memory only when the label is invalid.
12063		ssc = sd_ssc_init(un);
12064
12065		if (sd_ready_and_valid(ssc, partition) != SD_READY_VALID) {
12066			/*
12067			 * For removable devices it is possible to start an
12068		 * I/O without media by opening the device in nodelay
12069		 * mode. Also, for writable CDs there can be many
12070		 * scenarios where there is no geometry yet but the volume
12071		 * manager is trying to issue a read() just because
12072		 * it can see a TOC on the CD. So do not print a message
12073			 * for removables.
12074			 */
12075			if (!un->un_f_has_removable_media) {
12076				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12077				    "i/o to invalid geometry\n");
12078			}
12079			bioerror(bp, EIO);
12080			bp->b_resid = bp->b_bcount;
12081			SD_BEGIN_IODONE(index, un, bp);
12082
12083			sd_ssc_fini(ssc);
12084			return;
12085		}
12086		sd_ssc_fini(ssc);
12087	}
12088
12089	nblocks = 0;
12090	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
12091	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
12092
12093	/*
12094	 * blocknum is the starting block number of the request. At this
12095	 * point it is still relative to the start of the minor device.
12096	 */
12097	blocknum = xp->xb_blkno;
12098
12099	/*
12100	 * Legacy: If the starting block number is one past the last block
12101	 * in the partition, do not set B_ERROR in the buf.
12102	 */
12103	if (blocknum == nblocks)  {
12104		goto error_exit;
12105	}
12106
12107	/*
12108	 * Confirm that the first block of the request lies within the
12109	 * partition limits. Also the requested number of bytes must be
12110	 * a multiple of the system block size.
12111	 */
12112	if ((blocknum < 0) || (blocknum >= nblocks) ||
12113	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12114		bp->b_flags |= B_ERROR;
12115		goto error_exit;
12116	}
12117
12118	/*
12119	 * If the requested # blocks exceeds the available # blocks, that
12120	 * is an overrun of the partition.
12121	 */
12122	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12123	available_nblocks = (size_t)(nblocks - blocknum);
12124	ASSERT(nblocks >= blocknum);
12125
12126	if (requested_nblocks > available_nblocks) {
12127		/*
12128		 * Allocate an "overrun" buf to allow the request to proceed
12129		 * for the amount of space available in the partition. The
12130		 * amount not transferred will be added into the b_resid
12131		 * when the operation is complete. The overrun buf
12132		 * replaces the original buf here, and the original buf
12133		 * is saved inside the overrun buf, for later use.
12134		 */
12135		size_t resid = SD_SYSBLOCKS2BYTES(un,
12136		    (offset_t)(requested_nblocks - available_nblocks));
12137		size_t count = bp->b_bcount - resid;
12138		/*
12139		 * Note: count is an unsigned entity, thus it can NEVER
12140		 * be less than 0, so ASSERT that the original values are
12141		 * correct.
12142		 */
12143		ASSERT(bp->b_bcount >= resid);
12144
12145		bp = sd_bioclone_alloc(bp, count, blocknum,
12146		    (int (*)(struct buf *)) sd_mapblockaddr_iodone);
12147		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12148		ASSERT(xp != NULL);
12149	}
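
	/*
	 * Worked example (editorial, assumed numbers): for a partition with
	 * nblocks = 100 and a request at blocknum = 96 for
	 * b_bcount = 8 * un->un_sys_blocksize, requested_nblocks is 8 but
	 * available_nblocks is only 4.  The clone above then carries
	 * count = 4 blocks worth of data, and the remaining 4 blocks
	 * (resid) are folded back into b_resid by sd_mapblockaddr_iodone().
	 */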
12150
12151	/* At this point there should be no residual for this buf. */
12152	ASSERT(bp->b_resid == 0);
12153
12154	/* Convert the block number to an absolute address. */
12155	xp->xb_blkno += partition_offset;
12156
12157	SD_NEXT_IOSTART(index, un, bp);
12158
12159	SD_TRACE(SD_LOG_IO_PARTITION, un,
12160	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12161
12162	return;
12163
12164error_exit:
12165	bp->b_resid = bp->b_bcount;
12166	SD_BEGIN_IODONE(index, un, bp);
12167	SD_TRACE(SD_LOG_IO_PARTITION, un,
12168	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12169}
12170
12171
12172/*
12173 *    Function: sd_mapblockaddr_iodone
12174 *
12175 * Description: Completion-side processing for partition management.
12176 *
12177 *     Context: May be called under interrupt context
12178 */
12179
12180static void
12181sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12182{
12183	/* int	partition; */	/* Not used, see below. */
12184	ASSERT(un != NULL);
12185	ASSERT(bp != NULL);
12186	ASSERT(!mutex_owned(SD_MUTEX(un)));
12187
12188	SD_TRACE(SD_LOG_IO_PARTITION, un,
12189	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12190
12191	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12192		/*
12193		 * We have an "overrun" buf to deal with...
12194		 */
12195		struct sd_xbuf	*xp;
12196		struct buf	*obp;	/* ptr to the original buf */
12197
12198		xp = SD_GET_XBUF(bp);
12199		ASSERT(xp != NULL);
12200
12201		/* Retrieve the pointer to the original buf */
12202		obp = (struct buf *)xp->xb_private;
12203		ASSERT(obp != NULL);
12204
12205		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12206		bioerror(obp, bp->b_error);
12207
12208		sd_bioclone_free(bp);
12209
12210		/*
12211		 * Get back the original buf.
12212		 * Note that since the restoration of xb_blkno below
12213		 * was removed, the sd_xbuf is not needed.
12214		 */
12215		bp = obp;
12216		/*
12217		 * xp = SD_GET_XBUF(bp);
12218		 * ASSERT(xp != NULL);
12219		 */
12220	}
12221
12222	/*
12223	 * Convert xp->xb_blkno back to a minor-device relative value.
12224	 * Note: this has been commented out, as it is not needed in the
12225	 * current implementation of the driver (ie, since this function
12226	 * is at the top of the layering chains, so the info will be
12227	 * discarded) and it is in the "hot" IO path.
12228	 *
12229	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12230	 * xp->xb_blkno -= un->un_offset[partition];
12231	 */
12232
12233	SD_NEXT_IODONE(index, un, bp);
12234
12235	SD_TRACE(SD_LOG_IO_PARTITION, un,
12236	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12237}
12238
12239
12240/*
12241 *    Function: sd_mapblocksize_iostart
12242 *
12243 * Description: Convert between system block size (un->un_sys_blocksize)
12244 *		and target block size (un->un_tgt_blocksize).
12245 *
12246 *     Context: Can sleep to allocate resources.
12247 *
12248 * Assumptions: A higher layer has already performed any partition validation,
12249 *		and converted the xp->xb_blkno to an absolute value relative
12250 *		to the start of the device.
12251 *
12252 *		It is also assumed that the higher layer has implemented
12253 *		an "overrun" mechanism for the case where the request would
12254 *		read/write beyond the end of a partition.  In this case we
12255 *		assume (and ASSERT) that bp->b_resid == 0.
12256 *
12257 *		Note: The implementation for this routine assumes the target
12258 *		block size remains constant between allocation and transport.
12259 */
12260
12261static void
12262sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12263{
12264	struct sd_mapblocksize_info	*bsp;
12265	struct sd_xbuf			*xp;
12266	offset_t first_byte;
12267	daddr_t	start_block, end_block;
12268	daddr_t	request_bytes;
12269	ushort_t is_aligned = FALSE;
12270
12271	ASSERT(un != NULL);
12272	ASSERT(bp != NULL);
12273	ASSERT(!mutex_owned(SD_MUTEX(un)));
12274	ASSERT(bp->b_resid == 0);
12275
12276	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12277	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12278
12279	/*
12280	 * For a non-writable CD, a write request is an error
12281	 */
12282	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12283	    (un->un_f_mmc_writable_media == FALSE)) {
12284		bioerror(bp, EIO);
12285		bp->b_resid = bp->b_bcount;
12286		SD_BEGIN_IODONE(index, un, bp);
12287		return;
12288	}
12289
12290	/*
12291	 * We do not need a shadow buf if the device is using
12292	 * un->un_sys_blocksize as its block size or if bcount == 0.
12293	 * In this case there is no layer-private data block allocated.
12294	 */
12295	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12296	    (bp->b_bcount == 0)) {
12297		goto done;
12298	}
12299
12300#if defined(__i386) || defined(__amd64)
12301	/* We do not support non-block-aligned transfers for ROD devices */
12302	ASSERT(!ISROD(un));
12303#endif
12304
12305	xp = SD_GET_XBUF(bp);
12306	ASSERT(xp != NULL);
12307
12308	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12309	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12310	    un->un_tgt_blocksize, un->un_sys_blocksize);
12311	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12312	    "request start block:0x%x\n", xp->xb_blkno);
12313	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12314	    "request len:0x%x\n", bp->b_bcount);
12315
12316	/*
12317	 * Allocate the layer-private data area for the mapblocksize layer.
12318	 * Layers are allowed to use the xb_private member of the sd_xbuf
12319	 * struct to store the pointer to their layer-private data block, but
12320	 * each layer also has the responsibility of restoring the prior
12321	 * contents of xb_private before returning the buf/xbuf to the
12322	 * higher layer that sent it.
12323	 *
12324	 * Here we save the prior contents of xp->xb_private into the
12325	 * bsp->mbs_oprivate field of our layer-private data area. This value
12326	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12327	 * the layer-private area and returning the buf/xbuf to the layer
12328	 * that sent it.
12329	 *
12330	 * Note that here we use kmem_zalloc for the allocation as there are
12331	 * parts of the mapblocksize code that expect certain fields to be
12332	 * zero unless explicitly set to a required value.
12333	 */
12334	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12335	bsp->mbs_oprivate = xp->xb_private;
12336	xp->xb_private = bsp;
12337
12338	/*
12339	 * This treats the data on the disk (target) as an array of bytes.
12340	 * first_byte is the byte offset, from the beginning of the device,
12341	 * to the location of the request. This is converted from a
12342	 * un->un_sys_blocksize block address to a byte offset, and then back
12343	 * to a block address based upon a un->un_tgt_blocksize block size.
12344	 *
12345	 * xp->xb_blkno should be absolute upon entry into this function,
12346	 * but it is based upon partitions that use the "system"
12347	 * block size. It must be adjusted to reflect the block size of
12348	 * the target.
12349	 *
12350	 * Note that end_block is actually the block that follows the last
12351	 * block of the request, but that's what is needed for the computation.
12352	 */
12353	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12354	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12355	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12356	    un->un_tgt_blocksize;
12357
12358	/* request_bytes is rounded up to a multiple of the target block size */
12359	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
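
	/*
	 * Worked example (editorial, assumed sizes): with
	 * un_sys_blocksize = 512, un_tgt_blocksize = 2048, xb_blkno = 3 and
	 * b_bcount = 1024, first_byte = 1536, start_block = 0, and
	 * end_block = (1536 + 1024 + 2047) / 2048 = 2, giving
	 * request_bytes = 4096: the two 2K target blocks that together
	 * cover the 1K of user data starting at byte offset 1536.
	 */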
12360
12361	/*
12362	 * See if the starting address of the request and the request
12363	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12364	 * then we do not need to allocate a shadow buf to handle the request.
12365	 */
12366	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12367	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12368		is_aligned = TRUE;
12369	}
12370
12371	if ((bp->b_flags & B_READ) == 0) {
12372		/*
12373		 * Lock the range for a write operation. An aligned request is
12374		 * considered a simple write; otherwise the request must be a
12375		 * read-modify-write.
12376		 */
12377		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12378		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12379	}
12380
12381	/*
12382	 * Alloc a shadow buf if the request is not aligned. Also, this is
12383	 * where the READ command is generated for a read-modify-write. (The
12384	 * write phase is deferred until after the read completes.)
12385	 */
12386	if (is_aligned == FALSE) {
12387
12388		struct sd_mapblocksize_info	*shadow_bsp;
12389		struct sd_xbuf	*shadow_xp;
12390		struct buf	*shadow_bp;
12391
12392		/*
12393		 * Allocate the shadow buf and its associated xbuf. Note that
12394		 * after this call the xb_blkno value in both the original
12395		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12396		 * same: absolute, relative to the start of the device, and
12397		 * adjusted for the target block size. The b_blkno in the
12398		 * shadow buf will also be set to this value. We should never
12399		 * change b_blkno in the original bp however.
12400		 *
12401		 * Note also that the shadow buf will always need to be a
12402		 * READ command, regardless of whether the incoming command
12403		 * is a READ or a WRITE.
12404		 */
12405		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12406		    xp->xb_blkno,
12407		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12408
12409		shadow_xp = SD_GET_XBUF(shadow_bp);
12410
12411		/*
12412		 * Allocate the layer-private data for the shadow buf.
12413		 * (No need to preserve xb_private in the shadow xbuf.)
12414		 */
12415		shadow_xp->xb_private = shadow_bsp =
12416		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12417
12418		/*
12419		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12420		 * to figure out where the start of the user data is (based upon
12421		 * the system block size) in the data returned by the READ
12422		 * command (which will be based upon the target blocksize). Note
12423		 * that this is only really used if the request is unaligned.
12424		 */
12425		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12426		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12427		ASSERT((bsp->mbs_copy_offset >= 0) &&
12428		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12429
12430		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12431
12432		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12433
12434		/* Transfer the wmap (if any) to the shadow buf */
12435		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12436		bsp->mbs_wmp = NULL;
12437
12438		/*
12439		 * The shadow buf goes on from here in place of the
12440		 * original buf.
12441		 */
12442		shadow_bsp->mbs_orig_bp = bp;
12443		bp = shadow_bp;
12444	}
12445
12446	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12447	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12448	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12449	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12450	    request_bytes);
12451	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12452	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
12453
12454done:
12455	SD_NEXT_IOSTART(index, un, bp);
12456
12457	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12458	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12459}
12460
12461
12462/*
12463 *    Function: sd_mapblocksize_iodone
12464 *
12465 * Description: Completion side processing for block-size mapping.
12466 *
12467 *     Context: May be called under interrupt context
12468 */
12469
12470static void
12471sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12472{
12473	struct sd_mapblocksize_info	*bsp;
12474	struct sd_xbuf	*xp;
12475	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12476	struct buf	*orig_bp;	/* ptr to the original buf */
12477	offset_t	shadow_end;
12478	offset_t	request_end;
12479	offset_t	shadow_start;
12480	ssize_t		copy_offset;
12481	size_t		copy_length;
12482	size_t		shortfall;
12483	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12484	uint_t		has_wmap;	/* TRUE is this bp has a wmap */
12485	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
12486	ASSERT(un != NULL);
12487	ASSERT(bp != NULL);
12488
12489	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12490	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12491
12492	/*
12493	 * There is no shadow buf or layer-private data if the target is
12494	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12495	 */
12496	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12497	    (bp->b_bcount == 0)) {
12498		goto exit;
12499	}
12500
12501	xp = SD_GET_XBUF(bp);
12502	ASSERT(xp != NULL);
12503
12504	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12505	bsp = xp->xb_private;
12506
12507	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12508	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12509
12510	if (is_write) {
12511		/*
12512		 * For a WRITE request we must free up the block range that
12513		 * we have locked up.  This holds regardless of whether this is
12514		 * an aligned write request or a read-modify-write request.
12515		 */
12516		sd_range_unlock(un, bsp->mbs_wmp);
12517		bsp->mbs_wmp = NULL;
12518	}
12519
12520	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
12521		/*
12522		 * An aligned read or write command will have no shadow buf;
12523		 * there is not much else to do with it.
12524		 */
12525		goto done;
12526	}
12527
12528	orig_bp = bsp->mbs_orig_bp;
12529	ASSERT(orig_bp != NULL);
12530	orig_xp = SD_GET_XBUF(orig_bp);
12531	ASSERT(orig_xp != NULL);
12532	ASSERT(!mutex_owned(SD_MUTEX(un)));
12533
12534	if (!is_write && has_wmap) {
12535		/*
12536		 * A READ with a wmap means this is the READ phase of a
12537		 * read-modify-write. If an error occurred on the READ then
12538		 * we do not proceed with the WRITE phase or copy any data.
12539		 * Just release the write maps and return with an error.
12540		 */
12541		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12542			orig_bp->b_resid = orig_bp->b_bcount;
12543			bioerror(orig_bp, bp->b_error);
12544			sd_range_unlock(un, bsp->mbs_wmp);
12545			goto freebuf_done;
12546		}
12547	}
12548
12549	/*
12550	 * Here is where we set up to copy the data from the shadow buf
12551	 * into the space associated with the original buf.
12552	 *
12553	 * To deal with the conversion between block sizes, these
12554	 * computations treat the data as an array of bytes, with the
12555	 * first byte (byte 0) corresponding to the first byte in the
12556	 * first block on the disk.
12557	 */
12558
12559	/*
12560	 * shadow_start and shadow_end give the location and extent of
12561	 * the data returned with the shadow IO request.
12562	 */
12563	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12564	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12565
12566	/*
12567	 * copy_offset gives the offset (in bytes) from the start of the first
12568	 * block of the READ request to the beginning of the data.  We retrieve
12569	 * this value from mbs_copy_offset in the layer-private data, where
12570	 * it was saved by sd_mapblocksize_iostart(). copy_length gives the
12571	 * amount of data to be copied (in bytes).
12572	 */
12573	copy_offset  = bsp->mbs_copy_offset;
12574	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12575	copy_length  = orig_bp->b_bcount;
12576	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
12577
12578	/*
12579	 * Set up the resid and error fields of orig_bp as appropriate.
12580	 */
12581	if (shadow_end >= request_end) {
12582		/* We got all the requested data; set resid to zero */
12583		orig_bp->b_resid = 0;
12584	} else {
12585		/*
12586		 * We failed to get enough data to fully satisfy the original
12587		 * request. Just copy back whatever data we got and set
12588		 * up the residual and error code as required.
12589		 *
12590		 * 'shortfall' is the amount by which the data received with the
12591		 * shadow buf has "fallen short" of the requested amount.
12592		 */
12593		shortfall = (size_t)(request_end - shadow_end);
12594
12595		if (shortfall > orig_bp->b_bcount) {
12596			/*
12597			 * We did not get enough data to even partially
12598			 * fulfill the original request.  The residual is
12599			 * equal to the amount requested.
12600			 */
12601			orig_bp->b_resid = orig_bp->b_bcount;
12602		} else {
12603			/*
12604			 * We did not get all the data that we requested
12605			 * from the device, but we will try to return what
12606			 * portion we did get.
12607			 */
12608			orig_bp->b_resid = shortfall;
12609		}
12610		ASSERT(copy_length >= orig_bp->b_resid);
12611		copy_length  -= orig_bp->b_resid;
12612	}
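
	/*
	 * Worked example (editorial, continuing the assumed 512/2048 block
	 * sizes): for an original request of 1024 bytes at copy_offset 1536
	 * within a 4096-byte shadow READ, request_end = shadow_start + 2560.
	 * If the device returned only one target block, then
	 * shadow_end = shadow_start + 2048 and shortfall = 512, so
	 * b_resid is set to 512 and copy_length drops to 512.
	 */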
12613
12614	/* Propagate the error code from the shadow buf to the original buf */
12615	bioerror(orig_bp, bp->b_error);
12616
12617	if (is_write) {
12618		goto freebuf_done;	/* No data copying for a WRITE */
12619	}
12620
12621	if (has_wmap) {
12622		/*
12623		 * This is a READ command from the READ phase of a
12624		 * read-modify-write request. We have to copy the data given
12625		 * by the user OVER the data returned by the READ command,
12626		 * then convert the command from a READ to a WRITE and send
12627		 * it back to the target.
12628		 */
12629		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12630		    copy_length);
12631
12632		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12633
12634		/*
12635		 * Dispatch the WRITE command to the taskq thread, which
12636		 * will in turn send the command to the target. When the
12637		 * WRITE command completes, we (sd_mapblocksize_iodone())
12638		 * will get called again as part of the iodone chain
12639		 * processing for it. Note that we will still be dealing
12640		 * with the shadow buf at that point.
12641		 */
12642		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12643		    KM_NOSLEEP) != 0) {
12644			/*
12645			 * Dispatch was successful so we are done. Return
12646			 * without going any higher up the iodone chain. Do
12647			 * not free up any layer-private data until after the
12648			 * WRITE completes.
12649			 */
12650			return;
12651		}
12652
12653		/*
12654		 * Dispatch of the WRITE command failed; set up the error
12655		 * condition and send this IO back up the iodone chain.
12656		 */
12657		bioerror(orig_bp, EIO);
12658		orig_bp->b_resid = orig_bp->b_bcount;
12659
12660	} else {
12661		/*
12662		 * This is a regular READ request (ie, not a RMW). Copy the
12663		 * data from the shadow buf into the original buf. The
12664		 * copy_offset compensates for any "misalignment" between the
12665		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12666		 * original buf (with its un->un_sys_blocksize blocks).
12667		 */
12668		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12669		    copy_length);
12670	}
12671
12672freebuf_done:
12673
12674	/*
12675	 * At this point we still have both the shadow buf AND the original
12676	 * buf to deal with, as well as the layer-private data area in each.
12677	 * Local variables are as follows:
12678	 *
12679	 * bp -- points to shadow buf
12680	 * xp -- points to xbuf of shadow buf
12681	 * bsp -- points to layer-private data area of shadow buf
12682	 * orig_bp -- points to original buf
12683	 *
12684	 * First free the shadow buf and its associated xbuf, then free the
12685	 * layer-private data area from the shadow buf. There is no need to
12686	 * restore xb_private in the shadow xbuf.
12687	 */
12688	sd_shadow_buf_free(bp);
12689	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12690
12691	/*
12692	 * Now update the local variables to point to the original buf, xbuf,
12693	 * and layer-private area.
12694	 */
12695	bp = orig_bp;
12696	xp = SD_GET_XBUF(bp);
12697	ASSERT(xp != NULL);
12698	ASSERT(xp == orig_xp);
12699	bsp = xp->xb_private;
12700	ASSERT(bsp != NULL);
12701
12702done:
12703	/*
12704	 * Restore xb_private to whatever it was set to by the next higher
12705	 * layer in the chain, then free the layer-private data area.
12706	 */
12707	xp->xb_private = bsp->mbs_oprivate;
12708	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12709
12710exit:
12711	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
12712	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
12713
12714	SD_NEXT_IODONE(index, un, bp);
12715}
12716
12717
12718/*
12719 *    Function: sd_checksum_iostart
12720 *
12721 * Description: A stub function for a layer that's currently not used.
12722 *		For now just a placeholder.
12723 *
12724 *     Context: Kernel thread context
12725 */
12726
12727static void
12728sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12729{
12730	ASSERT(un != NULL);
12731	ASSERT(bp != NULL);
12732	ASSERT(!mutex_owned(SD_MUTEX(un)));
12733	SD_NEXT_IOSTART(index, un, bp);
12734}
12735
12736
12737/*
12738 *    Function: sd_checksum_iodone
12739 *
12740 * Description: A stub function for a layer that's currently not used.
12741 *		For now just a placeholder.
12742 *
12743 *     Context: May be called under interrupt context
12744 */
12745
12746static void
12747sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12748{
12749	ASSERT(un != NULL);
12750	ASSERT(bp != NULL);
12751	ASSERT(!mutex_owned(SD_MUTEX(un)));
12752	SD_NEXT_IODONE(index, un, bp);
12753}
12754
12755
12756/*
12757 *    Function: sd_checksum_uscsi_iostart
12758 *
12759 * Description: A stub function for a layer that's currently not used.
12760 *		For now just a placeholder.
12761 *
12762 *     Context: Kernel thread context
12763 */
12764
12765static void
12766sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12767{
12768	ASSERT(un != NULL);
12769	ASSERT(bp != NULL);
12770	ASSERT(!mutex_owned(SD_MUTEX(un)));
12771	SD_NEXT_IOSTART(index, un, bp);
12772}
12773
12774
12775/*
12776 *    Function: sd_checksum_uscsi_iodone
12777 *
12778 * Description: A stub function for a layer that's currently not used.
12779 *		For now just a placeholder.
12780 *
12781 *     Context: May be called under interrupt context
12782 */
12783
12784static void
12785sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12786{
12787	ASSERT(un != NULL);
12788	ASSERT(bp != NULL);
12789	ASSERT(!mutex_owned(SD_MUTEX(un)));
12790	SD_NEXT_IODONE(index, un, bp);
12791}
12792
12793
12794/*
12795 *    Function: sd_pm_iostart
12796 *
12797 * Description: iostart-side routine for power management.
12798 *
12799 *     Context: Kernel thread context
12800 */
12801
12802static void
12803sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12804{
12805	ASSERT(un != NULL);
12806	ASSERT(bp != NULL);
12807	ASSERT(!mutex_owned(SD_MUTEX(un)));
12808	ASSERT(!mutex_owned(&un->un_pm_mutex));
12809
12810	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12811
12812	if (sd_pm_entry(un) != DDI_SUCCESS) {
12813		/*
12814		 * Set up to return the failed buf back up the 'iodone'
12815		 * side of the calling chain.
12816		 */
12817		bioerror(bp, EIO);
12818		bp->b_resid = bp->b_bcount;
12819
12820		SD_BEGIN_IODONE(index, un, bp);
12821
12822		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12823		return;
12824	}
12825
12826	SD_NEXT_IOSTART(index, un, bp);
12827
12828	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12829}
12830
12831
12832/*
12833 *    Function: sd_pm_iodone
12834 *
12835 * Description: iodone-side routine for power management.
12836 *
12837 *     Context: may be called from interrupt context
12838 */
12839
12840static void
12841sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
12842{
12843	ASSERT(un != NULL);
12844	ASSERT(bp != NULL);
12845	ASSERT(!mutex_owned(&un->un_pm_mutex));
12846
12847	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
12848
12849	/*
12850	 * After attach the following flag is only read, so don't
12851	 * take the penalty of acquiring a mutex for it.
12852	 */
12853	if (un->un_f_pm_is_enabled == TRUE) {
12854		sd_pm_exit(un);
12855	}
12856
12857	SD_NEXT_IODONE(index, un, bp);
12858
12859	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
12860}
12861
12862
12863/*
12864 *    Function: sd_core_iostart
12865 *
12866 * Description: Primary driver function for enqueuing buf(9S) structs from
12867 *		the system and initiating IO to the target device
12868 *
12869 *     Context: Kernel thread context. Can sleep.
12870 *
12871 * Assumptions:  - The given xp->xb_blkno is absolute
12872 *		   (ie, relative to the start of the device).
12873 *		 - The IO is to be done using the native blocksize of
12874 *		   the device, as specified in un->un_tgt_blocksize.
12875 */
12876/* ARGSUSED */
12877static void
12878sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
12879{
12880	struct sd_xbuf *xp;
12881
12882	ASSERT(un != NULL);
12883	ASSERT(bp != NULL);
12884	ASSERT(!mutex_owned(SD_MUTEX(un)));
12885	ASSERT(bp->b_resid == 0);
12886
12887	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
12888
12889	xp = SD_GET_XBUF(bp);
12890	ASSERT(xp != NULL);
12891
12892	mutex_enter(SD_MUTEX(un));
12893
12894	/*
12895	 * If we are currently in the failfast state, fail any new IO
12896	 * that has B_FAILFAST set, then return.
12897	 */
12898	if ((bp->b_flags & B_FAILFAST) &&
12899	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
12900		mutex_exit(SD_MUTEX(un));
12901		bioerror(bp, EIO);
12902		bp->b_resid = bp->b_bcount;
12903		SD_BEGIN_IODONE(index, un, bp);
12904		return;
12905	}
12906
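
	/*
	 * Editorial note: the effect of the check above is that a buf marked
	 * B_FAILFAST (typically by a layered consumer such as a volume
	 * manager probing a suspect path) is bounced with EIO immediately
	 * while the failfast state is active, rather than queueing behind
	 * commands that are already timing out.
	 */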
12907	if (SD_IS_DIRECT_PRIORITY(xp)) {
12908		/*
12909		 * Priority command -- transport it immediately.
12910		 *
12911		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
12912		 * because all direct priority commands should be associated
12913		 * with error recovery actions which we don't want to retry.
12914		 */
12915		sd_start_cmds(un, bp);
12916	} else {
12917		/*
12918		 * Normal command -- add it to the wait queue, then start
12919		 * transporting commands from the wait queue.
12920		 */
12921		sd_add_buf_to_waitq(un, bp);
12922		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
12923		sd_start_cmds(un, NULL);
12924	}
12925
12926	mutex_exit(SD_MUTEX(un));
12927
12928	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
12929}
12930
12931
12932/*
12933 *    Function: sd_init_cdb_limits
12934 *
12935 * Description: This is to handle scsi_pkt initialization differences
12936 *		between the driver platforms.
12937 *
12938 *		Legacy behaviors:
12939 *
12940 *		If the block number or the sector count exceeds the
12941 *		capabilities of a Group 0 command, shift over to a
12942 *		Group 1 command. We don't blindly use Group 1
12943 *		commands because a) some drives (CDC Wren IVs) get a
12944 *		bit confused, and b) there is probably a fair amount
12945 *		of speed difference for a target to receive and decode
12946 *		a 10 byte command instead of a 6 byte command.
12947 *
12948 *		The xfer time difference of 6 vs 10 byte CDBs is
12949 *		still significant so this code is still worthwhile.
12950 *		10 byte CDBs are very inefficient with the fas HBA driver
12951 *		and older disks. Each CDB byte took 1 usec with some
12952 *		popular disks.
12953 *
12954 *     Context: Must be called at attach time
12955 */
12956
12957static void
12958sd_init_cdb_limits(struct sd_lun *un)
12959{
12960	int hba_cdb_limit;
12961
12962	/*
12963	 * Use CDB_GROUP1 commands for most devices except for
12964	 * parallel SCSI fixed drives in which case we get better
12965	 * performance using CDB_GROUP0 commands (where applicable).
12966	 */
12967	un->un_mincdb = SD_CDB_GROUP1;
12968#if !defined(__fibre)
12969	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
12970	    !un->un_f_has_removable_media) {
12971		un->un_mincdb = SD_CDB_GROUP0;
12972	}
12973#endif
12974
12975	/*
12976	 * Try to read the max-cdb-length supported by HBA.
12977	 * Try to read the max-cdb-length supported by the HBA.
12978	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
12979	if (0 >= un->un_max_hba_cdb) {
12980		un->un_max_hba_cdb = CDB_GROUP4;
12981		hba_cdb_limit = SD_CDB_GROUP4;
12982	} else if (0 < un->un_max_hba_cdb &&
12983	    un->un_max_hba_cdb < CDB_GROUP1) {
12984		hba_cdb_limit = SD_CDB_GROUP0;
12985	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
12986	    un->un_max_hba_cdb < CDB_GROUP5) {
12987		hba_cdb_limit = SD_CDB_GROUP1;
12988	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
12989	    un->un_max_hba_cdb < CDB_GROUP4) {
12990		hba_cdb_limit = SD_CDB_GROUP5;
12991	} else {
12992		hba_cdb_limit = SD_CDB_GROUP4;
12993	}
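
	/*
	 * Editorial summary of the ladder above, assuming the usual group
	 * sizes (CDB_GROUP0 = 6, CDB_GROUP1 = 10, CDB_GROUP5 = 12,
	 * CDB_GROUP4 = 16 bytes): a failed query defaults to SD_CDB_GROUP4;
	 * a limit of 1..9 bytes yields SD_CDB_GROUP0, 10..11 yields
	 * SD_CDB_GROUP1, 12..15 yields SD_CDB_GROUP5, and 16 or more
	 * allows SD_CDB_GROUP4.
	 */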
12994
12995	/*
12996	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
12997	 * commands for fixed disks unless we are building for a 32 bit
12998	 * kernel.
12999	 */
13000#ifdef _LP64
13001	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13002	    min(hba_cdb_limit, SD_CDB_GROUP4);
13003#else
13004	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13005	    min(hba_cdb_limit, SD_CDB_GROUP1);
13006#endif
13007
13008	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13009	    ? sizeof (struct scsi_arq_status) : 1);
13010	un->un_cmd_timeout = (ushort_t)sd_io_time;
13011	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13012}
13013
13014
13015/*
13016 *    Function: sd_initpkt_for_buf
13017 *
13018 * Description: Allocate and initialize for transport a scsi_pkt struct,
13019 *		based upon the info specified in the given buf struct.
13020 *
13021 *		Assumes the xb_blkno in the request is absolute (ie,
13022 *		relative to the start of the device, NOT the partition).
13023 *		Also assumes that the request is using the native block
13024 *		size of the device (as returned by the READ CAPACITY
13025 *		command).
13026 *
13027 * Return Code: SD_PKT_ALLOC_SUCCESS
13028 *		SD_PKT_ALLOC_FAILURE
13029 *		SD_PKT_ALLOC_FAILURE_NO_DMA
13030 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13031 *
13032 *     Context: Kernel thread and may be called from software interrupt context
13033 *		as part of a sdrunout callback. This function may not block or
13034 *		call routines that block
13035 */
13036
13037static int
13038sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13039{
13040	struct sd_xbuf	*xp;
13041	struct scsi_pkt *pktp = NULL;
13042	struct sd_lun	*un;
13043	size_t		blockcount;
13044	daddr_t		startblock;
13045	int		rval;
13046	int		cmd_flags;
13047
13048	ASSERT(bp != NULL);
13049	ASSERT(pktpp != NULL);
13050	xp = SD_GET_XBUF(bp);
13051	ASSERT(xp != NULL);
13052	un = SD_GET_UN(bp);
13053	ASSERT(un != NULL);
13054	ASSERT(mutex_owned(SD_MUTEX(un)));
13055	ASSERT(bp->b_resid == 0);
13056
13057	SD_TRACE(SD_LOG_IO_CORE, un,
13058	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13059
13060	mutex_exit(SD_MUTEX(un));
13061
13062#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13063	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13064		/*
13065		 * Already have a scsi_pkt -- just need DMA resources.
13066		 * We must recompute the CDB in case the mapping returns
13067		 * a nonzero pkt_resid.
13068		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13069		 * that is being retried, the unmap/remap of the DMA resources
13070		 * will result in the entire transfer starting over again
13071		 * from the very first block.
13072		 */
13073		ASSERT(xp->xb_pktp != NULL);
13074		pktp = xp->xb_pktp;
13075	} else {
13076		pktp = NULL;
13077	}
13078#endif /* __i386 || __amd64 */
13079
13080	startblock = xp->xb_blkno;	/* Absolute block num. */
13081	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13082
13083	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13084
13085	/*
13086	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13087	 * call scsi_init_pkt, and build the CDB.
13088	 */
13089	rval = sd_setup_rw_pkt(un, &pktp, bp,
13090	    cmd_flags, sdrunout, (caddr_t)un,
13091	    startblock, blockcount);
13092
13093	if (rval == 0) {
13094		/*
13095		 * Success.
13096		 *
13097		 * If partial DMA is being used and required for this transfer,
13098		 * set it up here.
13099		 */
13100		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13101		    (pktp->pkt_resid != 0)) {
13102
13103			/*
13104			 * Save the pkt_resid for the
13105			 * next xfer.
13106			 */
13107			xp->xb_dma_resid = pktp->pkt_resid;
13108
13109			/* rezero resid */
13110			pktp->pkt_resid = 0;
13111
13112		} else {
13113			xp->xb_dma_resid = 0;
13114		}
13115
13116		pktp->pkt_flags = un->un_tagflags;
13117		pktp->pkt_time  = un->un_cmd_timeout;
13118		pktp->pkt_comp  = sdintr;
13119
13120		pktp->pkt_private = bp;
13121		*pktpp = pktp;
13122
13123		SD_TRACE(SD_LOG_IO_CORE, un,
13124		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13125
13126#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13127		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13128#endif
13129
13130		mutex_enter(SD_MUTEX(un));
13131		return (SD_PKT_ALLOC_SUCCESS);
13132
13133	}
13134
13135	/*
13136	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13137	 * from sd_setup_rw_pkt.
13138	 */
13139	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13140
13141	if (rval == SD_PKT_ALLOC_FAILURE) {
13142		*pktpp = NULL;
13143		/*
13144		 * Set the driver state to RWAIT to indicate the driver
13145		 * is waiting on resource allocations. The driver will not
13146		 * suspend, pm_suspend, or detach while the state is RWAIT.
13147		 */
13148		mutex_enter(SD_MUTEX(un));
13149		New_state(un, SD_STATE_RWAIT);
13150
13151		SD_ERROR(SD_LOG_IO_CORE, un,
13152		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13153
13154		if ((bp->b_flags & B_ERROR) != 0) {
13155			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13156		}
13157		return (SD_PKT_ALLOC_FAILURE);
13158	} else {
13159		/*
13160		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13161		 *
13162		 * This should never happen.  Maybe someone messed with the
13163		 * kernel's minphys?
13164		 */
13165		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13166		    "Request rejected: too large for CDB: "
13167		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13168		SD_ERROR(SD_LOG_IO_CORE, un,
13169		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13170		mutex_enter(SD_MUTEX(un));
13171		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13172
13173	}
13174}
13175
13176
13177/*
13178 *    Function: sd_destroypkt_for_buf
13179 *
13180 * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13181 *
13182 *     Context: Kernel thread or interrupt context
13183 */
13184
13185static void
13186sd_destroypkt_for_buf(struct buf *bp)
13187{
13188	ASSERT(bp != NULL);
13189	ASSERT(SD_GET_UN(bp) != NULL);
13190
13191	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13192	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13193
13194	ASSERT(SD_GET_PKTP(bp) != NULL);
13195	scsi_destroy_pkt(SD_GET_PKTP(bp));
13196
13197	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13198	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13199}
13200
13201/*
13202 *    Function: sd_setup_rw_pkt
13203 *
13204 * Description: Determines appropriate CDB group for the requested LBA
13205 *		and transfer length, calls scsi_init_pkt, and builds
13206 *		the CDB.  Do not use for partial DMA transfers except
13207 *		for the initial transfer since the CDB size must
13208 *		remain constant.
13209 *
13210 *     Context: Kernel thread and may be called from software interrupt
13211 *		context as part of a sdrunout callback. This function may not
13212 *		block or call routines that block
13213 */
13214
13215
13216int
13217sd_setup_rw_pkt(struct sd_lun *un,
13218    struct scsi_pkt **pktpp, struct buf *bp, int flags,
13219    int (*callback)(caddr_t), caddr_t callback_arg,
13220    diskaddr_t lba, uint32_t blockcount)
13221{
13222	struct scsi_pkt *return_pktp;
13223	union scsi_cdb *cdbp;
13224	struct sd_cdbinfo *cp = NULL;
13225	int i;
13226
13227	/*
13228	 * See which size CDB to use, based upon the request.
13229	 */
13230	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13231
13232		/*
13233		 * Check lba and block count against sd_cdbtab limits.
13234		 * In the partial DMA case, we have to use the same size
13235		 * CDB for all the transfers.  Check lba + blockcount
13236		 * against the max LBA so we know that segment of the
13237		 * transfer can use the CDB we select.
13238		 */
13239		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13240		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13241
13242			/*
13243			 * The command will fit into the CDB type
13244			 * specified by sd_cdbtab[i].
13245			 */
13246			cp = sd_cdbtab + i;
13247
13248			/*
13249			 * Call scsi_init_pkt so we can fill in the
13250			 * CDB.
13251			 */
13252			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13253			    bp, cp->sc_grpcode, un->un_status_len, 0,
13254			    flags, callback, callback_arg);
13255
13256			if (return_pktp != NULL) {
13257
13258				/*
13259				 * Return new value of pkt
13260				 */
13261				*pktpp = return_pktp;
13262
13263				/*
13264				 * To be safe, zero the CDB, ensuring there is
13265				 * no leftover data from a previous command.
13266				 */
13267				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13268
13269				/*
13270				 * Handle partial DMA mapping
13271				 */
13272				if (return_pktp->pkt_resid != 0) {
13273
13274					/*
13275					 * Not going to xfer as many blocks as
13276					 * originally expected
13277					 */
13278					blockcount -=
13279					    SD_BYTES2TGTBLOCKS(un,
13280					    return_pktp->pkt_resid);
13281				}
13282
13283				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13284
13285				/*
13286				 * Set command byte based on the CDB
13287				 * type we matched.
13288				 */
13289				cdbp->scc_cmd = cp->sc_grpmask |
13290				    ((bp->b_flags & B_READ) ?
13291				    SCMD_READ : SCMD_WRITE);
13292
13293				SD_FILL_SCSI1_LUN(un, return_pktp);
13294
13295				/*
13296				 * Fill in LBA and length
13297				 */
13298				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13299				    (cp->sc_grpcode == CDB_GROUP4) ||
13300				    (cp->sc_grpcode == CDB_GROUP0) ||
13301				    (cp->sc_grpcode == CDB_GROUP5));
13302
13303				if (cp->sc_grpcode == CDB_GROUP1) {
13304					FORMG1ADDR(cdbp, lba);
13305					FORMG1COUNT(cdbp, blockcount);
13306					return (0);
13307				} else if (cp->sc_grpcode == CDB_GROUP4) {
13308					FORMG4LONGADDR(cdbp, lba);
13309					FORMG4COUNT(cdbp, blockcount);
13310					return (0);
13311				} else if (cp->sc_grpcode == CDB_GROUP0) {
13312					FORMG0ADDR(cdbp, lba);
13313					FORMG0COUNT(cdbp, blockcount);
13314					return (0);
13315				} else if (cp->sc_grpcode == CDB_GROUP5) {
13316					FORMG5ADDR(cdbp, lba);
13317					FORMG5COUNT(cdbp, blockcount);
13318					return (0);
13319				}
13320
13321				/*
13322				 * It should be impossible to not match one
13323				 * of the CDB types above, so we should never
13324				 * reach this point.  Set the CDB command byte
13325				 * to test-unit-ready to avoid writing
13326				 * to somewhere we don't intend.
13327				 */
13328				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13329				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13330			} else {
13331				/*
13332				 * Couldn't get scsi_pkt
13333				 */
13334				return (SD_PKT_ALLOC_FAILURE);
13335			}
13336		}
13337	}
13338
13339	/*
13340	 * None of the available CDB types were suitable.  This really
13341	 * should never happen:  on a 64 bit system we support
13342	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13343	 * and on a 32 bit system we will refuse to bind to a device
13344	 * larger than 2TB so addresses will never be larger than 32 bits.
13345	 */
13346	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13347}
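
/*
 * Editorial example (assumed numbers, not driver source): a READ at
 * lba = 0x200000 for blockcount = 0x200 overflows the Group 0 limits
 * (21-bit LBA, 8-bit count), so the loop above settles on Group 1 and
 * effectively builds:
 *
 *	cdbp->scc_cmd = cp->sc_grpmask | SCMD_READ;	(= SCMD_READ_G1)
 *	FORMG1ADDR(cdbp, 0x200000);			(32-bit LBA)
 *	FORMG1COUNT(cdbp, 0x200);			(16-bit count)
 *
 * The authoritative limits come from sd_cdbtab[], not this comment.
 */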
13348
13349/*
13350 *    Function: sd_setup_next_rw_pkt
13351 *
13352 * Description: Setup packet for partial DMA transfers, except for the
13353 * 		initial transfer.  sd_setup_rw_pkt should be used for
13354 *		the initial transfer.
13355 *
13356 *     Context: Kernel thread and may be called from interrupt context.
13357 */
13358
13359int
13360sd_setup_next_rw_pkt(struct sd_lun *un,
13361    struct scsi_pkt *pktp, struct buf *bp,
13362    diskaddr_t lba, uint32_t blockcount)
13363{
13364	uchar_t com;
13365	union scsi_cdb *cdbp;
13366	uchar_t cdb_group_id;
13367
13368	ASSERT(pktp != NULL);
13369	ASSERT(pktp->pkt_cdbp != NULL);
13370
13371	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13372	com = cdbp->scc_cmd;
13373	cdb_group_id = CDB_GROUPID(com);
13374
13375	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13376	    (cdb_group_id == CDB_GROUPID_1) ||
13377	    (cdb_group_id == CDB_GROUPID_4) ||
13378	    (cdb_group_id == CDB_GROUPID_5));
13379
13380	/*
13381	 * Move pkt to the next portion of the xfer.
13382	 * func is NULL_FUNC so we do not have to release
13383	 * the disk mutex here.
13384	 */
13385	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13386	    NULL_FUNC, NULL) == pktp) {
13387		/* Success.  Handle partial DMA */
13388		if (pktp->pkt_resid != 0) {
13389			blockcount -=
13390			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13391		}
13392
13393		cdbp->scc_cmd = com;
13394		SD_FILL_SCSI1_LUN(un, pktp);
13395		if (cdb_group_id == CDB_GROUPID_1) {
13396			FORMG1ADDR(cdbp, lba);
13397			FORMG1COUNT(cdbp, blockcount);
13398			return (0);
13399		} else if (cdb_group_id == CDB_GROUPID_4) {
13400			FORMG4LONGADDR(cdbp, lba);
13401			FORMG4COUNT(cdbp, blockcount);
13402			return (0);
13403		} else if (cdb_group_id == CDB_GROUPID_0) {
13404			FORMG0ADDR(cdbp, lba);
13405			FORMG0COUNT(cdbp, blockcount);
13406			return (0);
13407		} else if (cdb_group_id == CDB_GROUPID_5) {
13408			FORMG5ADDR(cdbp, lba);
13409			FORMG5COUNT(cdbp, blockcount);
13410			return (0);
13411		}
13412
13413		/* Unreachable */
13414		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13415	}
13416
13417	/*
13418	 * Error setting up next portion of cmd transfer.
13419	 * Something is definitely very wrong and this
13420	 * should not happen.
13421	 */
13422	return (SD_PKT_ALLOC_FAILURE);
13423}
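
/*
 * Editorial sketch (assumed numbers): with PKT_DMA_PARTIAL, a transfer
 * bound in windows advances one mapping at a time.  If the initial
 * packet moved only 0x80 of 0x100 blocks, the caller is expected to
 * advance the window with something like
 *
 *	(void) sd_setup_next_rw_pkt(un, pktp, bp, lba + 0x80, 0x80);
 *
 * which restores the command byte and rewrites the LBA/count fields,
 * reusing the CDB group selected for the initial transfer by
 * sd_setup_rw_pkt().
 */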
13424
13425/*
13426 *    Function: sd_initpkt_for_uscsi
13427 *
13428 * Description: Allocate and initialize for transport a scsi_pkt struct,
13429 *		based upon the info specified in the given uscsi_cmd struct.
13430 *
13431 * Return Code: SD_PKT_ALLOC_SUCCESS
13432 *		SD_PKT_ALLOC_FAILURE
13433 *		SD_PKT_ALLOC_FAILURE_NO_DMA
13434 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13435 *
13436 *     Context: Kernel thread and may be called from software interrupt context
13437 *		as part of a sdrunout callback. This function may not block or
13438 *		call routines that block
13439 */
13440
13441static int
13442sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13443{
13444	struct uscsi_cmd *uscmd;
13445	struct sd_xbuf	*xp;
13446	struct scsi_pkt	*pktp;
13447	struct sd_lun	*un;
13448	uint32_t	flags = 0;
13449
13450	ASSERT(bp != NULL);
13451	ASSERT(pktpp != NULL);
13452	xp = SD_GET_XBUF(bp);
13453	ASSERT(xp != NULL);
13454	un = SD_GET_UN(bp);
13455	ASSERT(un != NULL);
13456	ASSERT(mutex_owned(SD_MUTEX(un)));
13457
13458	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13459	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13460	ASSERT(uscmd != NULL);
13461
13462	SD_TRACE(SD_LOG_IO_CORE, un,
13463	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13464
13465	/*
13466	 * Allocate the scsi_pkt for the command.
13467	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13468	 *	 during scsi_init_pkt time and will continue to use the
13469	 *	 intervening scsi_dmafree(). Since a uscsi command does
13470	 *	 not call scsi_dmafree() before retrying a failed command,
13471	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
13472	 *	 NOT set, so that scsi_vhci can use another available path
13473	 *	 for the retry. Besides, a uscsi command does not allow
13474	 *	 DMA breakup, so there is no need to set PKT_DMA_PARTIAL.
13475	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
13476	 */
13477	if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
13478		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13479		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13480		    ((int)(uscmd->uscsi_rqlen) + sizeof (struct scsi_arq_status)
13481		    - sizeof (struct scsi_extended_sense)), 0,
13482		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL) | PKT_XARQ,
13483		    sdrunout, (caddr_t)un);
13484	} else {
13485		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13486		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13487		    sizeof (struct scsi_arq_status), 0,
13488		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13489		    sdrunout, (caddr_t)un);
13490	}
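
	/*
	 * Editorial note on the statuslen arithmetic above: struct
	 * scsi_arq_status already embeds a struct scsi_extended_sense, so
	 * when the caller wants uscsi_rqlen bytes of sense data the embedded
	 * space is subtracted out rather than counted twice:
	 *
	 *	statuslen = uscsi_rqlen + sizeof (struct scsi_arq_status)
	 *	    - sizeof (struct scsi_extended_sense);
	 *
	 * PKT_XARQ then tells the HBA that room beyond the normal sense
	 * length is available in the packet.
	 */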
13491
13492	if (pktp == NULL) {
13493		*pktpp = NULL;
13494		/*
13495		 * Set the driver state to RWAIT to indicate the driver
13496		 * is waiting on resource allocations. The driver will not
13497		 * suspend, pm_suspend, or detach while the state is RWAIT.
13498		 */
13499		New_state(un, SD_STATE_RWAIT);
13500
13501		SD_ERROR(SD_LOG_IO_CORE, un,
13502		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13503
13504		if ((bp->b_flags & B_ERROR) != 0) {
13505			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13506		}
13507		return (SD_PKT_ALLOC_FAILURE);
13508	}
13509
13510	/*
13511	 * We do not do DMA breakup for USCSI commands, so return failure
13512	 * here if all the needed DMA resources were not allocated.
13513	 */
13514	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13515	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13516		scsi_destroy_pkt(pktp);
13517		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13518		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13519		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13520	}
13521
13522	/* Init the cdb from the given uscsi struct */
13523	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13524	    uscmd->uscsi_cdb[0], 0, 0, 0);
13525
13526	SD_FILL_SCSI1_LUN(un, pktp);
13527
13528	/*
13529	 * Set up the optional USCSI flags. See the uscsi (7I) man page
13530	 * for a listing of the supported flags.
13531	 */
13532
13533	if (uscmd->uscsi_flags & USCSI_SILENT) {
13534		flags |= FLAG_SILENT;
13535	}
13536
13537	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13538		flags |= FLAG_DIAGNOSE;
13539	}
13540
13541	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13542		flags |= FLAG_ISOLATE;
13543	}
13544
13545	if (un->un_f_is_fibre == FALSE) {
13546		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13547			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13548		}
13549	}
13550
13551	/*
13552	 * Set the pkt flags here so we save time later.
13553	 * Note: These flags are NOT in the uscsi man page!!!
13554	 */
13555	if (uscmd->uscsi_flags & USCSI_HEAD) {
13556		flags |= FLAG_HEAD;
13557	}
13558
13559	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13560		flags |= FLAG_NOINTR;
13561	}
13562
13563	/*
13564	 * For tagged queueing, things get a bit complicated.
13565	 * Check first for head of queue and last for ordered queue.
13566	 * If neither head nor ordered, use the default driver tag flags.
13567	 */
13568	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13569		if (uscmd->uscsi_flags & USCSI_HTAG) {
13570			flags |= FLAG_HTAG;
13571		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13572			flags |= FLAG_OTAG;
13573		} else {
13574			flags |= un->un_tagflags & FLAG_TAGMASK;
13575		}
13576	}
13577
13578	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13579		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13580	}
13581
13582	pktp->pkt_flags = flags;
13583
13584	/* Transfer uscsi information to scsi_pkt */
13585	(void) scsi_uscsi_pktinit(uscmd, pktp);
13586
13587	/* Copy the caller's CDB into the pkt... */
13588	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13589
13590	if (uscmd->uscsi_timeout == 0) {
13591		pktp->pkt_time = un->un_uscsi_timeout;
13592	} else {
13593		pktp->pkt_time = uscmd->uscsi_timeout;
13594	}
13595
13596	/* Needed later to identify a USCSI request in sdintr */
13597	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13598
13599	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13600
13601	pktp->pkt_private = bp;
13602	pktp->pkt_comp = sdintr;
13603	*pktpp = pktp;
13604
13605	SD_TRACE(SD_LOG_IO_CORE, un,
13606	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13607
13608	return (SD_PKT_ALLOC_SUCCESS);
13609}
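
/*
 * Editorial sketch (not part of the driver): a minimal user-level
 * uscsi(7I) caller that exercises the packet setup above.  With
 * USCSI_RQENABLE set and a uscsi_rqlen larger than SENSE_LENGTH, the
 * PKT_XARQ path in sd_initpkt_for_uscsi() is taken.  The helper name
 * is hypothetical.
 *
 *	#include <sys/scsi/impl/uscsi.h>
 *	#include <sys/scsi/generic/commands.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	send_tur(int fd)
 *	{
 *		struct uscsi_cmd ucmd;
 *		char cdb[CDB_GROUP0] = { 0 };	(all-zero CDB == TEST UNIT READY)
 *		char sense[255];		(255 > SENSE_LENGTH => PKT_XARQ)
 *
 *		(void) memset(&ucmd, 0, sizeof (ucmd));
 *		ucmd.uscsi_cdb = cdb;
 *		ucmd.uscsi_cdblen = sizeof (cdb);
 *		ucmd.uscsi_rqbuf = sense;
 *		ucmd.uscsi_rqlen = sizeof (sense);
 *		ucmd.uscsi_timeout = 30;
 *		ucmd.uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
 *		return (ioctl(fd, USCSICMD, &ucmd));
 *	}
 */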
13610
13611
13612/*
13613 *    Function: sd_destroypkt_for_uscsi
13614 *
13615 * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13616 *		IOs.  Also saves relevant info into the associated uscsi_cmd
13617 *		struct.
13618 *
13619 *     Context: May be called under interrupt context
13620 */
13621
13622static void
13623sd_destroypkt_for_uscsi(struct buf *bp)
13624{
13625	struct uscsi_cmd *uscmd;
13626	struct sd_xbuf	*xp;
13627	struct scsi_pkt	*pktp;
13628	struct sd_lun	*un;
13629	struct sd_uscsi_info *suip;
13630
13631	ASSERT(bp != NULL);
13632	xp = SD_GET_XBUF(bp);
13633	ASSERT(xp != NULL);
13634	un = SD_GET_UN(bp);
13635	ASSERT(un != NULL);
13636	ASSERT(!mutex_owned(SD_MUTEX(un)));
13637	pktp = SD_GET_PKTP(bp);
13638	ASSERT(pktp != NULL);
13639
13640	SD_TRACE(SD_LOG_IO_CORE, un,
13641	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13642
13643	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13644	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13645	ASSERT(uscmd != NULL);
13646
13647	/* Save the status and the residual into the uscsi_cmd struct */
13648	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13649	uscmd->uscsi_resid  = bp->b_resid;
13650
13651	/* Transfer scsi_pkt information to uscsi */
13652	(void) scsi_uscsi_pktfini(pktp, uscmd);
13653
13654	/*
13655	 * If enabled, copy any saved sense data into the area specified
13656	 * by the uscsi command.
13657	 */
13658	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13659	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13660		/*
13661		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13662		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13663		 */
13664		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13665		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13666		if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
13667			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
13668			    MAX_SENSE_LENGTH);
13669		} else {
13670			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
13671			    SENSE_LENGTH);
13672		}
13673	}
13674	/*
13675	 * The following assignments are for SCSI FMA.
13676	 */
13677	ASSERT(xp->xb_private != NULL);
13678	suip = (struct sd_uscsi_info *)xp->xb_private;
13679	suip->ui_pkt_reason = pktp->pkt_reason;
13680	suip->ui_pkt_state = pktp->pkt_state;
13681	suip->ui_pkt_statistics = pktp->pkt_statistics;
13682	suip->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
13683
13684	/* We are done with the scsi_pkt; free it now */
13685	ASSERT(SD_GET_PKTP(bp) != NULL);
13686	scsi_destroy_pkt(SD_GET_PKTP(bp));
13687
13688	SD_TRACE(SD_LOG_IO_CORE, un,
13689	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13690}
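
/*
 * Editorial note (caller's view of the fields saved above): once the
 * USCSICMD ioctl returns, the caller consumes the status, residual, and
 * sense data copied out here, e.g.:
 *
 *	if (ucmd.uscsi_status == STATUS_CHECK &&
 *	    ucmd.uscsi_rqstatus == STATUS_GOOD) {
 *		int valid = ucmd.uscsi_rqlen - ucmd.uscsi_rqresid;
 *		(the first 'valid' bytes of ucmd.uscsi_rqbuf are sense data)
 *	}
 */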
13691
13692
13693/*
13694 *    Function: sd_bioclone_alloc
13695 *
13696 * Description: Allocate a buf(9S) and init it as per the given buf
13697 *		and the various arguments.  The associated sd_xbuf
13698 *		struct is (nearly) duplicated.  The struct buf *bp
13699 *		argument is saved in new_xp->xb_private.
13700 *
13701 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13702 *		datalen - size of data area for the shadow bp
13703 *		blkno - starting LBA
13704 *		func - function pointer for b_iodone in the shadow buf. (May
13705 *			be NULL if none.)
13706 *
13707 * Return Code: Pointer to the allocated buf(9S) struct
13708 *
13709 *     Context: Can sleep.
13710 */
13711
13712static struct buf *
13713sd_bioclone_alloc(struct buf *bp, size_t datalen,
13714	daddr_t blkno, int (*func)(struct buf *))
13715{
13716	struct	sd_lun	*un;
13717	struct	sd_xbuf	*xp;
13718	struct	sd_xbuf	*new_xp;
13719	struct	buf	*new_bp;
13720
13721	ASSERT(bp != NULL);
13722	xp = SD_GET_XBUF(bp);
13723	ASSERT(xp != NULL);
13724	un = SD_GET_UN(bp);
13725	ASSERT(un != NULL);
13726	ASSERT(!mutex_owned(SD_MUTEX(un)));
13727
13728	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13729	    NULL, KM_SLEEP);
13730
13731	new_bp->b_lblkno	= blkno;
13732
13733	/*
13734	 * Allocate an xbuf for the shadow bp and copy the contents of the
13735	 * original xbuf into it.
13736	 */
13737	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13738	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13739
13740	/*
13741	 * The given bp is automatically saved in the xb_private member
13742	 * of the new xbuf.  Callers are allowed to depend on this.
13743	 */
13744	new_xp->xb_private = bp;
13745
13746	new_bp->b_private  = new_xp;
13747
13748	return (new_bp);
13749}
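
/*
 * Editorial sketch: how a b_iodone callback passed to sd_bioclone_alloc()
 * can recover the original buf through the xb_private linkage set up
 * above ('my_iodone' is hypothetical):
 *
 *	static int
 *	my_iodone(struct buf *shadow_bp)
 *	{
 *		struct sd_xbuf *xp = SD_GET_XBUF(shadow_bp);
 *		struct buf *orig_bp = (struct buf *)xp->xb_private;
 *
 *		(propagate b_resid/b_error from shadow_bp to orig_bp,
 *		 then release the shadow via sd_bioclone_free(shadow_bp))
 *		return (0);
 *	}
 */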
13750
13751/*
13752 *    Function: sd_shadow_buf_alloc
13753 *
13754 * Description: Allocate a buf(9S) and init it as per the given buf
13755 *		and the various arguments.  The associated sd_xbuf
13756 *		struct is (nearly) duplicated.  The struct buf *bp
13757 *		argument is saved in new_xp->xb_private.
13758 *
13759 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13760 *		datalen - size of data area for the shadow bp
13761 *		bflags - B_READ or B_WRITE (pseudo flag)
13762 *		blkno - starting LBA
13763 *		func - function pointer for b_iodone in the shadow buf. (May
13764 *			be NULL if none.)
13765 *
13766 * Return Code: Pointer to the allocated buf(9S) struct
13767 *
13768 *     Context: Can sleep.
13769 */
13770
13771static struct buf *
13772sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
13773	daddr_t blkno, int (*func)(struct buf *))
13774{
13775	struct	sd_lun	*un;
13776	struct	sd_xbuf	*xp;
13777	struct	sd_xbuf	*new_xp;
13778	struct	buf	*new_bp;
13779
13780	ASSERT(bp != NULL);
13781	xp = SD_GET_XBUF(bp);
13782	ASSERT(xp != NULL);
13783	un = SD_GET_UN(bp);
13784	ASSERT(un != NULL);
13785	ASSERT(!mutex_owned(SD_MUTEX(un)));
13786
13787	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
13788		bp_mapin(bp);
13789	}
13790
13791	bflags &= (B_READ | B_WRITE);
13792#if defined(__i386) || defined(__amd64)
13793	new_bp = getrbuf(KM_SLEEP);
13794	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
13795	new_bp->b_bcount = datalen;
13796	new_bp->b_flags = bflags |
13797	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
13798#else
13799	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
13800	    datalen, bflags, SLEEP_FUNC, NULL);
13801#endif
13802	new_bp->av_forw	= NULL;
13803	new_bp->av_back	= NULL;
13804	new_bp->b_dev	= bp->b_dev;
13805	new_bp->b_blkno	= blkno;
13806	new_bp->b_iodone = func;
13807	new_bp->b_edev	= bp->b_edev;
13808	new_bp->b_resid	= 0;
13809
13810	/* We need to preserve the B_FAILFAST flag */
13811	if (bp->b_flags & B_FAILFAST) {
13812		new_bp->b_flags |= B_FAILFAST;
13813	}
13814
13815	/*
13816	 * Allocate an xbuf for the shadow bp and copy the contents of the
13817	 * original xbuf into it.
13818	 */
13819	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13820	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13821
13822	/* Needed later to copy data between the shadow buf & the original buf */
13823	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
13824
13825	/*
13826	 * The given bp is automatically saved in the xb_private member
13827	 * of the new xbuf.  Callers are allowed to depend on this.
13828	 */
13829	new_xp->xb_private = bp;
13830
13831	new_bp->b_private  = new_xp;
13832
13833	return (new_bp);
13834}
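
/*
 * Editorial note: on x86 the shadow data buffer is ordinary kernel
 * memory (getrbuf(9F) plus kmem_zalloc(9F)), while on other platforms
 * it is DMA-consistent memory from scsi_alloc_consistent_buf(9F);
 * sd_shadow_buf_free() below mirrors this split when deallocating.
 */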
13835
13836/*
13837 *    Function: sd_bioclone_free
13838 *
13839 * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
13840 *		in the larger-than-partition case.
13841 *
13842 *     Context: May be called under interrupt context
13843 */
13844
13845static void
13846sd_bioclone_free(struct buf *bp)
13847{
13848	struct sd_xbuf	*xp;
13849
13850	ASSERT(bp != NULL);
13851	xp = SD_GET_XBUF(bp);
13852	ASSERT(xp != NULL);
13853
13854	/*
13855	 * Call bp_mapout() before freeing the buf, in case a lower
13856	 * layer or HBA had done a bp_mapin().  We must do this here
13857	 * as we are the "originator" of the shadow buf.
13858	 */
13859	bp_mapout(bp);
13860
13861	/*
13862	 * Null out b_iodone before freeing the bp, to ensure that the driver
13863	 * never gets confused by a stale value in this field. (Just a little
13864	 * extra defensiveness here.)
13865	 */
13866	bp->b_iodone = NULL;
13867
13868	freerbuf(bp);
13869
13870	kmem_free(xp, sizeof (struct sd_xbuf));
13871}
13872
13873/*
13874 *    Function: sd_shadow_buf_free
13875 *
13876 * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
13877 *
13878 *     Context: May be called under interrupt context
13879 */
13880
13881static void
13882sd_shadow_buf_free(struct buf *bp)
13883{
13884	struct sd_xbuf	*xp;
13885
13886	ASSERT(bp != NULL);
13887	xp = SD_GET_XBUF(bp);
13888	ASSERT(xp != NULL);
13889
13890#if defined(__sparc)
13891	/*
13892	 * Call bp_mapout() before freeing the buf, in case a lower
13893	 * layer or HBA had done a bp_mapin().  We must do this here
13894	 * as we are the "originator" of the shadow buf.
13895	 */
13896	bp_mapout(bp);
13897#endif
13898
13899	/*
13900	 * Null out b_iodone before freeing the bp, to ensure that the driver
13901	 * never gets confused by a stale value in this field. (Just a little
13902	 * extra defensiveness here.)
13903	 */
13904	bp->b_iodone = NULL;
13905
13906#if defined(__i386) || defined(__amd64)
13907	kmem_free(bp->b_un.b_addr, bp->b_bcount);
13908	freerbuf(bp);
13909#else
13910	scsi_free_consistent_buf(bp);
13911#endif
13912
13913	kmem_free(xp, sizeof (struct sd_xbuf));
13914}
13915
13916
13917/*
13918 *    Function: sd_print_transport_rejected_message
13919 *
13920 * Description: This implements the ludicrously complex rules for printing
13921 *		a "transport rejected" message.  This is to address the
13922 *		specific problem of having a flood of this error message
13923 *		produced when a failover occurs.
13924 *
13925 *     Context: Any.
13926 */
13927
13928static void
13929sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13930	int code)
13931{
13932	ASSERT(un != NULL);
13933	ASSERT(mutex_owned(SD_MUTEX(un)));
13934	ASSERT(xp != NULL);
13935
13936	/*
13937	 * Print the "transport rejected" message under the following
13938	 * conditions:
13939	 *
13940	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13941	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13942	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13943	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13944	 *   scsi_transport(9F) (which indicates that the target might have
13945	 *   gone off-line).  This uses the un->un_tran_fatal_count
13946	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
13947	 *   counter, which is incremented whenever a TRAN_FATAL_ERROR is
13948	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
13949	 *
13950	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
13951	 * the preceeding cases in order for the message to be printed.
13952 * the preceding cases in order for the message to be printed.
13953	if (((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) &&
13954	    (SD_FM_LOG(un) == SD_FM_LOG_NSUP)) {
13955		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
13956		    (code != TRAN_FATAL_ERROR) ||
13957		    (un->un_tran_fatal_count == 1)) {
13958			switch (code) {
13959			case TRAN_BADPKT:
13960				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13961				    "transport rejected bad packet\n");
13962				break;
13963			case TRAN_FATAL_ERROR:
13964				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13965				    "transport rejected fatal error\n");
13966				break;
13967			default:
13968				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13969				    "transport rejected (%d)\n", code);
13970				break;
13971			}
13972		}
13973	}
13974}
13975
13976
13977/*
13978 *    Function: sd_add_buf_to_waitq
13979 *
13980 * Description: Add the given buf(9S) struct to the wait queue for the
13981 *		instance.  If sorting is enabled, then the buf is added
13982 *		to the queue via an elevator sort algorithm (a la
13983 *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
13984 *		If sorting is not enabled, then the buf is just added
13985 *		to the end of the wait queue.
13986 *
13987 * Return Code: void
13988 *
13989 *     Context: Does not sleep/block, therefore technically can be called
13990 *		from any context.  However if sorting is enabled then the
13991 *		execution time is indeterminate, and may become long if
13992 *		the wait queue grows large.
13993 */
13994
13995static void
13996sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
13997{
13998	struct buf *ap;
13999
14000	ASSERT(bp != NULL);
14001	ASSERT(un != NULL);
14002	ASSERT(mutex_owned(SD_MUTEX(un)));
14003
14004	/* If the queue is empty, add the buf as the only entry & return. */
14005	if (un->un_waitq_headp == NULL) {
14006		ASSERT(un->un_waitq_tailp == NULL);
14007		un->un_waitq_headp = un->un_waitq_tailp = bp;
14008		bp->av_forw = NULL;
14009		return;
14010	}
14011
14012	ASSERT(un->un_waitq_tailp != NULL);
14013
14014	/*
14015	 * If sorting is disabled, just add the buf to the tail end of
14016	 * the wait queue and return.
14017	 */
14018	if (un->un_f_disksort_disabled) {
14019		un->un_waitq_tailp->av_forw = bp;
14020		un->un_waitq_tailp = bp;
14021		bp->av_forw = NULL;
14022		return;
14023	}
14024
14025	/*
14026	 * Sort thru the list of requests currently on the wait queue
14027	 * and add the new buf request at the appropriate position.
14028	 *
14029	 * The un->un_waitq_headp is an activity chain pointer on which
14030	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14031	 * first queue holds those requests which are positioned after
14032	 * the current SD_GET_BLKNO() (in the first request); the second holds
14033	 * requests which came in after their SD_GET_BLKNO() number was passed.
14034	 * Thus we implement a one-way scan, retracting after reaching
14035	 * the end of the drive to the first request on the second
14036	 * queue, at which time it becomes the first queue.
14037	 * A one-way scan is natural because of the way UNIX read-ahead
14038	 * blocks are allocated.
14039	 *
14040	 * If our blkno precedes the first request's (i.e., the scan has
14041	 * already passed it), we must locate the second list and join it.
14042	 */
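	/*
	 * Worked example (editorial): given a waitq of blknos
	 * 120 -> 300 -> 40 -> 70 (the inversion at 40 starts the second
	 * list), a new request for blkno 200 sorts into the first list
	 * between 120 and 300, while a request for blkno 50, whose
	 * position the scan has already passed, sorts into the second
	 * list between 40 and 70.
	 */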
14043	ap = un->un_waitq_headp;
14044	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14045		while (ap->av_forw != NULL) {
14046			/*
14047			 * Look for an "inversion" in the (normally
14048			 * ascending) block numbers. This indicates
14049			 * the start of the second request list.
14050			 */
14051			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14052				/*
14053				 * Search the second request list for the
14054				 * first request at a larger block number.
14055				 * We go before that; however if there is
14056				 * no such request, we go at the end.
14057				 */
14058				do {
14059					if (SD_GET_BLKNO(bp) <
14060					    SD_GET_BLKNO(ap->av_forw)) {
14061						goto insert;
14062					}
14063					ap = ap->av_forw;
14064				} while (ap->av_forw != NULL);
14065				goto insert;		/* after last */
14066			}
14067			ap = ap->av_forw;
14068		}
14069
14070		/*
14071		 * No inversions... we will go after the last, and
14072		 * be the first request in the second request list.
14073		 */
14074		goto insert;
14075	}
14076
14077	/*
14078	 * Request is at/after the current request...
14079	 * sort in the first request list.
14080	 */
14081	while (ap->av_forw != NULL) {
14082		/*
14083		 * We want to go after the current request (1) if
14084		 * there is an inversion after it (i.e. it is the end
14085		 * of the first request list), or (2) if the next
14086		 * request is a larger block no. than our request.
14087		 */
14088		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14089		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14090			goto insert;
14091		}
14092		ap = ap->av_forw;
14093	}
14094
14095	/*
14096	 * Neither a second list nor a larger request, therefore
14097	 * we go at the end of the first list (which is the same
14098	 * as the end of the whole shebang).
14099	 */
14100insert:
14101	bp->av_forw = ap->av_forw;
14102	ap->av_forw = bp;
14103
14104	/*
14105	 * If we inserted onto the tail end of the waitq, make sure the
14106	 * tail pointer is updated.
14107	 */
14108	if (ap == un->un_waitq_tailp) {
14109		un->un_waitq_tailp = bp;
14110	}
14111}
14112
14113
14114/*
14115 *    Function: sd_start_cmds
14116 *
14117 * Description: Remove and transport cmds from the driver queues.
14118 *
14119 *   Arguments: un - pointer to the unit (soft state) struct for the target.
14120 *
14121 *		immed_bp - ptr to a buf to be transported immediately. Only
14122 *		the immed_bp is transported; bufs on the waitq are not
14123 *		processed and the un_retry_bp is not checked.  If immed_bp is
14124 *		NULL, then normal queue processing is performed.
14125 *
14126 *     Context: May be called from kernel thread context, interrupt context,
14127 *		or runout callback context. This function may not block or
14128 *		call routines that block.
14129 */
14130
14131static void
14132sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14133{
14134	struct	sd_xbuf	*xp;
14135	struct	buf	*bp;
14136	void	(*statp)(kstat_io_t *);
14137#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14138	void	(*saved_statp)(kstat_io_t *);
14139#endif
14140	int	rval;
14141	struct sd_fm_internal *sfip = NULL;
14142
14143	ASSERT(un != NULL);
14144	ASSERT(mutex_owned(SD_MUTEX(un)));
14145	ASSERT(un->un_ncmds_in_transport >= 0);
14146	ASSERT(un->un_throttle >= 0);
14147
14148	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14149
14150	do {
14151#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14152		saved_statp = NULL;
14153#endif
14154
14155		/*
14156		 * If we are syncing or dumping, fail the command to
14157		 * avoid recursively calling back into scsi_transport().
14158		 * The dump I/O itself uses a separate code path so this
14159		 * only prevents non-dump I/O from being sent while dumping.
14160		 * File system sync takes place before dumping begins.
14161		 * During panic, filesystem I/O is allowed provided
14162		 * un_in_callback is <= 1.  This is to prevent recursion
14163		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14164		 * sd_start_cmds and so on.  See panic.c for more information
14165		 * about the states the system can be in during panic.
14166		 */
14167		if ((un->un_state == SD_STATE_DUMPING) ||
14168		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14169			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14170			    "sd_start_cmds: panicking\n");
14171			goto exit;
14172		}
14173
14174		if ((bp = immed_bp) != NULL) {
14175			/*
14176			 * We have a bp that must be transported immediately.
14177			 * It's OK to transport the immed_bp here without doing
14178			 * the throttle limit check because the immed_bp is
14179			 * always used in a retry/recovery case. This means
14180			 * that we know we are not at the throttle limit by
14181			 * virtue of the fact that to get here we must have
14182			 * already gotten a command back via sdintr(). This also
14183			 * relies on (1) the command on un_retry_bp preventing
14184			 * further commands from the waitq from being issued;
14185			 * and (2) the code in sd_retry_command checking the
14186			 * throttle limit before issuing a delayed or immediate
14187			 * retry. This holds even if the throttle limit is
14188			 * currently ratcheted down from its maximum value.
14189			 */
14190			statp = kstat_runq_enter;
14191			if (bp == un->un_retry_bp) {
14192				ASSERT((un->un_retry_statp == NULL) ||
14193				    (un->un_retry_statp == kstat_waitq_enter) ||
14194				    (un->un_retry_statp ==
14195				    kstat_runq_back_to_waitq));
14196				/*
14197				 * If the waitq kstat was incremented when
14198				 * sd_set_retry_bp() queued this bp for a retry,
14199				 * then we must set up statp so that the waitq
14200				 * count will get decremented correctly below.
14201				 * Also we must clear un->un_retry_statp to
14202				 * ensure that we do not act on a stale value
14203				 * in this field.
14204				 */
14205				if ((un->un_retry_statp == kstat_waitq_enter) ||
14206				    (un->un_retry_statp ==
14207				    kstat_runq_back_to_waitq)) {
14208					statp = kstat_waitq_to_runq;
14209				}
14210#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14211				saved_statp = un->un_retry_statp;
14212#endif
14213				un->un_retry_statp = NULL;
14214
14215				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14216				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14217				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14218				    un, un->un_retry_bp, un->un_throttle,
14219				    un->un_ncmds_in_transport);
14220			} else {
14221				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14222				    "processing priority bp:0x%p\n", bp);
14223			}
14224
14225		} else if ((bp = un->un_waitq_headp) != NULL) {
14226			/*
14227			 * A command on the waitq is ready to go, but do not
14228			 * send it if:
14229			 *
14230			 * (1) the throttle limit has been reached, or
14231			 * (2) a retry is pending, or
14232			 * (3) a START_STOP_UNIT callback is pending, or
14233			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14234			 *	command is pending.
14235			 *
14236			 * For all of these conditions, IO processing will
14237			 * restart after the condition is cleared.
14238			 */
14239			if (un->un_ncmds_in_transport >= un->un_throttle) {
14240				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14241				    "sd_start_cmds: exiting, "
14242				    "throttle limit reached!\n");
14243				goto exit;
14244			}
14245			if (un->un_retry_bp != NULL) {
14246				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14247				    "sd_start_cmds: exiting, retry pending!\n");
14248				goto exit;
14249			}
14250			if (un->un_startstop_timeid != NULL) {
14251				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14252				    "sd_start_cmds: exiting, "
14253				    "START_STOP pending!\n");
14254				goto exit;
14255			}
14256			if (un->un_direct_priority_timeid != NULL) {
14257				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14258				    "sd_start_cmds: exiting, "
14259				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14260				goto exit;
14261			}
14262
14263			/* Dequeue the command */
14264			un->un_waitq_headp = bp->av_forw;
14265			if (un->un_waitq_headp == NULL) {
14266				un->un_waitq_tailp = NULL;
14267			}
14268			bp->av_forw = NULL;
14269			statp = kstat_waitq_to_runq;
14270			SD_TRACE(SD_LOG_IO_CORE, un,
14271			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14272
14273		} else {
14274			/* No work to do so bail out now */
14275			SD_TRACE(SD_LOG_IO_CORE, un,
14276			    "sd_start_cmds: no more work, exiting!\n");
14277			goto exit;
14278		}
14279
14280		/*
14281		 * Reset the state to normal. This is the mechanism by which
14282		 * the state transitions from either SD_STATE_RWAIT or
14283		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14284		 * If state is SD_STATE_PM_CHANGING then this command is
14285		 * part of the device power control and the state must
14286		 * not be put back to normal. Doing so would
14287		 * allow new commands to proceed when they shouldn't,
14288		 * as the device may be powering off.
14289		 */
14290		if ((un->un_state != SD_STATE_SUSPENDED) &&
14291		    (un->un_state != SD_STATE_PM_CHANGING)) {
14292			New_state(un, SD_STATE_NORMAL);
14293		}
14294
14295		xp = SD_GET_XBUF(bp);
14296		ASSERT(xp != NULL);
14297
14298#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14299		/*
14300		 * Allocate the scsi_pkt if we need one, or attach DMA
14301		 * resources if we have a scsi_pkt that needs them. The
14302		 * latter should only occur for commands that are being
14303		 * retried.
14304		 */
14305		if ((xp->xb_pktp == NULL) ||
14306		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14307#else
14308		if (xp->xb_pktp == NULL) {
14309#endif
14310			/*
14311			 * There is no scsi_pkt allocated for this buf. Call
14312			 * the initpkt function to allocate & init one.
14313			 *
14314			 * The scsi_init_pkt runout callback functionality is
14315			 * implemented as follows:
14316			 *
14317			 * 1) The initpkt function always calls
14318			 *    scsi_init_pkt(9F) with sdrunout specified as the
14319			 *    callback routine.
14320			 * 2) A successful packet allocation is initialized and
14321			 *    the I/O is transported.
14322			 * 3) The I/O associated with an allocation resource
14323			 *    failure is left on its queue to be retried via
14324			 *    runout or the next I/O.
14325			 * 4) The I/O associated with a DMA error is removed
14326			 *    from the queue and failed with EIO. Processing of
14327			 *    the transport queues is also halted to be
14328			 *    restarted via runout or the next I/O.
14329			 * 5) The I/O associated with a CDB size or packet
14330			 *    size error is removed from the queue and failed
14331			 *    with EIO. Processing of the transport queues is
14332			 *    continued.
14333			 *
14334			 * Note: there is no interface for canceling a runout
14335			 * callback. To prevent the driver from detaching or
14336			 * suspending while a runout is pending the driver
14337			 * state is set to SD_STATE_RWAIT
14338			 *
14339			 * Note: using the scsi_init_pkt callback facility can
14340			 * result in an I/O request persisting at the head of
14341			 * the list which cannot be satisfied even after
14342			 * multiple retries. In the future the driver may
14343			 * implement some kind of maximum runout count before
14344			 * failing an I/O.
14345			 *
14346			 * Note: the use of funcp below may seem superfluous,
14347			 * but it helps warlock figure out the correct
14348			 * initpkt function calls (see [s]sd.wlcmd).
14349			 */
14350			struct scsi_pkt	*pktp;
14351			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14352
14353			ASSERT(bp != un->un_rqs_bp);
14354
14355			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14356			switch ((*funcp)(bp, &pktp)) {
14357			case  SD_PKT_ALLOC_SUCCESS:
14358				xp->xb_pktp = pktp;
14359				SD_TRACE(SD_LOG_IO_CORE, un,
14360				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14361				    pktp);
14362				goto got_pkt;
14363
14364			case SD_PKT_ALLOC_FAILURE:
14365				/*
14366				 * Temporary (hopefully) resource depletion.
14367				 * Since retries and RQS commands always have a
14368				 * scsi_pkt allocated, these cases should never
14369				 * get here. So the only cases this needs to
14370				 * handle is a bp from the waitq (which we put
14371				 * back onto the waitq for sdrunout), or a bp
14372				 * sent as an immed_bp (which we just fail).
14373				 */
14374				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14375				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14376
14377#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14378
14379				if (bp == immed_bp) {
14380					/*
14381					 * If SD_XB_DMA_FREED is clear, then
14382					 * this is a failure to allocate a
14383					 * scsi_pkt, and we must fail the
14384					 * command.
14385					 */
14386					if ((xp->xb_pkt_flags &
14387					    SD_XB_DMA_FREED) == 0) {
14388						break;
14389					}
14390
14391					/*
14392					 * If this immediate command is NOT our
14393					 * un_retry_bp, then we must fail it.
14394					 */
14395					if (bp != un->un_retry_bp) {
14396						break;
14397					}
14398
14399					/*
14400					 * We get here if this cmd is our
14401					 * un_retry_bp that was DMAFREED, but
14402					 * scsi_init_pkt() failed to reallocate
14403					 * DMA resources when we attempted to
14404					 * retry it. This can happen when an
14405					 * mpxio failover is in progress, but
14406					 * we don't want to just fail the
14407					 * command in this case.
14408					 *
14409					 * Use timeout(9F) to restart it after
14410					 * a 100ms delay.  We don't want to
14411					 * let sdrunout() restart it, because
14412					 * sdrunout() is just supposed to start
14413					 * commands that are sitting on the
14414					 * wait queue.  The un_retry_bp stays
14415					 * set until the command completes, but
14416					 * sdrunout can be called many times
14417					 * before that happens.  Since sdrunout
14418					 * cannot tell if the un_retry_bp is
14419					 * already in the transport, it could
14420					 * end up calling scsi_transport() for
14421					 * the un_retry_bp multiple times.
14422					 *
14423					 * Also: don't schedule the callback
14424					 * if some other callback is already
14425					 * pending.
14426					 */
14427					if (un->un_retry_statp == NULL) {
14428						/*
14429						 * restore the kstat pointer to
14430						 * keep kstat counts coherent
14431						 * when we do retry the command.
14432						 */
14433						un->un_retry_statp =
14434						    saved_statp;
14435					}
14436
14437					if ((un->un_startstop_timeid == NULL) &&
14438					    (un->un_retry_timeid == NULL) &&
14439					    (un->un_direct_priority_timeid ==
14440					    NULL)) {
14441
14442						un->un_retry_timeid =
14443						    timeout(
14444						    sd_start_retry_command,
14445						    un, SD_RESTART_TIMEOUT);
14446					}
14447					goto exit;
14448				}
14449
14450#else
14451				if (bp == immed_bp) {
14452					break;	/* Just fail the command */
14453				}
14454#endif
14455
14456				/* Add the buf back to the head of the waitq */
14457				bp->av_forw = un->un_waitq_headp;
14458				un->un_waitq_headp = bp;
14459				if (un->un_waitq_tailp == NULL) {
14460					un->un_waitq_tailp = bp;
14461				}
14462				goto exit;
14463
14464			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14465				/*
14466				 * HBA DMA resource failure. Fail the command
14467				 * and continue processing of the queues.
14468				 */
14469				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14470				    "sd_start_cmds: "
14471				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14472				break;
14473
14474			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14475				/*
14476				 * Note:x86: Partial DMA mapping not supported
14477				 * for USCSI commands, and all the needed DMA
14478				 * resources were not allocated.
14479				 */
14480				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14481				    "sd_start_cmds: "
14482				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14483				break;
14484
14485			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14486				/*
14487				 * Note:x86: Request cannot fit into CDB based
14488				 * on lba and len.
14489				 */
14490				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14491				    "sd_start_cmds: "
14492				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14493				break;
14494
14495			default:
14496				/* Should NEVER get here! */
14497				panic("scsi_initpkt error");
14498				/*NOTREACHED*/
14499			}
14500
14501			/*
14502			 * Fatal error in allocating a scsi_pkt for this buf.
14503			 * Update kstats & return the buf with an error code.
14504			 * We must use sd_return_failed_command_no_restart() to
14505			 * avoid a recursive call back into sd_start_cmds().
14506			 * However this also means that we must keep processing
14507			 * the waitq here in order to avoid stalling.
14508			 */
14509			if (statp == kstat_waitq_to_runq) {
14510				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14511			}
14512			sd_return_failed_command_no_restart(un, bp, EIO);
14513			if (bp == immed_bp) {
14514				/* immed_bp is gone by now, so clear this */
14515				immed_bp = NULL;
14516			}
14517			continue;
14518		}
14519got_pkt:
14520		if (bp == immed_bp) {
14521			/* goto the head of the class.... */
14522			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14523		}
14524
14525		un->un_ncmds_in_transport++;
14526		SD_UPDATE_KSTATS(un, statp, bp);
14527
14528		/*
14529		 * Call scsi_transport() to send the command to the target.
14530		 * According to SCSA architecture, we must drop the mutex here
14531		 * before calling scsi_transport() in order to avoid deadlock.
14532		 * Note that the scsi_pkt's completion routine can be executed
14533		 * (from interrupt context) even before the call to
14534		 * scsi_transport() returns.
14535		 */
14536		SD_TRACE(SD_LOG_IO_CORE, un,
14537		    "sd_start_cmds: calling scsi_transport()\n");
14538		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14539
14540		mutex_exit(SD_MUTEX(un));
14541		rval = scsi_transport(xp->xb_pktp);
14542		mutex_enter(SD_MUTEX(un));
14543
14544		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14545		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14546
14547		switch (rval) {
14548		case TRAN_ACCEPT:
14549			/* Clear this with every pkt accepted by the HBA */
14550			un->un_tran_fatal_count = 0;
14551			break;	/* Success; try the next cmd (if any) */
14552
14553		case TRAN_BUSY:
14554			un->un_ncmds_in_transport--;
14555			ASSERT(un->un_ncmds_in_transport >= 0);
14556
14557			/*
14558		 * Don't retry request sense; the sense data
14559			 * is lost when another request is sent.
14560			 * Free up the rqs buf and retry
14561			 * the original failed cmd.  Update kstat.
14562			 */
14563			if (bp == un->un_rqs_bp) {
14564				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14565				bp = sd_mark_rqs_idle(un, xp);
14566				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14567				    NULL, NULL, EIO, un->un_busy_timeout / 500,
14568				    kstat_waitq_enter);
14569				goto exit;
14570			}
14571
14572#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14573			/*
14574		 * Free the DMA resources for the scsi_pkt. This will
14575		 * allow mpxio to select another path the next time
14576		 * we call scsi_transport() with this scsi_pkt.
14577		 * See sdintr() for the rationale behind this.
14578			 */
14579			if ((un->un_f_is_fibre == TRUE) &&
14580			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14581			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14582				scsi_dmafree(xp->xb_pktp);
14583				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14584			}
14585#endif
14586
14587			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14588				/*
14589				 * Commands that are SD_PATH_DIRECT_PRIORITY
14590				 * are for error recovery situations. These do
14591				 * not use the normal command waitq, so if they
14592				 * get a TRAN_BUSY we cannot put them back onto
14593				 * the waitq for later retry. One possible
14594				 * problem is that there could already be some
14595				 * other command on un_retry_bp that is waiting
14596				 * for this one to complete, so we would be
14597				 * deadlocked if we put this command back onto
14598				 * the waitq for later retry (since un_retry_bp
14599				 * must complete before the driver gets back to
14600				 * commands on the waitq).
14601				 *
14602				 * To avoid deadlock we must schedule a callback
14603				 * that will restart this command after a set
14604				 * interval.  This should keep retrying for as
14605				 * long as the underlying transport keeps
14606				 * returning TRAN_BUSY (just like for other
14607				 * commands).  Use the same timeout interval as
14608				 * for the ordinary TRAN_BUSY retry.
14609				 */
14610				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14611				    "sd_start_cmds: scsi_transport() returned "
14612				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14613
14614				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14615				un->un_direct_priority_timeid =
14616				    timeout(sd_start_direct_priority_command,
14617				    bp, un->un_busy_timeout / 500);
14618
14619				goto exit;
14620			}
14621
14622			/*
14623			 * For TRAN_BUSY, we want to reduce the throttle value,
14624			 * unless we are retrying a command.
14625			 */
14626			if (bp != un->un_retry_bp) {
14627				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14628			}
14629
14630			/*
14631			 * Set up the bp to be tried again 10 ms later.
14632			 * Note:x86: Is there a timeout value in the sd_lun
14633			 * for this condition?
14634			 */
14635			sd_set_retry_bp(un, bp, un->un_busy_timeout / 500,
14636			    kstat_runq_back_to_waitq);
14637			goto exit;
14638
14639		case TRAN_FATAL_ERROR:
14640			un->un_tran_fatal_count++;
14641			/* FALLTHRU */
14642
14643		case TRAN_BADPKT:
14644		default:
14645			un->un_ncmds_in_transport--;
14646			ASSERT(un->un_ncmds_in_transport >= 0);
14647
14648			/*
14649			 * If this is our REQUEST SENSE command with a
14650			 * transport error, we must get back the pointers
14651			 * to the original buf, and mark the REQUEST
14652			 * SENSE command as "available".
14653			 */
14654			if (bp == un->un_rqs_bp) {
14655				bp = sd_mark_rqs_idle(un, xp);
14656				xp = SD_GET_XBUF(bp);
14657			} else {
14658				/*
14659				 * Legacy behavior: do not update transport
14660				 * error count for request sense commands.
14661				 */
14662				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14663			}
14664
14665			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14666			sd_print_transport_rejected_message(un, xp, rval);
14667
14668			/*
14669			 * This command will be terminated by the SD driver due
14670			 * to a fatal transport error. We should post
14671			 * ereport.io.scsi.cmd.disk.tran with driver-assessment
14672			 * of "fail" for any command to indicate this
14673			 * situation.
14674			 */
14675			if (xp->xb_ena > 0) {
14676				ASSERT(un->un_fm_private != NULL);
14677				sfip = un->un_fm_private;
14678				sfip->fm_ssc.ssc_flags |= SSC_FLAGS_TRAN_ABORT;
14679				sd_ssc_extract_info(&sfip->fm_ssc, un,
14680				    xp->xb_pktp, bp, xp);
14681				sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
14682			}
14683
14684			/*
14685			 * We must use sd_return_failed_command_no_restart() to
14686			 * avoid a recursive call back into sd_start_cmds().
14687			 * However this also means that we must keep processing
14688			 * the waitq here in order to avoid stalling.
14689			 */
14690			sd_return_failed_command_no_restart(un, bp, EIO);
14691
14692			/*
14693			 * Notify any threads waiting in sd_ddi_suspend() that
14694			 * a command completion has occurred.
14695			 */
14696			if (un->un_state == SD_STATE_SUSPENDED) {
14697				cv_broadcast(&un->un_disk_busy_cv);
14698			}
14699
14700			if (bp == immed_bp) {
14701				/* immed_bp is gone by now, so clear this */
14702				immed_bp = NULL;
14703			}
14704			break;
14705		}
14706
14707	} while (immed_bp == NULL);
14708
14709exit:
14710	ASSERT(mutex_owned(SD_MUTEX(un)));
14711	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14712}
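
/*
 * Editorial summary of the scsi_transport() return handling in
 * sd_start_cmds() above:
 *
 *	TRAN_ACCEPT		command is in flight; un_tran_fatal_count
 *				is reset and the next command is started.
 *	TRAN_BUSY		command is backed out and set up for a
 *				delayed retry; the throttle is reduced
 *				unless this was already a retry.
 *	TRAN_FATAL_ERROR,	command is failed with EIO via
 *	TRAN_BADPKT, default	sd_return_failed_command_no_restart();
 *				an ereport is posted when FMA telemetry
 *				is active (xb_ena != 0).
 */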
14713
14714
14715/*
14716 *    Function: sd_return_command
14717 *
14718 * Description: Returns a command to its originator (with or without an
14719 *		error).  Also starts commands waiting to be transported
14720 *		to the target.
14721 *
14722 *     Context: May be called from interrupt, kernel, or timeout context
14723 */
14724
14725static void
14726sd_return_command(struct sd_lun *un, struct buf *bp)
14727{
14728	struct sd_xbuf *xp;
14729	struct scsi_pkt *pktp;
14730	struct sd_fm_internal *sfip;
14731
14732	ASSERT(bp != NULL);
14733	ASSERT(un != NULL);
14734	ASSERT(mutex_owned(SD_MUTEX(un)));
14735	ASSERT(bp != un->un_rqs_bp);
14736	xp = SD_GET_XBUF(bp);
14737	ASSERT(xp != NULL);
14738
14739	pktp = SD_GET_PKTP(bp);
14740	sfip = (struct sd_fm_internal *)un->un_fm_private;
14741	ASSERT(sfip != NULL);
14742
14743	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14744
14745	/*
14746	 * Note: check for the "sdrestart failed" case.
14747	 */
14748	if ((un->un_partial_dma_supported == 1) &&
14749	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
14750	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
14751	    (xp->xb_pktp->pkt_resid == 0)) {
14752
14753		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
14754			/*
14755			 * Successfully set up next portion of cmd
14756			 * transfer, try sending it
14757			 */
14758			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14759			    NULL, NULL, 0, (clock_t)0, NULL);
14760			sd_start_cmds(un, NULL);
14761			return;	/* Note:x86: need a return here? */
14762		}
14763	}
14764
14765	/*
14766	 * If this is the failfast bp, clear it from un_failfast_bp. This
14767	 * can happen if, upon being retried, the failfast bp either
14768	 * succeeded or encountered another error (possibly even a different
14769	 * error than the one that precipitated the failfast state, but in
14770	 * that case it would have had to exhaust retries as well). Regardless,
14771	 * this should not occur whenever the instance is in the active
14772	 * this should never occur while the instance is in the active
14773	 */
14774	if (bp == un->un_failfast_bp) {
14775		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14776		un->un_failfast_bp = NULL;
14777	}
14778
14779	/*
14780	 * Clear the failfast state upon successful completion of ANY cmd.
14781	 */
14782	if (bp->b_error == 0) {
14783		un->un_failfast_state = SD_FAILFAST_INACTIVE;
14784		/*
14785		 * If this is a successful command that was previously
14786		 * retried, we treat it as a recovered command and post an
14787		 * ereport with a driver-assessment of "recovered".
14788		 */
14789		if (xp->xb_ena > 0) {
14790			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
14791			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RECOVERY);
14792		}
14793	} else {
14794		/*
14795		 * If this is a failed non-USCSI command we will post an
14796		 * ereport with driver-assessment set accordingly ("fail" or
14797		 * "fatal").
14798		 */
14799		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
14800			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
14801			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
14802		}
14803	}
14804
14805	/*
14806	 * This is used if the command was retried one or more times. Show that
14807	 * we are done with it, and allow processing of the waitq to resume.
14808	 */
14809	if (bp == un->un_retry_bp) {
14810		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14811		    "sd_return_command: un:0x%p: "
14812		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14813		un->un_retry_bp = NULL;
14814		un->un_retry_statp = NULL;
14815	}
14816
14817	SD_UPDATE_RDWR_STATS(un, bp);
14818	SD_UPDATE_PARTITION_STATS(un, bp);
14819
14820	switch (un->un_state) {
14821	case SD_STATE_SUSPENDED:
14822		/*
14823		 * Notify any threads waiting in sd_ddi_suspend() that
14824		 * a command completion has occurred.
14825		 */
14826		cv_broadcast(&un->un_disk_busy_cv);
14827		break;
14828	default:
14829		sd_start_cmds(un, NULL);
14830		break;
14831	}
14832
14833	/* Return this command up the iodone chain to its originator. */
14834	mutex_exit(SD_MUTEX(un));
14835
14836	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14837	xp->xb_pktp = NULL;
14838
14839	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14840
14841	ASSERT(!mutex_owned(SD_MUTEX(un)));
14842	mutex_enter(SD_MUTEX(un));
14843
14844	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
14845}
14846
14847
14848/*
14849 *    Function: sd_return_failed_command
14850 *
14851 * Description: Command completion when an error occurred.
14852 *
14853 *     Context: May be called from interrupt context
14854 */
14855
14856static void
14857sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
14858{
14859	ASSERT(bp != NULL);
14860	ASSERT(un != NULL);
14861	ASSERT(mutex_owned(SD_MUTEX(un)));
14862
14863	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14864	    "sd_return_failed_command: entry\n");
14865
14866	/*
14867	 * b_resid could already be nonzero due to a partial data
14868	 * transfer, so do not change it here.
14869	 */
14870	SD_BIOERROR(bp, errcode);
14871
14872	sd_return_command(un, bp);
14873	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14874	    "sd_return_failed_command: exit\n");
14875}
14876
14877
14878/*
14879 *    Function: sd_return_failed_command_no_restart
14880 *
14881 * Description: Same as sd_return_failed_command, but ensures that no
14882 *		call back into sd_start_cmds will be issued.
14883 *
14884 *     Context: May be called from interrupt context
14885 */
14886
14887static void
14888sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
14889	int errcode)
14890{
14891	struct sd_xbuf *xp;
14892
14893	ASSERT(bp != NULL);
14894	ASSERT(un != NULL);
14895	ASSERT(mutex_owned(SD_MUTEX(un)));
14896	xp = SD_GET_XBUF(bp);
14897	ASSERT(xp != NULL);
14898	ASSERT(errcode != 0);
14899
14900	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14901	    "sd_return_failed_command_no_restart: entry\n");
14902
14903	/*
14904	 * b_resid could already be nonzero due to a partial data
14905	 * transfer, so do not change it here.
14906	 */
14907	SD_BIOERROR(bp, errcode);
14908
14909	/*
14910	 * If this is the failfast bp, clear it. This can happen if the
14911	 * failfast bp encountered a fatal error when we attempted to
14912	 * re-try it (such as a scsi_transport(9F) failure).  However
14913	 * we should NOT be in an active failfast state if the failfast
14914	 * bp is not NULL.
14915	 */
14916	if (bp == un->un_failfast_bp) {
14917		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14918		un->un_failfast_bp = NULL;
14919	}
14920
14921	if (bp == un->un_retry_bp) {
14922		/*
14923		 * This command was retried one or more times. Show that we are
14924		 * done with it, and allow processing of the waitq to resume.
14925		 */
14926		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14927		    "sd_return_failed_command_no_restart: "
14928		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14929		un->un_retry_bp = NULL;
14930		un->un_retry_statp = NULL;
14931	}
14932
14933	SD_UPDATE_RDWR_STATS(un, bp);
14934	SD_UPDATE_PARTITION_STATS(un, bp);
14935
14936	mutex_exit(SD_MUTEX(un));
14937
14938	if (xp->xb_pktp != NULL) {
14939		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14940		xp->xb_pktp = NULL;
14941	}
14942
14943	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14944
14945	mutex_enter(SD_MUTEX(un));
14946
14947	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14948	    "sd_return_failed_command_no_restart: exit\n");
14949}
14950
14951
14952/*
14953 *    Function: sd_retry_command
14954 *
14955 * Description: queue up a command for retry, or (optionally) fail it
14956 *		if retry counts are exhausted.
14957 *
14958 *   Arguments: un - Pointer to the sd_lun struct for the target.
14959 *
14960 *		bp - Pointer to the buf for the command to be retried.
14961 *
14962 *		retry_check_flag - Flag to see which (if any) of the retry
14963 *		   counts should be decremented/checked. If the indicated
14964 *		   retry count is exhausted, then the command will not be
14965 *		   retried; it will be failed instead. This should use a
14966 *		   value equal to one of the SD_RETRIES_* flags, such as:
14967 *
14968 *			SD_RETRIES_NOCHECK
14969 *			SD_RETRIES_STANDARD
14970 *			SD_RETRIES_VICTIM
14971 *
14972 *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
14973 *		   if the check should be made to see if FLAG_ISOLATE is set
14974 *		   in the pkt. If FLAG_ISOLATE is set, then the command is
14975 *		   not retried, it is simply failed.
14976 *
14977 *		user_funcp - Ptr to function to call before dispatching the
14978 *		   command. May be NULL if no action needs to be performed.
14979 *		   (Primarily intended for printing messages.)
14980 *
14981 *		user_arg - Optional argument to be passed along to
14982 *		   the user_funcp call.
14983 *
14984 *		failure_code - errno return code to set in the bp if the
14985 *		   command is going to be failed.
14986 *
14987 *		retry_delay - Retry delay interval in (clock_t) units. May
14988 *		   be zero, which indicates that the command should be retried
14989 *		   immediately (i.e., without an intervening delay).
14990 *
14991 *		statp - Ptr to kstat function to be updated if the command
14992 *		   is queued for a delayed retry. May be NULL if no kstat
14993 *		   update is desired.
14994 *
14995 *     Context: May be called from interrupt context.
14996 */
14997
14998static void
14999sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15000	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15001	code), void *user_arg, int failure_code,  clock_t retry_delay,
15002	void (*statp)(kstat_io_t *))
15003{
15004	struct sd_xbuf	*xp;
15005	struct scsi_pkt	*pktp;
15006	struct sd_fm_internal *sfip;
15007
15008	ASSERT(un != NULL);
15009	ASSERT(mutex_owned(SD_MUTEX(un)));
15010	ASSERT(bp != NULL);
15011	xp = SD_GET_XBUF(bp);
15012	ASSERT(xp != NULL);
15013	pktp = SD_GET_PKTP(bp);
15014	ASSERT(pktp != NULL);
15015
15016	sfip = (struct sd_fm_internal *)un->un_fm_private;
15017	ASSERT(sfip != NULL);
15018
15019	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15020	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15021
15022	/*
15023	 * If we are syncing or dumping, fail the command to avoid
15024	 * recursively calling back into scsi_transport().
15025	 */
15026	if (ddi_in_panic()) {
15027		goto fail_command_no_log;
15028	}
15029
15030	/*
15031	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15032	 * log an error and fail the command.
15033	 */
15034	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15035		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15036		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15037		sd_dump_memory(un, SD_LOG_IO, "CDB",
15038		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15039		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15040		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15041		goto fail_command;
15042	}
15043
15044	/*
15045	 * If we are suspended or dumping, put the command onto the head
15046	 * of the wait queue, since we don't want to start more commands,
15047	 * and clear un_retry_bp. When we are resumed, the commands on
15048	 * the wait queue will be handled.
15049	 */
15050	switch (un->un_state) {
15051	case SD_STATE_SUSPENDED:
15052	case SD_STATE_DUMPING:
15053		bp->av_forw = un->un_waitq_headp;
15054		un->un_waitq_headp = bp;
15055		if (un->un_waitq_tailp == NULL) {
15056			un->un_waitq_tailp = bp;
15057		}
15058		if (bp == un->un_retry_bp) {
15059			un->un_retry_bp = NULL;
15060			un->un_retry_statp = NULL;
15061		}
15062		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15063		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15064		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15065		return;
15066	default:
15067		break;
15068	}
15069
15070	/*
15071	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15072	 * is set; if it is then we do not want to retry the command.
15073	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15074	 */
15075	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15076		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15077			goto fail_command;
15078		}
15079	}
15080
15081
15082	/*
15083	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15084	 * command timeout or a selection timeout has occurred. This means
15085	 * that we were unable to establish any kind of communication with
15086	 * the target, and subsequent retries and/or commands are likely
15087	 * to encounter similar results and take a long time to complete.
15088	 *
15089	 * If this is a failfast error condition, we need to update the
15090	 * failfast state, even if this bp does not have B_FAILFAST set.
15091	 */
15092	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15093		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15094			ASSERT(un->un_failfast_bp == NULL);
15095			/*
15096			 * If we are already in the active failfast state, and
15097			 * another failfast error condition has been detected,
15098			 * then fail this command if it has B_FAILFAST set.
15099			 * If B_FAILFAST is clear, then maintain the legacy
15100	 * behavior of retrying heroically, even though this will
15101			 * take a lot more time to fail the command.
15102			 */
15103			if (bp->b_flags & B_FAILFAST) {
15104				goto fail_command;
15105			}
15106		} else {
15107			/*
15108			 * We're not in the active failfast state, but we
15109			 * have a failfast error condition, so we must begin
15110			 * transition to the next state. We do this regardless
15111			 * of whether or not this bp has B_FAILFAST set.
15112			 */
15113			if (un->un_failfast_bp == NULL) {
15114				/*
15115				 * This is the first bp to meet a failfast
15116				 * condition so save it on un_failfast_bp &
15117				 * do normal retry processing. Do not enter
15118				 * active failfast state yet. This marks
15119				 * entry into the "failfast pending" state.
15120				 */
15121				un->un_failfast_bp = bp;
15122
15123			} else if (un->un_failfast_bp == bp) {
15124				/*
15125				 * This is the second time *this* bp has
15126				 * encountered a failfast error condition,
15127				 * so enter active failfast state & flush
15128				 * queues as appropriate.
15129				 */
15130				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15131				un->un_failfast_bp = NULL;
15132				sd_failfast_flushq(un);
15133
15134				/*
15135				 * Fail this bp now if B_FAILFAST set;
15136				 * otherwise continue with retries. (It would
15137				 * be pretty ironic if this bp succeeded on a
15138				 * subsequent retry after we just flushed all
15139				 * the queues).
15140				 */
15141				if (bp->b_flags & B_FAILFAST) {
15142					goto fail_command;
15143				}
15144
15145#if !defined(lint) && !defined(__lint)
15146			} else {
15147				/*
15148				 * If neither of the preceding conditionals
15149				 * was true, it means that there is some
15150				 * *other* bp that has met an initial failfast
15151				 * condition and is currently either being
15152				 * retried or is waiting to be retried. In
15153				 * that case we should perform normal retry
15154				 * processing on *this* bp, since there is a
15155				 * chance that the current failfast condition
15156				 * is transient and recoverable. If that does
15157				 * not turn out to be the case, then retries
15158				 * will be cleared when the wait queue is
15159				 * flushed anyway.
15160				 */
15161#endif
15162			}
15163		}
15164	} else {
15165		/*
15166		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15167		 * likely were able to at least establish some level of
15168		 * communication with the target and subsequent commands
15169	 * and/or retries are likely to get through to the target.
15170		 * In this case we want to be aggressive about clearing
15171		 * the failfast state. Note that this does not affect
15172		 * the "failfast pending" condition.
15173		 */
15174		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15175	}
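
	/*
	 * Editorial summary of the failfast transitions implemented above:
	 *
	 *	INACTIVE -> pending:	first failfast error; the bp is
	 *				remembered in un_failfast_bp.
	 *	pending  -> ACTIVE:	the same bp hits a second failfast
	 *				error; queues are flushed via
	 *				sd_failfast_flushq().
	 *	any      -> INACTIVE:	a retry without SD_RETRIES_FAILFAST
	 *				is seen.
	 */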
15176
15177
15178	/*
15179	 * Check the specified retry count to see if we can still do
15180	 * any retries with this pkt before we should fail it.
15181	 */
15182	switch (retry_check_flag & SD_RETRIES_MASK) {
15183	case SD_RETRIES_VICTIM:
15184		/*
15185		 * Check the victim retry count. If exhausted, then fall
15186		 * thru & check against the standard retry count.
15187		 */
15188		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15189			/* Increment count & proceed with the retry */
15190			xp->xb_victim_retry_count++;
15191			break;
15192		}
15193		/* Victim retries exhausted, fall back to std. retries... */
15194		/* FALLTHRU */
15195
15196	case SD_RETRIES_STANDARD:
15197		if (xp->xb_retry_count >= un->un_retry_count) {
15198			/* Retries exhausted, fail the command */
15199			SD_TRACE(SD_LOG_IO_CORE, un,
15200			    "sd_retry_command: retries exhausted!\n");
15201			/*
15202			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15203			 * commands with nonzero pkt_resid.
15204			 */
15205			if ((pktp->pkt_reason == CMD_CMPLT) &&
15206			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15207			    (pktp->pkt_resid != 0)) {
15208				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15209				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15210					SD_UPDATE_B_RESID(bp, pktp);
15211				}
15212			}
15213			goto fail_command;
15214		}
15215		xp->xb_retry_count++;
15216		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15217		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15218		break;
15219
15220	case SD_RETRIES_UA:
15221		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15222			/* Retries exhausted, fail the command */
15223			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15224			    "Unit Attention retries exhausted. "
15225			    "Check the target.\n");
15226			goto fail_command;
15227		}
15228		xp->xb_ua_retry_count++;
15229		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15230		    "sd_retry_command: retry count:%d\n",
15231		    xp->xb_ua_retry_count);
15232		break;
15233
15234	case SD_RETRIES_BUSY:
15235		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15236			/* Retries exhausted, fail the command */
15237			SD_TRACE(SD_LOG_IO_CORE, un,
15238			    "sd_retry_command: retries exhausted!\n");
15239			goto fail_command;
15240		}
15241		xp->xb_retry_count++;
15242		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15243		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15244		break;
15245
15246	case SD_RETRIES_NOCHECK:
15247	default:
15248		/* No retry count to check. Just proceed with the retry */
15249		break;
15250	}
15251
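	/*
	 * FLAG_HEAD asks the transport to place this pkt at the head of
	 * the HBA's queue, so the retried command does not wait behind
	 * commands that were queued after it originally failed.
	 */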
15252	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15253
15254	/*
15255	 * If this is a non-USCSI command that failed during its last
15256	 * execution, post an ereport with a driver-assessment value of
15257	 * "retry".
15258	 * For partial DMA, request sense, and STATUS_QFULL there is no
15259	 * hardware error, so we bypass ereport posting.
15260	 */
15261	if (failure_code != 0) {
15262		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
15263			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
15264			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RETRY);
15265		}
15266	}
15267
15268	/*
15269	 * If we were given a zero timeout, we must attempt to retry the
15270	 * command immediately (i.e., without a delay).
15271	 */
15272	if (retry_delay == 0) {
15273		/*
15274		 * Check some limiting conditions to see if we can actually
15275		 * do the immediate retry.  If we cannot, then we must
15276		 * fall back to queueing up a delayed retry.
15277		 */
15278		if (un->un_ncmds_in_transport >= un->un_throttle) {
15279			/*
15280			 * We are at the throttle limit for the target,
15281			 * fall back to delayed retry.
15282			 */
15283			retry_delay = un->un_busy_timeout;
15284			statp = kstat_waitq_enter;
15285			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15286			    "sd_retry_command: immed. retry hit "
15287			    "throttle!\n");
15288		} else {
15289			/*
15290			 * We're clear to proceed with the immediate retry.
15291			 * First call the user-provided function (if any)
15292			 */
15293			if (user_funcp != NULL) {
15294				(*user_funcp)(un, bp, user_arg,
15295				    SD_IMMEDIATE_RETRY_ISSUED);
15296#ifdef __lock_lint
15297				sd_print_incomplete_msg(un, bp, user_arg,
15298				    SD_IMMEDIATE_RETRY_ISSUED);
15299				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15300				    SD_IMMEDIATE_RETRY_ISSUED);
15301				sd_print_sense_failed_msg(un, bp, user_arg,
15302				    SD_IMMEDIATE_RETRY_ISSUED);
15303#endif
15304			}
15305
15306			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15307			    "sd_retry_command: issuing immediate retry\n");
15308
15309			/*
15310			 * Call sd_start_cmds() to transport the command to
15311			 * the target.
15312			 */
15313			sd_start_cmds(un, bp);
15314
15315			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15316			    "sd_retry_command exit\n");
15317			return;
15318		}
15319	}
15320
15321	/*
15322	 * Set up to retry the command after a delay.
15323	 * First call the user-provided function (if any)
15324	 */
15325	if (user_funcp != NULL) {
15326		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15327	}
15328
15329	sd_set_retry_bp(un, bp, retry_delay, statp);
15330
15331	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15332	return;
15333
15334fail_command:
15335
15336	if (user_funcp != NULL) {
15337		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15338	}
15339
15340fail_command_no_log:
15341
15342	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15343	    "sd_retry_command: returning failed command\n");
15344
15345	sd_return_failed_command(un, bp, failure_code);
15346
15347	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15348}
15349
15350
15351/*
15352 *    Function: sd_set_retry_bp
15353 *
15354 * Description: Set up the given bp for retry.
15355 *
15356 *   Arguments: un - ptr to associated softstate
15357 *		bp - ptr to buf(9S) for the command
15358 *		retry_delay - time interval before issuing retry (may be 0)
15359 *		statp - optional pointer to kstat function
15360 *
15361 *     Context: May be called under interrupt context
15362 */
15363
15364static void
15365sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15366	void (*statp)(kstat_io_t *))
15367{
15368	ASSERT(un != NULL);
15369	ASSERT(mutex_owned(SD_MUTEX(un)));
15370	ASSERT(bp != NULL);
15371
15372	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15373	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15374
15375	/*
15376	 * Indicate that the command is being retried. This will not allow any
15377	 * other commands on the wait queue to be transported to the target
15378	 * until this command has been completed (success or failure). The
15379	 * "retry command" is not transported to the target until the given
15380	 * time delay expires, unless the user specified a 0 retry_delay.
15381	 *
15382	 * Note: the timeout(9F) callback routine is what actually calls
15383	 * sd_start_cmds() to transport the command, with the exception of a
15384	 * zero retry_delay. The only current implementor of a zero retry delay
15385	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15386	 */
15387	if (un->un_retry_bp == NULL) {
15388		ASSERT(un->un_retry_statp == NULL);
15389		un->un_retry_bp = bp;
15390
15391		/*
15392		 * If the user has not specified a delay, the command should
15393		 * be queued and no timeout should be scheduled.
15394		 */
15395		if (retry_delay == 0) {
15396			/*
15397			 * Save the kstat pointer that will be used in the
15398			 * call to SD_UPDATE_KSTATS() below, so that
15399			 * sd_start_cmds() can correctly decrement the waitq
15400			 * count when it is time to transport this command.
15401			 */
15402			un->un_retry_statp = statp;
15403			goto done;
15404		}
15405	}
15406
15407	if (un->un_retry_bp == bp) {
15408		/*
15409		 * Save the kstat pointer that will be used in the call to
15410		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15411		 * correctly decrement the waitq count when it is time to
15412		 * transport this command.
15413		 */
15414		un->un_retry_statp = statp;
15415
15416		/*
15417		 * Schedule a timeout if:
15418		 *   1) The user has specified a delay.
15419		 *   2) There is not a START_STOP_UNIT callback pending.
15420		 *
15421		 * If no delay has been specified, then it is up to the caller
15422		 * to ensure that IO processing continues without stalling.
15423		 * Effectively, this means that the caller will issue the
15424		 * required call to sd_start_cmds(). The START_STOP_UNIT
15425		 * callback does this after the START STOP UNIT command has
15426		 * completed. In either of these cases we should not schedule
15427		 * a timeout callback here.  Also don't schedule the timeout if
15428		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15429		 */
15430		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15431		    (un->un_direct_priority_timeid == NULL)) {
15432			un->un_retry_timeid =
15433			    timeout(sd_start_retry_command, un, retry_delay);
15434			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15435			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15436			    " bp:0x%p un_retry_timeid:0x%p\n",
15437			    un, bp, un->un_retry_timeid);
15438		}
15439	} else {
15440		/*
15441		 * We only get in here if there is already another command
15442		 * waiting to be retried.  In this case, we just put the
15443		 * given command onto the wait queue, so it can be transported
15444		 * after the current retry command has completed.
15445		 *
15446		 * We also have to make sure that if the command at the head
15447		 * of the wait queue is un_failfast_bp, we do not put any
15448		 * other commands that are to be retried ahead of it.
15449		 */
15450		if ((un->un_failfast_bp != NULL) &&
15451		    (un->un_failfast_bp == un->un_waitq_headp)) {
15452			/*
15453			 * Enqueue this command AFTER the first command on
15454			 * the wait queue (which is also un_failfast_bp).
15455			 */
15456			bp->av_forw = un->un_waitq_headp->av_forw;
15457			un->un_waitq_headp->av_forw = bp;
15458			if (un->un_waitq_headp == un->un_waitq_tailp) {
15459				un->un_waitq_tailp = bp;
15460			}
15461		} else {
15462			/* Enqueue this command at the head of the waitq. */
15463			bp->av_forw = un->un_waitq_headp;
15464			un->un_waitq_headp = bp;
15465			if (un->un_waitq_tailp == NULL) {
15466				un->un_waitq_tailp = bp;
15467			}
15468		}
15469
15470		if (statp == NULL) {
15471			statp = kstat_waitq_enter;
15472		}
15473		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15474		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15475	}
15476
15477done:
15478	if (statp != NULL) {
15479		SD_UPDATE_KSTATS(un, statp, bp);
15480	}
15481
15482	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15483	    "sd_set_retry_bp: exit un:0x%p\n", un);
15484}
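/*
 * Illustrative sketch (not driver code): the wait queue managed above is a
 * singly-linked list threaded through buf.av_forw, with separate head and
 * tail pointers. Inserting a bp immediately behind the current head thus
 * reduces to:
 *
 *	bp->av_forw = headp->av_forw;	-- link bp in after the head
 *	headp->av_forw = bp;
 *	if (headp == tailp)
 *		tailp = bp;		-- one-element list: bp is new tail
 */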
15485
15486
15487/*
15488 *    Function: sd_start_retry_command
15489 *
15490 * Description: Start the command that has been waiting on the target's
15491 *		retry queue.  Called from timeout(9F) context after the
15492 *		retry delay interval has expired.
15493 *
15494 *   Arguments: arg - pointer to associated softstate for the device.
15495 *
15496 *     Context: timeout(9F) thread context.  May not sleep.
15497 */
15498
15499static void
15500sd_start_retry_command(void *arg)
15501{
15502	struct sd_lun *un = arg;
15503
15504	ASSERT(un != NULL);
15505	ASSERT(!mutex_owned(SD_MUTEX(un)));
15506
15507	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15508	    "sd_start_retry_command: entry\n");
15509
15510	mutex_enter(SD_MUTEX(un));
15511
15512	un->un_retry_timeid = NULL;
15513
15514	if (un->un_retry_bp != NULL) {
15515		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15516		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15517		    un, un->un_retry_bp);
15518		sd_start_cmds(un, un->un_retry_bp);
15519	}
15520
15521	mutex_exit(SD_MUTEX(un));
15522
15523	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15524	    "sd_start_retry_command: exit\n");
15525}
15526
15527
15528/*
15529 *    Function: sd_start_direct_priority_command
15530 *
15531 * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15532 *		received TRAN_BUSY when we called scsi_transport() to send it
15533 *		to the underlying HBA. This function is called from timeout(9F)
15534 *		context after the delay interval has expired.
15535 *
15536 *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15537 *
15538 *     Context: timeout(9F) thread context.  May not sleep.
15539 */
15540
15541static void
15542sd_start_direct_priority_command(void *arg)
15543{
15544	struct buf	*priority_bp = arg;
15545	struct sd_lun	*un;
15546
15547	ASSERT(priority_bp != NULL);
15548	un = SD_GET_UN(priority_bp);
15549	ASSERT(un != NULL);
15550	ASSERT(!mutex_owned(SD_MUTEX(un)));
15551
15552	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15553	    "sd_start_direct_priority_command: entry\n");
15554
15555	mutex_enter(SD_MUTEX(un));
15556	un->un_direct_priority_timeid = NULL;
15557	sd_start_cmds(un, priority_bp);
15558	mutex_exit(SD_MUTEX(un));
15559
15560	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15561	    "sd_start_direct_priority_command: exit\n");
15562}
15563
15564
15565/*
15566 *    Function: sd_send_request_sense_command
15567 *
15568 * Description: Sends a REQUEST SENSE command to the target
15569 *
15570 *     Context: May be called from interrupt context.
15571 */
15572
15573static void
15574sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15575	struct scsi_pkt *pktp)
15576{
15577	ASSERT(bp != NULL);
15578	ASSERT(un != NULL);
15579	ASSERT(mutex_owned(SD_MUTEX(un)));
15580
15581	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15582	    "entry: buf:0x%p\n", bp);
15583
15584	/*
15585	 * If we are syncing or dumping, then fail the command to avoid a
15586	 * recursive callback into scsi_transport(). Also fail the command
15587	 * if we are suspended (legacy behavior).
15588	 */
15589	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15590	    (un->un_state == SD_STATE_DUMPING)) {
15591		sd_return_failed_command(un, bp, EIO);
15592		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15593		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15594		return;
15595	}
15596
15597	/*
15598	 * Retry the failed command and don't issue the request sense if:
15599	 *    1) the sense buf is busy
15600	 *    2) we have 1 or more outstanding commands on the target
15601	 *    (the sense data will be cleared or invalidated anyway)
15602	 *
15603	 * Note: There could be an issue with not checking a retry limit here;
15604	 * the problem is determining which retry limit to check.
15605	 */
15606	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15607		/* Don't retry if the command is flagged as non-retryable */
15608		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15609			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15610			    NULL, NULL, 0, un->un_busy_timeout,
15611			    kstat_waitq_enter);
15612			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15613			    "sd_send_request_sense_command: "
15614			    "at full throttle, retrying exit\n");
15615		} else {
15616			sd_return_failed_command(un, bp, EIO);
15617			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15618			    "sd_send_request_sense_command: "
15619			    "at full throttle, non-retryable exit\n");
15620		}
15621		return;
15622	}
15623
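	/*
	 * Only one REQUEST SENSE pkt/buf pair is allocated per instance
	 * (see sd_alloc_rqs()), so un_sense_isbusy serializes its use:
	 * sd_mark_rqs_busy() claims it here and sd_mark_rqs_idle()
	 * releases it once the sense data has been collected.
	 */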
15624	sd_mark_rqs_busy(un, bp);
15625	sd_start_cmds(un, un->un_rqs_bp);
15626
15627	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15628	    "sd_send_request_sense_command: exit\n");
15629}
15630
15631
15632/*
15633 *    Function: sd_mark_rqs_busy
15634 *
15635 * Description: Indicate that the request sense bp for this instance is
15636 *		in use.
15637 *
15638 *     Context: May be called under interrupt context
15639 */
15640
15641static void
15642sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15643{
15644	struct sd_xbuf	*sense_xp;
15645
15646	ASSERT(un != NULL);
15647	ASSERT(bp != NULL);
15648	ASSERT(mutex_owned(SD_MUTEX(un)));
15649	ASSERT(un->un_sense_isbusy == 0);
15650
15651	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15652	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15653
15654	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15655	ASSERT(sense_xp != NULL);
15656
15657	SD_INFO(SD_LOG_IO, un,
15658	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15659
15660	ASSERT(sense_xp->xb_pktp != NULL);
15661	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15662	    == (FLAG_SENSING | FLAG_HEAD));
15663
15664	un->un_sense_isbusy = 1;
15665	un->un_rqs_bp->b_resid = 0;
15666	sense_xp->xb_pktp->pkt_resid  = 0;
15667	sense_xp->xb_pktp->pkt_reason = 0;
15668
15669	/* So we can get back the bp at interrupt time! */
15670	sense_xp->xb_sense_bp = bp;
15671
15672	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15673
15674	/*
15675	 * Mark this buf as awaiting sense data. (This is already set in
15676	 * the pkt_flags for the RQS packet.)
15677	 */
15678	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15679
15680	/* Request sense down same path */
15681	if (scsi_pkt_allocated_correctly((SD_GET_XBUF(bp))->xb_pktp) &&
15682	    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance)
15683		sense_xp->xb_pktp->pkt_path_instance =
15684		    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance;
15685
15686	sense_xp->xb_retry_count	= 0;
15687	sense_xp->xb_victim_retry_count = 0;
15688	sense_xp->xb_ua_retry_count	= 0;
15689	sense_xp->xb_nr_retry_count 	= 0;
15690	sense_xp->xb_dma_resid  = 0;
15691
15692	/* Clean up the fields for auto-request sense */
15693	sense_xp->xb_sense_status = 0;
15694	sense_xp->xb_sense_state  = 0;
15695	sense_xp->xb_sense_resid  = 0;
15696	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15697
15698	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15699}
15700
15701
15702/*
15703 *    Function: sd_mark_rqs_idle
15704 *
15705 * Description: SD_MUTEX must be held continuously through this routine
15706 *		to prevent reuse of the rqs struct before the caller can
15707 *		complete its processing.
15708 *
15709 * Return Code: Pointer to the RQS buf
15710 *
15711 *     Context: May be called under interrupt context
15712 */
15713
15714static struct buf *
15715sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15716{
15717	struct buf *bp;
15718	ASSERT(un != NULL);
15719	ASSERT(sense_xp != NULL);
15720	ASSERT(mutex_owned(SD_MUTEX(un)));
15721	ASSERT(un->un_sense_isbusy != 0);
15722
15723	un->un_sense_isbusy = 0;
15724	bp = sense_xp->xb_sense_bp;
15725	sense_xp->xb_sense_bp = NULL;
15726
15727	/* This pkt is no longer interested in getting sense data */
15728	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15729
15730	return (bp);
15731}
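/*
 * Hedged usage sketch of the busy/idle pair above (names are from the
 * surrounding code; each step runs with SD_MUTEX held, though the mutex is
 * not held across the I/O itself):
 *
 *	sd_mark_rqs_busy(un, failed_bp);	  -- stash failed_bp in the xbuf
 *	sd_start_cmds(un, un->un_rqs_bp);	  -- transport REQUEST SENSE
 *	... completion arrives in sdintr() ...
 *	cmd_bp = sd_mark_rqs_idle(un, sense_xp);  -- recover failed_bp
 */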
15732
15733
15734
15735/*
15736 *    Function: sd_alloc_rqs
15737 *
15738 * Description: Set up the unit to receive auto request sense data
15739 *
15740 * Return Code: DDI_SUCCESS or DDI_FAILURE
15741 *
15742 *     Context: Called under attach(9E) context
15743 */
15744
15745static int
15746sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15747{
15748	struct sd_xbuf *xp;
15749
15750	ASSERT(un != NULL);
15751	ASSERT(!mutex_owned(SD_MUTEX(un)));
15752	ASSERT(un->un_rqs_bp == NULL);
15753	ASSERT(un->un_rqs_pktp == NULL);
15754
15755	/*
15756	 * First allocate the required buf and scsi_pkt structs, then set up
15757	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15758	 */
15759	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15760	    MAX_SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15761	if (un->un_rqs_bp == NULL) {
15762		return (DDI_FAILURE);
15763	}
15764
15765	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15766	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15767
15768	if (un->un_rqs_pktp == NULL) {
15769		sd_free_rqs(un);
15770		return (DDI_FAILURE);
15771	}
15772
15773	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15774	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15775	    SCMD_REQUEST_SENSE, 0, MAX_SENSE_LENGTH, 0);
15776
15777	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15778
15779	/* Set up the other needed members in the ARQ scsi_pkt. */
15780	un->un_rqs_pktp->pkt_comp   = sdintr;
15781	un->un_rqs_pktp->pkt_time   = sd_io_time;
15782	un->un_rqs_pktp->pkt_flags |=
15783	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15784
15785	/*
15786	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
15787	 * provide any initpkt or destroypkt routines, as we take care of
15788	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15789	 */
15790	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15791	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15792	xp->xb_pktp = un->un_rqs_pktp;
15793	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15794	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15795	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15796
15797	/*
15798	 * Save the pointer to the request sense private bp so it can
15799	 * be retrieved in sdintr.
15800	 */
15801	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
15802	ASSERT(un->un_rqs_bp->b_private == xp);
15803
15804	/*
15805	 * See if the HBA supports auto-request sense for the specified
15806	 * target/lun. If it does, then try to enable it (if not already
15807	 * enabled).
15808	 *
15809	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
15810	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
15811	 * return success.  However, in both of these cases ARQ is always
15812	 * enabled and scsi_ifgetcap will always return true. The best approach
15813	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
15814	 *
15815	 * The third case is an HBA (adp) that always returns enabled on
15816	 * scsi_ifgetcap even when ARQ is not enabled; there the best
15817	 * approach is to issue a scsi_ifsetcap() and then a scsi_ifgetcap().
15818	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
15819	 */
15820
15821	if (un->un_f_is_fibre == TRUE) {
15822		un->un_f_arq_enabled = TRUE;
15823	} else {
15824#if defined(__i386) || defined(__amd64)
15825		/*
15826		 * Circumvent the Adaptec bug, remove this code when
15827		 * the bug is fixed
15828		 */
15829		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
15830#endif
15831		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
15832		case 0:
15833			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15834			    "sd_alloc_rqs: HBA supports ARQ\n");
15835			/*
15836			 * ARQ is supported by this HBA but currently is not
15837			 * enabled. Attempt to enable it and if successful then
15838			 * mark this instance as ARQ enabled.
15839			 */
15840			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
15841			    == 1) {
15842				/* Successfully enabled ARQ in the HBA */
15843				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15844				    "sd_alloc_rqs: ARQ enabled\n");
15845				un->un_f_arq_enabled = TRUE;
15846			} else {
15847				/* Could not enable ARQ in the HBA */
15848				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15849				    "sd_alloc_rqs: failed ARQ enable\n");
15850				un->un_f_arq_enabled = FALSE;
15851			}
15852			break;
15853		case 1:
15854			/*
15855			 * ARQ is supported by this HBA and is already enabled.
15856			 * Just mark ARQ as enabled for this instance.
15857			 */
15858			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15859			    "sd_alloc_rqs: ARQ already enabled\n");
15860			un->un_f_arq_enabled = TRUE;
15861			break;
15862		default:
15863			/*
15864			 * ARQ is not supported by this HBA; disable it for this
15865			 * instance.
15866			 */
15867			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15868			    "sd_alloc_rqs: HBA does not support ARQ\n");
15869			un->un_f_arq_enabled = FALSE;
15870			break;
15871		}
15872	}
15873
15874	return (DDI_SUCCESS);
15875}
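/*
 * A minimal sketch of the capability probe above, assuming the standard
 * scsi_ifgetcap(9F)/scsi_ifsetcap(9F) boolean-capability convention
 * (1 = enabled, 0 = supported but disabled, negative = unsupported):
 *
 *	switch (scsi_ifgetcap(ap, "auto-rqsense", 1)) {
 *	case 1:			-- already enabled
 *		enabled = TRUE;
 *		break;
 *	case 0:			-- supported; try to enable it
 *		enabled = (scsi_ifsetcap(ap, "auto-rqsense", 1, 1) == 1);
 *		break;
 *	default:		-- not supported
 *		enabled = FALSE;
 *		break;
 *	}
 */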
15876
15877
15878/*
15879 *    Function: sd_free_rqs
15880 *
15881 * Description: Cleanup for the pre-instance RQS command.
15882 *
15883 *     Context: Kernel thread context
15884 */
15885
15886static void
15887sd_free_rqs(struct sd_lun *un)
15888{
15889	ASSERT(un != NULL);
15890
15891	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15892
15893	/*
15894	 * If consistent memory is bound to a scsi_pkt, the pkt
15895	 * has to be destroyed *before* freeing the consistent memory.
15896	 * Don't change the sequence of these operations:
15897	 * scsi_destroy_pkt() might access memory after it was freed
15898	 * by scsi_free_consistent_buf(), which isn't allowed.
15899	 */
15900	if (un->un_rqs_pktp != NULL) {
15901		scsi_destroy_pkt(un->un_rqs_pktp);
15902		un->un_rqs_pktp = NULL;
15903	}
15904
15905	if (un->un_rqs_bp != NULL) {
15906		struct sd_xbuf *xp = SD_GET_XBUF(un->un_rqs_bp);
15907		if (xp != NULL) {
15908			kmem_free(xp, sizeof (struct sd_xbuf));
15909		}
15910		scsi_free_consistent_buf(un->un_rqs_bp);
15911		un->un_rqs_bp = NULL;
15912	}
15913	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15914}
15915
15916
15917
15918/*
15919 *    Function: sd_reduce_throttle
15920 *
15921 * Description: Reduces the maximum # of outstanding commands on a
15922 *		target to the current number of outstanding commands.
15923 *		Queues a timeout(9F) callback to restore the limit
15924 *		after a specified interval has elapsed.
15925 *		Typically used when we get a TRAN_BUSY return code
15926 *		back from scsi_transport().
15927 *
15928 *   Arguments: un - ptr to the sd_lun softstate struct
15929 *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15930 *
15931 *     Context: May be called from interrupt context
15932 */
15933
15934static void
15935sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15936{
15937	ASSERT(un != NULL);
15938	ASSERT(mutex_owned(SD_MUTEX(un)));
15939	ASSERT(un->un_ncmds_in_transport >= 0);
15940
15941	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15942	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15943	    un, un->un_throttle, un->un_ncmds_in_transport);
15944
15945	if (un->un_throttle > 1) {
15946		if (un->un_f_use_adaptive_throttle == TRUE) {
15947			switch (throttle_type) {
15948			case SD_THROTTLE_TRAN_BUSY:
15949				if (un->un_busy_throttle == 0) {
15950					un->un_busy_throttle = un->un_throttle;
15951				}
15952				break;
15953			case SD_THROTTLE_QFULL:
15954				un->un_busy_throttle = 0;
15955				break;
15956			default:
15957				ASSERT(FALSE);
15958			}
15959
15960			if (un->un_ncmds_in_transport > 0) {
15961				un->un_throttle = un->un_ncmds_in_transport;
15962			}
15963
15964		} else {
15965			if (un->un_ncmds_in_transport == 0) {
15966				un->un_throttle = 1;
15967			} else {
15968				un->un_throttle = un->un_ncmds_in_transport;
15969			}
15970		}
15971	}
15972
15973	/* Reschedule the timeout if none is currently active */
15974	if (un->un_reset_throttle_timeid == NULL) {
15975		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15976		    un, SD_THROTTLE_RESET_INTERVAL);
15977		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15978		    "sd_reduce_throttle: timeout scheduled!\n");
15979	}
15980
15981	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15982	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15983}
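/*
 * Worked example for the adaptive path above: with un_throttle == 16 and
 * 5 commands in transport, a TRAN_BUSY saves 16 in un_busy_throttle and
 * drops un_throttle to 5; a QFULL instead clears un_busy_throttle, so
 * sd_restore_throttle() later ramps the limit back up incrementally
 * rather than restoring it in one step.
 */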
15984
15985
15986
15987/*
15988 *    Function: sd_restore_throttle
15989 *
15990 * Description: Callback function for timeout(9F).  Resets the current
15991 *		value of un->un_throttle to its default.
15992 *
15993 *   Arguments: arg - pointer to associated softstate for the device.
15994 *
15995 *     Context: May be called from interrupt context
15996 */
15997
15998static void
15999sd_restore_throttle(void *arg)
16000{
16001	struct sd_lun	*un = arg;
16002
16003	ASSERT(un != NULL);
16004	ASSERT(!mutex_owned(SD_MUTEX(un)));
16005
16006	mutex_enter(SD_MUTEX(un));
16007
16008	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16009	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16010
16011	un->un_reset_throttle_timeid = NULL;
16012
16013	if (un->un_f_use_adaptive_throttle == TRUE) {
16014		/*
16015		 * If un_busy_throttle is nonzero, then it contains the
16016		 * value that un_throttle was when we got a TRAN_BUSY back
16017		 * from scsi_transport(). We want to revert back to this
16018		 * value.
16019		 *
16020		 * In the QFULL case, the throttle limit will incrementally
16021		 * increase until it reaches max throttle.
16022		 */
16023		if (un->un_busy_throttle > 0) {
16024			un->un_throttle = un->un_busy_throttle;
16025			un->un_busy_throttle = 0;
16026		} else {
16027			/*
16028			 * Increase the throttle by 10% to open the gate
16029			 * slowly; schedule another restore if the saved
16030			 * throttle has not yet been reached.
16031			 */
16032			short throttle;
16033			if (sd_qfull_throttle_enable) {
16034				throttle = un->un_throttle +
16035				    max((un->un_throttle / 10), 1);
16036				un->un_throttle =
16037				    (throttle < un->un_saved_throttle) ?
16038				    throttle : un->un_saved_throttle;
16039				if (un->un_throttle < un->un_saved_throttle) {
16040					un->un_reset_throttle_timeid =
16041					    timeout(sd_restore_throttle,
16042					    un,
16043					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16044				}
16045			}
16046		}
16047
16048		/*
16049		 * If un_throttle has fallen below the low-water mark, we
16050		 * restore the maximum value here (and allow it to ratchet
16051		 * down again if necessary).
16052		 */
16053		if (un->un_throttle < un->un_min_throttle) {
16054			un->un_throttle = un->un_saved_throttle;
16055		}
16056	} else {
16057		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16058		    "restoring limit from 0x%x to 0x%x\n",
16059		    un->un_throttle, un->un_saved_throttle);
16060		un->un_throttle = un->un_saved_throttle;
16061	}
16062
16063	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16064	    "sd_restore_throttle: calling sd_start_cmds!\n");
16065
16066	sd_start_cmds(un, NULL);
16067
16068	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16069	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16070	    un, un->un_throttle);
16071
16072	mutex_exit(SD_MUTEX(un));
16073
16074	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16075}
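/*
 * Worked example of the QFULL ramp above: each pass adds
 * max(un_throttle / 10, 1), so a throttle of 10 with a saved value of 16
 * steps 10 -> 11 -> 12 -> ... -> 16, scheduling another timeout at
 * SD_QFULL_THROTTLE_RESET_INTERVAL after every step until the saved
 * throttle is reached.
 */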
16076
16077/*
16078 *    Function: sdrunout
16079 *
16080 * Description: Callback routine for scsi_init_pkt when a resource allocation
16081 *		fails.
16082 *
16083 *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16084 *		soft state instance.
16085 *
16086 * Return Code: The scsi_init_pkt routine allows for the callback function to
16087 *		return a 0 indicating the callback should be rescheduled or a 1
16088 *		indicating not to reschedule. This routine always returns 1
16089 *		because the driver always provides a callback function to
16090 *		scsi_init_pkt. This results in a callback always being scheduled
16091 *		(via the scsi_init_pkt callback implementation) if a resource
16092 *		failure occurs.
16093 *
16094 *     Context: This callback function may not block or call routines that block
16095 *
16096 *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16097 *		request persisting at the head of the list which cannot be
16098 *		satisfied even after multiple retries. In the future the driver
16099 *		may implement some type of maximum runout count before failing
16100 *		an I/O.
16101 */
16102
16103static int
16104sdrunout(caddr_t arg)
16105{
16106	struct sd_lun	*un = (struct sd_lun *)arg;
16107
16108	ASSERT(un != NULL);
16109	ASSERT(!mutex_owned(SD_MUTEX(un)));
16110
16111	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16112
16113	mutex_enter(SD_MUTEX(un));
16114	sd_start_cmds(un, NULL);
16115	mutex_exit(SD_MUTEX(un));
16116	/*
16117	 * This callback routine always returns 1 (i.e. do not reschedule)
16118	 * because we always specify sdrunout as the callback handler for
16119	 * scsi_init_pkt inside the call to sd_start_cmds.
16120	 */
16121	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16122	return (1);
16123}
16124
16125
16126/*
16127 *    Function: sdintr
16128 *
16129 * Description: Completion callback routine for scsi_pkt(9S) structs
16130 *		sent to the HBA driver via scsi_transport(9F).
16131 *
16132 *     Context: Interrupt context
16133 */
16134
16135static void
16136sdintr(struct scsi_pkt *pktp)
16137{
16138	struct buf	*bp;
16139	struct sd_xbuf	*xp;
16140	struct sd_lun	*un;
16141	size_t		actual_len;
16142	sd_ssc_t	*sscp;
16143
16144	ASSERT(pktp != NULL);
16145	bp = (struct buf *)pktp->pkt_private;
16146	ASSERT(bp != NULL);
16147	xp = SD_GET_XBUF(bp);
16148	ASSERT(xp != NULL);
16149	ASSERT(xp->xb_pktp != NULL);
16150	un = SD_GET_UN(bp);
16151	ASSERT(un != NULL);
16152	ASSERT(!mutex_owned(SD_MUTEX(un)));
16153
16154#ifdef SD_FAULT_INJECTION
16155
16156	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16157	/* SD FaultInjection */
16158	sd_faultinjection(pktp);
16159
16160#endif /* SD_FAULT_INJECTION */
16161
16162	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16163	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16164
16165	mutex_enter(SD_MUTEX(un));
16166
16167	ASSERT(un->un_fm_private != NULL);
16168	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
16169	ASSERT(sscp != NULL);
16170
16171	/* Reduce the count of the #commands currently in transport */
16172	un->un_ncmds_in_transport--;
16173	ASSERT(un->un_ncmds_in_transport >= 0);
16174
16175	/* Increment counter to indicate that the callback routine is active */
16176	un->un_in_callback++;
16177
16178	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16179
16180#ifdef	SDDEBUG
16181	if (bp == un->un_retry_bp) {
16182		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16183		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16184		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16185	}
16186#endif
16187
16188	/*
16189	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
16190	 * state if needed.
16191	 */
16192	if (pktp->pkt_reason == CMD_DEV_GONE) {
16193		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16194		    "Command failed to complete...Device is gone\n");
16195		if (un->un_mediastate != DKIO_DEV_GONE) {
16196			un->un_mediastate = DKIO_DEV_GONE;
16197			cv_broadcast(&un->un_state_cv);
16198		}
16199		sd_return_failed_command(un, bp, EIO);
16200		goto exit;
16201	}
16202
16203	if (pktp->pkt_state & STATE_XARQ_DONE) {
16204		SD_TRACE(SD_LOG_COMMON, un,
16205		    "sdintr: extra sense data received. pkt=%p\n", pktp);
16206	}
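	/*
	 * STATE_XARQ_DONE indicates the HBA gathered extended sense data
	 * (up to MAX_SENSE_LENGTH bytes) rather than the default
	 * SENSE_LENGTH; the ARQ handling below sizes its bcopy and
	 * residual arithmetic accordingly.
	 */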
16207
16208	/*
16209	 * First see if the pkt has auto-request sense data with it....
16210	 * Look at the packet state first so we don't take a performance
16211	 * hit looking at the arq enabled flag unless absolutely necessary.
16212	 */
16213	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16214	    (un->un_f_arq_enabled == TRUE)) {
16215		/*
16216		 * The HBA did an auto request sense for this command so check
16217		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16218		 * driver command that should not be retried.
16219		 */
16220		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16221			/*
16222			 * Save the relevant sense info into the xp for the
16223			 * original cmd.
16224			 */
16225			struct scsi_arq_status *asp;
16226			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16227			xp->xb_sense_status =
16228			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16229			xp->xb_sense_state  = asp->sts_rqpkt_state;
16230			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16231			if (pktp->pkt_state & STATE_XARQ_DONE) {
16232				actual_len = MAX_SENSE_LENGTH -
16233				    xp->xb_sense_resid;
16234				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16235				    MAX_SENSE_LENGTH);
16236			} else {
16237				if (xp->xb_sense_resid > SENSE_LENGTH) {
16238					actual_len = MAX_SENSE_LENGTH -
16239					    xp->xb_sense_resid;
16240				} else {
16241					actual_len = SENSE_LENGTH -
16242					    xp->xb_sense_resid;
16243				}
16244				if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16245					if ((((struct uscsi_cmd *)
16246					    (xp->xb_pktinfo))->uscsi_rqlen) >
16247					    actual_len) {
16248						xp->xb_sense_resid =
16249						    (((struct uscsi_cmd *)
16250						    (xp->xb_pktinfo))->
16251						    uscsi_rqlen) - actual_len;
16252					} else {
16253						xp->xb_sense_resid = 0;
16254					}
16255				}
16256				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16257				    SENSE_LENGTH);
16258			}
16259
16260			/* fail the command */
16261			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16262			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16263			sd_return_failed_command(un, bp, EIO);
16264			goto exit;
16265		}
16266
16267#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16268		/*
16269		 * We want to either retry or fail this command, so free
16270		 * the DMA resources here.  If we retry the command then
16271		 * the DMA resources will be reallocated in sd_start_cmds().
16272		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16273		 * causes the *entire* transfer to start over again from the
16274		 * beginning of the request, even for PARTIAL chunks that
16275		 * have already transferred successfully.
16276		 */
16277		if ((un->un_f_is_fibre == TRUE) &&
16278		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16279		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16280			scsi_dmafree(pktp);
16281			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16282		}
16283#endif
16284
16285		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16286		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16287
16288		sd_handle_auto_request_sense(un, bp, xp, pktp);
16289		goto exit;
16290	}
16291
16292	/* Next see if this is the REQUEST SENSE pkt for the instance */
16293	if (pktp->pkt_flags & FLAG_SENSING)  {
16294		/* This pktp is from the unit's REQUEST_SENSE command */
16295		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16296		    "sdintr: sd_handle_request_sense\n");
16297		sd_handle_request_sense(un, bp, xp, pktp);
16298		goto exit;
16299	}
16300
16301	/*
16302	 * Check to see if the command successfully completed as requested;
16303	 * this is the most common case (and also the hot performance path).
16304	 *
16305	 * Requirements for successful completion are:
16306	 * pkt_reason is CMD_CMPLT and packet status is status good.
16307	 * In addition:
16308	 * - A residual of zero indicates successful completion no matter what
16309	 *   the command is.
16310	 * - If the residual is not zero and the command is not a read or
16311	 *   write, then it's still defined as successful completion. In other
16312	 *   words, if the command is a read or write, the residual must be
16313	 *   zero for successful completion (except as noted next).
16314	 * - If the residual is not zero and the command is a read or
16315	 *   write, and it's a USCSICMD, then it's still defined as
16316	 *   successful completion.
16317	 */
16318	if ((pktp->pkt_reason == CMD_CMPLT) &&
16319	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16320
16321		/*
16322		 * Since this command is returned with a good status, we
16323		 * can reset the count for Sonoma failover.
16324		 */
16325		un->un_sonoma_failure_count = 0;
16326
16327		/*
16328		 * Return all USCSI commands on good status
16329		 */
16330		if (pktp->pkt_resid == 0) {
16331			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16332			    "sdintr: returning command for resid == 0\n");
16333		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16334		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16335			SD_UPDATE_B_RESID(bp, pktp);
16336			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16337			    "sdintr: returning command for resid != 0\n");
16338		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16339			SD_UPDATE_B_RESID(bp, pktp);
16340			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16341			    "sdintr: returning uscsi command\n");
16342		} else {
16343			goto not_successful;
16344		}
16345		sd_return_command(un, bp);
16346
16347		/*
16348		 * Decrement counter to indicate that the callback routine
16349		 * is done.
16350		 */
16351		un->un_in_callback--;
16352		ASSERT(un->un_in_callback >= 0);
16353		mutex_exit(SD_MUTEX(un));
16354
16355		return;
16356	}
16357
16358not_successful:
16359
16360#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16361	/*
16362	 * The following is based upon knowledge of the underlying transport
16363	 * and its use of DMA resources.  This code should be removed when
16364	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16365	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16366	 * and sd_start_cmds().
16367	 *
16368	 * Free any DMA resources associated with this command if there
16369	 * is a chance it could be retried or enqueued for later retry.
16370	 * If we keep the DMA binding then mpxio cannot reissue the
16371	 * command on another path whenever a path failure occurs.
16372	 *
16373	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16374	 * causes the *entire* transfer to start over again from the
16375	 * beginning of the request, even for PARTIAL chunks that
16376	 * have already transferred successfully.
16377	 *
16378	 * This is only done for non-uscsi commands (and also skipped for the
16379	 * driver's internal RQS command). Also just do this for Fibre Channel
16380	 * devices as these are the only ones that support mpxio.
16381	 */
16382	if ((un->un_f_is_fibre == TRUE) &&
16383	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16384	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16385		scsi_dmafree(pktp);
16386		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16387	}
16388#endif
16389
16390	/*
16391	 * The command did not successfully complete as requested so check
16392	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16393	 * driver command that should not be retried so just return. If
16394	 * FLAG_DIAGNOSE is not set the error will be processed below.
16395	 */
16396	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16397		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16398		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16399		/*
16400		 * Issue a request sense if a check condition caused the error
16401		 * (we handle the auto request sense case above), otherwise
16402		 * just fail the command.
16403		 */
16404		if ((pktp->pkt_reason == CMD_CMPLT) &&
16405		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16406			sd_send_request_sense_command(un, bp, pktp);
16407		} else {
16408			sd_return_failed_command(un, bp, EIO);
16409		}
16410		goto exit;
16411	}
16412
16413	/*
16414	 * The command did not successfully complete as requested so process
16415	 * the error, retry, and/or attempt recovery.
16416	 */
16417	switch (pktp->pkt_reason) {
16418	case CMD_CMPLT:
16419		switch (SD_GET_PKT_STATUS(pktp)) {
16420		case STATUS_GOOD:
16421			/*
16422			 * The command completed successfully with a non-zero
16423			 * residual
16424			 */
16425			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16426			    "sdintr: STATUS_GOOD \n");
16427			sd_pkt_status_good(un, bp, xp, pktp);
16428			break;
16429
16430		case STATUS_CHECK:
16431		case STATUS_TERMINATED:
16432			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16433			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16434			sd_pkt_status_check_condition(un, bp, xp, pktp);
16435			break;
16436
16437		case STATUS_BUSY:
16438			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16439			    "sdintr: STATUS_BUSY\n");
16440			sd_pkt_status_busy(un, bp, xp, pktp);
16441			break;
16442
16443		case STATUS_RESERVATION_CONFLICT:
16444			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16445			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16446			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16447			break;
16448
16449		case STATUS_QFULL:
16450			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16451			    "sdintr: STATUS_QFULL\n");
16452			sd_pkt_status_qfull(un, bp, xp, pktp);
16453			break;
16454
16455		case STATUS_MET:
16456		case STATUS_INTERMEDIATE:
16457		case STATUS_SCSI2:
16458		case STATUS_INTERMEDIATE_MET:
16459		case STATUS_ACA_ACTIVE:
16460			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16461			    "Unexpected SCSI status received: 0x%x\n",
16462			    SD_GET_PKT_STATUS(pktp));
16463			/*
16464			 * Mark the ssc_flags when an invalid status code
16465			 * is detected for a non-USCSI command.
16466			 */
16467			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16468				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
16469				    0, "stat-code");
16470			}
16471			sd_return_failed_command(un, bp, EIO);
16472			break;
16473
16474		default:
16475			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16476			    "Invalid SCSI status received: 0x%x\n",
16477			    SD_GET_PKT_STATUS(pktp));
16478			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16479				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
16480				    0, "stat-code");
16481			}
16482			sd_return_failed_command(un, bp, EIO);
16483			break;
16484
16485		}
16486		break;
16487
16488	case CMD_INCOMPLETE:
16489		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16490		    "sdintr:  CMD_INCOMPLETE\n");
16491		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16492		break;
16493	case CMD_TRAN_ERR:
16494		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16495		    "sdintr: CMD_TRAN_ERR\n");
16496		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16497		break;
16498	case CMD_RESET:
16499		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16500		    "sdintr: CMD_RESET \n");
16501		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16502		break;
16503	case CMD_ABORTED:
16504		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16505		    "sdintr: CMD_ABORTED \n");
16506		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16507		break;
16508	case CMD_TIMEOUT:
16509		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16510		    "sdintr: CMD_TIMEOUT\n");
16511		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16512		break;
16513	case CMD_UNX_BUS_FREE:
16514		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16515		    "sdintr: CMD_UNX_BUS_FREE \n");
16516		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16517		break;
16518	case CMD_TAG_REJECT:
16519		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16520		    "sdintr: CMD_TAG_REJECT\n");
16521		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16522		break;
16523	default:
16524		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16525		    "sdintr: default\n");
16526		/*
16527		 * Mark the ssc_flags for detecting an invalid pkt_reason.
16528		 */
16529		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16530			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_PKT_REASON,
16531			    0, "pkt-reason");
16532		}
16533		sd_pkt_reason_default(un, bp, xp, pktp);
16534		break;
16535	}
16536
16537exit:
16538	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16539
16540	/* Decrement counter to indicate that the callback routine is done. */
16541	un->un_in_callback--;
16542	ASSERT(un->un_in_callback >= 0);
16543
16544	/*
16545	 * At this point, the pkt has been dispatched, ie, it is either
16546	 * being re-tried or has been returned to its caller and should
16547	 * not be referenced.
16548	 */
16549
16550	mutex_exit(SD_MUTEX(un));
16551}
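/*
 * Note on un_in_callback: the counter bumped on entry and dropped on every
 * return path lets other threads observe that completion processing is
 * still active in sdintr(); detach, for example, is expected to wait for
 * it to drain before tearing the instance down.
 */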
16552
16553
16554/*
16555 *    Function: sd_print_incomplete_msg
16556 *
16557 * Description: Prints the error message for a CMD_INCOMPLETE error.
16558 *
16559 *   Arguments: un - ptr to associated softstate for the device.
16560 *		bp - ptr to the buf(9S) for the command.
16561 *		arg - message string ptr
16562 *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16563 *			or SD_NO_RETRY_ISSUED.
16564 *
16565 *     Context: May be called under interrupt context
16566 */
16567
16568static void
16569sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16570{
16571	struct scsi_pkt	*pktp;
16572	char	*msgp;
16573	char	*cmdp = arg;
16574
16575	ASSERT(un != NULL);
16576	ASSERT(mutex_owned(SD_MUTEX(un)));
16577	ASSERT(bp != NULL);
16578	ASSERT(arg != NULL);
16579	pktp = SD_GET_PKTP(bp);
16580	ASSERT(pktp != NULL);
16581
16582	switch (code) {
16583	case SD_DELAYED_RETRY_ISSUED:
16584	case SD_IMMEDIATE_RETRY_ISSUED:
16585		msgp = "retrying";
16586		break;
16587	case SD_NO_RETRY_ISSUED:
16588	default:
16589		msgp = "giving up";
16590		break;
16591	}
16592
16593	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16594		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16595		    "incomplete %s- %s\n", cmdp, msgp);
16596	}
16597}
16598
16599
16600
16601/*
16602 *    Function: sd_pkt_status_good
16603 *
16604 * Description: Processing for a STATUS_GOOD code in pkt_status.
16605 *
16606 *     Context: May be called under interrupt context
16607 */
16608
16609static void
16610sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16611	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16612{
16613	char	*cmdp;
16614
16615	ASSERT(un != NULL);
16616	ASSERT(mutex_owned(SD_MUTEX(un)));
16617	ASSERT(bp != NULL);
16618	ASSERT(xp != NULL);
16619	ASSERT(pktp != NULL);
16620	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16621	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16622	ASSERT(pktp->pkt_resid != 0);
16623
16624	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16625
16626	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16627	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16628	case SCMD_READ:
16629		cmdp = "read";
16630		break;
16631	case SCMD_WRITE:
16632		cmdp = "write";
16633		break;
16634	default:
16635		SD_UPDATE_B_RESID(bp, pktp);
16636		sd_return_command(un, bp);
16637		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16638		return;
16639	}
16640
16641	/*
16642	 * See if we can retry the read/write, preferably immediately.
16643	 * If retries are exhausted, then sd_retry_command() will update
16644	 * the b_resid count.
16645	 */
16646	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16647	    cmdp, EIO, (clock_t)0, NULL);
16648
16649	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16650}
16651
16652
16653
16654
16655
16656/*
16657 *    Function: sd_handle_request_sense
16658 *
16659 * Description: Processing for non-auto Request Sense command.
16660 *
16661 *   Arguments: un - ptr to associated softstate
16662 *		sense_bp - ptr to buf(9S) for the RQS command
16663 *		sense_xp - ptr to the sd_xbuf for the RQS command
16664 *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16665 *
16666 *     Context: May be called under interrupt context
16667 */
16668
16669static void
16670sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16671	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16672{
16673	struct buf	*cmd_bp;	/* buf for the original command */
16674	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16675	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16676	size_t		actual_len;	/* actual sense data length */
16677
16678	ASSERT(un != NULL);
16679	ASSERT(mutex_owned(SD_MUTEX(un)));
16680	ASSERT(sense_bp != NULL);
16681	ASSERT(sense_xp != NULL);
16682	ASSERT(sense_pktp != NULL);
16683
16684	/*
16685	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16686	 * RQS command and not the original command.
16687	 */
16688	ASSERT(sense_pktp == un->un_rqs_pktp);
16689	ASSERT(sense_bp   == un->un_rqs_bp);
16690	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16691	    (FLAG_SENSING | FLAG_HEAD));
16692	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16693	    FLAG_SENSING) == FLAG_SENSING);
16694
16695	/* These are the bp, xp, and pktp for the original command */
16696	cmd_bp = sense_xp->xb_sense_bp;
16697	cmd_xp = SD_GET_XBUF(cmd_bp);
16698	cmd_pktp = SD_GET_PKTP(cmd_bp);
16699
16700	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16701		/*
16702		 * The REQUEST SENSE command failed.  Release the REQUEST
16703		 * SENSE command for re-use, get back the bp for the original
16704		 * command, and attempt to re-try the original command if
16705		 * FLAG_DIAGNOSE is not set in the original packet.
16706		 */
16707		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16708		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16709			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16710			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16711			    NULL, NULL, EIO, (clock_t)0, NULL);
16712			return;
16713		}
16714	}
16715
16716	/*
16717	 * Save the relevant sense info into the xp for the original cmd.
16718	 *
16719	 * Note: if the request sense failed the state info will be zero
16720	 * as set in sd_mark_rqs_busy()
16721	 */
16722	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16723	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
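	/*
	 * pkt_resid counts the sense bytes that were *not* transferred,
	 * so the amount actually received is the MAX_SENSE_LENGTH
	 * allocation (see sd_alloc_rqs()) minus that residual.
	 */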
16724	actual_len = MAX_SENSE_LENGTH - sense_pktp->pkt_resid;
16725	if ((cmd_xp->xb_pkt_flags & SD_XB_USCSICMD) &&
16726	    (((struct uscsi_cmd *)cmd_xp->xb_pktinfo)->uscsi_rqlen >
16727	    SENSE_LENGTH)) {
16728		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
16729		    MAX_SENSE_LENGTH);
16730		cmd_xp->xb_sense_resid = sense_pktp->pkt_resid;
16731	} else {
16732		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
16733		    SENSE_LENGTH);
16734		if (actual_len < SENSE_LENGTH) {
16735			cmd_xp->xb_sense_resid = SENSE_LENGTH - actual_len;
16736		} else {
16737			cmd_xp->xb_sense_resid = 0;
16738		}
16739	}
16740
16741	/*
16742	 *  Free up the RQS command....
16743	 *  NOTE:
16744	 *	Must do this BEFORE calling sd_validate_sense_data!
16745	 *	sd_validate_sense_data may return the original command in
16746	 *	which case the pkt will be freed and the flags can no
16747	 *	longer be touched.
16748	 *	SD_MUTEX is held through this process until the command
16749	 *	is dispatched based upon the sense data, so there are
16750	 *	no race conditions.
16751	 */
16752	(void) sd_mark_rqs_idle(un, sense_xp);
16753
16754	/*
16755	 * For a retryable command see if we have valid sense data, if so then
16756	 * turn it over to sd_decode_sense() to figure out the right course of
16757	 * action. Just fail a non-retryable command.
16758	 */
16759	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16760		if (sd_validate_sense_data(un, cmd_bp, cmd_xp, actual_len) ==
16761		    SD_SENSE_DATA_IS_VALID) {
16762			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16763		}
16764	} else {
16765		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16766		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16767		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16768		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16769		sd_return_failed_command(un, cmd_bp, EIO);
16770	}
16771}
16772
16773
16774
16775
16776/*
16777 *    Function: sd_handle_auto_request_sense
16778 *
16779 * Description: Processing for auto-request sense information.
16780 *
16781 *   Arguments: un - ptr to associated softstate
16782 *		bp - ptr to buf(9S) for the command
16783 *		xp - ptr to the sd_xbuf for the command
16784 *		pktp - ptr to the scsi_pkt(9S) for the command
16785 *
16786 *     Context: May be called under interrupt context
16787 */
16788
16789static void
16790sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16791	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16792{
16793	struct scsi_arq_status *asp;
16794	size_t actual_len;
16795
16796	ASSERT(un != NULL);
16797	ASSERT(mutex_owned(SD_MUTEX(un)));
16798	ASSERT(bp != NULL);
16799	ASSERT(xp != NULL);
16800	ASSERT(pktp != NULL);
16801	ASSERT(pktp != un->un_rqs_pktp);
16802	ASSERT(bp   != un->un_rqs_bp);
16803
16804	/*
16805	 * For auto-request sense, we get a scsi_arq_status back from
16806	 * the HBA, with the sense data in the sts_sensedata member.
16807	 * The pkt_scbp of the packet points to this scsi_arq_status.
16808	 */
16809	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16810
16811	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16812		/*
16813		 * The auto REQUEST SENSE failed; see if we can re-try
16814		 * the original command.
16815		 */
16816		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16817		    "auto request sense failed (reason=%s)\n",
16818		    scsi_rname(asp->sts_rqpkt_reason));
16819
16820		sd_reset_target(un, pktp);
16821
16822		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16823		    NULL, NULL, EIO, (clock_t)0, NULL);
16824		return;
16825	}
16826
16827	/* Save the relevant sense info into the xp for the original cmd. */
16828	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16829	xp->xb_sense_state  = asp->sts_rqpkt_state;
16830	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16831	if (xp->xb_sense_state & STATE_XARQ_DONE) {
16832		actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
16833		bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16834		    MAX_SENSE_LENGTH);
16835	} else {
16836		if (xp->xb_sense_resid > SENSE_LENGTH) {
16837			actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
16838		} else {
16839			actual_len = SENSE_LENGTH - xp->xb_sense_resid;
16840		}
16841		if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16842			if ((((struct uscsi_cmd *)
16843			    (xp->xb_pktinfo))->uscsi_rqlen) > actual_len) {
16844				xp->xb_sense_resid = (((struct uscsi_cmd *)
16845				    (xp->xb_pktinfo))->uscsi_rqlen) -
16846				    actual_len;
16847			} else {
16848				xp->xb_sense_resid = 0;
16849			}
16850		}
16851		bcopy(&asp->sts_sensedata, xp->xb_sense_data, SENSE_LENGTH);
16852	}
16853
16854	/*
16855	 * See if we have valid sense data, if so then turn it over to
16856	 * sd_decode_sense() to figure out the right course of action.
16857	 */
16858	if (sd_validate_sense_data(un, bp, xp, actual_len) ==
16859	    SD_SENSE_DATA_IS_VALID) {
16860		sd_decode_sense(un, bp, xp, pktp);
16861	}
16862}
16863
16864
16865/*
16866 *    Function: sd_print_sense_failed_msg
16867 *
16868 * Description: Print log message when RQS has failed.
16869 *
16870 *   Arguments: un - ptr to associated softstate
16871 *		bp - ptr to buf(9S) for the command
16872 *		arg - generic message string ptr
16873 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16874 *			or SD_NO_RETRY_ISSUED
16875 *
16876 *     Context: May be called from interrupt context
16877 */
16878
16879static void
16880sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16881	int code)
16882{
16883	char	*msgp = arg;
16884
16885	ASSERT(un != NULL);
16886	ASSERT(mutex_owned(SD_MUTEX(un)));
16887	ASSERT(bp != NULL);
16888
16889	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16890		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16891	}
16892}
16893
16894
16895/*
16896 *    Function: sd_validate_sense_data
16897 *
16898 * Description: Check the given sense data for validity.
16899 *		If the sense data is not valid, the command will
16900 *		be either failed or retried!
16901 *
16902 * Return Code: SD_SENSE_DATA_IS_INVALID
16903 *		SD_SENSE_DATA_IS_VALID
16904 *
16905 *     Context: May be called from interrupt context
16906 */
16907
16908static int
16909sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16910	size_t actual_len)
16911{
16912	struct scsi_extended_sense *esp;
16913	struct	scsi_pkt *pktp;
16914	char	*msgp = NULL;
16915	sd_ssc_t *sscp;
16916
16917	ASSERT(un != NULL);
16918	ASSERT(mutex_owned(SD_MUTEX(un)));
16919	ASSERT(bp != NULL);
16920	ASSERT(bp != un->un_rqs_bp);
16921	ASSERT(xp != NULL);
16922	ASSERT(un->un_fm_private != NULL);
16923
16924	pktp = SD_GET_PKTP(bp);
16925	ASSERT(pktp != NULL);
16926
16927	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
16928	ASSERT(sscp != NULL);
16929
16930	/*
16931	 * Check the status of the RQS command (auto or manual).
16932	 */
16933	switch (xp->xb_sense_status & STATUS_MASK) {
16934	case STATUS_GOOD:
16935		break;
16936
16937	case STATUS_RESERVATION_CONFLICT:
16938		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16939		return (SD_SENSE_DATA_IS_INVALID);
16940
16941	case STATUS_BUSY:
16942		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16943		    "Busy Status on REQUEST SENSE\n");
16944		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
16945		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
16946		return (SD_SENSE_DATA_IS_INVALID);
16947
16948	case STATUS_QFULL:
16949		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16950		    "QFULL Status on REQUEST SENSE\n");
16951		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
16952		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
16953		return (SD_SENSE_DATA_IS_INVALID);
16954
16955	case STATUS_CHECK:
16956	case STATUS_TERMINATED:
16957		msgp = "Check Condition on REQUEST SENSE\n";
16958		goto sense_failed;
16959
16960	default:
16961		msgp = "Not STATUS_GOOD on REQUEST SENSE\n";
16962		goto sense_failed;
16963	}
16964
16965	/*
16966	 * See if we got the minimum required amount of sense data.
16967	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
16968	 * or less.
16969	 */
16970	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
16971	    (actual_len == 0)) {
16972		msgp = "Request Sense couldn't get sense data\n";
16973		goto sense_failed;
16974	}
16975
16976	if (actual_len < SUN_MIN_SENSE_LENGTH) {
16977		msgp = "Not enough sense information\n";
16978		/* Mark the ssc_flags for detecting invalid sense data */
16979		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
16980			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
16981			    "sense-data");
16982		}
16983		goto sense_failed;
16984	}
16985
16986	/*
16987	 * We require the extended sense data
16988	 */
16989	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16990	if (esp->es_class != CLASS_EXTENDED_SENSE) {
16991		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16992			static char tmp[8];
16993			static char buf[148];
16994			char *p = (char *)(xp->xb_sense_data);
16995			int i;
16996
16997			mutex_enter(&sd_sense_mutex);
16998			(void) strcpy(buf, "undecodable sense information:");
16999			for (i = 0; i < actual_len; i++) {
17000				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
17001				(void) strcpy(&buf[strlen(buf)], tmp);
17002			}
17003			i = strlen(buf);
17004			(void) strcpy(&buf[i], "-(assumed fatal)\n");
17005
17006			if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
17007				scsi_log(SD_DEVINFO(un), sd_label,
17008				    CE_WARN, buf);
17009			}
17010			mutex_exit(&sd_sense_mutex);
17011		}
17012
17013		/* Mark the ssc_flags for detecting invalid sense data */
17014		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17015			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
17016			    "sense-data");
17017		}
17018
17019		/* Note: Legacy behavior, fail the command with no retry */
17020		sd_return_failed_command(un, bp, EIO);
17021		return (SD_SENSE_DATA_IS_INVALID);
17022	}
17023
17024	/*
17025	 * Check that es_code is valid (es_class concatenated with es_code
17026	 * makes up the "response code" field).  es_class will always be 7, so
17027	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
17028	 * sense data format.
17029	 */
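	/*
	 * For reference, es_class and es_code combine into the standard
	 * SPC "response code" byte:
	 *
	 *	0x70	fixed format, current errors
	 *	0x71	fixed format, deferred errors
	 *	0x72	descriptor format, current errors
	 *	0x73	descriptor format, deferred errors
	 *	0x7F	vendor specific
	 */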
17030	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
17031	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
17032	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
17033	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
17034	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
17035		/* Mark the ssc_flags for detecting invalid sense data */
17036		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17037			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
17038			    "sense-data");
17039		}
17040		goto sense_failed;
17041	}
17042
17043	return (SD_SENSE_DATA_IS_VALID);
17044
17045sense_failed:
17046	/*
17047	 * If the request sense failed (for whatever reason), attempt
17048	 * to retry the original command.
17049	 */
17050#if defined(__i386) || defined(__amd64)
17051	/*
17052	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
17053	 * sddef.h for the SPARC platform, while x86 uses a single
17054	 * binary for both SCSI and FC.
17055	 * The SD_RETRY_DELAY value used here needs to be adjusted
17056	 * whenever SD_RETRY_DELAY changes in sddef.h.
17057	 */
17058	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17059	    sd_print_sense_failed_msg, msgp, EIO,
17060	    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0, NULL);
17061#else
17062	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17063	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
17064#endif
17065
17066	return (SD_SENSE_DATA_IS_INVALID);
17067}
17068
17069/*
17070 *    Function: sd_decode_sense
17071 *
17072 * Description: Take recovery action(s) when SCSI Sense Data is received.
17073 *
17074 *     Context: Interrupt context.
17075 */
17076
17077static void
17078sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17079	struct scsi_pkt *pktp)
17080{
17081	uint8_t sense_key;
17082
17083	ASSERT(un != NULL);
17084	ASSERT(mutex_owned(SD_MUTEX(un)));
17085	ASSERT(bp != NULL);
17086	ASSERT(bp != un->un_rqs_bp);
17087	ASSERT(xp != NULL);
17088	ASSERT(pktp != NULL);
17089
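	/*
	 * scsi_sense_key(9F) extracts the sense key from either fixed or
	 * descriptor format sense data, so no format check is needed here.
	 */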
17090	sense_key = scsi_sense_key(xp->xb_sense_data);
17091
17092	switch (sense_key) {
17093	case KEY_NO_SENSE:
17094		sd_sense_key_no_sense(un, bp, xp, pktp);
17095		break;
17096	case KEY_RECOVERABLE_ERROR:
17097		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17098		    bp, xp, pktp);
17099		break;
17100	case KEY_NOT_READY:
17101		sd_sense_key_not_ready(un, xp->xb_sense_data,
17102		    bp, xp, pktp);
17103		break;
17104	case KEY_MEDIUM_ERROR:
17105	case KEY_HARDWARE_ERROR:
17106		sd_sense_key_medium_or_hardware_error(un,
17107		    xp->xb_sense_data, bp, xp, pktp);
17108		break;
17109	case KEY_ILLEGAL_REQUEST:
17110		sd_sense_key_illegal_request(un, bp, xp, pktp);
17111		break;
17112	case KEY_UNIT_ATTENTION:
17113		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17114		    bp, xp, pktp);
17115		break;
17116	case KEY_WRITE_PROTECT:
17117	case KEY_VOLUME_OVERFLOW:
17118	case KEY_MISCOMPARE:
17119		sd_sense_key_fail_command(un, bp, xp, pktp);
17120		break;
17121	case KEY_BLANK_CHECK:
17122		sd_sense_key_blank_check(un, bp, xp, pktp);
17123		break;
17124	case KEY_ABORTED_COMMAND:
17125		sd_sense_key_aborted_command(un, bp, xp, pktp);
17126		break;
17127	case KEY_VENDOR_UNIQUE:
17128	case KEY_COPY_ABORTED:
17129	case KEY_EQUAL:
17130	case KEY_RESERVED:
17131	default:
17132		sd_sense_key_default(un, xp->xb_sense_data,
17133		    bp, xp, pktp);
17134		break;
17135	}
17136}
17137
17138
17139/*
17140 *    Function: sd_dump_memory
17141 *
17142 * Description: Debug logging routine to print the contents of a user provided
17143	 *		buffer. The output of the buffer is broken up into 256 byte
17144	 *		segments due to a size constraint of the scsi_log
17145	 *		implementation.
17146 *
17147 *   Arguments: un - ptr to softstate
17148 *		comp - component mask
17149	 *		title - "title" string to precede data when printed
17150 *		data - ptr to data block to be printed
17151 *		len - size of data block to be printed
17152 *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17153 *
17154 *     Context: May be called from interrupt context
17155 */
17156
17157#define	SD_DUMP_MEMORY_BUF_SIZE	256
17158
17159static char *sd_dump_format_string[] = {
17160		" 0x%02x",
17161		" %c"
17162};
17163
17164static void
17165sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17166    int len, int fmt)
17167{
17168	int	i, j;
17169	int	avail_count;
17170	int	start_offset;
17171	int	end_offset;
17172	size_t	entry_len;
17173	char	*bufp;
17174	char	*local_buf;
17175	char	*format_string;
17176
17177	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17178
17179	/*
17180	 * In the debug version of the driver, this function is called from a
17181	 * number of places which are NOPs in the release driver.
17182	 * The debug driver therefore has additional methods of filtering
17183	 * debug output.
17184	 */
17185#ifdef SDDEBUG
17186	/*
17187	 * In the debug version of the driver we can reduce the amount of debug
17188	 * messages by setting sd_error_level to something other than
17189	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17190	 * sd_component_mask.
17191	 */
17192	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17193	    (sd_error_level != SCSI_ERR_ALL)) {
17194		return;
17195	}
17196	if (((sd_component_mask & comp) == 0) ||
17197	    (sd_error_level != SCSI_ERR_ALL)) {
17198		return;
17199	}
17200#else
17201	if (sd_error_level != SCSI_ERR_ALL) {
17202		return;
17203	}
17204#endif
17205
17206	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17207	bufp = local_buf;
17208	/*
17209	 * Available length is the length of local_buf[], minus the
17210	 * length of the title string, minus one for the ":", minus
17211	 * one for the newline, minus one for the NULL terminator.
17212	 * This gives the #bytes available for holding the printed
17213	 * values from the given data buffer.
17214	 */
17215	if (fmt == SD_LOG_HEX) {
17216		format_string = sd_dump_format_string[0];
17217	} else /* SD_LOG_CHAR */ {
17218		format_string = sd_dump_format_string[1];
17219	}
17220	/*
17221	 * Available count is the number of elements from the given
17222	 * data buffer that we can fit into the available length.
17223	 * This is based upon the size of the format string used.
17224	 * Make one entry and find its size.
17225	 */
17226	(void) sprintf(bufp, format_string, data[0]);
17227	entry_len = strlen(bufp);
17228	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
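	/*
	 * For example, with fmt == SD_LOG_HEX each entry is 5 characters
	 * (" 0x%02x"), so a title of "Sense Data" (10 characters) gives
	 * avail_count = (256 - 10 - 3) / 5 = 48 entries per line.
	 */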
17229
17230	j = 0;
17231	while (j < len) {
17232		bufp = local_buf;
17233		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17234		start_offset = j;
17235
17236		end_offset = start_offset + avail_count;
17237
17238		(void) sprintf(bufp, "%s:", title);
17239		bufp += strlen(bufp);
17240		for (i = start_offset; ((i < end_offset) && (j < len));
17241		    i++, j++) {
17242			(void) sprintf(bufp, format_string, data[i]);
17243			bufp += entry_len;
17244		}
17245		(void) sprintf(bufp, "\n");
17246
17247		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17248	}
17249	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17250}
17251
17252/*
17253 *    Function: sd_print_sense_msg
17254 *
17255 * Description: Log a message based upon the given sense data.
17256 *
17257 *   Arguments: un - ptr to associated softstate
17258 *		bp - ptr to buf(9S) for the command
17259	 *		arg - ptr to associated sd_sense_info struct
17260 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17261 *			or SD_NO_RETRY_ISSUED
17262 *
17263 *     Context: May be called from interrupt context
17264 */
17265
17266static void
17267sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17268{
17269	struct sd_xbuf	*xp;
17270	struct scsi_pkt	*pktp;
17271	uint8_t *sensep;
17272	daddr_t request_blkno;
17273	diskaddr_t err_blkno;
17274	int severity;
17275	int pfa_flag;
17276	extern struct scsi_key_strings scsi_cmds[];
17277
17278	ASSERT(un != NULL);
17279	ASSERT(mutex_owned(SD_MUTEX(un)));
17280	ASSERT(bp != NULL);
17281	xp = SD_GET_XBUF(bp);
17282	ASSERT(xp != NULL);
17283	pktp = SD_GET_PKTP(bp);
17284	ASSERT(pktp != NULL);
17285	ASSERT(arg != NULL);
17286
17287	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17288	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17289
17290	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17291	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17292		severity = SCSI_ERR_RETRYABLE;
17293	}
17294
17295	/* Use absolute block number for the request block number */
17296	request_blkno = xp->xb_blkno;
17297
17298	/*
17299	 * Now try to get the error block number from the sense data
17300	 */
17301	sensep = xp->xb_sense_data;
17302
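	/*
	 * scsi_sense_info_uint64(9F) extracts the INFORMATION field from
	 * fixed format sense data (when the valid bit is set) or from an
	 * information sense data descriptor, returning TRUE only if the
	 * field is present and valid.
	 */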
17303	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
17304	    (uint64_t *)&err_blkno)) {
17305		/*
17306		 * We retrieved the error block number from the information
17307		 * portion of the sense data.
17308		 *
17309		 * For USCSI commands we are better off using the error
17310		 * block no. as the requested block no. (This is the best
17311		 * we can estimate.)
17312		 */
17313		if ((SD_IS_BUFIO(xp) == FALSE) &&
17314		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17315			request_blkno = err_blkno;
17316		}
17317	} else {
17318		/*
17319		 * Without the es_valid bit set (for fixed format) or an
17320		 * information descriptor (for descriptor format) we cannot
17321		 * be certain of the error blkno, so just use the
17322		 * request_blkno.
17323		 */
17324		err_blkno = (diskaddr_t)request_blkno;
17325	}
17326
17327	/*
17328	 * The following will log the buffer contents for the release driver
17329	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17330	 * level is set to verbose.
17331	 */
17332	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17333	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17334	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17335	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17336
17337	if (pfa_flag == FALSE) {
17338		/* This is normally only set for USCSI */
17339		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17340			return;
17341		}
17342
17343		if ((SD_IS_BUFIO(xp) == TRUE) &&
17344		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17345		    (severity < sd_error_level))) {
17346			return;
17347		}
17348	}
17349	/*
17350	 * Check for Sonoma Failover and keep a count of how many I/Os failed
17351	 */
17352	if ((SD_IS_LSI(un)) &&
17353	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
17354	    (scsi_sense_asc(sensep) == 0x94) &&
17355	    (scsi_sense_ascq(sensep) == 0x01)) {
17356		un->un_sonoma_failure_count++;
17357		if (un->un_sonoma_failure_count > 1) {
17358			return;
17359		}
17360	}
17361
17362	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP ||
17363	    ((scsi_sense_key(sensep) == KEY_RECOVERABLE_ERROR) &&
17364	    (pktp->pkt_resid == 0))) {
17365		scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17366		    request_blkno, err_blkno, scsi_cmds,
17367		    (struct scsi_extended_sense *)sensep,
17368		    un->un_additional_codes, NULL);
17369	}
17370}
17371
17372/*
17373 *    Function: sd_sense_key_no_sense
17374 *
17375 * Description: Recovery action when sense data was not received.
17376 *
17377 *     Context: May be called from interrupt context
17378 */
17379
17380static void
17381sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17382	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17383{
17384	struct sd_sense_info	si;
17385
17386	ASSERT(un != NULL);
17387	ASSERT(mutex_owned(SD_MUTEX(un)));
17388	ASSERT(bp != NULL);
17389	ASSERT(xp != NULL);
17390	ASSERT(pktp != NULL);
17391
17392	si.ssi_severity = SCSI_ERR_FATAL;
17393	si.ssi_pfa_flag = FALSE;
17394
17395	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17396
17397	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17398	    &si, EIO, (clock_t)0, NULL);
17399}
17400
17401
17402/*
17403 *    Function: sd_sense_key_recoverable_error
17404 *
17405 * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17406 *
17407 *     Context: May be called from interrupt context
17408 */
17409
17410static void
17411sd_sense_key_recoverable_error(struct sd_lun *un,
17412	uint8_t *sense_datap,
17413	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17414{
17415	struct sd_sense_info	si;
17416	uint8_t asc = scsi_sense_asc(sense_datap);
17417
17418	ASSERT(un != NULL);
17419	ASSERT(mutex_owned(SD_MUTEX(un)));
17420	ASSERT(bp != NULL);
17421	ASSERT(xp != NULL);
17422	ASSERT(pktp != NULL);
17423
17424	/*
17425	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17426	 */
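	/*
	 * When the sd_report_pfa tunable is nonzero, predictive failure
	 * analysis (PFA) threshold events are counted separately and
	 * logged at SCSI_ERR_INFO severity instead of being treated as
	 * ordinary recovered errors.
	 */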
17427	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17428		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17429		si.ssi_severity = SCSI_ERR_INFO;
17430		si.ssi_pfa_flag = TRUE;
17431	} else {
17432		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17433		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17434		si.ssi_severity = SCSI_ERR_RECOVERED;
17435		si.ssi_pfa_flag = FALSE;
17436	}
17437
17438	if (pktp->pkt_resid == 0) {
17439		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17440		sd_return_command(un, bp);
17441		return;
17442	}
17443
17444	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17445	    &si, EIO, (clock_t)0, NULL);
17446}
17447
17448
17449
17450
17451/*
17452 *    Function: sd_sense_key_not_ready
17453 *
17454 * Description: Recovery actions for a SCSI "Not Ready" sense key.
17455 *
17456 *     Context: May be called from interrupt context
17457 */
17458
17459static void
17460sd_sense_key_not_ready(struct sd_lun *un,
17461	uint8_t *sense_datap,
17462	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17463{
17464	struct sd_sense_info	si;
17465	uint8_t asc = scsi_sense_asc(sense_datap);
17466	uint8_t ascq = scsi_sense_ascq(sense_datap);
17467
17468	ASSERT(un != NULL);
17469	ASSERT(mutex_owned(SD_MUTEX(un)));
17470	ASSERT(bp != NULL);
17471	ASSERT(xp != NULL);
17472	ASSERT(pktp != NULL);
17473
17474	si.ssi_severity = SCSI_ERR_FATAL;
17475	si.ssi_pfa_flag = FALSE;
17476
17477	/*
17478	 * Update error stats after first NOT READY error. Disks may have
17479	 * been powered down and may need to be restarted.  For CDROMs,
17480	 * report NOT READY errors only if media is present.
17481	 */
17482	if ((ISCD(un) && (asc == 0x3A)) ||
17483	    (xp->xb_nr_retry_count > 0)) {
17484		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17485		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17486	}
17487
17488	/*
17489	 * Just fail if the "not ready" retry limit has been reached.
17490	 */
17491	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
17492		/* Special check for error message printing for removables. */
17493		if (un->un_f_has_removable_media && (asc == 0x04) &&
17494		    (ascq >= 0x04)) {
17495			si.ssi_severity = SCSI_ERR_ALL;
17496		}
17497		goto fail_command;
17498	}
17499
17500	/*
17501	 * Check the ASC and ASCQ in the sense data as needed, to determine
17502	 * what to do.
17503	 */
17504	switch (asc) {
17505	case 0x04:	/* LOGICAL UNIT NOT READY */
17506		/*
17507		 * disk drives that don't spin up result in a very long delay
17508		 * in format without warning messages. We will log a message
17509		 * if the error level is set to verbose.
17510		 */
17511		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17512			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17513			    "logical unit not ready, resetting disk\n");
17514		}
17515
17516		/*
17517		 * There are different requirements for CDROMs and disks for
17518		 * the number of retries.  If a CD-ROM is giving this, it is
17519		 * probably reading TOC and is in the process of getting
17520		 * ready, so we should keep on trying for a long time to make
17521		 * sure that all types of media are taken into account (for
17522		 * some media the drive takes a long time to read TOC).  For
17523		 * disks we do not want to retry this too many times as this
17524		 * can cause a long hang in format when the drive refuses to
17525		 * spin up (a very common failure).
17526		 */
17527		switch (ascq) {
17528		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17529			/*
17530			 * Disk drives frequently refuse to spin up which
17531			 * results in a very long hang in format without
17532			 * warning messages.
17533			 *
17534			 * Note: This code preserves the legacy behavior of
17535			 * comparing xb_nr_retry_count against zero for fibre
17536			 * channel targets instead of comparing against the
17537			 * un_reset_retry_count value.  The reason for this
17538			 * discrepancy has been so utterly lost beneath the
17539			 * Sands of Time that even Indiana Jones could not
17540			 * find it.
17541			 */
17542			if (un->un_f_is_fibre == TRUE) {
17543				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17544				    (xp->xb_nr_retry_count > 0)) &&
17545				    (un->un_startstop_timeid == NULL)) {
17546					scsi_log(SD_DEVINFO(un), sd_label,
17547					    CE_WARN, "logical unit not ready, "
17548					    "resetting disk\n");
17549					sd_reset_target(un, pktp);
17550				}
17551			} else {
17552				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17553				    (xp->xb_nr_retry_count >
17554				    un->un_reset_retry_count)) &&
17555				    (un->un_startstop_timeid == NULL)) {
17556					scsi_log(SD_DEVINFO(un), sd_label,
17557					    CE_WARN, "logical unit not ready, "
17558					    "resetting disk\n");
17559					sd_reset_target(un, pktp);
17560				}
17561			}
17562			break;
17563
17564		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17565			/*
17566			 * If the target is in the process of becoming
17567			 * ready, just proceed with the retry. This can
17568			 * happen with CD-ROMs that take a long time to
17569			 * read TOC after a power cycle or reset.
17570			 */
17571			goto do_retry;
17572
17573		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17574			break;
17575
17576		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17577			/*
17578			 * Retries cannot help here so just fail right away.
17579			 */
17580			goto fail_command;
17581
17582		case 0x88:
17583			/*
17584			 * Vendor-unique code for T3/T4: it indicates a
17585		 * path problem in a multipathed config, but as far as
17586		 * the target driver is concerned it equates to a fatal
17587		 * error, so we should just fail the command right away
17588		 * (without printing anything to the console). If this
17589		 * is not a T3/T4, fall through to the default recovery
17590		 * action.
17591		 * T3/T4 is FC only, so there is no need to check is_fibre.
17592			 */
17593			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17594				sd_return_failed_command(un, bp, EIO);
17595				return;
17596			}
17597			/* FALLTHRU */
17598
17599		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17600		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17601		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17602		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17603		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17604		default:    /* Possible future codes in SCSI spec? */
17605			/*
17606			 * For removable-media devices, do not retry if
17607			 * ASCQ > 2 as these result mostly from USCSI commands
17608			 * on MMC devices issued to check status of an
17609			 * operation initiated in immediate mode.  Also for
17610			 * ASCQ >= 4 do not print console messages as these
17611			 * mainly represent a user-initiated operation
17612			 * instead of a system failure.
17613			 */
17614			if (un->un_f_has_removable_media) {
17615				si.ssi_severity = SCSI_ERR_ALL;
17616				goto fail_command;
17617			}
17618			break;
17619		}
17620
17621		/*
17622		 * As part of our recovery attempt for the NOT READY
17623		 * condition, we issue a START STOP UNIT command. However
17624		 * we want to wait for a short delay before attempting this
17625		 * as there may still be more commands coming back from the
17626		 * target with the check condition. To do this we use
17627		 * timeout(9F) to call sd_start_stop_unit_callback() after
17628		 * the delay interval expires. (sd_start_stop_unit_callback()
17629		 * dispatches sd_start_stop_unit_task(), which will issue
17630		 * the actual START STOP UNIT command.)  The delay interval
17631		 * is one-half of the delay that we will use to retry the
17632		 * command that generated the NOT READY condition.
17633		 *
17634		 * Note that we could just dispatch sd_start_stop_unit_task()
17635		 * from here and allow it to sleep for the delay interval,
17636		 * but then we would be tying up the taskq thread
17637		 * unnecessarily for the duration of the delay.
17638		 *
17639		 * Do not issue the START STOP UNIT if the current command
17640		 * is already a START STOP UNIT.
17641		 */
17642		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17643			break;
17644		}
17645
17646		/*
17647		 * Do not schedule the timeout if one is already pending.
17648		 */
17649		if (un->un_startstop_timeid != NULL) {
17650			SD_INFO(SD_LOG_ERROR, un,
17651			    "sd_sense_key_not_ready: restart already issued to"
17652			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17653			    ddi_get_instance(SD_DEVINFO(un)));
17654			break;
17655		}
17656
17657		/*
17658		 * Schedule the START STOP UNIT command, then queue the command
17659		 * for a retry.
17660		 *
17661		 * Note: A timeout is not scheduled for this retry because we
17662		 * want the retry to be serial with the START_STOP_UNIT. The
17663		 * retry will be started when the START_STOP_UNIT is completed
17664		 * in sd_start_stop_unit_task.
17665		 */
17666		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17667		    un, un->un_busy_timeout / 2);
17668		xp->xb_nr_retry_count++;
17669		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17670		return;
17671
17672	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17673		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17674			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17675			    "unit does not respond to selection\n");
17676		}
17677		break;
17678
17679	case 0x3A:	/* MEDIUM NOT PRESENT */
17680		if (sd_error_level >= SCSI_ERR_FATAL) {
17681			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17682			    "Caddy not inserted in drive\n");
17683		}
17684
17685		sr_ejected(un);
17686		un->un_mediastate = DKIO_EJECTED;
17687		/* The state has changed, inform the media watch routines */
17688		cv_broadcast(&un->un_state_cv);
17689		/* Just fail if no media is present in the drive. */
17690		goto fail_command;
17691
17692	default:
17693		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17694			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17695			    "Unit not Ready. Additional sense code 0x%x\n",
17696			    asc);
17697		}
17698		break;
17699	}
17700
17701do_retry:
17702
17703	/*
17704	 * Retry the command, as some targets may report NOT READY for
17705	 * several seconds after being reset.
17706	 */
17707	xp->xb_nr_retry_count++;
17708	si.ssi_severity = SCSI_ERR_RETRYABLE;
17709	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17710	    &si, EIO, un->un_busy_timeout, NULL);
17711
17712	return;
17713
17714fail_command:
17715	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17716	sd_return_failed_command(un, bp, EIO);
17717}
17718
17719
17720
17721/*
17722 *    Function: sd_sense_key_medium_or_hardware_error
17723 *
17724 * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17725 *		sense key.
17726 *
17727 *     Context: May be called from interrupt context
17728 */
17729
17730static void
17731sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17732	uint8_t *sense_datap,
17733	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17734{
17735	struct sd_sense_info	si;
17736	uint8_t sense_key = scsi_sense_key(sense_datap);
17737	uint8_t asc = scsi_sense_asc(sense_datap);
17738
17739	ASSERT(un != NULL);
17740	ASSERT(mutex_owned(SD_MUTEX(un)));
17741	ASSERT(bp != NULL);
17742	ASSERT(xp != NULL);
17743	ASSERT(pktp != NULL);
17744
17745	si.ssi_severity = SCSI_ERR_FATAL;
17746	si.ssi_pfa_flag = FALSE;
17747
17748	if (sense_key == KEY_MEDIUM_ERROR) {
17749		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17750	}
17751
17752	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17753
17754	if ((un->un_reset_retry_count != 0) &&
17755	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17756		mutex_exit(SD_MUTEX(un));
17757		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17758		if (un->un_f_allow_bus_device_reset == TRUE) {
17759
17760			boolean_t try_resetting_target = B_TRUE;
17761
17762			/*
17763			 * We need to be able to handle specific ASC values when
17764			 * we are handling a KEY_HARDWARE_ERROR. In particular,
17765			 * taking the default action of resetting the target may
17766			 * not be the appropriate way to attempt recovery.
17767			 * Resetting a target because of a single LUN failure
17768			 * victimizes all LUNs on that target.
17769			 *
17770			 * This is true for LSI arrays: if an LSI array
17771			 * controller returns an ASC of 0x84 (LUN Dead), we
17772			 * should trust it.
17773			 */
17774
17775			if (sense_key == KEY_HARDWARE_ERROR) {
17776				switch (asc) {
17777				case 0x84:
17778					if (SD_IS_LSI(un)) {
17779						try_resetting_target = B_FALSE;
17780					}
17781					break;
17782				default:
17783					break;
17784				}
17785			}
17786
17787			if (try_resetting_target == B_TRUE) {
17788				int reset_retval = 0;
17789				if (un->un_f_lun_reset_enabled == TRUE) {
17790					SD_TRACE(SD_LOG_IO_CORE, un,
17791					    "sd_sense_key_medium_or_hardware_"
17792					    "error: issuing RESET_LUN\n");
17793					reset_retval =
17794					    scsi_reset(SD_ADDRESS(un),
17795					    RESET_LUN);
17796				}
17797				if (reset_retval == 0) {
17798					SD_TRACE(SD_LOG_IO_CORE, un,
17799					    "sd_sense_key_medium_or_hardware_"
17800					    "error: issuing RESET_TARGET\n");
17801					(void) scsi_reset(SD_ADDRESS(un),
17802					    RESET_TARGET);
17803				}
17804			}
17805		}
17806		mutex_enter(SD_MUTEX(un));
17807	}
17808
17809	/*
17810	 * This really ought to be a fatal error, but we will retry anyway
17811	 * as some drives report this as a spurious error.
17812	 */
17813	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17814	    &si, EIO, (clock_t)0, NULL);
17815}
17816
17817
17818
17819/*
17820 *    Function: sd_sense_key_illegal_request
17821 *
17822 * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17823 *
17824 *     Context: May be called from interrupt context
17825 */
17826
17827static void
17828sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17829	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17830{
17831	struct sd_sense_info	si;
17832
17833	ASSERT(un != NULL);
17834	ASSERT(mutex_owned(SD_MUTEX(un)));
17835	ASSERT(bp != NULL);
17836	ASSERT(xp != NULL);
17837	ASSERT(pktp != NULL);
17838
17839	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17840
17841	si.ssi_severity = SCSI_ERR_INFO;
17842	si.ssi_pfa_flag = FALSE;
17843
17844	/* Pointless to retry if the target thinks it's an illegal request */
17845	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17846	sd_return_failed_command(un, bp, EIO);
17847}
17848
17849
17850
17851
17852/*
17853 *    Function: sd_sense_key_unit_attention
17854 *
17855 * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17856 *
17857 *     Context: May be called from interrupt context
17858 */
17859
17860static void
17861sd_sense_key_unit_attention(struct sd_lun *un,
17862	uint8_t *sense_datap,
17863	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17864{
17865	/*
17866	 * For UNIT ATTENTION we allow retries for one minute. Devices
17867	 * like Sonoma can keep returning UNIT ATTENTION for close to a
17868	 * minute under certain conditions.
17869	 */
17870	int	retry_check_flag = SD_RETRIES_UA;
17871	boolean_t	kstat_updated = B_FALSE;
17872	struct	sd_sense_info		si;
17873	uint8_t asc = scsi_sense_asc(sense_datap);
17874	uint8_t	ascq = scsi_sense_ascq(sense_datap);
17875
17876	ASSERT(un != NULL);
17877	ASSERT(mutex_owned(SD_MUTEX(un)));
17878	ASSERT(bp != NULL);
17879	ASSERT(xp != NULL);
17880	ASSERT(pktp != NULL);
17881
17882	si.ssi_severity = SCSI_ERR_INFO;
17883	si.ssi_pfa_flag = FALSE;
17884
17885
17886	switch (asc) {
17887	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17888		if (sd_report_pfa != 0) {
17889			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17890			si.ssi_pfa_flag = TRUE;
17891			retry_check_flag = SD_RETRIES_STANDARD;
17892			goto do_retry;
17893		}
17894
17895		break;
17896
17897	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17898		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17899			un->un_resvd_status |=
17900			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17901		}
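		/*
		 * A power-on or reset may have reverted the device's mode
		 * settings, including descriptor sense, which LUNs larger
		 * than the Group 1 (10-byte CDB) addressing limit rely on
		 * to report 64-bit LBAs; re-enable it from taskq context.
		 */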
17902#ifdef _LP64
17903		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
17904			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
17905			    un, KM_NOSLEEP) == 0) {
17906				/*
17907				 * If we can't dispatch the task we'll just
17908				 * live without descriptor sense.  We can
17909				 * try again on the next "unit attention".
17910				 */
17911				SD_ERROR(SD_LOG_ERROR, un,
17912				    "sd_sense_key_unit_attention: "
17913				    "Could not dispatch "
17914				    "sd_reenable_dsense_task\n");
17915			}
17916		}
17917#endif /* _LP64 */
17918		/* FALLTHRU */
17919
17920	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
17921		if (!un->un_f_has_removable_media) {
17922			break;
17923		}
17924
17925		/*
17926		 * When we get a unit attention from a removable-media device,
17927		 * it may be in a state that will take a long time to recover
17928		 * (e.g., from a reset).  Since we are executing in interrupt
17929		 * context here, we cannot wait around for the device to come
17930		 * back. So hand this command off to sd_media_change_task()
17931		 * for deferred processing under taskq thread context. (Note
17932		 * that the command still may be failed if a problem is
17933		 * encountered at a later time.)
17934		 */
17935		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
17936		    KM_NOSLEEP) == 0) {
17937			/*
17938			 * Cannot dispatch the request so fail the command.
17939			 */
17940			SD_UPDATE_ERRSTATS(un, sd_harderrs);
17941			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17942			si.ssi_severity = SCSI_ERR_FATAL;
17943			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17944			sd_return_failed_command(un, bp, EIO);
17945		}
17946
17947		/*
17948		 * If we failed to dispatch sd_media_change_task(), the kstats
17949		 * were already updated above. If the dispatch succeeded, the
17950		 * task will update the kstats later if it encounters an error.
17951		 * Either way, set the kstat_updated flag here.
17952		 */
17953		kstat_updated = B_TRUE;
17954
17955		/*
17956		 * Either the command has been successfully dispatched to a
17957		 * task Q for retrying, or the dispatch failed. In either case
17958		 * do NOT retry again by calling sd_retry_command. This sets up
17959		 * two retries of the same command and when one completes and
17960		 * frees the resources the other will access freed memory,
17961		 * a bad thing.
17962		 */
17963		return;
17964
17965	default:
17966		break;
17967	}
17968
17969	/*
17970	 * ASC  ASCQ
17971	 *  2A   09	Capacity data has changed
17972	 *  2A   01	Mode parameters changed
17973	 *  3F   0E	Reported luns data has changed
17974	 * Arrays that support logical unit expansion should report
17975	 * capacity changes (2Ah/09). "Mode parameters changed" and
17976	 * "Reported LUNs data has changed" approximate the same event.
17977	 */
17978	if (((asc == 0x2a) && (ascq == 0x09)) ||
17979	    ((asc == 0x2a) && (ascq == 0x01)) ||
17980	    ((asc == 0x3f) && (ascq == 0x0e))) {
17981		if (taskq_dispatch(sd_tq, sd_target_change_task, un,
17982		    KM_NOSLEEP) == 0) {
17983			SD_ERROR(SD_LOG_ERROR, un,
17984			    "sd_sense_key_unit_attention: "
17985			    "Could not dispatch sd_target_change_task\n");
17986		}
17987	}
17988
17989	/*
17990	 * Update kstat if we haven't done that.
17991	 */
17992	if (!kstat_updated) {
17993		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17994		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17995	}
17996
17997do_retry:
17998	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
17999	    EIO, SD_UA_RETRY_DELAY, NULL);
18000}
18001
18002
18003
18004/*
18005 *    Function: sd_sense_key_fail_command
18006 *
18007	 * Description: Used to fail a command when we don't like the sense key that
18008 *		was returned.
18009 *
18010 *     Context: May be called from interrupt context
18011 */
18012
18013static void
18014sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18015	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18016{
18017	struct sd_sense_info	si;
18018
18019	ASSERT(un != NULL);
18020	ASSERT(mutex_owned(SD_MUTEX(un)));
18021	ASSERT(bp != NULL);
18022	ASSERT(xp != NULL);
18023	ASSERT(pktp != NULL);
18024
18025	si.ssi_severity = SCSI_ERR_FATAL;
18026	si.ssi_pfa_flag = FALSE;
18027
18028	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18029	sd_return_failed_command(un, bp, EIO);
18030}
18031
18032
18033
18034/*
18035 *    Function: sd_sense_key_blank_check
18036 *
18037 * Description: Recovery actions for a SCSI "Blank Check" sense key.
18038 *		Has no monetary connotation.
18039 *
18040 *     Context: May be called from interrupt context
18041 */
18042
18043static void
18044sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18045	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18046{
18047	struct sd_sense_info	si;
18048
18049	ASSERT(un != NULL);
18050	ASSERT(mutex_owned(SD_MUTEX(un)));
18051	ASSERT(bp != NULL);
18052	ASSERT(xp != NULL);
18053	ASSERT(pktp != NULL);
18054
18055	/*
18056	 * Blank check is not fatal for removable devices, therefore
18057	 * it does not require a console message.
18058	 */
18059	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18060	    SCSI_ERR_FATAL;
18061	si.ssi_pfa_flag = FALSE;
18062
18063	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18064	sd_return_failed_command(un, bp, EIO);
18065}
18066
18067
18068
18069
18070/*
18071 *    Function: sd_sense_key_aborted_command
18072 *
18073 * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18074 *
18075 *     Context: May be called from interrupt context
18076 */
18077
18078static void
18079sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18080	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18081{
18082	struct sd_sense_info	si;
18083
18084	ASSERT(un != NULL);
18085	ASSERT(mutex_owned(SD_MUTEX(un)));
18086	ASSERT(bp != NULL);
18087	ASSERT(xp != NULL);
18088	ASSERT(pktp != NULL);
18089
18090	si.ssi_severity = SCSI_ERR_FATAL;
18091	si.ssi_pfa_flag = FALSE;
18092
18093	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18094
18095	/*
18096	 * This really ought to be a fatal error, but we will retry anyway
18097	 * as some drives report this as a spurious error.
18098	 */
18099	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18100	    &si, EIO, drv_usectohz(100000), NULL);
18101}
18102
18103
18104
18105/*
18106 *    Function: sd_sense_key_default
18107 *
18108 * Description: Default recovery action for several SCSI sense keys (basically
18109 *		attempts a retry).
18110 *
18111 *     Context: May be called from interrupt context
18112 */
18113
18114static void
18115sd_sense_key_default(struct sd_lun *un,
18116	uint8_t *sense_datap,
18117	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18118{
18119	struct sd_sense_info	si;
18120	uint8_t sense_key = scsi_sense_key(sense_datap);
18121
18122	ASSERT(un != NULL);
18123	ASSERT(mutex_owned(SD_MUTEX(un)));
18124	ASSERT(bp != NULL);
18125	ASSERT(xp != NULL);
18126	ASSERT(pktp != NULL);
18127
18128	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18129
18130	/*
18131	 * Undecoded sense key.	Attempt retries and hope that will fix
18132	 * the problem.  Otherwise, we're dead.
18133	 */
18134	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18135		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18136		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18137	}
18138
18139	si.ssi_severity = SCSI_ERR_FATAL;
18140	si.ssi_pfa_flag = FALSE;
18141
18142	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18143	    &si, EIO, (clock_t)0, NULL);
18144}
18145
18146
18147
18148/*
18149 *    Function: sd_print_retry_msg
18150 *
18151 * Description: Print a message indicating the retry action being taken.
18152 *
18153 *   Arguments: un - ptr to associated softstate
18154 *		bp - ptr to buf(9S) for the command
18155 *		arg - not used.
18156 *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18157 *			or SD_NO_RETRY_ISSUED
18158 *
18159 *     Context: May be called from interrupt context
18160 */
18161/* ARGSUSED */
18162static void
18163sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18164{
18165	struct sd_xbuf	*xp;
18166	struct scsi_pkt *pktp;
18167	char *reasonp;
18168	char *msgp;
18169
18170	ASSERT(un != NULL);
18171	ASSERT(mutex_owned(SD_MUTEX(un)));
18172	ASSERT(bp != NULL);
18173	pktp = SD_GET_PKTP(bp);
18174	ASSERT(pktp != NULL);
18175	xp = SD_GET_XBUF(bp);
18176	ASSERT(xp != NULL);
18177
18178	ASSERT(!mutex_owned(&un->un_pm_mutex));
18179	mutex_enter(&un->un_pm_mutex);
18180	if ((un->un_state == SD_STATE_SUSPENDED) ||
18181	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18182	    (pktp->pkt_flags & FLAG_SILENT)) {
18183		mutex_exit(&un->un_pm_mutex);
18184		goto update_pkt_reason;
18185	}
18186	mutex_exit(&un->un_pm_mutex);
18187
18188	/*
18189	 * Suppress messages if they are all the same pkt_reason; with
18190	 * TQ, many (up to 256) are returned with the same pkt_reason.
18191	 * If we are in panic, then suppress the retry messages.
18192	 */
18193	switch (flag) {
18194	case SD_NO_RETRY_ISSUED:
18195		msgp = "giving up";
18196		break;
18197	case SD_IMMEDIATE_RETRY_ISSUED:
18198	case SD_DELAYED_RETRY_ISSUED:
18199		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18200		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18201		    (sd_error_level != SCSI_ERR_ALL))) {
18202			return;
18203		}
18204		msgp = "retrying command";
18205		break;
18206	default:
18207		goto update_pkt_reason;
18208	}
18209
18210	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18211	    scsi_rname(pktp->pkt_reason));
18212
18213	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
18214		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18215		    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18216	}
18217
18218update_pkt_reason:
18219	/*
18220	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18221	 * This is to prevent multiple console messages for the same failure
18222	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18223	 * when the command is retried successfully because there still may be
18224	 * more commands coming back with the same value of pktp->pkt_reason.
18225	 */
18226	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18227		un->un_last_pkt_reason = pktp->pkt_reason;
18228	}
18229}
18230
18231
18232/*
18233 *    Function: sd_print_cmd_incomplete_msg
18234 *
18235 * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18236 *
18237 *   Arguments: un - ptr to associated softstate
18238 *		bp - ptr to buf(9S) for the command
18239 *		arg - passed to sd_print_retry_msg()
18240 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18241 *			or SD_NO_RETRY_ISSUED
18242 *
18243 *     Context: May be called from interrupt context
18244 */
18245
18246static void
18247sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18248	int code)
18249{
18250	dev_info_t	*dip;
18251
18252	ASSERT(un != NULL);
18253	ASSERT(mutex_owned(SD_MUTEX(un)));
18254	ASSERT(bp != NULL);
18255
18256	switch (code) {
18257	case SD_NO_RETRY_ISSUED:
18258		/* Command was failed. Someone turned off this target? */
18259		if (un->un_state != SD_STATE_OFFLINE) {
18260			/*
18261			 * Suppress message if we are detaching and
18262			 * device has been disconnected.
18263			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation-
18264			 * private interface and not part of the DDI.
18265			 */
18266			dip = un->un_sd->sd_dev;
18267			if (!(DEVI_IS_DETACHING(dip) &&
18268			    DEVI_IS_DEVICE_REMOVED(dip))) {
18269				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18270				"disk not responding to selection\n");
18271			}
18272			New_state(un, SD_STATE_OFFLINE);
18273		}
18274		break;
18275
18276	case SD_DELAYED_RETRY_ISSUED:
18277	case SD_IMMEDIATE_RETRY_ISSUED:
18278	default:
18279		/* Command was successfully queued for retry */
18280		sd_print_retry_msg(un, bp, arg, code);
18281		break;
18282	}
18283}
18284
18285
18286/*
18287 *    Function: sd_pkt_reason_cmd_incomplete
18288 *
18289 * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18290 *
18291 *     Context: May be called from interrupt context
18292 */
18293
18294static void
18295sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18296	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18297{
18298	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18299
18300	ASSERT(un != NULL);
18301	ASSERT(mutex_owned(SD_MUTEX(un)));
18302	ASSERT(bp != NULL);
18303	ASSERT(xp != NULL);
18304	ASSERT(pktp != NULL);
18305
18306	/* Do not do a reset if selection did not complete */
18307	/* Note: Should this not just check the bit? */
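	/*
	 * (pkt_state is a bit mask, so this equality matches only when
	 * STATE_GOT_BUS is the sole bit set, i.e. we got the bus but
	 * got no further.)
	 */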
18308	if (pktp->pkt_state != STATE_GOT_BUS) {
18309		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18310		sd_reset_target(un, pktp);
18311	}
18312
18313	/*
18314	 * If the target was not successfully selected, then set
18315	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18316	 * with the target, and further retries and/or commands are
18317	 * likely to take a long time.
18318	 */
18319	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18320		flag |= SD_RETRIES_FAILFAST;
18321	}
18322
18323	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18324
18325	sd_retry_command(un, bp, flag,
18326	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18327}
18328
18329
18330
18331/*
18332 *    Function: sd_pkt_reason_cmd_tran_err
18333 *
18334 * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18335 *
18336 *     Context: May be called from interrupt context
18337 */
18338
18339static void
18340sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18341	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18342{
18343	ASSERT(un != NULL);
18344	ASSERT(mutex_owned(SD_MUTEX(un)));
18345	ASSERT(bp != NULL);
18346	ASSERT(xp != NULL);
18347	ASSERT(pktp != NULL);
18348
18349	/*
18350	 * Do not reset if we got a parity error, or if
18351	 * selection did not complete.
18352	 */
18353	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18354	/* Note: Should this not just check the bit for pkt_state? */
18355	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18356	    (pktp->pkt_state != STATE_GOT_BUS)) {
18357		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18358		sd_reset_target(un, pktp);
18359	}
18360
18361	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18362
18363	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18364	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18365}
18366
18367
18368
18369/*
18370 *    Function: sd_pkt_reason_cmd_reset
18371 *
18372 * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18373 *
18374 *     Context: May be called from interrupt context
18375 */
18376
18377static void
18378sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18379	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18380{
18381	ASSERT(un != NULL);
18382	ASSERT(mutex_owned(SD_MUTEX(un)));
18383	ASSERT(bp != NULL);
18384	ASSERT(xp != NULL);
18385	ASSERT(pktp != NULL);
18386
18387	/* The target may still be running the command, so try to reset. */
18388	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18389	sd_reset_target(un, pktp);
18390
18391	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18392
18393	/*
18394	 * If pkt_reason is CMD_RESET, chances are that this pkt got
18395	 * reset because another target on this bus caused it. The target
18396	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18397	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18398	 */
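	/*
	 * SD_RETRIES_VICTIM selects the victim retry limit
	 * (un_victim_retry_count) rather than the standard retry count,
	 * on the assumption that this command was collateral damage.
	 */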
18399
18400	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18401	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18402}
18403
18404
18405
18406
18407/*
18408 *    Function: sd_pkt_reason_cmd_aborted
18409 *
18410 * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18411 *
18412 *     Context: May be called from interrupt context
18413 */
18414
18415static void
18416sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18417	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18418{
18419	ASSERT(un != NULL);
18420	ASSERT(mutex_owned(SD_MUTEX(un)));
18421	ASSERT(bp != NULL);
18422	ASSERT(xp != NULL);
18423	ASSERT(pktp != NULL);
18424
18425	/* The target may still be running the command, so try to reset. */
18426	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18427	sd_reset_target(un, pktp);
18428
18429	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18430
18431	/*
18432	 * If pkt_reason is CMD_ABORTED, chances are that this pkt got
18433	 * aborted because another target on this bus caused it. The target
18434	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18435	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18436	 */
18437
18438	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18439	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18440}
18441
18442
18443
18444/*
18445 *    Function: sd_pkt_reason_cmd_timeout
18446 *
18447 * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18448 *
18449 *     Context: May be called from interrupt context
18450 */
18451
18452static void
18453sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18454	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18455{
18456	ASSERT(un != NULL);
18457	ASSERT(mutex_owned(SD_MUTEX(un)));
18458	ASSERT(bp != NULL);
18459	ASSERT(xp != NULL);
18460	ASSERT(pktp != NULL);
18461
18462
18463	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18464	sd_reset_target(un, pktp);
18465
18466	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18467
18468	/*
18469	 * A command timeout indicates that we could not establish
18470	 * communication with the target, so set SD_RETRIES_FAILFAST
18471	 * as further retries/commands are likely to take a long time.
18472	 */
18473	sd_retry_command(un, bp,
18474	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18475	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18476}
18477
18478
18479
18480/*
18481 *    Function: sd_pkt_reason_cmd_unx_bus_free
18482 *
18483 * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18484 *
18485 *     Context: May be called from interrupt context
18486 */
18487
18488static void
18489sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18490	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18491{
18492	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18493
18494	ASSERT(un != NULL);
18495	ASSERT(mutex_owned(SD_MUTEX(un)));
18496	ASSERT(bp != NULL);
18497	ASSERT(xp != NULL);
18498	ASSERT(pktp != NULL);
18499
18500	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18501	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18502
18503	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18504	    sd_print_retry_msg : NULL;
18505
18506	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18507	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18508}
18509
18510
18511/*
18512 *    Function: sd_pkt_reason_cmd_tag_reject
18513 *
18514 * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18515 *
18516 *     Context: May be called from interrupt context
18517 */
18518
18519static void
18520sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18521	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18522{
18523	ASSERT(un != NULL);
18524	ASSERT(mutex_owned(SD_MUTEX(un)));
18525	ASSERT(bp != NULL);
18526	ASSERT(xp != NULL);
18527	ASSERT(pktp != NULL);
18528
18529	SD_UPDATE_ERRSTATS(un, sd_harderrs);
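	/*
	 * The target rejected our queue tag, so fall back to untagged
	 * operation: clear the tag flags, clamp the throttle, and turn
	 * off the HBA's tagged-qing capability.
	 */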
18530	pktp->pkt_flags = 0;
18531	un->un_tagflags = 0;
18532	if (un->un_f_opt_queueing == TRUE) {
18533		un->un_throttle = min(un->un_throttle, 3);
18534	} else {
18535		un->un_throttle = 1;
18536	}
18537	mutex_exit(SD_MUTEX(un));
18538	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18539	mutex_enter(SD_MUTEX(un));
18540
18541	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18542
18543	/* Legacy behavior not to check retry counts here. */
18544	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18545	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18546}
18547
18548
18549/*
18550 *    Function: sd_pkt_reason_default
18551 *
18552 * Description: Default recovery actions for SCSA pkt_reason values that
18553 *		do not have more explicit recovery actions.
18554 *
18555 *     Context: May be called from interrupt context
18556 */
18557
18558static void
18559sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18560	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18561{
18562	ASSERT(un != NULL);
18563	ASSERT(mutex_owned(SD_MUTEX(un)));
18564	ASSERT(bp != NULL);
18565	ASSERT(xp != NULL);
18566	ASSERT(pktp != NULL);
18567
18568	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18569	sd_reset_target(un, pktp);
18570
18571	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18572
18573	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18574	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18575}
18576
18577
18578
18579/*
18580 *    Function: sd_pkt_status_check_condition
18581 *
18582 * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18583 *
18584 *     Context: May be called from interrupt context
18585 */
18586
18587static void
18588sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18589	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18590{
18591	ASSERT(un != NULL);
18592	ASSERT(mutex_owned(SD_MUTEX(un)));
18593	ASSERT(bp != NULL);
18594	ASSERT(xp != NULL);
18595	ASSERT(pktp != NULL);
18596
18597	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18598	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18599
18600	/*
18601	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18602	 * command will be retried after the request sense). Otherwise, retry
18603	 * the command. Note: we are issuing the request sense even though the
18604	 * retry limit may have been reached for the failed command.
18605	 */
18606	if (un->un_f_arq_enabled == FALSE) {
18607		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18608		    "no ARQ, sending request sense command\n");
18609		sd_send_request_sense_command(un, bp, pktp);
18610	} else {
18611		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18612		    "ARQ, retrying request sense command\n");
18613#if defined(__i386) || defined(__amd64)
18614		/*
18615		 * The SD_RETRY_DELAY value used here needs to be adjusted
18616		 * whenever SD_RETRY_DELAY changes in sddef.h.
18617		 */
18618		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18619		    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
18620		    NULL);
18621#else
18622		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18623		    EIO, SD_RETRY_DELAY, NULL);
18624#endif
18625	}
18626
18627	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18628}
18629
18630
18631/*
18632 *    Function: sd_pkt_status_busy
18633 *
18634 * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18635 *
18636 *     Context: May be called from interrupt context
18637 */
18638
18639static void
18640sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18641	struct scsi_pkt *pktp)
18642{
18643	ASSERT(un != NULL);
18644	ASSERT(mutex_owned(SD_MUTEX(un)));
18645	ASSERT(bp != NULL);
18646	ASSERT(xp != NULL);
18647	ASSERT(pktp != NULL);
18648
18649	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18650	    "sd_pkt_status_busy: entry\n");
18651
18652	/* If retries are exhausted, just fail the command. */
18653	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18654		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18655		    "device busy too long\n");
18656		sd_return_failed_command(un, bp, EIO);
18657		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18658		    "sd_pkt_status_busy: exit\n");
18659		return;
18660	}
18661	xp->xb_retry_count++;
18662
18663	/*
18664	 * Try to reset the target. However, we do not want to perform
18665	 * more than one reset if the device continues to fail. The reset
18666	 * will be performed when the retry count reaches the reset
18667	 * threshold.  This threshold should be set such that at least
18668	 * one retry is issued before the reset is performed.
18669	 */
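	/*
	 * For example, with un_reset_retry_count == 5 the reset is
	 * attempted on the fifth retry; values below 2 are clamped to 2
	 * so that at least one plain retry precedes the reset.
	 */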
18670	if (xp->xb_retry_count ==
18671	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18672		int rval = 0;
18673		mutex_exit(SD_MUTEX(un));
18674		if (un->un_f_allow_bus_device_reset == TRUE) {
18675			/*
18676			 * First try to reset the LUN; if we cannot then
18677			 * try to reset the target.
18678			 */
18679			if (un->un_f_lun_reset_enabled == TRUE) {
18680				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18681				    "sd_pkt_status_busy: RESET_LUN\n");
18682				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18683			}
18684			if (rval == 0) {
18685				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18686				    "sd_pkt_status_busy: RESET_TARGET\n");
18687				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18688			}
18689		}
18690		if (rval == 0) {
18691			/*
18692			 * If the RESET_LUN and/or RESET_TARGET failed,
18693			 * try RESET_ALL
18694			 */
18695			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18696			    "sd_pkt_status_busy: RESET_ALL\n");
18697			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18698		}
18699		mutex_enter(SD_MUTEX(un));
18700		if (rval == 0) {
18701			/*
18702			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18703			 * At this point we give up & fail the command.
18704			 */
18705			sd_return_failed_command(un, bp, EIO);
18706			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18707			    "sd_pkt_status_busy: exit (failed cmd)\n");
18708			return;
18709		}
18710	}
18711
18712	/*
18713	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18714	 * we have already checked the retry counts above.
18715	 */
18716	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18717	    EIO, un->un_busy_timeout, NULL);
18718
18719	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18720	    "sd_pkt_status_busy: exit\n");
18721}
18722
18723
18724/*
18725 *    Function: sd_pkt_status_reservation_conflict
18726 *
18727 * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18728 *		command status.
18729 *
18730 *     Context: May be called from interrupt context
18731 */
18732
18733static void
18734sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18735	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18736{
18737	ASSERT(un != NULL);
18738	ASSERT(mutex_owned(SD_MUTEX(un)));
18739	ASSERT(bp != NULL);
18740	ASSERT(xp != NULL);
18741	ASSERT(pktp != NULL);
18742
18743	/*
18744	 * If the command was PERSISTENT_RESERVATION_[IN|OUT], the reservation
18745	 * conflict could have various causes: incorrect keys, an unregistered
18746	 * initiator, or no reservation held. So we return EACCES to the caller.
18747	 */
18748	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18749		int cmd = SD_GET_PKT_OPCODE(pktp);
18750		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18751		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18752			sd_return_failed_command(un, bp, EACCES);
18753			return;
18754		}
18755	}
18756
18757	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18758
18759	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18760		if (sd_failfast_enable != 0) {
18761			/* By definition, we must panic here.... */
18762			sd_panic_for_res_conflict(un);
18763			/*NOTREACHED*/
18764		}
18765		SD_ERROR(SD_LOG_IO, un,
18766		    "sd_handle_resv_conflict: Disk Reserved\n");
18767		sd_return_failed_command(un, bp, EACCES);
18768		return;
18769	}
18770
18771	/*
18772	 * 1147670: retry only if sd_retry_on_reservation_conflict
18773	 * property is set (default is 1). Retries will not succeed
18774	 * on a disk reserved by another initiator. HA systems
18775	 * may reset this via sd.conf to avoid these retries.
18776	 *
18777	 * Note: The legacy return code for this failure is EIO, however EACCES
18778	 * seems more appropriate for a reservation conflict.
18779	 */
18780	if (sd_retry_on_reservation_conflict == 0) {
18781		SD_ERROR(SD_LOG_IO, un,
18782		    "sd_handle_resv_conflict: Device Reserved\n");
18783		sd_return_failed_command(un, bp, EIO);
18784		return;
18785	}
18786
18787	/*
18788	 * Retry the command if we can.
18789	 *
18790	 * Note: The legacy return code for this failure is EIO, however EACCES
18791	 * seems more appropriate for a reservation conflict.
18792	 */
18793	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18794	    (clock_t)2, NULL);
18795}
18796
18797
18798
18799/*
18800 *    Function: sd_pkt_status_qfull
18801 *
18802 * Description: Handle a QUEUE FULL condition from the target.  This can
18803 *		occur if the HBA does not handle the queue full condition.
18804 *		(Basically this means third-party HBAs, since Sun HBAs
18805 *		handle the queue full condition.)  Note that if there are
18806 *		some commands already in the transport, then the queue full
18807 *		has occurred because the queue for this nexus is actually
18808 *		full. If there are no commands in the transport, then the
18809 *		queue full is resulting from some other initiator or lun
18810 *		consuming all the resources at the target.
18811 *
18812 *     Context: May be called from interrupt context
18813 */
18814
18815static void
18816sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18817	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18818{
18819	ASSERT(un != NULL);
18820	ASSERT(mutex_owned(SD_MUTEX(un)));
18821	ASSERT(bp != NULL);
18822	ASSERT(xp != NULL);
18823	ASSERT(pktp != NULL);
18824
18825	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18826	    "sd_pkt_status_qfull: entry\n");
18827
18828	/*
18829	 * Just lower the QFULL throttle and retry the command.  Note that
18830	 * we do not limit the number of retries here.
18831	 */
18832	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18833	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18834	    SD_RESTART_TIMEOUT, NULL);
18835
18836	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18837	    "sd_pkt_status_qfull: exit\n");
18838}
18839
18840
18841/*
18842 *    Function: sd_reset_target
18843 *
18844 * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18845 *		RESET_TARGET, or RESET_ALL.
18846 *
18847 *     Context: May be called under interrupt context.
18848 */
18849
18850static void
18851sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18852{
18853	int rval = 0;
18854
18855	ASSERT(un != NULL);
18856	ASSERT(mutex_owned(SD_MUTEX(un)));
18857	ASSERT(pktp != NULL);
18858
18859	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18860
18861	/*
18862	 * No need to reset if the transport layer has already done so.
18863	 */
18864	if ((pktp->pkt_statistics &
18865	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18866		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18867		    "sd_reset_target: no reset\n");
18868		return;
18869	}
18870
18871	mutex_exit(SD_MUTEX(un));
18872
18873	if (un->un_f_allow_bus_device_reset == TRUE) {
18874		if (un->un_f_lun_reset_enabled == TRUE) {
18875			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18876			    "sd_reset_target: RESET_LUN\n");
18877			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18878		}
18879		if (rval == 0) {
18880			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18881			    "sd_reset_target: RESET_TARGET\n");
18882			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18883		}
18884	}
18885
18886	if (rval == 0) {
18887		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18888		    "sd_reset_target: RESET_ALL\n");
18889		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18890	}
18891
18892	mutex_enter(SD_MUTEX(un));
18893
18894	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18895}
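
/*
 * Editorial sketch (not part of the original driver): the reset
 * escalation policy used by sd_reset_target() and sd_pkt_status_busy()
 * above, condensed into one hypothetical helper.  scsi_reset(9F)
 * returns 1 on success and 0 on failure, so each wider reset level is
 * attempted only when the narrower one fails.  Compiled out.
 */
#if 0
static int
sd_example_reset_escalate(struct sd_lun *un)
{
	int rval = 0;

	/* Narrowest first: reset just this LUN, then the target. */
	if (un->un_f_allow_bus_device_reset == TRUE) {
		if (un->un_f_lun_reset_enabled == TRUE)
			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		if (rval == 0)
			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
	}
	/* Last resort: reset everything on the bus. */
	if (rval == 0)
		rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
	return (rval);
}
#endif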
18896
18897/*
18898 *    Function: sd_target_change_task
18899 *
18900 * Description: Handle dynamic target change
18901 *
18902 *     Context: Executes in a taskq() thread context
18903 */
18904static void
18905sd_target_change_task(void *arg)
18906{
18907	struct sd_lun		*un = arg;
18908	uint64_t		capacity;
18909	diskaddr_t		label_cap;
18910	uint_t			lbasize;
18911	sd_ssc_t		*ssc;
18912
18913	ASSERT(un != NULL);
18914	ASSERT(!mutex_owned(SD_MUTEX(un)));
18915
18916	if ((un->un_f_blockcount_is_valid == FALSE) ||
18917	    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
18918		return;
18919	}
18920
18921	ssc = sd_ssc_init(un);
18922
18923	if (sd_send_scsi_READ_CAPACITY(ssc, &capacity,
18924	    &lbasize, SD_PATH_DIRECT) != 0) {
18925		SD_ERROR(SD_LOG_ERROR, un,
18926		    "sd_target_change_task: fail to read capacity\n");
18927		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
18928		goto task_exit;
18929	}
18930
18931	mutex_enter(SD_MUTEX(un));
18932	if (capacity <= un->un_blockcount) {
18933		mutex_exit(SD_MUTEX(un));
18934		goto task_exit;
18935	}
18936
18937	sd_update_block_info(un, lbasize, capacity);
18938	mutex_exit(SD_MUTEX(un));
18939
18940	/*
18941	 * If lun is EFI labeled and lun capacity is greater than the
18942	 * capacity contained in the label, log a sys event.
18943	 */
18944	if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
18945	    (void*)SD_PATH_DIRECT) == 0) {
18946		mutex_enter(SD_MUTEX(un));
18947		if (un->un_f_blockcount_is_valid &&
18948		    un->un_blockcount > label_cap) {
18949			mutex_exit(SD_MUTEX(un));
18950			sd_log_lun_expansion_event(un, KM_SLEEP);
18951		} else {
18952			mutex_exit(SD_MUTEX(un));
18953		}
18954	}
18955
18956task_exit:
18957	sd_ssc_fini(ssc);
18958}
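
/*
 * Editorial note (illustrative, not from the original source): the
 * handler above executes in taskq context; dispatching it would mirror
 * the sd_tq usage seen elsewhere in this file, e.g.:
 *
 *	(void) taskq_dispatch(sd_tq, sd_target_change_task, un, KM_NOSLEEP);
 */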
18959
18960/*
18961 *    Function: sd_log_lun_expansion_event
18962 *
18963 * Description: Log lun expansion sys event
18964 *
18965 *     Context: Never called from interrupt context
18966 */
18967static void
18968sd_log_lun_expansion_event(struct sd_lun *un, int km_flag)
18969{
18970	int err;
18971	char			*path;
18972	nvlist_t		*dle_attr_list;
18973
18974	/* Allocate and build sysevent attribute list */
18975	err = nvlist_alloc(&dle_attr_list, NV_UNIQUE_NAME_TYPE, km_flag);
18976	if (err != 0) {
18977		SD_ERROR(SD_LOG_ERROR, un,
18978		    "sd_log_lun_expansion_event: fail to allocate space\n");
18979		return;
18980	}
18981
18982	path = kmem_alloc(MAXPATHLEN, km_flag);
18983	if (path == NULL) {
18984		nvlist_free(dle_attr_list);
18985		SD_ERROR(SD_LOG_ERROR, un,
18986		    "sd_log_lun_expansion_event: fail to allocate space\n");
18987		return;
18988	}
18989	/*
18990	 * Add path attribute to identify the lun.
18991	 * We are using minor node 'a' as the sysevent attribute.
18992	 */
18993	(void) snprintf(path, MAXPATHLEN, "/devices");
18994	(void) ddi_pathname(SD_DEVINFO(un), path + strlen(path));
18995	(void) snprintf(path + strlen(path), MAXPATHLEN - strlen(path),
18996	    ":a");
18997
18998	err = nvlist_add_string(dle_attr_list, DEV_PHYS_PATH, path);
18999	if (err != 0) {
19000		nvlist_free(dle_attr_list);
19001		kmem_free(path, MAXPATHLEN);
19002		SD_ERROR(SD_LOG_ERROR, un,
19003		    "sd_log_lun_expansion_event: fail to add attribute\n");
19004		return;
19005	}
19006
19007	/* Log dynamic lun expansion sysevent */
19008	err = ddi_log_sysevent(SD_DEVINFO(un), SUNW_VENDOR, EC_DEV_STATUS,
19009	    ESC_DEV_DLE, dle_attr_list, NULL, km_flag);
19010	if (err != DDI_SUCCESS) {
19011		SD_ERROR(SD_LOG_ERROR, un,
19012		    "sd_log_lun_expansion_event: fail to log sysevent\n");
19013	}
19014
19015	nvlist_free(dle_attr_list);
19016	kmem_free(path, MAXPATHLEN);
19017}
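
/*
 * Editorial worked example (hypothetical device path): after the
 * snprintf() and ddi_pathname() calls above, the DEV_PHYS_PATH
 * attribute carries the /devices prefix, the full devinfo path, and
 * the ':a' minor node suffix, e.g.:
 *
 *	/devices/pci@0,0/pci1022,7450@2/scsi@1/sd@0,0:a
 */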
19018
19019/*
19020 *    Function: sd_media_change_task
19021 *
19022 * Description: Recovery action for CDROM to become available.
19023 *
19024 *     Context: Executes in a taskq() thread context
19025 */
19026
19027static void
19028sd_media_change_task(void *arg)
19029{
19030	struct	scsi_pkt	*pktp = arg;
19031	struct	sd_lun		*un;
19032	struct	buf		*bp;
19033	struct	sd_xbuf		*xp;
19034	int	err		= 0;
19035	int	retry_count	= 0;
19036	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
19037	struct	sd_sense_info	si;
19038
19039	ASSERT(pktp != NULL);
19040	bp = (struct buf *)pktp->pkt_private;
19041	ASSERT(bp != NULL);
19042	xp = SD_GET_XBUF(bp);
19043	ASSERT(xp != NULL);
19044	un = SD_GET_UN(bp);
19045	ASSERT(un != NULL);
19046	ASSERT(!mutex_owned(SD_MUTEX(un)));
19047	ASSERT(un->un_f_monitor_media_state);
19048
19049	si.ssi_severity = SCSI_ERR_INFO;
19050	si.ssi_pfa_flag = FALSE;
19051
19052	/*
19053	 * When a reset is issued on a CDROM, it takes a long time to
19054	 * recover. The first few attempts to read the capacity and other
19055	 * things related to handling the unit attention fail (with an ASC
19056	 * of 0x4 and an ASCQ of 0x1). In that case we want to do enough
19057	 * retries, while still limiting the retries in other cases of
19058	 * genuine failure, such as no media in the drive.
19059	 */
19060	while (retry_count++ < retry_limit) {
19061		if ((err = sd_handle_mchange(un)) == 0) {
19062			break;
19063		}
19064		if (err == EAGAIN) {
19065			retry_limit = SD_UNIT_ATTENTION_RETRY;
19066		}
19067		/* Sleep for 0.5 sec. & try again */
19068		delay(drv_usectohz(500000));
19069	}
19070
19071	/*
19072	 * Dispatch (retry or fail) the original command here,
19073	 * along with appropriate console messages....
19074	 *
19075	 * Must grab the mutex before calling sd_retry_command,
19076	 * sd_print_sense_msg and sd_return_failed_command.
19077	 */
19078	mutex_enter(SD_MUTEX(un));
19079	if (err != SD_CMD_SUCCESS) {
19080		SD_UPDATE_ERRSTATS(un, sd_harderrs);
19081		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
19082		si.ssi_severity = SCSI_ERR_FATAL;
19083		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
19084		sd_return_failed_command(un, bp, EIO);
19085	} else {
19086		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
19087		    &si, EIO, (clock_t)0, NULL);
19088	}
19089	mutex_exit(SD_MUTEX(un));
19090}
19091
19092
19093
19094/*
19095 *    Function: sd_handle_mchange
19096 *
19097 * Description: Perform geometry validation & other recovery when a CDROM
19098 *		has been removed from the drive.
19099 *
19100 * Return Code: 0 for success
19101 *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19102 *		sd_send_scsi_READ_CAPACITY()
19103 *
19104 *     Context: Executes in a taskq() thread context
19105 */
19106
19107static int
19108sd_handle_mchange(struct sd_lun *un)
19109{
19110	uint64_t	capacity;
19111	uint32_t	lbasize;
19112	int		rval;
19113	sd_ssc_t	*ssc;
19114
19115	ASSERT(!mutex_owned(SD_MUTEX(un)));
19116	ASSERT(un->un_f_monitor_media_state);
19117
19118	ssc = sd_ssc_init(un);
19119	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
19120	    SD_PATH_DIRECT_PRIORITY);
19121
19122	if (rval != 0)
19123		goto failed;
19124
19125	mutex_enter(SD_MUTEX(un));
19126	sd_update_block_info(un, lbasize, capacity);
19127
19128	if (un->un_errstats != NULL) {
19129		struct	sd_errstats *stp =
19130		    (struct sd_errstats *)un->un_errstats->ks_data;
19131		stp->sd_capacity.value.ui64 = (uint64_t)
19132		    ((uint64_t)un->un_blockcount *
19133		    (uint64_t)un->un_tgt_blocksize);
19134	}
19135
19136	/*
19137	 * Check if the media in the device is writable or not
19138	 */
19139	if (ISCD(un)) {
19140		sd_check_for_writable_cd(ssc, SD_PATH_DIRECT_PRIORITY);
19141	}
19142
19143	/*
19144	 * Note: Maybe let the strategy/partitioning chain worry about getting
19145	 * valid geometry.
19146	 */
19147	mutex_exit(SD_MUTEX(un));
19148	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
19149
19150
19151	if (cmlb_validate(un->un_cmlbhandle, 0,
19152	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
19153		sd_ssc_fini(ssc);
19154		return (EIO);
19155	} else {
19156		if (un->un_f_pkstats_enabled) {
19157			sd_set_pstats(un);
19158			SD_TRACE(SD_LOG_IO_PARTITION, un,
19159			    "sd_handle_mchange: un:0x%p pstats created and "
19160			    "set\n", un);
19161		}
19162	}
19163
19164	/*
19165	 * Try to lock the door
19166	 */
19167	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
19168	    SD_PATH_DIRECT_PRIORITY);
19169failed:
19170	if (rval != 0)
19171		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19172	sd_ssc_fini(ssc);
19173	return (rval);
19174}
19175
19176
19177/*
19178 *    Function: sd_send_scsi_DOORLOCK
19179 *
19180 * Description: Issue the scsi DOOR LOCK command
19181 *
19182 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19183 *                      structure for this target.
19184 *		flag  - SD_REMOVAL_ALLOW
19185 *			SD_REMOVAL_PREVENT
19186 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19187 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19188 *			to use the USCSI "direct" chain and bypass the normal
19189 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19190 *			command is issued as part of an error recovery action.
19191 *
19192 * Return Code: 0   - Success
19193 *		errno return code from sd_ssc_send()
19194 *
19195 *     Context: Can sleep.
19196 */
19197
19198static int
19199sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag)
19200{
19201	struct scsi_extended_sense	sense_buf;
19202	union scsi_cdb		cdb;
19203	struct uscsi_cmd	ucmd_buf;
19204	int			status;
19205	struct sd_lun		*un;
19206
19207	ASSERT(ssc != NULL);
19208	un = ssc->ssc_un;
19209	ASSERT(un != NULL);
19210	ASSERT(!mutex_owned(SD_MUTEX(un)));
19211
19212	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19213
19214	/* already determined doorlock is not supported, fake success */
19215	if (un->un_f_doorlock_supported == FALSE) {
19216		return (0);
19217	}
19218
19219	/*
19220	 * If we are ejecting and see an SD_REMOVAL_PREVENT,
19221	 * ignore the command so we can complete the eject
19222	 * operation.
19223	 */
19224	if (flag == SD_REMOVAL_PREVENT) {
19225		mutex_enter(SD_MUTEX(un));
19226		if (un->un_f_ejecting == TRUE) {
19227			mutex_exit(SD_MUTEX(un));
19228			return (EAGAIN);
19229		}
19230		mutex_exit(SD_MUTEX(un));
19231	}
19232
19233	bzero(&cdb, sizeof (cdb));
19234	bzero(&ucmd_buf, sizeof (ucmd_buf));
19235
19236	cdb.scc_cmd = SCMD_DOORLOCK;
19237	cdb.cdb_opaque[4] = (uchar_t)flag;
19238
19239	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19240	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19241	ucmd_buf.uscsi_bufaddr	= NULL;
19242	ucmd_buf.uscsi_buflen	= 0;
19243	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19244	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19245	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19246	ucmd_buf.uscsi_timeout	= 15;
19247
19248	SD_TRACE(SD_LOG_IO, un,
19249	    "sd_send_scsi_DOORLOCK: returning sd_ssc_send\n");
19250
19251	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19252	    UIO_SYSSPACE, path_flag);
19253
19254	if (status == 0)
19255		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19256
19257	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19258	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19259	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19260		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19261
19262		/* fake success and skip subsequent doorlock commands */
19263		un->un_f_doorlock_supported = FALSE;
19264		return (0);
19265	}
19266
19267	return (status);
19268}
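
/*
 * Editorial worked example: SCMD_DOORLOCK is the six-byte PREVENT ALLOW
 * MEDIUM REMOVAL command (opcode 0x1E), and only byte 4 carries a
 * payload here.  Assuming SD_REMOVAL_PREVENT == 1, the CDB built above
 * is:
 *
 *	0x1E 0x00 0x00 0x00 0x01 0x00
 *
 * with SD_REMOVAL_ALLOW yielding 0x00 in byte 4 instead.
 */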
19269
19270/*
19271 *    Function: sd_send_scsi_READ_CAPACITY
19272 *
19273 * Description: This routine uses the scsi READ CAPACITY command to determine
19274 *		the device capacity in number of blocks and the device native
19275 *		block size. If this function returns a failure, then the
19276 *		values in *capp and *lbap are undefined.  If the capacity
19277 *		returned is 0xffffffff then the lun is too large for a
19278 *		normal READ CAPACITY command and the results of a
19279 *		READ CAPACITY 16 will be used instead.
19280 *
19281 *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19282 *		capp - ptr to unsigned 64-bit variable to receive the
19283 *			capacity value from the command.
19284 *		lbap - ptr to unsigned 32-bit variable to receive the
19285 *			block size value from the command
19286 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19287 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19288 *			to use the USCSI "direct" chain and bypass the normal
19289 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19290 *			command is issued as part of an error recovery action.
19291 *
19292 * Return Code: 0   - Success
19293 *		EIO - IO error
19294 *		EACCES - Reservation conflict detected
19295 *		EAGAIN - Device is becoming ready
19296 *		errno return code from sd_ssc_send()
19297 *
19298 *     Context: Can sleep.  Blocks until command completes.
19299 */
19300
19301#define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19302
19303static int
19304sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
19305	int path_flag)
19306{
19307	struct	scsi_extended_sense	sense_buf;
19308	struct	uscsi_cmd	ucmd_buf;
19309	union	scsi_cdb	cdb;
19310	uint32_t		*capacity_buf;
19311	uint64_t		capacity;
19312	uint32_t		lbasize;
19313	int			status;
19314	struct sd_lun		*un;
19315
19316	ASSERT(ssc != NULL);
19317
19318	un = ssc->ssc_un;
19319	ASSERT(un != NULL);
19320	ASSERT(!mutex_owned(SD_MUTEX(un)));
19321	ASSERT(capp != NULL);
19322	ASSERT(lbap != NULL);
19323
19324	SD_TRACE(SD_LOG_IO, un,
19325	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19326
19327	/*
19328	 * First send a READ_CAPACITY command to the target.
19329	 * (This command is mandatory under SCSI-2.)
19330	 *
19331	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19332	 * Medium Indicator bit is cleared.  The address field must be
19333	 * zero if the PMI bit is zero.
19334	 */
19335	bzero(&cdb, sizeof (cdb));
19336	bzero(&ucmd_buf, sizeof (ucmd_buf));
19337
19338	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19339
19340	cdb.scc_cmd = SCMD_READ_CAPACITY;
19341
19342	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19343	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19344	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19345	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19346	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19347	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19348	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19349	ucmd_buf.uscsi_timeout	= 60;
19350
19351	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19352	    UIO_SYSSPACE, path_flag);
19353
19354	switch (status) {
19355	case 0:
19356		/* Return failure if we did not get valid capacity data. */
19357		if (ucmd_buf.uscsi_resid != 0) {
19358			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19359			    "sd_send_scsi_READ_CAPACITY received invalid "
19360			    "capacity data");
19361			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19362			return (EIO);
19363		}
19364		/*
19365		 * Read capacity and block size from the READ CAPACITY 10 data.
19366		 * This data may be adjusted later due to device specific
19367		 * issues.
19368		 *
19369		 * According to the SCSI spec, the READ CAPACITY 10
19370		 * command returns the following:
19371		 *
19372		 *  bytes 0-3: Maximum logical block address available.
19373		 *		(MSB in byte:0 & LSB in byte:3)
19374		 *
19375		 *  bytes 4-7: Block length in bytes
19376		 *		(MSB in byte:4 & LSB in byte:7)
19377		 *
19378		 */
19379		capacity = BE_32(capacity_buf[0]);
19380		lbasize = BE_32(capacity_buf[1]);
19381
19382		/*
19383		 * Done with capacity_buf
19384		 */
19385		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19386
19387		/*
19388		 * if the reported capacity is set to all 0xf's, then
19389		 * this disk is too large and requires SBC-2 commands.
19390		 * Reissue the request using READ CAPACITY 16.
19391		 */
19392		if (capacity == 0xffffffff) {
19393			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19394			status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
19395			    &lbasize, path_flag);
19396			if (status != 0) {
19397				return (status);
19398			}
19399		}
19400		break;	/* Success! */
19401	case EIO:
19402		switch (ucmd_buf.uscsi_status) {
19403		case STATUS_RESERVATION_CONFLICT:
19404			status = EACCES;
19405			break;
19406		case STATUS_CHECK:
19407			/*
19408			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19409			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19410			 */
19411			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19412			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19413			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19414				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19415				return (EAGAIN);
19416			}
19417			break;
19418		default:
19419			break;
19420		}
19421		/* FALLTHRU */
19422	default:
19423		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19424		return (status);
19425	}
19426
19427	/*
19428	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19429	 * (2352 and 0 are common) so for these devices always force the value
19430	 * to 2048 as required by the ATAPI specs.
19431	 */
19432	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19433		lbasize = 2048;
19434	}
19435
19436	/*
19437	 * Get the maximum LBA value from the READ CAPACITY data.
19438	 * Here we assume that the Partial Medium Indicator (PMI) bit
19439	 * was cleared when issuing the command. This means that the LBA
19440	 * returned from the device is the LBA of the last logical block
19441	 * on the logical unit.  The actual logical block count will be
19442	 * this value plus one.
19443	 *
19444	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19445	 * so scale the capacity value to reflect this.
19446	 */
19447	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
19448
19449	/*
19450	 * Copy the values from the READ CAPACITY command into the space
19451	 * provided by the caller.
19452	 */
19453	*capp = capacity;
19454	*lbap = lbasize;
19455
19456	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19457	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19458
19459	/*
19460	 * Both the lbasize and capacity from the device must be nonzero,
19461	 * otherwise we assume that the values are not valid and return
19462	 * failure to the caller. (4203735)
19463	 */
19464	if ((capacity == 0) || (lbasize == 0)) {
19465		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19466		    "sd_send_scsi_READ_CAPACITY received invalid value "
19467		    "capacity %llu lbasize %d", capacity, lbasize);
19468		return (EIO);
19469	}
19470	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19471	return (0);
19472}
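
/*
 * Editorial worked example of the capacity scaling above, using
 * hypothetical values: a device reporting a last LBA of 0x0003FFFF
 * with a 2048-byte block length, on a system where un_sys_blocksize
 * is 512, yields
 *
 *	capacity = (0x0003FFFF + 1) * (2048 / 512)
 *	         = 262144 * 4
 *	         = 1048576 system (512-byte) blocks, i.e. 512 MB.
 */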
19473
19474/*
19475 *    Function: sd_send_scsi_READ_CAPACITY_16
19476 *
19477 * Description: This routine uses the scsi READ CAPACITY 16 command to
19478 *		determine the device capacity in number of blocks and the
19479 *		device native block size.  If this function returns a failure,
19480 *		then the values in *capp and *lbap are undefined.
19481 *		This routine should always be called by
19482 *		sd_send_scsi_READ_CAPACITY, which will apply any device
19483 *		specific adjustments to capacity and lbasize.
19484 *
19485 *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19486 *		capp - ptr to unsigned 64-bit variable to receive the
19487 *			capacity value from the command.
19488 *		lbap - ptr to unsigned 32-bit variable to receive the
19489 *			block size value from the command
19490 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19491 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19492 *			to use the USCSI "direct" chain and bypass the normal
19493 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19494 *			this command is issued as part of an error recovery
19495 *			action.
19496 *
19497 * Return Code: 0   - Success
19498 *		EIO - IO error
19499 *		EACCES - Reservation conflict detected
19500 *		EAGAIN - Device is becoming ready
19501 *		errno return code from sd_ssc_send()
19502 *
19503 *     Context: Can sleep.  Blocks until command completes.
19504 */
19505
19506#define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19507
19508static int
19509sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
19510	uint32_t *lbap, int path_flag)
19511{
19512	struct	scsi_extended_sense	sense_buf;
19513	struct	uscsi_cmd	ucmd_buf;
19514	union	scsi_cdb	cdb;
19515	uint64_t		*capacity16_buf;
19516	uint64_t		capacity;
19517	uint32_t		lbasize;
19518	int			status;
19519	struct sd_lun		*un;
19520
19521	ASSERT(ssc != NULL);
19522
19523	un = ssc->ssc_un;
19524	ASSERT(un != NULL);
19525	ASSERT(!mutex_owned(SD_MUTEX(un)));
19526	ASSERT(capp != NULL);
19527	ASSERT(lbap != NULL);
19528
19529	SD_TRACE(SD_LOG_IO, un,
19530	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19531
19532	/*
19533	 * First send a READ_CAPACITY_16 command to the target.
19534	 *
19535	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19536	 * Medium Indicator bit is cleared.  The address field must be
19537	 * zero if the PMI bit is zero.
19538	 */
19539	bzero(&cdb, sizeof (cdb));
19540	bzero(&ucmd_buf, sizeof (ucmd_buf));
19541
19542	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19543
19544	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19545	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19546	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19547	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19548	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19549	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19550	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19551	ucmd_buf.uscsi_timeout	= 60;
19552
19553	/*
19554	 * Read Capacity (16) is a Service Action In command.  One
19555	 * command byte (0x9E) is overloaded for multiple operations,
19556	 * with the second CDB byte specifying the desired operation
19557	 */
19558	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19559	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19560
19561	/*
19562	 * Fill in allocation length field
19563	 */
19564	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19565
19566	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19567	    UIO_SYSSPACE, path_flag);
19568
19569	switch (status) {
19570	case 0:
19571		/* Return failure if we did not get valid capacity data. */
19572		if (ucmd_buf.uscsi_resid > 20) {
19573			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19574			    "sd_send_scsi_READ_CAPACITY_16 received invalid "
19575			    "capacity data");
19576			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19577			return (EIO);
19578		}
19579
19580		/*
19581		 * Read capacity and block size from the READ CAPACITY 16 data.
19582		 * This data may be adjusted later due to device specific
19583		 * issues.
19584		 *
19585		 * According to the SCSI spec, the READ CAPACITY 16
19586		 * command returns the following:
19587		 *
19588		 *  bytes 0-7: Maximum logical block address available.
19589		 *		(MSB in byte:0 & LSB in byte:7)
19590		 *
19591		 *  bytes 8-11: Block length in bytes
19592		 *		(MSB in byte:8 & LSB in byte:11)
19593		 *
19594		 */
19595		capacity = BE_64(capacity16_buf[0]);
19596		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19597
19598		/*
19599		 * Done with capacity16_buf
19600		 */
19601		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19602
19603		/*
19604		 * if the reported capacity is set to all 0xf's, then
19605		 * this disk is too large.  This could only happen with
19606		 * a device that supports LBAs larger than 64 bits which
19607		 * are not defined by any current T10 standards.
19608		 */
19609		if (capacity == 0xffffffffffffffff) {
19610			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
19611			    "disk is too large");
19612			return (EIO);
19613		}
19614		break;	/* Success! */
19615	case EIO:
19616		switch (ucmd_buf.uscsi_status) {
19617		case STATUS_RESERVATION_CONFLICT:
19618			status = EACCES;
19619			break;
19620		case STATUS_CHECK:
19621			/*
19622			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19623			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19624			 */
19625			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19626			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19627			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19628				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19629				return (EAGAIN);
19630			}
19631			break;
19632		default:
19633			break;
19634		}
19635		/* FALLTHRU */
19636	default:
19637		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19638		return (status);
19639	}
19640
19641	*capp = capacity;
19642	*lbap = lbasize;
19643
19644	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19645	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19646
19647	return (0);
19648}
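
/*
 * Editorial sketch of the CDB built above: READ CAPACITY(16) is the
 * 0x10 service action of the SERVICE ACTION IN(16) opcode (0x9E), with
 * the allocation length stored by FORMG4COUNT() into bytes 10-13:
 *
 *	byte 0:      0x9E (SCMD_SVC_ACTION_IN_G4)
 *	byte 1:      0x10 (SSVC_ACTION_READ_CAPACITY_G4)
 *	bytes 2-9:   LBA, zero here because the PMI bit is clear
 *	bytes 10-13: allocation length (sizeof (struct scsi_capacity_16))
 *	bytes 14-15: PMI bit and control byte, both zero here
 */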
19649
19650
19651/*
19652 *    Function: sd_send_scsi_START_STOP_UNIT
19653 *
19654 * Description: Issue a scsi START STOP UNIT command to the target.
19655 *
19656 *   Arguments: ssc    - ssc contains pointer to driver soft state (unit)
19657 *                       structure for this target.
19658 *		flag  - SD_TARGET_START
19659 *			SD_TARGET_STOP
19660 *			SD_TARGET_EJECT
19661 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19662 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19663 *			to use the USCSI "direct" chain and bypass the normal
19664 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19665 *			command is issued as part of an error recovery action.
19666 *
19667 * Return Code: 0   - Success
19668 *		EIO - IO error
19669 *		EACCES - Reservation conflict detected
19670 *		ENXIO  - Not Ready, medium not present
19671 *		errno return code from sd_ssc_send()
19672 *
19673 *     Context: Can sleep.
19674 */
19675
19676static int
19677sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int flag, int path_flag)
19678{
19679	struct	scsi_extended_sense	sense_buf;
19680	union scsi_cdb		cdb;
19681	struct uscsi_cmd	ucmd_buf;
19682	int			status;
19683	struct sd_lun		*un;
19684
19685	ASSERT(ssc != NULL);
19686	un = ssc->ssc_un;
19687	ASSERT(un != NULL);
19688	ASSERT(!mutex_owned(SD_MUTEX(un)));
19689
19690	SD_TRACE(SD_LOG_IO, un,
19691	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19692
19693	if (un->un_f_check_start_stop &&
19694	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19695	    (un->un_f_start_stop_supported != TRUE)) {
19696		return (0);
19697	}
19698
19699	/*
19700	 * If we are performing an eject operation and
19701	 * we receive any command other than SD_TARGET_EJECT,
19702	 * we should immediately return.
19703	 */
19704	if (flag != SD_TARGET_EJECT) {
19705		mutex_enter(SD_MUTEX(un));
19706		if (un->un_f_ejecting == TRUE) {
19707			mutex_exit(SD_MUTEX(un));
19708			return (EAGAIN);
19709		}
19710		mutex_exit(SD_MUTEX(un));
19711	}
19712
19713	bzero(&cdb, sizeof (cdb));
19714	bzero(&ucmd_buf, sizeof (ucmd_buf));
19715	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19716
19717	cdb.scc_cmd = SCMD_START_STOP;
19718	cdb.cdb_opaque[4] = (uchar_t)flag;
19719
19720	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19721	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19722	ucmd_buf.uscsi_bufaddr	= NULL;
19723	ucmd_buf.uscsi_buflen	= 0;
19724	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19725	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19726	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19727	ucmd_buf.uscsi_timeout	= 200;
19728
19729	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19730	    UIO_SYSSPACE, path_flag);
19731
19732	switch (status) {
19733	case 0:
19734		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19735		break;	/* Success! */
19736	case EIO:
19737		switch (ucmd_buf.uscsi_status) {
19738		case STATUS_RESERVATION_CONFLICT:
19739			status = EACCES;
19740			break;
19741		case STATUS_CHECK:
19742			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19743				switch (scsi_sense_key(
19744				    (uint8_t *)&sense_buf)) {
19745				case KEY_ILLEGAL_REQUEST:
19746					status = ENOTSUP;
19747					break;
19748				case KEY_NOT_READY:
19749					if (scsi_sense_asc(
19750					    (uint8_t *)&sense_buf)
19751					    == 0x3A) {
19752						status = ENXIO;
19753					}
19754					break;
19755				default:
19756					break;
19757				}
19758			}
19759			break;
19760		default:
19761			break;
19762		}
19763		break;
19764	default:
19765		break;
19766	}
19767
19768	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19769
19770	return (status);
19771}
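
/*
 * Editorial note: byte 4 of the START STOP UNIT CDB holds the Start
 * bit (bit 0) and the LoEj bit (bit 1), so, assuming the usual
 * sddef.h values, the flag argument stored above maps directly onto
 * the CDB:
 *
 *	SD_TARGET_STOP  == 0x00		stop the medium
 *	SD_TARGET_START == 0x01		start the medium
 *	SD_TARGET_EJECT == 0x02		LoEj set, Start clear: eject
 */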
19772
19773
19774/*
19775 *    Function: sd_start_stop_unit_callback
19776 *
19777 * Description: timeout(9F) callback to begin recovery process for a
19778 *		device that has spun down.
19779 *
19780 *   Arguments: arg - pointer to associated softstate struct.
19781 *
19782 *     Context: Executes in a timeout(9F) thread context
19783 */
19784
19785static void
19786sd_start_stop_unit_callback(void *arg)
19787{
19788	struct sd_lun	*un = arg;
19789	ASSERT(un != NULL);
19790	ASSERT(!mutex_owned(SD_MUTEX(un)));
19791
19792	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19793
19794	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19795}
19796
19797
19798/*
19799 *    Function: sd_start_stop_unit_task
19800 *
19801 * Description: Recovery procedure when a drive is spun down.
19802 *
19803 *   Arguments: arg - pointer to associated softstate struct.
19804 *
19805 *     Context: Executes in a taskq() thread context
19806 */
19807
19808static void
19809sd_start_stop_unit_task(void *arg)
19810{
19811	struct sd_lun	*un = arg;
19812	sd_ssc_t	*ssc;
19813	int		rval;
19814
19815	ASSERT(un != NULL);
19816	ASSERT(!mutex_owned(SD_MUTEX(un)));
19817
19818	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19819
19820	/*
19821	 * Some unformatted drives report a not-ready error; there is no
19822	 * need to restart if a format has been initiated.
19823	 */
19824	mutex_enter(SD_MUTEX(un));
19825	if (un->un_f_format_in_progress == TRUE) {
19826		mutex_exit(SD_MUTEX(un));
19827		return;
19828	}
19829	mutex_exit(SD_MUTEX(un));
19830
19831	/*
19832	 * When a START STOP command is issued from here, it is part of a
19833	 * failure recovery operation and must be issued before any other
19834	 * commands, including any pending retries. Thus it must be sent
19835	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
19836	 * succeeds or not, we will start I/O after the attempt.
19837	 */
19838	ssc = sd_ssc_init(un);
19839	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_START,
19840	    SD_PATH_DIRECT_PRIORITY);
19841	if (rval != 0)
19842		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19843	sd_ssc_fini(ssc);
19844	/*
19845	 * The above call blocks until the START_STOP_UNIT command completes.
19846	 * Now that it has completed, we must re-try the original IO that
19847	 * received the NOT READY condition in the first place. There are
19848	 * three possible conditions here:
19849	 *
19850	 *  (1) The original IO is on un_retry_bp.
19851	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19852	 *	is NULL.
19853	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19854	 *	points to some other, unrelated bp.
19855	 *
19856	 * For each case, we must call sd_start_cmds() with un_retry_bp
19857	 * as the argument. If un_retry_bp is NULL, this will initiate
19858	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19859	 * then this will process the bp on un_retry_bp. That may or may not
19860	 * be the original IO, but that does not matter: the important thing
19861	 * is to keep the IO processing going at this point.
19862	 *
19863	 * Note: This is a very specific error recovery sequence associated
19864	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19865	 * serialize the I/O with completion of the spin-up.
19866	 */
19867	mutex_enter(SD_MUTEX(un));
19868	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19869	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19870	    un, un->un_retry_bp);
19871	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19872	sd_start_cmds(un, un->un_retry_bp);
19873	mutex_exit(SD_MUTEX(un));
19874
19875	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19876}
19877
19878
19879/*
19880 *    Function: sd_send_scsi_INQUIRY
19881 *
19882 * Description: Issue the scsi INQUIRY command.
19883 *
19884 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19885 *                      structure for this target.
19886 *		bufaddr - buffer to receive the inquiry data
19887 *		buflen - length of the inquiry data buffer
19888 *		evpd - EVPD bit; set to request a vital product data page
19889 *		page_code - VPD page to return when the EVPD bit is set
19890 *		residp - optional ptr to receive the command residual
19891 *
19892 * Return Code: 0   - Success
19893 *		errno return code from sd_ssc_send()
19894 *
19895 *     Context: Can sleep. Does not return until command is completed.
19896 */
19897
19898static int
19899sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, size_t buflen,
19900	uchar_t evpd, uchar_t page_code, size_t *residp)
19901{
19902	union scsi_cdb		cdb;
19903	struct uscsi_cmd	ucmd_buf;
19904	int			status;
19905	struct sd_lun		*un;
19906
19907	ASSERT(ssc != NULL);
19908	un = ssc->ssc_un;
19909	ASSERT(un != NULL);
19910	ASSERT(!mutex_owned(SD_MUTEX(un)));
19911	ASSERT(bufaddr != NULL);
19912
19913	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19914
19915	bzero(&cdb, sizeof (cdb));
19916	bzero(&ucmd_buf, sizeof (ucmd_buf));
19917	bzero(bufaddr, buflen);
19918
19919	cdb.scc_cmd = SCMD_INQUIRY;
19920	cdb.cdb_opaque[1] = evpd;
19921	cdb.cdb_opaque[2] = page_code;
19922	FORMG0COUNT(&cdb, buflen);
19923
19924	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19925	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19926	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19927	ucmd_buf.uscsi_buflen	= buflen;
19928	ucmd_buf.uscsi_rqbuf	= NULL;
19929	ucmd_buf.uscsi_rqlen	= 0;
19930	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19931	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19932
19933	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19934	    UIO_SYSSPACE, SD_PATH_DIRECT);
19935
19936	/*
19937	 * Only handle status == 0, the upper-level caller
19938	 * will put different assessment based on the context.
19939	 */
19940	if (status == 0)
19941		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19942
19943	if ((status == 0) && (residp != NULL)) {
19944		*residp = ucmd_buf.uscsi_resid;
19945	}
19946
19947	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19948
19949	return (status);
19950}
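
/*
 * Editorial usage sketch (compiled out): fetching a vital product data
 * page by setting the EVPD bit, here the hypothetical choice of page
 * 0x80 (unit serial number).  The helper name is invented and error
 * handling is elided.
 */
#if 0
static void
sd_example_fetch_serial_vpd(struct sd_lun *un)
{
	sd_ssc_t	*ssc = sd_ssc_init(un);
	uchar_t		inq80[0xFF];
	size_t		resid;

	if (sd_send_scsi_INQUIRY(ssc, inq80, sizeof (inq80),
	    0x01, 0x80, &resid) == 0) {
		/* inq80[] now holds the VPD page; resid is the shortfall */
	}
	sd_ssc_fini(ssc);
}
#endif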
19951
19952
19953/*
19954 *    Function: sd_send_scsi_TEST_UNIT_READY
19955 *
19956 * Description: Issue the scsi TEST UNIT READY command.
19957 *		This routine can be told to set the flag USCSI_DIAGNOSE to
19958 *		prevent retrying failed commands. Use this when the intent
19959 *		is either to check for device readiness, to clear a Unit
19960 *		Attention, or to clear any outstanding sense data.
19961 *		However under specific conditions the expected behavior
19962 *		is for retries to bring a device ready, so use the flag
19963 *		with caution.
19964 *
19965 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19966 *                      structure for this target.
19967 *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19968 *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19969 *			0: don't check for media present, do retries on cmd.
19970 *
19971 * Return Code: 0   - Success
19972 *		EIO - IO error
19973 *		EACCES - Reservation conflict detected
19974 *		ENXIO  - Not Ready, medium not present
19975 *		errno return code from sd_ssc_send()
19976 *
19977 *     Context: Can sleep. Does not return until command is completed.
19978 */
19979
19980static int
19981sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag)
19982{
19983	struct	scsi_extended_sense	sense_buf;
19984	union scsi_cdb		cdb;
19985	struct uscsi_cmd	ucmd_buf;
19986	int			status;
19987	struct sd_lun		*un;
19988
19989	ASSERT(ssc != NULL);
19990	un = ssc->ssc_un;
19991	ASSERT(un != NULL);
19992	ASSERT(!mutex_owned(SD_MUTEX(un)));
19993
19994	SD_TRACE(SD_LOG_IO, un,
19995	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19996
19997	/*
19998	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19999	 * timeouts when they receive a TUR while the queue is not empty.
20000	 * Check the configuration flag that was set during attach (it
20001	 * indicates that the drive has this firmware bug), and check
20002	 * un_ncmds_in_transport, before issuing the TUR. If there are
20003	 * pending commands, return success; this is a bit arbitrary, but
20004	 * it is OK for non-removables (i.e. the elite1 disks) and for
20005	 * non-clustering configurations.
20006	 */
20007	if (un->un_f_cfg_tur_check == TRUE) {
20008		mutex_enter(SD_MUTEX(un));
20009		if (un->un_ncmds_in_transport != 0) {
20010			mutex_exit(SD_MUTEX(un));
20011			return (0);
20012		}
20013		mutex_exit(SD_MUTEX(un));
20014	}
20015
20016	bzero(&cdb, sizeof (cdb));
20017	bzero(&ucmd_buf, sizeof (ucmd_buf));
20018	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20019
20020	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
20021
20022	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20023	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20024	ucmd_buf.uscsi_bufaddr	= NULL;
20025	ucmd_buf.uscsi_buflen	= 0;
20026	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20027	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20028	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
20029
20030	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
20031	if ((flag & SD_DONT_RETRY_TUR) != 0) {
20032		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
20033	}
20034	ucmd_buf.uscsi_timeout	= 60;
20035
20036	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20037	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
20038	    SD_PATH_STANDARD));
20039
20040	switch (status) {
20041	case 0:
20042		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20043		break;	/* Success! */
20044	case EIO:
20045		switch (ucmd_buf.uscsi_status) {
20046		case STATUS_RESERVATION_CONFLICT:
20047			status = EACCES;
20048			break;
20049		case STATUS_CHECK:
20050			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
20051				break;
20052			}
20053			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20054			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20055			    KEY_NOT_READY) &&
20056			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
20057				status = ENXIO;
20058			}
20059			break;
20060		default:
20061			break;
20062		}
20063		break;
20064	default:
20065		break;
20066	}
20067
20068	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
20069
20070	return (status);
20071}
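
/*
 * Editorial usage sketch (compiled out): probing for media.  With
 * SD_CHECK_FOR_MEDIA set, a check condition carrying sense key
 * NOT READY and ASC 0x3A (medium not present) is folded into ENXIO by
 * the routine above.
 */
#if 0
static void
sd_example_media_probe(sd_ssc_t *ssc)
{
	switch (sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA)) {
	case 0:
		/* device is ready and media is present */
		break;
	case ENXIO:
		/* no media in the drive */
		break;
	default:
		/* EIO, EACCES, or another errno from sd_ssc_send() */
		break;
	}
}
#endif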
20072
20073/*
20074 *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20075 *
20076 * Description: Issue the scsi PERSISTENT RESERVE IN command.
20077 *
20078 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20079 *                      structure for this target.
20080 *
20081 * Return Code: 0   - Success
20082 *		EACCES
20083 *		ENOTSUP
20084 *		errno return code from sd_ssc_send()
20085 *
20086 *     Context: Can sleep. Does not return until command is completed.
20087 */
20088
20089static int
20090sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc, uchar_t  usr_cmd,
20091	uint16_t data_len, uchar_t *data_bufp)
20092{
20093	struct scsi_extended_sense	sense_buf;
20094	union scsi_cdb		cdb;
20095	struct uscsi_cmd	ucmd_buf;
20096	int			status;
20097	int			no_caller_buf = FALSE;
20098	struct sd_lun		*un;
20099
20100	ASSERT(ssc != NULL);
20101	un = ssc->ssc_un;
20102	ASSERT(un != NULL);
20103	ASSERT(!mutex_owned(SD_MUTEX(un)));
20104	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
20105
20106	SD_TRACE(SD_LOG_IO, un,
20107	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
20108
20109	bzero(&cdb, sizeof (cdb));
20110	bzero(&ucmd_buf, sizeof (ucmd_buf));
20111	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20112	if (data_bufp == NULL) {
20113		/* Allocate a default buf if the caller did not give one */
20114		ASSERT(data_len == 0);
20115		data_len  = MHIOC_RESV_KEY_SIZE;
20116		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
20117		no_caller_buf = TRUE;
20118	}
20119
20120	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
20121	cdb.cdb_opaque[1] = usr_cmd;
20122	FORMG1COUNT(&cdb, data_len);
20123
20124	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20125	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20126	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
20127	ucmd_buf.uscsi_buflen	= data_len;
20128	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20129	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20130	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20131	ucmd_buf.uscsi_timeout	= 60;
20132
20133	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20134	    UIO_SYSSPACE, SD_PATH_STANDARD);
20135
20136	switch (status) {
20137	case 0:
20138		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20139
20140		break;	/* Success! */
20141	case EIO:
20142		switch (ucmd_buf.uscsi_status) {
20143		case STATUS_RESERVATION_CONFLICT:
20144			status = EACCES;
20145			break;
20146		case STATUS_CHECK:
20147			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20148			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20149			    KEY_ILLEGAL_REQUEST)) {
20150				status = ENOTSUP;
20151			}
20152			break;
20153		default:
20154			break;
20155		}
20156		break;
20157	default:
20158		break;
20159	}
20160
20161	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
20162
20163	if (no_caller_buf == TRUE) {
20164		kmem_free(data_bufp, data_len);
20165	}
20166
20167	return (status);
20168}
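
/*
 * Editorial parse sketch (compiled out), assuming the SPC-3 PRIN
 * READ KEYS parameter data layout: a 4-byte PRGENERATION field, a
 * 4-byte ADDITIONAL LENGTH field, then the registered 8-byte keys.
 */
#if 0
static int
sd_example_count_keys(uchar_t *data_bufp)
{
	/* bytes 4-7: number of key bytes that follow the 8-byte header */
	uint32_t keylen = BE_32(*(uint32_t *)&data_bufp[4]);

	return (keylen / MHIOC_RESV_KEY_SIZE);	/* keys start at byte 8 */
}
#endif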
20169
20170
20171/*
20172 *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20173 *
20174 * Description: This routine is the driver entry point for handling CD-ROM
20175 *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
20176 *		MHIOCGRP_RESERVE, etc.) by sending SCSI-3 PROUT commands to the
20177 *		device.
20178 *
20179 *   Arguments: ssc  -  ssc contains un - pointer to soft state struct
20180 *                      for the target.
20181 *		usr_cmd SCSI-3 reservation facility command (one of
20182 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
20183 *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_REGISTERANDIGNOREKEY)
20184 *		usr_bufp - user provided pointer register, reserve descriptor or
20185 *			preempt and abort structure (mhioc_register_t,
20186 *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
20187 *
20188 * Return Code: 0   - Success
20189 *		EACCES
20190 *		ENOTSUP
20191 *		errno return code from sd_ssc_send()
20192 *
20193 *     Context: Can sleep. Does not return until command is completed.
20194 */
20195
20196static int
20197sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc, uchar_t usr_cmd,
20198	uchar_t	*usr_bufp)
20199{
20200	struct scsi_extended_sense	sense_buf;
20201	union scsi_cdb		cdb;
20202	struct uscsi_cmd	ucmd_buf;
20203	int			status;
20204	uchar_t			data_len = sizeof (sd_prout_t);
20205	sd_prout_t		*prp;
20206	struct sd_lun		*un;
20207
20208	ASSERT(ssc != NULL);
20209	un = ssc->ssc_un;
20210	ASSERT(un != NULL);
20211	ASSERT(!mutex_owned(SD_MUTEX(un)));
20212	ASSERT(data_len == 24);	/* required by scsi spec */
20213
20214	SD_TRACE(SD_LOG_IO, un,
20215	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20216
20217	if (usr_bufp == NULL) {
20218		return (EINVAL);
20219	}
20220
20221	bzero(&cdb, sizeof (cdb));
20222	bzero(&ucmd_buf, sizeof (ucmd_buf));
20223	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20224	prp = kmem_zalloc(data_len, KM_SLEEP);
20225
20226	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20227	cdb.cdb_opaque[1] = usr_cmd;
20228	FORMG1COUNT(&cdb, data_len);
20229
20230	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20231	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20232	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20233	ucmd_buf.uscsi_buflen	= data_len;
20234	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20235	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20236	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20237	ucmd_buf.uscsi_timeout	= 60;
20238
20239	switch (usr_cmd) {
20240	case SD_SCSI3_REGISTER: {
20241		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
20242
20243		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20244		bcopy(ptr->newkey.key, prp->service_key,
20245		    MHIOC_RESV_KEY_SIZE);
20246		prp->aptpl = ptr->aptpl;
20247		break;
20248	}
20249	case SD_SCSI3_RESERVE:
20250	case SD_SCSI3_RELEASE: {
20251		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20252
20253		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20254		prp->scope_address = BE_32(ptr->scope_specific_addr);
20255		cdb.cdb_opaque[2] = ptr->type;
20256		break;
20257	}
20258	case SD_SCSI3_PREEMPTANDABORT: {
20259		mhioc_preemptandabort_t *ptr =
20260		    (mhioc_preemptandabort_t *)usr_bufp;
20261
20262		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20263		bcopy(ptr->victim_key.key, prp->service_key,
20264		    MHIOC_RESV_KEY_SIZE);
20265		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
20266		cdb.cdb_opaque[2] = ptr->resvdesc.type;
20267		ucmd_buf.uscsi_flags |= USCSI_HEAD;
20268		break;
20269	}
20270	case SD_SCSI3_REGISTERANDIGNOREKEY:
20271	{
20272		mhioc_registerandignorekey_t *ptr;
20273		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
20274		bcopy(ptr->newkey.key,
20275		    prp->service_key, MHIOC_RESV_KEY_SIZE);
20276		prp->aptpl = ptr->aptpl;
20277		break;
20278	}
20279	default:
20280		ASSERT(FALSE);
20281		break;
20282	}
20283
20284	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20285	    UIO_SYSSPACE, SD_PATH_STANDARD);
20286
20287	switch (status) {
20288	case 0:
20289		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20290		break;	/* Success! */
20291	case EIO:
20292		switch (ucmd_buf.uscsi_status) {
20293		case STATUS_RESERVATION_CONFLICT:
20294			status = EACCES;
20295			break;
20296		case STATUS_CHECK:
20297			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20298			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20299			    KEY_ILLEGAL_REQUEST)) {
20300				status = ENOTSUP;
20301			}
20302			break;
20303		default:
20304			break;
20305		}
20306		break;
20307	default:
20308		break;
20309	}
20310
20311	kmem_free(prp, data_len);
20312	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20313	return (status);
20314}
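
/*
 * Editorial note: the 24-byte parameter list (sd_prout_t) filled in
 * above follows the SPC-3 PROUT layout, assuming the usual field
 * packing: reservation key in bytes 0-7, service action reservation
 * key in bytes 8-15, the (obsolete) scope-specific address in bytes
 * 16-19, and the APTPL bit in byte 20.  The reservation SCOPE and TYPE
 * travel in byte 2 of the CDB, not in the parameter list.
 */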
20315
20316
20317/*
20318 *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20319 *
20320 * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20321 *
20322 *   Arguments: un - pointer to the target's soft state struct
20323 *              dkc - pointer to the callback structure
20324 *
20325 * Return Code: 0 - success
20326 *		errno-type error code
20327 *
20328 *     Context: kernel thread context only.
20329 *
20330 *  _______________________________________________________________
20331 * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
20332 * |FLUSH_VOLATILE|              | operation                       |
20333 * |______________|______________|_________________________________|
20334 * | 0            | NULL         | Synchronous flush on both       |
20335 * |              |              | volatile and non-volatile cache |
20336 * |______________|______________|_________________________________|
20337 * | 1            | NULL         | Synchronous flush on volatile   |
20338 * |              |              | cache; disk drivers may suppress|
20339 * |              |              | flush if disk table indicates   |
20340 * |              |              | non-volatile cache              |
20341 * |______________|______________|_________________________________|
20342 * | 0            | !NULL        | Asynchronous flush on both      |
20343 * |              |              | volatile and non-volatile cache;|
20344 * |______________|______________|_________________________________|
20345 * | 1            | !NULL        | Asynchronous flush on volatile  |
20346 * |              |              | cache; disk drivers may suppress|
20347 * |              |              | flush if disk table indicates   |
20348 * |              |              | non-volatile cache              |
20349 * |______________|______________|_________________________________|
20350 *
20351 */
20352
20353static int
20354sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20355{
20356	struct sd_uscsi_info	*uip;
20357	struct uscsi_cmd	*uscmd;
20358	union scsi_cdb		*cdb;
20359	struct buf		*bp;
20360	int			rval = 0;
20361	int			is_async;
20362
20363	SD_TRACE(SD_LOG_IO, un,
20364	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20365
20366	ASSERT(un != NULL);
20367	ASSERT(!mutex_owned(SD_MUTEX(un)));
20368
20369	if (dkc == NULL || dkc->dkc_callback == NULL) {
20370		is_async = FALSE;
20371	} else {
20372		is_async = TRUE;
20373	}
20374
20375	mutex_enter(SD_MUTEX(un));
20376	/* check whether cache flush should be suppressed */
20377	if (un->un_f_suppress_cache_flush == TRUE) {
20378		mutex_exit(SD_MUTEX(un));
20379		/*
20380		 * suppress the cache flush if the device is told to do
20381		 * so by sd.conf or disk table
20382		 */
20383		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: "
20384		    "skip the cache flush since suppress_cache_flush is %d!\n",
20385		    un->un_f_suppress_cache_flush);
20386
20387		if (is_async == TRUE) {
20388			/* invoke callback for asynchronous flush */
20389			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
20390		}
20391		return (rval);
20392	}
20393	mutex_exit(SD_MUTEX(un));
20394
20395	/*
20396	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
20397	 * set properly
20398	 */
20399	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20400	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20401
20402	mutex_enter(SD_MUTEX(un));
20403	if (dkc != NULL && un->un_f_sync_nv_supported &&
20404	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
20405		/*
20406		 * if the device supports SYNC_NV bit, turn on
20407		 * the SYNC_NV bit to flush only the volatile cache
20408		 */
20409		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
20410	}
20411	mutex_exit(SD_MUTEX(un));
20412
20413	/*
20414	 * First get some memory for the uscsi_cmd struct and cdb
20415	 * and initialize for SYNCHRONIZE_CACHE cmd.
20416	 */
20417	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20418	uscmd->uscsi_cdblen = CDB_GROUP1;
20419	uscmd->uscsi_cdb = (caddr_t)cdb;
20420	uscmd->uscsi_bufaddr = NULL;
20421	uscmd->uscsi_buflen = 0;
20422	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20423	uscmd->uscsi_rqlen = SENSE_LENGTH;
20424	uscmd->uscsi_rqresid = SENSE_LENGTH;
20425	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20426	uscmd->uscsi_timeout = sd_io_time;
20427
20428	/*
20429	 * Allocate an sd_uscsi_info struct and fill it with the info
20430	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20431	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20432	 * since we allocate the buf here in this function, we do not
20433	 * need to preserve the prior contents of b_private.
20434	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20435	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy().
20436	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20437	uip->ui_flags = SD_PATH_DIRECT;
20438	uip->ui_cmdp  = uscmd;
20439
20440	bp = getrbuf(KM_SLEEP);
20441	bp->b_private = uip;
20442
20443	/*
20444	 * Setup buffer to carry uscsi request.
20445	 * Set up the buffer to carry the uscsi request.
20446	bp->b_flags  = B_BUSY;
20447	bp->b_bcount = 0;
20448	bp->b_blkno  = 0;
20449
20450	if (is_async == TRUE) {
20451		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20452		uip->ui_dkc = *dkc;
20453	}
20454
20455	bp->b_edev = SD_GET_DEV(un);
20456	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20457
20458	/*
20459	 * Unset un_f_sync_cache_required flag
20460	 */
20461	mutex_enter(SD_MUTEX(un));
20462	un->un_f_sync_cache_required = FALSE;
20463	mutex_exit(SD_MUTEX(un));
20464
20465	(void) sd_uscsi_strategy(bp);
20466
20467	/*
20468	 * If this is a synchronous request, wait for completion.
20469	 * If asynchronous, just return and let the b_iodone
20470	 * callback clean up.
20471	 * NOTE: On return, u_ncmds_in_driver will be decremented,
20472	 * but it was also incremented in sd_uscsi_strategy(), so
20473	 * we should be ok.
20474	 */
20475	if (is_async == FALSE) {
20476		(void) biowait(bp);
20477		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20478	}
20479
20480	return (rval);
20481}
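/*
 * A minimal sketch (not compiled into the driver) of how an in-kernel
 * consumer could exercise the dkc_flag/dkc_callback combinations
 * tabulated above by issuing DKIOCFLUSHWRITECACHE through the LDI.
 * The names my_flush_done and my_flush and the handle argument are
 * hypothetical; ldi_ioctl(), struct dk_callback and FLUSH_VOLATILE
 * are the real interfaces.
 */
#if 0
static void
my_flush_done(void *cookie, int error)	/* hypothetical callback */
{
	/* Row 4 of the table above: async flush of the volatile cache */
	cmn_err(CE_NOTE, "flush of %s done: %d", (char *)cookie, error);
}

static int
my_flush(ldi_handle_t lh)		/* hypothetical caller */
{
	struct dk_callback dkc;
	int rval;

	dkc.dkc_callback = my_flush_done;
	dkc.dkc_cookie = "my-disk";
	dkc.dkc_flag = FLUSH_VOLATILE;

	/* FKIOCTL marks this as a kernel-internal ioctl */
	return (ldi_ioctl(lh, DKIOCFLUSHWRITECACHE, (intptr_t)&dkc,
	    FKIOCTL, kcred, &rval));
}
#endif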
20482
20483
20484static int
20485sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20486{
20487	struct sd_uscsi_info *uip;
20488	struct uscsi_cmd *uscmd;
20489	uint8_t *sense_buf;
20490	struct sd_lun *un;
20491	int status;
20492	union scsi_cdb *cdb;
20493
20494	uip = (struct sd_uscsi_info *)(bp->b_private);
20495	ASSERT(uip != NULL);
20496
20497	uscmd = uip->ui_cmdp;
20498	ASSERT(uscmd != NULL);
20499
20500	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20501	ASSERT(sense_buf != NULL);
20502
20503	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20504	ASSERT(un != NULL);
20505
20506	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;
20507
20508	status = geterror(bp);
20509	switch (status) {
20510	case 0:
20511		break;	/* Success! */
20512	case EIO:
20513		switch (uscmd->uscsi_status) {
20514		case STATUS_RESERVATION_CONFLICT:
20515			/* Ignore reservation conflict */
20516			status = 0;
20517			goto done;
20518
20519		case STATUS_CHECK:
20520			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20521			    (scsi_sense_key(sense_buf) ==
20522			    KEY_ILLEGAL_REQUEST)) {
20523				/* Ignore Illegal Request error */
20524				if (cdb->cdb_un.tag & SD_SYNC_NV_BIT) {
20525					mutex_enter(SD_MUTEX(un));
20526					un->un_f_sync_nv_supported = FALSE;
20527					mutex_exit(SD_MUTEX(un));
20528					status = 0;
20529					SD_TRACE(SD_LOG_IO, un,
20530					    "un_f_sync_nv_supported "
20531					    "is set to false.\n");
20532					goto done;
20533				}
20534
20535				mutex_enter(SD_MUTEX(un));
20536				un->un_f_sync_cache_supported = FALSE;
20537				mutex_exit(SD_MUTEX(un));
20538				SD_TRACE(SD_LOG_IO, un,
20539				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: "
20540				    "un_f_sync_cache_supported set to false "
20541				    "with asc = %x, ascq = %x\n",
20542				    scsi_sense_asc(sense_buf),
20543				    scsi_sense_ascq(sense_buf));
20544				status = ENOTSUP;
20545				goto done;
20546			}
20547			break;
20548		default:
20549			break;
20550		}
20551		/* FALLTHRU */
20552	default:
20553		/*
20554		 * Turn on the un_f_sync_cache_required flag
20555		 * since the SYNC CACHE command failed
20556		 */
20557		mutex_enter(SD_MUTEX(un));
20558		un->un_f_sync_cache_required = TRUE;
20559		mutex_exit(SD_MUTEX(un));
20560
20561		/*
20562		 * Don't log an error message if this device
20563		 * has removable media.
20564		 */
20565		if (!un->un_f_has_removable_media) {
20566			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20567			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20568		}
20569		break;
20570	}
20571
20572done:
20573	if (uip->ui_dkc.dkc_callback != NULL) {
20574		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20575	}
20576
20577	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20578	freerbuf(bp);
20579	kmem_free(uip, sizeof (struct sd_uscsi_info));
20580	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20581	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20582	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20583
20584	return (status);
20585}
20586
20587
20588/*
20589 *    Function: sd_send_scsi_GET_CONFIGURATION
20590 *
20591 * Description: Issues the get configuration command to the device.
20592 *		Called from sd_check_for_writable_cd & sd_get_media_info.
20593 *		The caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN.
20594 *   Arguments: ssc
20595 *		ucmdbuf
20596 *		rqbuf
20597 *		rqbuflen
20598 *		bufaddr
20599 *		buflen
20600 *		path_flag
20601 *
20602 * Return Code: 0   - Success
20603 *		errno return code from sd_ssc_send()
20604 *
20605 *     Context: Can sleep. Does not return until command is completed.
20606 *
20607 */
20608
20609static int
20610sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
20611	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
20612	int path_flag)
20613{
20614	char	cdb[CDB_GROUP1];
20615	int	status;
20616	struct sd_lun	*un;
20617
20618	ASSERT(ssc != NULL);
20619	un = ssc->ssc_un;
20620	ASSERT(un != NULL);
20621	ASSERT(!mutex_owned(SD_MUTEX(un)));
20622	ASSERT(bufaddr != NULL);
20623	ASSERT(ucmdbuf != NULL);
20624	ASSERT(rqbuf != NULL);
20625
20626	SD_TRACE(SD_LOG_IO, un,
20627	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20628
20629	bzero(cdb, sizeof (cdb));
20630	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20631	bzero(rqbuf, rqbuflen);
20632	bzero(bufaddr, buflen);
20633
20634	/*
20635	 * Set up cdb field for the get configuration command.
20636	 */
20637	cdb[0] = SCMD_GET_CONFIGURATION;
20638	cdb[1] = 0x02;  /* Requested Type */
20639	cdb[8] = SD_PROFILE_HEADER_LEN;
20640	ucmdbuf->uscsi_cdb = cdb;
20641	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20642	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20643	ucmdbuf->uscsi_buflen = buflen;
20644	ucmdbuf->uscsi_timeout = sd_io_time;
20645	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20646	ucmdbuf->uscsi_rqlen = rqbuflen;
20647	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20648
20649	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
20650	    UIO_SYSSPACE, path_flag);
20651
20652	switch (status) {
20653	case 0:
20654		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20655		break;  /* Success! */
20656	case EIO:
20657		switch (ucmdbuf->uscsi_status) {
20658		case STATUS_RESERVATION_CONFLICT:
20659			status = EACCES;
20660			break;
20661		default:
20662			break;
20663		}
20664		break;
20665	default:
20666		break;
20667	}
20668
20669	if (status == 0) {
20670		SD_DUMP_MEMORY(un, SD_LOG_IO,
20671		    "sd_send_scsi_GET_CONFIGURATION: data",
20672		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20673	}
20674
20675	SD_TRACE(SD_LOG_IO, un,
20676	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20677
20678	return (status);
20679}
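/*
 * A minimal sketch (assuming the standard MMC GET CONFIGURATION
 * response layout) of how a caller could pull the current profile
 * out of the SD_PROFILE_HEADER_LEN bytes returned above: bytes 0-3
 * carry the data length and bytes 6-7 the current profile, both
 * big-endian.  The helper name is hypothetical.
 */
#if 0
static uint16_t
my_get_current_profile(const uchar_t *hdr)	/* hypothetical */
{
	/* bytes 6-7 of the feature header: current profile (MSB first) */
	return (((uint16_t)hdr[6] << 8) | hdr[7]);
}
#endif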
20680
20681/*
20682 *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20683 *
20684 * Description: Issues the get configuration command to the device to
20685 *              retrieve a specific feature. Called from
20686 *		sd_check_for_writable_cd & sd_set_mmc_caps.
20687 *   Arguments: ssc
20688 *              ucmdbuf
20689 *              rqbuf
20690 *              rqbuflen
20691 *              bufaddr
20692 *              buflen
20693 *		feature
20694 *
20695 * Return Code: 0   - Success
20696 *              errno return code from sd_ssc_send()
20697 *
20698 *     Context: Can sleep. Does not return until command is completed.
20699 *
20700 */
20701static int
20702sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
20703	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20704	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
20705{
20706	char    cdb[CDB_GROUP1];
20707	int	status;
20708	struct sd_lun	*un;
20709
20710	ASSERT(ssc != NULL);
20711	un = ssc->ssc_un;
20712	ASSERT(un != NULL);
20713	ASSERT(!mutex_owned(SD_MUTEX(un)));
20714	ASSERT(bufaddr != NULL);
20715	ASSERT(ucmdbuf != NULL);
20716	ASSERT(rqbuf != NULL);
20717
20718	SD_TRACE(SD_LOG_IO, un,
20719	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20720
20721	bzero(cdb, sizeof (cdb));
20722	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20723	bzero(rqbuf, rqbuflen);
20724	bzero(bufaddr, buflen);
20725
20726	/*
20727	 * Set up cdb field for the get configuration command.
20728	 */
20729	cdb[0] = SCMD_GET_CONFIGURATION;
20730	cdb[1] = 0x02;  /* Requested Type */
20731	cdb[3] = feature;
20732	cdb[8] = buflen;
20733	ucmdbuf->uscsi_cdb = cdb;
20734	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20735	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20736	ucmdbuf->uscsi_buflen = buflen;
20737	ucmdbuf->uscsi_timeout = sd_io_time;
20738	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20739	ucmdbuf->uscsi_rqlen = rqbuflen;
20740	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20741
20742	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
20743	    UIO_SYSSPACE, path_flag);
20744
20745	switch (status) {
20746	case 0:
20747
20748		break;  /* Success! */
20749	case EIO:
20750		switch (ucmdbuf->uscsi_status) {
20751		case STATUS_RESERVATION_CONFLICT:
20752			status = EACCES;
20753			break;
20754		default:
20755			break;
20756		}
20757		break;
20758	default:
20759		break;
20760	}
20761
20762	if (status == 0) {
20763		SD_DUMP_MEMORY(un, SD_LOG_IO,
20764		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20765		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20766	}
20767
20768	SD_TRACE(SD_LOG_IO, un,
20769	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20770
20771	return (status);
20772}
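/*
 * A minimal sketch (assuming the standard MMC feature descriptor
 * layout) of how a caller could test whether the feature requested
 * above is current: each descriptor starts with a 2-byte feature
 * code, and bit 0 of byte 2 is the "current" flag.  The helper name
 * is hypothetical.
 */
#if 0
static int
my_feature_is_current(const uchar_t *desc)	/* hypothetical */
{
	return ((desc[2] & 0x01) != 0);
}
#endif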
20773
20774
20775/*
20776 *    Function: sd_send_scsi_MODE_SENSE
20777 *
20778 * Description: Utility function for issuing a scsi MODE SENSE command.
20779 *		Note: This routine uses a consistent implementation for Group0,
20780 *		Group1, and Group2 commands across all platforms. ATAPI devices
20781 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20782 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
20783 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20784 *                      structure for this target.
20785 *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
20786 *			  CDB_GROUP[1|2] (10 byte)).
20787 *		bufaddr - buffer for page data retrieved from the target.
20788 *		buflen - size of page to be retrieved.
20789 *		page_code - page code of data to be retrieved from the target.
20790 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20791 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20792 *			to use the USCSI "direct" chain and bypass the normal
20793 *			command waitq.
20794 *
20795 * Return Code: 0   - Success
20796 *		errno return code from sd_ssc_send()
20797 *
20798 *     Context: Can sleep. Does not return until command is completed.
20799 */
20800
20801static int
20802sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
20803	size_t buflen,  uchar_t page_code, int path_flag)
20804{
20805	struct	scsi_extended_sense	sense_buf;
20806	union scsi_cdb		cdb;
20807	struct uscsi_cmd	ucmd_buf;
20808	int			status;
20809	int			headlen;
20810	struct sd_lun		*un;
20811
20812	ASSERT(ssc != NULL);
20813	un = ssc->ssc_un;
20814	ASSERT(un != NULL);
20815	ASSERT(!mutex_owned(SD_MUTEX(un)));
20816	ASSERT(bufaddr != NULL);
20817	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20818	    (cdbsize == CDB_GROUP2));
20819
20820	SD_TRACE(SD_LOG_IO, un,
20821	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20822
20823	bzero(&cdb, sizeof (cdb));
20824	bzero(&ucmd_buf, sizeof (ucmd_buf));
20825	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20826	bzero(bufaddr, buflen);
20827
20828	if (cdbsize == CDB_GROUP0) {
20829		cdb.scc_cmd = SCMD_MODE_SENSE;
20830		cdb.cdb_opaque[2] = page_code;
20831		FORMG0COUNT(&cdb, buflen);
20832		headlen = MODE_HEADER_LENGTH;
20833	} else {
20834		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20835		cdb.cdb_opaque[2] = page_code;
20836		FORMG1COUNT(&cdb, buflen);
20837		headlen = MODE_HEADER_LENGTH_GRP2;
20838	}
20839
20840	ASSERT(headlen <= buflen);
20841	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20842
20843	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20844	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20845	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20846	ucmd_buf.uscsi_buflen	= buflen;
20847	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20848	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20849	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20850	ucmd_buf.uscsi_timeout	= 60;
20851
20852	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20853	    UIO_SYSSPACE, path_flag);
20854
20855	switch (status) {
20856	case 0:
20857		/*
20858		 * sr_check_wp() uses the 0x3f page code and checks the header
20859		 * of the mode page to determine if the target device is
20860		 * write-protected. But some USB devices return 0 bytes for the
20861		 * 0x3f page code. For this case, make sure that at least the
20862		 * mode page header is returned.
20863		 */
20864		if (buflen - ucmd_buf.uscsi_resid < headlen) {
20865			status = EIO;
20866			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20867			    "mode page header is not returned");
20868		}
20869		break;	/* Success! */
20870	case EIO:
20871		switch (ucmd_buf.uscsi_status) {
20872		case STATUS_RESERVATION_CONFLICT:
20873			status = EACCES;
20874			break;
20875		default:
20876			break;
20877		}
20878		break;
20879	default:
20880		break;
20881	}
20882
20883	if (status == 0) {
20884		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20885		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20886	}
20887	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20888
20889	return (status);
20890}
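/*
 * A minimal sketch of how a Group 0 caller might locate the page data
 * returned by sd_send_scsi_MODE_SENSE(): the 4-byte mode header is
 * followed by the block descriptors (whose total length is in byte 3
 * of the header), and only then by the page itself.  The helper name
 * is hypothetical; the offsets follow the SPC mode parameter layout.
 */
#if 0
static uchar_t *
my_mode_page_data(uchar_t *buf)		/* hypothetical, CDB_GROUP0 only */
{
	/* buf[3] = block descriptor length in the 6-byte mode header */
	return (buf + MODE_HEADER_LENGTH + buf[3]);
}
#endif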
20891
20892
20893/*
20894 *    Function: sd_send_scsi_MODE_SELECT
20895 *
20896 * Description: Utility function for issuing a scsi MODE SELECT command.
20897 *		Note: This routine uses a consistent implementation for Group0,
20898 *		Group1, and Group2 commands across all platforms. ATAPI devices
20899 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
20900 *
20901 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20902 *                      structure for this target.
20903 *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
20904 *			  CDB_GROUP[1|2] (10 byte)).
20905 *		bufaddr - buffer for page data retrieved from the target.
20906 *		buflen - size of page to be retrieved.
20907 *		save_page - boolean to determine if SP bit should be set.
20908 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20909 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20910 *			to use the USCSI "direct" chain and bypass the normal
20911 *			command waitq.
20912 *
20913 * Return Code: 0   - Success
20914 *		errno return code from sd_ssc_send()
20915 *
20916 *     Context: Can sleep. Does not return until command is completed.
20917 */
20918
20919static int
20920sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
20921	size_t buflen,  uchar_t save_page, int path_flag)
20922{
20923	struct	scsi_extended_sense	sense_buf;
20924	union scsi_cdb		cdb;
20925	struct uscsi_cmd	ucmd_buf;
20926	int			status;
20927	struct sd_lun		*un;
20928
20929	ASSERT(ssc != NULL);
20930	un = ssc->ssc_un;
20931	ASSERT(un != NULL);
20932	ASSERT(!mutex_owned(SD_MUTEX(un)));
20933	ASSERT(bufaddr != NULL);
20934	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20935	    (cdbsize == CDB_GROUP2));
20936
20937	SD_TRACE(SD_LOG_IO, un,
20938	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20939
20940	bzero(&cdb, sizeof (cdb));
20941	bzero(&ucmd_buf, sizeof (ucmd_buf));
20942	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20943
20944	/* Set the PF bit for many third party drives */
20945	cdb.cdb_opaque[1] = 0x10;
20946
20947	/* Set the savepage(SP) bit if given */
20948	if (save_page == SD_SAVE_PAGE) {
20949		cdb.cdb_opaque[1] |= 0x01;
20950	}
20951
20952	if (cdbsize == CDB_GROUP0) {
20953		cdb.scc_cmd = SCMD_MODE_SELECT;
20954		FORMG0COUNT(&cdb, buflen);
20955	} else {
20956		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20957		FORMG1COUNT(&cdb, buflen);
20958	}
20959
20960	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20961
20962	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20963	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20964	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20965	ucmd_buf.uscsi_buflen	= buflen;
20966	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20967	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20968	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20969	ucmd_buf.uscsi_timeout	= 60;
20970
20971	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20972	    UIO_SYSSPACE, path_flag);
20973
20974	switch (status) {
20975	case 0:
20976		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20977		break;	/* Success! */
20978	case EIO:
20979		switch (ucmd_buf.uscsi_status) {
20980		case STATUS_RESERVATION_CONFLICT:
20981			status = EACCES;
20982			break;
20983		default:
20984			break;
20985		}
20986		break;
20987	default:
20988		break;
20989	}
20990
20991	if (status == 0) {
20992		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20993		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20994	}
20995	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20996
20997	return (status);
20998}
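/*
 * A minimal sketch of the payload convention a caller of
 * sd_send_scsi_MODE_SELECT() is expected to follow: the buffer is a
 * mode parameter header plus the page being written, and the header's
 * mode data length field (byte 0 of the 6-byte header) is reserved
 * for MODE SELECT and must be zeroed before sending.  The helper name
 * is hypothetical.
 */
#if 0
static void
my_prepare_mode_select(uchar_t *buf)	/* hypothetical, CDB_GROUP0 only */
{
	buf[0] = 0;	/* mode data length: reserved for MODE SELECT */
}
#endif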
20999
21000
21001/*
21002 *    Function: sd_send_scsi_RDWR
21003 *
21004 * Description: Issue a scsi READ or WRITE command with the given parameters.
21005 *
21006 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21007 *                      structure for this target.
21008 *		cmd:	 SCMD_READ or SCMD_WRITE
21009 *		bufaddr: Address of caller's buffer to receive the RDWR data
21010 *		buflen:  Length of caller's buffer to receive the RDWR data.
21011 *		start_block: Block number for the start of the RDWR operation.
21012 *			 (Assumes target-native block size.)
21015 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21016 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21017 *			to use the USCSI "direct" chain and bypass the normal
21018 *			command waitq.
21019 *
21020 * Return Code: 0   - Success
21021 *		errno return code from sd_ssc_send()
21022 *
21023 *     Context: Can sleep. Does not return until command is completed.
21024 */
21025
21026static int
21027sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
21028	size_t buflen, daddr_t start_block, int path_flag)
21029{
21030	struct	scsi_extended_sense	sense_buf;
21031	union scsi_cdb		cdb;
21032	struct uscsi_cmd	ucmd_buf;
21033	uint32_t		block_count;
21034	int			status;
21035	int			cdbsize;
21036	uchar_t			flag;
21037	struct sd_lun		*un;
21038
21039	ASSERT(ssc != NULL);
21040	un = ssc->ssc_un;
21041	ASSERT(un != NULL);
21042	ASSERT(!mutex_owned(SD_MUTEX(un)));
21043	ASSERT(bufaddr != NULL);
21044	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
21045
21046	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
21047
21048	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
21049		return (EINVAL);
21050	}
21051
21052	mutex_enter(SD_MUTEX(un));
21053	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
21054	mutex_exit(SD_MUTEX(un));
21055
21056	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
21057
21058	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
21059	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
21060	    bufaddr, buflen, start_block, block_count);
21061
21062	bzero(&cdb, sizeof (cdb));
21063	bzero(&ucmd_buf, sizeof (ucmd_buf));
21064	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21065
21066	/* Compute CDB size to use */
21067	if (start_block > 0xffffffff)
21068		cdbsize = CDB_GROUP4;
21069	else if ((start_block & 0xFFE00000) ||
21070	    (un->un_f_cfg_is_atapi == TRUE))
21071		cdbsize = CDB_GROUP1;
21072	else
21073		cdbsize = CDB_GROUP0;
21074
21075	switch (cdbsize) {
21076	case CDB_GROUP0:	/* 6-byte CDBs */
21077		cdb.scc_cmd = cmd;
21078		FORMG0ADDR(&cdb, start_block);
21079		FORMG0COUNT(&cdb, block_count);
21080		break;
21081	case CDB_GROUP1:	/* 10-byte CDBs */
21082		cdb.scc_cmd = cmd | SCMD_GROUP1;
21083		FORMG1ADDR(&cdb, start_block);
21084		FORMG1COUNT(&cdb, block_count);
21085		break;
21086	case CDB_GROUP4:	/* 16-byte CDBs */
21087		cdb.scc_cmd = cmd | SCMD_GROUP4;
21088		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
21089		FORMG4COUNT(&cdb, block_count);
21090		break;
21091	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
21092	default:
21093		/* All others reserved */
21094		return (EINVAL);
21095	}
21096
21097	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
21098	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21099
21100	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21101	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21102	ucmd_buf.uscsi_bufaddr	= bufaddr;
21103	ucmd_buf.uscsi_buflen	= buflen;
21104	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21105	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21106	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
21107	ucmd_buf.uscsi_timeout	= 60;
21108	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21109	    UIO_SYSSPACE, path_flag);
21110
21111	switch (status) {
21112	case 0:
21113		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21114		break;	/* Success! */
21115	case EIO:
21116		switch (ucmd_buf.uscsi_status) {
21117		case STATUS_RESERVATION_CONFLICT:
21118			status = EACCES;
21119			break;
21120		default:
21121			break;
21122		}
21123		break;
21124	default:
21125		break;
21126	}
21127
21128	if (status == 0) {
21129		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
21130		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21131	}
21132
21133	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
21134
21135	return (status);
21136}
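/*
 * A standalone sketch of the CDB group selection above: Group 0 CDBs
 * carry a 21-bit LBA, Group 1 a 32-bit LBA, and Group 4 a 64-bit LBA,
 * which is why start_block is tested against 0xffffffff and then
 * against the 0xFFE00000 mask (any bit above bit 20 set).  The
 * function name is hypothetical.
 */
#if 0
static int
my_rdwr_cdb_group(uint64_t start_block, int is_atapi)	/* hypothetical */
{
	if (start_block > 0xffffffff)
		return (CDB_GROUP4);	/* 16-byte CDB, 64-bit LBA */
	if ((start_block & 0xFFE00000) || is_atapi)
		return (CDB_GROUP1);	/* 10-byte CDB, 32-bit LBA */
	return (CDB_GROUP0);		/*  6-byte CDB, 21-bit LBA */
}
#endif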
21137
21138
21139/*
21140 *    Function: sd_send_scsi_LOG_SENSE
21141 *
21142 * Description: Issue a scsi LOG_SENSE command with the given parameters.
21143 *
21144 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21145 *                      structure for this target.
21146 *
21147 * Return Code: 0   - Success
21148 *		errno return code from sd_ssc_send()
21149 *
21150 *     Context: Can sleep. Does not return until command is completed.
21151 */
21152
21153static int
21154sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr, uint16_t buflen,
21155	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
21156	int path_flag)
21157
21158{
21159	struct scsi_extended_sense	sense_buf;
21160	union scsi_cdb		cdb;
21161	struct uscsi_cmd	ucmd_buf;
21162	int			status;
21163	struct sd_lun		*un;
21164
21165	ASSERT(ssc != NULL);
21166	un = ssc->ssc_un;
21167	ASSERT(un != NULL);
21168	ASSERT(!mutex_owned(SD_MUTEX(un)));
21169
21170	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
21171
21172	bzero(&cdb, sizeof (cdb));
21173	bzero(&ucmd_buf, sizeof (ucmd_buf));
21174	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21175
21176	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
21177	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
21178	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
21179	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
21180	FORMG1COUNT(&cdb, buflen);
21181
21182	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21183	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21184	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21185	ucmd_buf.uscsi_buflen	= buflen;
21186	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21187	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21188	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21189	ucmd_buf.uscsi_timeout	= 60;
21190
21191	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21192	    UIO_SYSSPACE, path_flag);
21193
21194	switch (status) {
21195	case 0:
21196		break;
21197	case EIO:
21198		switch (ucmd_buf.uscsi_status) {
21199		case STATUS_RESERVATION_CONFLICT:
21200			status = EACCES;
21201			break;
21202		case STATUS_CHECK:
21203			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
21204			    (scsi_sense_key((uint8_t *)&sense_buf) ==
21205				KEY_ILLEGAL_REQUEST) &&
21206			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
21207				/*
21208				 * ASC 0x24: INVALID FIELD IN CDB
21209				 */
21210				switch (page_code) {
21211				case START_STOP_CYCLE_PAGE:
21212					/*
21213					 * The start stop cycle counter is
21214					 * implemented as page 0x31 in earlier
21215					 * generation disks. In new generation
21216					 * implemented as page 0xE. To handle
21217					 * this case, if an attempt to read log
21218					 * page 0xE fails, we retry using
21219					 * page 0x31.
21220					 * will try again using page 0x31.
21221					 *
21222					 * Network storage BU committed to
21223					 * maintain the page 0x31 for this
21224					 * purpose and will not have any other
21225					 * page implemented with page code 0x31
21226					 * until all disks transition to the
21227					 * standard page.
21228					 */
21229					mutex_enter(SD_MUTEX(un));
21230					un->un_start_stop_cycle_page =
21231					    START_STOP_CYCLE_VU_PAGE;
21232					cdb.cdb_opaque[2] =
21233					    (char)(page_control << 6) |
21234					    un->un_start_stop_cycle_page;
21235					mutex_exit(SD_MUTEX(un));
21236					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
21237					status = sd_ssc_send(
21238					    ssc, &ucmd_buf, FKIOCTL,
21239					    UIO_SYSSPACE, path_flag);
21240
21241					break;
21242				case TEMPERATURE_PAGE:
21243					status = ENOTTY;
21244					break;
21245				default:
21246					break;
21247				}
21248			}
21249			break;
21250		default:
21251			break;
21252		}
21253		break;
21254	default:
21255		break;
21256	}
21257
21258	if (status == 0) {
21259		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21260		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
21261		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21262	}
21263
21264	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
21265
21266	return (status);
21267}
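/*
 * A minimal sketch of a call site: fetching the current-cumulative
 * values (page_control 0x01) of the temperature log page with no
 * parameter pointer.  The buffer size is illustrative and an
 * initialized ssc is assumed; TEMPERATURE_PAGE and SD_PATH_STANDARD
 * are the real identifiers used elsewhere in this file.
 */
#if 0
	uchar_t	logbuf[256];				/* illustrative */
	int	rc;

	rc = sd_send_scsi_LOG_SENSE(ssc, logbuf, sizeof (logbuf),
	    TEMPERATURE_PAGE, 0x01, 0, SD_PATH_STANDARD);
#endif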
21268
21269
21270/*
21271 *    Function: sdioctl
21272 *
21273 * Description: Driver's ioctl(9e) entry point function.
21274 *
21275 *   Arguments: dev     - device number
21276 *		cmd     - ioctl operation to be performed
21277 *		arg     - user argument, contains data to be set or reference
21278 *			  parameter for get
21279 *		flag    - bit flag, indicating open settings, 32/64 bit type
21280 *		cred_p  - user credential pointer
21281 *		rval_p  - calling process return value (OPT)
21282 *
21283 * Return Code: EINVAL
21284 *		ENOTTY
21285 *		ENXIO
21286 *		EIO
21287 *		EFAULT
21288 *		ENOTSUP
21289 *		EPERM
21290 *
21291 *     Context: Called from the device switch at normal priority.
21292 */
21293
21294static int
21295sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
21296{
21297	struct sd_lun	*un = NULL;
21298	int		err = 0;
21299	int		i = 0;
21300	cred_t		*cr;
21301	int		tmprval = EINVAL;
21302	boolean_t	is_valid;
21303	sd_ssc_t	*ssc;
21304
21305	/*
21306	 * All device accesses go through sdstrategy, where we check the
21307	 * suspend status.
21308	 */
21309	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21310		return (ENXIO);
21311	}
21312
21313	ASSERT(!mutex_owned(SD_MUTEX(un)));
21314
21315	/* Initialize sd_ssc_t for internal uscsi commands */
21316	ssc = sd_ssc_init(un);
21317
21318	is_valid = SD_IS_VALID_LABEL(un);
21319
21320	/*
21321	 * Moved this wait from sd_uscsi_strategy to here for
21322	 * reasons of deadlock prevention. Internal driver commands,
21323	 * specifically those to change a device's power level, result
21324	 * in a call to sd_uscsi_strategy.
21325	 */
21326	mutex_enter(SD_MUTEX(un));
21327	while ((un->un_state == SD_STATE_SUSPENDED) ||
21328	    (un->un_state == SD_STATE_PM_CHANGING)) {
21329		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21330	}
21331	/*
21332	 * Twiddling the counter here protects commands from now
21333	 * through to the top of sd_uscsi_strategy. Without the
21334	 * counter increment, a power down, for example, could get in
21335	 * after the above check for state is made and before
21336	 * execution gets to the top of sd_uscsi_strategy.
21337	 * That would cause problems.
21338	 */
21339	un->un_ncmds_in_driver++;
21340
21341	if (!is_valid &&
21342	    (flag & (FNDELAY | FNONBLOCK))) {
21343		switch (cmd) {
21344		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
21345		case DKIOCGVTOC:
21346		case DKIOCGEXTVTOC:
21347		case DKIOCGAPART:
21348		case DKIOCPARTINFO:
21349		case DKIOCEXTPARTINFO:
21350		case DKIOCSGEOM:
21351		case DKIOCSAPART:
21352		case DKIOCGETEFI:
21353		case DKIOCPARTITION:
21354		case DKIOCSVTOC:
21355		case DKIOCSEXTVTOC:
21356		case DKIOCSETEFI:
21357		case DKIOCGMBOOT:
21358		case DKIOCSMBOOT:
21359		case DKIOCG_PHYGEOM:
21360		case DKIOCG_VIRTGEOM:
21361			/* let cmlb handle it */
21362			goto skip_ready_valid;
21363
21364		case CDROMPAUSE:
21365		case CDROMRESUME:
21366		case CDROMPLAYMSF:
21367		case CDROMPLAYTRKIND:
21368		case CDROMREADTOCHDR:
21369		case CDROMREADTOCENTRY:
21370		case CDROMSTOP:
21371		case CDROMSTART:
21372		case CDROMVOLCTRL:
21373		case CDROMSUBCHNL:
21374		case CDROMREADMODE2:
21375		case CDROMREADMODE1:
21376		case CDROMREADOFFSET:
21377		case CDROMSBLKMODE:
21378		case CDROMGBLKMODE:
21379		case CDROMGDRVSPEED:
21380		case CDROMSDRVSPEED:
21381		case CDROMCDDA:
21382		case CDROMCDXA:
21383		case CDROMSUBCODE:
21384			if (!ISCD(un)) {
21385				un->un_ncmds_in_driver--;
21386				ASSERT(un->un_ncmds_in_driver >= 0);
21387				mutex_exit(SD_MUTEX(un));
21388				err = ENOTTY;
21389				goto done_without_assess;
21390			}
21391			break;
21392		case FDEJECT:
21393		case DKIOCEJECT:
21394		case CDROMEJECT:
21395			if (!un->un_f_eject_media_supported) {
21396				un->un_ncmds_in_driver--;
21397				ASSERT(un->un_ncmds_in_driver >= 0);
21398				mutex_exit(SD_MUTEX(un));
21399				err = ENOTTY;
21400				goto done_without_assess;
21401			}
21402			break;
21403		case DKIOCFLUSHWRITECACHE:
21404			mutex_exit(SD_MUTEX(un));
21405			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
21406			if (err != 0) {
21407				mutex_enter(SD_MUTEX(un));
21408				un->un_ncmds_in_driver--;
21409				ASSERT(un->un_ncmds_in_driver >= 0);
21410				mutex_exit(SD_MUTEX(un));
21411				err = EIO;
21412				goto done_quick_assess;
21413			}
21414			mutex_enter(SD_MUTEX(un));
21415			/* FALLTHROUGH */
21416		case DKIOCREMOVABLE:
21417		case DKIOCHOTPLUGGABLE:
21418		case DKIOCINFO:
21419		case DKIOCGMEDIAINFO:
21420		case MHIOCENFAILFAST:
21421		case MHIOCSTATUS:
21422		case MHIOCTKOWN:
21423		case MHIOCRELEASE:
21424		case MHIOCGRP_INKEYS:
21425		case MHIOCGRP_INRESV:
21426		case MHIOCGRP_REGISTER:
21427		case MHIOCGRP_RESERVE:
21428		case MHIOCGRP_PREEMPTANDABORT:
21429		case MHIOCGRP_REGISTERANDIGNOREKEY:
21430		case CDROMCLOSETRAY:
21431		case USCSICMD:
21432			goto skip_ready_valid;
21433		default:
21434			break;
21435		}
21436
21437		mutex_exit(SD_MUTEX(un));
21438		err = sd_ready_and_valid(ssc, SDPART(dev));
21439		mutex_enter(SD_MUTEX(un));
21440
21441		if (err != SD_READY_VALID) {
21442			switch (cmd) {
21443			case DKIOCSTATE:
21444			case CDROMGDRVSPEED:
21445			case CDROMSDRVSPEED:
21446			case FDEJECT:	/* for eject command */
21447			case DKIOCEJECT:
21448			case CDROMEJECT:
21449			case DKIOCREMOVABLE:
21450			case DKIOCHOTPLUGGABLE:
21451				break;
21452			default:
21453				if (un->un_f_has_removable_media) {
21454					err = ENXIO;
21455				} else {
21456				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
21457					if (err == SD_RESERVED_BY_OTHERS) {
21458						err = EACCES;
21459					} else {
21460						err = EIO;
21461					}
21462				}
21463				un->un_ncmds_in_driver--;
21464				ASSERT(un->un_ncmds_in_driver >= 0);
21465				mutex_exit(SD_MUTEX(un));
21466
21467				goto done_without_assess;
21468			}
21469		}
21470	}
21471
21472skip_ready_valid:
21473	mutex_exit(SD_MUTEX(un));
21474
21475	switch (cmd) {
21476	case DKIOCINFO:
21477		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21478		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21479		break;
21480
21481	case DKIOCGMEDIAINFO:
21482		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21483		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21484		break;
21485
21486	case DKIOCGGEOM:
21487	case DKIOCGVTOC:
21488	case DKIOCGEXTVTOC:
21489	case DKIOCGAPART:
21490	case DKIOCPARTINFO:
21491	case DKIOCEXTPARTINFO:
21492	case DKIOCSGEOM:
21493	case DKIOCSAPART:
21494	case DKIOCGETEFI:
21495	case DKIOCPARTITION:
21496	case DKIOCSVTOC:
21497	case DKIOCSEXTVTOC:
21498	case DKIOCSETEFI:
21499	case DKIOCGMBOOT:
21500	case DKIOCSMBOOT:
21501	case DKIOCG_PHYGEOM:
21502	case DKIOCG_VIRTGEOM:
21503		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
21504
21505		/* TUR should spin up */
21506
21507		if (un->un_f_has_removable_media)
21508			err = sd_send_scsi_TEST_UNIT_READY(ssc,
21509			    SD_CHECK_FOR_MEDIA);
21510
21511		else
21512			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
21513
21514		if (err != 0)
21515			goto done_with_assess;
21516
21517		err = cmlb_ioctl(un->un_cmlbhandle, dev,
21518		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
21519
21520		if ((err == 0) &&
21521		    ((cmd == DKIOCSETEFI) ||
21522		    ((un->un_f_pkstats_enabled) &&
21523		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC ||
21524		    cmd == DKIOCSEXTVTOC)))) {
21525
21526			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
21527			    (void *)SD_PATH_DIRECT);
21528			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
21529				sd_set_pstats(un);
21530				SD_TRACE(SD_LOG_IO_PARTITION, un,
21531				    "sd_ioctl: un:0x%p pstats created and "
21532				    "set\n", un);
21533			}
21534		}
21535
21536		if ((cmd == DKIOCSVTOC || cmd == DKIOCSEXTVTOC) ||
21537		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
21538
21539			mutex_enter(SD_MUTEX(un));
21540			if (un->un_f_devid_supported &&
21541			    (un->un_f_opt_fab_devid == TRUE)) {
21542				if (un->un_devid == NULL) {
21543					sd_register_devid(ssc, SD_DEVINFO(un),
21544					    SD_TARGET_IS_UNRESERVED);
21545				} else {
21546					/*
21547					 * The device id for this disk
21548					 * has been fabricated. The
21549					 * device id must be preserved
21550					 * by writing it back out to
21551					 * disk.
21552					 */
21553					if (sd_write_deviceid(ssc) != 0) {
21554						ddi_devid_free(un->un_devid);
21555						un->un_devid = NULL;
21556					}
21557				}
21558			}
21559			mutex_exit(SD_MUTEX(un));
21560		}
21561
21562		break;
21563
21564	case DKIOCLOCK:
21565		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21566		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
21567		    SD_PATH_STANDARD);
21568		goto done_with_assess;
21569
21570	case DKIOCUNLOCK:
21571		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21572		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
21573		    SD_PATH_STANDARD);
21574		goto done_with_assess;
21575
21576	case DKIOCSTATE: {
21577		enum dkio_state		state;
21578		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21579
21580		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21581			err = EFAULT;
21582		} else {
21583			err = sd_check_media(dev, state);
21584			if (err == 0) {
21585				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21586				    sizeof (int), flag) != 0)
21587					err = EFAULT;
21588			}
21589		}
21590		break;
21591	}
21592
21593	case DKIOCREMOVABLE:
21594		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21595		i = un->un_f_has_removable_media ? 1 : 0;
21596		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21597			err = EFAULT;
21598		} else {
21599			err = 0;
21600		}
21601		break;
21602
21603	case DKIOCHOTPLUGGABLE:
21604		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21605		i = un->un_f_is_hotpluggable ? 1 : 0;
21606		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21607			err = EFAULT;
21608		} else {
21609			err = 0;
21610		}
21611		break;
21612
21613	case DKIOCGTEMPERATURE:
21614		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21615		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21616		break;
21617
21618	case MHIOCENFAILFAST:
21619		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21620		if ((err = drv_priv(cred_p)) == 0) {
21621			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21622		}
21623		break;
21624
21625	case MHIOCTKOWN:
21626		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21627		if ((err = drv_priv(cred_p)) == 0) {
21628			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21629		}
21630		break;
21631
21632	case MHIOCRELEASE:
21633		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21634		if ((err = drv_priv(cred_p)) == 0) {
21635			err = sd_mhdioc_release(dev);
21636		}
21637		break;
21638
21639	case MHIOCSTATUS:
21640		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21641		if ((err = drv_priv(cred_p)) == 0) {
21642			switch (sd_send_scsi_TEST_UNIT_READY(ssc, 0)) {
21643			case 0:
21644				err = 0;
21645				break;
21646			case EACCES:
21647				*rval_p = 1;
21648				err = 0;
21649				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
21650				break;
21651			default:
21652				err = EIO;
21653				goto done_with_assess;
21654			}
21655		}
21656		break;
21657
21658	case MHIOCQRESERVE:
21659		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21660		if ((err = drv_priv(cred_p)) == 0) {
21661			err = sd_reserve_release(dev, SD_RESERVE);
21662		}
21663		break;
21664
21665	case MHIOCREREGISTERDEVID:
21666		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21667		if (drv_priv(cred_p) == EPERM) {
21668			err = EPERM;
21669		} else if (!un->un_f_devid_supported) {
21670			err = ENOTTY;
21671		} else {
21672			err = sd_mhdioc_register_devid(dev);
21673		}
21674		break;
21675
21676	case MHIOCGRP_INKEYS:
21677		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21678		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21679			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21680				err = ENOTSUP;
21681			} else {
21682				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21683				    flag);
21684			}
21685		}
21686		break;
21687
21688	case MHIOCGRP_INRESV:
21689		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21690		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21691			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21692				err = ENOTSUP;
21693			} else {
21694				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21695			}
21696		}
21697		break;
21698
21699	case MHIOCGRP_REGISTER:
21700		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21701		if ((err = drv_priv(cred_p)) != EPERM) {
21702			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21703				err = ENOTSUP;
21704			} else if (arg != NULL) {
21705				mhioc_register_t reg;
21706				if (ddi_copyin((void *)arg, &reg,
21707				    sizeof (mhioc_register_t), flag) != 0) {
21708					err = EFAULT;
21709				} else {
21710					err =
21711					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21712					    ssc, SD_SCSI3_REGISTER,
21713					    (uchar_t *)&reg);
21714					if (err != 0)
21715						goto done_with_assess;
21716				}
21717			}
21718		}
21719		break;
21720
21721	case MHIOCGRP_RESERVE:
21722		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21723		if ((err = drv_priv(cred_p)) != EPERM) {
21724			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21725				err = ENOTSUP;
21726			} else if (arg != NULL) {
21727				mhioc_resv_desc_t resv_desc;
21728				if (ddi_copyin((void *)arg, &resv_desc,
21729				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21730					err = EFAULT;
21731				} else {
21732					err =
21733					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21734					    ssc, SD_SCSI3_RESERVE,
21735					    (uchar_t *)&resv_desc);
21736					if (err != 0)
21737						goto done_with_assess;
21738				}
21739			}
21740		}
21741		break;
21742
21743	case MHIOCGRP_PREEMPTANDABORT:
21744		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21745		if ((err = drv_priv(cred_p)) != EPERM) {
21746			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21747				err = ENOTSUP;
21748			} else if (arg != NULL) {
21749				mhioc_preemptandabort_t preempt_abort;
21750				if (ddi_copyin((void *)arg, &preempt_abort,
21751				    sizeof (mhioc_preemptandabort_t),
21752				    flag) != 0) {
21753					err = EFAULT;
21754				} else {
21755					err =
21756					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21757					    ssc, SD_SCSI3_PREEMPTANDABORT,
21758					    (uchar_t *)&preempt_abort);
21759					if (err != 0)
21760						goto done_with_assess;
21761				}
21762			}
21763		}
21764		break;
21765
21766	case MHIOCGRP_REGISTERANDIGNOREKEY:
21767		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21768		if ((err = drv_priv(cred_p)) != EPERM) {
21769			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21770				err = ENOTSUP;
21771			} else if (arg != NULL) {
21772				mhioc_registerandignorekey_t r_and_i;
21773				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21774				    sizeof (mhioc_registerandignorekey_t),
21775				    flag) != 0) {
21776					err = EFAULT;
21777				} else {
21778					err =
21779					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21780					    ssc, SD_SCSI3_REGISTERANDIGNOREKEY,
21781					    (uchar_t *)&r_and_i);
21782					if (err != 0)
21783						goto done_with_assess;
21784				}
21785			}
21786		}
21787		break;
21788
21789	case USCSICMD:
21790		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21791		cr = ddi_get_cred();
21792		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21793			err = EPERM;
21794		} else {
21795			enum uio_seg	uioseg;
21796
21797			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
21798			    UIO_USERSPACE;
21799			if (un->un_f_format_in_progress == TRUE) {
21800				err = EAGAIN;
21801				break;
21802			}
21803
21804			err = sd_ssc_send(ssc,
21805			    (struct uscsi_cmd *)arg,
21806			    flag, uioseg, SD_PATH_STANDARD);
21807			if (err != 0)
21808				goto done_with_assess;
21809			else
21810				sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21811		}
21812		break;
21813
21814	case CDROMPAUSE:
21815	case CDROMRESUME:
21816		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21817		if (!ISCD(un)) {
21818			err = ENOTTY;
21819		} else {
21820			err = sr_pause_resume(dev, cmd);
21821		}
21822		break;
21823
21824	case CDROMPLAYMSF:
21825		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21826		if (!ISCD(un)) {
21827			err = ENOTTY;
21828		} else {
21829			err = sr_play_msf(dev, (caddr_t)arg, flag);
21830		}
21831		break;
21832
21833	case CDROMPLAYTRKIND:
21834		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21835#if defined(__i386) || defined(__amd64)
21836		/*
21837		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21838		 */
21839		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21840#else
21841		if (!ISCD(un)) {
21842#endif
21843			err = ENOTTY;
21844		} else {
21845			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21846		}
21847		break;
21848
21849	case CDROMREADTOCHDR:
21850		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21851		if (!ISCD(un)) {
21852			err = ENOTTY;
21853		} else {
21854			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21855		}
21856		break;
21857
21858	case CDROMREADTOCENTRY:
21859		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21860		if (!ISCD(un)) {
21861			err = ENOTTY;
21862		} else {
21863			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21864		}
21865		break;
21866
21867	case CDROMSTOP:
21868		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21869		if (!ISCD(un)) {
21870			err = ENOTTY;
21871		} else {
21872			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_STOP,
21873			    SD_PATH_STANDARD);
21874			goto done_with_assess;
21875		}
21876		break;
21877
21878	case CDROMSTART:
21879		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21880		if (!ISCD(un)) {
21881			err = ENOTTY;
21882		} else {
21883			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_START,
21884			    SD_PATH_STANDARD);
21885			goto done_with_assess;
21886		}
21887		break;
21888
21889	case CDROMCLOSETRAY:
21890		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21891		if (!ISCD(un)) {
21892			err = ENOTTY;
21893		} else {
21894			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_CLOSE,
21895			    SD_PATH_STANDARD);
21896			goto done_with_assess;
21897		}
21898		break;
21899
21900	case FDEJECT:	/* for eject command */
21901	case DKIOCEJECT:
21902	case CDROMEJECT:
21903		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21904		if (!un->un_f_eject_media_supported) {
21905			err = ENOTTY;
21906		} else {
21907			err = sr_eject(dev);
21908		}
21909		break;
21910
21911	case CDROMVOLCTRL:
21912		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21913		if (!ISCD(un)) {
21914			err = ENOTTY;
21915		} else {
21916			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21917		}
21918		break;
21919
21920	case CDROMSUBCHNL:
21921		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21922		if (!ISCD(un)) {
21923			err = ENOTTY;
21924		} else {
21925			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21926		}
21927		break;
21928
21929	case CDROMREADMODE2:
21930		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21931		if (!ISCD(un)) {
21932			err = ENOTTY;
21933		} else if (un->un_f_cfg_is_atapi == TRUE) {
21934			/*
21935			 * If the drive supports READ CD, use that instead of
21936			 * switching the LBA size via a MODE SELECT
21937			 * Block Descriptor
21938			 */
21939			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21940		} else {
21941			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21942		}
21943		break;
21944
21945	case CDROMREADMODE1:
21946		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21947		if (!ISCD(un)) {
21948			err = ENOTTY;
21949		} else {
21950			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21951		}
21952		break;
21953
21954	case CDROMREADOFFSET:
21955		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21956		if (!ISCD(un)) {
21957			err = ENOTTY;
21958		} else {
21959			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21960			    flag);
21961		}
21962		break;
21963
21964	case CDROMSBLKMODE:
21965		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21966		/*
21967		 * There is no means of changing the block size on ATAPI
21968		 * drives, so return ENOTTY if the drive is ATAPI.
21969		 */
21970		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21971			err = ENOTTY;
21972		} else if (un->un_f_mmc_cap == TRUE) {
21973
21974			/*
21975			 * MMC Devices do not support changing the
21976			 * logical block size
21977			 *
21978			 * Note: EINVAL is being returned instead of ENOTTY to
21979			 * maintain consistency with the original mmc
21980			 * driver update.
21981			 */
21982			err = EINVAL;
21983		} else {
21984			mutex_enter(SD_MUTEX(un));
21985			if ((!(un->un_exclopen & (1 << SDPART(dev)))) ||
21986			    (un->un_ncmds_in_transport > 0)) {
21987				mutex_exit(SD_MUTEX(un));
21988				err = EINVAL;
21989			} else {
21990				mutex_exit(SD_MUTEX(un));
21991				err = sr_change_blkmode(dev, cmd, arg, flag);
21992			}
21993		}
21994		break;
21995
21996	case CDROMGBLKMODE:
21997		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21998		if (!ISCD(un)) {
21999			err = ENOTTY;
22000		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
22001		    (un->un_f_blockcount_is_valid != FALSE)) {
22002			/*
22003			 * Drive is an ATAPI drive, so return the target block
22004			 * size, since the blocksize cannot be changed on ATAPI
22005			 * drives. Used primarily to detect whether an ATAPI
22006			 * cdrom is present.
22007			 */
22008			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
22009			    sizeof (int), flag) != 0) {
22010				err = EFAULT;
22011			} else {
22012				err = 0;
22013			}
22014
22015		} else {
22016			/*
22017			 * Drive supports changing block sizes via a Mode
22018			 * Select.
22019			 */
22020			err = sr_change_blkmode(dev, cmd, arg, flag);
22021		}
22022		break;
22023
22024	case CDROMGDRVSPEED:
22025	case CDROMSDRVSPEED:
22026		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
22027		if (!ISCD(un)) {
22028			err = ENOTTY;
22029		} else if (un->un_f_mmc_cap == TRUE) {
22030			/*
22031			 * Note: In the future the driver implementation
22032			 * for getting and
22033			 * setting cd speed should entail:
22034			 * 1) If non-mmc try the Toshiba mode page
22035			 *    (sr_change_speed)
22036			 * 2) If mmc but no support for Real Time Streaming try
22037			 *    the SET CD SPEED (0xBB) command
22038			 *   (sr_atapi_change_speed)
22039			 * 3) If mmc and support for Real Time Streaming
22040			 *    try the GET PERFORMANCE and SET STREAMING
22041			 *    commands (not yet implemented, 4380808)
22042			 */
22043			/*
22044			 * As per recent MMC spec, CD-ROM speed is variable
22045			 * and changes with LBA. Since there is no such
22046			 * thing as a single drive speed now, fail this ioctl.
22047			 *
22048			 * Note: EINVAL is returned for consistency with the
22049			 * original implementation, which included support for getting
22050			 * the drive speed of mmc devices but not setting
22051			 * the drive speed. Thus EINVAL would be returned
22052			 * if a set request was made for an mmc device.
22053			 * We no longer support get or set speed for
22054			 * mmc but need to remain consistent with regard
22055			 * to the error code returned.
22056			 */
22057			err = EINVAL;
22058		} else if (un->un_f_cfg_is_atapi == TRUE) {
22059			err = sr_atapi_change_speed(dev, cmd, arg, flag);
22060		} else {
22061			err = sr_change_speed(dev, cmd, arg, flag);
22062		}
22063		break;
22064
22065	case CDROMCDDA:
22066		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
22067		if (!ISCD(un)) {
22068			err = ENOTTY;
22069		} else {
22070			err = sr_read_cdda(dev, (void *)arg, flag);
22071		}
22072		break;
22073
22074	case CDROMCDXA:
22075		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
22076		if (!ISCD(un)) {
22077			err = ENOTTY;
22078		} else {
22079			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
22080		}
22081		break;
22082
22083	case CDROMSUBCODE:
22084		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
22085		if (!ISCD(un)) {
22086			err = ENOTTY;
22087		} else {
22088			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
22089		}
22090		break;
22091
22092
22093#ifdef SDDEBUG
22094/* RESET/ABORTS testing ioctls */
22095	case DKIOCRESET: {
22096		int	reset_level;
22097
22098		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
22099			err = EFAULT;
22100		} else {
22101			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
22102			    "reset_level = 0x%x\n", reset_level);
22103			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
22104				err = 0;
22105			} else {
22106				err = EIO;
22107			}
22108		}
22109		break;
22110	}
22111
22112	case DKIOCABORT:
22113		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
22114		if (scsi_abort(SD_ADDRESS(un), NULL)) {
22115			err = 0;
22116		} else {
22117			err = EIO;
22118		}
22119		break;
22120#endif
22121
22122#ifdef SD_FAULT_INJECTION
22123/* SDIOC FaultInjection testing ioctls */
22124	case SDIOCSTART:
22125	case SDIOCSTOP:
22126	case SDIOCINSERTPKT:
22127	case SDIOCINSERTXB:
22128	case SDIOCINSERTUN:
22129	case SDIOCINSERTARQ:
22130	case SDIOCPUSH:
22131	case SDIOCRETRIEVE:
22132	case SDIOCRUN:
22133		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
22134		    "SDIOC detected cmd:0x%X:\n", cmd);
22135		/* call error generator */
22136		sd_faultinjection_ioctl(cmd, arg, un);
22137		err = 0;
22138		break;
22139
22140#endif /* SD_FAULT_INJECTION */
22141
22142	case DKIOCFLUSHWRITECACHE:
22143		{
22144			struct dk_callback *dkc = (struct dk_callback *)arg;
22145
22146			mutex_enter(SD_MUTEX(un));
22147			if (!un->un_f_sync_cache_supported ||
22148			    !un->un_f_write_cache_enabled) {
22149				err = un->un_f_sync_cache_supported ?
22150				    0 : ENOTSUP;
22151				mutex_exit(SD_MUTEX(un));
22152				if ((flag & FKIOCTL) && dkc != NULL &&
22153				    dkc->dkc_callback != NULL) {
22154					(*dkc->dkc_callback)(dkc->dkc_cookie,
22155					    err);
22156					/*
22157					 * Did callback and reported error.
22158					 * Since we did a callback, ioctl
22159					 * should return 0.
22160					 */
22161					err = 0;
22162				}
22163				break;
22164			}
22165			mutex_exit(SD_MUTEX(un));
22166
22167			if ((flag & FKIOCTL) && dkc != NULL &&
22168			    dkc->dkc_callback != NULL) {
22169				/* async SYNC CACHE request */
22170				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22171			} else {
22172				/* synchronous SYNC CACHE request */
22173				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22174			}
22175		}
22176		break;
22177
22178	case DKIOCGETWCE: {
22179
22180		int wce;
22181
22182		if ((err = sd_get_write_cache_enabled(ssc, &wce)) != 0) {
22183			break;
22184		}
22185
22186		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22187			err = EFAULT;
22188		}
22189		break;
22190	}
22191
22192	case DKIOCSETWCE: {
22193
22194		int wce, sync_supported;
22195
22196		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22197			err = EFAULT;
22198			break;
22199		}
22200
22201		/*
22202		 * Synchronize multiple threads trying to enable
22203		 * or disable the cache via the un_f_wcc_cv
22204		 * condition variable.
22205		 */
22206		mutex_enter(SD_MUTEX(un));
22207
22208		/*
22209		 * Don't allow the cache to be enabled if the
22210		 * config file has it disabled.
22211		 */
22212		if (un->un_f_opt_disable_cache && wce) {
22213			mutex_exit(SD_MUTEX(un));
22214			err = EINVAL;
22215			break;
22216		}
22217
22218		/*
22219		 * Wait for write cache change in progress
22220		 * bit to be clear before proceeding.
22221		 */
22222		while (un->un_f_wcc_inprog)
22223			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22224
22225		un->un_f_wcc_inprog = 1;
22226
22227		if (un->un_f_write_cache_enabled && wce == 0) {
22228			/*
22229			 * Disable the write cache.  Don't clear
22230			 * un_f_write_cache_enabled until after
22231			 * the mode select and flush are complete.
22232			 */
22233			sync_supported = un->un_f_sync_cache_supported;
22234
22235			/*
22236			 * If cache flush is suppressed, we assume that the
22237			 * controller firmware will take care of managing the
22238			 * write cache for us: no need to explicitly
22239			 * disable it.
22240			 */
22241			if (!un->un_f_suppress_cache_flush) {
22242				mutex_exit(SD_MUTEX(un));
22243				if ((err = sd_cache_control(ssc,
22244				    SD_CACHE_NOCHANGE,
22245				    SD_CACHE_DISABLE)) == 0 &&
22246				    sync_supported) {
22247					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
22248					    NULL);
22249				}
22250			} else {
22251				mutex_exit(SD_MUTEX(un));
22252			}
22253
22254			mutex_enter(SD_MUTEX(un));
22255			if (err == 0) {
22256				un->un_f_write_cache_enabled = 0;
22257			}
22258
22259		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22260			/*
22261			 * Set un_f_write_cache_enabled first, so there is
22262			 * no window where the cache is enabled, but the
22263			 * bit says it isn't.
22264			 */
22265			un->un_f_write_cache_enabled = 1;
22266
22267			/*
22268			 * If cache flush is suppressed, we assume that the
22269			 * controller firmware will take care of managing the
22270			 * write cache for us: no need to explicitly
22271			 * enable it.
22272			 */
22273			if (!un->un_f_suppress_cache_flush) {
22274				mutex_exit(SD_MUTEX(un));
22275				err = sd_cache_control(ssc, SD_CACHE_NOCHANGE,
22276				    SD_CACHE_ENABLE);
22277			} else {
22278				mutex_exit(SD_MUTEX(un));
22279			}
22280
22281			mutex_enter(SD_MUTEX(un));
22282
22283			if (err) {
22284				un->un_f_write_cache_enabled = 0;
22285			}
22286		}
22287
22288		un->un_f_wcc_inprog = 0;
22289		cv_broadcast(&un->un_wcc_cv);
22290		mutex_exit(SD_MUTEX(un));
22291		break;
22292	}
22293
22294	default:
22295		err = ENOTTY;
22296		break;
22297	}
22298	mutex_enter(SD_MUTEX(un));
22299	un->un_ncmds_in_driver--;
22300	ASSERT(un->un_ncmds_in_driver >= 0);
22301	mutex_exit(SD_MUTEX(un));
22302
22303
22304done_without_assess:
22305	sd_ssc_fini(ssc);
22306
22307	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22308	return (err);
22309
22310done_with_assess:
22311	mutex_enter(SD_MUTEX(un));
22312	un->un_ncmds_in_driver--;
22313	ASSERT(un->un_ncmds_in_driver >= 0);
22314	mutex_exit(SD_MUTEX(un));
22315
22316done_quick_assess:
22317	if (err != 0)
22318		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22319	/* Uninitialize sd_ssc_t pointer */
22320	sd_ssc_fini(ssc);
22321
22322	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22323	return (err);
22324}
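
/*
 * Illustrative sketch (not part of the driver): a user-level program
 * could query and toggle the write cache through the DKIOCGETWCE and
 * DKIOCSETWCE cases handled above. "fd" is a hypothetical descriptor
 * for a raw disk device.
 *
 *	int wce;
 *
 *	if (ioctl(fd, DKIOCGETWCE, &wce) == 0 && wce != 0) {
 *		wce = 0;
 *		(void) ioctl(fd, DKIOCSETWCE, &wce);
 *	}
 *
 * Note that DKIOCSETWCE fails with EINVAL when the config file has
 * the cache disabled (un_f_opt_disable_cache), as implemented above.
 */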
22325
22326
22327/*
22328 *    Function: sd_dkio_ctrl_info
22329 *
22330 * Description: This routine is the driver entry point for handling controller
22331 *		information ioctl requests (DKIOCINFO).
22332 *
22333 *   Arguments: dev  - the device number
22334 *		arg  - pointer to user provided dk_cinfo structure
22335 *		       specifying the controller type and attributes.
22336 *		flag - this argument is a pass through to ddi_copyxxx()
22337 *		       directly from the mode argument of ioctl().
22338 *
22339 * Return Code: 0
22340 *		EFAULT
22341 *		ENXIO
22342 */
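
/*
 * Illustrative sketch (not part of the driver): a user-level caller
 * would retrieve this information as follows, where "fd" is a
 * hypothetical descriptor for a raw disk device.
 *
 *	struct dk_cinfo cinfo;
 *
 *	if (ioctl(fd, DKIOCINFO, &cinfo) == 0) {
 *		(void) printf("ctlr %s%d unit %d maxxfer %u blocks\n",
 *		    cinfo.dki_cname, cinfo.dki_cnum, cinfo.dki_unit,
 *		    cinfo.dki_maxtransfer);
 *	}
 */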
22343
22344static int
22345sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22346{
22347	struct sd_lun	*un = NULL;
22348	struct dk_cinfo	*info;
22349	dev_info_t	*pdip;
22350	int		lun, tgt;
22351
22352	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22353		return (ENXIO);
22354	}
22355
22356	info = (struct dk_cinfo *)
22357	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22358
22359	switch (un->un_ctype) {
22360	case CTYPE_CDROM:
22361		info->dki_ctype = DKC_CDROM;
22362		break;
22363	default:
22364		info->dki_ctype = DKC_SCSI_CCS;
22365		break;
22366	}
22367	pdip = ddi_get_parent(SD_DEVINFO(un));
22368	info->dki_cnum = ddi_get_instance(pdip);
22369	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22370		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22371	} else {
22372		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22373		    DK_DEVLEN - 1);
22374	}
22375
22376	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22377	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22378	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22379	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22380
22381	/* Unit Information */
22382	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22383	info->dki_slave = ((tgt << 3) | lun);
22384	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22385	    DK_DEVLEN - 1);
22386	info->dki_flags = DKI_FMTVOL;
22387	info->dki_partition = SDPART(dev);
22388
22389	/* Max Transfer size of this device in blocks */
22390	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22391	info->dki_addr = 0;
22392	info->dki_space = 0;
22393	info->dki_prio = 0;
22394	info->dki_vec = 0;
22395
22396	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22397		kmem_free(info, sizeof (struct dk_cinfo));
22398		return (EFAULT);
22399	} else {
22400		kmem_free(info, sizeof (struct dk_cinfo));
22401		return (0);
22402	}
22403}
22404
22405
22406/*
22407 *    Function: sd_get_media_info
22408 *
22409 * Description: This routine is the driver entry point for handling ioctl
22410 *		requests for the media type or command set profile used by the
22411 *		drive to operate on the media (DKIOCGMEDIAINFO).
22412 *
22413 *   Arguments: dev	- the device number
22414 *		arg	- pointer to user provided dk_minfo structure
22415 *			  specifying the media type, logical block size and
22416 *			  drive capacity.
22417 *		flag	- this argument is a pass through to ddi_copyxxx()
22418 *			  directly from the mode argument of ioctl().
22419 *
22420 * Return Code: 0
22421 *		EACCES
22422 *		EFAULT
22423 *		ENXIO
22424 *		EIO
22425 */
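
/*
 * Illustrative sketch (not part of the driver): user-level usage of
 * DKIOCGMEDIAINFO, assuming "fd" is an open raw device descriptor.
 *
 *	struct dk_minfo minfo;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0) {
 *		(void) printf("type=0x%x lbsize=%u capacity=%llu\n",
 *		    minfo.dki_media_type, minfo.dki_lbsize,
 *		    (u_longlong_t)minfo.dki_capacity);
 *	}
 */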
22426
22427static int
22428sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22429{
22430	struct sd_lun		*un = NULL;
22431	struct uscsi_cmd	com;
22432	struct scsi_inquiry	*sinq;
22433	struct dk_minfo		media_info;
22434	u_longlong_t		media_capacity;
22435	uint64_t		capacity;
22436	uint_t			lbasize;
22437	uchar_t			*out_data;
22438	uchar_t			*rqbuf;
22439	int			rval = 0;
22440	int			rtn;
22441	sd_ssc_t		*ssc;
22442	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22443	    (un->un_state == SD_STATE_OFFLINE)) {
22444		return (ENXIO);
22445	}
22446
22447	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22448
22449	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22450	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22451
22452	/* Issue a TUR to determine if the drive is ready with media present */
22453	ssc = sd_ssc_init(un);
22454	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
22455	if (rval == ENXIO) {
22456		goto done;
22457	} else if (rval != 0) {
22458		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22459	}
22460
22461	/* Now get configuration data */
22462	if (ISCD(un)) {
22463		media_info.dki_media_type = DK_CDROM;
22464
22465		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22466		if (un->un_f_mmc_cap == TRUE) {
22467			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
22468			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
22469			    SD_PATH_STANDARD);
22470
22471			if (rtn) {
22472				/*
22473				 * We ignore all failures for CD and need to
22474				 * We ignore all failures for CDs and make
22475				 * the assessment before the processing code
22476				 * so the FMA assessment is not missed.
22477				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22478				/*
22479				 * Failed for other than an illegal request
22480				 * or command not supported
22481				 */
22482				if ((com.uscsi_status == STATUS_CHECK) &&
22483				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22484					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22485					    (rqbuf[12] != 0x20)) {
22486						rval = EIO;
22487						goto no_assessment;
22488					}
22489				}
22490			} else {
22491				/*
22492				 * The GET CONFIGURATION command succeeded
22493				 * so set the media type according to the
22494				 * returned data
22495				 */
22496				media_info.dki_media_type = out_data[6];
22497				media_info.dki_media_type <<= 8;
22498				media_info.dki_media_type |= out_data[7];
22499			}
22500		}
22501	} else {
22502		/*
22503		 * The profile list is not available, so we attempt to identify
22504		 * the media type based on the inquiry data
22505		 */
22506		sinq = un->un_sd->sd_inq;
22507		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
22508		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
22509			/* This is a direct access device  or optical disk */
22510			media_info.dki_media_type = DK_FIXED_DISK;
22511
22512			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22513			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22514				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22515					media_info.dki_media_type = DK_ZIP;
22516				} else if (
22517				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22518					media_info.dki_media_type = DK_JAZ;
22519				}
22520			}
22521		} else {
22522			/*
22523			 * Not a CD, direct access or optical disk so return
22524			 * Not a CD, direct access, or optical disk, so
22525			 * return unknown media.
22526			media_info.dki_media_type = DK_UNKNOWN;
22527		}
22528	}
22529
22530	/* Now read the capacity so we can provide the lbasize and capacity */
22531	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
22532	    SD_PATH_DIRECT);
22533	switch (rval) {
22534	case 0:
22535		break;
22536	case EACCES:
22537		rval = EACCES;
22538		goto done;
22539	default:
22540		rval = EIO;
22541		goto done;
22542	}
22543
22544	/*
22545	 * If the LUN has been expanded dynamically, update the un structure.
22546	 */
22547	mutex_enter(SD_MUTEX(un));
22548	if ((un->un_f_blockcount_is_valid == TRUE) &&
22549	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
22550	    (capacity > un->un_blockcount)) {
22551		sd_update_block_info(un, lbasize, capacity);
22552	}
22553	mutex_exit(SD_MUTEX(un));
22554
22555	media_info.dki_lbsize = lbasize;
22556	media_capacity = capacity;
22557
22558	/*
22559	 * sd_send_scsi_READ_CAPACITY() reports capacity in
22560	 * un->un_sys_blocksize chunks. So we need to convert it into
22561	 * cap.lbasize chunks.
22562	 */
22563	media_capacity *= un->un_sys_blocksize;
22564	media_capacity /= lbasize;
22565	media_info.dki_capacity = media_capacity;
22566
22567	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22568		rval = EFAULT;
22569		/* Use a goto in case more code is added below in the future */
22570		goto no_assessment;
22571	}
22572done:
22573	if (rval != 0) {
22574		if (rval == EIO)
22575			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
22576		else
22577			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22578	}
22579no_assessment:
22580	sd_ssc_fini(ssc);
22581	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22582	kmem_free(rqbuf, SENSE_LENGTH);
22583	return (rval);
22584}
22585
22586
22587/*
22588 *    Function: sd_check_media
22589 *
22590 * Description: This utility routine implements the functionality for the
22591 *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
22592 *		driver state changes from that specified by the user
22593 *		(inserted or ejected). For example, if the user specifies
22594 *		DKIO_EJECTED and the current media state is inserted this
22595 *		routine will immediately return DKIO_INSERTED. However, if the
22596 *		current media state is not inserted the user thread will be
22597 *		blocked until the drive state changes. If DKIO_NONE is specified
22598 *		the user thread will block until a drive state change occurs.
22599 *
22600 *   Arguments: dev  - the device number
22601 *		state  - user pointer to a dkio_state, updated with the current
22602 *			drive state at return.
22603 *
22604 * Return Code: ENXIO
22605 *		EIO
22606 *		EAGAIN
22607 *		EINTR
22608 */
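
/*
 * Illustrative sketch (not part of the driver): the canonical
 * user-level media watch loop built on DKIOCSTATE. Each call blocks
 * in sd_check_media() until the state differs from the state passed
 * in, then returns the new state.
 *
 *	enum dkio_state state = DKIO_NONE;
 *
 *	for (;;) {
 *		if (ioctl(fd, DKIOCSTATE, &state) != 0)
 *			break;		(ENXIO, EIO, EAGAIN, EINTR)
 *		if (state == DKIO_INSERTED)
 *			... media is now present ...
 *		else if (state == DKIO_EJECTED)
 *			... media has been removed ...
 *	}
 */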
22609
22610static int
22611sd_check_media(dev_t dev, enum dkio_state state)
22612{
22613	struct sd_lun		*un = NULL;
22614	enum dkio_state		prev_state;
22615	opaque_t		token = NULL;
22616	int			rval = 0;
22617	sd_ssc_t		*ssc;
22618	dev_t			sub_dev;
22619
22620	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22621		return (ENXIO);
22622	}
22623
22624	/*
22625	 * sub_dev is used when submitting a request to the scsi watch.
22626	 * All submissions are unified to use the same device number.
22627	 */
22628	sub_dev = sd_make_device(SD_DEVINFO(un));
22629
22630	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
22631
22632	ssc = sd_ssc_init(un);
22633
22634	mutex_enter(SD_MUTEX(un));
22635
22636	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
22637	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
22638
22639	prev_state = un->un_mediastate;
22640
22641	/* is there anything to do? */
22642	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
22643		/*
22644		 * submit the request to the scsi_watch service;
22645		 * scsi_media_watch_cb() does the real work
22646		 */
22647		mutex_exit(SD_MUTEX(un));
22648
22649		/*
22650		 * This change handles the case where a scsi watch request is
22651		 * added to a device that is powered down. To accomplish this
22652		 * we power up the device before adding the scsi watch request,
22653		 * since the scsi watch sends a TUR directly to the device
22654		 * which the device cannot handle if it is powered down.
22655		 */
22656		if (sd_pm_entry(un) != DDI_SUCCESS) {
22657			mutex_enter(SD_MUTEX(un));
22658			goto done;
22659		}
22660
22661		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
22662		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
22663		    (caddr_t)sub_dev);
22664
22665		sd_pm_exit(un);
22666
22667		mutex_enter(SD_MUTEX(un));
22668		if (token == NULL) {
22669			rval = EAGAIN;
22670			goto done;
22671		}
22672
22673		/*
22674		 * This is a special case IOCTL that doesn't return
22675		 * until the media state changes. Routine sdpower
22676		 * knows about and handles this, so don't count it
22677		 * as an active cmd in the driver, which would make
22678		 * the device appear busy to the pm framework.
22679		 * If the count isn't decremented the device can't
22680		 * be powered down.
22681		 */
22682		un->un_ncmds_in_driver--;
22683		ASSERT(un->un_ncmds_in_driver >= 0);
22684
22685		/*
22686		 * if a prior request had been made, this will be the same
22687		 * token, as scsi_watch was designed that way.
22688		 */
22689		un->un_swr_token = token;
22690		un->un_specified_mediastate = state;
22691
22692		/*
22693		 * Now wait for the media change. We should not be signalled
22694		 * while mediastate == state, but it is still better to test
22695		 * for this condition, since there is a 2 sec cv_broadcast
22696		 * delay when mediastate == DKIO_INSERTED.
22697		 */
22698		SD_TRACE(SD_LOG_COMMON, un,
22699		    "sd_check_media: waiting for media state change\n");
22700		while (un->un_mediastate == state) {
22701			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
22702				SD_TRACE(SD_LOG_COMMON, un,
22703				    "sd_check_media: waiting for media state "
22704				    "was interrupted\n");
22705				un->un_ncmds_in_driver++;
22706				rval = EINTR;
22707				goto done;
22708			}
22709			SD_TRACE(SD_LOG_COMMON, un,
22710			    "sd_check_media: received signal, state=%x\n",
22711			    un->un_mediastate);
22712		}
22713		/*
22714		 * Inc the counter to indicate the device once again
22715		 * has an active outstanding cmd.
22716		 */
22717		un->un_ncmds_in_driver++;
22718	}
22719
22720	/* invalidate geometry */
22721	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
22722		sr_ejected(un);
22723	}
22724
22725	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
22726		uint64_t	capacity;
22727		uint_t		lbasize;
22728
22729		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
22730		mutex_exit(SD_MUTEX(un));
22731		/*
22732		 * Since the following routines use SD_PATH_DIRECT, we must
22733		 * call PM directly before the upcoming disk accesses. This
22734		 * may cause the disk to be powered up and spun up.
22735		 */
22736
22737		if (sd_pm_entry(un) == DDI_SUCCESS) {
22738			rval = sd_send_scsi_READ_CAPACITY(ssc,
22739			    &capacity, &lbasize, SD_PATH_DIRECT);
22740			if (rval != 0) {
22741				sd_pm_exit(un);
22742				if (rval == EIO)
22743					sd_ssc_assessment(ssc,
22744					    SD_FMT_STATUS_CHECK);
22745				else
22746					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22747				mutex_enter(SD_MUTEX(un));
22748				goto done;
22749			}
22750		} else {
22751			rval = EIO;
22752			mutex_enter(SD_MUTEX(un));
22753			goto done;
22754		}
22755		mutex_enter(SD_MUTEX(un));
22756
22757		sd_update_block_info(un, lbasize, capacity);
22758
22759		/*
22760		 * Check if the media in the device is writable or not.
22761		 */
22762		if (ISCD(un)) {
22763			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
22764		}
22765
22766		mutex_exit(SD_MUTEX(un));
22767		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
22768		if ((cmlb_validate(un->un_cmlbhandle, 0,
22769		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
22770			sd_set_pstats(un);
22771			SD_TRACE(SD_LOG_IO_PARTITION, un,
22772			    "sd_check_media: un:0x%p pstats created and "
22773			    "set\n", un);
22774		}
22775
22776		rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
22777		    SD_PATH_DIRECT);
22778
22779		sd_pm_exit(un);
22780
22781		if (rval != 0) {
22782			if (rval == EIO)
22783				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
22784			else
22785				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22786		}
22787
22788		mutex_enter(SD_MUTEX(un));
22789	}
22790done:
22791	sd_ssc_fini(ssc);
22792	un->un_f_watcht_stopped = FALSE;
22793	if (token != NULL && un->un_swr_token != NULL) {
22794		/*
22795		 * Use of this local token and the mutex ensures that we avoid
22796		 * some race conditions associated with terminating the
22797		 * scsi watch.
22798		 */
22799		token = un->un_swr_token;
22800		mutex_exit(SD_MUTEX(un));
22801		(void) scsi_watch_request_terminate(token,
22802		    SCSI_WATCH_TERMINATE_WAIT);
22803		if (scsi_watch_get_ref_count(token) == 0) {
22804			mutex_enter(SD_MUTEX(un));
22805			un->un_swr_token = (opaque_t)NULL;
22806		} else {
22807			mutex_enter(SD_MUTEX(un));
22808		}
22809	}
22810
22811	/*
22812	 * Update the capacity kstat value if there was no media previously
22813	 * (capacity kstat is 0) and media has now been inserted
22814	 * (un_f_blockcount_is_valid == TRUE).
22815	 */
22816	if (un->un_errstats) {
22817		struct sd_errstats	*stp = NULL;
22818
22819		stp = (struct sd_errstats *)un->un_errstats->ks_data;
22820		if ((stp->sd_capacity.value.ui64 == 0) &&
22821		    (un->un_f_blockcount_is_valid == TRUE)) {
22822			stp->sd_capacity.value.ui64 =
22823			    (uint64_t)((uint64_t)un->un_blockcount *
22824			    un->un_sys_blocksize);
22825		}
22826	}
22827	mutex_exit(SD_MUTEX(un));
22828	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
22829	return (rval);
22830}
22831
22832
22833/*
22834 *    Function: sd_delayed_cv_broadcast
22835 *
22836 * Description: Delayed cv_broadcast to allow for target to recover from media
22837 *		insertion.
22838 *
22839 *   Arguments: arg - driver soft state (unit) structure
22840 */
22841
22842static void
22843sd_delayed_cv_broadcast(void *arg)
22844{
22845	struct sd_lun *un = arg;
22846
22847	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
22848
22849	mutex_enter(SD_MUTEX(un));
22850	un->un_dcvb_timeid = NULL;
22851	cv_broadcast(&un->un_state_cv);
22852	mutex_exit(SD_MUTEX(un));
22853}
22854
22855
22856/*
22857 *    Function: sd_media_watch_cb
22858 *
22859 * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
22860 *		routine processes the TUR sense data and updates the driver
22861 *		state if a transition has occurred. The user thread
22862 *		(sd_check_media) is then signalled.
22863 *
22864 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
22865 *			among multiple watches that share this callback function
22866 *		resultp - scsi watch facility result packet containing scsi
22867 *			  packet, status byte and sense data
22868 *
22869 * Return Code: 0 for success, -1 for failure
22870 */
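
/*
 * Summary of the sense-data to media-state mapping implemented below
 * (at least 13 bytes of valid sense data are required):
 *
 *	KEY_UNIT_ATTENTION, ASC 0x28              -> DKIO_INSERTED
 *	KEY_NOT_READY, ASC 0x06, ASCQ 0x00        -> DKIO_INSERTED
 *	KEY_NOT_READY, ASC 0x3a                   -> DKIO_EJECTED
 *	KEY_NOT_READY, ASC 0x04, ASCQ 0x02/07/08  -> DKIO_INSERTED
 *	KEY_NO_SENSE, ASC 0x00, ASCQ 0x00         -> ignored (no info)
 *	GOOD status, CMD_CMPLT, no sense data     -> DKIO_INSERTED
 *	pkt_reason == CMD_DEV_GONE                -> DKIO_DEV_GONE
 */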
22871
22872static int
22873sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
22874{
22875	struct sd_lun			*un;
22876	struct scsi_status		*statusp = resultp->statusp;
22877	uint8_t				*sensep = (uint8_t *)resultp->sensep;
22878	enum dkio_state			state = DKIO_NONE;
22879	dev_t				dev = (dev_t)arg;
22880	uchar_t				actual_sense_length;
22881	uint8_t				skey, asc, ascq;
22882
22883	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22884		return (-1);
22885	}
22886	actual_sense_length = resultp->actual_sense_length;
22887
22888	mutex_enter(SD_MUTEX(un));
22889	SD_TRACE(SD_LOG_COMMON, un,
22890	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
22891	    *((char *)statusp), (void *)sensep, actual_sense_length);
22892
22893	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
22894		un->un_mediastate = DKIO_DEV_GONE;
22895		cv_broadcast(&un->un_state_cv);
22896		mutex_exit(SD_MUTEX(un));
22897
22898		return (0);
22899	}
22900
22901	/*
22902	 * If there was a check condition, then sensep points to valid sense
22903	 * data. If status was not a check condition but a reservation or busy
22904	 * status, then the new state is DKIO_NONE.
22905	 */
22906	if (sensep != NULL) {
22907		skey = scsi_sense_key(sensep);
22908		asc = scsi_sense_asc(sensep);
22909		ascq = scsi_sense_ascq(sensep);
22910
22911		SD_INFO(SD_LOG_COMMON, un,
22912		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
22913		    skey, asc, ascq);
22914		/* This routine only uses up to 13 bytes of sense data. */
22915		if (actual_sense_length >= 13) {
22916			if (skey == KEY_UNIT_ATTENTION) {
22917				if (asc == 0x28) {
22918					state = DKIO_INSERTED;
22919				}
22920			} else if (skey == KEY_NOT_READY) {
22921				/*
22922				 * Sense data of 02/06/00 means that the
22923				 * drive could not read the media (No
22924				 * reference position found). In this case
22925				 * to prevent a hang on the DKIOCSTATE IOCTL
22926				 * we set the media state to DKIO_INSERTED.
22927				 */
22928				if (asc == 0x06 && ascq == 0x00)
22929					state = DKIO_INSERTED;
22930
22931				/*
22932				 * Sense data of 02/04/02 means that the
22933				 * host should send a start command.
22934				 * Explicitly leave the media state as is
22935				 * (inserted), as the media is present
22936				 * and the host has stopped the device
22937				 * for PM reasons. The next true
22938				 * read/write to this media will bring
22939				 * the device to the right state for
22940				 * media access.
22941				 */
22942				if (asc == 0x3a) {
22943					state = DKIO_EJECTED;
22944				} else {
22945					/*
22946					 * If the drive is busy with an
22947					 * operation or long write, keep the
22948					 * media in an inserted state.
22949					 */
22950
22951					if ((asc == 0x04) &&
22952					    ((ascq == 0x02) ||
22953					    (ascq == 0x07) ||
22954					    (ascq == 0x08))) {
22955						state = DKIO_INSERTED;
22956					}
22957				}
22958			} else if (skey == KEY_NO_SENSE) {
22959				if ((asc == 0x00) && (ascq == 0x00)) {
22960					/*
22961					 * Sense Data 00/00/00 does not provide
22962					 * any information about the state of
22963					 * the media. Ignore it.
22964					 */
22965					mutex_exit(SD_MUTEX(un));
22966					return (0);
22967				}
22968			}
22969		}
22970	} else if ((*((char *)statusp) == STATUS_GOOD) &&
22971	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
22972		state = DKIO_INSERTED;
22973	}
22974
22975	SD_TRACE(SD_LOG_COMMON, un,
22976	    "sd_media_watch_cb: state=%x, specified=%x\n",
22977	    state, un->un_specified_mediastate);
22978
22979	/*
22980	 * now signal the waiting thread if this is *not* the specified state;
22981	 * delay the signal if the state is DKIO_INSERTED to allow the target
22982	 * to recover
22983	 */
22984	if (state != un->un_specified_mediastate) {
22985		un->un_mediastate = state;
22986		if (state == DKIO_INSERTED) {
22987			/*
22988			 * delay the signal to give the drive a chance
22989			 * to do what it apparently needs to do
22990			 */
22991			SD_TRACE(SD_LOG_COMMON, un,
22992			    "sd_media_watch_cb: delayed cv_broadcast\n");
22993			if (un->un_dcvb_timeid == NULL) {
22994				un->un_dcvb_timeid =
22995				    timeout(sd_delayed_cv_broadcast, un,
22996				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
22997			}
22998		} else {
22999			SD_TRACE(SD_LOG_COMMON, un,
23000			    "sd_media_watch_cb: immediate cv_broadcast\n");
23001			cv_broadcast(&un->un_state_cv);
23002		}
23003	}
23004	mutex_exit(SD_MUTEX(un));
23005	return (0);
23006}
23007
23008
23009/*
23010 *    Function: sd_dkio_get_temp
23011 *
23012 * Description: This routine is the driver entry point for handling ioctl
23013 *		requests to get the disk temperature.
23014 *
23015 *   Arguments: dev  - the device number
23016 *		arg  - pointer to user provided dk_temperature structure.
23017 *		flag - this argument is a pass through to ddi_copyxxx()
23018 *		       directly from the mode argument of ioctl().
23019 *
23020 * Return Code: 0
23021 *		EFAULT
23022 *		ENXIO
23023 *		EAGAIN
23024 */
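
/*
 * Illustrative sketch (not part of the driver): user-level usage of
 * DKIOCGTEMPERATURE, assuming "fd" is an open raw device descriptor.
 *
 *	struct dk_temperature dkt;
 *
 *	bzero(&dkt, sizeof (dkt));
 *	dkt.dkt_flags = DKT_BYPASS_PM;	(optional; may yield EAGAIN)
 *	if (ioctl(fd, DKIOCGTEMPERATURE, &dkt) == 0 &&
 *	    dkt.dkt_cur_temp != DKT_INVALID_TEMP)
 *		(void) printf("%d degrees C\n", dkt.dkt_cur_temp);
 */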
23025
23026static int
23027sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
23028{
23029	struct sd_lun		*un = NULL;
23030	struct dk_temperature	*dktemp = NULL;
23031	uchar_t			*temperature_page;
23032	int			rval = 0;
23033	int			path_flag = SD_PATH_STANDARD;
23034	sd_ssc_t		*ssc;
23035
23036	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23037		return (ENXIO);
23038	}
23039
23040	ssc = sd_ssc_init(un);
23041	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
23042
23043	/* copyin the disk temp argument to get the user flags */
23044	if (ddi_copyin((void *)arg, dktemp,
23045	    sizeof (struct dk_temperature), flag) != 0) {
23046		rval = EFAULT;
23047		goto done;
23048	}
23049
23050	/* Initialize the temperature to invalid. */
23051	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
23052	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
23053
23054	/*
23055	 * Note: Investigate removing the "bypass pm" semantic.
23056	 * Can we just bypass PM always?
23057	 */
23058	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
23059		path_flag = SD_PATH_DIRECT;
23060		ASSERT(!mutex_owned(&un->un_pm_mutex));
23061		mutex_enter(&un->un_pm_mutex);
23062		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
23063			/*
23064			 * If DKT_BYPASS_PM is set, and the drive happens to be
23065			 * in low power mode, we cannot wake it up; we need
23066			 * to return EAGAIN.
23067			 */
23068			mutex_exit(&un->un_pm_mutex);
23069			rval = EAGAIN;
23070			goto done;
23071		} else {
23072			/*
23073			 * Indicate to PM the device is busy. This is required
23074			 * to avoid a race - i.e. the ioctl is issuing a
23075			 * command and the pm framework brings down the device
23076			 * to low power mode (possible power cut-off on some
23077			 * platforms).
23078			 */
23079			mutex_exit(&un->un_pm_mutex);
23080			if (sd_pm_entry(un) != DDI_SUCCESS) {
23081				rval = EAGAIN;
23082				goto done;
23083			}
23084		}
23085	}
23086
23087	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
23088
23089	rval = sd_send_scsi_LOG_SENSE(ssc, temperature_page,
23090	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag);
23091	if (rval != 0)
23092		goto done2;
23093
23094	/*
23095	 * For the current temperature verify that the parameter length is 0x02
23096	 * and the parameter code is 0x00
23097	 */
23098	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
23099	    (temperature_page[5] == 0x00)) {
23100		if (temperature_page[9] == 0xFF) {
23101			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
23102		} else {
23103			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
23104		}
23105	}
23106
23107	/*
23108	 * For the reference temperature verify that the parameter
23109	 * length is 0x02 and the parameter code is 0x01
23110	 */
23111	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
23112	    (temperature_page[11] == 0x01)) {
23113		if (temperature_page[15] == 0xFF) {
23114			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
23115		} else {
23116			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
23117		}
23118	}
23119
23120	/* Do the copyout regardless of the temperature command's status. */
23121	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
23122	    flag) != 0) {
23123		rval = EFAULT;
23124		goto done1;
23125	}
23126
23127done2:
23128	if (rval != 0) {
23129		if (rval == EIO)
23130			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23131		else
23132			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23133	}
23134done1:
23135	if (path_flag == SD_PATH_DIRECT) {
23136		sd_pm_exit(un);
23137	}
23138
23139	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
23140done:
23141	sd_ssc_fini(ssc);
23142	if (dktemp != NULL) {
23143		kmem_free(dktemp, sizeof (struct dk_temperature));
23144	}
23145
23146	return (rval);
23147}
23148
23149
23150/*
23151 *    Function: sd_log_page_supported
23152 *
23153 * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
23154 *		supported log pages.
23155 *
23156 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
23157 *                      structure for this target.
23158 *		log_page - the log page code to search for.
23159 *
23160 * Return Code: -1 - on error (log sense is optional and may not be supported).
23161 *		0  - log page not found.
23162 *  		1  - log page found.
23163 */
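
/*
 * Layout of the LOG SENSE "supported log pages" page (page code 0x00)
 * as consumed below:
 *
 *	byte 0		page code (0x00)
 *	byte 1		reserved
 *	bytes 2-3	page length (the code below reads only byte 3,
 *			the low-order length byte)
 *	bytes 4..n	list of supported page codes, one byte each
 */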
23164
23165static int
23166sd_log_page_supported(sd_ssc_t *ssc, int log_page)
23167{
23168	uchar_t *log_page_data;
23169	int	i;
23170	int	match = 0;
23171	int	log_size;
23172	int	status = 0;
23173	struct sd_lun	*un;
23174
23175	ASSERT(ssc != NULL);
23176	un = ssc->ssc_un;
23177	ASSERT(un != NULL);
23178
23179	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
23180
23181	status = sd_send_scsi_LOG_SENSE(ssc, log_page_data, 0xFF, 0, 0x01, 0,
23182	    SD_PATH_DIRECT);
23183
23184	if (status != 0) {
23185		if (status == EIO) {
23186			/*
23187			 * Some disks do not support log sense; we
23188			 * should ignore this kind of error (sense key is
23189			 * 0x5 - illegal request).
23190			 */
23191			uint8_t *sensep;
23192			int senlen;
23193
23194			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
23195			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
23196			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
23197
23198			if (senlen > 0 &&
23199			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
23200				sd_ssc_assessment(ssc,
23201				    SD_FMT_IGNORE_COMPROMISE);
23202			} else {
23203				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23204			}
23205		} else {
23206			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23207		}
23208
23209		SD_ERROR(SD_LOG_COMMON, un,
23210		    "sd_log_page_supported: failed log page retrieval\n");
23211		kmem_free(log_page_data, 0xFF);
23212		return (-1);
23213	}
23214
23215	log_size = log_page_data[3];
23216
23217	/*
23218	 * The list of supported log pages starts at the fourth byte. Check
23219	 * until we run out of log pages or a match is found.
23220	 */
23221	for (i = 4; (i < (log_size + 4)) && !match; i++) {
23222		if (log_page_data[i] == log_page) {
23223			match++;
23224		}
23225	}
23226	kmem_free(log_page_data, 0xFF);
23227	return (match);
23228}
23229
23230
23231/*
23232 *    Function: sd_mhdioc_failfast
23233 *
23234 * Description: This routine is the driver entry point for handling ioctl
23235 *		requests to enable/disable the multihost failfast option.
23236 *		(MHIOCENFAILFAST)
23237 *
23238 *   Arguments: dev	- the device number
23239 *		arg	- user specified probing interval.
23240 *		flag	- this argument is a pass through to ddi_copyxxx()
23241 *			  directly from the mode argument of ioctl().
23242 *
23243 * Return Code: 0
23244 *		EFAULT
23245 *		ENXIO
23246 */
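
/*
 * Illustrative sketch (not part of the driver): user-level usage of
 * MHIOCENFAILFAST, assuming "fd" is an open raw device descriptor.
 *
 *	int mh_time = 1000;	(probe interval in milliseconds)
 *
 *	if (ioctl(fd, MHIOCENFAILFAST, &mh_time) != 0)
 *		...
 *
 * A zero mh_time disables failfast; INT_MAX enables failfast without
 * starting the probing watch thread (the SCSI-3 PGR case noted below).
 */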
23247
23248static int
23249sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
23250{
23251	struct sd_lun	*un = NULL;
23252	int		mh_time;
23253	int		rval = 0;
23254
23255	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23256		return (ENXIO);
23257	}
23258
23259	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
23260		return (EFAULT);
23261
23262	if (mh_time) {
23263		mutex_enter(SD_MUTEX(un));
23264		un->un_resvd_status |= SD_FAILFAST;
23265		mutex_exit(SD_MUTEX(un));
23266		/*
23267		 * If mh_time is INT_MAX, then this ioctl is being used for
23268		 * SCSI-3 PGR purposes, and we don't need to spawn a watch thread.
23269		 */
23270		if (mh_time != INT_MAX) {
23271			rval = sd_check_mhd(dev, mh_time);
23272		}
23273	} else {
23274		(void) sd_check_mhd(dev, 0);
23275		mutex_enter(SD_MUTEX(un));
23276		un->un_resvd_status &= ~SD_FAILFAST;
23277		mutex_exit(SD_MUTEX(un));
23278	}
23279	return (rval);
23280}
23281
23282
23283/*
23284 *    Function: sd_mhdioc_takeown
23285 *
23286 * Description: This routine is the driver entry point for handling ioctl
23287 *		requests to forcefully acquire exclusive access rights to the
23288 *		multihost disk (MHIOCTKOWN).
23289 *
23290 *   Arguments: dev	- the device number
23291 *		arg	- user provided structure specifying the delay
23292 *			  parameters in milliseconds
23293 *		flag	- this argument is a pass through to ddi_copyxxx()
23294 *			  directly from the mode argument of ioctl().
23295 *
23296 * Return Code: 0
23297 *		EFAULT
23298 *		ENXIO
23299 */
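
/*
 * Illustrative sketch (not part of the driver): user-level usage of
 * MHIOCTKOWN, assuming the mhioctkown structure from <sys/mhd.h>.
 * Passing a NULL argument selects the default delay values.
 *
 *	struct mhioctkown tkown;
 *
 *	bzero(&tkown, sizeof (tkown));
 *	tkown.reinstate_resv_delay = 2000;	(milliseconds)
 *	if (ioctl(fd, MHIOCTKOWN, &tkown) != 0)
 *		...
 */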
23300
23301static int
23302sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
23303{
23304	struct sd_lun		*un = NULL;
23305	struct mhioctkown	*tkown = NULL;
23306	int			rval = 0;
23307
23308	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23309		return (ENXIO);
23310	}
23311
23312	if (arg != NULL) {
23313		tkown = (struct mhioctkown *)
23314		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
23315		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
23316		if (rval != 0) {
23317			rval = EFAULT;
23318			goto error;
23319		}
23320	}
23321
23322	rval = sd_take_ownership(dev, tkown);
23323	mutex_enter(SD_MUTEX(un));
23324	if (rval == 0) {
23325		un->un_resvd_status |= SD_RESERVE;
23326		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
23327			sd_reinstate_resv_delay =
23328			    tkown->reinstate_resv_delay * 1000;
23329		} else {
23330			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
23331		}
23332		/*
23333		 * Give the scsi_watch routine interval set by
23334		 * the MHIOCENFAILFAST ioctl precedence here.
23335		 */
23336		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
23337			mutex_exit(SD_MUTEX(un));
23338			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
23339			SD_TRACE(SD_LOG_IOCTL_MHD, un,
23340			    "sd_mhdioc_takeown : %d\n",
23341			    sd_reinstate_resv_delay);
23342		} else {
23343			mutex_exit(SD_MUTEX(un));
23344		}
23345		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
23346		    sd_mhd_reset_notify_cb, (caddr_t)un);
23347	} else {
23348		un->un_resvd_status &= ~SD_RESERVE;
23349		mutex_exit(SD_MUTEX(un));
23350	}
23351
23352error:
23353	if (tkown != NULL) {
23354		kmem_free(tkown, sizeof (struct mhioctkown));
23355	}
23356	return (rval);
23357}
23358
23359
23360/*
23361 *    Function: sd_mhdioc_release
23362 *
23363 * Description: This routine is the driver entry point for handling ioctl
23364 *		requests to release exclusive access rights to the multihost
23365 *		disk (MHIOCRELEASE).
23366 *
23367 *   Arguments: dev	- the device number
23368 *
23369 * Return Code: 0
23370 *		ENXIO
23371 */
23372
23373static int
23374sd_mhdioc_release(dev_t dev)
23375{
23376	struct sd_lun		*un = NULL;
23377	timeout_id_t		resvd_timeid_save;
23378	int			resvd_status_save;
23379	int			rval = 0;
23380
23381	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23382		return (ENXIO);
23383	}
23384
23385	mutex_enter(SD_MUTEX(un));
23386	resvd_status_save = un->un_resvd_status;
23387	un->un_resvd_status &=
23388	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
23389	if (un->un_resvd_timeid) {
23390		resvd_timeid_save = un->un_resvd_timeid;
23391		un->un_resvd_timeid = NULL;
23392		mutex_exit(SD_MUTEX(un));
23393		(void) untimeout(resvd_timeid_save);
23394	} else {
23395		mutex_exit(SD_MUTEX(un));
23396	}
23397
23398	/*
23399	 * destroy any pending timeout thread that may be attempting to
23400	 * reinstate the reservation on this device.
23401	 */
23402	sd_rmv_resv_reclaim_req(dev);
23403
23404	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
23405		mutex_enter(SD_MUTEX(un));
23406		if ((un->un_mhd_token) &&
23407		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
23408			mutex_exit(SD_MUTEX(un));
23409			(void) sd_check_mhd(dev, 0);
23410		} else {
23411			mutex_exit(SD_MUTEX(un));
23412		}
23413		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
23414		    sd_mhd_reset_notify_cb, (caddr_t)un);
23415	} else {
23416		/*
23417		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
23418		 */
23419		mutex_enter(SD_MUTEX(un));
23420		un->un_resvd_status = resvd_status_save;
23421		mutex_exit(SD_MUTEX(un));
23422	}
23423	return (rval);
23424}
23425
23426
23427/*
23428 *    Function: sd_mhdioc_register_devid
23429 *
23430 * Description: This routine is the driver entry point for handling ioctl
23431 *		requests to register the device id (MHIOCREREGISTERDEVID).
23432 *
23433 *		Note: The implementation for this ioctl has been updated to
23434 *		be consistent with the original PSARC case (1999/357)
23435 *		(4375899, 4241671, 4220005)
23436 *
23437 *   Arguments: dev	- the device number
23438 *
23439 * Return Code: 0
23440 *		ENXIO
23441 */
23442
23443static int
23444sd_mhdioc_register_devid(dev_t dev)
23445{
23446	struct sd_lun	*un = NULL;
23447	int		rval = 0;
23448	sd_ssc_t	*ssc;
23449
23450	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23451		return (ENXIO);
23452	}
23453
23454	ASSERT(!mutex_owned(SD_MUTEX(un)));
23455
23456	mutex_enter(SD_MUTEX(un));
23457
23458	/* If a devid already exists, de-register it */
23459	if (un->un_devid != NULL) {
23460		ddi_devid_unregister(SD_DEVINFO(un));
23461		/*
23462		 * After unregistering the devid, free the devid memory.
23463		 */
23464		ddi_devid_free(un->un_devid);
23465		un->un_devid = NULL;
23466	}
23467
23468	/* Check for reservation conflict */
23469	mutex_exit(SD_MUTEX(un));
23470	ssc = sd_ssc_init(un);
23471	rval = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
23472	mutex_enter(SD_MUTEX(un));
23473
23474	switch (rval) {
23475	case 0:
23476		sd_register_devid(ssc, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
23477		break;
23478	case EACCES:
23479		break;
23480	default:
23481		rval = EIO;
23482	}
23483
23484	mutex_exit(SD_MUTEX(un));
23485	if (rval != 0) {
23486		if (rval == EIO)
23487			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23488		else
23489			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23490	}
23491	sd_ssc_fini(ssc);
23492	return (rval);
23493}
23494
23495
23496/*
23497 *    Function: sd_mhdioc_inkeys
23498 *
23499 * Description: This routine is the driver entry point for handling ioctl
23500 *		requests to issue the SCSI-3 Persistent In Read Keys command
23501 *		to the device (MHIOCGRP_INKEYS).
23502 *
23503 *   Arguments: dev	- the device number
23504 *		arg	- user provided in_keys structure
23505 *		flag	- this argument is a pass through to ddi_copyxxx()
23506 *			  directly from the mode argument of ioctl().
23507 *
23508 * Return Code: code returned by sd_persistent_reservation_in_read_keys()
23509 *		ENXIO
23510 *		EFAULT
23511 */
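
/*
 * Illustrative sketch (not part of the driver): user-level usage of
 * MHIOCGRP_INKEYS, assuming the mhioc_inkeys_t and mhioc_key_list_t
 * layouts from <sys/mhd.h>.
 *
 *	mhioc_resv_key_t keys[16];
 *	mhioc_key_list_t klist;
 *	mhioc_inkeys_t inkeys;
 *
 *	klist.listsize = 16;
 *	klist.list = keys;
 *	inkeys.li = &klist;
 *	if (ioctl(fd, MHIOCGRP_INKEYS, &inkeys) == 0)
 *		... klist.listlen keys are registered; the generation
 *		    count is in inkeys.generation ...
 */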
23512
23513static int
23514sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
23515{
23516	struct sd_lun		*un;
23517	mhioc_inkeys_t		inkeys;
23518	int			rval = 0;
23519
23520	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23521		return (ENXIO);
23522	}
23523
23524#ifdef _MULTI_DATAMODEL
23525	switch (ddi_model_convert_from(flag & FMODELS)) {
23526	case DDI_MODEL_ILP32: {
23527		struct mhioc_inkeys32	inkeys32;
23528
23529		if (ddi_copyin(arg, &inkeys32,
23530		    sizeof (struct mhioc_inkeys32), flag) != 0) {
23531			return (EFAULT);
23532		}
23533		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
23534		if ((rval = sd_persistent_reservation_in_read_keys(un,
23535		    &inkeys, flag)) != 0) {
23536			return (rval);
23537		}
23538		inkeys32.generation = inkeys.generation;
23539		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
23540		    flag) != 0) {
23541			return (EFAULT);
23542		}
23543		break;
23544	}
23545	case DDI_MODEL_NONE:
23546		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
23547		    flag) != 0) {
23548			return (EFAULT);
23549		}
23550		if ((rval = sd_persistent_reservation_in_read_keys(un,
23551		    &inkeys, flag)) != 0) {
23552			return (rval);
23553		}
23554		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
23555		    flag) != 0) {
23556			return (EFAULT);
23557		}
23558		break;
23559	}
23560
23561#else /* ! _MULTI_DATAMODEL */
23562
23563	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
23564		return (EFAULT);
23565	}
23566	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
23567	if (rval != 0) {
23568		return (rval);
23569	}
23570	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
23571		return (EFAULT);
23572	}
23573
23574#endif /* _MULTI_DATAMODEL */
23575
23576	return (rval);
23577}
23578
23579
23580/*
23581 *    Function: sd_mhdioc_inresv
23582 *
23583 * Description: This routine is the driver entry point for handling ioctl
23584 *		requests to issue the SCSI-3 Persistent In Read Reservations
23585 *		command to the device (MHIOCGRP_INRESV).
23586 *
23587 *   Arguments: dev	- the device number
23588 *		arg	- user provided in_resv structure
23589 *		flag	- this argument is a pass through to ddi_copyxxx()
23590 *			  directly from the mode argument of ioctl().
23591 *
23592 * Return Code: code returned by sd_persistent_reservation_in_read_resv()
23593 *		ENXIO
23594 *		EFAULT
23595 */
23596
23597static int
23598sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
23599{
23600	struct sd_lun		*un;
23601	mhioc_inresvs_t		inresvs;
23602	int			rval = 0;
23603
23604	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23605		return (ENXIO);
23606	}
23607
23608#ifdef _MULTI_DATAMODEL
23609
23610	switch (ddi_model_convert_from(flag & FMODELS)) {
23611	case DDI_MODEL_ILP32: {
23612		struct mhioc_inresvs32	inresvs32;
23613
23614		if (ddi_copyin(arg, &inresvs32,
23615		    sizeof (struct mhioc_inresvs32), flag) != 0) {
23616			return (EFAULT);
23617		}
23618		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
23619		if ((rval = sd_persistent_reservation_in_read_resv(un,
23620		    &inresvs, flag)) != 0) {
23621			return (rval);
23622		}
23623		inresvs32.generation = inresvs.generation;
23624		if (ddi_copyout(&inresvs32, arg,
23625		    sizeof (struct mhioc_inresvs32), flag) != 0) {
23626			return (EFAULT);
23627		}
23628		break;
23629	}
23630	case DDI_MODEL_NONE:
23631		if (ddi_copyin(arg, &inresvs,
23632		    sizeof (mhioc_inresvs_t), flag) != 0) {
23633			return (EFAULT);
23634		}
23635		if ((rval = sd_persistent_reservation_in_read_resv(un,
23636		    &inresvs, flag)) != 0) {
23637			return (rval);
23638		}
23639		if (ddi_copyout(&inresvs, arg,
23640		    sizeof (mhioc_inresvs_t), flag) != 0) {
23641			return (EFAULT);
23642		}
23643		break;
23644	}
23645
23646#else /* ! _MULTI_DATAMODEL */
23647
23648	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
23649		return (EFAULT);
23650	}
23651	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
23652	if (rval != 0) {
23653		return (rval);
23654	}
23655	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
23656		return (EFAULT);
23657	}
23658
23659#endif /* ! _MULTI_DATAMODEL */
23660
23661	return (rval);
23662}
23663
23664
23665/*
23666 * The following routines support the clustering functionality described below
23667 * and implement lost reservation reclaim functionality.
23668 *
23669 * Clustering
23670 * ----------
23671 * The clustering code uses two different, independent forms of SCSI
23672 * reservation: traditional SCSI-2 Reserve/Release and the newer SCSI-3
23673 * Persistent Group Reservations. For any particular disk, it will use either
23674 * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
23675 *
23676 * SCSI-2
23677 * The cluster software takes ownership of a multi-hosted disk by issuing the
23678 * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
23679 * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
23680 * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
23681 * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
23682 * driver. The meaning of failfast is that if the driver (on this host) ever
23683 * encounters the scsi error return code RESERVATION_CONFLICT from the device,
23684 * it should immediately panic the host. The motivation for this ioctl is that
23685 * if this host does encounter reservation conflict, the underlying cause is
23686 * that some other host of the cluster has decided that this host is no longer
23687 * in the cluster and has seized control of the disks for itself. Since this
23688 * host is no longer in the cluster, it ought to panic itself. The
23689 * MHIOCENFAILFAST ioctl does two things:
23690 *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
23691 *      error to panic the host
23692 *      (b) it sets up a periodic timer to test whether this host still has
23693 *      "access" (in that no other host has reserved the device):  if the
23694 *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
23695 *      purpose of that periodic timer is to handle scenarios where the host is
23696 *      otherwise temporarily quiescent, temporarily doing no real i/o.
23697 * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
23698 * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
23699 * the device itself.
23700 *
23701 * SCSI-3 PGR
23702 * A direct semantic implementation of the SCSI-3 Persistent Reservation
23703 * facility is supported through the shared multihost disk ioctls
23704 * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
23705 * MHIOCGRP_PREEMPTANDABORT)
23706 *
23707 * Reservation Reclaim:
23708 * --------------------
23709 * To support the lost reservation reclaim operations this driver creates a
23710 * single thread to handle reinstating reservations on all devices that have
23711 * lost reservations. sd_resv_reclaim_requests are logged for all devices
23712 * that have lost reservations when the scsi watch facility calls back
23713 * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
23714 * requests to regain the lost reservations.
23715 */
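
/*
 * Typical SCSI-2 ioctl sequence issued by a cluster node, per the
 * description above (a sketch, not prescriptive):
 *
 *	ioctl(fd, MHIOCTKOWN, ...);		(reset and reserve disk)
 *	ioctl(fd, MHIOCENFAILFAST, &mh_time);	(panic this host on any
 *						future RESERVATION_CONFLICT)
 *	... normal i/o while a cluster member ...
 *	ioctl(fd, MHIOCRELEASE, 0);		(give up ownership)
 */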
23716
23717/*
23718 *    Function: sd_check_mhd()
23719 *
23720 * Description: This function sets up and submits a scsi watch request or
23721 *		terminates an existing watch request. This routine is used in
23722 *		support of reservation reclaim.
23723 *
23724 *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
23725 *			 among multiple watches that share the callback function
23726 *		interval - the number of milliseconds specifying the watch
23727 *			   interval for issuing TEST UNIT READY commands. If
23728 *			   set to 0 the watch should be terminated. If the
23729 *			   interval is set to 0 and if the device is required
23730 *			   to hold reservation while disabling failfast, the
23731 *			   watch is restarted with an interval of
23732 *			   reinstate_resv_delay.
23733 *
23734 * Return Code: 0	   - Successful submit/terminate of scsi watch request
23735 *		ENXIO      - Indicates an invalid device was specified
23736 *		EAGAIN     - Unable to submit the scsi watch request
23737 */
23738
23739static int
23740sd_check_mhd(dev_t dev, int interval)
23741{
23742	struct sd_lun	*un;
23743	opaque_t	token;
23744
23745	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23746		return (ENXIO);
23747	}
23748
23749	/* is this a watch termination request? */
23750	if (interval == 0) {
23751		mutex_enter(SD_MUTEX(un));
23752		/* if there is an existing watch task then terminate it */
23753		if (un->un_mhd_token) {
23754			token = un->un_mhd_token;
23755			un->un_mhd_token = NULL;
23756			mutex_exit(SD_MUTEX(un));
23757			(void) scsi_watch_request_terminate(token,
23758			    SCSI_WATCH_TERMINATE_ALL_WAIT);
23759			mutex_enter(SD_MUTEX(un));
23760		} else {
23761			mutex_exit(SD_MUTEX(un));
23762			/*
23763			 * Note: If we return here we don't check for the
23764			 * failfast case. This is the original legacy
23765			 * implementation but perhaps we should be checking
23766			 * the failfast case.
23767			 */
23768			return (0);
23769		}
23770		/*
23771		 * If the device is required to hold reservation while
23772		 * disabling failfast, we need to restart the scsi_watch
23773		 * routine with an interval of reinstate_resv_delay.
23774		 */
23775		if (un->un_resvd_status & SD_RESERVE) {
23776			interval = sd_reinstate_resv_delay/1000;
23777		} else {
23778			/* no failfast so bail */
23779			mutex_exit(SD_MUTEX(un));
23780			return (0);
23781		}
23782		mutex_exit(SD_MUTEX(un));
23783	}
23784
23785	/*
23786	 * adjust minimum time interval to 1 second,
23787	 * and convert from msecs to usecs
23788	 */
23789	if (interval > 0 && interval < 1000) {
23790		interval = 1000;
23791	}
23792	interval *= 1000;
23793
23794	/*
23795	 * submit the request to the scsi_watch service
23796	 */
23797	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
23798	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
23799	if (token == NULL) {
23800		return (EAGAIN);
23801	}
23802
23803	/*
23804	 * save token for termination later on
23805	 */
23806	mutex_enter(SD_MUTEX(un));
23807	un->un_mhd_token = token;
23808	mutex_exit(SD_MUTEX(un));
23809	return (0);
23810}
23811
23812
23813/*
23814 *    Function: sd_mhd_watch_cb()
23815 *
23816 * Description: This function is the call back function used by the scsi watch
23817 *		facility. The scsi watch facility sends the "Test Unit Ready"
23818 *		and processes the status. If applicable (i.e. a "Unit Attention"
23819 *		status and automatic "Request Sense" not used) the scsi watch
23820 *		facility will send a "Request Sense" and retrieve the sense data
23821 *		to be passed to this callback function. In either case --
23822 *		automatic "Request Sense" or the facility submitting one --
23823 *		this callback is passed the status and sense data.
23824 *
23825 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
23826 *			among multiple watches that share this callback function
23827 *		resultp - scsi watch facility result packet containing scsi
23828 *			  packet, status byte and sense data
23829 *
23830 * Return Code: 0 - continue the watch task
23831 *		non-zero - terminate the watch task
23832 */
23833
23834static int
23835sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
23836{
23837	struct sd_lun			*un;
23838	struct scsi_status		*statusp;
23839	uint8_t				*sensep;
23840	struct scsi_pkt			*pkt;
23841	uchar_t				actual_sense_length;
23842	dev_t  				dev = (dev_t)arg;
23843
23844	ASSERT(resultp != NULL);
23845	statusp			= resultp->statusp;
23846	sensep			= (uint8_t *)resultp->sensep;
23847	pkt			= resultp->pkt;
23848	actual_sense_length	= resultp->actual_sense_length;
23849
23850	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23851		return (ENXIO);
23852	}
23853
23854	SD_TRACE(SD_LOG_IOCTL_MHD, un,
23855	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
23856	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
23857
23858	/* Begin processing of the status and/or sense data */
23859	if (pkt->pkt_reason != CMD_CMPLT) {
23860		/* Handle the incomplete packet */
23861		sd_mhd_watch_incomplete(un, pkt);
23862		return (0);
23863	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
23864		if (*((unsigned char *)statusp)
23865		    == STATUS_RESERVATION_CONFLICT) {
23866			/*
23867			 * Handle a reservation conflict by panicking if
23868			 * configured for failfast or by logging the conflict
23869			 * and updating the reservation status
23870			 */
23871			mutex_enter(SD_MUTEX(un));
23872			if ((un->un_resvd_status & SD_FAILFAST) &&
23873			    (sd_failfast_enable)) {
23874				sd_panic_for_res_conflict(un);
23875				/*NOTREACHED*/
23876			}
23877			SD_INFO(SD_LOG_IOCTL_MHD, un,
23878			    "sd_mhd_watch_cb: Reservation Conflict\n");
23879			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
23880			mutex_exit(SD_MUTEX(un));
23881		}
23882	}
23883
23884	if (sensep != NULL) {
23885		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
23886			mutex_enter(SD_MUTEX(un));
23887			if ((scsi_sense_asc(sensep) ==
23888			    SD_SCSI_RESET_SENSE_CODE) &&
23889			    (un->un_resvd_status & SD_RESERVE)) {
23890				/*
23891				 * The additional sense code indicates a power
23892				 * on or bus device reset has occurred; update
23893				 * the reservation status.
23894				 */
23895				un->un_resvd_status |=
23896				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
23897				SD_INFO(SD_LOG_IOCTL_MHD, un,
23898				    "sd_mhd_watch_cb: Lost Reservation\n");
23899			}
23900		} else {
23901			return (0);
23902		}
23903	} else {
23904		mutex_enter(SD_MUTEX(un));
23905	}
23906
23907	if ((un->un_resvd_status & SD_RESERVE) &&
23908	    (un->un_resvd_status & SD_LOST_RESERVE)) {
23909		if (un->un_resvd_status & SD_WANT_RESERVE) {
23910			/*
23911			 * A reset occurred in between the last probe and this
23912			 * one so if a timeout is pending cancel it.
23913			 */
23914			if (un->un_resvd_timeid) {
23915				timeout_id_t temp_id = un->un_resvd_timeid;
23916				un->un_resvd_timeid = NULL;
23917				mutex_exit(SD_MUTEX(un));
23918				(void) untimeout(temp_id);
23919				mutex_enter(SD_MUTEX(un));
23920			}
23921			un->un_resvd_status &= ~SD_WANT_RESERVE;
23922		}
23923		if (un->un_resvd_timeid == 0) {
23924			/* Schedule a timeout to handle the lost reservation */
23925			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
23926			    (void *)dev,
23927			    drv_usectohz(sd_reinstate_resv_delay));
23928		}
23929	}
23930	mutex_exit(SD_MUTEX(un));
23931	return (0);
23932}
23933
23934
23935/*
23936 *    Function: sd_mhd_watch_incomplete()
23937 *
23938 * Description: This function is used to find out why a scsi pkt sent by the
23939 *		scsi watch facility was not completed. Under some scenarios this
23940 *		routine simply returns. Otherwise it will reset the lun,
23941 *		target, or bus to see if the drive is still online.
23942 *
23943 *   Arguments: un  - driver soft state (unit) structure
23944 *		pkt - incomplete scsi pkt
23945 */
23946
23947static void
23948sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
23949{
23950	int	be_chatty;
23951	int	perr;
23952
23953	ASSERT(pkt != NULL);
23954	ASSERT(un != NULL);
23955	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
23956	perr		= (pkt->pkt_statistics & STAT_PERR);
23957
23958	mutex_enter(SD_MUTEX(un));
23959	if (un->un_state == SD_STATE_DUMPING) {
23960		mutex_exit(SD_MUTEX(un));
23961		return;
23962	}
23963
23964	switch (pkt->pkt_reason) {
23965	case CMD_UNX_BUS_FREE:
23966		/*
23967		 * If we had a parity error that caused the target to drop BSY*,
23968		 * don't be chatty about it.
23969		 */
23970		if (perr && be_chatty) {
23971			be_chatty = 0;
23972		}
23973		break;
23974	case CMD_TAG_REJECT:
23975		/*
23976		 * The SCSI-2 spec states that a tag reject will be sent by the
23977		 * target if tagged queuing is not supported. A tag reject may
23978		 * also be sent during certain initialization periods or to
23979		 * control internal resources. For the latter case the target
23980		 * may also return Queue Full.
23981		 *
23982		 * If this driver receives a tag reject from a target that is
23983		 * going through an init period or controlling internal
23984		 * resources, tagged queuing will be disabled. This is a less
23985		 * than optimal behavior but the driver is unable to determine
23986		 * the target state and assumes tagged queueing is not supported.
23987		 */
23988		pkt->pkt_flags = 0;
23989		un->un_tagflags = 0;
23990
23991		if (un->un_f_opt_queueing == TRUE) {
23992			un->un_throttle = min(un->un_throttle, 3);
23993		} else {
23994			un->un_throttle = 1;
23995		}
23996		mutex_exit(SD_MUTEX(un));
23997		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
23998		mutex_enter(SD_MUTEX(un));
23999		break;
24000	case CMD_INCOMPLETE:
24001		/*
24002		 * The transport stopped with an abnormal state, fallthrough and
24003		 * reset the target and/or bus unless selection did not complete
24004		 * (indicated by STATE_GOT_BUS) in which case we don't want to
24005		 * go through a target/bus reset
24006		 */
24007		if (pkt->pkt_state == STATE_GOT_BUS) {
24008			break;
24009		}
24010		/*FALLTHROUGH*/
24011
24012	case CMD_TIMEOUT:
24013	default:
24014		/*
24015		 * The lun may still be running the command, so a lun reset
24016		 * should be attempted. If the lun reset fails or cannot be
24017		 * issued, then try a target reset. Lastly try a bus reset.
24018		 */
24019		if ((pkt->pkt_statistics &
24020		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
24021			int reset_retval = 0;
24022			mutex_exit(SD_MUTEX(un));
24023			if (un->un_f_allow_bus_device_reset == TRUE) {
24024				if (un->un_f_lun_reset_enabled == TRUE) {
24025					reset_retval =
24026					    scsi_reset(SD_ADDRESS(un),
24027					    RESET_LUN);
24028				}
24029				if (reset_retval == 0) {
24030					reset_retval =
24031					    scsi_reset(SD_ADDRESS(un),
24032					    RESET_TARGET);
24033				}
24034			}
24035			if (reset_retval == 0) {
24036				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
24037			}
24038			mutex_enter(SD_MUTEX(un));
24039		}
24040		break;
24041	}
24042
24043	/* A device/bus reset has occurred; update the reservation status. */
24044	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
24045	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
24046		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
24047			un->un_resvd_status |=
24048			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
24049			SD_INFO(SD_LOG_IOCTL_MHD, un,
24050			    "sd_mhd_watch_incomplete: Lost Reservation\n");
24051		}
24052	}
24053
24054	/*
24055	 * The disk has been turned off; update the device state.
24056	 *
24057	 * Note: Should we be offlining the disk here?
24058	 */
24059	if (pkt->pkt_state == STATE_GOT_BUS) {
24060		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
24061		    "Disk not responding to selection\n");
24062		if (un->un_state != SD_STATE_OFFLINE) {
24063			New_state(un, SD_STATE_OFFLINE);
24064		}
24065	} else if (be_chatty) {
24066		/*
24067		 * Suppress messages if they are all for the same pkt_reason;
24068		 * with TQ, many (up to 256) may be returned with the same
24069		 * pkt_reason.
24070		 */
24071		if (pkt->pkt_reason != un->un_last_pkt_reason) {
24072			SD_ERROR(SD_LOG_IOCTL_MHD, un,
24073			    "sd_mhd_watch_incomplete: "
24074			    "SCSI transport failed: reason '%s'\n",
24075			    scsi_rname(pkt->pkt_reason));
24076		}
24077	}
24078	un->un_last_pkt_reason = pkt->pkt_reason;
24079	mutex_exit(SD_MUTEX(un));
24080}
24081
24082
24083/*
24084 *    Function: sd_sname()
24085 *
24086 * Description: This is a simple little routine to return a string containing
24087 *		a printable description of the command status byte for use in
24088 *		logging.
24089 *
24090 *   Arguments: status - pointer to a status byte
24091 *
24092 * Return Code: char * - string containing status description.
24093 */
24094
24095static char *
24096sd_sname(uchar_t status)
24097{
24098	switch (status & STATUS_MASK) {
24099	case STATUS_GOOD:
24100		return ("good status");
24101	case STATUS_CHECK:
24102		return ("check condition");
24103	case STATUS_MET:
24104		return ("condition met");
24105	case STATUS_BUSY:
24106		return ("busy");
24107	case STATUS_INTERMEDIATE:
24108		return ("intermediate");
24109	case STATUS_INTERMEDIATE_MET:
24110		return ("intermediate - condition met");
24111	case STATUS_RESERVATION_CONFLICT:
24112		return ("reservation_conflict");
24113	case STATUS_TERMINATED:
24114		return ("command terminated");
24115	case STATUS_QFULL:
24116		return ("queue full");
24117	default:
24118		return ("<unknown status>");
24119	}
24120}
24121
24122
24123/*
24124 *    Function: sd_mhd_resvd_recover()
24125 *
24126 * Description: This function adds a reservation entry to the
24127 *		sd_resv_reclaim_request list and signals the reservation
24128 *		reclaim thread that there is work pending. If the reservation
24129 *		reclaim thread has not been previously created, this function
24130 *		will kick it off.
24131 *
24132 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24133 *			among multiple watches that share this callback function
24134 *
24135 *     Context: This routine is called by timeout() and is run in interrupt
24136 *		context. It must not sleep or call other functions which may
24137 *		sleep.
24138 */
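
/*
 * A minimal sketch (not the driver's literal arming code) of how this
 * callback is typically armed via timeout(9F):
 *
 *	un->un_resvd_timeid = timeout(sd_mhd_resvd_recover, (void *)dev,
 *	    drv_usectohz(delay_usec));
 *
 * where 'dev' is the unit's dev_t and 'delay_usec' is illustrative.
 * timeout(9F) runs the handler in interrupt context, which is why this
 * routine must not sleep and uses KM_NOSLEEP for its allocation below.
 */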
24139
24140static void
24141sd_mhd_resvd_recover(void *arg)
24142{
24143	dev_t			dev = (dev_t)arg;
24144	struct sd_lun		*un;
24145	struct sd_thr_request	*sd_treq = NULL;
24146	struct sd_thr_request	*sd_cur = NULL;
24147	struct sd_thr_request	*sd_prev = NULL;
24148	int			already_there = 0;
24149
24150	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24151		return;
24152	}
24153
24154	mutex_enter(SD_MUTEX(un));
24155	un->un_resvd_timeid = NULL;
24156	if (un->un_resvd_status & SD_WANT_RESERVE) {
24157		/*
24158		 * There was a reset so don't issue the reserve, allow the
24159		 * sd_mhd_watch_cb callback function to notice this and
24160		 * reschedule the timeout for reservation.
24161		 */
24162		mutex_exit(SD_MUTEX(un));
24163		return;
24164	}
24165	mutex_exit(SD_MUTEX(un));
24166
24167	/*
24168	 * Add this device to the sd_resv_reclaim_request list and the
24169	 * sd_resv_reclaim_thread should take care of the rest.
24170	 *
24171	 * Note: We can't sleep in this context, so if the memory allocation
24172	 * fails, allow the sd_mhd_watch_cb callback function to notice this and
24173	 * reschedule the timeout for reservation.  (4378460)
24174	 */
24175	sd_treq = (struct sd_thr_request *)
24176	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
24177	if (sd_treq == NULL) {
24178		return;
24179	}
24180
24181	sd_treq->sd_thr_req_next = NULL;
24182	sd_treq->dev = dev;
24183	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
24184	if (sd_tr.srq_thr_req_head == NULL) {
24185		sd_tr.srq_thr_req_head = sd_treq;
24186	} else {
24187		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
24188		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
24189			if (sd_cur->dev == dev) {
24190				/*
24191				 * Already in the queue, so don't log
24192				 * another request for the device.
24193				 */
24194				already_there = 1;
24195				break;
24196			}
24197			sd_prev = sd_cur;
24198		}
24199		if (!already_there) {
24200			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
24201			    "logging request for %lx\n", dev);
24202			sd_prev->sd_thr_req_next = sd_treq;
24203		} else {
24204			kmem_free(sd_treq, sizeof (struct sd_thr_request));
24205		}
24206	}
24207
24208	/*
24209	 * Create a kernel thread to do the reservation reclaim, freeing up
24210	 * this thread. We cannot block in this (timeout) context while the
24211	 * reservation reclaim is performed.
24212	 */
24213	if (sd_tr.srq_resv_reclaim_thread == NULL)
24214		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
24215		    sd_resv_reclaim_thread, NULL,
24216		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
24217
24218	/* Tell the reservation reclaim thread that it has work to do */
24219	cv_signal(&sd_tr.srq_resv_reclaim_cv);
24220	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24221}
24222
24223/*
24224 *    Function: sd_resv_reclaim_thread()
24225 *
24226 * Description: This function implements the reservation reclaim operations
24227 * Description: This function implements the reservation reclaim operations.
24228 *
24229 *   Arguments: none; pending requests are taken from the global sd_tr
24230 *		      request queue.
24231
24232static void
24233sd_resv_reclaim_thread()
24234{
24235	struct sd_lun		*un;
24236	struct sd_thr_request	*sd_mhreq;
24237
24238	/* Wait for work */
24239	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
24240	if (sd_tr.srq_thr_req_head == NULL) {
24241		cv_wait(&sd_tr.srq_resv_reclaim_cv,
24242		    &sd_tr.srq_resv_reclaim_mutex);
24243	}
24244
24245	/* Loop while we have work */
24246	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
24247		un = ddi_get_soft_state(sd_state,
24248		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
24249		if (un == NULL) {
24250			/*
24251			 * The softstate structure is NULL, so just
24252			 * dequeue the request and continue.
24253			 */
24254			sd_tr.srq_thr_req_head =
24255			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
24256			kmem_free(sd_tr.srq_thr_cur_req,
24257			    sizeof (struct sd_thr_request));
24258			continue;
24259		}
24260
24261		/* dequeue the request */
24262		sd_mhreq = sd_tr.srq_thr_cur_req;
24263		sd_tr.srq_thr_req_head =
24264		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
24265		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24266
24267		/*
24268		 * Reclaim reservation only if SD_RESERVE is still set. There
24269		 * may have been a call to MHIOCRELEASE before we got here.
24270		 */
24271		mutex_enter(SD_MUTEX(un));
24272		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
24273			/*
24274			 * Note: The SD_LOST_RESERVE flag is cleared before
24275			 * reclaiming the reservation. If this were done after the
24276			 * call to sd_reserve_release, a reservation loss in the
24277			 * window between pkt completion of the reserve cmd and
24278			 * the mutex_enter below might not be recognized.
24279			 */
24280			un->un_resvd_status &= ~SD_LOST_RESERVE;
24281			mutex_exit(SD_MUTEX(un));
24282
24283			if (sd_reserve_release(sd_mhreq->dev,
24284			    SD_RESERVE) == 0) {
24285				mutex_enter(SD_MUTEX(un));
24286				un->un_resvd_status |= SD_RESERVE;
24287				mutex_exit(SD_MUTEX(un));
24288				SD_INFO(SD_LOG_IOCTL_MHD, un,
24289				    "sd_resv_reclaim_thread: "
24290				    "Reservation Recovered\n");
24291			} else {
24292				mutex_enter(SD_MUTEX(un));
24293				un->un_resvd_status |= SD_LOST_RESERVE;
24294				mutex_exit(SD_MUTEX(un));
24295				SD_INFO(SD_LOG_IOCTL_MHD, un,
24296				    "sd_resv_reclaim_thread: Failed "
24297				    "Reservation Recovery\n");
24298			}
24299		} else {
24300			mutex_exit(SD_MUTEX(un));
24301		}
24302		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
24303		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
24304		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
24305		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
24306		/*
24307		 * Wake up the destroy thread if anyone is waiting on
24308		 * us to complete.
24309		 */
24310		cv_signal(&sd_tr.srq_inprocess_cv);
24311		SD_TRACE(SD_LOG_IOCTL_MHD, un,
24312		    "sd_resv_reclaim_thread: cv_signalling current request \n");
24313	}
24314
24315	/*
24316	 * Clean up the sd_tr structure now that this thread will not exist.
24317	 */
24318	ASSERT(sd_tr.srq_thr_req_head == NULL);
24319	ASSERT(sd_tr.srq_thr_cur_req == NULL);
24320	sd_tr.srq_resv_reclaim_thread = NULL;
24321	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24322	thread_exit();
24323}
24324
24325
24326/*
24327 *    Function: sd_rmv_resv_reclaim_req()
24328 *
24329 * Description: This function removes any pending reservation reclaim requests
24330 *		for the specified device.
24331 *
24332 *   Arguments: dev - the device 'dev_t'
24333 */
24334
24335static void
24336sd_rmv_resv_reclaim_req(dev_t dev)
24337{
24338	struct sd_thr_request *sd_mhreq;
24339	struct sd_thr_request *sd_prev;
24340
24341	/* Remove a reservation reclaim request from the list */
24342	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
24343	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
24344		/*
24345		 * We are attempting to reinstate reservation for
24346		 * this device. We wait for sd_reserve_release()
24347		 * to return before we return.
24348		 */
24349		cv_wait(&sd_tr.srq_inprocess_cv,
24350		    &sd_tr.srq_resv_reclaim_mutex);
24351	} else {
24352		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
24353		if (sd_mhreq && sd_mhreq->dev == dev) {
24354			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
24355			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
24356			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24357			return;
24358		}
24359		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
24360			if (sd_mhreq && sd_mhreq->dev == dev) {
24361				break;
24362			}
24363			sd_prev = sd_mhreq;
24364		}
24365		if (sd_mhreq != NULL) {
24366			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
24367			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
24368		}
24369	}
24370	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
24371}
24372
24373
24374/*
24375 *    Function: sd_mhd_reset_notify_cb()
24376 *
24377 * Description: This is a call back function for scsi_reset_notify. This
24378 *		function updates the softstate reserved status and logs the
24379 *		reset. The driver scsi watch facility callback function
24380 *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
24381 *		will reclaim the reservation.
24382 *
24383 *   Arguments: arg  - driver soft state (unit) structure
24384 */
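
/*
 * A minimal sketch of how this callback is presumably registered with
 * the transport elsewhere in the driver (see scsi_reset_notify(9F);
 * SCSI_RESET_CANCEL undoes the registration):
 *
 *	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
 *	    sd_mhd_reset_notify_cb, (caddr_t)un);
 */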
24385
24386static void
24387sd_mhd_reset_notify_cb(caddr_t arg)
24388{
24389	struct sd_lun *un = (struct sd_lun *)arg;
24390
24391	mutex_enter(SD_MUTEX(un));
24392	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
24393		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
24394		SD_INFO(SD_LOG_IOCTL_MHD, un,
24395		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
24396	}
24397	mutex_exit(SD_MUTEX(un));
24398}
24399
24400
24401/*
24402 *    Function: sd_take_ownership()
24403 *
24404 * Description: This routine implements an algorithm to achieve a stable
24405 *		reservation on disks which don't implement priority reserve,
24406 *		and makes sure that other hosts' re-reservation attempts fail.
24407 *		The algorithm consists of a loop that keeps issuing the RESERVE
24408 *		for some period of time (min_ownership_delay, default 6 seconds).
24409 *		During that loop, it looks to see if there has been a bus device
24410 *		reset or bus reset (both of which cause an existing reservation
24411 *		to be lost). If the reservation is lost, reissue RESERVE until a
24412 *		period of min_ownership_delay with no resets has gone by, or
24413 *		until max_ownership_delay has expired. This loop ensures that
24414 *		the host really did manage to reserve the device, in spite of
24415 *		resets. The looping for min_ownership_delay (default six
24416 *		seconds) is important to early generation clustering products,
24417 *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
24418 *		MHIOCENFAILFAST periodic timer of two seconds. By having
24419 *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
24420 *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
24421 *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
24422 *		have already noticed, via the MHIOCENFAILFAST polling, that it
24423 *		no longer "owns" the disk and will have panicked itself.  Thus,
24424 *		the host issuing the MHIOCTKOWN is assured (with timing
24425 *		dependencies) that by the time it actually starts to use the
24426 *		disk for real work, the old owner is no longer accessing it.
24427 *
24428 *		min_ownership_delay is the minimum amount of time for which the
24429 *		disk must be reserved continuously devoid of resets before the
24430 *		MHIOCTKOWN ioctl will return success.
24431 *
24432 *		max_ownership_delay indicates the amount of time by which the
24433 *		take ownership should succeed or time out with an error.
24434 *
24435 *   Arguments: dev - the device 'dev_t'
24436 *		*p  - struct containing timing info.
24437 *
24438 * Return Code: 0 for success or error code
24439 */
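
/*
 * A worked example with the defaults (6/30 sec, illustrative only): the
 * loop below delays 500000 usec (0.5 sec) per iteration, so reaching
 * min_ownership_delay with no resets takes roughly twelve iterations,
 * at least four of which must be consecutive successful reservations
 * (reservation_count >= 4). max_ownership_delay bounds the whole
 * attempt at about sixty iterations before EACCES is returned.
 */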
24440
24441static int
24442sd_take_ownership(dev_t dev, struct mhioctkown *p)
24443{
24444	struct sd_lun	*un;
24445	int		rval;
24446	int		err;
24447	int		reservation_count   = 0;
24448	int		min_ownership_delay =  6000000; /* in usec */
24449	int		max_ownership_delay = 30000000; /* in usec */
24450	clock_t		start_time;	/* starting time of this algorithm */
24451	clock_t		end_time;	/* time limit for giving up */
24452	clock_t		ownership_time;	/* time limit for stable ownership */
24453	clock_t		current_time;
24454	clock_t		previous_current_time;
24455
24456	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24457		return (ENXIO);
24458	}
24459
24460	/*
24461	 * Attempt a device reservation. A priority reservation is requested.
24462	 */
24463	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
24464	    != SD_SUCCESS) {
24465		SD_ERROR(SD_LOG_IOCTL_MHD, un,
24466		    "sd_take_ownership: return(1)=%d\n", rval);
24467		return (rval);
24468	}
24469
24470	/* Update the softstate reserved status to indicate the reservation */
24471	mutex_enter(SD_MUTEX(un));
24472	un->un_resvd_status |= SD_RESERVE;
24473	un->un_resvd_status &=
24474	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
24475	mutex_exit(SD_MUTEX(un));
24476
24477	if (p != NULL) {
24478		if (p->min_ownership_delay != 0) {
24479			min_ownership_delay = p->min_ownership_delay * 1000;
24480		}
24481		if (p->max_ownership_delay != 0) {
24482			max_ownership_delay = p->max_ownership_delay * 1000;
24483		}
24484	}
24485	SD_INFO(SD_LOG_IOCTL_MHD, un,
24486	    "sd_take_ownership: min, max delays: %d, %d\n",
24487	    min_ownership_delay, max_ownership_delay);
24488
24489	start_time = ddi_get_lbolt();
24490	current_time	= start_time;
24491	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
24492	end_time	= start_time + drv_usectohz(max_ownership_delay);
24493
24494	while (current_time - end_time < 0) {
24495		delay(drv_usectohz(500000));
24496
24497		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
24498			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
24499				mutex_enter(SD_MUTEX(un));
24500				rval = (un->un_resvd_status &
24501				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
24502				mutex_exit(SD_MUTEX(un));
24503				break;
24504			}
24505		}
24506		previous_current_time = current_time;
24507		current_time = ddi_get_lbolt();
24508		mutex_enter(SD_MUTEX(un));
24509		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
24510			ownership_time = ddi_get_lbolt() +
24511			    drv_usectohz(min_ownership_delay);
24512			reservation_count = 0;
24513		} else {
24514			reservation_count++;
24515		}
24516		un->un_resvd_status |= SD_RESERVE;
24517		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
24518		mutex_exit(SD_MUTEX(un));
24519
24520		SD_INFO(SD_LOG_IOCTL_MHD, un,
24521		    "sd_take_ownership: ticks for loop iteration=%ld, "
24522		    "reservation=%s\n", (current_time - previous_current_time),
24523		    reservation_count ? "ok" : "reclaimed");
24524
24525		if (current_time - ownership_time >= 0 &&
24526		    reservation_count >= 4) {
24527			rval = 0; /* Achieved a stable ownership */
24528			break;
24529		}
24530		if (current_time - end_time >= 0) {
24531			rval = EACCES; /* No ownership in max possible time */
24532			break;
24533		}
24534	}
24535	SD_TRACE(SD_LOG_IOCTL_MHD, un,
24536	    "sd_take_ownership: return(2)=%d\n", rval);
24537	return (rval);
24538}
24539
24540
24541/*
24542 *    Function: sd_reserve_release()
24543 *
24544 * Description: This function builds and sends scsi RESERVE, RELEASE, and
24545 *		PRIORITY RESERVE commands based on a user specified command type
24546 *
24547 *   Arguments: dev - the device 'dev_t'
24548 *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
24549 *		      SD_RESERVE, SD_RELEASE
24550 *
24551 * Return Code: 0 or Error Code
24552 */
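
/*
 * For reference (a sketch of the six-byte Group 0 CDB built below):
 * byte 0 is the opcode, SCMD_RESERVE (0x16) or SCMD_RELEASE (0x17),
 * and the remaining bytes are zero for a simple unit reservation:
 *
 *	0x16 0x00 0x00 0x00 0x00 0x00
 */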
24553
24554static int
24555sd_reserve_release(dev_t dev, int cmd)
24556{
24557	struct uscsi_cmd	*com = NULL;
24558	struct sd_lun		*un = NULL;
24559	char			cdb[CDB_GROUP0];
24560	int			rval;
24561
24562	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
24563	    (cmd == SD_PRIORITY_RESERVE));
24564
24565	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24566		return (ENXIO);
24567	}
24568
24569	/* instantiate and initialize the command and cdb */
24570	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
24571	bzero(cdb, CDB_GROUP0);
24572	com->uscsi_flags   = USCSI_SILENT;
24573	com->uscsi_timeout = un->un_reserve_release_time;
24574	com->uscsi_cdblen  = CDB_GROUP0;
24575	com->uscsi_cdb	   = cdb;
24576	if (cmd == SD_RELEASE) {
24577		cdb[0] = SCMD_RELEASE;
24578	} else {
24579		cdb[0] = SCMD_RESERVE;
24580	}
24581
24582	/* Send the command. */
24583	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24584	    SD_PATH_STANDARD);
24585
24586	/*
24587	 * "break" a reservation that is held by another host, by issuing a
24588	 * reset if priority reserve is desired, and we could not get the
24589	 * device.
24590	 */
24591	if ((cmd == SD_PRIORITY_RESERVE) &&
24592	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
24593		/*
24594		 * First try to reset the LUN. If we cannot, then try a target
24595		 * reset, followed by a bus reset if the target reset fails.
24596		 */
24597		int reset_retval = 0;
24598		if (un->un_f_lun_reset_enabled == TRUE) {
24599			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
24600		}
24601		if (reset_retval == 0) {
24602			/* The LUN reset either failed or was not issued */
24603			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
24604		}
24605		if ((reset_retval == 0) &&
24606		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
24607			rval = EIO;
24608			kmem_free(com, sizeof (*com));
24609			return (rval);
24610		}
24611
24612		bzero(com, sizeof (struct uscsi_cmd));
24613		com->uscsi_flags   = USCSI_SILENT;
24614		com->uscsi_cdb	   = cdb;
24615		com->uscsi_cdblen  = CDB_GROUP0;
24616		com->uscsi_timeout = 5;
24617
24618		/*
24619		 * Reissue the last reserve command, this time without request
24620		 * sense.  Assume that it is just a regular reserve command.
24621		 */
24622		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
24623		    SD_PATH_STANDARD);
24624	}
24625
24626	/* Return an error if still getting a reservation conflict. */
24627	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
24628		rval = EACCES;
24629	}
24630
24631	kmem_free(com, sizeof (*com));
24632	return (rval);
24633}
24634
24635
24636#define	SD_NDUMP_RETRIES	12
24637/*
24638 *	System Crash Dump routine
24639 */
24640
24641static int
24642sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
24643{
24644	int		instance;
24645	int		partition;
24646	int		i;
24647	int		err;
24648	struct sd_lun	*un;
24649	struct scsi_pkt *wr_pktp;
24650	struct buf	*wr_bp;
24651	struct buf	wr_buf;
24652	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
24653	daddr_t		tgt_blkno;	/* rmw - blkno for target */
24654	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
24655	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
24656	size_t		io_start_offset;
24657	int		doing_rmw = FALSE;
24658	int		rval;
24659	ssize_t		dma_resid;
24660	daddr_t		oblkno;
24661	diskaddr_t	nblks = 0;
24662	diskaddr_t	start_block;
24663
24664	instance = SDUNIT(dev);
24665	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
24666	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
24667		return (ENXIO);
24668	}
24669
24670	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
24671
24672	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
24673
24674	partition = SDPART(dev);
24675	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
24676
24677	/* Validate the blocks to dump against the partition size. */
24678
24679	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
24680	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
24681
24682	if ((blkno + nblk) > nblks) {
24683		SD_TRACE(SD_LOG_DUMP, un,
24684		    "sddump: dump range larger than partition: "
24685		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
24686		    blkno, nblk, nblks);
24687		return (EINVAL);
24688	}
24689
24690	mutex_enter(&un->un_pm_mutex);
24691	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24692		struct scsi_pkt *start_pktp;
24693
24694		mutex_exit(&un->un_pm_mutex);
24695
24696		/*
24697		 * Use the pm framework to power on the HBA first.
24698		 */
24699		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
24700
24701		/*
24702		 * Dump no longer uses sdpower to power on a device; it's
24703		 * done in-line here so it can be done in polled mode.
24704		 */
24705
24706		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
24707
24708		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
24709		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
24710
24711		if (start_pktp == NULL) {
24712			/* We were not given a SCSI packet, fail. */
24713			return (EIO);
24714		}
24715		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
24716		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
24717		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
24718		start_pktp->pkt_flags = FLAG_NOINTR;
24719
24720		mutex_enter(SD_MUTEX(un));
24721		SD_FILL_SCSI1_LUN(un, start_pktp);
24722		mutex_exit(SD_MUTEX(un));
24723		/*
24724		 * Scsi_poll returns 0 (success) if the command completes and
24725		 * the status block is STATUS_GOOD.
24726		 */
24727		if (sd_scsi_poll(un, start_pktp) != 0) {
24728			scsi_destroy_pkt(start_pktp);
24729			return (EIO);
24730		}
24731		scsi_destroy_pkt(start_pktp);
24732		(void) sd_ddi_pm_resume(un);
24733	} else {
24734		mutex_exit(&un->un_pm_mutex);
24735	}
24736
24737	mutex_enter(SD_MUTEX(un));
24738	un->un_throttle = 0;
24739
24740	/*
24741	 * The first time through, reset the specific target device.
24742	 * However, when cpr calls sddump we know that sd is in
24743	 * a good state, so no bus reset is required.
24744	 * Clear sense data via a Request Sense cmd.
24745	 * In sddump we don't care about allow_bus_device_reset anymore.
24746	 */
24747
24748	if ((un->un_state != SD_STATE_SUSPENDED) &&
24749	    (un->un_state != SD_STATE_DUMPING)) {
24750
24751		New_state(un, SD_STATE_DUMPING);
24752
24753		if (un->un_f_is_fibre == FALSE) {
24754			mutex_exit(SD_MUTEX(un));
24755			/*
24756			 * Attempt a bus reset for parallel scsi.
24757			 *
24758			 * Note: A bus reset is required because on some host
24759			 * systems (i.e. E420R) a bus device reset is
24760			 * insufficient to reset the state of the target.
24761			 *
24762			 * Note: Don't issue the reset for fibre-channel,
24763			 * because this tends to hang the bus (loop) for
24764			 * too long while everyone is logging out and in
24765			 * and the deadman timer for dumping will fire
24766			 * before the dump is complete.
24767			 */
24768			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
24769				mutex_enter(SD_MUTEX(un));
24770				Restore_state(un);
24771				mutex_exit(SD_MUTEX(un));
24772				return (EIO);
24773			}
24774
24775			/* Delay to give the device some recovery time. */
24776			drv_usecwait(10000);
24777
24778			if (sd_send_polled_RQS(un) == SD_FAILURE) {
24779				SD_INFO(SD_LOG_DUMP, un,
24780				    "sddump: sd_send_polled_RQS failed\n");
24781			}
24782			mutex_enter(SD_MUTEX(un));
24783		}
24784	}
24785
24786	/*
24787	 * Convert the partition-relative block number to a
24788	 * disk physical block number.
24789	 */
24790	blkno += start_block;
24791
24792	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
24793
24794
24795	/*
24796	 * Check if the device has a non-512 block size.
24797	 */
24798	wr_bp = NULL;
24799	if (NOT_DEVBSIZE(un)) {
24800		tgt_byte_offset = blkno * un->un_sys_blocksize;
24801		tgt_byte_count = nblk * un->un_sys_blocksize;
24802		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
24803		    (tgt_byte_count % un->un_tgt_blocksize)) {
24804			doing_rmw = TRUE;
24805			/*
24806			 * Calculate the block number and number of blocks
24807			 * in terms of the media block size.
24808			 */
24809			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
24810			tgt_nblk =
24811			    ((tgt_byte_offset + tgt_byte_count +
24812			    (un->un_tgt_blocksize - 1)) /
24813			    un->un_tgt_blocksize) - tgt_blkno;
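			/*
			 * A worked example with hypothetical sizes: for
			 * 512-byte system blocks and 2048-byte target
			 * blocks, blkno = 3 and nblk = 2 give
			 * tgt_byte_offset = 1536 and tgt_byte_count = 1024.
			 * Neither is 2048-aligned, so tgt_blkno = 0 and
			 * tgt_nblk = ((1536 + 1024 + 2047) / 2048) - 0 = 2
			 * target blocks are read, modified, and rewritten.
			 */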
24814
24815			/*
24816			 * Invoke the routine which is going to do read part
24817			 * of read-modify-write.
24818			 * Note that this routine returns a pointer to
24819			 * a valid bp in wr_bp.
24820			 */
24821			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
24822			    &wr_bp);
24823			if (err) {
24824				mutex_exit(SD_MUTEX(un));
24825				return (err);
24826			}
24827			/*
24828			 * Offset is calculated as:
24829			 * (original block # * system block size) -
24830			 * (new block # * target block size)
24831			 */
24832			io_start_offset =
24833			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
24834			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
24835
24836			ASSERT((io_start_offset >= 0) &&
24837			    (io_start_offset < un->un_tgt_blocksize));
24838			/*
24839			 * Do the modify portion of read modify write.
24840			 */
24841			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
24842			    (size_t)nblk * un->un_sys_blocksize);
24843		} else {
24844			doing_rmw = FALSE;
24845			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
24846			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
24847		}
24848
24849		/* Convert blkno and nblk to target blocks */
24850		blkno = tgt_blkno;
24851		nblk = tgt_nblk;
24852	} else {
24853		wr_bp = &wr_buf;
24854		bzero(wr_bp, sizeof (struct buf));
24855		wr_bp->b_flags		= B_BUSY;
24856		wr_bp->b_un.b_addr	= addr;
24857		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
24858		wr_bp->b_resid		= 0;
24859	}
24860
24861	mutex_exit(SD_MUTEX(un));
24862
24863	/*
24864	 * Obtain a SCSI packet for the write command.
24865	 * It should be safe to call the allocator here without
24866	 * worrying about being locked for DVMA mapping because
24867	 * the address we're passed is already a DVMA mapping
24868	 *
24869	 * We are also not going to worry about semaphore ownership
24870	 * in the dump buffer. Dumping is single threaded at present.
24871	 */
24872
24873	wr_pktp = NULL;
24874
24875	dma_resid = wr_bp->b_bcount;
24876	oblkno = blkno;
24877
24878	while (dma_resid != 0) {
24879
24880	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
24881		wr_bp->b_flags &= ~B_ERROR;
24882
24883		if (un->un_partial_dma_supported == 1) {
24884			blkno = oblkno +
24885			    ((wr_bp->b_bcount - dma_resid) /
24886			    un->un_tgt_blocksize);
24887			nblk = dma_resid / un->un_tgt_blocksize;
24888
24889			if (wr_pktp) {
24890				/*
24891				 * Partial DMA transfers after initial transfer
24892				 */
24893				rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
24894				    blkno, nblk);
24895			} else {
24896				/* Initial transfer */
24897				rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
24898				    un->un_pkt_flags, NULL_FUNC, NULL,
24899				    blkno, nblk);
24900			}
24901		} else {
24902			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
24903			    0, NULL_FUNC, NULL, blkno, nblk);
24904		}
24905
24906		if (rval == 0) {
24907			/* We were given a SCSI packet, continue. */
24908			break;
24909		}
24910
24911		if (i == 0) {
24912			if (wr_bp->b_flags & B_ERROR) {
24913				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24914				    "no resources for dumping; "
24915				    "error code: 0x%x, retrying",
24916				    geterror(wr_bp));
24917			} else {
24918				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24919				    "no resources for dumping; retrying");
24920			}
24921		} else if (i != (SD_NDUMP_RETRIES - 1)) {
24922			if (wr_bp->b_flags & B_ERROR) {
24923				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
24924				    "no resources for dumping; error code: "
24925				    "0x%x, retrying\n", geterror(wr_bp));
24926			}
24927		} else {
24928			if (wr_bp->b_flags & B_ERROR) {
24929				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
24930				    "no resources for dumping; "
24931				    "error code: 0x%x, retries failed, "
24932				    "giving up.\n", geterror(wr_bp));
24933			} else {
24934				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
24935				    "no resources for dumping; "
24936				    "retries failed, giving up.\n");
24937			}
24938			mutex_enter(SD_MUTEX(un));
24939			Restore_state(un);
24940			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
24941				mutex_exit(SD_MUTEX(un));
24942				scsi_free_consistent_buf(wr_bp);
24943			} else {
24944				mutex_exit(SD_MUTEX(un));
24945			}
24946			return (EIO);
24947		}
24948		drv_usecwait(10000);
24949	}
24950
24951	if (un->un_partial_dma_supported == 1) {
24952		/*
24953		 * Save the resid from PARTIAL_DMA.
24954		 */
24955		dma_resid = wr_pktp->pkt_resid;
24956		if (dma_resid != 0)
24957			nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
24958		wr_pktp->pkt_resid = 0;
24959	} else {
24960		dma_resid = 0;
24961	}
24962
24963	/* SunBug 1222170 */
24964	wr_pktp->pkt_flags = FLAG_NOINTR;
24965
24966	err = EIO;
24967	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
24968
24969		/*
24970		 * Scsi_poll returns 0 (success) if the command completes and
24971		 * the status block is STATUS_GOOD.  We should only check
24972		 * errors if this condition is not true.  Even then we should
24973		 * send our own request sense packet only if we have a check
24974		 * condition and auto request sense has not been performed by
24975		 * the hba.
24976		 */
24977		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
24978
24979		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
24980		    (wr_pktp->pkt_resid == 0)) {
24981			err = SD_SUCCESS;
24982			break;
24983		}
24984
24985		/*
24986		 * Check CMD_DEV_GONE first; give up if the device is gone.
24987		 */
24988		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
24989			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
24990			    "Error while dumping state...Device is gone\n");
24991			break;
24992		}
24993
24994		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
24995			SD_INFO(SD_LOG_DUMP, un,
24996			    "sddump: write failed with CHECK, try # %d\n", i);
24997			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
24998				(void) sd_send_polled_RQS(un);
24999			}
25000
25001			continue;
25002		}
25003
25004		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
25005			int reset_retval = 0;
25006
25007			SD_INFO(SD_LOG_DUMP, un,
25008			    "sddump: write failed with BUSY, try # %d\n", i);
25009
25010			if (un->un_f_lun_reset_enabled == TRUE) {
25011				reset_retval = scsi_reset(SD_ADDRESS(un),
25012				    RESET_LUN);
25013			}
25014			if (reset_retval == 0) {
25015				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25016			}
25017			(void) sd_send_polled_RQS(un);
25018
25019		} else {
25020			SD_INFO(SD_LOG_DUMP, un,
25021			    "sddump: write failed with 0x%x, try # %d\n",
25022			    SD_GET_PKT_STATUS(wr_pktp), i);
25023			mutex_enter(SD_MUTEX(un));
25024			sd_reset_target(un, wr_pktp);
25025			mutex_exit(SD_MUTEX(un));
25026		}
25027
25028		/*
25029		 * If we are not getting anywhere with lun/target resets,
25030		 * let's reset the bus.
25031		 */
25032		if (i == SD_NDUMP_RETRIES/2) {
25033			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25034			(void) sd_send_polled_RQS(un);
25035		}
25036	}
25037	}
25038
25039	scsi_destroy_pkt(wr_pktp);
25040	mutex_enter(SD_MUTEX(un));
25041	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
25042		mutex_exit(SD_MUTEX(un));
25043		scsi_free_consistent_buf(wr_bp);
25044	} else {
25045		mutex_exit(SD_MUTEX(un));
25046	}
25047	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
25048	return (err);
25049}
25050
25051/*
25052 *    Function: sd_scsi_poll()
25053 *
25054 * Description: This is a wrapper for the scsi_poll call.
25055 *
25056 *   Arguments: sd_lun - The unit structure
25057 *              scsi_pkt - The scsi packet being sent to the device.
25058 *
25059 * Return Code: 0 - Command completed successfully with good status
25060 *             -1 - Command failed.  This could indicate a check condition
25061 *                  or other status value requiring recovery action.
25062 *
25063 * NOTE: This code is only called from sddump().
25064 */
25065
25066static int
25067sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
25068{
25069	int status;
25070
25071	ASSERT(un != NULL);
25072	ASSERT(!mutex_owned(SD_MUTEX(un)));
25073	ASSERT(pktp != NULL);
25074
25075	status = SD_SUCCESS;
25076
25077	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
25078		pktp->pkt_flags |= un->un_tagflags;
25079		pktp->pkt_flags &= ~FLAG_NODISCON;
25080	}
25081
25082	status = sd_ddi_scsi_poll(pktp);
25083	/*
25084	 * Scsi_poll returns 0 (success) if the command completes and the
25085	 * status block is STATUS_GOOD.  We should only check errors if this
25086	 * condition is not true.  Even then we should send our own request
25087	 * sense packet only if we have a check condition and auto
25088	 * request sense has not been performed by the hba.
25089	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
25090	 */
25091	if ((status != SD_SUCCESS) &&
25092	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
25093	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
25094	    (pktp->pkt_reason != CMD_DEV_GONE))
25095		(void) sd_send_polled_RQS(un);
25096
25097	return (status);
25098}
25099
25100/*
25101 *    Function: sd_send_polled_RQS()
25102 *
25103 * Description: This sends the request sense command to a device.
25104 *
25105 *   Arguments: sd_lun - The unit structure
25106 *
25107 * Return Code: 0 - Command completed successfully with good status
25108 *             -1 - Command failed.
25109 *
25110 */
25111
25112static int
25113sd_send_polled_RQS(struct sd_lun *un)
25114{
25115	int	ret_val;
25116	struct	scsi_pkt	*rqs_pktp;
25117	struct	buf		*rqs_bp;
25118
25119	ASSERT(un != NULL);
25120	ASSERT(!mutex_owned(SD_MUTEX(un)));
25121
25122	ret_val = SD_SUCCESS;
25123
25124	rqs_pktp = un->un_rqs_pktp;
25125	rqs_bp	 = un->un_rqs_bp;
25126
25127	mutex_enter(SD_MUTEX(un));
25128
25129	if (un->un_sense_isbusy) {
25130		ret_val = SD_FAILURE;
25131		mutex_exit(SD_MUTEX(un));
25132		return (ret_val);
25133	}
25134
25135	/*
25136	 * If the request sense buffer (and packet) is not in use,
25137	 * let's set un_sense_isbusy and send our packet.
25138	 */
25139	un->un_sense_isbusy 	= 1;
25140	rqs_pktp->pkt_resid  	= 0;
25141	rqs_pktp->pkt_reason 	= 0;
25142	rqs_pktp->pkt_flags |= FLAG_NOINTR;
25143	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
25144
25145	mutex_exit(SD_MUTEX(un));
25146
25147	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
25148	    " 0x%p\n", rqs_bp->b_un.b_addr);
25149
25150	/*
25151	 * Can't send this to sd_scsi_poll; we'd wrap ourselves around the
25152	 * axle - it has a call into us!
25153	 */
25154	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
25155		SD_INFO(SD_LOG_COMMON, un,
25156		    "sd_send_polled_RQS: RQS failed\n");
25157	}
25158
25159	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
25160	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
25161
25162	mutex_enter(SD_MUTEX(un));
25163	un->un_sense_isbusy = 0;
25164	mutex_exit(SD_MUTEX(un));
25165
25166	return (ret_val);
25167}
25168
25169/*
25170 * Defines needed for localized version of the scsi_poll routine.
25171 */
25172#define	CSEC		10000			/* usecs */
25173#define	SEC_TO_CSEC	(1000000/CSEC)
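
/*
 * A worked example of the retry budget used below: with CSEC = 10000
 * usec, SEC_TO_CSEC is 100, so a pkt_time of 60 seconds yields a budget
 * of 6000 ten-millisecond polling slots. Retries that wait a full
 * second (busy, not-ready) add SEC_TO_CSEC - 1 to busy_count, so each
 * consumes 100 slots, i.e. one second of the budget.
 */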
25174
25175/*
25176 *    Function: sd_ddi_scsi_poll()
25177 *
25178 * Description: Localized version of the scsi_poll routine.  The purpose is to
25179 *		send a scsi_pkt to a device as a polled command.  This version
25180 *		is to ensure more robust handling of transport errors.
25181 *		Specifically, this routine cures the not ready, coming ready
25182 *		transition on power up and reset of Sonomas.  This can take
25183 *		up to 45 seconds for power-on and 20 seconds for reset of a
25184 * 		Sonoma lun.
25185 *
25186 *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
25187 *
25188 * Return Code: 0 - Command completed successfully with good status
25189 *             -1 - Command failed.
25190 *
25191 * NOTE: This code is almost identical to scsi_poll; however, before 6668774 can
25192 * be fixed (removing this code), we need to determine how to handle the
25193 * KEY_UNIT_ATTENTION condition below in conditions not as limited as sddump().
25194 *
25195 * NOTE: This code is only called from sddump().
25196 */
25197static int
25198sd_ddi_scsi_poll(struct scsi_pkt *pkt)
25199{
25200	int			rval = -1;
25201	int			savef;
25202	long			savet;
25203	void			(*savec)();
25204	int			timeout;
25205	int			busy_count;
25206	int			poll_delay;
25207	int			rc;
25208	uint8_t			*sensep;
25209	struct scsi_arq_status	*arqstat;
25210	extern int		do_polled_io;
25211
25212	ASSERT(pkt->pkt_scbp);
25213
25214	/*
25215	 * save old flags..
25216	 */
25217	savef = pkt->pkt_flags;
25218	savec = pkt->pkt_comp;
25219	savet = pkt->pkt_time;
25220
25221	pkt->pkt_flags |= FLAG_NOINTR;
25222
25223	/*
25224	 * XXX there is nothing in the SCSA spec that states that we should not
25225	 * do a callback for polled cmds; however, removing this will break sd
25226	 * and probably other target drivers
25227	 */
25228	pkt->pkt_comp = NULL;
25229
25230	/*
25231	 * We don't like a polled command without a timeout;
25232	 * 60 seconds seems long enough.
25233	 */
25234	if (pkt->pkt_time == 0)
25235		pkt->pkt_time = SCSI_POLL_TIMEOUT;
25236
25237	/*
25238	 * Send polled cmd.
25239	 *
25240	 * We do some error recovery for various errors.  Tran_busy,
25241	 * queue full, and non-dispatched commands are retried every 10 msec,
25242	 * as they are typically transient failures.  Busy status and Not
25243	 * Ready are retried every second as this status takes a while to
25244	 * change.
25245	 */
25246	timeout = pkt->pkt_time * SEC_TO_CSEC;
25247
25248	for (busy_count = 0; busy_count < timeout; busy_count++) {
25249		/*
25250		 * Initialize pkt status variables.
25251		 */
25252		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
25253
25254		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
25255			if (rc != TRAN_BUSY) {
25256				/* Transport failed - give up. */
25257				break;
25258			} else {
25259				/* Transport busy - try again. */
25260				poll_delay = 1 * CSEC;		/* 10 msec. */
25261			}
25262		} else {
25263			/*
25264			 * Transport accepted - check pkt status.
25265			 */
25266			rc = (*pkt->pkt_scbp) & STATUS_MASK;
25267			if ((pkt->pkt_reason == CMD_CMPLT) &&
25268			    (rc == STATUS_CHECK) &&
25269			    (pkt->pkt_state & STATE_ARQ_DONE)) {
25270				arqstat =
25271				    (struct scsi_arq_status *)(pkt->pkt_scbp);
25272				sensep = (uint8_t *)&arqstat->sts_sensedata;
25273			} else {
25274				sensep = NULL;
25275			}
25276
25277			if ((pkt->pkt_reason == CMD_CMPLT) &&
25278			    (rc == STATUS_GOOD)) {
25279				/* No error - we're done */
25280				rval = 0;
25281				break;
25282
25283			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
25284				/* Lost connection - give up */
25285				break;
25286
25287			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
25288			    (pkt->pkt_state == 0)) {
25289				/* Pkt not dispatched - try again. */
25290				poll_delay = 1 * CSEC;		/* 10 msec. */
25291
25292			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
25293			    (rc == STATUS_QFULL)) {
25294				/* Queue full - try again. */
25295				poll_delay = 1 * CSEC;		/* 10 msec. */
25296
25297			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
25298			    (rc == STATUS_BUSY)) {
25299				/* Busy - try again. */
25300				poll_delay = 100 * CSEC;	/* 1 sec. */
25301				busy_count += (SEC_TO_CSEC - 1);
25302
25303			} else if ((sensep != NULL) &&
25304			    (scsi_sense_key(sensep) == KEY_UNIT_ATTENTION)) {
25305				/*
25306				 * Unit Attention - try again.
25307				 * Pretend it took 1 sec.
25308				 * NOTE: 'continue' avoids poll_delay
25309				 */
25310				busy_count += (SEC_TO_CSEC - 1);
25311				continue;
25312
25313			} else if ((sensep != NULL) &&
25314			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
25315			    (scsi_sense_asc(sensep) == 0x04) &&
25316			    (scsi_sense_ascq(sensep) == 0x01)) {
25317				/*
25318				 * Not ready -> ready - try again.
25319				 * 04h/01h: LUN IS IN PROCESS OF BECOMING READY
25320				 * ...same as STATUS_BUSY
25321				 */
25322				poll_delay = 100 * CSEC;	/* 1 sec. */
25323				busy_count += (SEC_TO_CSEC - 1);
25324
25325			} else {
25326				/* BAD status - give up. */
25327				break;
25328			}
25329		}
25330
25331		if (((curthread->t_flag & T_INTR_THREAD) == 0) &&
25332		    !do_polled_io) {
25333			delay(drv_usectohz(poll_delay));
25334		} else {
25335			/* we busy wait during cpr_dump or interrupt threads */
25336			drv_usecwait(poll_delay);
25337		}
25338	}
25339
25340	pkt->pkt_flags = savef;
25341	pkt->pkt_comp = savec;
25342	pkt->pkt_time = savet;
25343
25344	/* return on error */
25345	if (rval)
25346		return (rval);
25347
25348	/*
25349	 * This is not a performance critical code path.
25350	 *
25351	 * As an accommodation for scsi_poll callers, to avoid ddi_dma_sync()
25352	 * issues associated with looking at DMA memory prior to
25353	 * scsi_pkt_destroy(), we scsi_sync_pkt() prior to return.
25354	 */
25355	scsi_sync_pkt(pkt);
25356	return (0);
25357}
25358
25359
25360
25361/*
25362 *    Function: sd_persistent_reservation_in_read_keys
25363 *
25364 * Description: This routine is the driver entry point for handling CD-ROM
25365 *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
25366 *		by sending the SCSI-3 PRIN commands to the device.
25367 *		Processes the read keys command response by copying the
25368 *		reservation key information into the user provided buffer.
25369 *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
25370 *
25371 *   Arguments: un   -  Pointer to soft state struct for the target.
25372 *		usrp -	user provided pointer to multihost Persistent In Read
25373 *			Keys structure (mhioc_inkeys_t)
25374 *		flag -	this argument is a pass through to ddi_copyxxx()
25375 *			directly from the mode argument of ioctl().
25376 *
25377 * Return Code: 0   - Success
25378 *		EACCES
25379 *		ENOTSUP
25380 *		errno return code from sd_send_scsi_cmd()
25381 *
25382 *     Context: Can sleep. Does not return until command is completed.
25383 */
25384
25385static int
25386sd_persistent_reservation_in_read_keys(struct sd_lun *un,
25387    mhioc_inkeys_t *usrp, int flag)
25388{
25389#ifdef _MULTI_DATAMODEL
25390	struct mhioc_key_list32	li32;
25391#endif
25392	sd_prin_readkeys_t	*in;
25393	mhioc_inkeys_t		*ptr;
25394	mhioc_key_list_t	li;
25395	uchar_t			*data_bufp;
25396	int 			data_len;
25397	int			rval = 0;
25398	size_t			copysz;
25399	sd_ssc_t		*ssc;
25400
25401	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
25402		return (EINVAL);
25403	}
25404	bzero(&li, sizeof (mhioc_key_list_t));
25405
25406	ssc = sd_ssc_init(un);
25407
25408	/*
25409	 * Get the listsize from the user.
25410	 */
25411#ifdef _MULTI_DATAMODEL
25412
25413	switch (ddi_model_convert_from(flag & FMODELS)) {
25414	case DDI_MODEL_ILP32:
25415		copysz = sizeof (struct mhioc_key_list32);
25416		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
25417			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25418			    "sd_persistent_reservation_in_read_keys: "
25419			    "failed ddi_copyin: mhioc_key_list32_t\n");
25420			rval = EFAULT;
25421			goto done;
25422		}
25423		li.listsize = li32.listsize;
25424		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
25425		break;
25426
25427	case DDI_MODEL_NONE:
25428		copysz = sizeof (mhioc_key_list_t);
25429		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
25430			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25431			    "sd_persistent_reservation_in_read_keys: "
25432			    "failed ddi_copyin: mhioc_key_list_t\n");
25433			rval = EFAULT;
25434			goto done;
25435		}
25436		break;
25437	}
25438
25439#else /* ! _MULTI_DATAMODEL */
25440	copysz = sizeof (mhioc_key_list_t);
25441	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
25442		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25443		    "sd_persistent_reservation_in_read_keys: "
25444		    "failed ddi_copyin: mhioc_key_list_t\n");
25445		rval = EFAULT;
25446		goto done;
25447	}
25448#endif
25449
25450	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
25451	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
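	/*
	 * For example (assuming MHIOC_RESV_KEY_SIZE is 8 and that
	 * sd_prin_readkeys_t is a fixed PRIN header followed by a caddr_t
	 * placeholder for the key data): a listsize of 4 leaves room for
	 * the header plus four 8-byte reservation keys.
	 */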
25452	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
25453
25454	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
25455	    data_len, data_bufp);
25456	if (rval != 0) {
25457		if (rval == EIO)
25458			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
25459		else
25460			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
25461		goto done;
25462	}
25463	in = (sd_prin_readkeys_t *)data_bufp;
25464	ptr->generation = BE_32(in->generation);
25465	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
25466
25467	/*
25468	 * Return the min(listsize, listlen) keys
25469	 */
25470#ifdef _MULTI_DATAMODEL
25471
25472	switch (ddi_model_convert_from(flag & FMODELS)) {
25473	case DDI_MODEL_ILP32:
25474		li32.listlen = li.listlen;
25475		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
25476			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25477			    "sd_persistent_reservation_in_read_keys: "
25478			    "failed ddi_copyout: mhioc_key_list32_t\n");
25479			rval = EFAULT;
25480			goto done;
25481		}
25482		break;
25483
25484	case DDI_MODEL_NONE:
25485		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
25486			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25487			    "sd_persistent_reservation_in_read_keys: "
25488			    "failed ddi_copyout: mhioc_key_list_t\n");
25489			rval = EFAULT;
25490			goto done;
25491		}
25492		break;
25493	}
25494
25495#else /* ! _MULTI_DATAMODEL */
25496
25497	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
25498		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25499		    "sd_persistent_reservation_in_read_keys: "
25500		    "failed ddi_copyout: mhioc_key_list_t\n");
25501		rval = EFAULT;
25502		goto done;
25503	}
25504
25505#endif /* _MULTI_DATAMODEL */
25506
25507	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
25508	    li.listsize * MHIOC_RESV_KEY_SIZE);
25509	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
25510		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25511		    "sd_persistent_reservation_in_read_keys: "
25512		    "failed ddi_copyout: keylist\n");
25513		rval = EFAULT;
25514	}
25515done:
25516	sd_ssc_fini(ssc);
25517	kmem_free(data_bufp, data_len);
25518	return (rval);
25519}
25520
25521
25522/*
25523 *    Function: sd_persistent_reservation_in_read_resv
25524 *
25525 * Description: This routine is the driver entry point for handling CD-ROM
25526 *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
25527 *		by sending the SCSI-3 PRIN commands to the device.
25528 *		Process the read persistent reservations command response by
25529 *		copying the reservation information into the user provided
25530 *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
25531 *
25532 *   Arguments: un   -  Pointer to soft state struct for the target.
25533 *		usrp -	user provided pointer to multihost Persistent In Read
25534 *			Reservations structure (mhioc_inresvs_t)
25535 *		flag -	this argument is a pass through to ddi_copyxxx()
25536 *			directly from the mode argument of ioctl().
25537 *
25538 * Return Code: 0   - Success
25539 *		EACCES
25540 *		ENOTSUP
25541 *		errno return code from sd_send_scsi_cmd()
25542 *
25543 *     Context: Can sleep. Does not return until command is completed.
25544 */
25545
25546static int
25547sd_persistent_reservation_in_read_resv(struct sd_lun *un,
25548    mhioc_inresvs_t *usrp, int flag)
25549{
25550#ifdef _MULTI_DATAMODEL
25551	struct mhioc_resv_desc_list32 resvlist32;
25552#endif
25553	sd_prin_readresv_t	*in;
25554	mhioc_inresvs_t		*ptr;
25555	sd_readresv_desc_t	*readresv_ptr;
25556	mhioc_resv_desc_list_t	resvlist;
25557	mhioc_resv_desc_t 	resvdesc;
25558	uchar_t			*data_bufp = NULL;
25559	int 			data_len;
25560	int			rval = 0;
25561	int			i;
25562	size_t			copysz;
25563	mhioc_resv_desc_t	*bufp;
25564	sd_ssc_t		*ssc;
25565
25566	if ((ptr = usrp) == NULL) {
25567		return (EINVAL);
25568	}
25569
25570	ssc = sd_ssc_init(un);
25571
25572	/*
25573	 * Get the listsize from the user.
25574	 */
25575#ifdef _MULTI_DATAMODEL
25576	switch (ddi_model_convert_from(flag & FMODELS)) {
25577	case DDI_MODEL_ILP32:
25578		copysz = sizeof (struct mhioc_resv_desc_list32);
25579		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
25580			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25581			    "sd_persistent_reservation_in_read_resv: "
25582			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
25583			rval = EFAULT;
25584			goto done;
25585		}
25586		resvlist.listsize = resvlist32.listsize;
25587		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
25588		break;
25589
25590	case DDI_MODEL_NONE:
25591		copysz = sizeof (mhioc_resv_desc_list_t);
25592		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
25593			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25594			    "sd_persistent_reservation_in_read_resv: "
25595			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
25596			rval = EFAULT;
25597			goto done;
25598		}
25599		break;
25600	}
25601#else /* ! _MULTI_DATAMODEL */
25602	copysz = sizeof (mhioc_resv_desc_list_t);
25603	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
25604		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25605		    "sd_persistent_reservation_in_read_resv: "
25606		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
25607		rval = EFAULT;
25608		goto done;
25609	}
25610#endif /* ! _MULTI_DATAMODEL */
25611
25612	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
25613	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
25614	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
25615
25616	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_RESV,
25617	    data_len, data_bufp);
25618	if (rval != 0) {
25619		if (rval == EIO)
25620			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
25621		else
25622			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
25623		goto done;
25624	}
25625	in = (sd_prin_readresv_t *)data_bufp;
25626	ptr->generation = BE_32(in->generation);
25627	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
25628
25629	/*
25630	 * Return the min(listsize, listlen) keys.
25631	 */
25632#ifdef _MULTI_DATAMODEL
25633
25634	switch (ddi_model_convert_from(flag & FMODELS)) {
25635	case DDI_MODEL_ILP32:
25636		resvlist32.listlen = resvlist.listlen;
25637		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
25638			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25639			    "sd_persistent_reservation_in_read_resv: "
25640			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
25641			rval = EFAULT;
25642			goto done;
25643		}
25644		break;
25645
25646	case DDI_MODEL_NONE:
25647		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
25648			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25649			    "sd_persistent_reservation_in_read_resv: "
25650			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
25651			rval = EFAULT;
25652			goto done;
25653		}
25654		break;
25655	}
25656
25657#else /* ! _MULTI_DATAMODEL */
25658
25659	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
25660		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25661		    "sd_persistent_reservation_in_read_resv: "
25662		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
25663		rval = EFAULT;
25664		goto done;
25665	}
25666
25667#endif /* ! _MULTI_DATAMODEL */
25668
25669	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
25670	bufp = resvlist.list;
25671	copysz = sizeof (mhioc_resv_desc_t);
25672	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
25673	    i++, readresv_ptr++, bufp++) {
25674
25675		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
25676		    MHIOC_RESV_KEY_SIZE);
25677		resvdesc.type  = readresv_ptr->type;
25678		resvdesc.scope = readresv_ptr->scope;
25679		resvdesc.scope_specific_addr =
25680		    BE_32(readresv_ptr->scope_specific_addr);
25681
25682		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
25683			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25684			    "sd_persistent_reservation_in_read_resv: "
25685			    "failed ddi_copyout: resvlist\n");
25686			rval = EFAULT;
25687			goto done;
25688		}
25689	}
25690done:
25691	sd_ssc_fini(ssc);
25692	/* Free data_bufp only if it was allocated. */
25693	if (data_bufp) {
25694		kmem_free(data_bufp, data_len);
25695	}
25696	return (rval);
25697}
25698
25699
25700/*
25701 *    Function: sr_change_blkmode()
25702 *
25703 * Description: This routine is the driver entry point for handling CD-ROM
25704 *		block mode ioctl requests. Support for returning and changing
25705 *		the current block size in use by the device is implemented. The
25706 *		LBA size is changed via a MODE SELECT Block Descriptor.
25707 *
25708 *		This routine issues a mode sense with an allocation length of
25709 *		12 bytes for the mode page header and a single block descriptor.
25710 *
25711 *   Arguments: dev - the device 'dev_t'
25712 *		cmd - the request type; one of CDROMGBLKMODE (get) or
25713 *		      CDROMSBLKMODE (set)
25714 *		data - current block size or requested block size
25715 *		flag - this argument is a pass through to ddi_copyxxx() directly
25716 *		       from the mode argument of ioctl().
25717 *
25718 * Return Code: the code returned by sd_send_scsi_cmd()
25719 *		EINVAL if invalid arguments are provided
25720 *		EFAULT if ddi_copyxxx() fails
25721 *		ENXIO if fail ddi_get_soft_state
25722 *		EIO if invalid mode sense block descriptor length
25723 *
25724 */
25725
25726static int
25727sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
25728{
25729	struct sd_lun			*un = NULL;
25730	struct mode_header		*sense_mhp, *select_mhp;
25731	struct block_descriptor		*sense_desc, *select_desc;
25732	int				current_bsize;
25733	int				rval = EINVAL;
25734	uchar_t				*sense = NULL;
25735	uchar_t				*select = NULL;
25736	sd_ssc_t			*ssc;
25737
25738	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
25739
25740	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25741		return (ENXIO);
25742	}
25743
25744	/*
25745	 * The block length is changed via the Mode Select block descriptor; the
25746	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
25747	 * required as part of this routine. Therefore the mode sense allocation
25748	 * length is specified to be the length of a mode page header and a
25749	 * block descriptor.
25750	 */
25751	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
25752
25753	ssc = sd_ssc_init(un);
25754	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
25755	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
25756	sd_ssc_fini(ssc);
25757	if (rval != 0) {
25758		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25759		    "sr_change_blkmode: Mode Sense Failed\n");
25760		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
25761		return (rval);
25762	}
25763
25764	/* Check the block descriptor len to handle only 1 block descriptor */
25765	sense_mhp = (struct mode_header *)sense;
25766	if ((sense_mhp->bdesc_length == 0) ||
25767	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
25768		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25769		    "sr_change_blkmode: Mode Sense returned invalid block"
25770		    " descriptor length\n");
25771		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
25772		return (EIO);
25773	}
25774	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
25775	current_bsize = ((sense_desc->blksize_hi << 16) |
25776	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
25777
25778	/* Process command */
25779	switch (cmd) {
25780	case CDROMGBLKMODE:
25781		/* Return the block size obtained during the mode sense */
25782		if (ddi_copyout(&current_bsize, (void *)data,
25783		    sizeof (int), flag) != 0)
25784			rval = EFAULT;
25785		break;
25786	case CDROMSBLKMODE:
25787		/* Validate the requested block size */
25788		switch (data) {
25789		case CDROM_BLK_512:
25790		case CDROM_BLK_1024:
25791		case CDROM_BLK_2048:
25792		case CDROM_BLK_2056:
25793		case CDROM_BLK_2336:
25794		case CDROM_BLK_2340:
25795		case CDROM_BLK_2352:
25796		case CDROM_BLK_2368:
25797		case CDROM_BLK_2448:
25798		case CDROM_BLK_2646:
25799		case CDROM_BLK_2647:
25800			break;
25801		default:
25802			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25803			    "sr_change_blkmode: "
25804			    "Block Size '%ld' Not Supported\n", data);
25805			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
25806			return (EINVAL);
25807		}
25808
25809		/*
25810		 * The current block size matches the requested block size so
25811		 * there is no need to send the mode select to change the size
25812		 */
25813		if (current_bsize == data) {
25814			break;
25815		}
25816
25817		/* Build the select data for the requested block size */
25818		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
25819		select_mhp = (struct mode_header *)select;
25820		select_desc =
25821		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
25822		/*
25823		 * The LBA size is changed via the block descriptor, so the
25824		 * descriptor is built according to the user data
25825		 */
25826		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
25827		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
25828		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
25829		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
25830
25831		/* Send the mode select for the requested block size */
25832		ssc = sd_ssc_init(un);
25833		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
25834		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
25835		    SD_PATH_STANDARD);
25836		sd_ssc_fini(ssc);
25837		if (rval != 0) {
25838			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25839			    "sr_change_blkmode: Mode Select Failed\n");
25840			/*
25841			 * The mode select failed for the requested block size,
25842			 * so reset the data for the original block size and
25843			 * send it to the target. The error is indicated by the
25844			 * return value for the failed mode select.
25845			 */
25846			select_desc->blksize_hi  = sense_desc->blksize_hi;
25847			select_desc->blksize_mid = sense_desc->blksize_mid;
25848			select_desc->blksize_lo  = sense_desc->blksize_lo;
25849			ssc = sd_ssc_init(un);
25850			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
25851			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
25852			    SD_PATH_STANDARD);
25853			sd_ssc_fini(ssc);
25854		} else {
25855			ASSERT(!mutex_owned(SD_MUTEX(un)));
25856			mutex_enter(SD_MUTEX(un));
25857			sd_update_block_info(un, (uint32_t)data, 0);
25858			mutex_exit(SD_MUTEX(un));
25859		}
25860		break;
25861	default:
25862		/* should not reach here, but check anyway */
25863		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25864		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
25865		rval = EINVAL;
25866		break;
25867	}
25868
25869	if (select) {
25870		kmem_free(select, BUFLEN_CHG_BLK_MODE);
25871	}
25872	if (sense) {
25873		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
25874	}
25875	return (rval);
25876}
25877
25878
25879/*
25880 * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
25881 * implement driver support for getting and setting the CD speed. The command
25882 * set used will be based on the device type. If the device has not been
25883 * identified as MMC, the Toshiba vendor specific mode page will be used. If
25884 * the device is MMC but does not support the Real Time Streaming feature,
25885 * the SET CD SPEED command will be used to set speed and mode page 0x2A will
25886 * be used to read the speed.
25887 */
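
/*
 * In summary (per the note above):
 *
 *	non-MMC device:			Toshiba vendor mode page 0x31
 *					(sr_change_speed)
 *	MMC, no Real Time Streaming:	SET CD SPEED to set the speed and
 *					mode page 0x2A to read it
 *					(sr_atapi_change_speed)
 */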
25888
25889/*
25890 *    Function: sr_change_speed()
25891 *
25892 * Description: This routine is the driver entry point for handling CD-ROM
25893 *		drive speed ioctl requests for devices supporting the Toshiba
25894 *		vendor specific drive speed mode page. Support for returning
25895 *		and changing the current drive speed in use by the device is
25896 *		implemented.
25897 *
25898 *   Arguments: dev - the device 'dev_t'
25899 *		cmd - the request type; one of CDROMGDRVSPEED (get) or
25900 *		      CDROMSDRVSPEED (set)
25901 *		data - current drive speed or requested drive speed
25902 *		flag - this argument is a pass through to ddi_copyxxx() directly
25903 *		       from the mode argument of ioctl().
25904 *
25905 * Return Code: the code returned by sd_send_scsi_cmd()
25906 *		EINVAL if invalid arguments are provided
25907 *		EFAULT if ddi_copyxxx() fails
25908 *		ENXIO if ddi_get_soft_state fails
25909 *		EIO if invalid mode sense block descriptor length
25910 */
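
/*
 * Illustrative userland usage (a sketch, not part of the driver):
 *
 *	int speed = CDROM_DOUBLE_SPEED;
 *	(void) ioctl(fd, CDROMSDRVSPEED, speed);	set 2x
 *	(void) ioctl(fd, CDROMGDRVSPEED, &speed);	read it back
 */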
25911
25912static int
25913sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
25914{
25915	struct sd_lun			*un = NULL;
25916	struct mode_header		*sense_mhp, *select_mhp;
25917	struct mode_speed		*sense_page, *select_page;
25918	int				current_speed;
25919	int				rval = EINVAL;
25920	int				bd_len;
25921	uchar_t				*sense = NULL;
25922	uchar_t				*select = NULL;
25923	sd_ssc_t			*ssc;
25924
25925	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
25926	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25927		return (ENXIO);
25928	}
25929
25930	/*
25931	 * Note: The drive speed is being modified here according to a Toshiba
25932	 * vendor specific mode page (0x31).
25933	 */
25934	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
25935
25936	ssc = sd_ssc_init(un);
25937	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
25938	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
25939	    SD_PATH_STANDARD);
25940	sd_ssc_fini(ssc);
25941	if (rval != 0) {
25942		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25943		    "sr_change_speed: Mode Sense Failed\n");
25944		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
25945		return (rval);
25946	}
25947	sense_mhp  = (struct mode_header *)sense;
25948
25949	/* Check the block descriptor length; only one block descriptor is supported */
25950	bd_len = sense_mhp->bdesc_length;
25951	if (bd_len > MODE_BLK_DESC_LENGTH) {
25952		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25953		    "sr_change_speed: Mode Sense returned invalid block "
25954		    "descriptor length\n");
25955		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
25956		return (EIO);
25957	}
25958
25959	sense_page = (struct mode_speed *)
25960	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
25961	current_speed = sense_page->speed;
25962
25963	/* Process command */
25964	switch (cmd) {
25965	case CDROMGDRVSPEED:
25966		/* Return the drive speed obtained during the mode sense */
25967		if (current_speed == 0x2) {
25968			current_speed = CDROM_TWELVE_SPEED;
25969		}
25970		if (ddi_copyout(&current_speed, (void *)data,
25971		    sizeof (int), flag) != 0) {
25972			rval = EFAULT;
25973		}
25974		break;
25975	case CDROMSDRVSPEED:
25976		/* Validate the requested drive speed */
25977		switch ((uchar_t)data) {
25978		case CDROM_TWELVE_SPEED:
25979			data = 0x2;
25980			/*FALLTHROUGH*/
25981		case CDROM_NORMAL_SPEED:
25982		case CDROM_DOUBLE_SPEED:
25983		case CDROM_QUAD_SPEED:
25984		case CDROM_MAXIMUM_SPEED:
25985			break;
25986		default:
25987			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25988			    "sr_change_speed: "
25989			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
25990			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
25991			return (EINVAL);
25992		}
25993
25994		/*
25995		 * The current drive speed matches the requested drive speed so
25996		 * there is no need to send the mode select to change the speed
25997		 */
25998		if (current_speed == data) {
25999			break;
26000		}
26001
26002		/* Build the select data for the requested drive speed */
26003		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26004		select_mhp = (struct mode_header *)select;
26005		select_mhp->bdesc_length = 0;
26006		select_page =
26007		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
26010		select_page->mode_page.code = CDROM_MODE_SPEED;
26011		select_page->mode_page.length = 2;
26012		select_page->speed = (uchar_t)data;
26013
26014		/* Send the mode select for the requested drive speed */
26015		ssc = sd_ssc_init(un);
26016		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
26017		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26018		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26019		sd_ssc_fini(ssc);
26020		if (rval != 0) {
26021			/*
26022			 * The mode select failed for the requested drive speed,
26023			 * so reset the data for the original drive speed and
26024			 * send it to the target. The error is indicated by the
26025			 * return value for the failed mode select.
26026			 */
26027			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26028			    "sr_change_speed: Mode Select Failed\n");
26029			select_page->speed = sense_page->speed;
26030			ssc = sd_ssc_init(un);
26031			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
26032			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
26033			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
26034			sd_ssc_fini(ssc);
26035		}
26036		break;
26037	default:
26038		/* should not reach here, but check anyway */
26039		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26040		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
26041		rval = EINVAL;
26042		break;
26043	}
26044
26045	if (select) {
26046		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
26047	}
26048	if (sense) {
26049		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26050	}
26051
26052	return (rval);
26053}
26054
26055
26056/*
26057 *    Function: sr_atapi_change_speed()
26058 *
26059 * Description: This routine is the driver entry point for handling CD-ROM
26060 *		drive speed ioctl requests for MMC devices that do not support
26061 *		the Real Time Streaming feature (0x107).
26062 *
26063 *		Note: This routine will use the SET SPEED command which may not
26064 *		be supported by all devices.
26065 *
26066 *   Arguments: dev - the device 'dev_t'
26067 *		cmd - the request type; one of CDROMGDRVSPEED (get) or
26068 *		      CDROMSDRVSPEED (set)
26069 *		data - current drive speed or requested drive speed
26070 *		flag - this argument is a pass through to ddi_copyxxx() directly
26071 *		      from the mode argument of ioctl().
26072 *
26073 * Return Code: the code returned by sd_send_scsi_cmd()
26074 *		EINVAL if invalid arguments are provided
26075 *		EFAULT if ddi_copyxxx() fails
26076 *		ENXIO if ddi_get_soft_state fails
26077 *		EIO if invalid mode sense block descriptor length
26078 */
26079
26080static int
26081sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
26082{
26083	struct sd_lun			*un;
26084	struct uscsi_cmd		*com = NULL;
26085	struct mode_header_grp2		*sense_mhp;
26086	uchar_t				*sense_page;
26087	uchar_t				*sense = NULL;
26088	char				cdb[CDB_GROUP5];
26089	int				bd_len;
26090	int				current_speed = 0;
26091	int				max_speed = 0;
26092	int				rval;
26093	sd_ssc_t			*ssc;
26094
26095	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
26096
26097	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26098		return (ENXIO);
26099	}
26100
26101	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
26102
26103	ssc = sd_ssc_init(un);
26104	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
26105	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
26106	    SD_PATH_STANDARD);
26107	sd_ssc_fini(ssc);
26108	if (rval != 0) {
26109		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26110		    "sr_atapi_change_speed: Mode Sense Failed\n");
26111		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26112		return (rval);
26113	}
26114
26115	/* Check the block descriptor length; only one block descriptor is supported */
26116	sense_mhp = (struct mode_header_grp2 *)sense;
26117	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
26118	if (bd_len > MODE_BLK_DESC_LENGTH) {
26119		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26120		    "sr_atapi_change_speed: Mode Sense returned invalid "
26121		    "block descriptor length\n");
26122		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26123		return (EIO);
26124	}
26125
26126	/* Calculate the current and maximum drive speeds */
26127	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
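	/*
	 * In the CD Capabilities mode page (0x2A), bytes 8-9 hold the
	 * maximum read speed and bytes 14-15 the current read speed, both
	 * big-endian values in KB/sec.
	 */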
26128	current_speed = (sense_page[14] << 8) | sense_page[15];
26129	max_speed = (sense_page[8] << 8) | sense_page[9];
26130
26131	/* Process the command */
26132	switch (cmd) {
26133	case CDROMGDRVSPEED:
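		/*
		 * SD_SPEED_1X is the nominal 1x CD-ROM data rate in KB/sec;
		 * dividing by it yields the familiar Nx multiplier.
		 */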
26134		current_speed /= SD_SPEED_1X;
26135		if (ddi_copyout(&current_speed, (void *)data,
26136		    sizeof (int), flag) != 0)
26137			rval = EFAULT;
26138		break;
26139	case CDROMSDRVSPEED:
26140		/* Convert the speed code to KB/sec */
26141		switch ((uchar_t)data) {
26142		case CDROM_NORMAL_SPEED:
26143			current_speed = SD_SPEED_1X;
26144			break;
26145		case CDROM_DOUBLE_SPEED:
26146			current_speed = 2 * SD_SPEED_1X;
26147			break;
26148		case CDROM_QUAD_SPEED:
26149			current_speed = 4 * SD_SPEED_1X;
26150			break;
26151		case CDROM_TWELVE_SPEED:
26152			current_speed = 12 * SD_SPEED_1X;
26153			break;
26154		case CDROM_MAXIMUM_SPEED:
26155			current_speed = 0xffff;
26156			break;
26157		default:
26158			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26159			    "sr_atapi_change_speed: invalid drive speed %d\n",
26160			    (uchar_t)data);
26161			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26162			return (EINVAL);
26163		}
26164
26165		/* Check the request against the drive's max speed. */
26166		if (current_speed != 0xffff) {
26167			if (current_speed > max_speed) {
26168				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26169				return (EINVAL);
26170			}
26171		}
26172
26173		/*
26174		 * Build and send the SET SPEED command
26175		 *
26176		 * Note: The SET SPEED (0xBB) command used in this routine is
26177		 * obsolete per the SCSI MMC spec but still supported in the
26178		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI,
26179		 * so the command is still implemented in this routine.
26180		 */
26181		bzero(cdb, sizeof (cdb));
26182		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
26183		cdb[2] = (uchar_t)(current_speed >> 8);
26184		cdb[3] = (uchar_t)current_speed;
26185		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26186		com->uscsi_cdb	   = (caddr_t)cdb;
26187		com->uscsi_cdblen  = CDB_GROUP5;
26188		com->uscsi_bufaddr = NULL;
26189		com->uscsi_buflen  = 0;
26190		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
26191		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
26192		break;
26193	default:
26194		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26195		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
26196		rval = EINVAL;
26197	}
26198
26199	if (sense) {
26200		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
26201	}
26202	if (com) {
26203		kmem_free(com, sizeof (*com));
26204	}
26205	return (rval);
26206}
26207
26208
26209/*
26210 *    Function: sr_pause_resume()
26211 *
26212 * Description: This routine is the driver entry point for handling CD-ROM
26213 *		pause/resume ioctl requests. This only affects the audio play
26214 *		operation.
26215 *
26216 *   Arguments: dev - the device 'dev_t'
26217 *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
26218 *		      for setting the resume bit of the cdb.
26219 *
26220 * Return Code: the code returned by sd_send_scsi_cmd()
26221 *		EINVAL if invalid mode specified
26222 *
26223 */
26224
26225static int
26226sr_pause_resume(dev_t dev, int cmd)
26227{
26228	struct sd_lun		*un;
26229	struct uscsi_cmd	*com;
26230	char			cdb[CDB_GROUP1];
26231	int			rval;
26232
26233	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26234		return (ENXIO);
26235	}
26236
26237	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26238	bzero(cdb, CDB_GROUP1);
26239	cdb[0] = SCMD_PAUSE_RESUME;
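	/* Bit 0 of CDB byte 8 is the Resume bit: 1 = resume, 0 = pause */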
26240	switch (cmd) {
26241	case CDROMRESUME:
26242		cdb[8] = 1;
26243		break;
26244	case CDROMPAUSE:
26245		cdb[8] = 0;
26246		break;
26247	default:
26248		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
26249		    " Command '%x' Not Supported\n", cmd);
26250		rval = EINVAL;
26251		goto done;
26252	}
26253
26254	com->uscsi_cdb    = cdb;
26255	com->uscsi_cdblen = CDB_GROUP1;
26256	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
26257
26258	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26259	    SD_PATH_STANDARD);
26260
26261done:
26262	kmem_free(com, sizeof (*com));
26263	return (rval);
26264}
26265
26266
26267/*
26268 *    Function: sr_play_msf()
26269 *
26270 * Description: This routine is the driver entry point for handling CD-ROM
26271 *		ioctl requests to output the audio signals at the specified
26272 *		starting address and continue the audio play until the specified
26273 *		ending address (CDROMPLAYMSF). The address is in Minute Second
26274 *		Frame (MSF) format.
26275 *
26276 *   Arguments: dev	- the device 'dev_t'
26277 *		data	- pointer to user provided audio msf structure,
26278 *		          specifying start/end addresses.
26279 *		flag	- this argument is a pass through to ddi_copyxxx()
26280 *		          directly from the mode argument of ioctl().
26281 *
26282 * Return Code: the code returned by sd_send_scsi_cmd()
26283 *		EFAULT if ddi_copyxxx() fails
26284 *		ENXIO if fail ddi_get_soft_state fails
26285 *		EINVAL if data pointer is NULL
26286 */
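
/*
 * Illustrative userland usage (a sketch, not part of the driver): play
 * audio from 2:00.00 through 4:00.00 on the disc.
 *
 *	struct cdrom_msf msf;
 *	msf.cdmsf_min0 = 2;  msf.cdmsf_sec0 = 0;  msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 4;  msf.cdmsf_sec1 = 0;  msf.cdmsf_frame1 = 0;
 *	(void) ioctl(fd, CDROMPLAYMSF, &msf);
 */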
26287
26288static int
26289sr_play_msf(dev_t dev, caddr_t data, int flag)
26290{
26291	struct sd_lun		*un;
26292	struct uscsi_cmd	*com;
26293	struct cdrom_msf	msf_struct;
26294	struct cdrom_msf	*msf = &msf_struct;
26295	char			cdb[CDB_GROUP1];
26296	int			rval;
26297
26298	if (data == NULL) {
26299		return (EINVAL);
26300	}
26301
26302	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26303		return (ENXIO);
26304	}
26305
26306	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
26307		return (EFAULT);
26308	}
26309
26310	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26311	bzero(cdb, CDB_GROUP1);
26312	cdb[0] = SCMD_PLAYAUDIO_MSF;
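	/*
	 * Some legacy drives expect the MSF fields in binary-coded decimal;
	 * un_f_cfg_playmsf_bcd selects the encoding. BYTE_TO_BCD typically
	 * expands to (((x) / 10) << 4) | ((x) % 10).
	 */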
26313	if (un->un_f_cfg_playmsf_bcd == TRUE) {
26314		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
26315		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
26316		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
26317		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
26318		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
26319		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
26320	} else {
26321		cdb[3] = msf->cdmsf_min0;
26322		cdb[4] = msf->cdmsf_sec0;
26323		cdb[5] = msf->cdmsf_frame0;
26324		cdb[6] = msf->cdmsf_min1;
26325		cdb[7] = msf->cdmsf_sec1;
26326		cdb[8] = msf->cdmsf_frame1;
26327	}
26328	com->uscsi_cdb    = cdb;
26329	com->uscsi_cdblen = CDB_GROUP1;
26330	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
26331	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26332	    SD_PATH_STANDARD);
26333	kmem_free(com, sizeof (*com));
26334	return (rval);
26335}
26336
26337
26338/*
26339 *    Function: sr_play_trkind()
26340 *
26341 * Description: This routine is the driver entry point for handling CD-ROM
26342 *		ioctl requests to output the audio signals at the specified
26343 *		starting address and continue the audio play until the specified
26344 *		ending address (CDROMPLAYTRKIND). The address is in Track Index
26345 *		format.
26346 *
26347 *   Arguments: dev	- the device 'dev_t'
26348 *		data	- pointer to user provided audio track/index structure,
26349 *		          specifying start/end addresses.
26350 *		flag	- this argument is a pass through to ddi_copyxxx()
26351 *		          directly from the mode argument of ioctl().
26352 *
26353 * Return Code: the code returned by sd_send_scsi_cmd()
26354 *		EFAULT if ddi_copyxxx() fails
26355 *		ENXIO if ddi_get_soft_state fails
26356 *		EINVAL if data pointer is NULL
26357 */
26358
26359static int
26360sr_play_trkind(dev_t dev, caddr_t data, int flag)
26361{
26362	struct cdrom_ti		ti_struct;
26363	struct cdrom_ti		*ti = &ti_struct;
26364	struct uscsi_cmd	*com = NULL;
26365	char			cdb[CDB_GROUP1];
26366	int			rval;
26367
26368	if (data == NULL) {
26369		return (EINVAL);
26370	}
26371
26372	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
26373		return (EFAULT);
26374	}
26375
26376	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26377	bzero(cdb, CDB_GROUP1);
26378	cdb[0] = SCMD_PLAYAUDIO_TI;
26379	cdb[4] = ti->cdti_trk0;
26380	cdb[5] = ti->cdti_ind0;
26381	cdb[7] = ti->cdti_trk1;
26382	cdb[8] = ti->cdti_ind1;
26383	com->uscsi_cdb    = cdb;
26384	com->uscsi_cdblen = CDB_GROUP1;
26385	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
26386	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26387	    SD_PATH_STANDARD);
26388	kmem_free(com, sizeof (*com));
26389	return (rval);
26390}
26391
26392
26393/*
26394 *    Function: sr_read_all_subcodes()
26395 *
26396 * Description: This routine is the driver entry point for handling CD-ROM
26397 *		ioctl requests to return raw subcode data while the target is
26398 *		playing audio (CDROMSUBCODE).
26399 *
26400 *   Arguments: dev	- the device 'dev_t'
26401 *		data	- pointer to user provided cdrom subcode structure,
26402 *		          specifying the transfer length and address.
26403 *		flag	- this argument is a pass through to ddi_copyxxx()
26404 *		          directly from the mode argument of ioctl().
26405 *
26406 * Return Code: the code returned by sd_send_scsi_cmd()
26407 *		EFAULT if ddi_copyxxx() fails
26408 *		ENXIO if ddi_get_soft_state fails
26409 *		EINVAL if data pointer is NULL
26410 */
26411
26412static int
26413sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
26414{
26415	struct sd_lun		*un = NULL;
26416	struct uscsi_cmd	*com = NULL;
26417	struct cdrom_subcode	*subcode = NULL;
26418	int			rval;
26419	size_t			buflen;
26420	char			cdb[CDB_GROUP5];
26421
26422#ifdef _MULTI_DATAMODEL
26423	/* To support ILP32 applications in an LP64 world */
26424	struct cdrom_subcode32		cdrom_subcode32;
26425	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
26426#endif
26427	if (data == NULL) {
26428		return (EINVAL);
26429	}
26430
26431	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26432		return (ENXIO);
26433	}
26434
26435	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
26436
26437#ifdef _MULTI_DATAMODEL
26438	switch (ddi_model_convert_from(flag & FMODELS)) {
26439	case DDI_MODEL_ILP32:
26440		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
26441			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26442			    "sr_read_all_subcodes: ddi_copyin Failed\n");
26443			kmem_free(subcode, sizeof (struct cdrom_subcode));
26444			return (EFAULT);
26445		}
26446		/* Convert the ILP32 uscsi data from the application to LP64 */
26447		/* Convert the ILP32 cdrom_subcode from the application to LP64 */
26448		break;
26449	case DDI_MODEL_NONE:
26450		if (ddi_copyin(data, subcode,
26451		    sizeof (struct cdrom_subcode), flag)) {
26452			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26453			    "sr_read_all_subcodes: ddi_copyin Failed\n");
26454			kmem_free(subcode, sizeof (struct cdrom_subcode));
26455			return (EFAULT);
26456		}
26457		break;
26458	}
26459#else /* ! _MULTI_DATAMODEL */
26460	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
26461		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26462		    "sr_read_all_subcodes: ddi_copyin Failed\n");
26463		kmem_free(subcode, sizeof (struct cdrom_subcode));
26464		return (EFAULT);
26465	}
26466#endif /* _MULTI_DATAMODEL */
26467
26468	/*
26469	 * Since MMC-2 allows at most 3 bytes for the transfer length, reject
26470	 * a length that does not fit in 3 bytes
26471	 */
26472	if ((subcode->cdsc_length & 0xFF000000) != 0) {
26473		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26474		    "sr_read_all_subcodes: "
26475		    "cdrom transfer length too large: %d (limit %d)\n",
26476		    subcode->cdsc_length, 0xFFFFFF);
26477		kmem_free(subcode, sizeof (struct cdrom_subcode));
26478		return (EINVAL);
26479	}
26480
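	/*
	 * Each block contributes CDROM_BLK_SUBCODE (96) bytes of raw P-W
	 * subchannel data, so size the transfer accordingly.
	 */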
26481	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
26482	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26483	bzero(cdb, CDB_GROUP5);
26484
26485	if (un->un_f_mmc_cap == TRUE) {
26486		cdb[0] = (char)SCMD_READ_CD;
26487		cdb[2] = (char)0xff;
26488		cdb[3] = (char)0xff;
26489		cdb[4] = (char)0xff;
26490		cdb[5] = (char)0xff;
26491		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
26492		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
26493		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
26494		cdb[10] = 1;
26495	} else {
26496		/*
26497		 * Note: A vendor specific command (0xDF) is being used here to
26498		 * request a read of all subcodes.
26499		 */
26500		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
26501		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
26502		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
26503		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
26504		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
26505	}
26506	com->uscsi_cdb	   = cdb;
26507	com->uscsi_cdblen  = CDB_GROUP5;
26508	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
26509	com->uscsi_buflen  = buflen;
26510	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
26511	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
26512	    SD_PATH_STANDARD);
26513	kmem_free(subcode, sizeof (struct cdrom_subcode));
26514	kmem_free(com, sizeof (*com));
26515	return (rval);
26516}
26517
26518
26519/*
26520 *    Function: sr_read_subchannel()
26521 *
26522 * Description: This routine is the driver entry point for handling CD-ROM
26523 *		ioctl requests to return the Q sub-channel data of the CD
26524 *		current position block. (CDROMSUBCHNL) The data includes the
26525 *		current position block (CDROMSUBCHNL). The data includes the
26526 *		format per the user) , track relative CD-ROM address (LBA or MSF
26527 *		format per the user), track relative CD-ROM address (LBA or MSF
26528 *
26529 *   Arguments: dev	- the device 'dev_t'
26530 *		data	- pointer to user provided cdrom sub-channel structure
26531 *		flag	- this argument is a pass through to ddi_copyxxx()
26532 *		          directly from the mode argument of ioctl().
26533 *
26534 * Return Code: the code returned by sd_send_scsi_cmd()
26535 *		EFAULT if ddi_copyxxx() fails
26536 *		ENXIO if ddi_get_soft_state fails
26537 *		EINVAL if data pointer is NULL
26538 */
26539
26540static int
26541sr_read_subchannel(dev_t dev, caddr_t data, int flag)
26542{
26543	struct sd_lun		*un;
26544	struct uscsi_cmd	*com;
26545	struct cdrom_subchnl	subchannel;
26546	struct cdrom_subchnl	*subchnl = &subchannel;
26547	char			cdb[CDB_GROUP1];
26548	caddr_t			buffer;
26549	int			rval;
26550
26551	if (data == NULL) {
26552		return (EINVAL);
26553	}
26554
26555	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
26556	    (un->un_state == SD_STATE_OFFLINE)) {
26557		return (ENXIO);
26558	}
26559
26560	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
26561		return (EFAULT);
26562	}
26563
26564	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
26565	bzero(cdb, CDB_GROUP1);
26566	cdb[0] = SCMD_READ_SUBCHANNEL;
26567	/* Set the MSF bit based on the user requested address format */
26568	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
26569	/*
26570	 * Set the Q bit in byte 2 to indicate that Q sub-channel data is to
26571	 * be returned
26572	 */
26573	cdb[2] = 0x40;
26574	/*
26575	 * Set byte 3 to specify the return data format. A value of 0x01
26576	 * indicates that the CD-ROM current position should be returned.
26577	 */
26578	cdb[3] = 0x01;
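	/*
	 * Byte 8 is the allocation length: 16 bytes, i.e. the 4 byte
	 * sub-channel header plus the 12 byte current position data block.
	 */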
26579	cdb[8] = 0x10;
26580	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26581	com->uscsi_cdb	   = cdb;
26582	com->uscsi_cdblen  = CDB_GROUP1;
26583	com->uscsi_bufaddr = buffer;
26584	com->uscsi_buflen  = 16;
26585	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
26586	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26587	    SD_PATH_STANDARD);
26588	if (rval != 0) {
26589		kmem_free(buffer, 16);
26590		kmem_free(com, sizeof (*com));
26591		return (rval);
26592	}
26593
26594	/* Process the returned Q sub-channel data */
26595	subchnl->cdsc_audiostatus = buffer[1];
26596	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
26597	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
26598	subchnl->cdsc_trk	= buffer[6];
26599	subchnl->cdsc_ind	= buffer[7];
26600	if (subchnl->cdsc_format & CDROM_LBA) {
26601		subchnl->cdsc_absaddr.lba =
26602		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
26603		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
26604		subchnl->cdsc_reladdr.lba =
26605		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
26606		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
26607	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
26608		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
26609		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
26610		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
26611		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
26612		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
26613		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
26614	} else {
26615		subchnl->cdsc_absaddr.msf.minute = buffer[9];
26616		subchnl->cdsc_absaddr.msf.second = buffer[10];
26617		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
26618		subchnl->cdsc_reladdr.msf.minute = buffer[13];
26619		subchnl->cdsc_reladdr.msf.second = buffer[14];
26620		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
26621	}
26622	kmem_free(buffer, 16);
26623	kmem_free(com, sizeof (*com));
26624	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
26625	    != 0) {
26626		return (EFAULT);
26627	}
26628	return (rval);
26629}
26630
26631
26632/*
26633 *    Function: sr_read_tocentry()
26634 *
26635 * Description: This routine is the driver entry point for handling CD-ROM
26636 *		ioctl requests to read from the Table of Contents (TOC)
26637 *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
26638 *		fields, the starting address (LBA or MSF format per the user)
26639 *		and the data mode if the user specified track is a data track.
26640 *
26641 *		Note: The READ HEADER (0x44) command used in this routine is
26642 *		obsolete per the SCSI MMC spec but still supported in the
26643 *		MT FUJI vendor spec. Most equipment adheres to MT FUJI,
26644 *		so the command is still implemented in this routine.
26645 *
26646 *   Arguments: dev	- the device 'dev_t'
26647 *		data	- pointer to user provided toc entry structure,
26648 *			  specifying the track # and the address format
26649 *			  (LBA or MSF).
26650 *		flag	- this argument is a pass through to ddi_copyxxx()
26651 *		          directly from the mode argument of ioctl().
26652 *
26653 * Return Code: the code returned by sd_send_scsi_cmd()
26654 *		EFAULT if ddi_copyxxx() fails
26655 *		ENXIO if ddi_get_soft_state fails
26656 *		EINVAL if data pointer is NULL
26657 */
26658
26659static int
26660sr_read_tocentry(dev_t dev, caddr_t data, int flag)
26661{
26662	struct sd_lun		*un = NULL;
26663	struct uscsi_cmd	*com;
26664	struct cdrom_tocentry	toc_entry;
26665	struct cdrom_tocentry	*entry = &toc_entry;
26666	caddr_t			buffer;
26667	int			rval;
26668	char			cdb[CDB_GROUP1];
26669
26670	if (data == NULL) {
26671		return (EINVAL);
26672	}
26673
26674	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
26675	    (un->un_state == SD_STATE_OFFLINE)) {
26676		return (ENXIO);
26677	}
26678
26679	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
26680		return (EFAULT);
26681	}
26682
26683	/* Validate the requested track and address format */
26684	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
26685		return (EINVAL);
26686	}
26687
26688	if (entry->cdte_track == 0) {
26689		return (EINVAL);
26690	}
26691
26692	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
26693	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26694	bzero(cdb, CDB_GROUP1);
26695
26696	cdb[0] = SCMD_READ_TOC;
26697	/* Set the MSF bit based on the user requested address format  */
26698	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
26699	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
26700		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
26701	} else {
26702		cdb[6] = entry->cdte_track;
26703	}
26704
26705	/*
26706	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
26707	 * (4 byte TOC response header + 8 byte track descriptor)
26708	 */
26709	cdb[8] = 12;
26710	com->uscsi_cdb	   = cdb;
26711	com->uscsi_cdblen  = CDB_GROUP1;
26712	com->uscsi_bufaddr = buffer;
26713	com->uscsi_buflen  = 0x0C;
26714	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
26715	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26716	    SD_PATH_STANDARD);
26717	if (rval != 0) {
26718		kmem_free(buffer, 12);
26719		kmem_free(com, sizeof (*com));
26720		return (rval);
26721	}
26722
26723	/* Process the toc entry */
26724	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
26725	entry->cdte_ctrl	= (buffer[5] & 0x0F);
26726	if (entry->cdte_format & CDROM_LBA) {
26727		entry->cdte_addr.lba =
26728		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
26729		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
26730	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
26731		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
26732		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
26733		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
26734		/*
26735		 * Send a READ TOC command using the LBA address format to get
26736		 * the LBA for the track requested so it can be used in the
26737		 * READ HEADER request
26738		 *
26739		 * Note: The MSF bit of the READ HEADER command specifies the
26740		 * output format. The block address specified in that command
26741		 * must be in LBA format.
26742		 */
26743		cdb[1] = 0;
26744		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26745		    SD_PATH_STANDARD);
26746		if (rval != 0) {
26747			kmem_free(buffer, 12);
26748			kmem_free(com, sizeof (*com));
26749			return (rval);
26750		}
26751	} else {
26752		entry->cdte_addr.msf.minute	= buffer[9];
26753		entry->cdte_addr.msf.second	= buffer[10];
26754		entry->cdte_addr.msf.frame	= buffer[11];
26755		/*
26756		 * Send a READ TOC command using the LBA address format to get
26757		 * the LBA for the track requested so it can be used in the
26758		 * READ HEADER request
26759		 *
26760		 * Note: The MSF bit of the READ HEADER command specifies the
26761		 * output format. The block address specified in that command
26762		 * must be in LBA format.
26763		 */
26764		cdb[1] = 0;
26765		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26766		    SD_PATH_STANDARD);
26767		if (rval != 0) {
26768			kmem_free(buffer, 12);
26769			kmem_free(com, sizeof (*com));
26770			return (rval);
26771		}
26772	}
26773
26774	/*
26775	 * Build and send the READ HEADER command to determine the data mode of
26776	 * the user specified track.
26777	 */
26778	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
26779	    (entry->cdte_track != CDROM_LEADOUT)) {
26780		bzero(cdb, CDB_GROUP1);
26781		cdb[0] = SCMD_READ_HEADER;
26782		cdb[2] = buffer[8];
26783		cdb[3] = buffer[9];
26784		cdb[4] = buffer[10];
26785		cdb[5] = buffer[11];
26786		cdb[8] = 0x08;
26787		com->uscsi_buflen = 0x08;
26788		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26789		    SD_PATH_STANDARD);
26790		if (rval == 0) {
26791			entry->cdte_datamode = buffer[0];
26792		} else {
26793			/*
26794			 * The READ HEADER command failed. Since it is
26795			 * obsolete in one spec, it's better to return
26796			 * -1 for an invalid track so that we can still
26797			 * receive the rest of the TOC data.
26798			 */
26799			entry->cdte_datamode = (uchar_t)-1;
26800		}
26801	} else {
26802		entry->cdte_datamode = (uchar_t)-1;
26803	}
26804
26805	kmem_free(buffer, 12);
26806	kmem_free(com, sizeof (*com));
26807	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
26808		return (EFAULT);
26809
26810	return (rval);
26811}
26812
26813
26814/*
26815 *    Function: sr_read_tochdr()
26816 *
26817 * Description: This routine is the driver entry point for handling CD-ROM
26818 * 		ioctl requests to read the Table of Contents (TOC) header
26819 *		ioctl requests to read the Table of Contents (TOC) header
26820 *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
26821 *		and ending track numbers.
26822 *   Arguments: dev	- the device 'dev_t'
26823 *		data	- pointer to user provided toc header structure,
26824 *			  specifying the starting and ending track numbers.
26825 *		flag	- this argument is a pass through to ddi_copyxxx()
26826 *			  directly from the mode argument of ioctl().
26827 *
26828 * Return Code: the code returned by sd_send_scsi_cmd()
26829 *		EFAULT if ddi_copyxxx() fails
26830 *		ENXIO if ddi_get_soft_state fails
26831 *		EINVAL if data pointer is NULL
26832 */
26833
26834static int
26835sr_read_tochdr(dev_t dev, caddr_t data, int flag)
26836{
26837	struct sd_lun		*un;
26838	struct uscsi_cmd	*com;
26839	struct cdrom_tochdr	toc_header;
26840	struct cdrom_tochdr	*hdr = &toc_header;
26841	char			cdb[CDB_GROUP1];
26842	int			rval;
26843	caddr_t			buffer;
26844
26845	if (data == NULL) {
26846		return (EINVAL);
26847	}
26848
26849	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
26850	    (un->un_state == SD_STATE_OFFLINE)) {
26851		return (ENXIO);
26852	}
26853
26854	buffer = kmem_zalloc(4, KM_SLEEP);
26855	bzero(cdb, CDB_GROUP1);
26856	cdb[0] = SCMD_READ_TOC;
26857	/*
26858	 * Specifying a track number of 0x00 in the READ TOC command indicates
26859	 * that the TOC header should be returned
26860	 */
26861	cdb[6] = 0x00;
26862	/*
26863	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
26864	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
26865	 */
26866	cdb[8] = 0x04;
26867	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26868	com->uscsi_cdb	   = cdb;
26869	com->uscsi_cdblen  = CDB_GROUP1;
26870	com->uscsi_bufaddr = buffer;
26871	com->uscsi_buflen  = 0x04;
26872	com->uscsi_timeout = 300;
26873	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
26874
26875	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26876	    SD_PATH_STANDARD);
26877	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
26878		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
26879		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
26880	} else {
26881		hdr->cdth_trk0 = buffer[2];
26882		hdr->cdth_trk1 = buffer[3];
26883	}
26884	kmem_free(buffer, 4);
26885	kmem_free(com, sizeof (*com));
26886	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
26887		return (EFAULT);
26888	}
26889	return (rval);
26890}
26891
26892
26893/*
26894 * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
26895 * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
26896 * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
26897 * digital audio and extended architecture digital audio. These modes are
26898 * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
26899 * MMC specs.
26900 *
26901 * In addition to support for the various data formats these routines also
26902 * include support for devices that implement only the direct access READ
26903 * commands (0x08, 0x28), devices that implement the READ_CD commands
26904 * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
26905 * READ CDXA commands (0xD8, 0xDB)
26906 */
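
/*
 * Command selection at a glance (per the note above):
 *
 *	direct access only:	READ(6) / READ(10)	(0x08, 0x28)
 *	READ_CD capable:	READ CD / READ CD D4	(0xBE, 0xD4)
 *	vendor unique:		READ CDDA / READ CDXA	(0xD8, 0xDB)
 */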
26907
26908/*
26909 *    Function: sr_read_mode1()
26910 *
26911 * Description: This routine is the driver entry point for handling CD-ROM
26912 *		ioctl read mode1 requests (CDROMREADMODE1).
26913 *
26914 *   Arguments: dev	- the device 'dev_t'
26915 *		data	- pointer to user provided cd read structure specifying
26916 *			  the lba buffer address and length.
26917 *		flag	- this argument is a pass through to ddi_copyxxx()
26918 *			  directly from the mode argument of ioctl().
26919 *
26920 * Return Code: the code returned by sd_send_scsi_cmd()
26921 *		EFAULT if ddi_copyxxx() fails
26922 *		ENXIO if ddi_get_soft_state fails
26923 *		EINVAL if data pointer is NULL
26924 */
26925
26926static int
26927sr_read_mode1(dev_t dev, caddr_t data, int flag)
26928{
26929	struct sd_lun		*un;
26930	struct cdrom_read	mode1_struct;
26931	struct cdrom_read	*mode1 = &mode1_struct;
26932	int			rval;
26933	sd_ssc_t		*ssc;
26934
26935#ifdef _MULTI_DATAMODEL
26936	/* To support ILP32 applications in an LP64 world */
26937	struct cdrom_read32	cdrom_read32;
26938	struct cdrom_read32	*cdrd32 = &cdrom_read32;
26939#endif /* _MULTI_DATAMODEL */
26940
26941	if (data == NULL) {
26942		return (EINVAL);
26943	}
26944
26945	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
26946	    (un->un_state == SD_STATE_OFFLINE)) {
26947		return (ENXIO);
26948	}
26949
26950	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
26951	    "sr_read_mode1: entry: un:0x%p\n", un);
26952
26953#ifdef _MULTI_DATAMODEL
26954	switch (ddi_model_convert_from(flag & FMODELS)) {
26955	case DDI_MODEL_ILP32:
26956		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
26957			return (EFAULT);
26958		}
26959		/* Convert the ILP32 uscsi data from the application to LP64 */
26960		/* Convert the ILP32 cdrom_read from the application to LP64 */
26961		break;
26962	case DDI_MODEL_NONE:
26963		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
26964			return (EFAULT);
26965		}
26966	}
26967#else /* ! _MULTI_DATAMODEL */
26968	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
26969		return (EFAULT);
26970	}
26971#endif /* _MULTI_DATAMODEL */
26972
26973	ssc = sd_ssc_init(un);
26974	rval = sd_send_scsi_READ(ssc, mode1->cdread_bufaddr,
26975	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
26976	sd_ssc_fini(ssc);
26977
26978	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
26979	    "sr_read_mode1: exit: un:0x%p\n", un);
26980
26981	return (rval);
26982}
26983
26984
26985/*
26986 *    Function: sr_read_cd_mode2()
26987 *
26988 * Description: This routine is the driver entry point for handling CD-ROM
26989 *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
26990 *		support the READ CD (0xBE) command or the 1st generation
26991 *		READ CD (0xD4) command.
26992 *
26993 *   Arguments: dev	- the device 'dev_t'
26994 *		data	- pointer to user provided cd read structure specifying
26995 *			  the lba buffer address and length.
26996 *		flag	- this argument is a pass through to ddi_copyxxx()
26997 *			  directly from the mode argument of ioctl().
26998 *
26999 * Return Code: the code returned by sd_send_scsi_cmd()
27000 *		EFAULT if ddi_copyxxx() fails
27001 *		ENXIO if ddi_get_soft_state fails
27002 *		EINVAL if data pointer is NULL
27003 */
27004
27005static int
27006sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
27007{
27008	struct sd_lun		*un;
27009	struct uscsi_cmd	*com;
27010	struct cdrom_read	mode2_struct;
27011	struct cdrom_read	*mode2 = &mode2_struct;
27012	uchar_t			cdb[CDB_GROUP5];
27013	int			nblocks;
27014	int			rval;
27015#ifdef _MULTI_DATAMODEL
27016	/*  To support ILP32 applications in an LP64 world */
27017	struct cdrom_read32	cdrom_read32;
27018	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27019#endif /* _MULTI_DATAMODEL */
27020
27021	if (data == NULL) {
27022		return (EINVAL);
27023	}
27024
27025	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27026	    (un->un_state == SD_STATE_OFFLINE)) {
27027		return (ENXIO);
27028	}
27029
27030#ifdef _MULTI_DATAMODEL
27031	switch (ddi_model_convert_from(flag & FMODELS)) {
27032	case DDI_MODEL_ILP32:
27033		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
27034			return (EFAULT);
27035		}
27036		/* Convert the ILP32 uscsi data from the application to LP64 */
27037		/* Convert the ILP32 cdrom_read from the application to LP64 */
27038		break;
27039	case DDI_MODEL_NONE:
27040		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
27041			return (EFAULT);
27042		}
27043		break;
27044	}
27045
27046#else /* ! _MULTI_DATAMODEL */
27047	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
27048		return (EFAULT);
27049	}
27050#endif /* _MULTI_DATAMODEL */
27051
27052	bzero(cdb, sizeof (cdb));
27053	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
27054		/* Read command supported by 1st generation atapi drives */
27055		cdb[0] = SCMD_READ_CDD4;
27056	} else {
27057		/* Universal CD Access Command */
27058		cdb[0] = SCMD_READ_CD;
27059	}
27060
27061	/*
27062	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
27063	 */
27064	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
27065
27066	/* set the start address */
27067	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
27068	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
27069	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
27070	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
27071
27072	/* set the transfer length */
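	/*
	 * A Yellow Book Mode 2 sector carries 2336 bytes of user data
	 * (2352 raw bytes minus the 16 bytes of sync and header).
	 */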
27073	nblocks = mode2->cdread_buflen / 2336;
27074	cdb[6] = (uchar_t)(nblocks >> 16);
27075	cdb[7] = (uchar_t)(nblocks >> 8);
27076	cdb[8] = (uchar_t)nblocks;
27077
27078	/* set the filter bits */
27079	cdb[9] = CDROM_READ_CD_USERDATA;
27080
27081	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27082	com->uscsi_cdb = (caddr_t)cdb;
27083	com->uscsi_cdblen = sizeof (cdb);
27084	com->uscsi_bufaddr = mode2->cdread_bufaddr;
27085	com->uscsi_buflen = mode2->cdread_buflen;
27086	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27087
27088	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27089	    SD_PATH_STANDARD);
27090	kmem_free(com, sizeof (*com));
27091	return (rval);
27092}
27093
27094
27095/*
27096 *    Function: sr_read_mode2()
27097 *
27098 * Description: This routine is the driver entry point for handling CD-ROM
27099 *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
27100 *		do not support the READ CD (0xBE) command.
27101 *
27102 *   Arguments: dev	- the device 'dev_t'
27103 *		data	- pointer to user provided cd read structure specifying
27104 *			  the lba buffer address and length.
27105 *		flag	- this argument is a pass through to ddi_copyxxx()
27106 *			  directly from the mode argument of ioctl().
27107 *
27108 * Return Code: the code returned by sd_send_scsi_cmd()
27109 *		EFAULT if ddi_copyxxx() fails
27110 *		ENXIO if ddi_get_soft_state fails
27111 *		EINVAL if data pointer is NULL
27112 *		EIO if fail to reset block size
27113 *		EAGAIN if commands are in progress in the driver
27114 */
27115
27116static int
27117sr_read_mode2(dev_t dev, caddr_t data, int flag)
27118{
27119	struct sd_lun		*un;
27120	struct cdrom_read	mode2_struct;
27121	struct cdrom_read	*mode2 = &mode2_struct;
27122	int			rval;
27123	uint32_t		restore_blksize;
27124	struct uscsi_cmd	*com;
27125	uchar_t			cdb[CDB_GROUP0];
27126	int			nblocks;
27127
27128#ifdef _MULTI_DATAMODEL
27129	/* To support ILP32 applications in an LP64 world */
27130	struct cdrom_read32	cdrom_read32;
27131	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27132#endif /* _MULTI_DATAMODEL */
27133
27134	if (data == NULL) {
27135		return (EINVAL);
27136	}
27137
27138	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27139	    (un->un_state == SD_STATE_OFFLINE)) {
27140		return (ENXIO);
27141	}
27142
27143	/*
27144	 * Because this routine will update the device and driver block size
27145	 * being used, we want to make sure there are no commands in progress.
27146	 * If commands are in progress the user will have to try again.
27147	 *
27148	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
27149	 * in sdioctl to protect commands from sdioctl through to the top of
27150	 * sd_uscsi_strategy. See sdioctl for details.
27151	 */
27152	mutex_enter(SD_MUTEX(un));
27153	if (un->un_ncmds_in_driver != 1) {
27154		mutex_exit(SD_MUTEX(un));
27155		return (EAGAIN);
27156	}
27157	mutex_exit(SD_MUTEX(un));
27158
27159	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27160	    "sr_read_mode2: entry: un:0x%p\n", un);
27161
27162#ifdef _MULTI_DATAMODEL
27163	switch (ddi_model_convert_from(flag & FMODELS)) {
27164	case DDI_MODEL_ILP32:
27165		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
27166			return (EFAULT);
27167		}
27168		/* Convert the ILP32 uscsi data from the application to LP64 */
27169		/* Convert the ILP32 cdrom_read from the application to LP64 */
27170		break;
27171	case DDI_MODEL_NONE:
27172		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
27173			return (EFAULT);
27174		}
27175		break;
27176	}
27177#else /* ! _MULTI_DATAMODEL */
27178	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
27179		return (EFAULT);
27180	}
27181#endif /* _MULTI_DATAMODEL */
27182
27183	/* Store the current target block size for restoration later */
27184	restore_blksize = un->un_tgt_blocksize;
27185
27186	/* Change the device and soft state target block size to 2336 */
27187	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
27188		rval = EIO;
27189		goto done;
27190	}
27191
27193	bzero(cdb, sizeof (cdb));
27194
27195	/* set READ operation */
27196	cdb[0] = SCMD_READ;
27197
27198	/* adjust lba for 2kbyte blocks from 512 byte blocks */
27199	mode2->cdread_lba >>= 2;
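	/* e.g. a 512 byte based LBA of 100 becomes 2 KB based LBA 25 */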
27200
27201	/* set the start address */
27202	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
27203	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
27204	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
27205
27206	/* set the transfer length */
27207	nblocks = mode2->cdread_buflen / 2336;
27208	cdb[4] = (uchar_t)nblocks & 0xFF;
27209
27210	/* build command */
27211	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27212	com->uscsi_cdb = (caddr_t)cdb;
27213	com->uscsi_cdblen = sizeof (cdb);
27214	com->uscsi_bufaddr = mode2->cdread_bufaddr;
27215	com->uscsi_buflen = mode2->cdread_buflen;
27216	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27217
27218	/*
27219	 * Issue SCSI command with user space address for read buffer.
27220	 *
27221	 * This sends the command through main channel in the driver.
27222	 *
27223	 * Since this is accessed via an IOCTL call, we go through the
27224	 * standard path, so that if the device was powered down, then
27225	 * it would be 'awakened' to handle the command.
27226	 */
27227	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27228	    SD_PATH_STANDARD);
27229
27230	kmem_free(com, sizeof (*com));
27231
27232	/* Restore the device and soft state target block size */
27233	if (sr_sector_mode(dev, restore_blksize) != 0) {
27234		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27235		    "can't do switch back to mode 1\n");
27236		/*
27237		 * If sd_send_scsi_READ succeeded we still need to report
27238		 * an error because we failed to reset the block size
27239		 */
27240		if (rval == 0) {
27241			rval = EIO;
27242		}
27243	}
27244
27245done:
27246	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27247	    "sr_read_mode2: exit: un:0x%p\n", un);
27248
27249	return (rval);
27250}
27251
27252
27253/*
27254 *    Function: sr_sector_mode()
27255 *
27256 * Description: This utility function is used by sr_read_mode2 to set the target
27257 *		block size based on the user specified size. This is a legacy
27258 *		implementation based upon a vendor specific mode page
27259 *		implementation based upon a vendor specific mode page.
27260 *   Arguments: dev	- the device 'dev_t'
27261 *		data	- flag indicating if block size is being set to 2336 or
27262 *		blksize	- the block size to be set; either 2336 or
27263 *			  512.
27264 * Return Code: the code returned by sd_send_scsi_cmd()
27265 *		ENXIO if ddi_get_soft_state fails
27268 */
27269
27270static int
27271sr_sector_mode(dev_t dev, uint32_t blksize)
27272{
27273	struct sd_lun	*un;
27274	uchar_t		*sense;
27275	uchar_t		*select;
27276	int		rval;
27277	sd_ssc_t	*ssc;
27278
27279	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27280	    (un->un_state == SD_STATE_OFFLINE)) {
27281		return (ENXIO);
27282	}
27283
27284	sense = kmem_zalloc(20, KM_SLEEP);
27285
27286	/* Note: This is a vendor specific mode page (0x81) */
27287	ssc = sd_ssc_init(un);
27288	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, 20, 0x81,
27289	    SD_PATH_STANDARD);
27290	sd_ssc_fini(ssc);
27291	if (rval != 0) {
27292		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
27293		    "sr_sector_mode: Mode Sense failed\n");
27294		kmem_free(sense, 20);
27295		return (rval);
27296	}
27297	select = kmem_zalloc(20, KM_SLEEP);
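	/*
	 * Build the select buffer from the sensed data: byte 3 sets an
	 * 8 byte block descriptor length, descriptor bytes 10-11 carry the
	 * new block size, and the vendor mode page starts at offset 12.
	 * Bit 0 of byte 14 selects the 2336 byte (mode 2) sector size.
	 */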
27298	select[3] = 0x08;
27299	select[10] = ((blksize >> 8) & 0xff);
27300	select[11] = (blksize & 0xff);
27301	select[12] = 0x01;
27302	select[13] = 0x06;
27303	select[14] = sense[14];
27304	select[15] = sense[15];
27305	if (blksize == SD_MODE2_BLKSIZE) {
27306		select[14] |= 0x01;
27307	}
27308
27309	ssc = sd_ssc_init(un);
27310	rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select, 20,
27311	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27312	sd_ssc_fini(ssc);
27313	if (rval != 0) {
27314		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
27315		    "sr_sector_mode: Mode Select failed\n");
27316	} else {
27317		/*
27318		 * Only update the softstate block size if we successfully
27319		 * changed the device block mode.
27320		 */
27321		mutex_enter(SD_MUTEX(un));
27322		sd_update_block_info(un, blksize, 0);
27323		mutex_exit(SD_MUTEX(un));
27324	}
27325	kmem_free(sense, 20);
27326	kmem_free(select, 20);
27327	return (rval);
27328}
27329
27330
27331/*
27332 *    Function: sr_read_cdda()
27333 *
27334 * Description: This routine is the driver entry point for handling CD-ROM
27335 *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
27336 *		the target supports CDDA these requests are handled via a vendor
27337 *		specific command (0xD8) If the target does not support CDDA
27338 *		these requests are handled via the READ CD command (0xBE).
27339 *
27340 *   Arguments: dev	- the device 'dev_t'
27341 *		data	- pointer to user provided CD-DA structure specifying
27342 *			  the track starting address, transfer length, and
27343 *			  subcode options.
27344 *		flag	- this argument is a pass through to ddi_copyxxx()
27345 *			  directly from the mode argument of ioctl().
27346 *
27347 * Return Code: the code returned by sd_send_scsi_cmd()
27348 *		EFAULT if ddi_copyxxx() fails
27349 *		ENXIO if ddi_get_soft_state fails
27350 *		EINVAL if invalid arguments are provided
27351 *		ENOTTY if the requested subcode format is not supported
27352 */
27353
27354static int
27355sr_read_cdda(dev_t dev, caddr_t data, int flag)
27356{
27357	struct sd_lun			*un;
27358	struct uscsi_cmd		*com;
27359	struct cdrom_cdda		*cdda;
27360	int				rval;
27361	size_t				buflen;
27362	char				cdb[CDB_GROUP5];
27363
27364#ifdef _MULTI_DATAMODEL
27365	/* To support ILP32 applications in an LP64 world */
27366	struct cdrom_cdda32	cdrom_cdda32;
27367	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
27368#endif /* _MULTI_DATAMODEL */
27369
27370	if (data == NULL) {
27371		return (EINVAL);
27372	}
27373
27374	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27375		return (ENXIO);
27376	}
27377
27378	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
27379
27380#ifdef _MULTI_DATAMODEL
27381	switch (ddi_model_convert_from(flag & FMODELS)) {
27382	case DDI_MODEL_ILP32:
27383		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
27384			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27385			    "sr_read_cdda: ddi_copyin Failed\n");
27386			kmem_free(cdda, sizeof (struct cdrom_cdda));
27387			return (EFAULT);
27388		}
27389		/* Convert the ILP32 uscsi data from the application to LP64 */
27390		/* Convert the ILP32 cdrom_cdda from the application to LP64 */
27391		break;
27392	case DDI_MODEL_NONE:
27393		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
27394			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27395			    "sr_read_cdda: ddi_copyin Failed\n");
27396			kmem_free(cdda, sizeof (struct cdrom_cdda));
27397			return (EFAULT);
27398		}
27399		break;
27400	}
27401#else /* ! _MULTI_DATAMODEL */
27402	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
27403		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27404		    "sr_read_cdda: ddi_copyin Failed\n");
27405		kmem_free(cdda, sizeof (struct cdrom_cdda));
27406		return (EFAULT);
27407	}
27408#endif /* _MULTI_DATAMODEL */
27409
27410	/*
27411	 * Since MMC-2 allows at most 3 bytes for the transfer length, reject
27412	 * a length that does not fit in 3 bytes
27413	 */
27414	if ((cdda->cdda_length & 0xFF000000) != 0) {
27415		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
27416		    "cdrom transfer length too large: %d (limit %d)\n",
27417		    cdda->cdda_length, 0xFFFFFF);
27418		kmem_free(cdda, sizeof (struct cdrom_cdda));
27419		return (EINVAL);
27420	}
27421
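	/*
	 * Per-block transfer sizes: 2352 bytes of audio alone, plus 16
	 * bytes for formatted Q subcode (2368), plus 96 bytes for all raw
	 * P-W subcode (2448), or 96 bytes of subcode only.
	 */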
27422	switch (cdda->cdda_subcode) {
27423	case CDROM_DA_NO_SUBCODE:
27424		buflen = CDROM_BLK_2352 * cdda->cdda_length;
27425		break;
27426	case CDROM_DA_SUBQ:
27427		buflen = CDROM_BLK_2368 * cdda->cdda_length;
27428		break;
27429	case CDROM_DA_ALL_SUBCODE:
27430		buflen = CDROM_BLK_2448 * cdda->cdda_length;
27431		break;
27432	case CDROM_DA_SUBCODE_ONLY:
27433		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
27434		break;
27435	default:
27436		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27437		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
27438		    cdda->cdda_subcode);
27439		kmem_free(cdda, sizeof (struct cdrom_cdda));
27440		return (EINVAL);
27441	}
27442
27443	/* Build and send the command */
27444	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27445	bzero(cdb, CDB_GROUP5);
27446
27447	if (un->un_f_cfg_cdda == TRUE) {
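		/*
		 * Build a READ CD (0xBE) CDB.  Per MMC: byte 1 = 0x04
		 * selects the CD-DA expected sector type, bytes 2-5 are the
		 * starting LBA, bytes 6-8 are the 3-byte transfer length,
		 * byte 9 = 0x10 requests the user data only, and byte 10
		 * selects the sub-channel data to return (set below).
		 */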
27448		cdb[0] = (char)SCMD_READ_CD;
27449		cdb[1] = 0x04;
27450		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
27451		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
27452		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
27453		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
27454		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
27455		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
27456		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
27457		cdb[9] = 0x10;
27458		switch (cdda->cdda_subcode) {
27459		case CDROM_DA_NO_SUBCODE :
27460			cdb[10] = 0x0;
27461			break;
27462		case CDROM_DA_SUBQ :
27463			cdb[10] = 0x2;
27464			break;
27465		case CDROM_DA_ALL_SUBCODE :
27466			cdb[10] = 0x1;
27467			break;
27468		case CDROM_DA_SUBCODE_ONLY :
27469			/* FALLTHROUGH */
27470		default :
27471			kmem_free(cdda, sizeof (struct cdrom_cdda));
27472			kmem_free(com, sizeof (*com));
27473			return (ENOTTY);
27474		}
27475	} else {
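		/*
		 * Build the vendor specific READ CDDA (0xD8) CDB: bytes 2-5
		 * are the starting LBA, bytes 6-9 carry a full 4-byte
		 * transfer length, and byte 10 selects the subcode data.
		 */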
27476		cdb[0] = (char)SCMD_READ_CDDA;
27477		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
27478		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
27479		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
27480		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
27481		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
27482		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
27483		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
27484		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
27485		cdb[10] = cdda->cdda_subcode;
27486	}
27487
27488	com->uscsi_cdb = cdb;
27489	com->uscsi_cdblen = CDB_GROUP5;
27490	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
27491	com->uscsi_buflen = buflen;
27492	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27493
27494	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27495	    SD_PATH_STANDARD);
27496
27497	kmem_free(cdda, sizeof (struct cdrom_cdda));
27498	kmem_free(com, sizeof (*com));
27499	return (rval);
27500}
27501
27502
27503/*
27504 *    Function: sr_read_cdxa()
27505 *
27506 * Description: This routine is the driver entry point for handling CD-ROM
27507 *		ioctl requests to return CD-XA (Extended Architecture) data.
27508 *		(CDROMCDXA).
27509 *
27510 *   Arguments: dev	- the device 'dev_t'
27511 *		data	- pointer to user provided CD-XA structure specifying
27512 *			  the data starting address, transfer length, and format
27513 *		flag	- this argument is a pass through to ddi_copyxxx()
27514 *			  directly from the mode argument of ioctl().
27515 *
27516 * Return Code: the code returned by sd_send_scsi_cmd()
27517 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state fails
27519 *		EINVAL if data pointer is NULL
27520 */
27521
27522static int
27523sr_read_cdxa(dev_t dev, caddr_t data, int flag)
27524{
27525	struct sd_lun		*un;
27526	struct uscsi_cmd	*com;
27527	struct cdrom_cdxa	*cdxa;
27528	int			rval;
27529	size_t			buflen;
27530	char			cdb[CDB_GROUP5];
27531	uchar_t			read_flags;
27532
27533#ifdef _MULTI_DATAMODEL
27534	/* To support ILP32 applications in an LP64 world */
27535	struct cdrom_cdxa32		cdrom_cdxa32;
27536	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
27537#endif /* _MULTI_DATAMODEL */
27538
27539	if (data == NULL) {
27540		return (EINVAL);
27541	}
27542
27543	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27544		return (ENXIO);
27545	}
27546
27547	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
27548
27549#ifdef _MULTI_DATAMODEL
27550	switch (ddi_model_convert_from(flag & FMODELS)) {
27551	case DDI_MODEL_ILP32:
27552		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
27553			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
27554			return (EFAULT);
27555		}
27556		/*
27557		 * Convert the ILP32 uscsi data from the
27558		 * application to LP64 for internal use.
27559		 */
27560		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
27561		break;
27562	case DDI_MODEL_NONE:
27563		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
27564			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
27565			return (EFAULT);
27566		}
27567		break;
27568	}
27569#else /* ! _MULTI_DATAMODEL */
27570	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
27571		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
27572		return (EFAULT);
27573	}
27574#endif /* _MULTI_DATAMODEL */
27575
27576	/*
	 * MMC-2 allows at most 3 bytes for the transfer length, so
	 * reject any length value that does not fit in 3 bytes.
27579	 */
27580	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
27581		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
27582		    "cdrom transfer length too large: %d (limit %d)\n",
27583		    cdxa->cdxa_length, 0xFFFFFF);
27584		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
27585		return (EINVAL);
27586	}
27587
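	/*
	 * Map the requested format to a buffer size and, for the MMC
	 * READ CD path, the byte-9 flags: per MMC, 0x10 returns user data
	 * only (2048 bytes per block), 0xf8 returns the full 2352-byte
	 * raw sector, and 0xfc additionally returns the C2 error flag
	 * data (2352 + 294 bytes per block).
	 */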
27588	switch (cdxa->cdxa_format) {
27589	case CDROM_XA_DATA:
27590		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
27591		read_flags = 0x10;
27592		break;
27593	case CDROM_XA_SECTOR_DATA:
27594		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
27595		read_flags = 0xf8;
27596		break;
27597	case CDROM_XA_DATA_W_ERROR:
27598		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
27599		read_flags = 0xfc;
27600		break;
27601	default:
27602		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27603		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
27604		    cdxa->cdxa_format);
27605		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
27606		return (EINVAL);
27607	}
27608
27609	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27610	bzero(cdb, CDB_GROUP5);
27611	if (un->un_f_mmc_cap == TRUE) {
27612		cdb[0] = (char)SCMD_READ_CD;
27613		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
27614		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
27615		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
27616		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
27617		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
27618		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
27619		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
27620		cdb[9] = (char)read_flags;
27621	} else {
27622		/*
		 * Note: A vendor specific command (0xDB) is being used here to
27624		 * request a read of all subcodes.
27625		 */
27626		cdb[0] = (char)SCMD_READ_CDXA;
27627		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
27628		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
27629		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
27630		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
27631		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
27632		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
27633		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
27634		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
27635		cdb[10] = cdxa->cdxa_format;
27636	}
27637	com->uscsi_cdb	   = cdb;
27638	com->uscsi_cdblen  = CDB_GROUP5;
27639	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
27640	com->uscsi_buflen  = buflen;
27641	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27642	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27643	    SD_PATH_STANDARD);
27644	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
27645	kmem_free(com, sizeof (*com));
27646	return (rval);
27647}
27648
27649
27650/*
27651 *    Function: sr_eject()
27652 *
27653 * Description: This routine is the driver entry point for handling CD-ROM
27654 *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
27655 *
27656 *   Arguments: dev	- the device 'dev_t'
27657 *
27658 * Return Code: the code returned by sd_send_scsi_cmd()
27659 */
27660
27661static int
27662sr_eject(dev_t dev)
27663{
27664	struct sd_lun	*un;
27665	int		rval;
27666	sd_ssc_t	*ssc;
27667
27668	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27669	    (un->un_state == SD_STATE_OFFLINE)) {
27670		return (ENXIO);
27671	}
27672
27673	/*
27674	 * To prevent race conditions with the eject
27675	 * command, keep track of an eject command as
27676	 * it progresses. If we are already handling
27677	 * an eject command in the driver for the given
	 * unit and another request to eject is received,
27679	 * immediately return EAGAIN so we don't lose
27680	 * the command if the current eject command fails.
27681	 */
27682	mutex_enter(SD_MUTEX(un));
27683	if (un->un_f_ejecting == TRUE) {
27684		mutex_exit(SD_MUTEX(un));
27685		return (EAGAIN);
27686	}
27687	un->un_f_ejecting = TRUE;
27688	mutex_exit(SD_MUTEX(un));
27689
27690	ssc = sd_ssc_init(un);
27691	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
27692	    SD_PATH_STANDARD);
27693	sd_ssc_fini(ssc);
27694
27695	if (rval != 0) {
27696		mutex_enter(SD_MUTEX(un));
27697		un->un_f_ejecting = FALSE;
27698		mutex_exit(SD_MUTEX(un));
27699		return (rval);
27700	}
27701
27702	ssc = sd_ssc_init(un);
27703	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_TARGET_EJECT,
27704	    SD_PATH_STANDARD);
27705	sd_ssc_fini(ssc);
27706
27707	if (rval == 0) {
27708		mutex_enter(SD_MUTEX(un));
27709		sr_ejected(un);
27710		un->un_mediastate = DKIO_EJECTED;
27711		un->un_f_ejecting = FALSE;
27712		cv_broadcast(&un->un_state_cv);
27713		mutex_exit(SD_MUTEX(un));
27714	} else {
27715		mutex_enter(SD_MUTEX(un));
27716		un->un_f_ejecting = FALSE;
27717		mutex_exit(SD_MUTEX(un));
27718	}
27719	return (rval);
27720}
27721
27722
27723/*
27724 *    Function: sr_ejected()
27725 *
27726 * Description: This routine updates the soft state structure to invalidate the
27727 *		geometry information after the media has been ejected or a
27728 *		media eject has been detected.
27729 *
27730 *   Arguments: un - driver soft state (unit) structure
27731 */
27732
27733static void
27734sr_ejected(struct sd_lun *un)
27735{
27736	struct sd_errstats *stp;
27737
27738	ASSERT(un != NULL);
27739	ASSERT(mutex_owned(SD_MUTEX(un)));
27740
27741	un->un_f_blockcount_is_valid	= FALSE;
27742	un->un_f_tgt_blocksize_is_valid	= FALSE;
27743	mutex_exit(SD_MUTEX(un));
27744	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
27745	mutex_enter(SD_MUTEX(un));
27746
27747	if (un->un_errstats != NULL) {
27748		stp = (struct sd_errstats *)un->un_errstats->ks_data;
27749		stp->sd_capacity.value.ui64 = 0;
27750	}
27751}
27752
27753
27754/*
27755 *    Function: sr_check_wp()
27756 *
 * Description: This routine checks the write protection of removable
 *      media disks and hotpluggable devices via the write protect bit of
 *      the Mode Page Header device specific field. Some devices choke
 *      on an unsupported mode page. To work around this issue, this
 *      routine uses the 0x3f mode page (request for all pages) for all
 *      device types.
27763 *
27764 *   Arguments: dev             - the device 'dev_t'
27765 *
27766 * Return Code: int indicating if the device is write protected (1) or not (0)
27767 *
27768 *     Context: Kernel thread.
27769 *
27770 */
27771
27772static int
27773sr_check_wp(dev_t dev)
27774{
27775	struct sd_lun	*un;
27776	uchar_t		device_specific;
27777	uchar_t		*sense;
27778	int		hdrlen;
27779	int		rval = FALSE;
27780	int		status;
27781	sd_ssc_t	*ssc;
27782
27783	/*
27784	 * Note: The return codes for this routine should be reworked to
27785	 * properly handle the case of a NULL softstate.
27786	 */
27787	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27788		return (FALSE);
27789	}
27790
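	/*
	 * ATAPI devices implement the 10-byte MODE SENSE (CDB group 1),
	 * which returns the longer grp2 mode parameter header; other
	 * devices are sent the 6-byte form with the short header.
	 */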
27791	if (un->un_f_cfg_is_atapi == TRUE) {
27792		/*
27793		 * The mode page contents are not required; set the allocation
27794		 * length for the mode page header only
27795		 */
27796		hdrlen = MODE_HEADER_LENGTH_GRP2;
27797		sense = kmem_zalloc(hdrlen, KM_SLEEP);
27798		ssc = sd_ssc_init(un);
27799		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense, hdrlen,
27800		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
27801		sd_ssc_fini(ssc);
27802		if (status != 0)
27803			goto err_exit;
27804		device_specific =
27805		    ((struct mode_header_grp2 *)sense)->device_specific;
27806	} else {
27807		hdrlen = MODE_HEADER_LENGTH;
27808		sense = kmem_zalloc(hdrlen, KM_SLEEP);
27809		ssc = sd_ssc_init(un);
27810		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, hdrlen,
27811		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
27812		sd_ssc_fini(ssc);
27813		if (status != 0)
27814			goto err_exit;
27815		device_specific =
27816		    ((struct mode_header *)sense)->device_specific;
27817	}
27818
27819
	/*
	 * If the mode sense failed above we jumped to err_exit and will
	 * return FALSE; not all devices understand this query, and such
	 * devices are assumed not writable.  Otherwise, test the write
	 * protect bit of the device specific field.
	 */
27825	if (device_specific & WRITE_PROTECT) {
27826		rval = TRUE;
27827	}
27828
27829err_exit:
27830	kmem_free(sense, hdrlen);
27831	return (rval);
27832}
27833
27834/*
27835 *    Function: sr_volume_ctrl()
27836 *
27837 * Description: This routine is the driver entry point for handling CD-ROM
27838 *		audio output volume ioctl requests. (CDROMVOLCTRL)
27839 *
27840 *   Arguments: dev	- the device 'dev_t'
27841 *		data	- pointer to user audio volume control structure
27842 *		flag	- this argument is a pass through to ddi_copyxxx()
27843 *			  directly from the mode argument of ioctl().
27844 *
27845 * Return Code: the code returned by sd_send_scsi_cmd()
27846 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state fails
27848 *		EINVAL if data pointer is NULL
27849 *
27850 */
27851
27852static int
27853sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
27854{
27855	struct sd_lun		*un;
27856	struct cdrom_volctrl    volume;
27857	struct cdrom_volctrl    *vol = &volume;
27858	uchar_t			*sense_page;
27859	uchar_t			*select_page;
27860	uchar_t			*sense;
27861	uchar_t			*select;
27862	int			sense_buflen;
27863	int			select_buflen;
27864	int			rval;
27865	sd_ssc_t		*ssc;
27866
27867	if (data == NULL) {
27868		return (EINVAL);
27869	}
27870
27871	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27872	    (un->un_state == SD_STATE_OFFLINE)) {
27873		return (ENXIO);
27874	}
27875
27876	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
27877		return (EFAULT);
27878	}
27879
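	/*
	 * Read the current audio control mode page, patch in the user
	 * supplied volume levels for channels 0 and 1, then write the
	 * page back with MODE SELECT.  MMC/ATAPI devices use the 10-byte
	 * (group 1) mode commands; other devices use the 6-byte (group 0)
	 * forms.
	 */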
27880	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
27881		struct mode_header_grp2		*sense_mhp;
27882		struct mode_header_grp2		*select_mhp;
27883		int				bd_len;
27884
27885		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
27886		select_buflen = MODE_HEADER_LENGTH_GRP2 +
27887		    MODEPAGE_AUDIO_CTRL_LEN;
27888		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
27889		select = kmem_zalloc(select_buflen, KM_SLEEP);
27890		ssc = sd_ssc_init(un);
27891		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
27892		    sense_buflen, MODEPAGE_AUDIO_CTRL,
27893		    SD_PATH_STANDARD);
27894		sd_ssc_fini(ssc);
27895
27896		if (rval != 0) {
27897			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
27898			    "sr_volume_ctrl: Mode Sense Failed\n");
27899			kmem_free(sense, sense_buflen);
27900			kmem_free(select, select_buflen);
27901			return (rval);
27902		}
27903		sense_mhp = (struct mode_header_grp2 *)sense;
27904		select_mhp = (struct mode_header_grp2 *)select;
27905		bd_len = (sense_mhp->bdesc_length_hi << 8) |
27906		    sense_mhp->bdesc_length_lo;
27907		if (bd_len > MODE_BLK_DESC_LENGTH) {
27908			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27909			    "sr_volume_ctrl: Mode Sense returned invalid "
27910			    "block descriptor length\n");
27911			kmem_free(sense, sense_buflen);
27912			kmem_free(select, select_buflen);
27913			return (EIO);
27914		}
27915		sense_page = (uchar_t *)
27916		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27917		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
27918		select_mhp->length_msb = 0;
27919		select_mhp->length_lsb = 0;
27920		select_mhp->bdesc_length_hi = 0;
27921		select_mhp->bdesc_length_lo = 0;
27922	} else {
27923		struct mode_header		*sense_mhp, *select_mhp;
27924
27925		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
27926		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
27927		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
27928		select = kmem_zalloc(select_buflen, KM_SLEEP);
27929		ssc = sd_ssc_init(un);
27930		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
27931		    sense_buflen, MODEPAGE_AUDIO_CTRL,
27932		    SD_PATH_STANDARD);
27933		sd_ssc_fini(ssc);
27934
27935		if (rval != 0) {
27936			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27937			    "sr_volume_ctrl: Mode Sense Failed\n");
27938			kmem_free(sense, sense_buflen);
27939			kmem_free(select, select_buflen);
27940			return (rval);
27941		}
27942		sense_mhp  = (struct mode_header *)sense;
27943		select_mhp = (struct mode_header *)select;
27944		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
27945			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27946			    "sr_volume_ctrl: Mode Sense returned invalid "
27947			    "block descriptor length\n");
27948			kmem_free(sense, sense_buflen);
27949			kmem_free(select, select_buflen);
27950			return (EIO);
27951		}
27952		sense_page = (uchar_t *)
27953		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27954		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
27955		select_mhp->length = 0;
27956		select_mhp->bdesc_length = 0;
27957	}
27958	/*
	 * Note: An audio control data structure could be created and overlaid
27960	 * on the following in place of the array indexing method implemented.
27961	 */
27962
27963	/* Build the select data for the user volume data */
27964	select_page[0] = MODEPAGE_AUDIO_CTRL;
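	/* Page length field: 0xE (14) bytes of page data follow */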
27965	select_page[1] = 0xE;
27966	/* Set the immediate bit */
27967	select_page[2] = 0x04;
27968	/* Zero out reserved fields */
27969	select_page[3] = 0x00;
27970	select_page[4] = 0x00;
27971	/* Return sense data for fields not to be modified */
27972	select_page[5] = sense_page[5];
27973	select_page[6] = sense_page[6];
27974	select_page[7] = sense_page[7];
27975	/* Set the user specified volume levels for channel 0 and 1 */
27976	select_page[8] = 0x01;
27977	select_page[9] = vol->channel0;
27978	select_page[10] = 0x02;
27979	select_page[11] = vol->channel1;
	/* Channels 2 and 3 are currently unsupported, so return the sense data */
27981	select_page[12] = sense_page[12];
27982	select_page[13] = sense_page[13];
27983	select_page[14] = sense_page[14];
27984	select_page[15] = sense_page[15];
27985
27986	ssc = sd_ssc_init(un);
27987	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
27988		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, select,
27989		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27990	} else {
27991		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27992		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27993	}
27994	sd_ssc_fini(ssc);
27995
27996	kmem_free(sense, sense_buflen);
27997	kmem_free(select, select_buflen);
27998	return (rval);
27999}
28000
28001
28002/*
28003 *    Function: sr_read_sony_session_offset()
28004 *
28005 * Description: This routine is the driver entry point for handling CD-ROM
28006 *		ioctl requests for session offset information. (CDROMREADOFFSET)
28007 *		The address of the first track in the last session of a
28008 *		multi-session CD-ROM is returned
28009 *
28010 *		Note: This routine uses a vendor specific key value in the
28011 *		command control field without implementing any vendor check here
28012 *		or in the ioctl routine.
28013 *
28014 *   Arguments: dev	- the device 'dev_t'
28015 *		data	- pointer to an int to hold the requested address
28016 *		flag	- this argument is a pass through to ddi_copyxxx()
28017 *			  directly from the mode argument of ioctl().
28018 *
28019 * Return Code: the code returned by sd_send_scsi_cmd()
28020 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state fails
28022 *		EINVAL if data pointer is NULL
28023 */
28024
28025static int
28026sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
28027{
28028	struct sd_lun		*un;
28029	struct uscsi_cmd	*com;
28030	caddr_t			buffer;
28031	char			cdb[CDB_GROUP1];
28032	int			session_offset = 0;
28033	int			rval;
28034
28035	if (data == NULL) {
28036		return (EINVAL);
28037	}
28038
28039	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28040	    (un->un_state == SD_STATE_OFFLINE)) {
28041		return (ENXIO);
28042	}
28043
28044	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
28045	bzero(cdb, CDB_GROUP1);
28046	cdb[0] = SCMD_READ_TOC;
28047	/*
	 * Bytes 7 & 8 are the 12-byte allocation length for a single entry.
	 * (4-byte TOC response header + 8-byte response data)
28050	 */
28051	cdb[8] = SONY_SESSION_OFFSET_LEN;
28052	/* Byte 9 is the control byte. A vendor specific value is used */
28053	cdb[9] = SONY_SESSION_OFFSET_KEY;
28054	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28055	com->uscsi_cdb = cdb;
28056	com->uscsi_cdblen = CDB_GROUP1;
28057	com->uscsi_bufaddr = buffer;
28058	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
28059	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28060
28061	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28062	    SD_PATH_STANDARD);
28063	if (rval != 0) {
28064		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
28065		kmem_free(com, sizeof (*com));
28066		return (rval);
28067	}
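	/*
	 * Byte 1 of the response indicates whether a valid session offset
	 * was returned; bytes 8-11 hold the big-endian block address of
	 * the first track in the last session.
	 */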
28068	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
28069		session_offset =
28070		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28071		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		/*
		 * The offset is returned in units of the current lbasize
		 * blocks.  Convert to 2K blocks before returning it to
		 * the user.
		 */
28076		if (un->un_tgt_blocksize == CDROM_BLK_512) {
28077			session_offset >>= 2;
28078		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
28079			session_offset >>= 1;
28080		}
28081	}
28082
28083	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
28084		rval = EFAULT;
28085	}
28086
28087	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
28088	kmem_free(com, sizeof (*com));
28089	return (rval);
28090}
28091
28092
28093/*
28094 *    Function: sd_wm_cache_constructor()
28095 *
28096 * Description: Cache Constructor for the wmap cache for the read/modify/write
28097 * 		devices.
28098 *
28099 *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
28100 *		un	- sd_lun structure for the device.
28101 *		flag	- the km flags passed to constructor
28102 *
28103 * Return Code: 0 on success.
28104 *		-1 on failure.
28105 */
28106
28107/*ARGSUSED*/
28108static int
28109sd_wm_cache_constructor(void *wm, void *un, int flags)
28110{
28111	bzero(wm, sizeof (struct sd_w_map));
28112	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
28113	return (0);
28114}
28115
28116
28117/*
28118 *    Function: sd_wm_cache_destructor()
28119 *
28120 * Description: Cache destructor for the wmap cache for the read/modify/write
28121 * 		devices.
28122 *
 *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
28124 *		un	- sd_lun structure for the device.
28125 */
28126/*ARGSUSED*/
28127static void
28128sd_wm_cache_destructor(void *wm, void *un)
28129{
28130	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
28131}
28132
28133
28134/*
28135 *    Function: sd_range_lock()
28136 *
28137 * Description: Lock the range of blocks specified as parameter to ensure
 *		that a read-modify-write is atomic and no other i/o writes
 *		to the same location. The range is specified in terms
 *		of start and end blocks. Block numbers are the actual
 *		media block numbers and not system block numbers.
28142 *
28143 *   Arguments: un	- sd_lun structure for the device.
28144 *		startb - The starting block number
28145 *		endb - The end block number
28146 *		typ - type of i/o - simple/read_modify_write
28147 *
28148 * Return Code: wm  - pointer to the wmap structure.
28149 *
28150 *     Context: This routine can sleep.
28151 */
28152
28153static struct sd_w_map *
28154sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
28155{
28156	struct sd_w_map *wmp = NULL;
28157	struct sd_w_map *sl_wmp = NULL;
28158	struct sd_w_map *tmp_wmp;
28159	wm_state state = SD_WM_CHK_LIST;
28160
28161
28162	ASSERT(un != NULL);
28163	ASSERT(!mutex_owned(SD_MUTEX(un)));
28164
28165	mutex_enter(SD_MUTEX(un));
28166
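	/*
	 * State machine summary: SD_WM_CHK_LIST scans the wmap list and
	 * moves either to SD_WM_LOCK_RANGE (range available) or to
	 * SD_WM_WAIT_MAP (range busy).  SD_WM_WAIT_MAP sleeps on the busy
	 * wmap and then rechecks the list.  SD_WM_LOCK_RANGE allocates
	 * and links a wmap and reaches SD_WM_DONE, unless the allocation
	 * had to sleep, in which case the list is rechecked.
	 */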
28167	while (state != SD_WM_DONE) {
28168
28169		switch (state) {
28170		case SD_WM_CHK_LIST:
28171			/*
28172			 * This is the starting state. Check the wmap list
28173			 * to see if the range is currently available.
28174			 */
28175			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
28176				/*
28177				 * If this is a simple write and no rmw
28178				 * i/o is pending then try to lock the
28179				 * range as the range should be available.
28180				 */
28181				state = SD_WM_LOCK_RANGE;
28182			} else {
28183				tmp_wmp = sd_get_range(un, startb, endb);
28184				if (tmp_wmp != NULL) {
28185					if ((wmp != NULL) && ONLIST(un, wmp)) {
28186						/*
						 * Should not keep onlist wmps
						 * while waiting; this macro
						 * also sets wmp = NULL.
28190						 */
28191						FREE_ONLIST_WMAP(un, wmp);
28192					}
28193					/*
28194					 * sl_wmp is the wmap on which wait
28195					 * is done, since the tmp_wmp points
28196					 * to the inuse wmap, set sl_wmp to
28197					 * tmp_wmp and change the state to sleep
28198					 */
28199					sl_wmp = tmp_wmp;
28200					state = SD_WM_WAIT_MAP;
28201				} else {
28202					state = SD_WM_LOCK_RANGE;
28203				}
28204
28205			}
28206			break;
28207
28208		case SD_WM_LOCK_RANGE:
28209			ASSERT(un->un_wm_cache);
28210			/*
			 * The range needs to be locked; try to get a wmap.
			 * First attempt it with KM_NOSLEEP, as we want to
			 * avoid sleeping if possible: we would have to drop
			 * the sd mutex in order to sleep.
28215			 */
28216			if (wmp == NULL)
28217				wmp = kmem_cache_alloc(un->un_wm_cache,
28218				    KM_NOSLEEP);
28219			if (wmp == NULL) {
28220				mutex_exit(SD_MUTEX(un));
28221				_NOTE(DATA_READABLE_WITHOUT_LOCK
28222				    (sd_lun::un_wm_cache))
28223				wmp = kmem_cache_alloc(un->un_wm_cache,
28224				    KM_SLEEP);
28225				mutex_enter(SD_MUTEX(un));
28226				/*
28227				 * we released the mutex so recheck and go to
28228				 * check list state.
28229				 */
28230				state = SD_WM_CHK_LIST;
28231			} else {
28232				/*
28233				 * We exit out of state machine since we
28234				 * have the wmap. Do the housekeeping first.
28235				 * place the wmap on the wmap list if it is not
28236				 * on it already and then set the state to done.
28237				 */
28238				wmp->wm_start = startb;
28239				wmp->wm_end = endb;
28240				wmp->wm_flags = typ | SD_WM_BUSY;
28241				if (typ & SD_WTYPE_RMW) {
28242					un->un_rmw_count++;
28243				}
28244				/*
28245				 * If not already on the list then link
28246				 */
28247				if (!ONLIST(un, wmp)) {
28248					wmp->wm_next = un->un_wm;
28249					wmp->wm_prev = NULL;
28250					if (wmp->wm_next)
28251						wmp->wm_next->wm_prev = wmp;
28252					un->un_wm = wmp;
28253				}
28254				state = SD_WM_DONE;
28255			}
28256			break;
28257
28258		case SD_WM_WAIT_MAP:
28259			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
28260			/*
28261			 * Wait is done on sl_wmp, which is set in the
28262			 * check_list state.
28263			 */
28264			sl_wmp->wm_wanted_count++;
28265			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
28266			sl_wmp->wm_wanted_count--;
28267			/*
28268			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if no one is
28270			 * waiting for it.
28271			 */
28272			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
28273			if (sl_wmp->wm_wanted_count == 0) {
28274				if (wmp != NULL)
28275					CHK_N_FREEWMP(un, wmp);
28276				wmp = sl_wmp;
28277			}
28278			sl_wmp = NULL;
28279			/*
28280			 * After waking up, need to recheck for availability of
28281			 * range.
28282			 */
28283			state = SD_WM_CHK_LIST;
28284			break;
28285
28286		default:
28287			panic("sd_range_lock: "
28288			    "Unknown state %d in sd_range_lock", state);
28289			/*NOTREACHED*/
28290		} /* switch(state) */
28291
28292	} /* while(state != SD_WM_DONE) */
28293
28294	mutex_exit(SD_MUTEX(un));
28295
28296	ASSERT(wmp != NULL);
28297
28298	return (wmp);
28299}
28300
28301
28302/*
28303 *    Function: sd_get_range()
28304 *
 * Description: Determine whether there is any I/O overlapping this one.
 *		Returns the write map of the first such I/O, NULL otherwise.
28307 *
28308 *   Arguments: un	- sd_lun structure for the device.
28309 *		startb - The starting block number
28310 *		endb - The end block number
28311 *
28312 * Return Code: wm  - pointer to the wmap structure.
28313 */
28314
28315static struct sd_w_map *
28316sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
28317{
28318	struct sd_w_map *wmp;
28319
28320	ASSERT(un != NULL);
28321
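	/*
	 * Scan the active (SD_WM_BUSY) maps for one whose locked range
	 * contains either endpoint of [startb, endb].
	 */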
28322	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
28323		if (!(wmp->wm_flags & SD_WM_BUSY)) {
28324			continue;
28325		}
28326		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
28327			break;
28328		}
28329		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
28330			break;
28331		}
28332	}
28333
28334	return (wmp);
28335}
28336
28337
28338/*
28339 *    Function: sd_free_inlist_wmap()
28340 *
28341 * Description: Unlink and free a write map struct.
28342 *
28343 *   Arguments: un      - sd_lun structure for the device.
28344 *		wmp	- sd_w_map which needs to be unlinked.
28345 */
28346
28347static void
28348sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
28349{
28350	ASSERT(un != NULL);
28351
28352	if (un->un_wm == wmp) {
28353		un->un_wm = wmp->wm_next;
28354	} else {
28355		wmp->wm_prev->wm_next = wmp->wm_next;
28356	}
28357
28358	if (wmp->wm_next) {
28359		wmp->wm_next->wm_prev = wmp->wm_prev;
28360	}
28361
28362	wmp->wm_next = wmp->wm_prev = NULL;
28363
28364	kmem_cache_free(un->un_wm_cache, wmp);
28365}
28366
28367
28368/*
28369 *    Function: sd_range_unlock()
28370 *
28371 * Description: Unlock the range locked by wm.
28372 *		Free write map if nobody else is waiting on it.
28373 *
28374 *   Arguments: un      - sd_lun structure for the device.
 *              wm      - sd_w_map whose range needs to be unlocked.
28376 */
28377
28378static void
28379sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
28380{
28381	ASSERT(un != NULL);
28382	ASSERT(wm != NULL);
28383	ASSERT(!mutex_owned(SD_MUTEX(un)));
28384
28385	mutex_enter(SD_MUTEX(un));
28386
28387	if (wm->wm_flags & SD_WTYPE_RMW) {
28388		un->un_rmw_count--;
28389	}
28390
28391	if (wm->wm_wanted_count) {
28392		wm->wm_flags = 0;
28393		/*
28394		 * Broadcast that the wmap is available now.
28395		 */
28396		cv_broadcast(&wm->wm_avail);
28397	} else {
28398		/*
28399		 * If no one is waiting on the map, it should be free'ed.
28400		 */
28401		sd_free_inlist_wmap(un, wm);
28402	}
28403
28404	mutex_exit(SD_MUTEX(un));
28405}
28406
28407
28408/*
28409 *    Function: sd_read_modify_write_task
28410 *
28411 * Description: Called from a taskq thread to initiate the write phase of
28412 *		a read-modify-write request.  This is used for targets where
28413 *		un->un_sys_blocksize != un->un_tgt_blocksize.
28414 *
28415 *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
28416 *
28417 *     Context: Called under taskq thread context.
28418 */
28419
28420static void
28421sd_read_modify_write_task(void *arg)
28422{
28423	struct sd_mapblocksize_info	*bsp;
28424	struct buf	*bp;
28425	struct sd_xbuf	*xp;
28426	struct sd_lun	*un;
28427
28428	bp = arg;	/* The bp is given in arg */
28429	ASSERT(bp != NULL);
28430
28431	/* Get the pointer to the layer-private data struct */
28432	xp = SD_GET_XBUF(bp);
28433	ASSERT(xp != NULL);
28434	bsp = xp->xb_private;
28435	ASSERT(bsp != NULL);
28436
28437	un = SD_GET_UN(bp);
28438	ASSERT(un != NULL);
28439	ASSERT(!mutex_owned(SD_MUTEX(un)));
28440
28441	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
28442	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
28443
28444	/*
28445	 * This is the write phase of a read-modify-write request, called
	 * in the context of a taskq thread, in response to the read
	 * portion of the rmw request having completed under interrupt
28448	 * context. The write request must be sent from here down the iostart
28449	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
28450	 * we use the layer index saved in the layer-private data area.
28451	 */
28452	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
28453
28454	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
28455	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
28456}
28457
28458
28459/*
28460 *    Function: sddump_do_read_of_rmw()
28461 *
 * Description: This routine will be called from sddump. If sddump is called
 *		with an I/O which is not aligned on a device blocksize
 *		boundary, then the write has to be converted to a
 *		read-modify-write.  Do the read part here in order to keep
 *		sddump simple.
 *		Note that the sd_mutex is held across the call to this
 *		routine.
28468 *
28469 *   Arguments: un	- sd_lun
28470 *		blkno	- block number in terms of media block size.
28471 *		nblk	- number of blocks.
28472 *		bpp	- pointer to pointer to the buf structure. On return
28473 *			from this function, *bpp points to the valid buffer
28474 *			to which the write has to be done.
28475 *
28476 * Return Code: 0 for success or errno-type return code
28477 */
28478
28479static int
28480sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
28481	struct buf **bpp)
28482{
28483	int err;
28484	int i;
28485	int rval;
28486	struct buf *bp;
28487	struct scsi_pkt *pkt = NULL;
28488	uint32_t target_blocksize;
28489
28490	ASSERT(un != NULL);
28491	ASSERT(mutex_owned(SD_MUTEX(un)));
28492
28493	target_blocksize = un->un_tgt_blocksize;
28494
28495	mutex_exit(SD_MUTEX(un));
28496
28497	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
28498	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
28499	if (bp == NULL) {
28500		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28501		    "no resources for dumping; giving up");
28502		err = ENOMEM;
28503		goto done;
28504	}
28505
28506	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
28507	    blkno, nblk);
28508	if (rval != 0) {
28509		scsi_free_consistent_buf(bp);
28510		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28511		    "no resources for dumping; giving up");
28512		err = ENOMEM;
28513		goto done;
28514	}
28515
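	/*
	 * FLAG_NOINTR makes the command run polled, as required in dump
	 * context where interrupt handling cannot be relied upon.
	 */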
28516	pkt->pkt_flags |= FLAG_NOINTR;
28517
28518	err = EIO;
28519	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
28520
28521		/*
28522		 * Scsi_poll returns 0 (success) if the command completes and
28523		 * the status block is STATUS_GOOD.  We should only check
28524		 * errors if this condition is not true.  Even then we should
28525		 * send our own request sense packet only if we have a check
28526		 * condition and auto request sense has not been performed by
28527		 * the hba.
28528		 */
28529		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
28530
28531		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
28532			err = 0;
28533			break;
28534		}
28535
28536		/*
28537		 * Check CMD_DEV_GONE 1st, give up if device is gone,
28538		 * no need to read RQS data.
28539		 */
28540		if (pkt->pkt_reason == CMD_DEV_GONE) {
28541			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28542			    "Error while dumping state with rmw..."
28543			    "Device is gone\n");
28544			break;
28545		}
28546
28547		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
28548			SD_INFO(SD_LOG_DUMP, un,
28549			    "sddump: read failed with CHECK, try # %d\n", i);
28550			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
28551				(void) sd_send_polled_RQS(un);
28552			}
28553
28554			continue;
28555		}
28556
28557		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
28558			int reset_retval = 0;
28559
28560			SD_INFO(SD_LOG_DUMP, un,
28561			    "sddump: read failed with BUSY, try # %d\n", i);
28562
28563			if (un->un_f_lun_reset_enabled == TRUE) {
28564				reset_retval = scsi_reset(SD_ADDRESS(un),
28565				    RESET_LUN);
28566			}
28567			if (reset_retval == 0) {
28568				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
28569			}
28570			(void) sd_send_polled_RQS(un);
28571
28572		} else {
28573			SD_INFO(SD_LOG_DUMP, un,
28574			    "sddump: read failed with 0x%x, try # %d\n",
28575			    SD_GET_PKT_STATUS(pkt), i);
28576			mutex_enter(SD_MUTEX(un));
28577			sd_reset_target(un, pkt);
28578			mutex_exit(SD_MUTEX(un));
28579		}
28580
28581		/*
28582		 * If we are not getting anywhere with lun/target resets,
28583		 * let's reset the bus.
28584		 */
28585		if (i > SD_NDUMP_RETRIES/2) {
28586			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
28587			(void) sd_send_polled_RQS(un);
28588		}
28589
28590	}
28591	scsi_destroy_pkt(pkt);
28592
28593	if (err != 0) {
28594		scsi_free_consistent_buf(bp);
28595		*bpp = NULL;
28596	} else {
28597		*bpp = bp;
28598	}
28599
28600done:
28601	mutex_enter(SD_MUTEX(un));
28602	return (err);
28603}
28604
28605
28606/*
28607 *    Function: sd_failfast_flushq
28608 *
28609 * Description: Take all bp's on the wait queue that have B_FAILFAST set
28610 *		in b_flags and move them onto the failfast queue, then kick
28611 *		off a thread to return all bp's on the failfast queue to
28612 *		their owners with an error set.
28613 *
28614 *   Arguments: un - pointer to the soft state struct for the instance.
28615 *
28616 *     Context: may execute in interrupt context.
28617 */
28618
28619static void
28620sd_failfast_flushq(struct sd_lun *un)
28621{
28622	struct buf *bp;
28623	struct buf *next_waitq_bp;
28624	struct buf *prev_waitq_bp = NULL;
28625
28626	ASSERT(un != NULL);
28627	ASSERT(mutex_owned(SD_MUTEX(un)));
28628	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
28629	ASSERT(un->un_failfast_bp == NULL);
28630
28631	SD_TRACE(SD_LOG_IO_FAILFAST, un,
28632	    "sd_failfast_flushq: entry: un:0x%p\n", un);
28633
28634	/*
28635	 * Check if we should flush all bufs when entering failfast state, or
28636	 * just those with B_FAILFAST set.
28637	 */
28638	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
28639		/*
28640		 * Move *all* bp's on the wait queue to the failfast flush
28641		 * queue, including those that do NOT have B_FAILFAST set.
28642		 */
28643		if (un->un_failfast_headp == NULL) {
28644			ASSERT(un->un_failfast_tailp == NULL);
28645			un->un_failfast_headp = un->un_waitq_headp;
28646		} else {
28647			ASSERT(un->un_failfast_tailp != NULL);
28648			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
28649		}
28650
28651		un->un_failfast_tailp = un->un_waitq_tailp;
28652
28653		/* update kstat for each bp moved out of the waitq */
28654		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
28655			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
28656		}
28657
28658		/* empty the waitq */
28659		un->un_waitq_headp = un->un_waitq_tailp = NULL;
28660
28661	} else {
28662		/*
28663		 * Go thru the wait queue, pick off all entries with
28664		 * B_FAILFAST set, and move these onto the failfast queue.
28665		 */
28666		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
28667			/*
28668			 * Save the pointer to the next bp on the wait queue,
28669			 * so we get to it on the next iteration of this loop.
28670			 */
28671			next_waitq_bp = bp->av_forw;
28672
28673			/*
28674			 * If this bp from the wait queue does NOT have
28675			 * B_FAILFAST set, just move on to the next element
28676			 * in the wait queue. Note, this is the only place
28677			 * where it is correct to set prev_waitq_bp.
28678			 */
28679			if ((bp->b_flags & B_FAILFAST) == 0) {
28680				prev_waitq_bp = bp;
28681				continue;
28682			}
28683
28684			/*
28685			 * Remove the bp from the wait queue.
28686			 */
28687			if (bp == un->un_waitq_headp) {
28688				/* The bp is the first element of the waitq. */
28689				un->un_waitq_headp = next_waitq_bp;
28690				if (un->un_waitq_headp == NULL) {
28691					/* The wait queue is now empty */
28692					un->un_waitq_tailp = NULL;
28693				}
28694			} else {
28695				/*
28696				 * The bp is either somewhere in the middle
28697				 * or at the end of the wait queue.
28698				 */
28699				ASSERT(un->un_waitq_headp != NULL);
28700				ASSERT(prev_waitq_bp != NULL);
28701				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
28702				    == 0);
28703				if (bp == un->un_waitq_tailp) {
28704					/* bp is the last entry on the waitq. */
28705					ASSERT(next_waitq_bp == NULL);
28706					un->un_waitq_tailp = prev_waitq_bp;
28707				}
28708				prev_waitq_bp->av_forw = next_waitq_bp;
28709			}
28710			bp->av_forw = NULL;
28711
28712			/*
28713			 * update kstat since the bp is moved out of
28714			 * the waitq
28715			 */
28716			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
28717
28718			/*
28719			 * Now put the bp onto the failfast queue.
28720			 */
28721			if (un->un_failfast_headp == NULL) {
28722				/* failfast queue is currently empty */
28723				ASSERT(un->un_failfast_tailp == NULL);
28724				un->un_failfast_headp =
28725				    un->un_failfast_tailp = bp;
28726			} else {
28727				/* Add the bp to the end of the failfast q */
28728				ASSERT(un->un_failfast_tailp != NULL);
28729				ASSERT(un->un_failfast_tailp->b_flags &
28730				    B_FAILFAST);
28731				un->un_failfast_tailp->av_forw = bp;
28732				un->un_failfast_tailp = bp;
28733			}
28734		}
28735	}
28736
28737	/*
28738	 * Now return all bp's on the failfast queue to their owners.
28739	 */
28740	while ((bp = un->un_failfast_headp) != NULL) {
28741
28742		un->un_failfast_headp = bp->av_forw;
28743		if (un->un_failfast_headp == NULL) {
28744			un->un_failfast_tailp = NULL;
28745		}
28746
28747		/*
28748		 * We want to return the bp with a failure error code, but
28749		 * we do not want a call to sd_start_cmds() to occur here,
28750		 * so use sd_return_failed_command_no_restart() instead of
28751		 * sd_return_failed_command().
28752		 */
28753		sd_return_failed_command_no_restart(un, bp, EIO);
28754	}
28755
28756	/* Flush the xbuf queues if required. */
28757	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
28758		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
28759	}
28760
28761	SD_TRACE(SD_LOG_IO_FAILFAST, un,
28762	    "sd_failfast_flushq: exit: un:0x%p\n", un);
28763}
28764
28765
28766/*
28767 *    Function: sd_failfast_flushq_callback
28768 *
28769 * Description: Return TRUE if the given bp meets the criteria for failfast
28770 *		flushing. Used with ddi_xbuf_flushq(9F).
28771 *
28772 *   Arguments: bp - ptr to buf struct to be examined.
28773 *
28774 *     Context: Any
28775 */
28776
28777static int
28778sd_failfast_flushq_callback(struct buf *bp)
28779{
28780	/*
28781	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
28782	 * state is entered; OR (2) the given bp has B_FAILFAST set.
28783	 */
28784	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
28785	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
28786}
28787
28788
28789
28790/*
28791 * Function: sd_setup_next_xfer
28792 *
 * Description: Prepare the next portion of an I/O operation that is
 *		being transferred using DMA_PARTIAL.
 *
 * Return Code: 1 if the next transfer portion was successfully set up;
 *		0 on failure (packet allocation failed).
 *
28795 */
28796
28797static int
28798sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
28799    struct scsi_pkt *pkt, struct sd_xbuf *xp)
28800{
28801	ssize_t	num_blks_not_xfered;
28802	daddr_t	strt_blk_num;
28803	ssize_t	bytes_not_xfered;
28804	int	rval;
28805
28806	ASSERT(pkt->pkt_resid == 0);
28807
28808	/*
28809	 * Calculate next block number and amount to be transferred.
28810	 *
	 * How much data has NOT been transferred to the HBA yet.
28812	 */
28813	bytes_not_xfered = xp->xb_dma_resid;
28814
28815	/*
	 * Figure out how many blocks have NOT been transferred to the HBA yet.
28817	 */
28818	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
28819
28820	/*
	 * Set the starting block number to the end of what WAS transferred.
28822	 */
28823	strt_blk_num = xp->xb_blkno +
28824	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
28825
28826	/*
28827	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
28829	 * the disk mutex here.
28830	 */
28831	rval = sd_setup_next_rw_pkt(un, pkt, bp,
28832	    strt_blk_num, num_blks_not_xfered);
28833
28834	if (rval == 0) {
28835
28836		/*
28837		 * Success.
28838		 *
28839		 * Adjust things if there are still more blocks to be
		 * transferred.
28841		 */
28842		xp->xb_dma_resid = pkt->pkt_resid;
28843		pkt->pkt_resid = 0;
28844
28845		return (1);
28846	}
28847
28848	/*
	 * There's really only one possible failing return value from
	 * sd_setup_next_rw_pkt, which occurs when scsi_init_pkt
28851	 * returns NULL.
28852	 */
28853	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
28854
28855	bp->b_resid = bp->b_bcount;
28856	bp->b_flags |= B_ERROR;
28857
28858	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28859	    "Error setting up next portion of DMA transfer\n");
28860
28861	return (0);
28862}
28863
28864/*
28865 *    Function: sd_panic_for_res_conflict
28866 *
28867 * Description: Call panic with a string formatted with "Reservation Conflict"
28868 *		and a human readable identifier indicating the SD instance
28869 *		that experienced the reservation conflict.
28870 *
28871 *   Arguments: un - pointer to the soft state struct for the instance.
28872 *
28873 *     Context: may execute in interrupt context.
28874 */
28875
28876#define	SD_RESV_CONFLICT_FMT_LEN 40
28877void
28878sd_panic_for_res_conflict(struct sd_lun *un)
28879{
28880	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
28881	char path_str[MAXPATHLEN];
28882
28883	(void) snprintf(panic_str, sizeof (panic_str),
28884	    "Reservation Conflict\nDisk: %s",
28885	    ddi_pathname(SD_DEVINFO(un), path_str));
28886
28887	panic(panic_str);
28888}
28889
28890/*
 * Note: The following sd_faultinjection_ioctl() routines implement
 * driver support for fault injection for error analysis, allowing
 * faults to be injected into multiple layers of the driver.
28894 *
28895 */
28896
28897#ifdef SD_FAULT_INJECTION
28898static uint_t   sd_fault_injection_on = 0;
28899
28900/*
28901 *    Function: sd_faultinjection_ioctl()
28902 *
28903 * Description: This routine is the driver entry point for handling
28904 *              faultinjection ioctls to inject errors into the
28905 *              layer model
28906 *
28907 *   Arguments: cmd	- the ioctl cmd received
 *		arg	- the user argument; also used to return data
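 *
 *		Supported cmds: SDIOCSTART/SDIOCSTOP begin and end an
 *		injection session, SDIOCINSERTPKT/XB/UN/ARQ stage fault
 *		data, SDIOCPUSH pushes staged entries onto the fifo,
 *		SDIOCRUN arms injection, and SDIOCRETRIEVE returns the
 *		session log.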
28909 */
28910
28911static void
sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
{
28914	uint_t i = 0;
28915	uint_t rval;
28916
28917	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
28918
28919	mutex_enter(SD_MUTEX(un));
28920
28921	switch (cmd) {
28922	case SDIOCRUN:
28923		/* Allow pushed faults to be injected */
28924		SD_INFO(SD_LOG_SDTEST, un,
28925		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
28926
28927		sd_fault_injection_on = 1;
28928
28929		SD_INFO(SD_LOG_IOERR, un,
28930		    "sd_faultinjection_ioctl: run finished\n");
28931		break;
28932
28933	case SDIOCSTART:
28934		/* Start Injection Session */
28935		SD_INFO(SD_LOG_SDTEST, un,
28936		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
28937
28938		sd_fault_injection_on = 0;
28939		un->sd_injection_mask = 0xFFFFFFFF;
28940		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
28941			un->sd_fi_fifo_pkt[i] = NULL;
28942			un->sd_fi_fifo_xb[i] = NULL;
28943			un->sd_fi_fifo_un[i] = NULL;
28944			un->sd_fi_fifo_arq[i] = NULL;
28945		}
28946		un->sd_fi_fifo_start = 0;
28947		un->sd_fi_fifo_end = 0;
28948
28949		mutex_enter(&(un->un_fi_mutex));
28950		un->sd_fi_log[0] = '\0';
28951		un->sd_fi_buf_len = 0;
28952		mutex_exit(&(un->un_fi_mutex));
28953
28954		SD_INFO(SD_LOG_IOERR, un,
28955		    "sd_faultinjection_ioctl: start finished\n");
28956		break;
28957
28958	case SDIOCSTOP:
28959		/* Stop Injection Session */
28960		SD_INFO(SD_LOG_SDTEST, un,
28961		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
28962		sd_fault_injection_on = 0;
28963		un->sd_injection_mask = 0x0;
28964
		/* Empty stray or unused structs from the fifo */
28966		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
28967			if (un->sd_fi_fifo_pkt[i] != NULL) {
28968				kmem_free(un->sd_fi_fifo_pkt[i],
28969				    sizeof (struct sd_fi_pkt));
28970			}
28971			if (un->sd_fi_fifo_xb[i] != NULL) {
28972				kmem_free(un->sd_fi_fifo_xb[i],
28973				    sizeof (struct sd_fi_xb));
28974			}
28975			if (un->sd_fi_fifo_un[i] != NULL) {
28976				kmem_free(un->sd_fi_fifo_un[i],
28977				    sizeof (struct sd_fi_un));
28978			}
28979			if (un->sd_fi_fifo_arq[i] != NULL) {
28980				kmem_free(un->sd_fi_fifo_arq[i],
28981				    sizeof (struct sd_fi_arq));
28982			}
28983			un->sd_fi_fifo_pkt[i] = NULL;
28984			un->sd_fi_fifo_un[i] = NULL;
28985			un->sd_fi_fifo_xb[i] = NULL;
28986			un->sd_fi_fifo_arq[i] = NULL;
28987		}
28988		un->sd_fi_fifo_start = 0;
28989		un->sd_fi_fifo_end = 0;
28990
28991		SD_INFO(SD_LOG_IOERR, un,
28992		    "sd_faultinjection_ioctl: stop finished\n");
28993		break;
28994
28995	case SDIOCINSERTPKT:
28996		/* Store a packet struct to be pushed onto fifo */
28997		SD_INFO(SD_LOG_SDTEST, un,
28998		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
28999
29000		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29001
29002		sd_fault_injection_on = 0;
29003
		/* No more than SD_FI_MAX_ERROR entries allowed in the queue */
29005		if (un->sd_fi_fifo_pkt[i] != NULL) {
29006			kmem_free(un->sd_fi_fifo_pkt[i],
29007			    sizeof (struct sd_fi_pkt));
29008		}
29009		if (arg != NULL) {
29010			un->sd_fi_fifo_pkt[i] =
29011			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
29012			if (un->sd_fi_fifo_pkt[i] == NULL) {
				/* Alloc failed; don't store anything */
29014				break;
29015			}
29016			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
29017			    sizeof (struct sd_fi_pkt), 0);
29018			if (rval == -1) {
29019				kmem_free(un->sd_fi_fifo_pkt[i],
29020				    sizeof (struct sd_fi_pkt));
29021				un->sd_fi_fifo_pkt[i] = NULL;
29022			}
29023		} else {
29024			SD_INFO(SD_LOG_IOERR, un,
29025			    "sd_faultinjection_ioctl: pkt null\n");
29026		}
29027		break;
29028
29029	case SDIOCINSERTXB:
		/* Store an xb struct to be pushed onto the fifo */
29031		SD_INFO(SD_LOG_SDTEST, un,
29032		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
29033
29034		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29035
29036		sd_fault_injection_on = 0;
29037
29038		if (un->sd_fi_fifo_xb[i] != NULL) {
29039			kmem_free(un->sd_fi_fifo_xb[i],
29040			    sizeof (struct sd_fi_xb));
29041			un->sd_fi_fifo_xb[i] = NULL;
29042		}
29043		if (arg != NULL) {
29044			un->sd_fi_fifo_xb[i] =
29045			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
29046			if (un->sd_fi_fifo_xb[i] == NULL) {
				/* Alloc failed; don't store anything */
29048				break;
29049			}
29050			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
29051			    sizeof (struct sd_fi_xb), 0);
29052
29053			if (rval == -1) {
29054				kmem_free(un->sd_fi_fifo_xb[i],
29055				    sizeof (struct sd_fi_xb));
29056				un->sd_fi_fifo_xb[i] = NULL;
29057			}
29058		} else {
29059			SD_INFO(SD_LOG_IOERR, un,
29060			    "sd_faultinjection_ioctl: xb null\n");
29061		}
29062		break;
29063
29064	case SDIOCINSERTUN:
29065		/* Store a un struct to be pushed onto fifo */
29066		SD_INFO(SD_LOG_SDTEST, un,
29067		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
29068
29069		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29070
29071		sd_fault_injection_on = 0;
29072
29073		if (un->sd_fi_fifo_un[i] != NULL) {
29074			kmem_free(un->sd_fi_fifo_un[i],
29075			    sizeof (struct sd_fi_un));
29076			un->sd_fi_fifo_un[i] = NULL;
29077		}
29078		if (arg != NULL) {
29079			un->sd_fi_fifo_un[i] =
29080			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
29081			if (un->sd_fi_fifo_un[i] == NULL) {
				/* Alloc failed; don't store anything */
29083				break;
29084			}
29085			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
29086			    sizeof (struct sd_fi_un), 0);
29087			if (rval == -1) {
29088				kmem_free(un->sd_fi_fifo_un[i],
29089				    sizeof (struct sd_fi_un));
29090				un->sd_fi_fifo_un[i] = NULL;
29091			}
29092
29093		} else {
29094			SD_INFO(SD_LOG_IOERR, un,
29095			    "sd_faultinjection_ioctl: un null\n");
29096		}
29097
29098		break;
29099
29100	case SDIOCINSERTARQ:
		/* Store an arq struct to be pushed onto the fifo */
29102		SD_INFO(SD_LOG_SDTEST, un,
29103		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
29104		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
29105
29106		sd_fault_injection_on = 0;
29107
29108		if (un->sd_fi_fifo_arq[i] != NULL) {
29109			kmem_free(un->sd_fi_fifo_arq[i],
29110			    sizeof (struct sd_fi_arq));
29111			un->sd_fi_fifo_arq[i] = NULL;
29112		}
29113		if (arg != NULL) {
29114			un->sd_fi_fifo_arq[i] =
29115			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
29116			if (un->sd_fi_fifo_arq[i] == NULL) {
				/* Alloc failed; don't store anything */
29118				break;
29119			}
29120			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
29121			    sizeof (struct sd_fi_arq), 0);
29122			if (rval == -1) {
29123				kmem_free(un->sd_fi_fifo_arq[i],
29124				    sizeof (struct sd_fi_arq));
29125				un->sd_fi_fifo_arq[i] = NULL;
29126			}
29127
29128		} else {
29129			SD_INFO(SD_LOG_IOERR, un,
29130			    "sd_faultinjection_ioctl: arq null\n");
29131		}
29132
29133		break;
29134
29135	case SDIOCPUSH:
29136		/* Push stored xb, pkt, un, and arq onto fifo */
29137		sd_fault_injection_on = 0;
29138
29139		if (arg != NULL) {
29140			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
29141			if (rval != -1 &&
29142			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29143				un->sd_fi_fifo_end += i;
29144			}
29145		} else {
29146			SD_INFO(SD_LOG_IOERR, un,
29147			    "sd_faultinjection_ioctl: push arg null\n");
29148			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
29149				un->sd_fi_fifo_end++;
29150			}
29151		}
29152		SD_INFO(SD_LOG_IOERR, un,
29153		    "sd_faultinjection_ioctl: push to end=%d\n",
29154		    un->sd_fi_fifo_end);
29155		break;
29156
29157	case SDIOCRETRIEVE:
29158		/* Return buffer of log from Injection session */
29159		SD_INFO(SD_LOG_SDTEST, un,
		    "sd_faultinjection_ioctl: Injecting Fault Retrieve");
29161
29162		sd_fault_injection_on = 0;
29163
29164		mutex_enter(&(un->un_fi_mutex));
29165		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
29166		    un->sd_fi_buf_len+1, 0);
29167		mutex_exit(&(un->un_fi_mutex));
29168
29169		if (rval == -1) {
29170			/*
			 * arg is possibly invalid; set it
			 * to NULL for the return.
29173			 */
29174			arg = NULL;
29175		}
29176		break;
29177	}
29178
29179	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
29182}
29183
29184
29185/*
29186 *    Function: sd_injection_log()
29187 *
 * Description: This routine appends buf to the existing injection log
 *              for later retrieval via sd_faultinjection_ioctl() for use
 *              in fault detection and recovery.
29191 *
29192 *   Arguments: buf - the string to add to the log
29193 */
29194
29195static void
29196sd_injection_log(char *buf, struct sd_lun *un)
29197{
29198	uint_t len;
29199
29200	ASSERT(un != NULL);
29201	ASSERT(buf != NULL);
29202
29203	mutex_enter(&(un->un_fi_mutex));
29204
29205	len = min(strlen(buf), 255);
29206	/* Add logged value to Injection log to be returned later */
29207	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
29208		uint_t	offset = strlen((char *)un->sd_fi_log);
29209		char *destp = (char *)un->sd_fi_log + offset;
29210		int i;
29211		for (i = 0; i < len; i++) {
29212			*destp++ = *buf++;
29213		}
29214		un->sd_fi_buf_len += len;
29215		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
29216	}
29217
29218	mutex_exit(&(un->un_fi_mutex));
29219}
29220
29221
29222/*
29223 *    Function: sd_faultinjection()
29224 *
29225 * Description: This routine takes the pkt and changes its
 *		content based on the error injection scenario.
29227 *
29228 *   Arguments: pktp	- packet to be changed
29229 */
29230
29231static void
29232sd_faultinjection(struct scsi_pkt *pktp)
29233{
29234	uint_t i;
29235	struct sd_fi_pkt *fi_pkt;
29236	struct sd_fi_xb *fi_xb;
29237	struct sd_fi_un *fi_un;
29238	struct sd_fi_arq *fi_arq;
29239	struct buf *bp;
29240	struct sd_xbuf *xb;
29241	struct sd_lun *un;
29242
29243	ASSERT(pktp != NULL);
29244
29245	/* pull bp xb and un from pktp */
29246	bp = (struct buf *)pktp->pkt_private;
29247	xb = SD_GET_XBUF(bp);
29248	un = SD_GET_UN(bp);
29249
29250	ASSERT(un != NULL);
29251
29252	mutex_enter(SD_MUTEX(un));
29253
29254	SD_TRACE(SD_LOG_SDTEST, un,
29255	    "sd_faultinjection: entry Injection from sdintr\n");
29256
29257	/* if injection is off return */
29258	if (sd_fault_injection_on == 0 ||
29259	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
29260		mutex_exit(SD_MUTEX(un));
29261		return;
29262	}
29263
	SD_INFO(SD_LOG_SDTEST, un,
	    "sd_faultinjection: injecting staged fault\n");

	/* take the next set off the fifo */
29268	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
29269
29270	fi_pkt = un->sd_fi_fifo_pkt[i];
29271	fi_xb = un->sd_fi_fifo_xb[i];
29272	fi_un = un->sd_fi_fifo_un[i];
29273	fi_arq = un->sd_fi_fifo_arq[i];
29274
29275
29276	/* set variables accordingly */
29277	/* set pkt if it was on fifo */
29278	if (fi_pkt != NULL) {
29279		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
29280		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
29281		if (fi_pkt->pkt_cdbp != 0xff)
29282			SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
29283		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
29284		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
29285		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
29286
29287	}
29288	/* set xb if it was on fifo */
29289	if (fi_xb != NULL) {
29290		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
29291		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
29292		if (fi_xb->xb_retry_count != 0)
29293			SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
29294		SD_CONDSET(xb, xb, xb_victim_retry_count,
29295		    "xb_victim_retry_count");
29296		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
29297		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
29298		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
29299
29300		/* copy in block data from sense */
29301		/*
29302		 * if (fi_xb->xb_sense_data[0] != -1) {
29303		 *	bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
29304		 *	SENSE_LENGTH);
29305		 * }
29306		 */
29307		bcopy(fi_xb->xb_sense_data, xb->xb_sense_data, SENSE_LENGTH);
29308
29309		/* copy in extended sense codes */
29310		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29311		    xb, es_code, "es_code");
29312		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29313		    xb, es_key, "es_key");
29314		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29315		    xb, es_add_code, "es_add_code");
29316		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
29317		    xb, es_qual_code, "es_qual_code");
29318		struct scsi_extended_sense *esp;
29319		esp = (struct scsi_extended_sense *)xb->xb_sense_data;
29320		esp->es_class = CLASS_EXTENDED_SENSE;
29321	}
29322
29323	/* set un if it was on fifo */
29324	if (fi_un != NULL) {
29325		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
29326		SD_CONDSET(un, un, un_ctype, "un_ctype");
29327		SD_CONDSET(un, un, un_reset_retry_count,
29328		    "un_reset_retry_count");
29329		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
29330		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
29331		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
29332		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
29333		    "un_f_allow_bus_device_reset");
29334		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
29335
29336	}
29337
29338	/* copy in auto request sense if it was on fifo */
29339	if (fi_arq != NULL) {
29340		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
29341	}
29342
29343	/* free structs */
29344	if (un->sd_fi_fifo_pkt[i] != NULL) {
29345		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
29346	}
29347	if (un->sd_fi_fifo_xb[i] != NULL) {
29348		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
29349	}
29350	if (un->sd_fi_fifo_un[i] != NULL) {
29351		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
29352	}
29353	if (un->sd_fi_fifo_arq[i] != NULL) {
29354		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
29355	}
29356
	/*
	 * kmem_free() does not guarantee that the pointers are set
	 * to NULL. Since we use these pointers to determine whether
	 * values were staged, explicitly reset them to NULL after
	 * the free.
	 */
29363	un->sd_fi_fifo_pkt[i] = NULL;
29364	un->sd_fi_fifo_un[i] = NULL;
29365	un->sd_fi_fifo_xb[i] = NULL;
29366	un->sd_fi_fifo_arq[i] = NULL;
29367
29368	un->sd_fi_fifo_start++;
29369
29370	mutex_exit(SD_MUTEX(un));
29371
29372	SD_INFO(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
29373}
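
/*
 * Illustrative sketch (not driver code): the FIFO sets consumed above
 * are staged from userland through the insert ioctls. Assuming struct
 * sd_fi_pkt mirrors the scsi_pkt fields referenced above, a transport
 * error could be simulated by staging:
 *
 *	struct sd_fi_pkt fi_pkt;
 *	bzero(&fi_pkt, sizeof (fi_pkt));
 *	fi_pkt.pkt_reason = CMD_TRAN_ERR;	// applied via SD_CONDSET
 *
 * When the staged set is pushed and the next command completes,
 * sd_faultinjection() applies the staged pkt_reason to the real packet,
 * exercising the driver's transport error recovery path.
 */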
29374
29375#endif /* SD_FAULT_INJECTION */
29376
29377/*
 * This routine is invoked in sd_unit_attach(). Before calling it, the
 * properties in the conf file, including the "hotpluggable" property,
 * must already have been processed.
 *
 * The sd driver distinguishes 3 different types of devices: removable media,
 * non-removable media, and hotpluggable. The differences are defined below:
29384 *
29385 * 1. Device ID
29386 *
29387 *     The device ID of a device is used to identify this device. Refer to
29388 *     ddi_devid_register(9F).
29389 *
 *     For a non-removable media disk device that can provide the 0x80 or 0x83
 *     VPD page (refer to the INQUIRY command in the SCSI SPC specification),
 *     a unique device ID is created to identify the device. For other
 *     non-removable media devices, a default device ID is created only if the
 *     device has at least 2 alternate cylinders. Otherwise, the device has no
 *     devid.
29395 *
29396 *     -------------------------------------------------------
29397 *     removable media   hotpluggable  | Can Have Device ID
29398 *     -------------------------------------------------------
29399 *         false             false     |     Yes
29400 *         false             true      |     Yes
29401 *         true                x       |     No
29402 *     ------------------------------------------------------
29403 *
29404 *
29405 * 2. SCSI group 4 commands
29406 *
 *     In the SCSI specs, only some commands in the group 4 command set can
 *     use 8-byte addresses, which are required to access storage beyond 2TB.
 *     Other commands have no such capability. Without group 4 support, it is
 *     impossible to make full use of the storage space of a disk with a
 *     capacity larger than 2TB.
29412 *
29413 *     -----------------------------------------------
29414 *     removable media   hotpluggable   LP64  |  Group
29415 *     -----------------------------------------------
29416 *           false          false       false |   1
29417 *           false          false       true  |   4
29418 *           false          true        false |   1
29419 *           false          true        true  |   4
29420 *           true             x           x   |   5
29421 *     -----------------------------------------------
29422 *
29423 *
29424 * 3. Check for VTOC Label
29425 *
29426 *     If a direct-access disk has no EFI label, sd will check if it has a
29427 *     valid VTOC label. Now, sd also does that check for removable media
29428 *     and hotpluggable devices.
29429 *
29430 *     --------------------------------------------------------------
29431 *     Direct-Access   removable media    hotpluggable |  Check Label
29432 *     -------------------------------------------------------------
29433 *         false          false           false        |   No
29434 *         false          false           true         |   No
29435 *         false          true            false        |   Yes
29436 *         false          true            true         |   Yes
29437 *         true            x                x          |   Yes
29438 *     --------------------------------------------------------------
29439 *
29440 *
29441 * 4. Building default VTOC label
29442 *
 *     As section 3 says, sd checks whether certain kinds of devices have a
 *     VTOC label. If such a device has no valid VTOC label, sd(7d) attempts
 *     to create a default VTOC label for it. Currently sd creates a default
 *     VTOC label for all devices on the x86 platform (VTOC_16), but only for
 *     removable media devices on SPARC (VTOC_8).
29448 *
29449 *     -----------------------------------------------------------
29450 *       removable media hotpluggable platform   |   Default Label
29451 *     -----------------------------------------------------------
29452 *             false          false    sparc     |     No
29453 *             false          true      x86      |     Yes
29454 *             false          true     sparc     |     Yes
29455 *             true             x        x       |     Yes
29456 *     ----------------------------------------------------------
29457 *
29458 *
29459 * 5. Supported blocksizes of target devices
29460 *
 *     Sd supports a non-512-byte blocksize for removable media devices only.
 *     For other devices, only a 512-byte blocksize is supported. This may
 *     change in the near future because some RAID devices require a
 *     non-512-byte blocksize.
29465 *
29466 *     -----------------------------------------------------------
29467 *     removable media    hotpluggable    | non-512-byte blocksize
29468 *     -----------------------------------------------------------
29469 *           false          false         |   No
29470 *           false          true          |   No
29471 *           true             x           |   Yes
29472 *     -----------------------------------------------------------
29473 *
29474 *
29475 * 6. Automatic mount & unmount
29476 *
 *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used to
 *     query whether a device is a removable media device. It returns 1 for
 *     removable media devices, and 0 for others. (An illustrative userland
 *     sketch follows sd_set_unit_attributes() below.)
29480 *
29481 *     The automatic mounting subsystem should distinguish between the types
29482 *     of devices and apply automounting policies to each.
29483 *
29484 *
29485 * 7. fdisk partition management
29486 *
 *     Fdisk is the traditional partitioning method on the x86 platform. The
 *     sd(7d) driver supports fdisk partitions only on x86; on the SPARC
 *     platform, sd doesn't support fdisk partitions at all. Note: pcfs(7fs)
 *     can recognize fdisk partitions on both the x86 and SPARC platforms.
29491 *
29492 *     -----------------------------------------------------------
29493 *       platform   removable media  USB/1394  |  fdisk supported
29494 *     -----------------------------------------------------------
29495 *        x86         X               X        |       true
29496 *     ------------------------------------------------------------
29497 *        sparc       X               X        |       false
29498 *     ------------------------------------------------------------
29499 *
29500 *
29501 * 8. MBOOT/MBR
29502 *
 *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
 *     support reading/writing the mboot for removable media and USB/1394
 *     devices on SPARC.
29505 *
29506 *     -----------------------------------------------------------
29507 *       platform   removable media  USB/1394  |  mboot supported
29508 *     -----------------------------------------------------------
29509 *        x86         X               X        |       true
29510 *     ------------------------------------------------------------
29511 *        sparc      false           false     |       false
29512 *        sparc      false           true      |       true
29513 *        sparc      true            false     |       true
29514 *        sparc      true            true      |       true
29515 *     ------------------------------------------------------------
29516 *
29517 *
 * 9.  error handling during device open
 *
 *     If opening a disk device fails, an errno is returned. For some kinds
 *     of errors, the errno returned depends on whether the device is a
 *     removable media device. This brings USB/1394 hard disks in line with
 *     expected hard disk behavior. It is not expected that this breaks any
 *     application.
29525 *
29526 *     ------------------------------------------------------
29527 *       removable media    hotpluggable   |  errno
29528 *     ------------------------------------------------------
29529 *             false          false        |   EIO
29530 *             false          true         |   EIO
29531 *             true             x          |   ENXIO
29532 *     ------------------------------------------------------
29533 *
29534 *
29535 * 11. ioctls: DKIOCEJECT, CDROMEJECT
29536 *
29537 *     These IOCTLs are applicable only to removable media devices.
29538 *
29539 *     -----------------------------------------------------------
29540 *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
29541 *     -----------------------------------------------------------
29542 *             false          false        |     No
29543 *             false          true         |     No
29544 *             true            x           |     Yes
29545 *     -----------------------------------------------------------
29546 *
29547 *
29548 * 12. Kstats for partitions
29549 *
 *     sd creates partition kstats for non-removable media devices. USB and
 *     Firewire hard disks now have partition kstats.
29552 *
29553 *      ------------------------------------------------------
29554 *       removable media    hotpluggable   |   kstat
29555 *      ------------------------------------------------------
29556 *             false          false        |    Yes
29557 *             false          true         |    Yes
29558 *             true             x          |    No
29559 *       ------------------------------------------------------
29560 *
29561 *
29562 * 13. Removable media & hotpluggable properties
29563 *
 *     The sd driver creates a "removable-media" property for removable media
 *     devices. A parent nexus driver creates a "hotpluggable" property if
 *     it supports hotplugging.
29567 *
29568 *     ---------------------------------------------------------------------
 *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
29570 *     ---------------------------------------------------------------------
29571 *       false            false       |    No                   No
29572 *       false            true        |    No                   Yes
29573 *       true             false       |    Yes                  No
29574 *       true             true        |    Yes                  Yes
29575 *     ---------------------------------------------------------------------
29576 *
29577 *
29578 * 14. Power Management
29579 *
 *     sd only power manages removable media devices or devices that support
 *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
29582 *
29583 *     A parent nexus that supports hotplugging can also set "pm-capable"
29584 *     if the disk can be power managed.
29585 *
29586 *     ------------------------------------------------------------
29587 *       removable media hotpluggable pm-capable  |   power manage
29588 *     ------------------------------------------------------------
29589 *             false          false     false     |     No
29590 *             false          false     true      |     Yes
29591 *             false          true      false     |     No
29592 *             false          true      true      |     Yes
29593 *             true             x        x        |     Yes
29594 *     ------------------------------------------------------------
29595 *
 *      USB and FireWire hard disks can now be power managed independently
 *      of the framebuffer.
29598 *
29599 *
29600 * 15. Support for USB disks with capacity larger than 1TB
29601 *
 *     Currently, sd doesn't permit a fixed disk device with a capacity
 *     larger than 1TB to be used in a 32-bit operating system environment.
 *     However, sd doesn't apply this restriction to removable media devices;
 *     instead, it assumes that removable media devices cannot have a capacity
 *     larger than 1TB. Therefore, using such devices on a 32-bit system is
 *     only partially supported, which can cause unexpected results.
29608 *
29609 *     ---------------------------------------------------------------------
29610 *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
29611 *     ---------------------------------------------------------------------
29612 *             false          false  |   true         |     no
29613 *             false          true   |   true         |     no
29614 *             true           false  |   true         |     Yes
29615 *             true           true   |   true         |     Yes
29616 *     ---------------------------------------------------------------------
29617 *
29618 *
29619 * 16. Check write-protection at open time
29620 *
 *     When a removable media device is opened for writing without the NDELAY
 *     flag, sd checks whether the device is writable. An attempt to open a
 *     write-protected device without the NDELAY flag will fail.
29624 *
29625 *     ------------------------------------------------------------
29626 *       removable media    USB/1394   |   WP Check
29627 *     ------------------------------------------------------------
29628 *             false          false    |     No
29629 *             false          true     |     No
29630 *             true           false    |     Yes
29631 *             true           true     |     Yes
29632 *     ------------------------------------------------------------
29633 *
29634 *
29635 * 17. syslog when corrupted VTOC is encountered
29636 *
 *      Currently, if an invalid VTOC is encountered, sd prints a syslog
 *      message only for fixed SCSI disks.
29639 *     ------------------------------------------------------------
29640 *       removable media    USB/1394   |   print syslog
29641 *     ------------------------------------------------------------
29642 *             false          false    |     Yes
29643 *             false          true     |     No
29644 *             true           false    |     No
29645 *             true           true     |     No
29646 *     ------------------------------------------------------------
29647 */
29648static void
29649sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
29650{
29651	int	pm_capable_prop;
29652
29653	ASSERT(un->un_sd);
29654	ASSERT(un->un_sd->sd_inq);
29655
29656	/*
29657	 * Enable SYNC CACHE support for all devices.
29658	 */
29659	un->un_f_sync_cache_supported = TRUE;
29660
	/*
	 * Set the sync cache required flag to false.
	 * This ensures that no SYNC CACHE command is
	 * sent when there have been no writes.
	 */
29666	un->un_f_sync_cache_required = FALSE;
29667
29668	if (un->un_sd->sd_inq->inq_rmb) {
		/*
		 * The media of this device is removable, so it is possible
		 * to change the medium after the device has been opened.
		 * Thus we should support this operation.
		 */
29674		un->un_f_has_removable_media = TRUE;
29675
29676		/*
		 * Support the non-512-byte blocksize of removable media
		 * devices.
29678		 */
29679		un->un_f_non_devbsize_supported = TRUE;
29680
29681		/*
29682		 * Assume that all removable media devices support DOOR_LOCK
29683		 */
29684		un->un_f_doorlock_supported = TRUE;
29685
		/*
		 * A removable media device may be opened with the NDELAY
		 * flag when there is no media in the drive; in that case we
		 * don't care whether the device is writable. Without the
		 * NDELAY flag, we need to check whether the media is
		 * write-protected.
		 */
29692		un->un_f_chk_wp_open = TRUE;
29693
		/*
		 * Need to start a SCSI watch thread to monitor the media
		 * state; when media is inserted or ejected, notify syseventd.
		 */
29698		un->un_f_monitor_media_state = TRUE;
29699
		/*
		 * Some devices don't support the START_STOP_UNIT command.
		 * Therefore, we'd better check whether a device supports it
		 * before sending it.
		 */
29705		un->un_f_check_start_stop = TRUE;
29706
29707		/*
29708		 * support eject media ioctl:
29709		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
29710		 */
29711		un->un_f_eject_media_supported = TRUE;
29712
		/*
		 * Because many removable-media devices don't support
		 * LOG_SENSE, we cannot use this command to check whether
		 * a removable media device supports power management.
		 * We assume that they support power management via the
		 * START_STOP_UNIT command and can be spun up and down
		 * without limitations.
		 */
29721		un->un_f_pm_supported = TRUE;
29722
		/*
		 * Need to create a zero-length (Boolean) property
		 * "removable-media" for removable media devices.
		 * Note that the return value is not checked: if the
		 * property cannot be created, we do not want the attach
		 * to fail altogether. This is consistent with other
		 * property creation in attach.
		 */
29731		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
29732		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
29733
29734	} else {
29735		/*
		 * Create a device ID for the device.
29737		 */
29738		un->un_f_devid_supported = TRUE;
29739
29740		/*
		 * Spin up non-removable-media devices once they are attached.
29742		 */
29743		un->un_f_attach_spinup = TRUE;
29744
		/*
		 * According to the SCSI specification, sense data comes in
		 * two formats: fixed format and descriptor format. At
		 * present, we don't support descriptor format sense data
		 * for removable media.
		 */
29751		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
29752			un->un_f_descr_format_supported = TRUE;
29753		}
29754
29755		/*
29756		 * kstats are created only for non-removable media devices.
29757		 *
29758		 * Set this in sd.conf to 0 in order to disable kstats.  The
29759		 * default is 1, so they are enabled by default.
29760		 */
29761		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
29762		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
29763		    "enable-partition-kstats", 1));
29764
29765		/*
29766		 * Check if HBA has set the "pm-capable" property.
29767		 * If "pm-capable" exists and is non-zero then we can
29768		 * power manage the device without checking the start/stop
29769		 * cycle count log sense page.
29770		 *
29771		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
29772		 * then we should not power manage the device.
29773		 *
29774		 * If "pm-capable" doesn't exist then pm_capable_prop will
29775		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
29776		 * sd will check the start/stop cycle count log sense page
29777		 * and power manage the device if the cycle count limit has
29778		 * not been exceeded.
29779		 */
29780		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
29781		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
29782		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
29783			un->un_f_log_sense_supported = TRUE;
29784		} else {
29785			/*
29786			 * pm-capable property exists.
29787			 *
29788			 * Convert "TRUE" values for pm_capable_prop to
29789			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
29790			 * later. "TRUE" values are any values except
29791			 * SD_PM_CAPABLE_FALSE (0) and
29792			 * SD_PM_CAPABLE_UNDEFINED (-1)
29793			 */
29794			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
29795				un->un_f_log_sense_supported = FALSE;
29796			} else {
29797				un->un_f_pm_supported = TRUE;
29798			}
29799
29800			SD_INFO(SD_LOG_ATTACH_DETACH, un,
29801			    "sd_unit_attach: un:0x%p pm-capable "
29802			    "property set to %d.\n", un, un->un_f_pm_supported);
29803		}
29804	}
29805
29806	if (un->un_f_is_hotpluggable) {
29807
29808		/*
		 * Have to watch hotpluggable devices as well, since
		 * that's the only way for userland applications to
		 * detect hot removal while the device is busy/mounted.
29812		 */
29813		un->un_f_monitor_media_state = TRUE;
29814
29815		un->un_f_check_start_stop = TRUE;
29816
29817	}
29818}
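
/*
 * Illustrative userland sketch (not driver code) for section 6 of the
 * comment above; the device path is a placeholder:
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *
 *	int removable = 0;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY | O_NDELAY);
 *	if (fd >= 0 && ioctl(fd, DKIOCREMOVABLE, &removable) == 0)
 *		(void) printf("removable-media: %d\n", removable);
 */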
29819
/*
 * sd_tg_rdwr:
 * Provides read/write access for cmlb via sd_tgops. The start_block is
 * in system block size units, reqlength is in bytes.
 */
29826static int
29827sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
29828    diskaddr_t start_block, size_t reqlength, void *tg_cookie)
29829{
29830	struct sd_lun *un;
29831	int path_flag = (int)(uintptr_t)tg_cookie;
29832	char *dkl = NULL;
29833	diskaddr_t real_addr = start_block;
29834	diskaddr_t first_byte, end_block;
29835
29836	size_t	buffer_size = reqlength;
29837	int rval = 0;
29838	diskaddr_t	cap;
29839	uint32_t	lbasize;
29840	sd_ssc_t	*ssc;
29841
29842	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
29843	if (un == NULL)
29844		return (ENXIO);
29845
29846	if (cmd != TG_READ && cmd != TG_WRITE)
29847		return (EINVAL);
29848
29849	ssc = sd_ssc_init(un);
29850	mutex_enter(SD_MUTEX(un));
29851	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
29852		mutex_exit(SD_MUTEX(un));
29853		rval = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
29854		    &lbasize, path_flag);
29855		if (rval != 0)
29856			goto done1;
29857		mutex_enter(SD_MUTEX(un));
29858		sd_update_block_info(un, lbasize, cap);
		if (un->un_f_tgt_blocksize_is_valid == FALSE) {
29860			mutex_exit(SD_MUTEX(un));
29861			rval = EIO;
29862			goto done;
29863		}
29864	}
29865
29866	if (NOT_DEVBSIZE(un)) {
		/*
		 * sys_blocksize != tgt_blocksize; need to re-adjust the
		 * blkno and save the index to the beginning of dk_label.
		 */
29871		first_byte  = SD_SYSBLOCKS2BYTES(un, start_block);
29872		real_addr = first_byte / un->un_tgt_blocksize;
29873
29874		end_block = (first_byte + reqlength +
29875		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
29876
29877		/* round up buffer size to multiple of target block size */
29878		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
29879
		SD_TRACE(SD_LOG_IO_PARTITION, un,
		    "sd_tg_rdwr: label_addr: 0x%x allocation size: 0x%x\n",
		    real_addr, buffer_size);
29883
29884		if (((first_byte % un->un_tgt_blocksize) != 0) ||
29885		    (reqlength % un->un_tgt_blocksize) != 0)
29886			/* the request is not aligned */
29887			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
29888	}
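
	/*
	 * Worked example (illustrative): with a 512-byte system block size
	 * and a 2048-byte target block size, start_block = 3 and
	 * reqlength = 512 give first_byte = 1536, real_addr = 0,
	 * end_block = 1 and buffer_size = 2048. Since first_byte is not
	 * target-block aligned, a bounce buffer (dkl) is allocated and the
	 * 512 bytes of interest live at offset 1536 within it.
	 */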
29889
29890	/*
29891	 * The MMC standard allows READ CAPACITY to be
29892	 * inaccurate by a bounded amount (in the interest of
29893	 * response latency).  As a result, failed READs are
29894	 * commonplace (due to the reading of metadata and not
29895	 * data). Depending on the per-Vendor/drive Sense data,
29896	 * the failed READ can cause many (unnecessary) retries.
29897	 */
29898
29899	if (ISCD(un) && (cmd == TG_READ) &&
29900	    (un->un_f_blockcount_is_valid == TRUE) &&
	    ((start_block == (un->un_blockcount - 1)) ||
	    (start_block == (un->un_blockcount - 2)))) {
		path_flag = SD_PATH_DIRECT_PRIORITY;
29904	}
29905
29906	mutex_exit(SD_MUTEX(un));
29907	if (cmd == TG_READ) {
		rval = sd_send_scsi_READ(ssc, (dkl != NULL) ? dkl : bufaddr,
29909		    buffer_size, real_addr, path_flag);
29910		if (dkl != NULL)
29911			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
29912			    real_addr), bufaddr, reqlength);
29913	} else {
29914		if (dkl) {
29915			rval = sd_send_scsi_READ(ssc, dkl, buffer_size,
29916			    real_addr, path_flag);
29917			if (rval) {
29918				goto done1;
29919			}
29920			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
29921			    real_addr), reqlength);
29922		}
		rval = sd_send_scsi_WRITE(ssc, (dkl != NULL) ? dkl : bufaddr,
29924		    buffer_size, real_addr, path_flag);
29925	}
29926
29927done1:
29928	if (dkl != NULL)
29929		kmem_free(dkl, buffer_size);
29930
29931	if (rval != 0) {
29932		if (rval == EIO)
29933			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
29934		else
29935			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
29936	}
29937done:
29938	sd_ssc_fini(ssc);
29939	return (rval);
29940}
29941
29942
29943static int
29944sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
29945{
29946
29947	struct sd_lun *un;
29948	diskaddr_t	cap;
29949	uint32_t	lbasize;
29950	int		path_flag = (int)(uintptr_t)tg_cookie;
29951	int		ret = 0;
29952
29953	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
29954	if (un == NULL)
29955		return (ENXIO);
29956
29957	switch (cmd) {
29958	case TG_GETPHYGEOM:
29959	case TG_GETVIRTGEOM:
29960	case TG_GETCAPACITY:
29961	case TG_GETBLOCKSIZE:
29962		mutex_enter(SD_MUTEX(un));
29963
29964		if ((un->un_f_blockcount_is_valid == TRUE) &&
29965		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
29966			cap = un->un_blockcount;
29967			lbasize = un->un_tgt_blocksize;
29968			mutex_exit(SD_MUTEX(un));
29969		} else {
29970			sd_ssc_t	*ssc;
29971			mutex_exit(SD_MUTEX(un));
29972			ssc = sd_ssc_init(un);
29973			ret = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
29974			    &lbasize, path_flag);
29975			if (ret != 0) {
29976				if (ret == EIO)
29977					sd_ssc_assessment(ssc,
29978					    SD_FMT_STATUS_CHECK);
29979				else
29980					sd_ssc_assessment(ssc,
29981					    SD_FMT_IGNORE);
29982				sd_ssc_fini(ssc);
29983				return (ret);
29984			}
29985			sd_ssc_fini(ssc);
29986			mutex_enter(SD_MUTEX(un));
29987			sd_update_block_info(un, lbasize, cap);
29988			if ((un->un_f_blockcount_is_valid == FALSE) ||
29989			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
29990				mutex_exit(SD_MUTEX(un));
29991				return (EIO);
29992			}
29993			mutex_exit(SD_MUTEX(un));
29994		}
29995
29996		if (cmd == TG_GETCAPACITY) {
29997			*(diskaddr_t *)arg = cap;
29998			return (0);
29999		}
30000
30001		if (cmd == TG_GETBLOCKSIZE) {
30002			*(uint32_t *)arg = lbasize;
30003			return (0);
30004		}
30005
30006		if (cmd == TG_GETPHYGEOM)
30007			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
30008			    cap, lbasize, path_flag);
30009		else
30010			/* TG_GETVIRTGEOM */
30011			ret = sd_get_virtual_geometry(un,
30012			    (cmlb_geom_t *)arg, cap, lbasize);
30013
30014		return (ret);
30015
30016	case TG_GETATTR:
30017		mutex_enter(SD_MUTEX(un));
30018		((tg_attribute_t *)arg)->media_is_writable =
30019		    un->un_f_mmc_writable_media;
30020		mutex_exit(SD_MUTEX(un));
30021		return (0);
30022	default:
30023		return (ENOTTY);
30024
30025	}
30026}
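
/*
 * Illustrative sketch (not driver code): cmlb reaches this entry point
 * through sd_tgops. A direct call, assuming SD_PATH_DIRECT as the path
 * flag carried in tg_cookie, would look like:
 *
 *	diskaddr_t cap;
 *	uint32_t lbasize;
 *	uint64_t total_bytes;
 *	void *cookie = (void *)(uintptr_t)SD_PATH_DIRECT;
 *
 *	if (sd_tg_getinfo(devi, TG_GETCAPACITY, &cap, cookie) == 0 &&
 *	    sd_tg_getinfo(devi, TG_GETBLOCKSIZE, &lbasize, cookie) == 0)
 *		total_bytes = cap * lbasize;	// cap blocks of lbasize bytes
 */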
30027
30028/*
30029 *    Function: sd_ssc_ereport_post
30030 *
 * Description: Called when the sd driver needs to post an ereport.
30032 *
30033 *    Context: Kernel thread or interrupt context.
30034 */
30035static void
30036sd_ssc_ereport_post(sd_ssc_t *ssc, enum sd_driver_assessment drv_assess)
30037{
30038	int uscsi_path_instance = 0;
30039	uchar_t	uscsi_pkt_reason;
30040	uint32_t uscsi_pkt_state;
30041	uint32_t uscsi_pkt_statistics;
30042	uint64_t uscsi_ena;
30043	uchar_t op_code;
30044	uint8_t *sensep;
30045	union scsi_cdb *cdbp;
30046	uint_t cdblen = 0;
30047	uint_t senlen = 0;
30048	struct sd_lun *un;
30049	dev_info_t *dip;
30050	char *devid;
30051	int ssc_invalid_flags = SSC_FLAGS_INVALID_PKT_REASON |
30052	    SSC_FLAGS_INVALID_STATUS |
30053	    SSC_FLAGS_INVALID_SENSE |
30054	    SSC_FLAGS_INVALID_DATA;
30055	char assessment[16];
30056
30057	ASSERT(ssc != NULL);
30058	ASSERT(ssc->ssc_uscsi_cmd != NULL);
30059	ASSERT(ssc->ssc_uscsi_info != NULL);
30060
30061	un = ssc->ssc_un;
30062	ASSERT(un != NULL);
30063
30064	dip = un->un_sd->sd_dev;
30065
30066	/*
30067	 * Get the devid:
30068	 *	devid will only be passed to non-transport error reports.
30069	 */
30070	devid = DEVI(dip)->devi_devid_str;
30071
	/*
	 * If we are panicking, suspended, or dumping, the command will
	 * not be executed, so we don't post an ereport.
	 */
30076	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
30077	    (un->un_state == SD_STATE_DUMPING))
30078		return;
30079
30080	uscsi_pkt_reason = ssc->ssc_uscsi_info->ui_pkt_reason;
30081	uscsi_path_instance = ssc->ssc_uscsi_cmd->uscsi_path_instance;
30082	uscsi_pkt_state = ssc->ssc_uscsi_info->ui_pkt_state;
30083	uscsi_pkt_statistics = ssc->ssc_uscsi_info->ui_pkt_statistics;
30084	uscsi_ena = ssc->ssc_uscsi_info->ui_ena;
30085
30086	sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
30087	cdbp = (union scsi_cdb *)ssc->ssc_uscsi_cmd->uscsi_cdb;
30088
	/* In rare cases, e.g. DOORLOCK, the cdb could be NULL */
30090	if (cdbp == NULL) {
30091		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_ssc_ereport_post: empty cdb\n");
30093		return;
30094	}
30095
30096	op_code = cdbp->scc_cmd;
30097
30098	cdblen = (int)ssc->ssc_uscsi_cmd->uscsi_cdblen;
30099	senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
30100	    ssc->ssc_uscsi_cmd->uscsi_rqresid);
30101
30102	if (senlen > 0)
30103		ASSERT(sensep != NULL);
30104
	/*
	 * Map drv_assess to the corresponding assessment string.
	 * SD_FM_DRV_FATAL is mapped to "fail" or "fatal" depending
	 * on the sense-key returned.
	 */
30110	switch (drv_assess) {
30111		case SD_FM_DRV_RECOVERY:
30112			(void) sprintf(assessment, "%s", "recovered");
30113			break;
30114		case SD_FM_DRV_RETRY:
30115			(void) sprintf(assessment, "%s", "retry");
30116			break;
30117		case SD_FM_DRV_NOTICE:
30118			(void) sprintf(assessment, "%s", "info");
30119			break;
30120		case SD_FM_DRV_FATAL:
30121		default:
30122			(void) sprintf(assessment, "%s", "unknown");
30123	}
30124	/*
30125	 * If drv_assess == SD_FM_DRV_RECOVERY, this should be a recovered
30126	 * command, we will post ereport.io.scsi.cmd.disk.recovered.
30127	 * driver-assessment will always be "recovered" here.
30128	 */
30129	if (drv_assess == SD_FM_DRV_RECOVERY) {
30130		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30131		    "cmd.disk.recovered", uscsi_ena, devid, DDI_NOSLEEP,
30132		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30133		    "driver-assessment", DATA_TYPE_STRING, assessment,
30134		    "op-code", DATA_TYPE_UINT8, op_code,
30135		    "cdb", DATA_TYPE_UINT8_ARRAY,
30136		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30137		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30138		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30139		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
30140		    NULL);
30141		return;
30142	}
30143
30144	/*
	 * If there is unexpected/undecodable data, we should post
30146	 * ereport.io.scsi.cmd.disk.dev.uderr.
30147	 * driver-assessment will be set based on parameter drv_assess.
30148	 * SSC_FLAGS_INVALID_SENSE - invalid sense data sent back.
30149	 * SSC_FLAGS_INVALID_PKT_REASON - invalid pkt-reason encountered.
30150	 * SSC_FLAGS_INVALID_STATUS - invalid stat-code encountered.
30151	 * SSC_FLAGS_INVALID_DATA - invalid data sent back.
30152	 */
30153	if (ssc->ssc_flags & ssc_invalid_flags) {
30154		if (ssc->ssc_flags & SSC_FLAGS_INVALID_SENSE) {
30155			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30156			    "cmd.disk.dev.uderr", uscsi_ena, devid, DDI_NOSLEEP,
30157			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30158			    "driver-assessment", DATA_TYPE_STRING,
30159			    drv_assess == SD_FM_DRV_FATAL ?
30160			    "fail" : assessment,
30161			    "op-code", DATA_TYPE_UINT8, op_code,
30162			    "cdb", DATA_TYPE_UINT8_ARRAY,
30163			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30164			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30165			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30166			    "pkt-stats", DATA_TYPE_UINT32,
30167			    uscsi_pkt_statistics,
30168			    "stat-code", DATA_TYPE_UINT8,
30169			    ssc->ssc_uscsi_cmd->uscsi_status,
30170			    "un-decode-info", DATA_TYPE_STRING,
30171			    ssc->ssc_info,
30172			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
30173			    senlen, sensep,
30174			    NULL);
30175		} else {
			/*
			 * For other types of invalid data, the
			 * un-decode-value field is left empty because the
			 * undecodable content can be seen in the upper
			 * level payload or inside un-decode-info.
			 */
30182			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30183			    "cmd.disk.dev.uderr", uscsi_ena, devid, DDI_NOSLEEP,
30184			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30185			    "driver-assessment", DATA_TYPE_STRING,
30186			    drv_assess == SD_FM_DRV_FATAL ?
30187			    "fail" : assessment,
30188			    "op-code", DATA_TYPE_UINT8, op_code,
30189			    "cdb", DATA_TYPE_UINT8_ARRAY,
30190			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30191			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30192			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
30193			    "pkt-stats", DATA_TYPE_UINT32,
30194			    uscsi_pkt_statistics,
30195			    "stat-code", DATA_TYPE_UINT8,
30196			    ssc->ssc_uscsi_cmd->uscsi_status,
30197			    "un-decode-info", DATA_TYPE_STRING,
30198			    ssc->ssc_info,
30199			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
30200			    0, NULL,
30201			    NULL);
30202		}
30203		ssc->ssc_flags &= ~ssc_invalid_flags;
30204		return;
30205	}
30206
30207	if (uscsi_pkt_reason != CMD_CMPLT ||
30208	    (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)) {
		/*
		 * Either pkt-reason != CMD_CMPLT, or SSC_FLAGS_TRAN_ABORT
		 * was set inside sd_start_cmds due to an error (bad packet
		 * or fatal transport error). We treat it as a transport
		 * error and post ereport.io.scsi.cmd.disk.tran.
		 * driver-assessment will be set based on drv_assess.
		 * We pass a NULL devid because it is a transport error.
		 */
30218		if (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)
30219			ssc->ssc_flags &= ~SSC_FLAGS_TRAN_ABORT;
30220
30221		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
30222		    "cmd.disk.tran", uscsi_ena, NULL, DDI_NOSLEEP, FM_VERSION,
30223		    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30224		    "driver-assessment", DATA_TYPE_STRING,
30225		    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
30226		    "op-code", DATA_TYPE_UINT8, op_code,
30227		    "cdb", DATA_TYPE_UINT8_ARRAY,
30228		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30229		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
30230		    "pkt-state", DATA_TYPE_UINT8, uscsi_pkt_state,
30231		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
30232		    NULL);
30233	} else {
30234		/*
30235		 * If we got here, we have a completed command, and we need
30236		 * to further investigate the sense data to see what kind
30237		 * of ereport we should post.
30238		 * Post ereport.io.scsi.cmd.disk.dev.rqs.merr
30239		 * if sense-key == 0x3.
30240		 * Post ereport.io.scsi.cmd.disk.dev.rqs.derr otherwise.
30241		 * driver-assessment will be set based on the parameter
30242		 * drv_assess.
30243		 */
30244		if (senlen > 0) {
30245			/*
30246			 * Here we have sense data available.
30247			 */
30248			uint8_t sense_key;
30249			sense_key = scsi_sense_key(sensep);
30250			if (sense_key == 0x3) {
30251				/*
				 * sense-key == 0x3 (medium error);
30253				 * driver-assessment should be "fatal" if
30254				 * drv_assess is SD_FM_DRV_FATAL.
30255				 */
30256				scsi_fm_ereport_post(un->un_sd,
30257				    uscsi_path_instance,
30258				    "cmd.disk.dev.rqs.merr",
30259				    uscsi_ena, devid, DDI_NOSLEEP, FM_VERSION,
30260				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
30261				    "driver-assessment",
30262				    DATA_TYPE_STRING,
30263				    drv_assess == SD_FM_DRV_FATAL ?
30264				    "fatal" : assessment,
30265				    "op-code",
30266				    DATA_TYPE_UINT8, op_code,
30267				    "cdb",
30268				    DATA_TYPE_UINT8_ARRAY, cdblen,
30269				    ssc->ssc_uscsi_cmd->uscsi_cdb,
30270				    "pkt-reason",
30271				    DATA_TYPE_UINT8, uscsi_pkt_reason,
30272				    "pkt-state",
30273				    DATA_TYPE_UINT8, uscsi_pkt_state,
30274				    "pkt-stats",
30275				    DATA_TYPE_UINT32,
30276				    uscsi_pkt_statistics,
30277				    "stat-code",
30278				    DATA_TYPE_UINT8,
30279				    ssc->ssc_uscsi_cmd->uscsi_status,
30280				    "key",
30281				    DATA_TYPE_UINT8,
30282				    scsi_sense_key(sensep),
30283				    "asc",
30284				    DATA_TYPE_UINT8,
30285				    scsi_sense_asc(sensep),
30286				    "ascq",
30287				    DATA_TYPE_UINT8,
30288				    scsi_sense_ascq(sensep),
30289				    "sense-data",
30290				    DATA_TYPE_UINT8_ARRAY,
30291				    senlen, sensep,
30292				    "lba",
30293				    DATA_TYPE_UINT64,
30294				    ssc->ssc_uscsi_info->ui_lba,
30295				    NULL);
			} else {
				/*
				 * If sense-key == 0x4 (hardware
				 * error), driver-assessment should
				 * be "fatal" if drv_assess is
				 * SD_FM_DRV_FATAL.
				 */
				scsi_fm_ereport_post(un->un_sd,
				    uscsi_path_instance,
				    "cmd.disk.dev.rqs.derr",
				    uscsi_ena, devid, DDI_NOSLEEP,
				    FM_VERSION,
				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
				    "driver-assessment",
				    DATA_TYPE_STRING,
				    drv_assess == SD_FM_DRV_FATAL ?
				    (sense_key == 0x4 ?
				    "fatal" : "fail") : assessment,
				    "op-code",
				    DATA_TYPE_UINT8, op_code,
				    "cdb",
				    DATA_TYPE_UINT8_ARRAY, cdblen,
				    ssc->ssc_uscsi_cmd->uscsi_cdb,
				    "pkt-reason",
				    DATA_TYPE_UINT8, uscsi_pkt_reason,
				    "pkt-state",
				    DATA_TYPE_UINT8, uscsi_pkt_state,
				    "pkt-stats",
				    DATA_TYPE_UINT32,
				    uscsi_pkt_statistics,
				    "stat-code",
				    DATA_TYPE_UINT8,
				    ssc->ssc_uscsi_cmd->uscsi_status,
				    "key",
				    DATA_TYPE_UINT8,
				    scsi_sense_key(sensep),
				    "asc",
				    DATA_TYPE_UINT8,
				    scsi_sense_asc(sensep),
				    "ascq",
				    DATA_TYPE_UINT8,
				    scsi_sense_ascq(sensep),
				    "sense-data",
				    DATA_TYPE_UINT8_ARRAY,
				    senlen, sensep,
				    NULL);
			}
30343		} else {
30344			/*
30345			 * For stat_code == STATUS_GOOD, this is not a
30346			 * hardware error.
30347			 */
30348			if (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD)
30349				return;
30350
30351			/*
			 * Post ereport.io.scsi.cmd.disk.dev.serr if we got
			 * a stat-code but sense data is unavailable.
30354			 * driver-assessment will be set based on parameter
30355			 * drv_assess.
30356			 */
30357			scsi_fm_ereport_post(un->un_sd,
30358			    uscsi_path_instance, "cmd.disk.dev.serr", uscsi_ena,
30359			    devid, DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8,
30360			    FM_EREPORT_VERS0,
30361			    "driver-assessment", DATA_TYPE_STRING,
30362			    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
30363			    "op-code", DATA_TYPE_UINT8, op_code,
30364			    "cdb",
30365			    DATA_TYPE_UINT8_ARRAY,
30366			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
30367			    "pkt-reason",
30368			    DATA_TYPE_UINT8, uscsi_pkt_reason,
30369			    "pkt-state",
30370			    DATA_TYPE_UINT8, uscsi_pkt_state,
30371			    "pkt-stats",
30372			    DATA_TYPE_UINT32, uscsi_pkt_statistics,
30373			    "stat-code",
30374			    DATA_TYPE_UINT8,
30375			    ssc->ssc_uscsi_cmd->uscsi_status,
30376			    NULL);
30377		}
30378	}
30379}
30380
30381/*
30382 *     Function: sd_ssc_extract_info
30383 *
 * Description: Extract information available to help generate an ereport.
30385 *
30386 *     Context: Kernel thread or interrupt context.
30387 */
30388static void
30389sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un, struct scsi_pkt *pktp,
30390    struct buf *bp, struct sd_xbuf *xp)
30391{
30392	size_t senlen = 0;
30393	union scsi_cdb *cdbp;
30394	int path_instance;
30395	/*
30396	 * Need scsi_cdb_size array to determine the cdb length.
30397	 */
30398	extern uchar_t	scsi_cdb_size[];
30399
30400	ASSERT(un != NULL);
30401	ASSERT(pktp != NULL);
30402	ASSERT(bp != NULL);
30403	ASSERT(xp != NULL);
30404	ASSERT(ssc != NULL);
30405	ASSERT(mutex_owned(SD_MUTEX(un)));
30406
30407	/*
30408	 * Transfer the cdb buffer pointer here.
30409	 */
30410	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
30411
30412	ssc->ssc_uscsi_cmd->uscsi_cdblen = scsi_cdb_size[GETGROUP(cdbp)];
30413	ssc->ssc_uscsi_cmd->uscsi_cdb = (caddr_t)cdbp;
30414
30415	/*
	 * Transfer the sense data buffer pointer if sense data is available;
30417	 * calculate the sense data length first.
30418	 */
30419	if ((xp->xb_sense_state & STATE_XARQ_DONE) ||
30420	    (xp->xb_sense_state & STATE_ARQ_DONE)) {
30421		/*
		 * We enter here in the ARQ case.
30423		 */
30424		if (xp->xb_sense_state & STATE_XARQ_DONE) {
30425			senlen = MAX_SENSE_LENGTH - xp->xb_sense_resid;
30426		} else {
30427			senlen = SENSE_LENGTH;
30428		}
30429	} else {
30430		/*
		 * We enter this branch in the non-ARQ case.
30432		 */
30433		if (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK &&
30434		    (xp->xb_sense_state & STATE_XFERRED_DATA)) {
30435			senlen = SENSE_LENGTH - xp->xb_sense_resid;
30436		}
30438	}
30439
30440	ssc->ssc_uscsi_cmd->uscsi_rqlen = (senlen & 0xff);
30441	ssc->ssc_uscsi_cmd->uscsi_rqresid = 0;
30442	ssc->ssc_uscsi_cmd->uscsi_rqbuf = (caddr_t)xp->xb_sense_data;
30443
30444	ssc->ssc_uscsi_cmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
30445
30446	/*
30447	 * Only transfer path_instance when scsi_pkt was properly allocated.
30448	 */
30449	path_instance = pktp->pkt_path_instance;
30450	if (scsi_pkt_allocated_correctly(pktp) && path_instance)
30451		ssc->ssc_uscsi_cmd->uscsi_path_instance = path_instance;
30452	else
30453		ssc->ssc_uscsi_cmd->uscsi_path_instance = 0;
30454
30455	/*
30456	 * Copy in the other fields we may need when posting ereport.
30457	 */
30458	ssc->ssc_uscsi_info->ui_pkt_reason = pktp->pkt_reason;
30459	ssc->ssc_uscsi_info->ui_pkt_state = pktp->pkt_state;
30460	ssc->ssc_uscsi_info->ui_pkt_statistics = pktp->pkt_statistics;
30461	ssc->ssc_uscsi_info->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
30462
30463	/*
	 * For a partial read/write command, we will not create an ena
	 * in case a successful command would be recognized as recovered.
30466	 */
30467	if ((pktp->pkt_reason == CMD_CMPLT) &&
30468	    (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD) &&
30469	    (senlen == 0)) {
30470		return;
30471	}
30472
30473	/*
30474	 * To associate ereports of a single command execution flow, we
30475	 * need a shared ena for a specific command.
30476	 */
30477	if (xp->xb_ena == 0)
30478		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
30479	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
30480}
30481