/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>
#include <sys/cmlb.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>

#include <sys/fm/protocol.h>

/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver"
char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
#else /* !__fibre */
#define	SD_MODULE_NAME	"SCSI Disk Driver"
char _depends_on[]	= "misc/scsi misc/cmlb";
#endif /* !__fibre */

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides things should work fine
 * anyways if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 *
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
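
/*
 * Illustrative sketch (not compiled) of the fallback policy described
 * above, assuming the HBA exports "interconnect-type" as an integer
 * property on its devinfo node; the actual lookup performed by this
 * driver may differ, and "pdip" is a hypothetical local.
 */
#if 0
	int itype = ddi_prop_get_int(DDI_DEV_T_ANY, pdip, 0,
	    "interconnect-type", -1);

	switch (itype) {
	case INTERCONNECT_FIBRE:
	case INTERCONNECT_SSA:
	case INTERCONNECT_FABRIC:
		/* Fibre Channel semantics (as per the old ssd). */
		break;
	default:
		/*
		 * Property missing or some other type (including the
		 * unsupported INTERCONNECT_1394/INTERCONNECT_USB):
		 * assume parallel SCSI semantics.
		 */
		break;
	}
#endif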

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes
#define	sd_tgops			ssd_tgops

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#define	sd_ssc_init			ssd_ssc_init
#define	sd_ssc_send			ssd_ssc_send
#define	sd_ssc_fini			ssd_ssc_fini
#define	sd_ssc_assessment		ssd_ssc_assessment
#define	sd_ssc_post			ssd_ssc_post
#define	sd_ssc_print			ssd_ssc_print
#define	sd_ssc_ereport_post		ssd_ssc_ereport_post
#define	sd_ssc_set_info			ssd_ssc_set_info
#define	sd_ssc_extract_info		ssd_ssc_extract_info

#endif

#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in-progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;

/*
 * Structs and globals for recording attached lun information.
 * This maintains a chain in which each node represents a SCSI controller.
 * Each node records the number of luns attached to each target connected
 * to that controller.
 * For parallel SCSI devices only.
 */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;
	dev_info_t			*pdip;
	int				nlun[NTARGETS_WIDE];
};

/*
 * Flags to indicate whether a lun is attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))
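
/*
 * Illustrative sketch (not compiled) of how the per-controller chain above
 * might be updated when a lun attaches.  The real bookkeeping is done by
 * sd_scsi_update_lun_on_target(); "cp", "pdip", and "target" here are
 * hypothetical locals.
 */
#if 0
	struct sd_scsi_hba_tgt_lun *cp;

	mutex_enter(&sd_scsi_target_lun_mutex);
	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			cp->nlun[target]++;	/* one more lun on target */
			break;
		}
	}
	mutex_exit(&sd_scsi_target_lun_mutex);
#endif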

/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
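
/*
 * Illustrative sketch (not compiled) of the caching idea: consult the
 * per-controller cache before paying for a scsi_probe(), and remember a
 * negative result so the next open does not probe again.  The real logic
 * lives in sd_scsi_probe_with_cache(); "cachep", "tgt", "devp", and
 * "waitfn" are hypothetical locals.
 */
#if 0
	if (cachep->cache[tgt] == SCSIPROBE_NORESP)
		return (SCSIPROBE_NORESP);	/* known absent: skip probe */

	rval = scsi_probe(devp, waitfn);
	if (rval == SCSIPROBE_NORESP)
		cachep->cache[tgt] = SCSIPROBE_NORESP;	/* cache the miss */
	return (rval);
#endif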

/*
 * Power attribute table
 */
static sd_power_attr_ss sd_pwr_ss = {
	{ "NAME=spindle-motor", "0=off", "1=on", NULL },
	{0, 100},
	{30, 0},
	{20000, 0}
};

static sd_power_attr_pc sd_pwr_pc = {
	{ "NAME=spindle-motor", "0=stopped", "1=standby", "2=idle",
		"3=active", NULL },
	{0, 0, 0, 100},
	{90, 90, 20, 0},
	{15000, 15000, 1000, 0}
};

/*
 * Power level to power condition
 */
static int sd_pl2pc[] = {
	SD_TARGET_START_VALID,
	SD_TARGET_STANDBY,
	SD_TARGET_IDLE,
	SD_TARGET_ACTIVE
};

/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0,
	1
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or perhaps
 * something else, as defined by the flags, at a future time).  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
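
/*
 * For reference (per the scsi_inquiry structure): the device_id strings
 * below are laid out as an 8-byte vid immediately followed by a 16-byte
 * pid, each space-padded, optionally followed by the 4-byte revision.
 * So "SEAGATE ST34371FC" is vid "SEAGATE " plus pid "ST34371FC".
 */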
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1818",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000i",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     STK6580_6780", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SUN     SUN_6180", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
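
/*
 * Illustrative sketch (not compiled) of the blank-compressed matching
 * described above the table, where a run of blanks (possibly empty at a
 * boundary) in either string matches a run of blanks in the other; this
 * is a deliberately loose sketch of the idea, not a copy of the driver's
 * actual comparison routine, sd_blank_cmp().
 */
#if 0
static int
blank_compressed_match(const char *tbl, const char *id)
{
	for (;;) {
		if (*tbl == ' ' || *id == ' ') {
			while (*tbl == ' ')	/* collapse blank runs */
				tbl++;
			while (*id == ' ')
				id++;
			continue;
		}
		if (*tbl == '\0' || *id == '\0')
			return (*tbl == *id);	/* match iff both ended */
		if (*tbl++ != *id++)
			return (0);
	}
}
#endif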

/*
 * Emulation mode disk drive VID/PID table
 */
static char sd_flash_dev_table[][25] = {
	"ATA     MARVELL SD88SA02",
	"MARVELL SD88SA02",
	"TOSHIBA THNSNV05",
};

static const int sd_flash_dev_table_size =
	sizeof (sd_flash_dev_table) / sizeof (sd_flash_dev_table[0]);

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_INTERCONNECT_SAS		5

#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	(((un)->un_interconnect_type == SD_INTERCONNECT_SATA) || \
	((un)->un_interconnect_type == SD_INTERCONNECT_SAS))

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
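
/*
 * For example, New_state(un, SD_STATE_SUSPENDED) records the current
 * un_state in un_last_state before switching, and a later
 * Restore_state(un) swaps back to that saved state.
 */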

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
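
/*
 * Reading the table above: each row gives a CDB group's size, its group
 * code, and the largest LBA and transfer length that format can encode.
 * For example, a 6-byte Group 0 READ/WRITE carries only a 21-bit LBA
 * (0x1FFFFF) and an 8-bit block count, so any request beyond those limits
 * must be built with a Group 1, 5, or 4 CDB instead.
 */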

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
#define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
#define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
#define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties
#define	sd_check_solid_state		ssd_check_solid_state
#define	sd_check_emulation_mode		ssd_check_emulation_mode

#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_pm_state_change		ssd_pm_state_change
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_get_nv_sup			ssd_get_nv_sup
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_target_change_task		ssd_target_change_task
#define	sd_log_dev_status_event		ssd_log_dev_status_event
#define	sd_log_lun_expansion_event	ssd_log_lun_expansion_event
#define	sd_log_eject_request_event	ssd_log_eject_request_event
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION	\
				ssd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
#define	sd_gesn_media_data_valid	ssd_gesn_media_data_valid
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_get_media_info_com		ssd_get_media_info_com
#define	sd_get_media_info		ssd_get_media_info
#define	sd_get_media_info_ext		ssd_get_media_info_ext
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_nvpair_str_decode		ssd_nvpair_str_decode
#define	sd_strtok_r			ssd_strtok_r
#define	sd_set_properties		ssd_set_properties
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_watch_request_submit		ssd_watch_request_submit
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi
#define	sd_tg_rdwr			ssd_tg_rdwr
#define	sd_tg_getinfo			ssd_tg_getinfo
#define	sd_rmw_msg_print_handler	ssd_rmw_msg_print_handler

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

/*
 * Attached luns on target for parallel scsi
 */
static void sd_scsi_target_lun_init(void);
static void sd_scsi_target_lun_fini(void);
static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);

static int	sd_spin_up_unit(sd_ssc_t *ssc);

/*
 * Use sd_ssc_init to establish the sd_ssc_t struct.
 * Use sd_ssc_send to send a uscsi internal command.
 * Use sd_ssc_fini to free the sd_ssc_t struct.
 */
static sd_ssc_t *sd_ssc_init(struct sd_lun *un);
static int sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd,
    int flag, enum uio_seg dataspace, int path_flag);
static void sd_ssc_fini(sd_ssc_t *ssc);
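
/*
 * Illustrative usage sketch (not compiled) of the sd_ssc_t lifecycle
 * around one internally generated uscsi command.  The uscsi_cmd setup
 * shown is a hypothetical minimal example, not a copy of any particular
 * caller in this driver.
 */
#if 0
	sd_ssc_t	*ssc;
	struct uscsi_cmd ucmd;
	int		rval;

	ssc = sd_ssc_init(un);			/* allocate, bind to lun */
	bzero(&ucmd, sizeof (ucmd));
	/* ... fill in ucmd.uscsi_cdb, ucmd.uscsi_bufaddr, etc. ... */
	rval = sd_ssc_send(ssc, &ucmd, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_DIRECT);
	sd_ssc_assessment(ssc, (rval == 0) ?
	    SD_FMT_STANDARD : SD_FMT_STATUS_CHECK);
	sd_ssc_fini(ssc);			/* free the struct */
#endif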

/*
 * Use sd_ssc_assessment to set the correct type-of-assessment.
 * Use sd_ssc_post to post an ereport & system log.
 *       sd_ssc_post will call sd_ssc_print to print the system log.
 *       sd_ssc_post will call sd_ssc_ereport_post to post the ereport.
 */
static void sd_ssc_assessment(sd_ssc_t *ssc,
    enum sd_type_assessment tp_assess);

static void sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess);
static void sd_ssc_print(sd_ssc_t *ssc, int sd_severity);
static void sd_ssc_ereport_post(sd_ssc_t *ssc,
    enum sd_driver_assessment drv_assess);

/*
 * Use sd_ssc_set_info to mark an un-decodable-data error.
 * Use sd_ssc_extract_info to transfer information from internal
 *       data structures to sd_ssc_t.
 */
static void sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp,
    const char *fmt, ...);
static void sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un,
    struct scsi_pkt *pktp, struct buf *bp, struct sd_xbuf *xp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
    enum uio_seg dataspace, int path_flag);

#ifdef _LP64
static void	sd_enable_descr_sense(sd_ssc_t *ssc);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(sd_ssc_t *ssc);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str);
static char *sd_strtok_r(char *string, const char *sepset, char **lasts);
static void sd_set_properties(struct sd_lun *un, char *name, char *value);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);

static void sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi,
    int reservation_flag);
static int  sd_get_devid(sd_ssc_t *ssc);
static ddi_devid_t sd_create_devid(sd_ssc_t *ssc);
static int  sd_write_deviceid(sd_ssc_t *ssc);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(sd_ssc_t *ssc);

static void sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_pm_state_change(struct sd_lun *un, int level, int flag);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1
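
/*
 * For example, a caller that wants to turn the write cache on while
 * leaving the read cache setting alone would pass rcd_flag as
 * SD_CACHE_NOCHANGE and wce_flag as SD_CACHE_ENABLE.
 */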

static int   sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);
static void  sd_get_nv_sup(sd_ssc_t *ssc);
static dev_t sd_make_device(dev_info_t *devi);
static void  sd_check_solid_state(sd_ssc_t *ssc);
static void  sd_check_emulation_mode(sd_ssc_t *ssc);
static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(sd_ssc_t *ssc, int part);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
	enum uio_seg dataspace, int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, size_t actual_len);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_target_change_task(void *arg);
static void sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag);
static void sd_log_lun_expansion_event(struct sd_lun *un, int km_flag);
static void sd_log_eject_request_event(struct sd_lun *un, int km_flag);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
	uint32_t *lbap, uint32_t *psp, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag,
	int flag, int path_flag);
static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1526	struct dk_callback *dkc);
1527static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1528static int sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc,
1529	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1530	uchar_t *bufaddr, uint_t buflen, int path_flag);
1531static int sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
1532	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1533	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1534static int sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize,
1535	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1536static int sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize,
1537	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1538static int sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
1539	size_t buflen, daddr_t start_block, int path_flag);
1540#define	sd_send_scsi_READ(ssc, bufaddr, buflen, start_block, path_flag)	\
1541	sd_send_scsi_RDWR(ssc, SCMD_READ, bufaddr, buflen, start_block, \
1542	path_flag)
1543#define	sd_send_scsi_WRITE(ssc, bufaddr, buflen, start_block, path_flag)\
1544	sd_send_scsi_RDWR(ssc, SCMD_WRITE, bufaddr, buflen, start_block,\
1545	path_flag)
1546
1547static int sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr,
1548	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1549	uint16_t param_ptr, int path_flag);
1550static int sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc,
1551	uchar_t *bufaddr, size_t buflen, uchar_t class_req);
1552static boolean_t sd_gesn_media_data_valid(uchar_t *data);
1553
1554static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1555static void sd_free_rqs(struct sd_lun *un);
1556
1557static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1558	uchar_t *data, int len, int fmt);
1559static void sd_panic_for_res_conflict(struct sd_lun *un);
1560
1561/*
1562 * Disk Ioctl Function Prototypes
1563 */
1564static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1565static int sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag);
1566static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1567static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1568
1569/*
1570 * Multi-host Ioctl Prototypes
1571 */
1572static int sd_check_mhd(dev_t dev, int interval);
1573static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1574static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1575static char *sd_sname(uchar_t status);
1576static void sd_mhd_resvd_recover(void *arg);
static void sd_resv_reclaim_thread(void);
1578static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1579static int sd_reserve_release(dev_t dev, int cmd);
1580static void sd_rmv_resv_reclaim_req(dev_t dev);
1581static void sd_mhd_reset_notify_cb(caddr_t arg);
1582static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1583	mhioc_inkeys_t *usrp, int flag);
1584static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1585	mhioc_inresvs_t *usrp, int flag);
1586static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1587static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1588static int sd_mhdioc_release(dev_t dev);
1589static int sd_mhdioc_register_devid(dev_t dev);
1590static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1591static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1592
1593/*
1594 * SCSI removable prototypes
1595 */
1596static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1597static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1598static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1599static int sr_pause_resume(dev_t dev, int mode);
1600static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1601static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1602static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1603static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1604static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1605static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1606static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1607static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1608static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1609static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1610static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1611static int sr_sector_mode(dev_t dev, uint32_t blksize);
1612static int sr_eject(dev_t dev);
1613static void sr_ejected(register struct sd_lun *un);
1614static int sr_check_wp(dev_t dev);
1615static opaque_t sd_watch_request_submit(struct sd_lun *un);
1616static int sd_check_media(dev_t dev, enum dkio_state state);
1617static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1618static void sd_delayed_cv_broadcast(void *arg);
1619static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1620static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1621
1622static int sd_log_page_supported(sd_ssc_t *ssc, int log_page);
1623
1624/*
1625 * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1626 */
1627static void sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag);
1628static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1629static void sd_wm_cache_destructor(void *wm, void *un);
1630static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1631	daddr_t endb, ushort_t typ);
1632static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1633	daddr_t endb);
1634static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1635static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
static void sd_read_modify_write_task(void *arg);
static int sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno,
	uint64_t nblk, struct buf **bpp);
1640
1641
1642/*
1643 * Function prototypes for failfast support.
1644 */
1645static void sd_failfast_flushq(struct sd_lun *un);
1646static int sd_failfast_flushq_callback(struct buf *bp);
1647
1648/*
 * Function prototype to check for LSI devices
1650 */
1651static void sd_is_lsi(struct sd_lun *un);
1652
1653/*
1654 * Function prototypes for partial DMA support
1655 */
1656static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1657		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1658
1659
1660/* Function prototypes for cmlb */
1661static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1662    diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1663
static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg,
	void *tg_cookie);
1665
1666/*
 * For timely printing of the RMW warning message
1668 */
1669static void sd_rmw_msg_print_handler(void *arg);
1670
1671/*
1672 * Constants for failfast support:
1673 *
1674 * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1675 * failfast processing being performed.
1676 *
1677 * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1678 * failfast processing on all bufs with B_FAILFAST set.
1679 */
1680
1681#define	SD_FAILFAST_INACTIVE		0
1682#define	SD_FAILFAST_ACTIVE		1
1683
1684/*
1685 * Bitmask to control behavior of buf(9S) flushes when a transition to
1686 * the failfast state occurs. Optional bits include:
1687 *
1688 * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1689 * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1690 * be flushed.
1691 *
1692 * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1693 * driver, in addition to the regular wait queue. This includes the xbuf
1694 * queues. When clear, only the driver's wait queue will be flushed.
1695 */
1696#define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1697#define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1698
1699/*
1700 * The default behavior is to only flush bufs that have B_FAILFAST set, but
1701 * to flush all queues within the driver.
1702 */
1703static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
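
/*
 * Illustrative note (a sketch, not a documented interface): since
 * sd_failfast_flushctl is an ordinary global variable, the most
 * aggressive flushing policy could in principle be selected at boot
 * time from /etc/system, e.g.:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * i.e. (SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES).
 */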
1704
1705
1706/*
1707 * SD Testing Fault Injection
1708 */
1709#ifdef SD_FAULT_INJECTION
1710static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1711static void sd_faultinjection(struct scsi_pkt *pktp);
1712static void sd_injection_log(char *buf, struct sd_lun *un);
1713#endif
1714
1715/*
1716 * Device driver ops vector
1717 */
1718static struct cb_ops sd_cb_ops = {
1719	sdopen,			/* open */
1720	sdclose,		/* close */
1721	sdstrategy,		/* strategy */
1722	nodev,			/* print */
1723	sddump,			/* dump */
1724	sdread,			/* read */
1725	sdwrite,		/* write */
1726	sdioctl,		/* ioctl */
1727	nodev,			/* devmap */
1728	nodev,			/* mmap */
1729	nodev,			/* segmap */
1730	nochpoll,		/* poll */
1731	sd_prop_op,		/* cb_prop_op */
1732	0,			/* streamtab  */
1733	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1734	CB_REV,			/* cb_rev */
1735	sdaread, 		/* async I/O read entry point */
1736	sdawrite		/* async I/O write entry point */
1737};
1738
1739struct dev_ops sd_ops = {
1740	DEVO_REV,		/* devo_rev, */
1741	0,			/* refcnt  */
1742	sdinfo,			/* info */
1743	nulldev,		/* identify */
1744	sdprobe,		/* probe */
1745	sdattach,		/* attach */
1746	sddetach,		/* detach */
1747	nodev,			/* reset */
1748	&sd_cb_ops,		/* driver operations */
1749	NULL,			/* bus operations */
1750	sdpower,		/* power */
1751	ddi_quiesce_not_needed,		/* quiesce */
1752};
1753
1754/*
1755 * This is the loadable module wrapper.
1756 */
1757#include <sys/modctl.h>
1758
1759#ifndef XPV_HVM_DRIVER
1760static struct modldrv modldrv = {
1761	&mod_driverops,		/* Type of module. This one is a driver */
1762	SD_MODULE_NAME,		/* Module name. */
1763	&sd_ops			/* driver ops */
1764};
1765
1766static struct modlinkage modlinkage = {
1767	MODREV_1, &modldrv, NULL
1768};
1769
1770#else /* XPV_HVM_DRIVER */
1771static struct modlmisc modlmisc = {
1772	&mod_miscops,		/* Type of module. This one is a misc */
1773	"HVM " SD_MODULE_NAME,		/* Module name. */
1774};
1775
1776static struct modlinkage modlinkage = {
1777	MODREV_1, &modlmisc, NULL
1778};
1779
1780#endif /* XPV_HVM_DRIVER */
1781
1782static cmlb_tg_ops_t sd_tgops = {
1783	TG_DK_OPS_VERSION_1,
1784	sd_tg_rdwr,
1785	sd_tg_getinfo
1786};
1787
1788static struct scsi_asq_key_strings sd_additional_codes[] = {
1789	0x81, 0, "Logical Unit is Reserved",
1790	0x85, 0, "Audio Address Not Valid",
1791	0xb6, 0, "Media Load Mechanism Failed",
1792	0xB9, 0, "Audio Play Operation Aborted",
1793	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1794	0x53, 2, "Medium removal prevented",
1795	0x6f, 0, "Authentication failed during key exchange",
1796	0x6f, 1, "Key not present",
1797	0x6f, 2, "Key not established",
1798	0x6f, 3, "Read without proper authentication",
1799	0x6f, 4, "Mismatched region to this logical unit",
1800	0x6f, 5, "Region reset count error",
1801	0xffff, 0x0, NULL
1802};
1803
1804
1805/*
1806 * Struct for passing printing information for sense data messages
1807 */
1808struct sd_sense_info {
1809	int	ssi_severity;
1810	int	ssi_pfa_flag;
1811};
1812
1813/*
1814 * Table of function pointers for iostart-side routines. Separate "chains"
1815 * of layered function calls are formed by placing the function pointers
1816 * sequentially in the desired order. Functions are called according to an
1817 * incrementing table index ordering. The last function in each chain must
1818 * be sd_core_iostart(). The corresponding iodone-side routines are expected
1819 * in the sd_iodone_chain[] array.
1820 *
1821 * Note: It may seem more natural to organize both the iostart and iodone
1822 * functions together, into an array of structures (or some similar
1823 * organization) with a common index, rather than two separate arrays which
1824 * must be maintained in synchronization. The purpose of this division is
 * to achieve improved performance: individual arrays allow for more
1826 * effective cache line utilization on certain platforms.
1827 */
1828
1829typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
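
/*
 * Illustrative sketch (for documentation only; the dispatch macros that
 * implement this are defined further below): each function in a chain
 * receives its own index and forwards the buf to index + 1, so the
 * PM-enabled disk chain that starts at index 0 executes as:
 *
 *	sd_mapblockaddr_iostart(0, un, bp)
 *	  -> sd_pm_iostart(1, un, bp)
 *	    -> sd_core_iostart(2, un, bp)
 */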
1830
1831
1832static sd_chain_t sd_iostart_chain[] = {
1833
1834	/* Chain for buf IO for disk drive targets (PM enabled) */
1835	sd_mapblockaddr_iostart,	/* Index: 0 */
1836	sd_pm_iostart,			/* Index: 1 */
1837	sd_core_iostart,		/* Index: 2 */
1838
1839	/* Chain for buf IO for disk drive targets (PM disabled) */
1840	sd_mapblockaddr_iostart,	/* Index: 3 */
1841	sd_core_iostart,		/* Index: 4 */
1842
1843	/*
1844	 * Chain for buf IO for removable-media or large sector size
1845	 * disk drive targets with RMW needed (PM enabled)
1846	 */
1847	sd_mapblockaddr_iostart,	/* Index: 5 */
1848	sd_mapblocksize_iostart,	/* Index: 6 */
1849	sd_pm_iostart,			/* Index: 7 */
1850	sd_core_iostart,		/* Index: 8 */
1851
1852	/*
1853	 * Chain for buf IO for removable-media or large sector size
1854	 * disk drive targets with RMW needed (PM disabled)
1855	 */
1856	sd_mapblockaddr_iostart,	/* Index: 9 */
1857	sd_mapblocksize_iostart,	/* Index: 10 */
1858	sd_core_iostart,		/* Index: 11 */
1859
1860	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1861	sd_mapblockaddr_iostart,	/* Index: 12 */
1862	sd_checksum_iostart,		/* Index: 13 */
1863	sd_pm_iostart,			/* Index: 14 */
1864	sd_core_iostart,		/* Index: 15 */
1865
1866	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1867	sd_mapblockaddr_iostart,	/* Index: 16 */
1868	sd_checksum_iostart,		/* Index: 17 */
1869	sd_core_iostart,		/* Index: 18 */
1870
1871	/* Chain for USCSI commands (all targets) */
1872	sd_pm_iostart,			/* Index: 19 */
1873	sd_core_iostart,		/* Index: 20 */
1874
1875	/* Chain for checksumming USCSI commands (all targets) */
1876	sd_checksum_uscsi_iostart,	/* Index: 21 */
1877	sd_pm_iostart,			/* Index: 22 */
1878	sd_core_iostart,		/* Index: 23 */
1879
1880	/* Chain for "direct" USCSI commands (all targets) */
1881	sd_core_iostart,		/* Index: 24 */
1882
1883	/* Chain for "direct priority" USCSI commands (all targets) */
1884	sd_core_iostart,		/* Index: 25 */
1885
1886	/*
1887	 * Chain for buf IO for large sector size disk drive targets
1888	 * with RMW needed with checksumming (PM enabled)
1889	 */
1890	sd_mapblockaddr_iostart,	/* Index: 26 */
1891	sd_mapblocksize_iostart,	/* Index: 27 */
1892	sd_checksum_iostart,		/* Index: 28 */
1893	sd_pm_iostart,			/* Index: 29 */
1894	sd_core_iostart,		/* Index: 30 */
1895
1896	/*
1897	 * Chain for buf IO for large sector size disk drive targets
1898	 * with RMW needed with checksumming (PM disabled)
1899	 */
1900	sd_mapblockaddr_iostart,	/* Index: 31 */
1901	sd_mapblocksize_iostart,	/* Index: 32 */
1902	sd_checksum_iostart,		/* Index: 33 */
1903	sd_core_iostart,		/* Index: 34 */
1904
1905};
1906
1907/*
1908 * Macros to locate the first function of each iostart chain in the
1909 * sd_iostart_chain[] array. These are located by the index in the array.
1910 */
1911#define	SD_CHAIN_DISK_IOSTART			0
1912#define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1913#define	SD_CHAIN_MSS_DISK_IOSTART		5
1914#define	SD_CHAIN_RMMEDIA_IOSTART		5
1915#define	SD_CHAIN_MSS_DISK_IOSTART_NO_PM		9
1916#define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1917#define	SD_CHAIN_CHKSUM_IOSTART			12
1918#define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1919#define	SD_CHAIN_USCSI_CMD_IOSTART		19
1920#define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1921#define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1922#define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1923#define	SD_CHAIN_MSS_CHKSUM_IOSTART		26
1924#define	SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM	31
1925
1926
1927/*
1928 * Table of function pointers for the iodone-side routines for the driver-
1929 * internal layering mechanism.  The calling sequence for iodone routines
1930 * uses a decrementing table index, so the last routine called in a chain
1931 * must be at the lowest array index location for that chain.  The last
1932 * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1933 * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1934 * of the functions in an iodone side chain must correspond to the ordering
1935 * of the iostart routines for that chain.  Note that there is no iodone
1936 * side routine that corresponds to sd_core_iostart(), so there is no
1937 * entry in the table for this.
1938 */
1939
1940static sd_chain_t sd_iodone_chain[] = {
1941
1942	/* Chain for buf IO for disk drive targets (PM enabled) */
1943	sd_buf_iodone,			/* Index: 0 */
1944	sd_mapblockaddr_iodone,		/* Index: 1 */
1945	sd_pm_iodone,			/* Index: 2 */
1946
1947	/* Chain for buf IO for disk drive targets (PM disabled) */
1948	sd_buf_iodone,			/* Index: 3 */
1949	sd_mapblockaddr_iodone,		/* Index: 4 */
1950
1951	/*
1952	 * Chain for buf IO for removable-media or large sector size
1953	 * disk drive targets with RMW needed (PM enabled)
1954	 */
1955	sd_buf_iodone,			/* Index: 5 */
1956	sd_mapblockaddr_iodone,		/* Index: 6 */
1957	sd_mapblocksize_iodone,		/* Index: 7 */
1958	sd_pm_iodone,			/* Index: 8 */
1959
1960	/*
1961	 * Chain for buf IO for removable-media or large sector size
1962	 * disk drive targets with RMW needed (PM disabled)
1963	 */
1964	sd_buf_iodone,			/* Index: 9 */
1965	sd_mapblockaddr_iodone,		/* Index: 10 */
1966	sd_mapblocksize_iodone,		/* Index: 11 */
1967
1968	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1969	sd_buf_iodone,			/* Index: 12 */
1970	sd_mapblockaddr_iodone,		/* Index: 13 */
1971	sd_checksum_iodone,		/* Index: 14 */
1972	sd_pm_iodone,			/* Index: 15 */
1973
1974	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1975	sd_buf_iodone,			/* Index: 16 */
1976	sd_mapblockaddr_iodone,		/* Index: 17 */
1977	sd_checksum_iodone,		/* Index: 18 */
1978
1979	/* Chain for USCSI commands (non-checksum targets) */
1980	sd_uscsi_iodone,		/* Index: 19 */
1981	sd_pm_iodone,			/* Index: 20 */
1982
1983	/* Chain for USCSI commands (checksum targets) */
1984	sd_uscsi_iodone,		/* Index: 21 */
1985	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */
1987
1988	/* Chain for "direct" USCSI commands (all targets) */
1989	sd_uscsi_iodone,		/* Index: 24 */
1990
1991	/* Chain for "direct priority" USCSI commands (all targets) */
1992	sd_uscsi_iodone,		/* Index: 25 */
1993
1994	/*
1995	 * Chain for buf IO for large sector size disk drive targets
1996	 * with checksumming (PM enabled)
1997	 */
1998	sd_buf_iodone,			/* Index: 26 */
1999	sd_mapblockaddr_iodone,		/* Index: 27 */
2000	sd_mapblocksize_iodone,		/* Index: 28 */
2001	sd_checksum_iodone,		/* Index: 29 */
2002	sd_pm_iodone,			/* Index: 30 */
2003
2004	/*
2005	 * Chain for buf IO for large sector size disk drive targets
2006	 * with checksumming (PM disabled)
2007	 */
2008	sd_buf_iodone,			/* Index: 31 */
2009	sd_mapblockaddr_iodone,		/* Index: 32 */
2010	sd_mapblocksize_iodone,		/* Index: 33 */
2011	sd_checksum_iodone,		/* Index: 34 */
2012};
2013
2014
2015/*
2016 * Macros to locate the "first" function in the sd_iodone_chain[] array for
2017 * each iodone-side chain. These are located by the array index, but as the
2018 * iodone side functions are called in a decrementing-index order, the
2019 * highest index number in each chain must be specified (as these correspond
2020 * to the first function in the iodone chain that will be called by the core
2021 * at IO completion time).
2022 */
2023
2024#define	SD_CHAIN_DISK_IODONE			2
2025#define	SD_CHAIN_DISK_IODONE_NO_PM		4
2026#define	SD_CHAIN_RMMEDIA_IODONE			8
2027#define	SD_CHAIN_MSS_DISK_IODONE		8
2028#define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
2029#define	SD_CHAIN_MSS_DISK_IODONE_NO_PM		11
2030#define	SD_CHAIN_CHKSUM_IODONE			15
2031#define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
2032#define	SD_CHAIN_USCSI_CMD_IODONE		20
#define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
2034#define	SD_CHAIN_DIRECT_CMD_IODONE		24
2035#define	SD_CHAIN_PRIORITY_CMD_IODONE		25
2036#define	SD_CHAIN_MSS_CHKSUM_IODONE		30
2037#define	SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM	34
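
/*
 * Illustrative sketch: completion processing for the PM-enabled disk
 * chain begins at SD_CHAIN_DISK_IODONE (index 2) and walks the array
 * downward:
 *
 *	sd_pm_iodone(2, un, bp)
 *	  -> sd_mapblockaddr_iodone(1, un, bp)
 *	    -> sd_buf_iodone(0, un, bp)
 */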
2038
2039
2040
2041/*
2042 * Array to map a layering chain index to the appropriate initpkt routine.
2043 * The redundant entries are present so that the index used for accessing
2044 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2045 * with this table as well.
2046 */
2047typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
2048
2049static sd_initpkt_t	sd_initpkt_map[] = {
2050
2051	/* Chain for buf IO for disk drive targets (PM enabled) */
2052	sd_initpkt_for_buf,		/* Index: 0 */
2053	sd_initpkt_for_buf,		/* Index: 1 */
2054	sd_initpkt_for_buf,		/* Index: 2 */
2055
2056	/* Chain for buf IO for disk drive targets (PM disabled) */
2057	sd_initpkt_for_buf,		/* Index: 3 */
2058	sd_initpkt_for_buf,		/* Index: 4 */
2059
2060	/*
2061	 * Chain for buf IO for removable-media or large sector size
2062	 * disk drive targets (PM enabled)
2063	 */
2064	sd_initpkt_for_buf,		/* Index: 5 */
2065	sd_initpkt_for_buf,		/* Index: 6 */
2066	sd_initpkt_for_buf,		/* Index: 7 */
2067	sd_initpkt_for_buf,		/* Index: 8 */
2068
2069	/*
2070	 * Chain for buf IO for removable-media or large sector size
2071	 * disk drive targets (PM disabled)
2072	 */
2073	sd_initpkt_for_buf,		/* Index: 9 */
2074	sd_initpkt_for_buf,		/* Index: 10 */
2075	sd_initpkt_for_buf,		/* Index: 11 */
2076
2077	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2078	sd_initpkt_for_buf,		/* Index: 12 */
2079	sd_initpkt_for_buf,		/* Index: 13 */
2080	sd_initpkt_for_buf,		/* Index: 14 */
2081	sd_initpkt_for_buf,		/* Index: 15 */
2082
2083	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2084	sd_initpkt_for_buf,		/* Index: 16 */
2085	sd_initpkt_for_buf,		/* Index: 17 */
2086	sd_initpkt_for_buf,		/* Index: 18 */
2087
2088	/* Chain for USCSI commands (non-checksum targets) */
2089	sd_initpkt_for_uscsi,		/* Index: 19 */
2090	sd_initpkt_for_uscsi,		/* Index: 20 */
2091
2092	/* Chain for USCSI commands (checksum targets) */
2093	sd_initpkt_for_uscsi,		/* Index: 21 */
2094	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */
2096
2097	/* Chain for "direct" USCSI commands (all targets) */
2098	sd_initpkt_for_uscsi,		/* Index: 24 */
2099
2100	/* Chain for "direct priority" USCSI commands (all targets) */
2101	sd_initpkt_for_uscsi,		/* Index: 25 */
2102
2103	/*
2104	 * Chain for buf IO for large sector size disk drive targets
2105	 * with checksumming (PM enabled)
2106	 */
2107	sd_initpkt_for_buf,		/* Index: 26 */
2108	sd_initpkt_for_buf,		/* Index: 27 */
2109	sd_initpkt_for_buf,		/* Index: 28 */
2110	sd_initpkt_for_buf,		/* Index: 29 */
2111	sd_initpkt_for_buf,		/* Index: 30 */
2112
2113	/*
2114	 * Chain for buf IO for large sector size disk drive targets
2115	 * with checksumming (PM disabled)
2116	 */
2117	sd_initpkt_for_buf,		/* Index: 31 */
2118	sd_initpkt_for_buf,		/* Index: 32 */
2119	sd_initpkt_for_buf,		/* Index: 33 */
2120	sd_initpkt_for_buf,		/* Index: 34 */
2121};
2122
2123
2124/*
 * Array to map a layering chain index to the appropriate destroypkt routine.
2126 * The redundant entries are present so that the index used for accessing
2127 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2128 * with this table as well.
2129 */
2130typedef void (*sd_destroypkt_t)(struct buf *);
2131
2132static sd_destroypkt_t	sd_destroypkt_map[] = {
2133
2134	/* Chain for buf IO for disk drive targets (PM enabled) */
2135	sd_destroypkt_for_buf,		/* Index: 0 */
2136	sd_destroypkt_for_buf,		/* Index: 1 */
2137	sd_destroypkt_for_buf,		/* Index: 2 */
2138
2139	/* Chain for buf IO for disk drive targets (PM disabled) */
2140	sd_destroypkt_for_buf,		/* Index: 3 */
2141	sd_destroypkt_for_buf,		/* Index: 4 */
2142
2143	/*
2144	 * Chain for buf IO for removable-media or large sector size
2145	 * disk drive targets (PM enabled)
2146	 */
2147	sd_destroypkt_for_buf,		/* Index: 5 */
2148	sd_destroypkt_for_buf,		/* Index: 6 */
2149	sd_destroypkt_for_buf,		/* Index: 7 */
2150	sd_destroypkt_for_buf,		/* Index: 8 */
2151
2152	/*
2153	 * Chain for buf IO for removable-media or large sector size
2154	 * disk drive targets (PM disabled)
2155	 */
2156	sd_destroypkt_for_buf,		/* Index: 9 */
2157	sd_destroypkt_for_buf,		/* Index: 10 */
2158	sd_destroypkt_for_buf,		/* Index: 11 */
2159
2160	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2161	sd_destroypkt_for_buf,		/* Index: 12 */
2162	sd_destroypkt_for_buf,		/* Index: 13 */
2163	sd_destroypkt_for_buf,		/* Index: 14 */
2164	sd_destroypkt_for_buf,		/* Index: 15 */
2165
2166	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2167	sd_destroypkt_for_buf,		/* Index: 16 */
2168	sd_destroypkt_for_buf,		/* Index: 17 */
2169	sd_destroypkt_for_buf,		/* Index: 18 */
2170
2171	/* Chain for USCSI commands (non-checksum targets) */
2172	sd_destroypkt_for_uscsi,	/* Index: 19 */
2173	sd_destroypkt_for_uscsi,	/* Index: 20 */
2174
2175	/* Chain for USCSI commands (checksum targets) */
2176	sd_destroypkt_for_uscsi,	/* Index: 21 */
2177	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */
2179
2180	/* Chain for "direct" USCSI commands (all targets) */
2181	sd_destroypkt_for_uscsi,	/* Index: 24 */
2182
2183	/* Chain for "direct priority" USCSI commands (all targets) */
2184	sd_destroypkt_for_uscsi,	/* Index: 25 */
2185
2186	/*
2187	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
2189	 */
2190	sd_destroypkt_for_buf,		/* Index: 26 */
2191	sd_destroypkt_for_buf,		/* Index: 27 */
2192	sd_destroypkt_for_buf,		/* Index: 28 */
2193	sd_destroypkt_for_buf,		/* Index: 29 */
2194	sd_destroypkt_for_buf,		/* Index: 30 */
2195
2196	/*
2197	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
2199	 */
2200	sd_destroypkt_for_buf,		/* Index: 31 */
2201	sd_destroypkt_for_buf,		/* Index: 32 */
2202	sd_destroypkt_for_buf,		/* Index: 33 */
2203	sd_destroypkt_for_buf,		/* Index: 34 */
2204};
2205
2206
2207
2208/*
2209 * Array to map a layering chain index to the appropriate chain "type".
2210 * The chain type indicates a specific property/usage of the chain.
2211 * The redundant entries are present so that the index used for accessing
2212 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2213 * with this table as well.
2214 */
2215
2216#define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2217#define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2218#define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2219#define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2220#define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2221						/* (for error recovery) */
2222
2223static int sd_chain_type_map[] = {
2224
2225	/* Chain for buf IO for disk drive targets (PM enabled) */
2226	SD_CHAIN_BUFIO,			/* Index: 0 */
2227	SD_CHAIN_BUFIO,			/* Index: 1 */
2228	SD_CHAIN_BUFIO,			/* Index: 2 */
2229
2230	/* Chain for buf IO for disk drive targets (PM disabled) */
2231	SD_CHAIN_BUFIO,			/* Index: 3 */
2232	SD_CHAIN_BUFIO,			/* Index: 4 */
2233
2234	/*
2235	 * Chain for buf IO for removable-media or large sector size
2236	 * disk drive targets (PM enabled)
2237	 */
2238	SD_CHAIN_BUFIO,			/* Index: 5 */
2239	SD_CHAIN_BUFIO,			/* Index: 6 */
2240	SD_CHAIN_BUFIO,			/* Index: 7 */
2241	SD_CHAIN_BUFIO,			/* Index: 8 */
2242
2243	/*
2244	 * Chain for buf IO for removable-media or large sector size
2245	 * disk drive targets (PM disabled)
2246	 */
2247	SD_CHAIN_BUFIO,			/* Index: 9 */
2248	SD_CHAIN_BUFIO,			/* Index: 10 */
2249	SD_CHAIN_BUFIO,			/* Index: 11 */
2250
2251	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2252	SD_CHAIN_BUFIO,			/* Index: 12 */
2253	SD_CHAIN_BUFIO,			/* Index: 13 */
2254	SD_CHAIN_BUFIO,			/* Index: 14 */
2255	SD_CHAIN_BUFIO,			/* Index: 15 */
2256
2257	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2258	SD_CHAIN_BUFIO,			/* Index: 16 */
2259	SD_CHAIN_BUFIO,			/* Index: 17 */
2260	SD_CHAIN_BUFIO,			/* Index: 18 */
2261
2262	/* Chain for USCSI commands (non-checksum targets) */
2263	SD_CHAIN_USCSI,			/* Index: 19 */
2264	SD_CHAIN_USCSI,			/* Index: 20 */
2265
2266	/* Chain for USCSI commands (checksum targets) */
2267	SD_CHAIN_USCSI,			/* Index: 21 */
2268	SD_CHAIN_USCSI,			/* Index: 22 */
2269	SD_CHAIN_USCSI,			/* Index: 23 */
2270
2271	/* Chain for "direct" USCSI commands (all targets) */
2272	SD_CHAIN_DIRECT,		/* Index: 24 */
2273
2274	/* Chain for "direct priority" USCSI commands (all targets) */
2275	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2276
2277	/*
2278	 * Chain for buf IO for large sector size disk drive targets
2279	 * with checksumming (PM enabled)
2280	 */
2281	SD_CHAIN_BUFIO,			/* Index: 26 */
2282	SD_CHAIN_BUFIO,			/* Index: 27 */
2283	SD_CHAIN_BUFIO,			/* Index: 28 */
2284	SD_CHAIN_BUFIO,			/* Index: 29 */
2285	SD_CHAIN_BUFIO,			/* Index: 30 */
2286
2287	/*
2288	 * Chain for buf IO for large sector size disk drive targets
2289	 * with checksumming (PM disabled)
2290	 */
2291	SD_CHAIN_BUFIO,			/* Index: 31 */
2292	SD_CHAIN_BUFIO,			/* Index: 32 */
2293	SD_CHAIN_BUFIO,			/* Index: 33 */
2294	SD_CHAIN_BUFIO,			/* Index: 34 */
2295};
2296
2297
/* Macro to return TRUE if the IO has come from one of the buf IO chains. */
2299#define	SD_IS_BUFIO(xp)			\
2300	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2301
2302/* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2303#define	SD_IS_DIRECT_PRIORITY(xp)	\
2304	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2305
2306
2307
2308/*
2309 * Struct, array, and macros to map a specific chain to the appropriate
2310 * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2311 *
2312 * The sd_chain_index_map[] array is used at attach time to set the various
2313 * un_xxx_chain type members of the sd_lun softstate to the specific layering
2314 * chain to be used with the instance. This allows different instances to use
 * chain to be used with the instance. This allows different instances to use
 * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
 * and xb_chain_iodone index values in the sd_xbuf are initialized to these
 * values at sd_xbuf init time, (1) layering chains may be changed
 * dynamically and without locking; and (2) a layer may update the
2320 * to allow for deferred processing of an IO within the same chain from a
2321 * different execution context.
2322 */
2323
2324struct sd_chain_index {
2325	int	sci_iostart_index;
2326	int	sci_iodone_index;
2327};
2328
2329static struct sd_chain_index	sd_chain_index_map[] = {
2330	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2331	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2332	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2333	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2334	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2335	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2336	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2337	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2338	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2339	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2340	{ SD_CHAIN_MSS_CHKSUM_IOSTART,		SD_CHAIN_MSS_CHKSUM_IODONE },
2341	{ SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM, SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM },
2342
2343};
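
/*
 * Hypothetical sketch of the mapping described above (the actual
 * attach-time and xbuf-init code lives elsewhere in this file): the
 * instance records its chain selection once, and each sd_xbuf is then
 * seeded from the map, e.g. for regular buf IO:
 *
 *	int i = un->un_buf_chain_type;	(e.g. SD_CHAIN_INFO_DISK)
 *	xp->xb_chain_iostart = sd_chain_index_map[i].sci_iostart_index;
 *	xp->xb_chain_iodone  = sd_chain_index_map[i].sci_iodone_index;
 */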
2344
2345
2346/*
2347 * The following are indexes into the sd_chain_index_map[] array.
2348 */
2349
2350/* un->un_buf_chain_type must be set to one of these */
2351#define	SD_CHAIN_INFO_DISK		0
2352#define	SD_CHAIN_INFO_DISK_NO_PM	1
2353#define	SD_CHAIN_INFO_RMMEDIA		2
2354#define	SD_CHAIN_INFO_MSS_DISK		2
2355#define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2356#define	SD_CHAIN_INFO_MSS_DSK_NO_PM	3
2357#define	SD_CHAIN_INFO_CHKSUM		4
2358#define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2359#define	SD_CHAIN_INFO_MSS_DISK_CHKSUM	10
2360#define	SD_CHAIN_INFO_MSS_DISK_CHKSUM_NO_PM	11
2361
2362/* un->un_uscsi_chain_type must be set to one of these */
2363#define	SD_CHAIN_INFO_USCSI_CMD		6
2364/* USCSI with PM disabled is the same as DIRECT */
2365#define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2366#define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2367
2368/* un->un_direct_chain_type must be set to one of these */
2369#define	SD_CHAIN_INFO_DIRECT_CMD	8
2370
2371/* un->un_priority_chain_type must be set to one of these */
2372#define	SD_CHAIN_INFO_PRIORITY_CMD	9
2373
2374/* size for devid inquiries */
2375#define	MAX_INQUIRY_SIZE		0xF0
2376
2377/*
2378 * Macros used by functions to pass a given buf(9S) struct along to the
2379 * next function in the layering chain for further processing.
2380 *
2381 * In the following macros, passing more than three arguments to the called
2382 * routines causes the optimizer for the SPARC compiler to stop doing tail
 * call elimination, which results in significant performance degradation.
2384 */
2385#define	SD_BEGIN_IOSTART(index, un, bp)	\
2386	((*(sd_iostart_chain[index]))(index, un, bp))
2387
2388#define	SD_BEGIN_IODONE(index, un, bp)	\
2389	((*(sd_iodone_chain[index]))(index, un, bp))
2390
2391#define	SD_NEXT_IOSTART(index, un, bp)				\
2392	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2393
2394#define	SD_NEXT_IODONE(index, un, bp)				\
2395	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
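
/*
 * Illustrative sketch (sd_example_iostart is a hypothetical layer shown
 * only to document the calling convention): an iostart routine in the
 * middle of a chain performs its per-layer work and then forwards the
 * buf, keeping to the three-argument form noted above:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		(per-layer processing of bp goes here)
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 */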
2396
2397/*
2398 *    Function: _init
2399 *
2400 * Description: This is the driver _init(9E) entry point.
2401 *
2402 * Return Code: Returns the value from mod_install(9F) or
2403 *		ddi_soft_state_init(9F) as appropriate.
2404 *
2405 *     Context: Called when driver module loaded.
2406 */
2407
2408int
2409_init(void)
2410{
2411	int	err;
2412
2413	/* establish driver name from module name */
2414	sd_label = (char *)mod_modname(&modlinkage);
2415
2416#ifndef XPV_HVM_DRIVER
2417	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2418	    SD_MAXUNIT);
2419	if (err != 0) {
2420		return (err);
2421	}
2422
2423#else /* XPV_HVM_DRIVER */
2424	/* Remove the leading "hvm_" from the module name */
2425	ASSERT(strncmp(sd_label, "hvm_", strlen("hvm_")) == 0);
2426	sd_label += strlen("hvm_");
2427
2428#endif /* XPV_HVM_DRIVER */
2429
2430	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2431	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2432	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2433
2434	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2435	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2436	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2437
2438	/*
	 * It's OK to init here even for fibre devices.
2440	 */
2441	sd_scsi_probe_cache_init();
2442
2443	sd_scsi_target_lun_init();
2444
2445	/*
2446	 * Creating taskq before mod_install ensures that all callers (threads)
2447	 * that enter the module after a successful mod_install encounter
2448	 * a valid taskq.
2449	 */
2450	sd_taskq_create();
2451
2452	err = mod_install(&modlinkage);
2453	if (err != 0) {
2454		/* delete taskq if install fails */
2455		sd_taskq_delete();
2456
2457		mutex_destroy(&sd_detach_mutex);
2458		mutex_destroy(&sd_log_mutex);
2459		mutex_destroy(&sd_label_mutex);
2460
2461		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2462		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2463		cv_destroy(&sd_tr.srq_inprocess_cv);
2464
2465		sd_scsi_probe_cache_fini();
2466
2467		sd_scsi_target_lun_fini();
2468
2469#ifndef XPV_HVM_DRIVER
2470		ddi_soft_state_fini(&sd_state);
2471#endif /* !XPV_HVM_DRIVER */
2472		return (err);
2473	}
2474
2475	return (err);
2476}
2477
2478
2479/*
2480 *    Function: _fini
2481 *
2482 * Description: This is the driver _fini(9E) entry point.
2483 *
2484 * Return Code: Returns the value from mod_remove(9F)
2485 *
2486 *     Context: Called when driver module is unloaded.
2487 */
2488
2489int
2490_fini(void)
2491{
2492	int err;
2493
2494	if ((err = mod_remove(&modlinkage)) != 0) {
2495		return (err);
2496	}
2497
2498	sd_taskq_delete();
2499
2500	mutex_destroy(&sd_detach_mutex);
2501	mutex_destroy(&sd_log_mutex);
2502	mutex_destroy(&sd_label_mutex);
2503	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2504
2505	sd_scsi_probe_cache_fini();
2506
2507	sd_scsi_target_lun_fini();
2508
2509	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2510	cv_destroy(&sd_tr.srq_inprocess_cv);
2511
2512#ifndef XPV_HVM_DRIVER
2513	ddi_soft_state_fini(&sd_state);
2514#endif /* !XPV_HVM_DRIVER */
2515
2516	return (err);
2517}
2518
2519
2520/*
2521 *    Function: _info
2522 *
2523 * Description: This is the driver _info(9E) entry point.
2524 *
2525 *   Arguments: modinfop - pointer to the driver modinfo structure
2526 *
2527 * Return Code: Returns the value from mod_info(9F).
2528 *
2529 *     Context: Kernel thread context
2530 */
2531
2532int
2533_info(struct modinfo *modinfop)
2534{
2535	return (mod_info(&modlinkage, modinfop));
2536}
2537
2538
2539/*
2540 * The following routines implement the driver message logging facility.
2541 * They provide component- and level- based debug output filtering.
2542 * Output may also be restricted to messages for a single instance by
2543 * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2544 * to NULL, then messages for all instances are printed.
2545 *
2546 * These routines have been cloned from each other due to the language
2547 * constraints of macros and variable argument list processing.
2548 */
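
/*
 * Illustrative usage (SD_LOG_IO and "sd_foo" are example names; the
 * SD_ERROR, SD_INFO, and SD_TRACE macros are defined in sddef.h):
 *
 *	SD_ERROR(SD_LOG_IO, un, "sd_foo: unexpected state %d\n", state);
 *
 * Output is produced only if SD_LOG_IO is set in sd_component_mask,
 * SD_LOGMASK_ERROR is set in sd_level_mask, and un matches sd_debug_un
 * (or sd_debug_un is NULL).
 */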
2549
2550
2551/*
2552 *    Function: sd_log_err
2553 *
2554 * Description: This routine is called by the SD_ERROR macro for debug
2555 *		logging of error conditions.
2556 *
2557 *   Arguments: comp - driver component being logged
 *		un   - pointer to driver soft state (unit) structure
2559 *		fmt  - error string and format to be logged
2560 */
2561
2562static void
2563sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2564{
2565	va_list		ap;
2566	dev_info_t	*dev;
2567
2568	ASSERT(un != NULL);
2569	dev = SD_DEVINFO(un);
2570	ASSERT(dev != NULL);
2571
2572	/*
2573	 * Filter messages based on the global component and level masks.
2574	 * Also print if un matches the value of sd_debug_un, or if
2575	 * sd_debug_un is set to NULL.
2576	 */
2577	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2578	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2579		mutex_enter(&sd_log_mutex);
2580		va_start(ap, fmt);
2581		(void) vsprintf(sd_log_buf, fmt, ap);
2582		va_end(ap);
2583		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2584		mutex_exit(&sd_log_mutex);
2585	}
2586#ifdef SD_FAULT_INJECTION
2587	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2588	if (un->sd_injection_mask & comp) {
2589		mutex_enter(&sd_log_mutex);
2590		va_start(ap, fmt);
2591		(void) vsprintf(sd_log_buf, fmt, ap);
2592		va_end(ap);
2593		sd_injection_log(sd_log_buf, un);
2594		mutex_exit(&sd_log_mutex);
2595	}
2596#endif
2597}
2598
2599
2600/*
2601 *    Function: sd_log_info
2602 *
2603 * Description: This routine is called by the SD_INFO macro for debug
2604 *		logging of general purpose informational conditions.
2605 *
 *   Arguments: component - driver component being logged
 *		un        - pointer to driver soft state (unit) structure
 *		fmt       - info string and format to be logged
2609 */
2610
2611static void
2612sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2613{
2614	va_list		ap;
2615	dev_info_t	*dev;
2616
2617	ASSERT(un != NULL);
2618	dev = SD_DEVINFO(un);
2619	ASSERT(dev != NULL);
2620
2621	/*
2622	 * Filter messages based on the global component and level masks.
2623	 * Also print if un matches the value of sd_debug_un, or if
2624	 * sd_debug_un is set to NULL.
2625	 */
2626	if ((sd_component_mask & component) &&
2627	    (sd_level_mask & SD_LOGMASK_INFO) &&
2628	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2629		mutex_enter(&sd_log_mutex);
2630		va_start(ap, fmt);
2631		(void) vsprintf(sd_log_buf, fmt, ap);
2632		va_end(ap);
2633		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2634		mutex_exit(&sd_log_mutex);
2635	}
2636#ifdef SD_FAULT_INJECTION
2637	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2638	if (un->sd_injection_mask & component) {
2639		mutex_enter(&sd_log_mutex);
2640		va_start(ap, fmt);
2641		(void) vsprintf(sd_log_buf, fmt, ap);
2642		va_end(ap);
2643		sd_injection_log(sd_log_buf, un);
2644		mutex_exit(&sd_log_mutex);
2645	}
2646#endif
2647}
2648
2649
2650/*
2651 *    Function: sd_log_trace
2652 *
2653 * Description: This routine is called by the SD_TRACE macro for debug
2654 *		logging of trace conditions (i.e. function entry/exit).
2655 *
 *   Arguments: component - driver component being logged
 *		un        - pointer to driver soft state (unit) structure
 *		fmt       - trace string and format to be logged
2659 */
2660
2661static void
2662sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2663{
2664	va_list		ap;
2665	dev_info_t	*dev;
2666
2667	ASSERT(un != NULL);
2668	dev = SD_DEVINFO(un);
2669	ASSERT(dev != NULL);
2670
2671	/*
2672	 * Filter messages based on the global component and level masks.
2673	 * Also print if un matches the value of sd_debug_un, or if
2674	 * sd_debug_un is set to NULL.
2675	 */
2676	if ((sd_component_mask & component) &&
2677	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2678	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2679		mutex_enter(&sd_log_mutex);
2680		va_start(ap, fmt);
2681		(void) vsprintf(sd_log_buf, fmt, ap);
2682		va_end(ap);
2683		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2684		mutex_exit(&sd_log_mutex);
2685	}
2686#ifdef SD_FAULT_INJECTION
2687	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2688	if (un->sd_injection_mask & component) {
2689		mutex_enter(&sd_log_mutex);
2690		va_start(ap, fmt);
2691		(void) vsprintf(sd_log_buf, fmt, ap);
2692		va_end(ap);
2693		sd_injection_log(sd_log_buf, un);
2694		mutex_exit(&sd_log_mutex);
2695	}
2696#endif
2697}
2698
2699
2700/*
2701 *    Function: sdprobe
2702 *
2703 * Description: This is the driver probe(9e) entry point function.
2704 *
2705 *   Arguments: devi - opaque device info handle
2706 *
2707 * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2708 *              DDI_PROBE_FAILURE: If the probe failed.
2709 *              DDI_PROBE_PARTIAL: If the instance is not present now,
2710 *				   but may be present in the future.
2711 */
2712
2713static int
2714sdprobe(dev_info_t *devi)
2715{
2716	struct scsi_device	*devp;
2717	int			rval;
2718#ifndef XPV_HVM_DRIVER
2719	int			instance = ddi_get_instance(devi);
2720#endif /* !XPV_HVM_DRIVER */
2721
2722	/*
	 * if it weren't for pln, sdprobe could actually be nulldev
2724	 * in the "__fibre" case.
2725	 */
2726	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2727		return (DDI_PROBE_DONTCARE);
2728	}
2729
2730	devp = ddi_get_driver_private(devi);
2731
2732	if (devp == NULL) {
2733		/* Ooops... nexus driver is mis-configured... */
2734		return (DDI_PROBE_FAILURE);
2735	}
2736
2737#ifndef XPV_HVM_DRIVER
2738	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2739		return (DDI_PROBE_PARTIAL);
2740	}
2741#endif /* !XPV_HVM_DRIVER */
2742
2743	/*
2744	 * Call the SCSA utility probe routine to see if we actually
2745	 * have a target at this SCSI nexus.
2746	 */
2747	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2748	case SCSIPROBE_EXISTS:
2749		switch (devp->sd_inq->inq_dtype) {
2750		case DTYPE_DIRECT:
2751			rval = DDI_PROBE_SUCCESS;
2752			break;
2753		case DTYPE_RODIRECT:
2754			/* CDs etc. Can be removable media */
2755			rval = DDI_PROBE_SUCCESS;
2756			break;
2757		case DTYPE_OPTICAL:
2758			/*
			 * Rewritable optical drive (e.g. HP115AA).
			 * Can also be removable media.
2761			 */
2762
2763			/*
			 * Do not attempt to bind to DTYPE_OPTICAL if
			 * pre-Solaris 9 SPARC sd behavior is required.
			 *
			 * If this is the first time through and
			 * sd_dtype_optical_bind has not been set in
			 * /etc/system, check the properties.
2769			 */
2770
2771			if (sd_dtype_optical_bind  < 0) {
2772				sd_dtype_optical_bind = ddi_prop_get_int
2773				    (DDI_DEV_T_ANY, devi, 0,
2774				    "optical-device-bind", 1);
2775			}
2776
2777			if (sd_dtype_optical_bind == 0) {
2778				rval = DDI_PROBE_FAILURE;
2779			} else {
2780				rval = DDI_PROBE_SUCCESS;
2781			}
2782			break;
2783
2784		case DTYPE_NOTPRESENT:
2785		default:
2786			rval = DDI_PROBE_FAILURE;
2787			break;
2788		}
2789		break;
2790	default:
2791		rval = DDI_PROBE_PARTIAL;
2792		break;
2793	}
2794
2795	/*
2796	 * This routine checks for resource allocation prior to freeing,
2797	 * so it will take care of the "smart probing" case where a
2798	 * scsi_probe() may or may not have been issued and will *not*
2799	 * free previously-freed resources.
2800	 */
2801	scsi_unprobe(devp);
2802	return (rval);
2803}
2804
2805
2806/*
2807 *    Function: sdinfo
2808 *
2809 * Description: This is the driver getinfo(9e) entry point function.
2810 * 		Given the device number, return the devinfo pointer from
2811 *		the scsi_device structure or the instance number
2812 *		associated with the dev_t.
2813 *
2814 *   Arguments: dip     - pointer to device info structure
2815 *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2816 *			  DDI_INFO_DEVT2INSTANCE)
2817 *		arg     - driver dev_t
2818 *		resultp - user buffer for request response
2819 *
2820 * Return Code: DDI_SUCCESS
2821 *              DDI_FAILURE
2822 */
2823/* ARGSUSED */
2824static int
2825sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2826{
2827	struct sd_lun	*un;
2828	dev_t		dev;
2829	int		instance;
2830	int		error;
2831
2832	switch (infocmd) {
2833	case DDI_INFO_DEVT2DEVINFO:
2834		dev = (dev_t)arg;
2835		instance = SDUNIT(dev);
2836		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2837			return (DDI_FAILURE);
2838		}
2839		*result = (void *) SD_DEVINFO(un);
2840		error = DDI_SUCCESS;
2841		break;
2842	case DDI_INFO_DEVT2INSTANCE:
2843		dev = (dev_t)arg;
2844		instance = SDUNIT(dev);
2845		*result = (void *)(uintptr_t)instance;
2846		error = DDI_SUCCESS;
2847		break;
2848	default:
2849		error = DDI_FAILURE;
2850	}
2851	return (error);
2852}
2853
2854/*
2855 *    Function: sd_prop_op
2856 *
2857 * Description: This is the driver prop_op(9e) entry point function.
2858 *		Return the number of blocks for the partition in question
2859 *		or forward the request to the property facilities.
2860 *
2861 *   Arguments: dev       - device number
2862 *		dip       - pointer to device info structure
2863 *		prop_op   - property operator
2864 *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2865 *		name      - pointer to property name
2866 *		valuep    - pointer or address of the user buffer
2867 *		lengthp   - property length
2868 *
2869 * Return Code: DDI_PROP_SUCCESS
2870 *              DDI_PROP_NOT_FOUND
2871 *              DDI_PROP_UNDEFINED
2872 *              DDI_PROP_NO_MEMORY
2873 *              DDI_PROP_BUF_TOO_SMALL
2874 */
2875
2876static int
2877sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2878	char *name, caddr_t valuep, int *lengthp)
2879{
2880	struct sd_lun	*un;
2881
2882	if ((un = ddi_get_soft_state(sd_state, ddi_get_instance(dip))) == NULL)
2883		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2884		    name, valuep, lengthp));
2885
2886	return (cmlb_prop_op(un->un_cmlbhandle,
2887	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
2888	    SDPART(dev), (void *)SD_PATH_DIRECT));
2889}
2890
2891/*
2892 * The following functions are for smart probing:
2893 * sd_scsi_probe_cache_init()
2894 * sd_scsi_probe_cache_fini()
2895 * sd_scsi_clear_probe_cache()
2896 * sd_scsi_probe_with_cache()
2897 */
2898
2899/*
2900 *    Function: sd_scsi_probe_cache_init
2901 *
2902 * Description: Initializes the probe response cache mutex and head pointer.
2903 *
2904 *     Context: Kernel thread context
2905 */
2906
2907static void
2908sd_scsi_probe_cache_init(void)
2909{
2910	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2911	sd_scsi_probe_cache_head = NULL;
2912}
2913
2914
2915/*
2916 *    Function: sd_scsi_probe_cache_fini
2917 *
2918 * Description: Frees all resources associated with the probe response cache.
2919 *
2920 *     Context: Kernel thread context
2921 */
2922
2923static void
2924sd_scsi_probe_cache_fini(void)
2925{
2926	struct sd_scsi_probe_cache *cp;
2927	struct sd_scsi_probe_cache *ncp;
2928
2929	/* Clean up our smart probing linked list */
2930	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2931		ncp = cp->next;
2932		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2933	}
2934	sd_scsi_probe_cache_head = NULL;
2935	mutex_destroy(&sd_scsi_probe_cache_mutex);
2936}
2937
2938
2939/*
2940 *    Function: sd_scsi_clear_probe_cache
2941 *
2942 * Description: This routine clears the probe response cache. This is
2943 *		done when open() returns ENXIO so that when deferred
2944 *		attach is attempted (possibly after a device has been
2945 *		turned on) we will retry the probe. Since we don't know
2946 *		which target we failed to open, we just clear the
2947 *		entire cache.
2948 *
2949 *     Context: Kernel thread context
2950 */
2951
2952static void
2953sd_scsi_clear_probe_cache(void)
2954{
2955	struct sd_scsi_probe_cache	*cp;
2956	int				i;
2957
2958	mutex_enter(&sd_scsi_probe_cache_mutex);
2959	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2960		/*
2961		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2962		 * force probing to be performed the next time
2963		 * sd_scsi_probe_with_cache is called.
2964		 */
2965		for (i = 0; i < NTARGETS_WIDE; i++) {
2966			cp->cache[i] = SCSIPROBE_EXISTS;
2967		}
2968	}
2969	mutex_exit(&sd_scsi_probe_cache_mutex);
2970}
2971
2972
2973/*
2974 *    Function: sd_scsi_probe_with_cache
2975 *
2976 * Description: This routine implements support for a scsi device probe
2977 *		with cache. The driver maintains a cache of the target
2978 *		responses to scsi probes. If we get no response from a
2979 *		target during a probe inquiry, we remember that, and we
 *		avoid additional calls to scsi_probe on nonzero LUNs
 *		on the same target until the cache is cleared. By doing
 *		so we avoid the 1/4 sec selection timeout for nonzero
 *		LUNs. LUN 0 of a target is always probed.
2984 *
2985 *   Arguments: devp     - Pointer to a scsi_device(9S) structure
 *              waitfn   - indicates what the allocator routines should
2987 *			   do when resources are not available. This value
2988 *			   is passed on to scsi_probe() when that routine
2989 *			   is called.
2990 *
2991 * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2992 *		otherwise the value returned by scsi_probe(9F).
2993 *
2994 *     Context: Kernel thread context
2995 */
2996
2997static int
2998sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2999{
3000	struct sd_scsi_probe_cache	*cp;
3001	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
3002	int		lun, tgt;
3003
3004	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
3005	    SCSI_ADDR_PROP_LUN, 0);
3006	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
3007	    SCSI_ADDR_PROP_TARGET, -1);
3008
	/* Make sure caching is enabled and the target is in range */
3010	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
3011		/* do it the old way (no cache) */
3012		return (scsi_probe(devp, waitfn));
3013	}
3014
3015	mutex_enter(&sd_scsi_probe_cache_mutex);
3016
3017	/* Find the cache for this scsi bus instance */
3018	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
3019		if (cp->pdip == pdip) {
3020			break;
3021		}
3022	}
3023
3024	/* If we can't find a cache for this pdip, create one */
3025	if (cp == NULL) {
3026		int i;
3027
3028		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
3029		    KM_SLEEP);
3030		cp->pdip = pdip;
3031		cp->next = sd_scsi_probe_cache_head;
3032		sd_scsi_probe_cache_head = cp;
3033		for (i = 0; i < NTARGETS_WIDE; i++) {
3034			cp->cache[i] = SCSIPROBE_EXISTS;
3035		}
3036	}
3037
3038	mutex_exit(&sd_scsi_probe_cache_mutex);
3039
	/* Reset the cache entry for this target when probing LUN 0 */
3041	if (lun == 0) {
3042		cp->cache[tgt] = SCSIPROBE_EXISTS;
3043	}
3044
3045	/* Don't probe if cache remembers a NORESP from a previous LUN. */
3046	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
3047		return (SCSIPROBE_NORESP);
3048	}
3049
3050	/* Do the actual probe; save & return the result */
3051	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
3052}
3053
3054
3055/*
3056 *    Function: sd_scsi_target_lun_init
3057 *
3058 * Description: Initializes the attached lun chain mutex and head pointer.
3059 *
3060 *     Context: Kernel thread context
3061 */
3062
3063static void
3064sd_scsi_target_lun_init(void)
3065{
3066	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
3067	sd_scsi_target_lun_head = NULL;
3068}
3069
3070
3071/*
3072 *    Function: sd_scsi_target_lun_fini
3073 *
3074 * Description: Frees all resources associated with the attached lun
3075 *              chain
3076 *
3077 *     Context: Kernel thread context
3078 */
3079
3080static void
3081sd_scsi_target_lun_fini(void)
3082{
3083	struct sd_scsi_hba_tgt_lun	*cp;
3084	struct sd_scsi_hba_tgt_lun	*ncp;
3085
3086	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
3087		ncp = cp->next;
3088		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
3089	}
3090	sd_scsi_target_lun_head = NULL;
3091	mutex_destroy(&sd_scsi_target_lun_mutex);
3092}
3093
3094
3095/*
3096 *    Function: sd_scsi_get_target_lun_count
3097 *
 * Description: This routine checks the attached lun chain to see how
 *		many luns are attached on the required SCSI controller
 *		and target. Currently, some capabilities such as tagged
 *		queueing are supported per target by the HBA, so all
 *		luns in a target share the same capabilities. Based on
 *		this assumption, sd should set these capabilities only
 *		once per target. This function is called when sd needs
 *		to decide how many luns are already attached on a target.
3106 *
3107 *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
3108 *			  controller device.
3109 *              target	- The target ID on the controller's SCSI bus.
3110 *
3111 * Return Code: The number of luns attached on the required target and
3112 *		controller.
3113 *		-1 if target ID is not in parallel SCSI scope or the given
3114 * 		dip is not in the chain.
3115 *
3116 *     Context: Kernel thread context
3117 */
3118
3119static int
3120sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
3121{
3122	struct sd_scsi_hba_tgt_lun	*cp;
3123
3124	if ((target < 0) || (target >= NTARGETS_WIDE)) {
3125		return (-1);
3126	}
3127
3128	mutex_enter(&sd_scsi_target_lun_mutex);
3129
3130	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3131		if (cp->pdip == dip) {
3132			break;
3133		}
3134	}
3135
3136	mutex_exit(&sd_scsi_target_lun_mutex);
3137
3138	if (cp == NULL) {
3139		return (-1);
3140	}
3141
3142	return (cp->nlun[target]);
3143}
3144
3145
3146/*
3147 *    Function: sd_scsi_update_lun_on_target
3148 *
3149 * Description: This routine is used to update the attached lun chain when a
3150 *		lun is attached or detached on a target.
3151 *
3152 *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
3153 *                        controller device.
3154 *              target  - The target ID on the controller's SCSI bus.
 *		flag	- Indicates whether the lun is attached or detached.
3156 *
3157 *     Context: Kernel thread context
3158 */
3159
3160static void
3161sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
3162{
3163	struct sd_scsi_hba_tgt_lun	*cp;
3164
3165	mutex_enter(&sd_scsi_target_lun_mutex);
3166
3167	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3168		if (cp->pdip == dip) {
3169			break;
3170		}
3171	}
3172
3173	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
3174		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
3175		    KM_SLEEP);
3176		cp->pdip = dip;
3177		cp->next = sd_scsi_target_lun_head;
3178		sd_scsi_target_lun_head = cp;
3179	}
3180
3181	mutex_exit(&sd_scsi_target_lun_mutex);
3182
3183	if (cp != NULL) {
3184		if (flag == SD_SCSI_LUN_ATTACH) {
			cp->nlun[target]++;
		} else {
			cp->nlun[target]--;
3188		}
3189	}
3190}
3191
3192
3193/*
3194 *    Function: sd_spin_up_unit
3195 *
 * Description: Issues the following commands to spin up the device:
 *		START STOP UNIT and INQUIRY.
3198 *
3199 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3200 *                      structure for this target.
3201 *
3202 * Return Code: 0 - success
3203 *		EIO - failure
3204 *		EACCES - reservation conflict
3205 *
3206 *     Context: Kernel thread context
3207 */
3208
3209static int
3210sd_spin_up_unit(sd_ssc_t *ssc)
3211{
3212	size_t	resid		= 0;
3213	int	has_conflict	= FALSE;
3214	uchar_t *bufaddr;
3215	int 	status;
3216	struct sd_lun	*un;
3217
3218	ASSERT(ssc != NULL);
3219	un = ssc->ssc_un;
3220	ASSERT(un != NULL);
3221
3222	/*
3223	 * Send a throwaway START UNIT command.
3224	 *
3225	 * If we fail on this, we don't care presently what precisely
3226	 * is wrong.  EMC's arrays will also fail this with a check
3227	 * condition (0x2/0x4/0x3) if the device is "inactive," but
3228	 * we don't want to fail the attach because it may become
3229	 * "active" later.
	 * We don't know if the power condition is supported or not at
	 * this stage, so use the START STOP bit.
3232	 */
3233	status = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
3234	    SD_TARGET_START, SD_PATH_DIRECT);
3235
3236	if (status != 0) {
3237		if (status == EACCES)
3238			has_conflict = TRUE;
3239		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3240	}
3241
3242	/*
3243	 * Send another INQUIRY command to the target. This is necessary for
3244	 * non-removable media direct access devices because their INQUIRY data
3245	 * may not be fully qualified until they are spun up (perhaps via the
3246	 * START command above).  Note: This seems to be needed for some
	 * START command above).  (Note: This seems to be needed for some
3248	 * Reservation Conflict is present.
3249	 */
3250	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
3251
3252	if (sd_send_scsi_INQUIRY(ssc, bufaddr, SUN_INQSIZE, 0, 0, &resid)
3253	    != 0) {
3254		kmem_free(bufaddr, SUN_INQSIZE);
3255		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
3256		return (EIO);
3257	}
3258
3259	/*
3260	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
3261	 * Note that this routine does not return a failure here even if the
3262	 * INQUIRY command did not return any data.  This is a legacy behavior.
3263	 */
3264	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
3265		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
3266	}
3267
3268	kmem_free(bufaddr, SUN_INQSIZE);
3269
3270	/* If we hit a reservation conflict above, tell the caller. */
3271	if (has_conflict == TRUE) {
3272		return (EACCES);
3273	}
3274
3275	return (0);
3276}
3277
3278#ifdef _LP64
3279/*
3280 *    Function: sd_enable_descr_sense
3281 *
3282 * Description: This routine attempts to select descriptor sense format
3283 *		using the Control mode page.  Devices that support 64 bit
3284 *		LBAs (for >2TB luns) should also implement descriptor
3285 *		sense data so we will call this function whenever we see
3286 *		a lun larger than 2TB.  If for some reason the device
 *		supports 64 bit LBAs but doesn't support descriptor sense,
3288 *		presumably the mode select will fail.  Everything will
3289 *		continue to work normally except that we will not get
3290 *		complete sense data for commands that fail with an LBA
3291 *		larger than 32 bits.
3292 *
3293 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3294 *                      structure for this target.
3295 *
3296 *     Context: Kernel thread context only
3297 */
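
/*
 * A sketch of the mode parameter buffer this routine reads and writes;
 * sizes come from the macros and structures used below:
 *
 *	+------------------------------+
 *	| struct mode_header           | MODE_HEADER_LENGTH bytes
 *	+------------------------------+
 *	| block descriptor             | bd_len bytes (0 for ATAPI devices)
 *	+------------------------------+
 *	| struct mode_control_scsi3    | Control mode page (0x0A)
 *	+------------------------------+
 *
 * The buffer is filled by MODE SENSE, the d_sense bit is set in the
 * control mode page, and the same buffer is then committed with
 * MODE SELECT.
 */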
3298
3299static void
3300sd_enable_descr_sense(sd_ssc_t *ssc)
3301{
3302	uchar_t			*header;
3303	struct mode_control_scsi3 *ctrl_bufp;
3304	size_t			buflen;
3305	size_t			bd_len;
3306	int			status;
3307	struct sd_lun		*un;
3308
3309	ASSERT(ssc != NULL);
3310	un = ssc->ssc_un;
3311	ASSERT(un != NULL);
3312
3313	/*
3314	 * Read MODE SENSE page 0xA, Control Mode Page
3315	 */
3316	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3317	    sizeof (struct mode_control_scsi3);
3318	header = kmem_zalloc(buflen, KM_SLEEP);
3319
3320	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
3321	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT);
3322
3323	if (status != 0) {
3324		SD_ERROR(SD_LOG_COMMON, un,
3325		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3326		goto eds_exit;
3327	}
3328
3329	/*
3330	 * Determine size of Block Descriptors in order to locate
3331	 * the mode page data. ATAPI devices return 0, SCSI devices
3332	 * should return MODE_BLK_DESC_LENGTH.
3333	 */
3334	bd_len  = ((struct mode_header *)header)->bdesc_length;
3335
3336	/* Clear the mode data length field for MODE SELECT */
3337	((struct mode_header *)header)->length = 0;
3338
3339	ctrl_bufp = (struct mode_control_scsi3 *)
3340	    (header + MODE_HEADER_LENGTH + bd_len);
3341
3342	/*
3343	 * If the page length is smaller than the expected value,
3344	 * the target device doesn't support D_SENSE. Bail out here.
3345	 */
3346	if (ctrl_bufp->mode_page.length <
3347	    sizeof (struct mode_control_scsi3) - 2) {
3348		SD_ERROR(SD_LOG_COMMON, un,
3349		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3350		goto eds_exit;
3351	}
3352
3353	/*
3354	 * Clear PS bit for MODE SELECT
3355	 */
3356	ctrl_bufp->mode_page.ps = 0;
3357
3358	/*
3359	 * Set D_SENSE to enable descriptor sense format.
3360	 */
3361	ctrl_bufp->d_sense = 1;
3362
3363	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3364
3365	/*
3366	 * Use MODE SELECT to commit the change to the D_SENSE bit
3367	 */
3368	status = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
3369	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT);
3370
3371	if (status != 0) {
3372		SD_INFO(SD_LOG_COMMON, un,
3373		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3374	} else {
3375		kmem_free(header, buflen);
3376		return;
3377	}
3378
3379eds_exit:
3380	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3381	kmem_free(header, buflen);
3382}
3383
3384/*
3385 *    Function: sd_reenable_dsense_task
3386 *
3387 * Description: Re-enable descriptor sense after device or bus reset
3388 *
3389 *     Context: Executes in a taskq() thread context
3390 */
3391static void
3392sd_reenable_dsense_task(void *arg)
3393{
3394	struct	sd_lun	*un = arg;
3395	sd_ssc_t	*ssc;
3396
3397	ASSERT(un != NULL);
3398
3399	ssc = sd_ssc_init(un);
3400	sd_enable_descr_sense(ssc);
3401	sd_ssc_fini(ssc);
3402}
3403#endif /* _LP64 */
3404
3405/*
3406 *    Function: sd_set_mmc_caps
3407 *
 * Description: This routine determines if the device is MMC compliant and
 *		if the device supports CDDA via a mode sense of the CD/DVD
 *		capabilities mode page (0x2A). It also checks if the device
 *		is a DVD-RAM writable device.
3412 *
3413 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3414 *                      structure for this target.
3415 *
3416 *     Context: Kernel thread context only
3417 */
3418
3419static void
3420sd_set_mmc_caps(sd_ssc_t *ssc)
3421{
3422	struct mode_header_grp2		*sense_mhp;
3423	uchar_t				*sense_page;
3424	caddr_t				buf;
3425	int				bd_len;
3426	int				status;
3427	struct uscsi_cmd		com;
3428	int				rtn;
3429	uchar_t				*out_data_rw, *out_data_hd;
3430	uchar_t				*rqbuf_rw, *rqbuf_hd;
3431	uchar_t				*out_data_gesn;
3432	int				gesn_len;
3433	struct sd_lun			*un;
3434
3435	ASSERT(ssc != NULL);
3436	un = ssc->ssc_un;
3437	ASSERT(un != NULL);
3438
3439	/*
	 * The flags set in this function are: MMC compliant, DVD-RAM
	 * writable device, and CDDA support. Initialize them to FALSE;
	 * each is set to TRUE only if the corresponding capability is
	 * detected.
3443	 */
3444	un->un_f_mmc_cap = FALSE;
3445	un->un_f_dvdram_writable_device = FALSE;
3446	un->un_f_cfg_cdda = FALSE;
3447
3448	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3449	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
3450	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3451
3452	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3453
3454	if (status != 0) {
3455		/* command failed; just return */
3456		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3457		return;
3458	}
3459	/*
3460	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds, the device is assumed to be MMC.
3462	 */
3463	un->un_f_mmc_cap = TRUE;
3464
3465	/* See if GET STATUS EVENT NOTIFICATION is supported */
3466	if (un->un_f_mmc_gesn_polling) {
3467		gesn_len = SD_GESN_HEADER_LEN + SD_GESN_MEDIA_DATA_LEN;
3468		out_data_gesn = kmem_zalloc(gesn_len, KM_SLEEP);
3469
3470		rtn = sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(ssc,
3471		    out_data_gesn, gesn_len, 1 << SD_GESN_MEDIA_CLASS);
3472
3473		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3474
3475		if ((rtn != 0) || !sd_gesn_media_data_valid(out_data_gesn)) {
3476			un->un_f_mmc_gesn_polling = FALSE;
3477			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3478			    "sd_set_mmc_caps: gesn not supported "
3479			    "%d %x %x %x %x\n", rtn,
3480			    out_data_gesn[0], out_data_gesn[1],
3481			    out_data_gesn[2], out_data_gesn[3]);
3482		}
3483
3484		kmem_free(out_data_gesn, gesn_len);
3485	}
3486
3487	/* Get to the page data */
3488	sense_mhp = (struct mode_header_grp2 *)buf;
3489	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3490	    sense_mhp->bdesc_length_lo;
3491	if (bd_len > MODE_BLK_DESC_LENGTH) {
3492		/*
3493		 * We did not get back the expected block descriptor
3494		 * length so we cannot determine if the device supports
3495		 * CDDA. However, we still indicate the device is MMC
3496		 * according to the successful response to the page
3497		 * 0x2A mode sense request.
3498		 */
3499		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3500		    "sd_set_mmc_caps: Mode Sense returned "
3501		    "invalid block descriptor length\n");
3502		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3503		return;
3504	}
3505
3506	/* See if read CDDA is supported */
3507	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3508	    bd_len);
3509	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3510
3511	/* See if writing DVD RAM is supported. */
3512	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3513	if (un->un_f_dvdram_writable_device == TRUE) {
3514		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3515		return;
3516	}
3517
3518	/*
3519	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD will not have
3521	 * these capabilities.
3522	 */
3523	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3524		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3525		return;
3526	}
3527	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3528
3529	/*
3530	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  An RRD
3532	 * device is identified by the features RANDOM_WRITABLE and
3533	 * HARDWARE_DEFECT_MANAGEMENT.
3534	 */
3535	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3536	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3537
3538	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3539	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3540	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3541
3542	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3543
3544	if (rtn != 0) {
3545		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3546		kmem_free(rqbuf_rw, SENSE_LENGTH);
3547		return;
3548	}
3549
3550	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3551	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3552
3553	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3554	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3555	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3556
3557	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3558
3559	if (rtn == 0) {
3560		/*
3561		 * We have good information, check for random writable
3562		 * and hardware defect features.
3563		 */
3564		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3565		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3566			un->un_f_dvdram_writable_device = TRUE;
3567		}
3568	}
3569
3570	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3571	kmem_free(rqbuf_rw, SENSE_LENGTH);
3572	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3573	kmem_free(rqbuf_hd, SENSE_LENGTH);
3574}
3575
3576/*
3577 *    Function: sd_check_for_writable_cd
3578 *
3579 * Description: This routine determines if the media in the device is
 *		writable or not. It uses the GET CONFIGURATION command (0x46)
 *		to determine if the media is writable.
 *
 *   Arguments: ssc - ssc contains pointer to driver soft state (unit)
 *			structure for this target.
3584 *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3585 *                           chain and the normal command waitq, or
3586 *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3587 *                           "direct" chain and bypass the normal command
3588 *                           waitq.
3589 *
3590 *     Context: Never called at interrupt context.
3591 */
3592
3593static void
3594sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag)
3595{
3596	struct uscsi_cmd		com;
3597	uchar_t				*out_data;
3598	uchar_t				*rqbuf;
3599	int				rtn;
3600	uchar_t				*out_data_rw, *out_data_hd;
3601	uchar_t				*rqbuf_rw, *rqbuf_hd;
3602	struct mode_header_grp2		*sense_mhp;
3603	uchar_t				*sense_page;
3604	caddr_t				buf;
3605	int				bd_len;
3606	int				status;
3607	struct sd_lun			*un;
3608
3609	ASSERT(ssc != NULL);
3610	un = ssc->ssc_un;
3611	ASSERT(un != NULL);
3612	ASSERT(mutex_owned(SD_MUTEX(un)));
3613
3614	/*
	 * Initialize the writable media flag to FALSE. It is set to TRUE
	 * only if the configuration info tells us the media is writable.
3617	 */
3618	un->un_f_mmc_writable_media = FALSE;
3619	mutex_exit(SD_MUTEX(un));
3620
3621	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3622	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3623
3624	rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, SENSE_LENGTH,
3625	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3626
3627	if (rtn != 0)
3628		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3629
3630	mutex_enter(SD_MUTEX(un));
3631	if (rtn == 0) {
3632		/*
3633		 * We have good information, check for writable DVD.
3634		 */
3635		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3636			un->un_f_mmc_writable_media = TRUE;
3637			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3638			kmem_free(rqbuf, SENSE_LENGTH);
3639			return;
3640		}
3641	}
3642
3643	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3644	kmem_free(rqbuf, SENSE_LENGTH);
3645
3646	/*
3647	 * Determine if this is a RRD type device.
3648	 */
3649	mutex_exit(SD_MUTEX(un));
3650	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3651	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
3652	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3653
3654	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3655
3656	mutex_enter(SD_MUTEX(un));
3657	if (status != 0) {
3658		/* command failed; just return */
3659		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3660		return;
3661	}
3662
3663	/* Get to the page data */
3664	sense_mhp = (struct mode_header_grp2 *)buf;
3665	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3666	if (bd_len > MODE_BLK_DESC_LENGTH) {
3667		/*
3668		 * We did not get back the expected block descriptor length so
3669		 * we cannot check the mode page.
3670		 */
3671		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3672		    "sd_check_for_writable_cd: Mode Sense returned "
3673		    "invalid block descriptor length\n");
3674		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3675		return;
3676	}
3677
3678	/*
3679	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD device will not have
3681	 * these capabilities.
3682	 */
3683	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3684	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3685		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3686		return;
3687	}
3688	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3689
3690	/*
3691	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  An RRD device is identified
3693	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3694	 */
3695	mutex_exit(SD_MUTEX(un));
3696	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3697	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3698
3699	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3700	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3701	    RANDOM_WRITABLE, path_flag);
3702
3703	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3704	if (rtn != 0) {
3705		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3706		kmem_free(rqbuf_rw, SENSE_LENGTH);
3707		mutex_enter(SD_MUTEX(un));
3708		return;
3709	}
3710
3711	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3712	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3713
3714	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3715	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3716	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3717
3718	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3719	mutex_enter(SD_MUTEX(un));
3720	if (rtn == 0) {
3721		/*
3722		 * We have good information, check for random writable
3723		 * and hardware defect features as current.
3724		 */
3725		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3726		    (out_data_rw[10] & 0x1) &&
3727		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3728		    (out_data_hd[10] & 0x1)) {
3729			un->un_f_mmc_writable_media = TRUE;
3730		}
3731	}
3732
3733	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3734	kmem_free(rqbuf_rw, SENSE_LENGTH);
3735	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3736	kmem_free(rqbuf_hd, SENSE_LENGTH);
3737}
3738
3739/*
3740 *    Function: sd_read_unit_properties
3741 *
3742 * Description: The following implements a property lookup mechanism.
3743 *		Properties for particular disks (keyed on vendor, model
3744 *		and rev numbers) are sought in the sd.conf file via
3745 *		sd_process_sdconf_file(), and if not found there, are
3746 *		looked for in a list hardcoded in this driver via
 *		sd_process_sdconf_table(). Once located, the properties
 *		are used to update the driver unit structure.
3749 *
3750 *   Arguments: un - driver soft state (unit) structure
3751 */
3752
3753static void
3754sd_read_unit_properties(struct sd_lun *un)
3755{
3756	/*
3757	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3758	 * the "sd-config-list" property (from the sd.conf file) or if
3759	 * there was not a match for the inquiry vid/pid. If this event
3760	 * occurs the static driver configuration table is searched for
3761	 * a match.
3762	 */
3763	ASSERT(un != NULL);
3764	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3765		sd_process_sdconf_table(un);
3766	}
3767
3768	/* check for LSI device */
	sd_is_lsi(un);
}
3773
3774
3775/*
3776 *    Function: sd_process_sdconf_file
3777 *
3778 * Description: Use ddi_prop_lookup(9F) to obtain the properties from the
3779 *		driver's config file (ie, sd.conf) and update the driver
3780 *		soft state structure accordingly.
3781 *
3782 *   Arguments: un - driver soft state (unit) structure
3783 *
3784 * Return Code: SD_SUCCESS - The properties were successfully set according
3785 *			     to the driver configuration file.
3786 *		SD_FAILURE - The driver config list was not obtained or
3787 *			     there was no vid/pid match. This indicates that
3788 *			     the static config table should be used.
3789 *
3790 * The config file has a property, "sd-config-list". Currently we support
3791 * two kinds of formats. For both formats, the value of this property
3792 * is a list of duplets:
3793 *
3794 *  sd-config-list=
3795 *	<duplet>,
3796 *	[,<duplet>]*;
3797 *
 * For the improved format, the structure of each duplet is as follows:
3799 *
3800 *     <duplet>:= "<vid+pid>","<tunable-list>"
3801 *
3802 * and
3803 *
3804 *     <tunable-list>:=   <tunable> [, <tunable> ]*;
3805 *     <tunable> =        <name> : <value>
3806 *
3807 * The <vid+pid> is the string that is returned by the target device on a
3808 * SCSI inquiry command, the <tunable-list> contains one or more tunables
3809 * to apply to all target devices with the specified <vid+pid>.
3810 *
3811 * Each <tunable> is a "<name> : <value>" pair.
3812 *
3813 * For the old format, the structure of each duplet is as follows:
3814 *
3815 *  <duplet>:= "<vid+pid>","<data-property-name_list>"
3816 *
3817 * The first entry of the duplet is the device ID string (the concatenated
3818 * vid & pid; not to be confused with a device_id).  This is defined in
3819 * the same way as in the sd_disk_table.
3820 *
3821 * The second part of the duplet is a string that identifies a
3822 * data-property-name-list. The data-property-name-list is defined as
3823 * follows:
3824 *
3825 *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3826 *
3827 * The syntax of <data-property-name> depends on the <version> field.
3828 *
3829 * If version = SD_CONF_VERSION_1 we have the following syntax:
3830 *
3831 * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3832 *
3833 * where the prop0 value will be used to set prop0 if bit0 set in the
 * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3835 *
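 * As an illustration only (the vid/pid strings and values below are
 * hypothetical), an sd.conf fragment using both formats might look like:
 *
 *  sd-config-list =
 *	"ACME    ExampleDisk1", "retries-timeout:5, disksort:false",
 *	"ACME    ExampleDisk2", "acme-example-data";
 *  acme-example-data = 1,0x1,10;
 *
 * The first duplet uses the improved format to set two tunables. The
 * second uses the old format, where "acme-example-data" names an integer
 * list of <version>,<flags>,<prop...> (here version 1, flags 0x1, and a
 * single property value of 10 for the tunable selected by bit 0).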
3836 */
3837
3838static int
3839sd_process_sdconf_file(struct sd_lun *un)
3840{
3841	char	**config_list = NULL;
3842	uint_t	nelements;
3843	char	*vidptr;
3844	int	vidlen;
3845	char	*dnlist_ptr;
3846	char	*dataname_ptr;
3847	char	*dataname_lasts;
3848	int	*data_list = NULL;
3849	uint_t	data_list_len;
3850	int	rval = SD_FAILURE;
3851	int	i;
3852
3853	ASSERT(un != NULL);
3854
3855	/* Obtain the configuration list associated with the .conf file */
3856	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, SD_DEVINFO(un),
3857	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, sd_config_list,
3858	    &config_list, &nelements) != DDI_PROP_SUCCESS) {
3859		return (SD_FAILURE);
3860	}
3861
3862	/*
3863	 * Compare vids in each duplet to the inquiry vid - if a match is
3864	 * made, get the data value and update the soft state structure
3865	 * accordingly.
3866	 *
	 * Each duplet should appear as a pair of strings; otherwise,
	 * return SD_FAILURE.
3869	 */
3870	if (nelements & 1) {
3871		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3872		    "sd-config-list should show as pairs of strings.\n");
3873		if (config_list)
3874			ddi_prop_free(config_list);
3875		return (SD_FAILURE);
3876	}
3877
3878	for (i = 0; i < nelements; i += 2) {
3879		/*
3880		 * Note: The assumption here is that each vid entry is on
3881		 * a unique line from its associated duplet.
3882		 */
3883		vidptr = config_list[i];
3884		vidlen = (int)strlen(vidptr);
3885		if ((vidlen == 0) ||
3886		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3887			continue;
3888		}
3889
3890		/*
3891		 * dnlist contains 1 or more blank separated
3892		 * data-property-name entries
3893		 */
3894		dnlist_ptr = config_list[i + 1];
3895
3896		if (strchr(dnlist_ptr, ':') != NULL) {
3897			/*
3898			 * Decode the improved format sd-config-list.
3899			 */
3900			sd_nvpair_str_decode(un, dnlist_ptr);
3901		} else {
3902			/*
			 * This is the old format sd-config-list. Loop
			 * through all data-property-name entries in the
			 * data-property-name-list, setting the properties
			 * for each.
3907			 */
3908			for (dataname_ptr = sd_strtok_r(dnlist_ptr, " \t",
3909			    &dataname_lasts); dataname_ptr != NULL;
3910			    dataname_ptr = sd_strtok_r(NULL, " \t",
3911			    &dataname_lasts)) {
3912				int version;
3913
3914				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3915				    "sd_process_sdconf_file: disk:%s, "
3916				    "data:%s\n", vidptr, dataname_ptr);
3917
3918				/* Get the data list */
3919				if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY,
3920				    SD_DEVINFO(un), 0, dataname_ptr, &data_list,
3921				    &data_list_len) != DDI_PROP_SUCCESS) {
3922					SD_INFO(SD_LOG_ATTACH_DETACH, un,
3923					    "sd_process_sdconf_file: data "
3924					    "property (%s) has no value\n",
3925					    dataname_ptr);
3926					continue;
3927				}
3928
3929				version = data_list[0];
3930
3931				if (version == SD_CONF_VERSION_1) {
3932					sd_tunables values;
3933
3934					/* Set the properties */
3935					if (sd_chk_vers1_data(un, data_list[1],
3936					    &data_list[2], data_list_len,
3937					    dataname_ptr) == SD_SUCCESS) {
3938						sd_get_tunables_from_conf(un,
3939						    data_list[1], &data_list[2],
3940						    &values);
3941						sd_set_vers1_properties(un,
3942						    data_list[1], &values);
3943						rval = SD_SUCCESS;
3944					} else {
3945						rval = SD_FAILURE;
3946					}
3947				} else {
3948					scsi_log(SD_DEVINFO(un), sd_label,
3949					    CE_WARN, "data property %s version "
3950					    "0x%x is invalid.",
3951					    dataname_ptr, version);
3952					rval = SD_FAILURE;
3953				}
3954				if (data_list)
3955					ddi_prop_free(data_list);
3956			}
3957		}
3958	}
3959
3960	/* free up the memory allocated by ddi_prop_lookup_string_array(). */
3961	if (config_list) {
3962		ddi_prop_free(config_list);
3963	}
3964
3965	return (rval);
3966}
3967
3968/*
3969 *    Function: sd_nvpair_str_decode()
3970 *
3971 * Description: Parse the improved format sd-config-list to get
3972 *    each entry of tunable, which includes a name-value pair.
3973 *    Then call sd_set_properties() to set the property.
3974 *
3975 *   Arguments: un - driver soft state (unit) structure
3976 *    nvpair_str - the tunable list
3977 */
3978static void
3979sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str)
3980{
3981	char	*nv, *name, *value, *token;
3982	char	*nv_lasts, *v_lasts, *x_lasts;
3983
3984	for (nv = sd_strtok_r(nvpair_str, ",", &nv_lasts); nv != NULL;
3985	    nv = sd_strtok_r(NULL, ",", &nv_lasts)) {
3986		token = sd_strtok_r(nv, ":", &v_lasts);
3987		name  = sd_strtok_r(token, " \t", &x_lasts);
3988		token = sd_strtok_r(NULL, ":", &v_lasts);
3989		value = sd_strtok_r(token, " \t", &x_lasts);
3990		if (name == NULL || value == NULL) {
3991			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3992			    "sd_nvpair_str_decode: "
3993			    "name or value is not valid!\n");
3994		} else {
3995			sd_set_properties(un, name, value);
3996		}
3997	}
3998}
3999
4000/*
4001 *    Function: sd_strtok_r()
4002 *
 * Description: This function uses strpbrk and strspn to break a
 *    string into tokens on successive calls. It returns NULL when
 *    no non-separator characters remain. The first argument must be
 *    NULL on all calls after the first.
4007 */
4008static char *
4009sd_strtok_r(char *string, const char *sepset, char **lasts)
4010{
4011	char	*q, *r;
4012
4013	/* First or subsequent call */
4014	if (string == NULL)
4015		string = *lasts;
4016
4017	if (string == NULL)
4018		return (NULL);
4019
4020	/* Skip leading separators */
4021	q = string + strspn(string, sepset);
4022
4023	if (*q == '\0')
4024		return (NULL);
4025
4026	if ((r = strpbrk(q, sepset)) == NULL)
4027		*lasts = NULL;
4028	else {
4029		*r = '\0';
4030		*lasts = r + 1;
4031	}
4032	return (q);
4033}
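
/*
 * Usage sketch for sd_strtok_r() (illustrative only; this mirrors the
 * calling pattern in sd_nvpair_str_decode() above):
 *
 *	char buf[] = "throttle-max : 16, disksort : false";
 *	char *lasts, *pair;
 *
 *	for (pair = sd_strtok_r(buf, ",", &lasts); pair != NULL;
 *	    pair = sd_strtok_r(NULL, ",", &lasts)) {
 *		(pair is "throttle-max : 16", then " disksort : false")
 *	}
 *
 * Like the userland strtok_r(3C), this routine writes NUL bytes into the
 * string it parses, so callers must pass a writable buffer.
 */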
4034
4035/*
4036 *    Function: sd_set_properties()
4037 *
4038 * Description: Set device properties based on the improved
4039 *    format sd-config-list.
4040 *
4041 *   Arguments: un - driver soft state (unit) structure
4042 *    name  - supported tunable name
4043 *    value - tunable value
4044 */
4045static void
4046sd_set_properties(struct sd_lun *un, char *name, char *value)
4047{
4048	char	*endptr = NULL;
4049	long	val = 0;
4050
4051	if (strcasecmp(name, "cache-nonvolatile") == 0) {
4052		if (strcasecmp(value, "true") == 0) {
4053			un->un_f_suppress_cache_flush = TRUE;
4054		} else if (strcasecmp(value, "false") == 0) {
4055			un->un_f_suppress_cache_flush = FALSE;
4056		} else {
4057			goto value_invalid;
4058		}
4059		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4060		    "suppress_cache_flush flag set to %d\n",
4061		    un->un_f_suppress_cache_flush);
4062		return;
4063	}
4064
4065	if (strcasecmp(name, "controller-type") == 0) {
4066		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4067			un->un_ctype = val;
4068		} else {
4069			goto value_invalid;
4070		}
4071		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4072		    "ctype set to %d\n", un->un_ctype);
4073		return;
4074	}
4075
4076	if (strcasecmp(name, "delay-busy") == 0) {
4077		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4078			un->un_busy_timeout = drv_usectohz(val / 1000);
4079		} else {
4080			goto value_invalid;
4081		}
4082		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4083		    "busy_timeout set to %d\n", un->un_busy_timeout);
4084		return;
4085	}
4086
4087	if (strcasecmp(name, "disksort") == 0) {
4088		if (strcasecmp(value, "true") == 0) {
4089			un->un_f_disksort_disabled = FALSE;
4090		} else if (strcasecmp(value, "false") == 0) {
4091			un->un_f_disksort_disabled = TRUE;
4092		} else {
4093			goto value_invalid;
4094		}
4095		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4096		    "disksort disabled flag set to %d\n",
4097		    un->un_f_disksort_disabled);
4098		return;
4099	}
4100
4101	if (strcasecmp(name, "power-condition") == 0) {
4102		if (strcasecmp(value, "true") == 0) {
4103			un->un_f_power_condition_disabled = FALSE;
4104		} else if (strcasecmp(value, "false") == 0) {
4105			un->un_f_power_condition_disabled = TRUE;
4106		} else {
4107			goto value_invalid;
4108		}
4109		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4110		    "power condition disabled flag set to %d\n",
4111		    un->un_f_power_condition_disabled);
4112		return;
4113	}
4114
4115	if (strcasecmp(name, "timeout-releasereservation") == 0) {
4116		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4117			un->un_reserve_release_time = val;
4118		} else {
4119			goto value_invalid;
4120		}
4121		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4122		    "reservation release timeout set to %d\n",
4123		    un->un_reserve_release_time);
4124		return;
4125	}
4126
4127	if (strcasecmp(name, "reset-lun") == 0) {
4128		if (strcasecmp(value, "true") == 0) {
4129			un->un_f_lun_reset_enabled = TRUE;
4130		} else if (strcasecmp(value, "false") == 0) {
4131			un->un_f_lun_reset_enabled = FALSE;
4132		} else {
4133			goto value_invalid;
4134		}
4135		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4136		    "lun reset enabled flag set to %d\n",
4137		    un->un_f_lun_reset_enabled);
4138		return;
4139	}
4140
4141	if (strcasecmp(name, "retries-busy") == 0) {
4142		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4143			un->un_busy_retry_count = val;
4144		} else {
4145			goto value_invalid;
4146		}
4147		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4148		    "busy retry count set to %d\n", un->un_busy_retry_count);
4149		return;
4150	}
4151
4152	if (strcasecmp(name, "retries-timeout") == 0) {
4153		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4154			un->un_retry_count = val;
4155		} else {
4156			goto value_invalid;
4157		}
4158		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4159		    "timeout retry count set to %d\n", un->un_retry_count);
4160		return;
4161	}
4162
4163	if (strcasecmp(name, "retries-notready") == 0) {
4164		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4165			un->un_notready_retry_count = val;
4166		} else {
4167			goto value_invalid;
4168		}
4169		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4170		    "notready retry count set to %d\n",
4171		    un->un_notready_retry_count);
4172		return;
4173	}
4174
4175	if (strcasecmp(name, "retries-reset") == 0) {
4176		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4177			un->un_reset_retry_count = val;
4178		} else {
4179			goto value_invalid;
4180		}
4181		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4182		    "reset retry count set to %d\n",
4183		    un->un_reset_retry_count);
4184		return;
4185	}
4186
4187	if (strcasecmp(name, "throttle-max") == 0) {
4188		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4189			un->un_saved_throttle = un->un_throttle = val;
4190		} else {
4191			goto value_invalid;
4192		}
4193		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4194		    "throttle set to %d\n", un->un_throttle);
4195	}
4196
4197	if (strcasecmp(name, "throttle-min") == 0) {
4198		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4199			un->un_min_throttle = val;
4200		} else {
4201			goto value_invalid;
4202		}
4203		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4204		    "min throttle set to %d\n", un->un_min_throttle);
4205	}
4206
4207	if (strcasecmp(name, "rmw-type") == 0) {
4208		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4209			un->un_f_rmw_type = val;
4210		} else {
4211			goto value_invalid;
4212		}
4213		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4214		    "RMW type set to %d\n", un->un_f_rmw_type);
4215	}
4216
4217	/*
4218	 * Validate the throttle values.
4219	 * If any of the numbers are invalid, set everything to defaults.
4220	 */
4221	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4222	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4223	    (un->un_min_throttle > un->un_throttle)) {
4224		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4225		un->un_min_throttle = sd_min_throttle;
4226	}
4227
4228	if (strcasecmp(name, "mmc-gesn-polling") == 0) {
4229		if (strcasecmp(value, "true") == 0) {
4230			un->un_f_mmc_gesn_polling = TRUE;
4231		} else if (strcasecmp(value, "false") == 0) {
4232			un->un_f_mmc_gesn_polling = FALSE;
4233		} else {
4234			goto value_invalid;
4235		}
4236		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4237		    "mmc-gesn-polling set to %d\n",
4238		    un->un_f_mmc_gesn_polling);
4239	}
4240
4241	return;
4242
4243value_invalid:
4244	SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4245	    "value of prop %s is invalid\n", name);
4246}
4247
4248/*
4249 *    Function: sd_get_tunables_from_conf()
 *
 * Description: This function reads the data list from the sd.conf file
 *    and pulls the values that can have numeric values as arguments,
 *    placing the values in the appropriate sd_tunables member.
 *    Since the order of the data list members varies across platforms,
 *    this function reads them from the data list in a platform-specific
 *    order and places them into the correct sd_tunables member that is
 *    consistent across all platforms.
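 *
 *    For example, if SD_CONF_BSET_THROTTLE is set at bit position i in
 *    the flags word, data_list[i] supplies sdt_throttle; in general each
 *    set flag bit selects the data list element at that bit's index.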
4259 */
4260static void
4261sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
4262    sd_tunables *values)
4263{
4264	int i;
4265	int mask;
4266
4267	bzero(values, sizeof (sd_tunables));
4268
4269	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4270
4271		mask = 1 << i;
4272		if (mask > flags) {
4273			break;
4274		}
4275
4276		switch (mask & flags) {
4277		case 0:	/* This mask bit not set in flags */
4278			continue;
4279		case SD_CONF_BSET_THROTTLE:
4280			values->sdt_throttle = data_list[i];
4281			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4282			    "sd_get_tunables_from_conf: throttle = %d\n",
4283			    values->sdt_throttle);
4284			break;
4285		case SD_CONF_BSET_CTYPE:
4286			values->sdt_ctype = data_list[i];
4287			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4288			    "sd_get_tunables_from_conf: ctype = %d\n",
4289			    values->sdt_ctype);
4290			break;
4291		case SD_CONF_BSET_NRR_COUNT:
4292			values->sdt_not_rdy_retries = data_list[i];
4293			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4294			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
4295			    values->sdt_not_rdy_retries);
4296			break;
4297		case SD_CONF_BSET_BSY_RETRY_COUNT:
4298			values->sdt_busy_retries = data_list[i];
4299			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4300			    "sd_get_tunables_from_conf: busy_retries = %d\n",
4301			    values->sdt_busy_retries);
4302			break;
4303		case SD_CONF_BSET_RST_RETRIES:
4304			values->sdt_reset_retries = data_list[i];
4305			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4306			    "sd_get_tunables_from_conf: reset_retries = %d\n",
4307			    values->sdt_reset_retries);
4308			break;
4309		case SD_CONF_BSET_RSV_REL_TIME:
4310			values->sdt_reserv_rel_time = data_list[i];
4311			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4312			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
4313			    values->sdt_reserv_rel_time);
4314			break;
4315		case SD_CONF_BSET_MIN_THROTTLE:
4316			values->sdt_min_throttle = data_list[i];
4317			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4318			    "sd_get_tunables_from_conf: min_throttle = %d\n",
4319			    values->sdt_min_throttle);
4320			break;
4321		case SD_CONF_BSET_DISKSORT_DISABLED:
4322			values->sdt_disk_sort_dis = data_list[i];
4323			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4324			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
4325			    values->sdt_disk_sort_dis);
4326			break;
4327		case SD_CONF_BSET_LUN_RESET_ENABLED:
4328			values->sdt_lun_reset_enable = data_list[i];
4329			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4330			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
4331			    "\n", values->sdt_lun_reset_enable);
4332			break;
4333		case SD_CONF_BSET_CACHE_IS_NV:
4334			values->sdt_suppress_cache_flush = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: "
			    "suppress_cache_flush = %d\n",
			    values->sdt_suppress_cache_flush);
4339			break;
4340		case SD_CONF_BSET_PC_DISABLED:
			values->sdt_power_condition_dis = data_list[i];
4342			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4343			    "sd_get_tunables_from_conf: power_condition_dis = "
4344			    "%d\n", values->sdt_power_condition_dis);
4345			break;
4346		}
4347	}
4348}
4349
4350/*
4351 *    Function: sd_process_sdconf_table
4352 *
4353 * Description: Search the static configuration table for a match on the
4354 *		inquiry vid/pid and update the driver soft state structure
4355 *		according to the table property values for the device.
4356 *
4357 *		The form of a configuration table entry is:
4358 *		  <vid+pid>,<flags>,<property-data>
4359 *		  "SEAGATE ST42400N",1,0x40000,
4360 *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
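 *		  (In this example the flags value 0x40000 has only bit 18
 *		  set, so only the property value at index 18, the trailing
 *		  1, is consumed.)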
4361 *
4362 *   Arguments: un - driver soft state (unit) structure
4363 */
4364
4365static void
4366sd_process_sdconf_table(struct sd_lun *un)
4367{
4368	char	*id = NULL;
4369	int	table_index;
4370	int	idlen;
4371
4372	ASSERT(un != NULL);
4373	for (table_index = 0; table_index < sd_disk_table_size;
4374	    table_index++) {
4375		id = sd_disk_table[table_index].device_id;
4376		idlen = strlen(id);
4377		if (idlen == 0) {
4378			continue;
4379		}
4380
4381		/*
4382		 * The static configuration table currently does not
4383		 * implement version 10 properties. Additionally,
4384		 * multiple data-property-name entries are not
4385		 * implemented in the static configuration table.
4386		 */
4387		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4388			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4389			    "sd_process_sdconf_table: disk %s\n", id);
4390			sd_set_vers1_properties(un,
4391			    sd_disk_table[table_index].flags,
4392			    sd_disk_table[table_index].properties);
4393			break;
4394		}
4395	}
4396}
4397
4398
4399/*
4400 *    Function: sd_sdconf_id_match
4401 *
4402 * Description: This local function implements a case sensitive vid/pid
4403 *		comparison as well as the boundary cases of wild card and
4404 *		multiple blanks.
4405 *
4406 *		Note: An implicit assumption made here is that the scsi
4407 *		inquiry structure will always keep the vid, pid and
4408 *		revision strings in consecutive sequence, so they can be
4409 *		read as a single string. If this assumption is not the
4410 *		case, a separate string, to be used for the check, needs
4411 *		to be built with these strings concatenated.
4412 *
4413 *   Arguments: un - driver soft state (unit) structure
4414 *		id - table or config file vid/pid
4415 *		idlen  - length of the vid/pid (bytes)
4416 *
4417 * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4418 *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4419 */
4420
4421static int
4422sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
4423{
4424	struct scsi_inquiry	*sd_inq;
4425	int 			rval = SD_SUCCESS;
4426
4427	ASSERT(un != NULL);
4428	sd_inq = un->un_sd->sd_inq;
4429	ASSERT(id != NULL);
4430
4431	/*
4432	 * We use the inq_vid as a pointer to a buffer containing the
4433	 * vid and pid and use the entire vid/pid length of the table
4434	 * entry for the comparison. This works because the inq_pid
4435	 * data member follows inq_vid in the scsi_inquiry structure.
4436	 */
4437	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
4438		/*
4439		 * The user id string is compared to the inquiry vid/pid
4440		 * using a case insensitive comparison and ignoring
4441		 * multiple spaces.
4442		 */
4443		rval = sd_blank_cmp(un, id, idlen);
4444		if (rval != SD_SUCCESS) {
4445			/*
4446			 * User id strings that start and end with a "*"
4447			 * are a special case. These do not have a
4448			 * specific vendor, and the product string can
4449			 * appear anywhere in the 16 byte PID portion of
4450			 * the inquiry data. This is a simple strstr()
4451			 * type search for the user id in the inquiry data.
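			 * For example, a hypothetical id of "*Universal*"
			 * would match any device whose inquiry PID contains
			 * the substring "Universal".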
4452			 */
4453			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
4454				char	*pidptr = &id[1];
4455				int	i;
4456				int	j;
4457				int	pidstrlen = idlen - 2;
4458				j = sizeof (SD_INQUIRY(un)->inq_pid) -
4459				    pidstrlen;
4460
4461				if (j < 0) {
4462					return (SD_FAILURE);
4463				}
				/* Offsets 0 through j inclusive are valid */
				for (i = 0; i <= j; i++) {
4465					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
4466					    pidptr, pidstrlen) == 0) {
4467						rval = SD_SUCCESS;
4468						break;
4469					}
4470				}
4471			}
4472		}
4473	}
4474	return (rval);
4475}
4476
4477
4478/*
4479 *    Function: sd_blank_cmp
4480 *
4481 * Description: If the id string starts and ends with a space, treat
4482 *		multiple consecutive spaces as equivalent to a single
4483 *		space. For example, this causes a sd_disk_table entry
4484 *		of " NEC CDROM " to match a device's id string of
4485 *		"NEC       CDROM".
4486 *
 *		Note: The success exit condition for this routine is that
 *		the table entry pointer has reached its terminating '\0'
 *		and the remaining inquiry length count is zero. This will
 *		happen if the inquiry string returned by the device is
 *		padded with spaces to be exactly 24 bytes in length
 *		(8 byte vid + 16 byte pid). The SCSI spec states that the
 *		inquiry string is to be padded with spaces.
4494 *
4495 *   Arguments: un - driver soft state (unit) structure
4496 *		id - table or config file vid/pid
4497 *		idlen  - length of the vid/pid (bytes)
4498 *
4499 * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4500 *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4501 */
4502
4503static int
4504sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
4505{
4506	char		*p1;
4507	char		*p2;
4508	int		cnt;
4509	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
4510	    sizeof (SD_INQUIRY(un)->inq_pid);
4511
4512	ASSERT(un != NULL);
4513	p2 = un->un_sd->sd_inq->inq_vid;
4514	ASSERT(id != NULL);
4515	p1 = id;
4516
4517	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
4518		/*
4519		 * Note: string p1 is terminated by a NUL but string p2
4520		 * isn't.  The end of p2 is determined by cnt.
4521		 */
4522		for (;;) {
4523			/* skip over any extra blanks in both strings */
4524			while ((*p1 != '\0') && (*p1 == ' ')) {
4525				p1++;
4526			}
4527			while ((cnt != 0) && (*p2 == ' ')) {
4528				p2++;
4529				cnt--;
4530			}
4531
4532			/* compare the two strings */
4533			if ((cnt == 0) ||
4534			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
4535				break;
4536			}
4537			while ((cnt > 0) &&
4538			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
4539				p1++;
4540				p2++;
4541				cnt--;
4542			}
4543		}
4544	}
4545
4546	/* return SD_SUCCESS if both strings match */
4547	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
4548}
4549
4550
4551/*
4552 *    Function: sd_chk_vers1_data
4553 *
4554 * Description: Verify the version 1 device properties provided by the
4555 *		user via the configuration file
4556 *
4557 *   Arguments: un	     - driver soft state (unit) structure
4558 *		flags	     - integer mask indicating properties to be set
4559 *		prop_list    - integer list of property values
4560 *		list_len     - number of the elements
4561 *
4562 * Return Code: SD_SUCCESS - Indicates the user provided data is valid
4563 *		SD_FAILURE - Indicates the user provided data is invalid
4564 */
4565
4566static int
4567sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
4568    int list_len, char *dataname_ptr)
4569{
4570	int i;
4571	int mask = 1;
4572	int index = 0;
4573
4574	ASSERT(un != NULL);
4575
4576	/* Check for a NULL property name and list */
4577	if (dataname_ptr == NULL) {
4578		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4579		    "sd_chk_vers1_data: NULL data property name.");
4580		return (SD_FAILURE);
4581	}
4582	if (prop_list == NULL) {
4583		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4584		    "sd_chk_vers1_data: %s NULL data property list.",
4585		    dataname_ptr);
4586		return (SD_FAILURE);
4587	}
4588
4589	/* Display a warning if undefined bits are set in the flags */
4590	if (flags & ~SD_CONF_BIT_MASK) {
4591		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4592		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
4593		    "Properties not set.",
4594		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
4595		return (SD_FAILURE);
4596	}
4597
	/*
	 * Verify the length of the list by counting the bits set in the
	 * flags and validating that the property list holds at least that
	 * many property values, in addition to the version and flags
	 * words. For example, flags of 0x3 require a list of at least
	 * four elements: <version>, <flags>, <prop0>, <prop1>.
	 */
	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
		if (flags & mask) {
			index++;
		}
		mask <<= 1;
	}
4609	if (list_len < (index + 2)) {
4610		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4611		    "sd_chk_vers1_data: "
4612		    "Data property list %s size is incorrect. "
4613		    "Properties not set.", dataname_ptr);
4614		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
4615		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
4616		return (SD_FAILURE);
4617	}
4618	return (SD_SUCCESS);
4619}
4620
4621
4622/*
4623 *    Function: sd_set_vers1_properties
4624 *
4625 * Description: Set version 1 device properties based on a property list
4626 *		retrieved from the driver configuration file or static
4627 *		configuration table. Version 1 properties have the format:
4628 *
4629 * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
4630 *
4631 *		where the prop0 value will be used to set prop0 if bit0
4632 *		is set in the flags
4633 *
4634 *   Arguments: un	     - driver soft state (unit) structure
4635 *		flags	     - integer mask indicating properties to be set
4636 *		prop_list    - integer list of property values
4637 */
4638
4639static void
4640sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4641{
4642	ASSERT(un != NULL);
4643
4644	/*
4645	 * Set the flag to indicate cache is to be disabled. An attempt
4646	 * to disable the cache via sd_cache_control() will be made
4647	 * later during attach once the basic initialization is complete.
4648	 */
4649	if (flags & SD_CONF_BSET_NOCACHE) {
4650		un->un_f_opt_disable_cache = TRUE;
4651		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4652		    "sd_set_vers1_properties: caching disabled flag set\n");
4653	}
4654
4655	/* CD-specific configuration parameters */
4656	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4657		un->un_f_cfg_playmsf_bcd = TRUE;
4658		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4659		    "sd_set_vers1_properties: playmsf_bcd set\n");
4660	}
4661	if (flags & SD_CONF_BSET_READSUB_BCD) {
4662		un->un_f_cfg_readsub_bcd = TRUE;
4663		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4664		    "sd_set_vers1_properties: readsub_bcd set\n");
4665	}
4666	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4667		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4668		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4669		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4670	}
4671	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4672		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4673		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4674		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4675	}
4676	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4677		un->un_f_cfg_no_read_header = TRUE;
4678		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4679		    "sd_set_vers1_properties: no_read_header set\n");
4680	}
4681	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4682		un->un_f_cfg_read_cd_xd4 = TRUE;
4683		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4684		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4685	}
4686
4687	/* Support for devices which do not have valid/unique serial numbers */
4688	if (flags & SD_CONF_BSET_FAB_DEVID) {
4689		un->un_f_opt_fab_devid = TRUE;
4690		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4691		    "sd_set_vers1_properties: fab_devid bit set\n");
4692	}
4693
4694	/* Support for user throttle configuration */
4695	if (flags & SD_CONF_BSET_THROTTLE) {
4696		ASSERT(prop_list != NULL);
4697		un->un_saved_throttle = un->un_throttle =
4698		    prop_list->sdt_throttle;
4699		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4700		    "sd_set_vers1_properties: throttle set to %d\n",
4701		    prop_list->sdt_throttle);
4702	}
4703
4704	/* Set the per disk retry count according to the conf file or table. */
4705	if (flags & SD_CONF_BSET_NRR_COUNT) {
4706		ASSERT(prop_list != NULL);
4707		if (prop_list->sdt_not_rdy_retries) {
4708			un->un_notready_retry_count =
4709			    prop_list->sdt_not_rdy_retries;
4710			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4711			    "sd_set_vers1_properties: not ready retry count"
4712			    " set to %d\n", un->un_notready_retry_count);
4713		}
4714	}
4715
4716	/* The controller type is reported for generic disk driver ioctls */
4717	if (flags & SD_CONF_BSET_CTYPE) {
4718		ASSERT(prop_list != NULL);
4719		switch (prop_list->sdt_ctype) {
4720		case CTYPE_CDROM:
4721			un->un_ctype = prop_list->sdt_ctype;
4722			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4723			    "sd_set_vers1_properties: ctype set to "
4724			    "CTYPE_CDROM\n");
4725			break;
4726		case CTYPE_CCS:
4727			un->un_ctype = prop_list->sdt_ctype;
4728			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4729			    "sd_set_vers1_properties: ctype set to "
4730			    "CTYPE_CCS\n");
4731			break;
4732		case CTYPE_ROD:		/* RW optical */
4733			un->un_ctype = prop_list->sdt_ctype;
4734			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4735			    "sd_set_vers1_properties: ctype set to "
4736			    "CTYPE_ROD\n");
4737			break;
4738		default:
4739			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4740			    "sd_set_vers1_properties: Could not set "
4741			    "invalid ctype value (%d)",
4742			    prop_list->sdt_ctype);
4743		}
4744	}
4745
4746	/* Purple failover timeout */
4747	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4748		ASSERT(prop_list != NULL);
4749		un->un_busy_retry_count =
4750		    prop_list->sdt_busy_retries;
4751		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4752		    "sd_set_vers1_properties: "
4753		    "busy retry count set to %d\n",
4754		    un->un_busy_retry_count);
4755	}
4756
4757	/* Purple reset retry count */
4758	if (flags & SD_CONF_BSET_RST_RETRIES) {
4759		ASSERT(prop_list != NULL);
4760		un->un_reset_retry_count =
4761		    prop_list->sdt_reset_retries;
4762		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4763		    "sd_set_vers1_properties: "
4764		    "reset retry count set to %d\n",
4765		    un->un_reset_retry_count);
4766	}
4767
4768	/* Purple reservation release timeout */
4769	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4770		ASSERT(prop_list != NULL);
4771		un->un_reserve_release_time =
4772		    prop_list->sdt_reserv_rel_time;
4773		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4774		    "sd_set_vers1_properties: "
4775		    "reservation release timeout set to %d\n",
4776		    un->un_reserve_release_time);
4777	}
4778
4779	/*
	 * Flag telling the driver to verify that no commands are pending
4781	 * for a device before issuing a Test Unit Ready. This is a workaround
4782	 * for a firmware bug in some Seagate eliteI drives.
4783	 */
4784	if (flags & SD_CONF_BSET_TUR_CHECK) {
4785		un->un_f_cfg_tur_check = TRUE;
4786		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4787		    "sd_set_vers1_properties: tur queue check set\n");
4788	}
4789
4790	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4791		un->un_min_throttle = prop_list->sdt_min_throttle;
4792		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4793		    "sd_set_vers1_properties: min throttle set to %d\n",
4794		    un->un_min_throttle);
4795	}
4796
4797	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4798		un->un_f_disksort_disabled =
4799		    (prop_list->sdt_disk_sort_dis != 0) ?
4800		    TRUE : FALSE;
4801		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4802		    "sd_set_vers1_properties: disksort disabled "
4803		    "flag set to %d\n",
4804		    prop_list->sdt_disk_sort_dis);
4805	}
4806
4807	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4808		un->un_f_lun_reset_enabled =
4809		    (prop_list->sdt_lun_reset_enable != 0) ?
4810		    TRUE : FALSE;
4811		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4812		    "sd_set_vers1_properties: lun reset enabled "
4813		    "flag set to %d\n",
4814		    prop_list->sdt_lun_reset_enable);
4815	}
4816
4817	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4818		un->un_f_suppress_cache_flush =
4819		    (prop_list->sdt_suppress_cache_flush != 0) ?
4820		    TRUE : FALSE;
4821		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4822		    "sd_set_vers1_properties: suppress_cache_flush "
4823		    "flag set to %d\n",
4824		    prop_list->sdt_suppress_cache_flush);
4825	}
4826
4827	if (flags & SD_CONF_BSET_PC_DISABLED) {
4828		un->un_f_power_condition_disabled =
4829		    (prop_list->sdt_power_condition_dis != 0) ?
4830		    TRUE : FALSE;
4831		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4832		    "sd_set_vers1_properties: power_condition_disabled "
4833		    "flag set to %d\n",
4834		    prop_list->sdt_power_condition_dis);
4835	}
4836
4837	/*
4838	 * Validate the throttle values.
4839	 * If any of the numbers are invalid, set everything to defaults.
4840	 */
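	/*
	 * In other words, enforce SD_LOWEST_VALID_THROTTLE <=
	 * un_min_throttle <= un_throttle. For example, a min throttle of
	 * 8 combined with a throttle of 4 is rejected and both values
	 * fall back to the defaults.
	 */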
4841	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4842	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4843	    (un->un_min_throttle > un->un_throttle)) {
4844		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4845		un->un_min_throttle = sd_min_throttle;
4846	}
4847}
4848
4849/*
4850 *   Function: sd_is_lsi()
4851 *
 *   Description: Check for LSI devices by stepping through the static
 *	device table to match vid/pid.
 *
 *   Args: un - ptr to sd_lun
 *
 *   Notes:  When creating a new LSI property, it must also be added
 *		to this function.
4859 */
4860static void
4861sd_is_lsi(struct sd_lun *un)
4862{
4863	char	*id = NULL;
4864	int	table_index;
4865	int	idlen;
4866	void	*prop;
4867
4868	ASSERT(un != NULL);
4869	for (table_index = 0; table_index < sd_disk_table_size;
4870	    table_index++) {
4871		id = sd_disk_table[table_index].device_id;
4872		idlen = strlen(id);
4873		if (idlen == 0) {
4874			continue;
4875		}
4876
4877		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4878			prop = sd_disk_table[table_index].properties;
4879			if (prop == &lsi_properties ||
4880			    prop == &lsi_oem_properties ||
4881			    prop == &lsi_properties_scsi ||
4882			    prop == &symbios_properties) {
4883				un->un_f_cfg_is_lsi = TRUE;
4884			}
4885			break;
4886		}
4887	}
4888}
4889
4890/*
4891 *    Function: sd_get_physical_geometry
4892 *
4893 * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4894 *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4895 *		target, and use this information to initialize the physical
4896 *		geometry cache specified by pgeom_p.
4897 *
4898 *		MODE SENSE is an optional command, so failure in this case
4899 *		does not necessarily denote an error. We want to use the
4900 *		MODE SENSE commands to derive the physical geometry of the
4901 *		device, but if either command fails, the logical geometry is
4902 *		used as the fallback for disk label geometry in cmlb.
4903 *
4904 *		This requires that un->un_blockcount and un->un_tgt_blocksize
4905 *		have already been initialized for the current target and
4906 *		that the current values be passed as args so that we don't
4907 *		end up ever trying to use -1 as a valid value. This could
4908 *		happen if either value is reset while we're not holding
4909 *		the mutex.
4910 *
4911 *   Arguments: un - driver soft state (unit) structure
4912 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4913 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4914 *			to use the USCSI "direct" chain and bypass the normal
4915 *			command waitq.
4916 *
4917 *     Context: Kernel thread only (can sleep).
4918 */
4919
4920static int
4921sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4922	diskaddr_t capacity, int lbasize, int path_flag)
4923{
4924	struct	mode_format	*page3p;
4925	struct	mode_geometry	*page4p;
4926	struct	mode_header	*headerp;
4927	int	sector_size;
4928	int	nsect;
4929	int	nhead;
4930	int	ncyl;
4931	int	intrlv;
4932	int	spc;
4933	diskaddr_t	modesense_capacity;
4934	int	rpm;
4935	int	bd_len;
4936	int	mode_header_length;
4937	uchar_t	*p3bufp;
4938	uchar_t	*p4bufp;
4939	int	cdbsize;
4940	int 	ret = EIO;
4941	sd_ssc_t *ssc;
4942	int	status;
4943
4944	ASSERT(un != NULL);
4945
4946	if (lbasize == 0) {
4947		if (ISCD(un)) {
4948			lbasize = 2048;
4949		} else {
4950			lbasize = un->un_sys_blocksize;
4951		}
4952	}
4953	pgeom_p->g_secsize = (unsigned short)lbasize;
4954
	/*
	 * If the unit is a cd/dvd drive, MODE SENSE page three
	 * and MODE SENSE page four are reserved (see SBC spec
	 * and MMC spec). To prevent soft errors, just return
	 * using the default LBA size.
	 */
4961	if (ISCD(un))
4962		return (ret);
4963
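	/*
	 * ATAPI devices do not implement the 6-byte MODE SENSE, so use
	 * the 10-byte (group 2) CDB for them; plain SCSI targets get the
	 * 6-byte (group 0) form.
	 */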
4964	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4965
4966	/*
4967	 * Retrieve MODE SENSE page 3 - Format Device Page
4968	 */
4969	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4970	ssc = sd_ssc_init(un);
4971	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p3bufp,
4972	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag);
4973	if (status != 0) {
4974		SD_ERROR(SD_LOG_COMMON, un,
4975		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4976		goto page3_exit;
4977	}
4978
4979	/*
4980	 * Determine size of Block Descriptors in order to locate the mode
4981	 * page data.  ATAPI devices return 0, SCSI devices should return
4982	 * MODE_BLK_DESC_LENGTH.
4983	 */
4984	headerp = (struct mode_header *)p3bufp;
4985	if (un->un_f_cfg_is_atapi == TRUE) {
4986		struct mode_header_grp2 *mhp =
4987		    (struct mode_header_grp2 *)headerp;
4988		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4989		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4990	} else {
4991		mode_header_length = MODE_HEADER_LENGTH;
4992		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4993	}
4994
4995	if (bd_len > MODE_BLK_DESC_LENGTH) {
4996		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
4997		    "sd_get_physical_geometry: received unexpected bd_len "
4998		    "of %d, page3\n", bd_len);
4999		status = EIO;
5000		goto page3_exit;
5001	}
5002
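	/*
	 * The MODE SENSE data is laid out as
	 * [mode header][block descriptor(s)][mode page], so the page
	 * starts bd_len bytes past the end of the header.
	 */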
5003	page3p = (struct mode_format *)
5004	    ((caddr_t)headerp + mode_header_length + bd_len);
5005
5006	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
5007		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
5008		    "sd_get_physical_geometry: mode sense pg3 code mismatch "
5009		    "%d\n", page3p->mode_page.code);
5010		status = EIO;
5011		goto page3_exit;
5012	}
5013
5014	/*
5015	 * Use this physical geometry data only if BOTH MODE SENSE commands
5016	 * complete successfully; otherwise, revert to the logical geometry.
5017	 * So, we need to save everything in temporary variables.
5018	 */
5019	sector_size = BE_16(page3p->data_bytes_sect);
5020
5021	/*
5022	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
5023	 */
5024	if (sector_size == 0) {
5025		sector_size = un->un_sys_blocksize;
5026	} else {
5027		sector_size &= ~(un->un_sys_blocksize - 1);
5028	}
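	/*
	 * The mask above rounds sector_size down to a multiple of
	 * un_sys_blocksize, assuming the block size is a power of two;
	 * e.g. 2056 & ~(512 - 1) == 2048.
	 */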
5029
5030	nsect  = BE_16(page3p->sect_track);
5031	intrlv = BE_16(page3p->interleave);
5032
5033	SD_INFO(SD_LOG_COMMON, un,
5034	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
5035	SD_INFO(SD_LOG_COMMON, un,
5036	    "   mode page: %d; nsect: %d; sector size: %d;\n",
5037	    page3p->mode_page.code, nsect, sector_size);
5038	SD_INFO(SD_LOG_COMMON, un,
5039	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
5040	    BE_16(page3p->track_skew),
5041	    BE_16(page3p->cylinder_skew));
5042
5043	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
5044
5045	/*
5046	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
5047	 */
5048	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
5049	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p4bufp,
5050	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag);
5051	if (status != 0) {
5052		SD_ERROR(SD_LOG_COMMON, un,
5053		    "sd_get_physical_geometry: mode sense page 4 failed\n");
5054		goto page4_exit;
5055	}
5056
5057	/*
5058	 * Determine size of Block Descriptors in order to locate the mode
5059	 * page data.  ATAPI devices return 0, SCSI devices should return
5060	 * MODE_BLK_DESC_LENGTH.
5061	 */
5062	headerp = (struct mode_header *)p4bufp;
5063	if (un->un_f_cfg_is_atapi == TRUE) {
5064		struct mode_header_grp2 *mhp =
5065		    (struct mode_header_grp2 *)headerp;
5066		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5067	} else {
5068		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5069	}
5070
5071	if (bd_len > MODE_BLK_DESC_LENGTH) {
5072		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
5073		    "sd_get_physical_geometry: received unexpected bd_len of "
5074		    "%d, page4\n", bd_len);
5075		status = EIO;
5076		goto page4_exit;
5077	}
5078
5079	page4p = (struct mode_geometry *)
5080	    ((caddr_t)headerp + mode_header_length + bd_len);
5081
5082	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
5083		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
5084		    "sd_get_physical_geometry: mode sense pg4 code mismatch "
5085		    "%d\n", page4p->mode_page.code);
5086		status = EIO;
5087		goto page4_exit;
5088	}
5089
5090	/*
5091	 * Stash the data now, after we know that both commands completed.
5092	 */
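	/*
	 * The cylinder count is a 24-bit big-endian value split across
	 * three bytes; e.g. cyl_ub 0x01, cyl_mb 0x00, cyl_lb 0x00 decodes
	 * to 0x010000 == 65536 cylinders.
	 */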
5095	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
5096	spc   = nhead * nsect;
5097	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
5098	rpm   = BE_16(page4p->rpm);
5099
5100	modesense_capacity = spc * ncyl;
5101
5102	SD_INFO(SD_LOG_COMMON, un,
5103	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
5104	SD_INFO(SD_LOG_COMMON, un,
5105	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
5106	SD_INFO(SD_LOG_COMMON, un,
5107	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
5108	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
5109	    (void *)pgeom_p, capacity);
5110
5111	/*
	 * Compensate if the drive's geometry is not rectangular, i.e.,
	 * when the product of C * H * S returned by MODE SENSE is >= the
	 * capacity returned by READ CAPACITY. This is an idiosyncrasy of
	 * the original x86 disk subsystem.
5116	 */
5117	if (modesense_capacity >= capacity) {
5118		SD_INFO(SD_LOG_COMMON, un,
5119		    "sd_get_physical_geometry: adjusting acyl; "
5120		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5121		    (modesense_capacity - capacity + spc - 1) / spc);
5122		if (sector_size != 0) {
5123			/* 1243403: NEC D38x7 drives don't support sec size */
5124			pgeom_p->g_secsize = (unsigned short)sector_size;
5125		}
5126		pgeom_p->g_nsect    = (unsigned short)nsect;
5127		pgeom_p->g_nhead    = (unsigned short)nhead;
5128		pgeom_p->g_capacity = capacity;
5129		pgeom_p->g_acyl	    =
5130		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5131		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
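		/*
		 * The g_acyl expression above is a ceiling division: the
		 * number of whole "alternate" cylinders needed to absorb
		 * the excess capacity. E.g. an excess of 1000 blocks with
		 * spc == 630 yields (1000 + 629) / 630 == 2.
		 */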
5132	}
5133
5134	pgeom_p->g_rpm    = (unsigned short)rpm;
5135	pgeom_p->g_intrlv = (unsigned short)intrlv;
5136	ret = 0;
5137
5138	SD_INFO(SD_LOG_COMMON, un,
5139	    "sd_get_physical_geometry: mode sense geometry:\n");
5140	SD_INFO(SD_LOG_COMMON, un,
5141	    "   nsect: %d; sector size: %d; interlv: %d\n",
5142	    nsect, sector_size, intrlv);
5143	SD_INFO(SD_LOG_COMMON, un,
5144	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5145	    nhead, ncyl, rpm, modesense_capacity);
5146	SD_INFO(SD_LOG_COMMON, un,
5147	    "sd_get_physical_geometry: (cached)\n");
5148	SD_INFO(SD_LOG_COMMON, un,
5149	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5150	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
5151	    pgeom_p->g_nhead, pgeom_p->g_nsect);
5152	SD_INFO(SD_LOG_COMMON, un,
5153	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5154	    pgeom_p->g_secsize, pgeom_p->g_capacity,
5155	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
5156	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
5157
5158page4_exit:
5159	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5160
5161page3_exit:
5162	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5163
5164	if (status != 0) {
5165		if (status == EIO) {
5166			/*
			 * Some disks do not support MODE SENSE(6); we
			 * should ignore this kind of error (sense key
			 * 0x5 - ILLEGAL REQUEST).
5170			 */
5171			uint8_t *sensep;
5172			int senlen;
5173
5174			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
5175			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
5176			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
5177
5178			if (senlen > 0 &&
5179			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
5180				sd_ssc_assessment(ssc,
5181				    SD_FMT_IGNORE_COMPROMISE);
5182			} else {
5183				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
5184			}
5185		} else {
5186			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5187		}
5188	}
5189	sd_ssc_fini(ssc);
5190	return (ret);
5191}
5192
5193/*
5194 *    Function: sd_get_virtual_geometry
5195 *
5196 * Description: Ask the controller to tell us about the target device.
5197 *
5198 *   Arguments: un - pointer to softstate
5199 *		capacity - disk capacity in #blocks
5200 *		lbasize - disk block size in bytes
5201 *
5202 *     Context: Kernel thread only
5203 */
5204
5205static int
5206sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
5207    diskaddr_t capacity, int lbasize)
5208{
5209	uint_t	geombuf;
5210	int	spc;
5211
5212	ASSERT(un != NULL);
5213
5214	/* Set sector size, and total number of sectors */
5215	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5216	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5217
5218	/* Let the HBA tell us its geometry */
5219	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5220
5221	/* A value of -1 indicates an undefined "geometry" property */
5222	if (geombuf == (-1)) {
5223		return (EINVAL);
5224	}
5225
5226	/* Initialize the logical geometry cache. */
5227	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5228	lgeom_p->g_nsect   = geombuf & 0xffff;
5229	lgeom_p->g_secsize = un->un_sys_blocksize;
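	/*
	 * The "geometry" capability value packs nhead into the upper 16
	 * bits and nsect into the lower 16; e.g. a geombuf of 0x00ff003f
	 * decodes to 255 heads and 63 sectors per track.
	 */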
5230
5231	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5232
5233	/*
5234	 * Note: The driver originally converted the capacity value from
5235	 * target blocks to system blocks. However, the capacity value passed
5236	 * to this routine is already in terms of system blocks (this scaling
5237	 * is done when the READ CAPACITY command is issued and processed).
5238	 * This 'error' may have gone undetected because the usage of g_ncyl
	 * (which is based upon g_capacity) is very limited within the driver.
5240	 */
5241	lgeom_p->g_capacity = capacity;
5242
	/*
	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value;
	 * the HBA may return zero values if the device has been removed.
	 */
5247	if (spc == 0) {
5248		lgeom_p->g_ncyl = 0;
5249	} else {
5250		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5251	}
5252	lgeom_p->g_acyl = 0;
5253
5254	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5255	return (0);
}

/*
5259 *    Function: sd_update_block_info
5260 *
 * Description: Record the new target block size and capacity in the
 *		soft state and mark them valid.
5263 *
5264 *   Arguments: un: unit struct.
5265 *		lbasize: new target sector size
5266 *		capacity: new target capacity, ie. block count
5267 *
5268 *     Context: Kernel thread context
5269 */
5270
5271static void
5272sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5273{
5274	if (lbasize != 0) {
5275		un->un_tgt_blocksize = lbasize;
5276		un->un_f_tgt_blocksize_is_valid = TRUE;
5277		if (!un->un_f_has_removable_media) {
5278			un->un_sys_blocksize = lbasize;
5279		}
5280	}
5281
5282	if (capacity != 0) {
5283		un->un_blockcount		= capacity;
5284		un->un_f_blockcount_is_valid	= TRUE;
5285	}
5286}
5287
5288
5289/*
5290 *    Function: sd_register_devid
5291 *
5292 * Description: This routine will obtain the device id information from the
5293 *		target, obtain the serial number, and register the device
5294 *		id with the ddi framework.
5295 *
5296 *   Arguments: devi - the system's dev_info_t for the device.
5297 *		un - driver soft state (unit) structure
5298 *		reservation_flag - indicates if a reservation conflict
5299 *		occurred during attach
5300 *
5301 *     Context: Kernel Thread
5302 */
5303static void
5304sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi, int reservation_flag)
5305{
5306	int		rval		= 0;
5307	uchar_t		*inq80		= NULL;
5308	size_t		inq80_len	= MAX_INQUIRY_SIZE;
5309	size_t		inq80_resid	= 0;
5310	uchar_t		*inq83		= NULL;
5311	size_t		inq83_len	= MAX_INQUIRY_SIZE;
5312	size_t		inq83_resid	= 0;
5313	int		dlen, len;
5314	char		*sn;
5315	struct sd_lun	*un;
5316
5317	ASSERT(ssc != NULL);
5318	un = ssc->ssc_un;
5319	ASSERT(un != NULL);
5320	ASSERT(mutex_owned(SD_MUTEX(un)));
5321	ASSERT((SD_DEVINFO(un)) == devi);
5324	/*
5325	 * We check the availability of the World Wide Name (0x83) and Unit
5326	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
5327	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
5328	 * 0x83 is available, that is the best choice.  Our next choice is
5329	 * 0x80.  If neither are available, we munge the devid from the device
5330	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
5331	 * to fabricate a devid for non-Sun qualified disks.
5332	 */
5333	if (sd_check_vpd_page_support(ssc) == 0) {
5334		/* collect page 80 data if available */
5335		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
5336
5337			mutex_exit(SD_MUTEX(un));
5338			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
5339
5340			rval = sd_send_scsi_INQUIRY(ssc, inq80, inq80_len,
5341			    0x01, 0x80, &inq80_resid);
5342
5343			if (rval != 0) {
5344				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5345				kmem_free(inq80, inq80_len);
5346				inq80 = NULL;
5347				inq80_len = 0;
5348			} else if (ddi_prop_exists(
5349			    DDI_DEV_T_NONE, SD_DEVINFO(un),
5350			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
5351			    INQUIRY_SERIAL_NO) == 0) {
5352				/*
5353				 * If we don't already have a serial number
5354				 * property, do quick verify of data returned
5355				 * and define property.
5356				 */
5357				dlen = inq80_len - inq80_resid;
5358				len = (size_t)inq80[3];
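				/*
				 * In the unit serial number page (0x80),
				 * byte 3 is the page length and the ASCII
				 * serial number starts at byte 4; the check
				 * below ensures the header is present and
				 * the serial number fits in the data
				 * returned.
				 */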
5359				if ((dlen >= 4) && ((len + 4) <= dlen)) {
5360					/*
5361					 * Ensure sn termination, skip leading
5362					 * blanks, and create property
5363					 * 'inquiry-serial-no'.
5364					 */
5365					sn = (char *)&inq80[4];
5366					sn[len] = 0;
5367					while (*sn && (*sn == ' '))
5368						sn++;
5369					if (*sn) {
5370						(void) ddi_prop_update_string(
5371						    DDI_DEV_T_NONE,
5372						    SD_DEVINFO(un),
5373						    INQUIRY_SERIAL_NO, sn);
5374					}
5375				}
5376			}
5377			mutex_enter(SD_MUTEX(un));
5378		}
5379
5380		/* collect page 83 data if available */
5381		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
5382			mutex_exit(SD_MUTEX(un));
5383			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
5384
5385			rval = sd_send_scsi_INQUIRY(ssc, inq83, inq83_len,
5386			    0x01, 0x83, &inq83_resid);
5387
5388			if (rval != 0) {
5389				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5390				kmem_free(inq83, inq83_len);
5391				inq83 = NULL;
5392				inq83_len = 0;
5393			}
5394			mutex_enter(SD_MUTEX(un));
5395		}
5396	}
5397
5398	/*
5399	 * If transport has already registered a devid for this target
5400	 * then that takes precedence over the driver's determination
5401	 * of the devid.
5402	 *
5403	 * NOTE: The reason this check is done here instead of at the beginning
5404	 * of the function is to allow the code above to create the
5405	 * 'inquiry-serial-no' property.
5406	 */
5407	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
5408		ASSERT(un->un_devid);
5409		un->un_f_devid_transport_defined = TRUE;
5410		goto cleanup; /* use devid registered by the transport */
5411	}
5412
5413	/*
	 * This is the case of antiquated Sun disk drives that have the
	 * FAB_DEVID property set in the disk_table.  These drives
	 * manage their devids by storing them in the last 2 available
	 * sectors on the drive and have them fabricated by the ddi layer
	 * by calling ddi_devid_init and passing the DEVID_FAB flag.
5419	 */
5420	if (un->un_f_opt_fab_devid == TRUE) {
5421		/*
5422		 * Depending on EINVAL isn't reliable, since a reserved disk
5423		 * may result in invalid geometry, so check to make sure a
5424		 * reservation conflict did not occur during attach.
5425		 */
5426		if ((sd_get_devid(ssc) == EINVAL) &&
5427		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
5428			/*
5429			 * The devid is invalid AND there is no reservation
5430			 * conflict.  Fabricate a new devid.
5431			 */
5432			(void) sd_create_devid(ssc);
5433		}
5434
5435		/* Register the devid if it exists */
5436		if (un->un_devid != NULL) {
5437			(void) ddi_devid_register(SD_DEVINFO(un),
5438			    un->un_devid);
5439			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5440			    "sd_register_devid: Devid Fabricated\n");
5441		}
5442		goto cleanup;
5443	}
5444
5445	/* encode best devid possible based on data available */
5446	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
5447	    (char *)ddi_driver_name(SD_DEVINFO(un)),
5448	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
5449	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
5450	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
5451
5452		/* devid successfully encoded, register devid */
5453		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
5454
5455	} else {
		/*
		 * Unable to encode a devid based on data available.
		 * This is not a Sun qualified disk.  Older Sun disk
		 * drives that have the SD_FAB_DEVID property
		 * set in the disk_table and non-Sun qualified
		 * disks are treated in the same manner.  These
		 * drives manage their devids by storing them in
		 * the last 2 available sectors on the drive and
		 * have them fabricated by the ddi layer by
		 * calling ddi_devid_init and passing the
		 * DEVID_FAB flag.
		 * Fabricate a devid only if no fabricated devid
		 * exists already.
		 */
5470		if (sd_get_devid(ssc) == EINVAL) {
5471			(void) sd_create_devid(ssc);
5472		}
5473		un->un_f_opt_fab_devid = TRUE;
5474
5475		/* Register the devid if it exists */
5476		if (un->un_devid != NULL) {
5477			(void) ddi_devid_register(SD_DEVINFO(un),
5478			    un->un_devid);
5479			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5480			    "sd_register_devid: devid fabricated using "
5481			    "ddi framework\n");
5482		}
5483	}
5484
5485cleanup:
5486	/* clean up resources */
5487	if (inq80 != NULL) {
5488		kmem_free(inq80, inq80_len);
5489	}
5490	if (inq83 != NULL) {
5491		kmem_free(inq83, inq83_len);
5492	}
5493}


5497/*
5498 *    Function: sd_get_devid
5499 *
5500 * Description: This routine will return 0 if a valid device id has been
5501 *		obtained from the target and stored in the soft state. If a
5502 *		valid device id has not been previously read and stored, a
5503 *		read attempt will be made.
5504 *
5505 *   Arguments: un - driver soft state (unit) structure
5506 *
5507 * Return Code: 0 if we successfully get the device id
5508 *
5509 *     Context: Kernel Thread
5510 */
5511
5512static int
5513sd_get_devid(sd_ssc_t *ssc)
5514{
5515	struct dk_devid		*dkdevid;
5516	ddi_devid_t		tmpid;
5517	uint_t			*ip;
5518	size_t			sz;
5519	diskaddr_t		blk;
5520	int			status;
5521	int			chksum;
5522	int			i;
5523	size_t			buffer_size;
5524	struct sd_lun		*un;
5525
5526	ASSERT(ssc != NULL);
5527	un = ssc->ssc_un;
5528	ASSERT(un != NULL);
5529	ASSERT(mutex_owned(SD_MUTEX(un)));
5530
5531	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
5532	    un);
5533
5534	if (un->un_devid != NULL) {
5535		return (0);
5536	}
5537
5538	mutex_exit(SD_MUTEX(un));
5539	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5540	    (void *)SD_PATH_DIRECT) != 0) {
5541		mutex_enter(SD_MUTEX(un));
5542		return (EINVAL);
5543	}
5544
5545	/*
	 * Read and verify the device id, stored in the reserved cylinders
	 * at the end of the disk. The backup label is on the odd sectors
	 * of the last track of the last cylinder; the device id is on a
	 * track of the next-to-last cylinder.
5550	 */
5551	mutex_enter(SD_MUTEX(un));
5552	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
5553	mutex_exit(SD_MUTEX(un));
5554	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
5555	status = sd_send_scsi_READ(ssc, dkdevid, buffer_size, blk,
5556	    SD_PATH_DIRECT);
5557
5558	if (status != 0) {
5559		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5560		goto error;
5561	}
5562
5563	/* Validate the revision */
5564	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
5565	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
5566		status = EINVAL;
5567		goto error;
5568	}
5569
5570	/* Calculate the checksum */
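	/*
	 * This is a simple XOR over every 32-bit word of the 512-byte
	 * (DEV_BSIZE) block except the last word, which is where the
	 * stored checksum itself lives.
	 */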
5571	chksum = 0;
5572	ip = (uint_t *)dkdevid;
5573	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5574	    i++) {
5575		chksum ^= ip[i];
5576	}
5577
5578	/* Compare the checksums */
5579	if (DKD_GETCHKSUM(dkdevid) != chksum) {
5580		status = EINVAL;
5581		goto error;
5582	}
5583
5584	/* Validate the device id */
5585	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
5586		status = EINVAL;
5587		goto error;
5588	}
5589
5590	/*
5591	 * Store the device id in the driver soft state
5592	 */
5593	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
5594	tmpid = kmem_alloc(sz, KM_SLEEP);
5595
5596	mutex_enter(SD_MUTEX(un));
5597
5598	un->un_devid = tmpid;
5599	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
5600
5601	kmem_free(dkdevid, buffer_size);
5602
5603	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
5604
5605	return (status);
5606error:
5607	mutex_enter(SD_MUTEX(un));
5608	kmem_free(dkdevid, buffer_size);
5609	return (status);
5610}
5611
5612
5613/*
5614 *    Function: sd_create_devid
5615 *
5616 * Description: This routine will fabricate the device id and write it
5617 *		to the disk.
5618 *
5619 *   Arguments: un - driver soft state (unit) structure
5620 *
 * Return Code: the fabricated device id, or NULL on failure
5622 *
5623 *     Context: Kernel Thread
5624 */
5625
5626static ddi_devid_t
5627sd_create_devid(sd_ssc_t *ssc)
5628{
5629	struct sd_lun	*un;
5630
5631	ASSERT(ssc != NULL);
5632	un = ssc->ssc_un;
5633	ASSERT(un != NULL);
5634
5635	/* Fabricate the devid */
5636	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
5637	    == DDI_FAILURE) {
5638		return (NULL);
5639	}
5640
5641	/* Write the devid to disk */
5642	if (sd_write_deviceid(ssc) != 0) {
5643		ddi_devid_free(un->un_devid);
5644		un->un_devid = NULL;
5645	}
5646
5647	return (un->un_devid);
5648}
5649
5650
5651/*
5652 *    Function: sd_write_deviceid
5653 *
5654 * Description: This routine will write the device id to the disk
5655 *		reserved sector.
5656 *
5657 *   Arguments: un - driver soft state (unit) structure
5658 *
 * Return Code: -1 if the devid block cannot be located
 *		value returned by sd_send_scsi_WRITE otherwise
5661 *
5662 *     Context: Kernel Thread
5663 */
5664
5665static int
5666sd_write_deviceid(sd_ssc_t *ssc)
5667{
5668	struct dk_devid		*dkdevid;
5669	uchar_t			*buf;
5670	diskaddr_t		blk;
5671	uint_t			*ip, chksum;
5672	int			status;
5673	int			i;
5674	struct sd_lun		*un;
5675
5676	ASSERT(ssc != NULL);
5677	un = ssc->ssc_un;
5678	ASSERT(un != NULL);
5679	ASSERT(mutex_owned(SD_MUTEX(un)));
5680
5681	mutex_exit(SD_MUTEX(un));
5682	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5683	    (void *)SD_PATH_DIRECT) != 0) {
5684		mutex_enter(SD_MUTEX(un));
5685		return (-1);
5686	}
5689	/* Allocate the buffer */
5690	buf = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
5691	dkdevid = (struct dk_devid *)buf;
5692
5693	/* Fill in the revision */
5694	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
5695	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
5696
5697	/* Copy in the device id */
5698	mutex_enter(SD_MUTEX(un));
5699	bcopy(un->un_devid, &dkdevid->dkd_devid,
5700	    ddi_devid_sizeof(un->un_devid));
5701	mutex_exit(SD_MUTEX(un));
5702
5703	/* Calculate the checksum */
5704	chksum = 0;
5705	ip = (uint_t *)dkdevid;
5706	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5707	    i++) {
5708		chksum ^= ip[i];
5709	}
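	/*
	 * Same XOR as the read path in sd_get_devid(): every word of the
	 * block except the final one, into which DKD_FORMCHKSUM below
	 * stores the result.
	 */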
5710
5711	/* Fill-in checksum */
5712	DKD_FORMCHKSUM(chksum, dkdevid);
5713
5714	/* Write the reserved sector */
5715	status = sd_send_scsi_WRITE(ssc, buf, un->un_sys_blocksize, blk,
5716	    SD_PATH_DIRECT);
5717	if (status != 0)
5718		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5719
5720	kmem_free(buf, un->un_sys_blocksize);
5721
5722	mutex_enter(SD_MUTEX(un));
5723	return (status);
5724}
5725
5726
5727/*
5728 *    Function: sd_check_vpd_page_support
5729 *
 * Description: This routine sends an inquiry command with the EVPD bit set and
 *		a page code of 0x00 to the device. It is used to determine which
 *		vital product pages are available to find the devid. We are
 *		looking for pages 0x83, 0x80 or 0xB1.  A return value of -1
 *		means the device does not support the command.
5735 *
5736 *   Arguments: un  - driver soft state (unit) structure
5737 *
 * Return Code: 0 - success
 *		-1 - device does not support VPD pages
5740 *
5741 *     Context: This routine can sleep.
5742 */
5743
5744static int
5745sd_check_vpd_page_support(sd_ssc_t *ssc)
5746{
5747	uchar_t	*page_list	= NULL;
5748	uchar_t	page_length	= 0xff;	/* Use max possible length */
5749	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
5750	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
5751	int    	rval		= 0;
5752	int	counter;
5753	struct sd_lun		*un;
5754
5755	ASSERT(ssc != NULL);
5756	un = ssc->ssc_un;
5757	ASSERT(un != NULL);
5758	ASSERT(mutex_owned(SD_MUTEX(un)));
5759
5760	mutex_exit(SD_MUTEX(un));
5761
5762	/*
5763	 * We'll set the page length to the maximum to save figuring it out
5764	 * with an additional call.
5765	 */
5766	page_list =  kmem_zalloc(page_length, KM_SLEEP);
5767
5768	rval = sd_send_scsi_INQUIRY(ssc, page_list, page_length, evpd,
5769	    page_code, NULL);
5770
5771	if (rval != 0)
5772		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5773
5774	mutex_enter(SD_MUTEX(un));
5775
5776	/*
5777	 * Now we must validate that the device accepted the command, as some
5778	 * drives do not support it.  If the drive does support it, we will
5779	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
5780	 * not, we return -1.
5781	 */
5782	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
5783		/* Loop to find one of the 2 pages we need */
5784		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
5785
5786		/*
5787		 * Pages are returned in ascending order, and 0x83 is what we
5788		 * are hoping for.
5789		 */
5790		while ((page_list[counter] <= 0xB1) &&
5791		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5792		    VPD_HEAD_OFFSET))) {
			/*
			 * The supported-page list starts at byte 4;
			 * page_list[VPD_PAGE_LENGTH] (byte 3) holds the
			 * number of page codes that follow, so the last
			 * valid index is that count plus VPD_HEAD_OFFSET.
			 */
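			/*
			 * For example, a device supporting pages 0x00,
			 * 0x80 and 0x83 returns the header
			 * { 0x00, 0x00, 0x00, 0x03 } followed by those
			 * three page codes at indices 4 through 6.
			 */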
5797
5798			switch (page_list[counter]) {
5799			case 0x00:
5800				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5801				break;
5802			case 0x80:
5803				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5804				break;
5805			case 0x81:
5806				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5807				break;
5808			case 0x82:
5809				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5810				break;
5811			case 0x83:
5812				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5813				break;
5814			case 0x86:
5815				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
5816				break;
5817			case 0xB1:
5818				un->un_vpd_page_mask |= SD_VPD_DEV_CHARACTER_PG;
5819				break;
5820			}
5821			counter++;
5822		}
5823
5824	} else {
5825		rval = -1;
5826
5827		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5828		    "sd_check_vpd_page_support: This drive does not implement "
5829		    "VPD pages.\n");
5830	}
5831
5832	kmem_free(page_list, page_length);
5833
5834	return (rval);
5835}
5836
5837
5838/*
5839 *    Function: sd_setup_pm
5840 *
5841 * Description: Initialize Power Management on the device
5842 *
5843 *     Context: Kernel Thread
5844 */
5845
5846static void
5847sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi)
5848{
5849	uint_t		log_page_size;
5850	uchar_t		*log_page_data;
5851	int		rval = 0;
5852	struct sd_lun	*un;
5853
5854	ASSERT(ssc != NULL);
5855	un = ssc->ssc_un;
5856	ASSERT(un != NULL);
5857
5858	/*
5859	 * Since we are called from attach, holding a mutex for
5860	 * un is unnecessary. Because some of the routines called
5861	 * from here require SD_MUTEX to not be held, assert this
5862	 * right up front.
5863	 */
5864	ASSERT(!mutex_owned(SD_MUTEX(un)));
5865	/*
5866	 * Since the sd device does not have the 'reg' property,
5867	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5868	 * The following code is to tell cpr that this device
5869	 * DOES need to be suspended and resumed.
5870	 */
5871	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5872	    "pm-hardware-state", "needs-suspend-resume");
5873
5874	/*
5875	 * This complies with the new power management framework
5876	 * for certain desktop machines. Create the pm_components
5877	 * property as a string array property.
	 * If un_f_pm_supported is TRUE, the HBA to which the disk is
	 * attached has set the "pm-capable" property and the value of
	 * that property is greater than 0.
5881	 */
5882	if (un->un_f_pm_supported) {
		/*
		 * Not all devices have a motor, so try it first.  Some
		 * devices may return ILLEGAL REQUEST and some may hang.
		 * The following START_STOP_UNIT is used to check whether
		 * the target device has a motor.
		 */
5890		un->un_f_start_stop_supported = TRUE;
5891
5892		if (un->un_f_power_condition_supported) {
5893			rval = sd_send_scsi_START_STOP_UNIT(ssc,
5894			    SD_POWER_CONDITION, SD_TARGET_ACTIVE,
5895			    SD_PATH_DIRECT);
5896			if (rval != 0) {
5897				un->un_f_power_condition_supported = FALSE;
5898			}
5899		}
5900		if (!un->un_f_power_condition_supported) {
5901			rval = sd_send_scsi_START_STOP_UNIT(ssc,
5902			    SD_START_STOP, SD_TARGET_START, SD_PATH_DIRECT);
5903		}
5904		if (rval != 0) {
5905			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5906			un->un_f_start_stop_supported = FALSE;
5907		}
5908
		/*
		 * Create the pm properties anyway; otherwise the parent
		 * can't go to sleep.
		 */
5913		un->un_f_pm_is_enabled = TRUE;
5914		(void) sd_create_pm_components(devi, un);
5915
5916		/*
5917		 * If it claims that log sense is supported, check it out.
5918		 */
5919		if (un->un_f_log_sense_supported) {
5920			rval = sd_log_page_supported(ssc,
5921			    START_STOP_CYCLE_PAGE);
5922			if (rval == 1) {
5923				/* Page found, use it. */
5924				un->un_start_stop_cycle_page =
5925				    START_STOP_CYCLE_PAGE;
5926			} else {
5927				/*
5928				 * Page not found or log sense is not
5929				 * supported.
5930				 * Notice we do not check the old style
5931				 * START_STOP_CYCLE_VU_PAGE because this
5932				 * code path does not apply to old disks.
5933				 */
5934				un->un_f_log_sense_supported = FALSE;
5935				un->un_f_pm_log_sense_smart = FALSE;
5936			}
5937		}
5938
5939		return;
5940	}
5941
5942	/*
	 * For a disk whose attached HBA has not set the "pm-capable"
	 * property, check whether it supports power management.
5945	 */
5946	if (!un->un_f_log_sense_supported) {
5947		un->un_power_level = SD_SPINDLE_ON;
5948		un->un_f_pm_is_enabled = FALSE;
5949		return;
5950	}
5951
5952	rval = sd_log_page_supported(ssc, START_STOP_CYCLE_PAGE);
5953
5954#ifdef	SDDEBUG
5955	if (sd_force_pm_supported) {
5956		/* Force a successful result */
5957		rval = 1;
5958	}
5959#endif
5960
5961	/*
5962	 * If the start-stop cycle counter log page is not supported
5963	 * or if the pm-capable property is set to be false (0),
5964	 * then we should not create the pm_components property.
5965	 */
5966	if (rval == -1) {
5967		/*
5968		 * Error.
5969		 * Reading log sense failed, most likely this is
5970		 * an older drive that does not support log sense.
5971		 * If this fails auto-pm is not supported.
5972		 */
5973		un->un_power_level = SD_SPINDLE_ON;
5974		un->un_f_pm_is_enabled = FALSE;
5975
5976	} else if (rval == 0) {
5977		/*
5978		 * Page not found.
5979		 * The start stop cycle counter is implemented as page
		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
5981		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5982		 */
5983		if (sd_log_page_supported(ssc, START_STOP_CYCLE_VU_PAGE) == 1) {
5984			/*
5985			 * Page found, use this one.
5986			 */
5987			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5988			un->un_f_pm_is_enabled = TRUE;
5989		} else {
5990			/*
5991			 * Error or page not found.
5992			 * auto-pm is not supported for this device.
5993			 */
5994			un->un_power_level = SD_SPINDLE_ON;
5995			un->un_f_pm_is_enabled = FALSE;
5996		}
5997	} else {
5998		/*
5999		 * Page found, use it.
6000		 */
6001		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6002		un->un_f_pm_is_enabled = TRUE;
6003	}
6006	if (un->un_f_pm_is_enabled == TRUE) {
6007		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6008		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6009
6010		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6011		    log_page_size, un->un_start_stop_cycle_page,
6012		    0x01, 0, SD_PATH_DIRECT);
6013
6014		if (rval != 0) {
6015			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6016		}
6017
6018#ifdef	SDDEBUG
6019		if (sd_force_pm_supported) {
6020			/* Force a successful result */
6021			rval = 0;
6022		}
6023#endif
6024
6025		/*
		 * If the LOG SENSE for the start/stop cycle counter page
6027		 * succeeds, then power management is supported and we can
6028		 * enable auto-pm.
6029		 */
6030		if (rval == 0)  {
6031			(void) sd_create_pm_components(devi, un);
6032		} else {
6033			un->un_power_level = SD_SPINDLE_ON;
6034			un->un_f_pm_is_enabled = FALSE;
6035		}
6036
6037		kmem_free(log_page_data, log_page_size);
6038	}
6039}
6040
6041
6042/*
6043 *    Function: sd_create_pm_components
6044 *
6045 * Description: Initialize PM property.
6046 *
6047 *     Context: Kernel thread context
6048 */
6049
6050static void
6051sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6052{
6053	ASSERT(!mutex_owned(SD_MUTEX(un)));
6054
6055	if (un->un_f_power_condition_supported) {
6056		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6057		    "pm-components", sd_pwr_pc.pm_comp, 5)
6058		    != DDI_PROP_SUCCESS) {
6059			un->un_power_level = SD_SPINDLE_ACTIVE;
6060			un->un_f_pm_is_enabled = FALSE;
6061			return;
6062		}
6063	} else {
6064		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6065		    "pm-components", sd_pwr_ss.pm_comp, 3)
6066		    != DDI_PROP_SUCCESS) {
6067			un->un_power_level = SD_SPINDLE_ON;
6068			un->un_f_pm_is_enabled = FALSE;
6069			return;
6070		}
6071	}
6072	/*
	 * When components are initially created they are idle;
	 * power up any non-removables.
6075	 * Note: the return value of pm_raise_power can't be used
6076	 * for determining if PM should be enabled for this device.
6077	 * Even if you check the return values and remove this
6078	 * property created above, the PM framework will not honor the
6079	 * change after the first call to pm_raise_power. Hence,
6080	 * removal of that property does not help if pm_raise_power
6081	 * fails. In the case of removable media, the start/stop
6082	 * will fail if the media is not present.
6083	 */
6084	if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6085	    SD_PM_STATE_ACTIVE(un)) == DDI_SUCCESS)) {
6086		mutex_enter(SD_MUTEX(un));
6087		un->un_power_level = SD_PM_STATE_ACTIVE(un);
6088		mutex_enter(&un->un_pm_mutex);
6089		/* Set to on and not busy. */
6090		un->un_pm_count = 0;
6091	} else {
6092		mutex_enter(SD_MUTEX(un));
6093		un->un_power_level = SD_PM_STATE_STOPPED(un);
6094		mutex_enter(&un->un_pm_mutex);
6095		/* Set to off. */
6096		un->un_pm_count = -1;
6097	}
6098	mutex_exit(&un->un_pm_mutex);
6099	mutex_exit(SD_MUTEX(un));
6100}
6101
6102
6103/*
6104 *    Function: sd_ddi_suspend
6105 *
6106 * Description: Performs system power-down operations. This includes
 *		setting the drive state to indicate it is suspended so
6108 *		that no new commands will be accepted. Also, wait for
6109 *		all commands that are in transport or queued to a timer
6110 *		for retry to complete. All timeout threads are cancelled.
6111 *
6112 * Return Code: DDI_FAILURE or DDI_SUCCESS
6113 *
6114 *     Context: Kernel thread context
6115 */
6116
6117static int
6118sd_ddi_suspend(dev_info_t *devi)
6119{
6120	struct	sd_lun	*un;
6121	clock_t		wait_cmds_complete;
6122
6123	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6124	if (un == NULL) {
6125		return (DDI_FAILURE);
6126	}
6127
6128	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6129
6130	mutex_enter(SD_MUTEX(un));
6131
6132	/* Return success if the device is already suspended. */
6133	if (un->un_state == SD_STATE_SUSPENDED) {
6134		mutex_exit(SD_MUTEX(un));
6135		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6136		    "device already suspended, exiting\n");
6137		return (DDI_SUCCESS);
6138	}
6139
6140	/* Return failure if the device is being used by HA */
6141	if (un->un_resvd_status &
6142	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6143		mutex_exit(SD_MUTEX(un));
6144		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6145		    "device in use by HA, exiting\n");
6146		return (DDI_FAILURE);
6147	}
6148
6149	/*
6150	 * Return failure if the device is in a resource wait
6151	 * or power changing state.
6152	 */
6153	if ((un->un_state == SD_STATE_RWAIT) ||
6154	    (un->un_state == SD_STATE_PM_CHANGING)) {
6155		mutex_exit(SD_MUTEX(un));
6156		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6157		    "device in resource wait state, exiting\n");
6158		return (DDI_FAILURE);
6159	}
6162	un->un_save_state = un->un_last_state;
6163	New_state(un, SD_STATE_SUSPENDED);
6164
6165	/*
6166	 * Wait for all commands that are in transport or queued to a timer
6167	 * for retry to complete.
6168	 *
6169	 * While waiting, no new commands will be accepted or sent because of
6170	 * the new state we set above.
6171	 *
6172	 * Wait till current operation has completed. If we are in the resource
6173	 * wait state (with an intr outstanding) then we need to wait till the
6174	 * intr completes and starts the next cmd. We want to wait for
6175	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6176	 */
6177	wait_cmds_complete = ddi_get_lbolt() +
6178	    (sd_wait_cmds_complete * drv_usectohz(1000000));
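	/*
	 * drv_usectohz(1000000) is the number of clock ticks per second,
	 * so wait_cmds_complete is an absolute deadline in lbolt ticks of
	 * "now plus sd_wait_cmds_complete seconds", as expected by
	 * cv_timedwait() below.
	 */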
6179
6180	while (un->un_ncmds_in_transport != 0) {
6181		/*
6182		 * Fail if commands do not finish in the specified time.
6183		 */
6184		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6185		    wait_cmds_complete) == -1) {
6186			/*
6187			 * Undo the state changes made above. Everything
			 * must go back to its original value.
6189			 */
6190			Restore_state(un);
6191			un->un_last_state = un->un_save_state;
6192			/* Wake up any threads that might be waiting. */
6193			cv_broadcast(&un->un_suspend_cv);
6194			mutex_exit(SD_MUTEX(un));
6195			SD_ERROR(SD_LOG_IO_PM, un,
6196			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6197			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6198			return (DDI_FAILURE);
6199		}
6200	}
6201
6202	/*
6203	 * Cancel SCSI watch thread and timeouts, if any are active
6204	 */
6205
6206	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6207		opaque_t temp_token = un->un_swr_token;
6208		mutex_exit(SD_MUTEX(un));
6209		scsi_watch_suspend(temp_token);
6210		mutex_enter(SD_MUTEX(un));
6211	}
6212
6213	if (un->un_reset_throttle_timeid != NULL) {
6214		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6215		un->un_reset_throttle_timeid = NULL;
6216		mutex_exit(SD_MUTEX(un));
6217		(void) untimeout(temp_id);
6218		mutex_enter(SD_MUTEX(un));
6219	}
6220
6221	if (un->un_dcvb_timeid != NULL) {
6222		timeout_id_t temp_id = un->un_dcvb_timeid;
6223		un->un_dcvb_timeid = NULL;
6224		mutex_exit(SD_MUTEX(un));
6225		(void) untimeout(temp_id);
6226		mutex_enter(SD_MUTEX(un));
6227	}
6228
6229	mutex_enter(&un->un_pm_mutex);
6230	if (un->un_pm_timeid != NULL) {
6231		timeout_id_t temp_id = un->un_pm_timeid;
6232		un->un_pm_timeid = NULL;
6233		mutex_exit(&un->un_pm_mutex);
6234		mutex_exit(SD_MUTEX(un));
6235		(void) untimeout(temp_id);
6236		mutex_enter(SD_MUTEX(un));
6237	} else {
6238		mutex_exit(&un->un_pm_mutex);
6239	}
6240
6241	if (un->un_rmw_msg_timeid != NULL) {
6242		timeout_id_t temp_id = un->un_rmw_msg_timeid;
6243		un->un_rmw_msg_timeid = NULL;
6244		mutex_exit(SD_MUTEX(un));
6245		(void) untimeout(temp_id);
6246		mutex_enter(SD_MUTEX(un));
6247	}
6248
6249	if (un->un_retry_timeid != NULL) {
6250		timeout_id_t temp_id = un->un_retry_timeid;
6251		un->un_retry_timeid = NULL;
6252		mutex_exit(SD_MUTEX(un));
6253		(void) untimeout(temp_id);
6254		mutex_enter(SD_MUTEX(un));
6255
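		/*
		 * If a retry was pending, put its buf back at the head of
		 * the wait queue so it is the first command issued when
		 * the device resumes.
		 */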
6256		if (un->un_retry_bp != NULL) {
6257			un->un_retry_bp->av_forw = un->un_waitq_headp;
6258			un->un_waitq_headp = un->un_retry_bp;
6259			if (un->un_waitq_tailp == NULL) {
6260				un->un_waitq_tailp = un->un_retry_bp;
6261			}
6262			un->un_retry_bp = NULL;
6263			un->un_retry_statp = NULL;
6264		}
6265	}
6266
6267	if (un->un_direct_priority_timeid != NULL) {
6268		timeout_id_t temp_id = un->un_direct_priority_timeid;
6269		un->un_direct_priority_timeid = NULL;
6270		mutex_exit(SD_MUTEX(un));
6271		(void) untimeout(temp_id);
6272		mutex_enter(SD_MUTEX(un));
6273	}
6274
6275	if (un->un_f_is_fibre == TRUE) {
6276		/*
6277		 * Remove callbacks for insert and remove events
6278		 */
6279		if (un->un_insert_event != NULL) {
6280			mutex_exit(SD_MUTEX(un));
6281			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6282			mutex_enter(SD_MUTEX(un));
6283			un->un_insert_event = NULL;
6284		}
6285
6286		if (un->un_remove_event != NULL) {
6287			mutex_exit(SD_MUTEX(un));
6288			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6289			mutex_enter(SD_MUTEX(un));
6290			un->un_remove_event = NULL;
6291		}
6292	}
6293
6294	mutex_exit(SD_MUTEX(un));
6295
6296	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6297
6298	return (DDI_SUCCESS);
6299}
6300
6301
6302/*
6303 *    Function: sd_ddi_resume
6304 *
 * Description: Performs system power-up operations.
6306 *
6307 * Return Code: DDI_SUCCESS
6308 *		DDI_FAILURE
6309 *
6310 *     Context: Kernel thread context
6311 */
6312
6313static int
6314sd_ddi_resume(dev_info_t *devi)
6315{
6316	struct	sd_lun	*un;
6317
6318	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6319	if (un == NULL) {
6320		return (DDI_FAILURE);
6321	}
6322
6323	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6324
6325	mutex_enter(SD_MUTEX(un));
6326	Restore_state(un);
6327
	/*
	 * Restore the state which was saved, to give
	 * un_last_state the right value.
	 */
6332	un->un_last_state = un->un_save_state;
6333	/*
6334	 * Note: throttle comes back at full.
6335	 * Also note: this MUST be done before calling pm_raise_power
6336	 * otherwise the system can get hung in biowait. The scenario where
6337	 * this'll happen is under cpr suspend. Writing of the system
6338	 * state goes through sddump, which writes 0 to un_throttle. If
6339	 * writing the system state then fails, example if the partition is
6340	 * too small, then cpr attempts a resume. If throttle isn't restored
6341	 * from the saved value until after calling pm_raise_power then
6342	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6343	 * in biowait.
6344	 */
6345	un->un_throttle = un->un_saved_throttle;
6346
	/*
	 * The chance of failure is very rare, as the only command issued in
	 * the power entry point is START when transitioning from 0->1 or
	 * unknown->1. Put the device in the SPINDLE ON state irrespective of
	 * the state in which suspend was done. Ignore the return value, as
	 * the resume should not be failed. In the case of removable media
	 * the media need not be inserted, so there is a chance that raise
	 * power will fail with media not present.
	 */
6356	if (un->un_f_attach_spinup) {
6357		mutex_exit(SD_MUTEX(un));
6358		(void) pm_raise_power(SD_DEVINFO(un), 0,
6359		    SD_PM_STATE_ACTIVE(un));
6360		mutex_enter(SD_MUTEX(un));
6361	}
6362
6363	/*
6364	 * Don't broadcast to the suspend cv and therefore possibly
6365	 * start I/O until after power has been restored.
6366	 */
6367	cv_broadcast(&un->un_suspend_cv);
6368	cv_broadcast(&un->un_state_cv);
6369
6370	/* restart thread */
6371	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6372		scsi_watch_resume(un->un_swr_token);
6373	}
6374
6375#if (defined(__fibre))
6376	if (un->un_f_is_fibre == TRUE) {
6377		/*
6378		 * Add callbacks for insert and remove events
6379		 */
6380		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6381			sd_init_event_callbacks(un);
6382		}
6383	}
6384#endif
6385
6386	/*
6387	 * Transport any pending commands to the target.
6388	 *
	 * If this is a low-activity device, commands in queue will have to
	 * wait until new commands come in, which may take a while. Also, we
	 * specifically don't check un_ncmds_in_transport because we know that
	 * there really are no commands in progress after the unit was
	 * suspended and we could have reached the throttle level, been
	 * suspended, and have no new commands coming in for a while. Highly
	 * unlikely, but so is the low-activity disk scenario.
6396	 */
6397	ddi_xbuf_dispatch(un->un_xbuf_attr);
6398
6399	sd_start_cmds(un, NULL);
6400	mutex_exit(SD_MUTEX(un));
6401
6402	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6403
6404	return (DDI_SUCCESS);
6405}
6406
6407
6408/*
6409 *    Function: sd_pm_state_change
6410 *
6411 * Description: Change the driver power state.
6412 * 		Someone else is required to actually change the driver
6413 * 		power level.
6414 *
6415 *   Arguments: un - driver soft state (unit) structure
6416 *              level - the power level that is changed to
6417 *              flag - to decide how to change the power state
6418 *
6419 * Return Code: DDI_SUCCESS
6420 *
6421 *     Context: Kernel thread context
6422 */
6423static int
6424sd_pm_state_change(struct sd_lun *un, int level, int flag)
6425{
6426	ASSERT(un != NULL);
6427	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: entry\n");
6428
6429	ASSERT(!mutex_owned(SD_MUTEX(un)));
6430	mutex_enter(SD_MUTEX(un));
6431
6432	if (flag == SD_PM_STATE_ROLLBACK || SD_PM_IS_IO_CAPABLE(un, level)) {
6433		un->un_power_level = level;
6434		ASSERT(!mutex_owned(&un->un_pm_mutex));
6435		mutex_enter(&un->un_pm_mutex);
6436		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
6437			un->un_pm_count++;
6438			ASSERT(un->un_pm_count == 0);
6439		}
6440		mutex_exit(&un->un_pm_mutex);
6441	} else {
6442		/*
6443		 * Exit if power management is not enabled for this device,
6444		 * or if the device is being used by HA.
6445		 */
6446		if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6447		    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6448			mutex_exit(SD_MUTEX(un));
6449			SD_TRACE(SD_LOG_POWER, un,
6450			    "sd_pm_state_change: exiting\n");
6451			return (DDI_FAILURE);
6452		}
6453
6454		SD_INFO(SD_LOG_POWER, un, "sd_pm_state_change: "
6455		    "un_ncmds_in_driver=%ld\n", un->un_ncmds_in_driver);
6456
6457		/*
		 * See if the device is not busy, i.e.:
6459		 *    - we have no commands in the driver for this device
6460		 *    - not waiting for resources
6461		 */
6462		if ((un->un_ncmds_in_driver == 0) &&
6463		    (un->un_state != SD_STATE_RWAIT)) {
6464			/*
6465			 * The device is not busy, so it is OK to go to low
6466			 * power state. Indicate low power, but rely on someone
6467			 * else to actually change it.
6468			 */
6469			mutex_enter(&un->un_pm_mutex);
6470			un->un_pm_count = -1;
6471			mutex_exit(&un->un_pm_mutex);
6472			un->un_power_level = level;
6473		}
6474	}
6475
6476	mutex_exit(SD_MUTEX(un));
6477
6478	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: exit\n");
6479
6480	return (DDI_SUCCESS);
6481}
6482
6483
6484/*
6485 *    Function: sd_pm_idletimeout_handler
6486 *
6487 * Description: A timer routine that's active only while a device is busy.
6488 *		The purpose is to extend slightly the pm framework's busy
6489 *		view of the device to prevent busy/idle thrashing for
6490 *		back-to-back commands. Do this by comparing the current time
6491 *		to the time at which the last command completed and when the
6492 *		difference is greater than sd_pm_idletime, call
6493 *		pm_idle_component. In addition to indicating idle to the pm
6494 *		framework, update the chain type to again use the internal pm
6495 *		layers of the driver.
6496 *
6497 *   Arguments: arg - driver soft state (unit) structure
6498 *
6499 *     Context: Executes in a timeout(9F) thread context
6500 */
6501
6502static void
6503sd_pm_idletimeout_handler(void *arg)
6504{
6505	struct sd_lun *un = arg;
6506
6507	time_t	now;
6508
6509	mutex_enter(&sd_detach_mutex);
6510	if (un->un_detach_count != 0) {
6511		/* Abort if the instance is detaching */
6512		mutex_exit(&sd_detach_mutex);
6513		return;
6514	}
6515	mutex_exit(&sd_detach_mutex);
6516
6517	now = ddi_get_time();
6518	/*
6519	 * Grab both mutexes, in the proper order, since we're accessing
6520	 * both PM and softstate variables.
6521	 */
6522	mutex_enter(SD_MUTEX(un));
6523	mutex_enter(&un->un_pm_mutex);
6524	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
6525	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
6526		/*
6527		 * Update the chain types.
		 * This takes effect on the next new command received.
6529		 */
6530		if (un->un_f_non_devbsize_supported) {
6531			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6532		} else {
6533			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6534		}
6535		un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD;
6536
6537		SD_TRACE(SD_LOG_IO_PM, un,
6538		    "sd_pm_idletimeout_handler: idling device\n");
6539		(void) pm_idle_component(SD_DEVINFO(un), 0);
6540		un->un_pm_idle_timeid = NULL;
6541	} else {
6542		un->un_pm_idle_timeid =
6543		    timeout(sd_pm_idletimeout_handler, un,
6544		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
6545	}
6546	mutex_exit(&un->un_pm_mutex);
6547	mutex_exit(SD_MUTEX(un));
6548}
6549
6550
6551/*
6552 *    Function: sd_pm_timeout_handler
6553 *
6554 * Description: Callback to tell framework we are idle.
6555 *
 *     Context: timeout(9F) thread context.
6557 */
6558
6559static void
6560sd_pm_timeout_handler(void *arg)
6561{
6562	struct sd_lun *un = arg;
6563
6564	(void) pm_idle_component(SD_DEVINFO(un), 0);
6565	mutex_enter(&un->un_pm_mutex);
6566	un->un_pm_timeid = NULL;
6567	mutex_exit(&un->un_pm_mutex);
6568}
6569
6570
6571/*
6572 *    Function: sdpower
6573 *
6574 * Description: PM entry point.
6575 *
6576 * Return Code: DDI_SUCCESS
6577 *		DDI_FAILURE
6578 *
6579 *     Context: Kernel thread context
6580 */
6581
6582static int
6583sdpower(dev_info_t *devi, int component, int level)
6584{
6585	struct sd_lun	*un;
6586	int		instance;
6587	int		rval = DDI_SUCCESS;
6588	uint_t		i, log_page_size, maxcycles, ncycles;
6589	uchar_t		*log_page_data;
6590	int		log_sense_page;
6591	int		medium_present;
6592	time_t		intvlp;
6593	struct pm_trans_data	sd_pm_tran_data;
6594	uchar_t		save_state;
6595	int		sval;
6596	uchar_t		state_before_pm;
6597	int		got_semaphore_here;
6598	sd_ssc_t	*ssc;
6599	int	last_power_level;
6600
6601	instance = ddi_get_instance(devi);
6602
6603	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
6604	    !SD_PM_IS_LEVEL_VALID(un, level) || component != 0) {
6605		return (DDI_FAILURE);
6606	}
6607
6608	ssc = sd_ssc_init(un);
6609
6610	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
6611
6612	/*
6613	 * Must synchronize power down with close.
6614	 * Attempt to decrement/acquire the open/close semaphore,
6615	 * but do NOT wait on it. If it's not greater than zero,
	 * i.e. it can't be decremented without waiting, then
6617	 * someone else, either open or close, already has it
6618	 * and the try returns 0. Use that knowledge here to determine
6619	 * if it's OK to change the device power level.
6620	 * Also, only increment it on exit if it was decremented, ie. gotten,
6621	 * here.
6622	 */
6623	got_semaphore_here = sema_tryp(&un->un_semoclose);
6624
6625	mutex_enter(SD_MUTEX(un));
6626
6627	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
6628	    un->un_ncmds_in_driver);
6629
	/*
	 * If un_ncmds_in_driver is non-zero, commands are already being
	 * processed in the driver; if the semaphore was not obtained here,
	 * an open or close is being processed. In either case a request
	 * to go to a lower power level that can't perform I/O cannot be
	 * honored, so we need to return failure.
	 */
6638	if ((!SD_PM_IS_IO_CAPABLE(un, level)) &&
6639	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
6640		mutex_exit(SD_MUTEX(un));
6641
6642		if (got_semaphore_here != 0) {
6643			sema_v(&un->un_semoclose);
6644		}
6645		SD_TRACE(SD_LOG_IO_PM, un,
6646		    "sdpower: exit, device has queued cmds.\n");
6647
6648		goto sdpower_failed;
6649	}
6650
	/*
	 * If the device is OFFLINE the disk is effectively dead; in our
	 * case we would have to change its power state by sending commands,
	 * which would fail anyway, so just return here.
	 *
	 * Power changes to a device that is OFFLINE or SUSPENDED
	 * are not allowed.
	 */
6659	if ((un->un_state == SD_STATE_OFFLINE) ||
6660	    (un->un_state == SD_STATE_SUSPENDED)) {
6661		mutex_exit(SD_MUTEX(un));
6662
6663		if (got_semaphore_here != 0) {
6664			sema_v(&un->un_semoclose);
6665		}
6666		SD_TRACE(SD_LOG_IO_PM, un,
6667		    "sdpower: exit, device is off-line.\n");
6668
6669		goto sdpower_failed;
6670	}
6671
	/*
	 * Change the device's state to indicate its power level
	 * is being changed. Do this to prevent a power off in the
	 * middle of commands, which is especially bad on devices
	 * that are really powered off instead of just spun down.
	 */
6678	state_before_pm = un->un_state;
6679	un->un_state = SD_STATE_PM_CHANGING;
6680
6681	mutex_exit(SD_MUTEX(un));
6682
	/*
	 * If the LOG SENSE command is not supported, bypass the
	 * following check; otherwise, examine the log sense
	 * information for this device.
	 */
6688	if (SD_PM_STOP_MOTOR_NEEDED(un, level) &&
6689	    un->un_f_log_sense_supported) {
		/*
		 * Get the log sense information to understand whether the
		 * powercycle counts have gone beyond the threshold.
		 */
6694		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6695		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6696
6697		mutex_enter(SD_MUTEX(un));
6698		log_sense_page = un->un_start_stop_cycle_page;
6699		mutex_exit(SD_MUTEX(un));
6700
6701		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6702		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
6703
6704		if (rval != 0) {
6705			if (rval == EIO)
6706				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6707			else
6708				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6709		}
6710
6711#ifdef	SDDEBUG
6712		if (sd_force_pm_supported) {
6713			/* Force a successful result */
6714			rval = 0;
6715		}
6716#endif
6717		if (rval != 0) {
6718			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
6719			    "Log Sense Failed\n");
6720
6721			kmem_free(log_page_data, log_page_size);
6722			/* Cannot support power management on those drives */
6723
6724			if (got_semaphore_here != 0) {
6725				sema_v(&un->un_semoclose);
6726			}
			/*
			 * On exit put the state back to its original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
6732			mutex_enter(SD_MUTEX(un));
6733			un->un_state = state_before_pm;
6734			cv_broadcast(&un->un_suspend_cv);
6735			mutex_exit(SD_MUTEX(un));
6736			SD_TRACE(SD_LOG_IO_PM, un,
6737			    "sdpower: exit, Log Sense Failed.\n");
6738
6739			goto sdpower_failed;
6740		}
6741
		/*
		 * Convert the essential information from the log page data
		 * into pm_trans_data.
		 */
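		/*
		 * Layout note: with the 4-byte log page header and 4-byte
		 * parameter headers of the START/STOP cycle counter log
		 * page (0x0e), parameter 0x0003 (specified cycle count over
		 * device lifetime) occupies bytes 0x1c-0x1f and parameter
		 * 0x0004 (accumulated start-stop cycles) bytes 0x24-0x27,
		 * which is where the offsets used below come from.
		 */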
6746		maxcycles =
6747		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
6748		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
6749
6750		ncycles =
6751		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
6752		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
6753
6754		if (un->un_f_pm_log_sense_smart) {
6755			sd_pm_tran_data.un.smart_count.allowed = maxcycles;
6756			sd_pm_tran_data.un.smart_count.consumed = ncycles;
6757			sd_pm_tran_data.un.smart_count.flag = 0;
6758			sd_pm_tran_data.format = DC_SMART_FORMAT;
6759		} else {
6760			sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
6761			sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
6762			for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
6763				sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
6764				    log_page_data[8+i];
6765			}
6766			sd_pm_tran_data.un.scsi_cycles.flag = 0;
6767			sd_pm_tran_data.format = DC_SCSI_FORMAT;
6768		}
6769
6770		kmem_free(log_page_data, log_page_size);
6771
		/*
		 * Call the pm_trans_check routine to get the OK from
		 * the global policy.
		 */
6776		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
6777#ifdef	SDDEBUG
6778		if (sd_force_pm_supported) {
6779			/* Force a successful result */
6780			rval = 1;
6781		}
6782#endif
6783		switch (rval) {
6784		case 0:
			/*
			 * Either it is not OK to power cycle, or there was
			 * an error in the parameters passed. pm_trans_check
			 * has given the advised time (intvlp) to wait before
			 * considering a power cycle. Until then we are
			 * supposed to pretend we are busy so that the pm
			 * framework will never call our power entry point;
			 * install a timeout handler and wait for the
			 * recommended time to elapse so that power management
			 * can become effective again.
			 *
			 * To effect this behavior, call pm_busy_component to
			 * indicate to the framework this device is busy.
			 * Because un_pm_count is not adjusted, the rest of
			 * PM in the driver will function normally and
			 * independently of this; but since the framework is
			 * told the device is busy it won't attempt powering
			 * down until it gets a matching idle, which the
			 * timeout handler sends.
			 * Note: sd_pm_entry can't be called here to do this
			 * because sdpower may have been called as a result
			 * of a call to pm_raise_power from within sd_pm_entry.
			 *
			 * If a timeout handler is already active then
			 * don't install another.
			 */
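			/*
			 * Note: the advised interval (intvlp) from
			 * pm_trans_check(9F) is expressed in seconds, hence
			 * the drv_usectohz(1000000) (one second's worth of
			 * ticks) scale factor applied to the timeout below.
			 */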
6809			mutex_enter(&un->un_pm_mutex);
6810			if (un->un_pm_timeid == NULL) {
6811				un->un_pm_timeid =
6812				    timeout(sd_pm_timeout_handler,
6813				    un, intvlp * drv_usectohz(1000000));
6814				mutex_exit(&un->un_pm_mutex);
6815				(void) pm_busy_component(SD_DEVINFO(un), 0);
6816			} else {
6817				mutex_exit(&un->un_pm_mutex);
6818			}
6819			if (got_semaphore_here != 0) {
6820				sema_v(&un->un_semoclose);
6821			}
			/*
			 * On exit put the state back to its original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
6827			mutex_enter(SD_MUTEX(un));
6828			un->un_state = state_before_pm;
6829			cv_broadcast(&un->un_suspend_cv);
6830			mutex_exit(SD_MUTEX(un));
6831
6832			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
6833			    "trans check Failed, not ok to power cycle.\n");
6834
6835			goto sdpower_failed;
6836		case -1:
6837			if (got_semaphore_here != 0) {
6838				sema_v(&un->un_semoclose);
6839			}
			/*
			 * On exit put the state back to its original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
6845			mutex_enter(SD_MUTEX(un));
6846			un->un_state = state_before_pm;
6847			cv_broadcast(&un->un_suspend_cv);
6848			mutex_exit(SD_MUTEX(un));
6849			SD_TRACE(SD_LOG_IO_PM, un,
6850			    "sdpower: exit, trans check command Failed.\n");
6851
6852			goto sdpower_failed;
6853		}
6854	}
6855
		/*
		 * Save the last state; if the STOP fails we need it
		 * for restoring.
		 */
6861		mutex_enter(SD_MUTEX(un));
6862		save_state = un->un_last_state;
6863		last_power_level = un->un_power_level;
		/*
		 * There must not be any commands being processed
		 * in the driver when we get here. Power to the
		 * device is potentially going off.
		 */
6869		ASSERT(un->un_ncmds_in_driver == 0);
6870		mutex_exit(SD_MUTEX(un));
6871
6872		/*
6873		 * For now PM suspend the device completely before spindle is
6874		 * turned off
6875		 */
6876		if ((rval = sd_pm_state_change(un, level, SD_PM_STATE_CHANGE))
6877		    == DDI_FAILURE) {
6878			if (got_semaphore_here != 0) {
6879				sema_v(&un->un_semoclose);
6880			}
			/*
			 * On exit put the state back to its original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
6886			mutex_enter(SD_MUTEX(un));
6887			un->un_state = state_before_pm;
6888			un->un_power_level = last_power_level;
6889			cv_broadcast(&un->un_suspend_cv);
6890			mutex_exit(SD_MUTEX(un));
6891			SD_TRACE(SD_LOG_IO_PM, un,
6892			    "sdpower: exit, PM suspend Failed.\n");
6893
6894			goto sdpower_failed;
6895		}
6896	}
6897
	/*
	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
	 * close, or strategy. Dump no longer uses this routine; it uses its
	 * own code so it can be done in polled mode.
	 */
6903
6904	medium_present = TRUE;
6905
6906	/*
6907	 * When powering up, issue a TUR in case the device is at unit
6908	 * attention.  Don't do retries. Bypass the PM layer, otherwise
6909	 * a deadlock on un_pm_busy_cv will occur.
6910	 */
6911	if (SD_PM_IS_IO_CAPABLE(un, level)) {
6912		sval = sd_send_scsi_TEST_UNIT_READY(ssc,
6913		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
6914		if (sval != 0)
6915			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6916	}
6917
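	/*
	 * Issue the actual power state change. If the device supports the
	 * SCSI power conditions feature, request the target power condition
	 * directly (sd_pl2pc maps the PM level to a SCSI power condition);
	 * otherwise fall back to a plain START/STOP UNIT.
	 */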
6918	if (un->un_f_power_condition_supported) {
6919		char *pm_condition_name[] = {"STOPPED", "STANDBY",
6920		    "IDLE", "ACTIVE"};
6921		SD_TRACE(SD_LOG_IO_PM, un,
6922		    "sdpower: sending \'%s\' power condition",
6923		    pm_condition_name[level]);
6924		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
6925		    sd_pl2pc[level], SD_PATH_DIRECT);
6926	} else {
6927		SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
6928		    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
6929		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
6930		    ((level == SD_SPINDLE_ON) ? SD_TARGET_START :
6931		    SD_TARGET_STOP), SD_PATH_DIRECT);
6932	}
6933	if (sval != 0) {
6934		if (sval == EIO)
6935			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6936		else
6937			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6938	}
6939
6940	/* Command failed, check for media present. */
6941	if ((sval == ENXIO) && un->un_f_has_removable_media) {
6942		medium_present = FALSE;
6943	}
6944
6945	/*
6946	 * The conditions of interest here are:
6947	 *   if a spindle off with media present fails,
6948	 *	then restore the state and return an error.
6949	 *   else if a spindle on fails,
6950	 *	then return an error (there's no state to restore).
6951	 * In all other cases we setup for the new state
6952	 * and return success.
6953	 */
6954	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
6955		if ((medium_present == TRUE) && (sval != 0)) {
6956			/* The stop command from above failed */
6957			rval = DDI_FAILURE;
			/*
			 * The stop command failed, and we have media
			 * present. Roll the power level back by calling
			 * sd_pm_state_change() and set the state back to
			 * its previous value.
			 */
6963			 */
6964			(void) sd_pm_state_change(un, last_power_level,
6965			    SD_PM_STATE_ROLLBACK);
6966			mutex_enter(SD_MUTEX(un));
6967			un->un_last_state = save_state;
6968			mutex_exit(SD_MUTEX(un));
6969		} else if (un->un_f_monitor_media_state) {
			/*
			 * The stop command from above succeeded.
			 * Terminate the watch thread when a removable-media
			 * device goes into a low power state. This is
			 * required by the pm framework; otherwise commands
			 * would be generated for the device (through the
			 * watch thread) even when it is in a low power
			 * state.
			 */
6978			mutex_enter(SD_MUTEX(un));
6979			un->un_f_watcht_stopped = FALSE;
6980			if (un->un_swr_token != NULL) {
6981				opaque_t temp_token = un->un_swr_token;
6982				un->un_f_watcht_stopped = TRUE;
6983				un->un_swr_token = NULL;
6984				mutex_exit(SD_MUTEX(un));
6985				(void) scsi_watch_request_terminate(temp_token,
6986				    SCSI_WATCH_TERMINATE_ALL_WAIT);
6987			} else {
6988				mutex_exit(SD_MUTEX(un));
6989			}
6990		}
6991	} else {
6992		/*
6993		 * The level requested is I/O capable.
6994		 * Legacy behavior: return success on a failed spinup
6995		 * if there is no media in the drive.
6996		 * Do this by looking at medium_present here.
6997		 */
6998		if ((sval != 0) && medium_present) {
6999			/* The start command from above failed */
7000			rval = DDI_FAILURE;
7001		} else {
			/*
			 * The start command from above succeeded.
			 * PM-resume the device now that we have
			 * started the disk.
			 */
7007			(void) sd_pm_state_change(un, level,
7008			    SD_PM_STATE_CHANGE);
7009
7010			/*
7011			 * Resume the watch thread since it was suspended
7012			 * when the device went into low power mode.
7013			 */
7014			if (un->un_f_monitor_media_state) {
7015				mutex_enter(SD_MUTEX(un));
7016				if (un->un_f_watcht_stopped == TRUE) {
7017					opaque_t temp_token;
7018
7019					un->un_f_watcht_stopped = FALSE;
7020					mutex_exit(SD_MUTEX(un));
7021					temp_token =
7022					    sd_watch_request_submit(un);
7023					mutex_enter(SD_MUTEX(un));
7024					un->un_swr_token = temp_token;
7025				}
7026				mutex_exit(SD_MUTEX(un));
7027			}
7028		}
7029	}
7030
7031	if (got_semaphore_here != 0) {
7032		sema_v(&un->un_semoclose);
7033	}
	/*
	 * On exit put the state back to its original value
	 * and broadcast to anyone waiting for the power
	 * change completion.
	 */
7039	mutex_enter(SD_MUTEX(un));
7040	un->un_state = state_before_pm;
7041	cv_broadcast(&un->un_suspend_cv);
7042	mutex_exit(SD_MUTEX(un));
7043
7044	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7045
7046	sd_ssc_fini(ssc);
7047	return (rval);
7048
7049sdpower_failed:
7050
7051	sd_ssc_fini(ssc);
7052	return (DDI_FAILURE);
7053}
7054
7055
7056
7057/*
7058 *    Function: sdattach
7059 *
7060 * Description: Driver's attach(9e) entry point function.
7061 *
7062 *   Arguments: devi - opaque device info handle
7063 *		cmd  - attach  type
7064 *
7065 * Return Code: DDI_SUCCESS
7066 *		DDI_FAILURE
7067 *
7068 *     Context: Kernel thread context
7069 */
7070
7071static int
7072sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7073{
7074	switch (cmd) {
7075	case DDI_ATTACH:
7076		return (sd_unit_attach(devi));
7077	case DDI_RESUME:
7078		return (sd_ddi_resume(devi));
7079	default:
7080		break;
7081	}
7082	return (DDI_FAILURE);
7083}
7084
7085
7086/*
7087 *    Function: sddetach
7088 *
7089 * Description: Driver's detach(9E) entry point function.
7090 *
7091 *   Arguments: devi - opaque device info handle
7092 *		cmd  - detach  type
7093 *
7094 * Return Code: DDI_SUCCESS
7095 *		DDI_FAILURE
7096 *
7097 *     Context: Kernel thread context
7098 */
7099
7100static int
7101sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7102{
7103	switch (cmd) {
7104	case DDI_DETACH:
7105		return (sd_unit_detach(devi));
7106	case DDI_SUSPEND:
7107		return (sd_ddi_suspend(devi));
7108	default:
7109		break;
7110	}
7111	return (DDI_FAILURE);
7112}
7113
7114
7115/*
 *    Function: sd_sync_with_callback
 *
 * Description: Prevents sd_unit_attach or sd_unit_detach from freeing the
 *		soft state while the callback routine is active.
 *
 *   Arguments: un: softstate structure for the instance
 *
 *     Context: Kernel thread context
7124 */
7125
7126static void
7127sd_sync_with_callback(struct sd_lun *un)
7128{
7129	ASSERT(un != NULL);
7130
7131	mutex_enter(SD_MUTEX(un));
7132
7133	ASSERT(un->un_in_callback >= 0);
7134
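	/* Poll, two clock ticks at a time, until any active callback drains. */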
7135	while (un->un_in_callback > 0) {
7136		mutex_exit(SD_MUTEX(un));
7137		delay(2);
7138		mutex_enter(SD_MUTEX(un));
7139	}
7140
7141	mutex_exit(SD_MUTEX(un));
7142}
7143
7144/*
7145 *    Function: sd_unit_attach
7146 *
7147 * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7148 *		the soft state structure for the device and performs
7149 *		all necessary structure and device initializations.
7150 *
7151 *   Arguments: devi: the system's dev_info_t for the device.
7152 *
7153 * Return Code: DDI_SUCCESS if attach is successful.
7154 *		DDI_FAILURE if any part of the attach fails.
7155 *
7156 *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7157 *		Kernel thread context only.  Can sleep.
7158 */
7159
7160static int
7161sd_unit_attach(dev_info_t *devi)
7162{
7163	struct	scsi_device	*devp;
7164	struct	sd_lun		*un;
7165	char			*variantp;
7166	char			name_str[48];
7167	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7168	int	instance;
7169	int	rval;
7170	int	wc_enabled;
7171	int	tgt;
7172	uint64_t	capacity;
7173	uint_t		lbasize = 0;
7174	dev_info_t	*pdip = ddi_get_parent(devi);
7175	int		offbyone = 0;
7176	int		geom_label_valid = 0;
7177	sd_ssc_t	*ssc;
7178	int		status;
7179	struct sd_fm_internal	*sfip = NULL;
7180	int		max_xfer_size;
7181
7182	/*
7183	 * Retrieve the target driver's private data area. This was set
7184	 * up by the HBA.
7185	 */
7186	devp = ddi_get_driver_private(devi);
7187
7188	/*
7189	 * Retrieve the target ID of the device.
7190	 */
7191	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7192	    SCSI_ADDR_PROP_TARGET, -1);
7193
	/*
	 * Since we have no idea what state things were left in by the last
	 * user of the device, set up some 'default' settings, i.e. turn 'em
	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
	 * Do this before the scsi_probe, which sends an inquiry.
	 * This is a fix for bug (4430280).
	 * Of special importance is wide-xfer. The drive could have been left
	 * in wide transfer mode by the last driver to communicate with it;
	 * this includes us. If that's the case, and if the following is not
	 * set up properly or we don't re-negotiate with the drive prior to
	 * transferring data to/from the drive, it causes bus parity errors,
	 * data overruns, and unexpected interrupts. This first occurred when
	 * the fix for bug (4378686) was made.
	 */
7208	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7209	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7210	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7211
	/*
	 * Currently, scsi_ifsetcap sets the tagged-qing capability for all
	 * LUNs on a target. Setting it for one lun instance actually sets
	 * the capability of the whole target, which affects those luns
	 * already attached on the same target. So during attach, we can
	 * disable this capability only when no other lun has yet been
	 * attached on this target. By doing this, we assume a target has
	 * the same tagged-qing capability for every lun. The condition can
	 * be removed when the HBA is changed to support per-lun based
	 * tagged-qing capability.
	 */
7222	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7223		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7224	}
7225
7226	/*
7227	 * Use scsi_probe() to issue an INQUIRY command to the device.
7228	 * This call will allocate and fill in the scsi_inquiry structure
7229	 * and point the sd_inq member of the scsi_device structure to it.
7230	 * If the attach succeeds, then this memory will not be de-allocated
7231	 * (via scsi_unprobe()) until the instance is detached.
7232	 */
7233	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7234		goto probe_failed;
7235	}
7236
7237	/*
7238	 * Check the device type as specified in the inquiry data and
7239	 * claim it if it is of a type that we support.
7240	 */
7241	switch (devp->sd_inq->inq_dtype) {
7242	case DTYPE_DIRECT:
7243		break;
7244	case DTYPE_RODIRECT:
7245		break;
7246	case DTYPE_OPTICAL:
7247		break;
7248	case DTYPE_NOTPRESENT:
7249	default:
7250		/* Unsupported device type; fail the attach. */
7251		goto probe_failed;
7252	}
7253
7254	/*
7255	 * Allocate the soft state structure for this unit.
7256	 *
7257	 * We rely upon this memory being set to all zeroes by
7258	 * ddi_soft_state_zalloc().  We assume that any member of the
7259	 * soft state structure that is not explicitly initialized by
7260	 * this routine will have a value of zero.
7261	 */
7262	instance = ddi_get_instance(devp->sd_dev);
7263#ifndef XPV_HVM_DRIVER
7264	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7265		goto probe_failed;
7266	}
7267#endif /* !XPV_HVM_DRIVER */
7268
7269	/*
7270	 * Retrieve a pointer to the newly-allocated soft state.
7271	 *
7272	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7273	 * was successful, unless something has gone horribly wrong and the
7274	 * ddi's soft state internals are corrupt (in which case it is
7275	 * probably better to halt here than just fail the attach....)
7276	 */
7277	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7278		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7279		    instance);
7280		/*NOTREACHED*/
7281	}
7282
7283	/*
7284	 * Link the back ptr of the driver soft state to the scsi_device
7285	 * struct for this lun.
7286	 * Save a pointer to the softstate in the driver-private area of
7287	 * the scsi_device struct.
7288	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7289	 * we first set un->un_sd below.
7290	 */
7291	un->un_sd = devp;
7292	devp->sd_private = (opaque_t)un;
7293
7294	/*
7295	 * The following must be after devp is stored in the soft state struct.
7296	 */
7297#ifdef SDDEBUG
7298	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7299	    "%s_unit_attach: un:0x%p instance:%d\n",
7300	    ddi_driver_name(devi), un, instance);
7301#endif
7302
7303	/*
7304	 * Set up the device type and node type (for the minor nodes).
7305	 * By default we assume that the device can at least support the
7306	 * Common Command Set. Call it a CD-ROM if it reports itself
7307	 * as a RODIRECT device.
7308	 */
7309	switch (devp->sd_inq->inq_dtype) {
7310	case DTYPE_RODIRECT:
7311		un->un_node_type = DDI_NT_CD_CHAN;
7312		un->un_ctype	 = CTYPE_CDROM;
7313		break;
7314	case DTYPE_OPTICAL:
7315		un->un_node_type = DDI_NT_BLOCK_CHAN;
7316		un->un_ctype	 = CTYPE_ROD;
7317		break;
7318	default:
7319		un->un_node_type = DDI_NT_BLOCK_CHAN;
7320		un->un_ctype	 = CTYPE_CCS;
7321		break;
7322	}
7323
	/*
	 * Try to read the interconnect type from the HBA.
	 *
	 * Note: This driver is currently compiled as two binaries, a parallel
	 * scsi version (sd) and a fibre channel version (ssd). All functional
	 * differences are determined at compile time. In the future a single
	 * binary will be provided and the interconnect type will be used to
	 * differentiate between fibre and parallel scsi behaviors. At that
	 * time it will be necessary for all fibre channel HBAs to support
	 * this property.
	 *
	 * Default un_f_is_fibre to TRUE (assume fibre).
	 */
7337	un->un_f_is_fibre = TRUE;
7338	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7339	case INTERCONNECT_SSA:
7340		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7341		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7342		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7343		break;
7344	case INTERCONNECT_PARALLEL:
7345		un->un_f_is_fibre = FALSE;
7346		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7347		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7348		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7349		break;
7350	case INTERCONNECT_SAS:
7351		un->un_f_is_fibre = FALSE;
7352		un->un_interconnect_type = SD_INTERCONNECT_SAS;
7353		un->un_node_type = DDI_NT_BLOCK_SAS;
7354		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7355		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SAS\n", un);
7356		break;
7357	case INTERCONNECT_SATA:
7358		un->un_f_is_fibre = FALSE;
7359		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7360		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7361		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7362		break;
7363	case INTERCONNECT_FIBRE:
7364		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7365		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7366		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7367		break;
7368	case INTERCONNECT_FABRIC:
7369		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7370		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7371		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7372		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7373		break;
7374	default:
7375#ifdef SD_DEFAULT_INTERCONNECT_TYPE
		/*
		 * The HBA does not support the "interconnect-type" property
		 * (or did not provide a recognized type).
		 *
		 * Note: This will be obsoleted when a single fibre channel
		 * and parallel scsi driver is delivered. In the meantime the
		 * interconnect type will be set to the platform default. If
		 * that type is not parallel SCSI, it means that we should be
		 * assuming "ssd" semantics. However, here this also means
		 * that the FC HBA is not supporting the "interconnect-type"
		 * property like we expect it to, so log this occurrence.
		 */
7388		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7389		if (!SD_IS_PARALLEL_SCSI(un)) {
7390			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7391			    "sd_unit_attach: un:0x%p Assuming "
7392			    "INTERCONNECT_FIBRE\n", un);
7393		} else {
7394			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7395			    "sd_unit_attach: un:0x%p Assuming "
7396			    "INTERCONNECT_PARALLEL\n", un);
7397			un->un_f_is_fibre = FALSE;
7398		}
7399#else
7400		/*
7401		 * Note: This source will be implemented when a single fibre
7402		 * channel and parallel scsi driver is delivered. The default
7403		 * will be to assume that if a device does not support the
7404		 * "interconnect-type" property it is a parallel SCSI HBA and
7405		 * we will set the interconnect type for parallel scsi.
7406		 */
7407		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7408		un->un_f_is_fibre = FALSE;
7409#endif
7410		break;
7411	}
7412
7413	if (un->un_f_is_fibre == TRUE) {
7414		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7415		    SCSI_VERSION_3) {
7416			switch (un->un_interconnect_type) {
7417			case SD_INTERCONNECT_FIBRE:
7418			case SD_INTERCONNECT_SSA:
7419				un->un_node_type = DDI_NT_BLOCK_WWN;
7420				break;
7421			default:
7422				break;
7423			}
7424		}
7425	}
7426
7427	/*
7428	 * Initialize the Request Sense command for the target
7429	 */
7430	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7431		goto alloc_rqs_failed;
7432	}
7433
	/*
	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for SPARC
	 * with separate binaries for sd and ssd.
	 *
	 * x86 has one binary, so un_retry_count is set based on the
	 * interconnect type. These hardcoded values will go away when
	 * SPARC uses one binary for sd and ssd, and they need to match
	 * SD_RETRY_COUNT in sddef.h.
	 * The value used is based on interconnect type:
	 * fibre = 3, parallel = 5.
	 */
7445#if defined(__i386) || defined(__amd64)
7446	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7447#else
7448	un->un_retry_count = SD_RETRY_COUNT;
7449#endif
7450
7451	/*
7452	 * Set the per disk retry count to the default number of retries
7453	 * for disks and CDROMs. This value can be overridden by the
7454	 * disk property list or an entry in sd.conf.
7455	 */
7456	un->un_notready_retry_count =
7457	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7458	    : DISK_NOT_READY_RETRY_COUNT(un);
7459
7460	/*
7461	 * Set the busy retry count to the default value of un_retry_count.
7462	 * This can be overridden by entries in sd.conf or the device
7463	 * config table.
7464	 */
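	/*
	 * As an illustrative example (assumed syntax -- the exact property
	 * names accepted are whatever sd_read_unit_properties() parses in
	 * this release), a busy-retry override might appear in sd.conf as:
	 *
	 *	sd-config-list = "VENDOR  MODEL", "retries-busy:12";
	 */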
7465	un->un_busy_retry_count = un->un_retry_count;
7466
7467	/*
7468	 * Init the reset threshold for retries.  This number determines
7469	 * how many retries must be performed before a reset can be issued
7470	 * (for certain error conditions). This can be overridden by entries
7471	 * in sd.conf or the device config table.
7472	 */
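	/*
	 * With the x86 defaults above (fibre = 3, parallel = 5), the integer
	 * division yields a reset threshold of 1 or 2 retries respectively.
	 */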
7473	un->un_reset_retry_count = (un->un_retry_count / 2);
7474
7475	/*
7476	 * Set the victim_retry_count to the default un_retry_count
7477	 */
7478	un->un_victim_retry_count = (2 * un->un_retry_count);
7479
7480	/*
7481	 * Set the reservation release timeout to the default value of
7482	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7483	 * device config table.
7484	 */
7485	un->un_reserve_release_time = 5;
7486
7487	/*
7488	 * Set up the default maximum transfer size. Note that this may
7489	 * get updated later in the attach, when setting up default wide
7490	 * operations for disks.
7491	 */
7492#if defined(__i386) || defined(__amd64)
7493	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7494	un->un_partial_dma_supported = 1;
7495#else
7496	un->un_max_xfer_size = (uint_t)maxphys;
7497#endif
7498
7499	/*
7500	 * Get "allow bus device reset" property (defaults to "enabled" if
7501	 * the property was not defined). This is to disable bus resets for
7502	 * certain kinds of error recovery. Note: In the future when a run-time
7503	 * fibre check is available the soft state flag should default to
7504	 * enabled.
7505	 */
7506	if (un->un_f_is_fibre == TRUE) {
7507		un->un_f_allow_bus_device_reset = TRUE;
7508	} else {
7509		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7510		    "allow-bus-device-reset", 1) != 0) {
7511			un->un_f_allow_bus_device_reset = TRUE;
7512			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7513			    "sd_unit_attach: un:0x%p Bus device reset "
7514			    "enabled\n", un);
7515		} else {
7516			un->un_f_allow_bus_device_reset = FALSE;
7517			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7518			    "sd_unit_attach: un:0x%p Bus device reset "
7519			    "disabled\n", un);
7520		}
7521	}
7522
7523	/*
7524	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7525	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7526	 *
7527	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7528	 * property. The new "variant" property with a value of "atapi" has been
7529	 * introduced so that future 'variants' of standard SCSI behavior (like
7530	 * atapi) could be specified by the underlying HBA drivers by supplying
7531	 * a new value for the "variant" property, instead of having to define a
7532	 * new property.
7533	 */
7534	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7535		un->un_f_cfg_is_atapi = TRUE;
7536		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7537		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7538	}
7539	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7540	    &variantp) == DDI_PROP_SUCCESS) {
7541		if (strcmp(variantp, "atapi") == 0) {
7542			un->un_f_cfg_is_atapi = TRUE;
7543			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7544			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7545		}
7546		ddi_prop_free(variantp);
7547	}
7548
	un->un_cmd_timeout	= SD_IO_TIME;

	un->un_busy_timeout	= SD_BSY_TIMEOUT;
7552
7553	/* Info on current states, statuses, etc. (Updated frequently) */
7554	un->un_state		= SD_STATE_NORMAL;
7555	un->un_last_state	= SD_STATE_NORMAL;
7556
7557	/* Control & status info for command throttling */
7558	un->un_throttle		= sd_max_throttle;
7559	un->un_saved_throttle	= sd_max_throttle;
7560	un->un_min_throttle	= sd_min_throttle;
7561
7562	if (un->un_f_is_fibre == TRUE) {
7563		un->un_f_use_adaptive_throttle = TRUE;
7564	} else {
7565		un->un_f_use_adaptive_throttle = FALSE;
7566	}
7567
7568	/* Removable media support. */
7569	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7570	un->un_mediastate		= DKIO_NONE;
7571	un->un_specified_mediastate	= DKIO_NONE;
7572
7573	/* CVs for suspend/resume (PM or DR) */
7574	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7575	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7576
7577	/* Power management support. */
7578	un->un_power_level = SD_SPINDLE_UNINIT;
7579
7580	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
7581	un->un_f_wcc_inprog = 0;
7582
7583	/*
7584	 * The open/close semaphore is used to serialize threads executing
7585	 * in the driver's open & close entry point routines for a given
7586	 * instance.
7587	 */
7588	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
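	/*
	 * Note: this is the same semaphore that sdpower() probes with
	 * sema_tryp() so that a power-level change never races with an
	 * in-progress open or close.
	 */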
7589
	/*
	 * The conf file entries and softstate variables are forceful
	 * overrides, meaning a non-zero value must be entered to change
	 * the default.
	 */
7594	un->un_f_disksort_disabled = FALSE;
7595	un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT;
7596	un->un_f_enable_rmw = FALSE;
7597
7598	/*
7599	 * GET EVENT STATUS NOTIFICATION media polling enabled by default, but
7600	 * can be overridden via [s]sd-config-list "mmc-gesn-polling" property.
7601	 */
7602	un->un_f_mmc_gesn_polling = TRUE;
7603
7604	/*
7605	 * Retrieve the properties from the static driver table or the driver
7606	 * configuration file (.conf) for this unit and update the soft state
7607	 * for the device as needed for the indicated properties.
7608	 * Note: the property configuration needs to occur here as some of the
7609	 * following routines may have dependencies on soft state flags set
7610	 * as part of the driver property configuration.
7611	 */
7612	sd_read_unit_properties(un);
7613	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7614	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
7615
	/*
	 * A device is treated as hotpluggable only if it has the
	 * "hotpluggable" property; otherwise it is regarded as
	 * non-hotpluggable.
	 */
7621	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
7622	    -1) != -1) {
7623		un->un_f_is_hotpluggable = TRUE;
7624	}
7625
	/*
	 * Set the unit's attributes (flags) according to "hotpluggable"
	 * and the RMB bit in the INQUIRY data.
	 */
7630	sd_set_unit_attributes(un, devi);
7631
7632	/*
7633	 * By default, we mark the capacity, lbasize, and geometry
7634	 * as invalid. Only if we successfully read a valid capacity
7635	 * will we update the un_blockcount and un_tgt_blocksize with the
7636	 * valid values (the geometry will be validated later).
7637	 */
7638	un->un_f_blockcount_is_valid	= FALSE;
7639	un->un_f_tgt_blocksize_is_valid	= FALSE;
7640
7641	/*
7642	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
7643	 * otherwise.
7644	 */
7645	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
7646	un->un_blockcount = 0;
7647
	/*
	 * The physical sector size defaults to DEV_BSIZE currently.
	 */
7651	un->un_phy_blocksize = DEV_BSIZE;
7652
7653	/*
7654	 * Set up the per-instance info needed to determine the correct
7655	 * CDBs and other info for issuing commands to the target.
7656	 */
7657	sd_init_cdb_limits(un);
7658
7659	/*
7660	 * Set up the IO chains to use, based upon the target type.
7661	 */
7662	if (un->un_f_non_devbsize_supported) {
7663		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7664	} else {
7665		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7666	}
7667	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7668	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
7669	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
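	/*
	 * Note: the buf and uscsi chain types selected here may be switched
	 * to their *_NO_PM variants later in attach if sd_setup_pm()
	 * determines that power management is not enabled for this device.
	 */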
7670
7671	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
7672	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
7673	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
7674	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
7675
7676
7677	if (ISCD(un)) {
7678		un->un_additional_codes = sd_additional_codes;
7679	} else {
7680		un->un_additional_codes = NULL;
7681	}
7682
7683	/*
7684	 * Create the kstats here so they can be available for attach-time
7685	 * routines that send commands to the unit (either polled or via
7686	 * sd_send_scsi_cmd).
7687	 *
7688	 * Note: This is a critical sequence that needs to be maintained:
7689	 *	1) Instantiate the kstats here, before any routines using the
7690	 *	   iopath (i.e. sd_send_scsi_cmd).
7691	 *	2) Instantiate and initialize the partition stats
7692	 *	   (sd_set_pstats).
7693	 *	3) Initialize the error stats (sd_set_errstats), following
7694	 *	   sd_validate_geometry(),sd_register_devid(),
7695	 *	   and sd_cache_control().
7696	 */
7697
7698	un->un_stats = kstat_create(sd_label, instance,
7699	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
7700	if (un->un_stats != NULL) {
7701		un->un_stats->ks_lock = SD_MUTEX(un);
7702		kstat_install(un->un_stats);
7703	}
7704	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7705	    "sd_unit_attach: un:0x%p un_stats created\n", un);
7706
7707	sd_create_errstats(un, instance);
7708	if (un->un_errstats == NULL) {
7709		goto create_errstats_failed;
7710	}
7711	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7712	    "sd_unit_attach: un:0x%p errstats created\n", un);
7713
7714	/*
7715	 * The following if/else code was relocated here from below as part
7716	 * of the fix for bug (4430280). However with the default setup added
7717	 * on entry to this routine, it's no longer absolutely necessary for
7718	 * this to be before the call to sd_spin_up_unit.
7719	 */
7720	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
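		/*
		 * Command queueing is indicated either by the CmdQue bit,
		 * or, for devices reporting ANSI version 4 or 5 (SPC-2/
		 * SPC-3), by the basic queueing (BQue) bit in the inquiry
		 * data.
		 */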
7721		int tq_trigger_flag = (((devp->sd_inq->inq_ansi == 4) ||
7722		    (devp->sd_inq->inq_ansi == 5)) &&
7723		    devp->sd_inq->inq_bque) || devp->sd_inq->inq_cmdque;
7724
7725		/*
7726		 * If tagged queueing is supported by the target
7727		 * and by the host adapter then we will enable it
7728		 */
7729		un->un_tagflags = 0;
7730		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) && tq_trigger_flag &&
7731		    (un->un_f_arq_enabled == TRUE)) {
7732			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
7733			    1, 1) == 1) {
7734				un->un_tagflags = FLAG_STAG;
7735				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7736				    "sd_unit_attach: un:0x%p tag queueing "
7737				    "enabled\n", un);
7738			} else if (scsi_ifgetcap(SD_ADDRESS(un),
7739			    "untagged-qing", 0) == 1) {
7740				un->un_f_opt_queueing = TRUE;
7741				un->un_saved_throttle = un->un_throttle =
7742				    min(un->un_throttle, 3);
7743			} else {
7744				un->un_f_opt_queueing = FALSE;
7745				un->un_saved_throttle = un->un_throttle = 1;
7746			}
7747		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
7748		    == 1) && (un->un_f_arq_enabled == TRUE)) {
7749			/* The Host Adapter supports internal queueing. */
7750			un->un_f_opt_queueing = TRUE;
7751			un->un_saved_throttle = un->un_throttle =
7752			    min(un->un_throttle, 3);
7753		} else {
7754			un->un_f_opt_queueing = FALSE;
7755			un->un_saved_throttle = un->un_throttle = 1;
7756			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7757			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
7758		}
7759
7760		/*
7761		 * Enable large transfers for SATA/SAS drives
7762		 */
7763		if (SD_IS_SERIAL(un)) {
7764			un->un_max_xfer_size =
7765			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7766			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7767			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7768			    "sd_unit_attach: un:0x%p max transfer "
7769			    "size=0x%x\n", un, un->un_max_xfer_size);
7770
7771		}
7772
	/* Set up or tear down default wide operations for disks */
7774
7775		/*
7776		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
7777		 * and "ssd_max_xfer_size" to exist simultaneously on the same
7778		 * system and be set to different values. In the future this
7779		 * code may need to be updated when the ssd module is
7780		 * obsoleted and removed from the system. (4299588)
7781		 */
7782		if (SD_IS_PARALLEL_SCSI(un) &&
7783		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
7784		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
7785			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7786			    1, 1) == 1) {
7787				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7788				    "sd_unit_attach: un:0x%p Wide Transfer "
7789				    "enabled\n", un);
7790			}
7791
7792			/*
7793			 * If tagged queuing has also been enabled, then
7794			 * enable large xfers
7795			 */
7796			if (un->un_saved_throttle == sd_max_throttle) {
7797				un->un_max_xfer_size =
7798				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7799				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7800				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7801				    "sd_unit_attach: un:0x%p max transfer "
7802				    "size=0x%x\n", un, un->un_max_xfer_size);
7803			}
7804		} else {
7805			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7806			    0, 1) == 1) {
7807				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7808				    "sd_unit_attach: un:0x%p "
7809				    "Wide Transfer disabled\n", un);
7810			}
7811		}
7812	} else {
7813		un->un_tagflags = FLAG_STAG;
7814		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
7815		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
7816	}
7817
7818	/*
7819	 * If this target supports LUN reset, try to enable it.
7820	 */
7821	if (un->un_f_lun_reset_enabled) {
7822		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
7823			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7824			    "un:0x%p lun_reset capability set\n", un);
7825		} else {
7826			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7827			    "un:0x%p lun-reset capability not set\n", un);
7828		}
7829	}
7830
	/*
	 * Adjust the maximum transfer size. This is to fix
	 * the problem of partial DMA support on SPARC. Some
	 * HBA drivers, like aac, have a very small dma_attr_maxxfer
	 * size, which requires partial DMA support on SPARC.
	 * In the future the SPARC pci nexus driver may solve
	 * the problem instead of this fix.
	 */
7839	max_xfer_size = scsi_ifgetcap(SD_ADDRESS(un), "dma-max", 1);
7840	if ((max_xfer_size > 0) && (max_xfer_size < un->un_max_xfer_size)) {
7841		/* We need DMA partial even on sparc to ensure sddump() works */
7842		un->un_max_xfer_size = max_xfer_size;
7843		if (un->un_partial_dma_supported == 0)
7844			un->un_partial_dma_supported = 1;
7845	}
7846	if (ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7847	    DDI_PROP_DONTPASS, "buf_break", 0) == 1) {
7848		if (ddi_xbuf_attr_setup_brk(un->un_xbuf_attr,
7849		    un->un_max_xfer_size) == 1) {
7850			un->un_buf_breakup_supported = 1;
7851			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7852			    "un:0x%p Buf breakup enabled\n", un);
7853		}
7854	}
7855
7856	/*
7857	 * Set PKT_DMA_PARTIAL flag.
7858	 */
7859	if (un->un_partial_dma_supported == 1) {
7860		un->un_pkt_flags = PKT_DMA_PARTIAL;
7861	} else {
7862		un->un_pkt_flags = 0;
7863	}
7864
7865	/* Initialize sd_ssc_t for internal uscsi commands */
7866	ssc = sd_ssc_init(un);
7867	scsi_fm_init(devp);
7868
	/*
	 * Allocate memory for the SCSI FMA state.
	 */
7872	un->un_fm_private =
7873	    kmem_zalloc(sizeof (struct sd_fm_internal), KM_SLEEP);
7874	sfip = (struct sd_fm_internal *)un->un_fm_private;
7875	sfip->fm_ssc.ssc_uscsi_cmd = &sfip->fm_ucmd;
7876	sfip->fm_ssc.ssc_uscsi_info = &sfip->fm_uinfo;
7877	sfip->fm_ssc.ssc_un = un;
7878
7879	if (ISCD(un) ||
7880	    un->un_f_has_removable_media ||
7881	    devp->sd_fm_capable == DDI_FM_NOT_CAPABLE) {
		/*
		 * We don't touch CD-ROMs or DDI_FM_NOT_CAPABLE devices;
		 * their logging is unchanged.
		 */
7886		sfip->fm_log_level = SD_FM_LOG_NSUP;
7887	} else {
		/*
		 * If we get here, this is a non-CDROM, FM-capable device,
		 * and it will not keep the old scsi_log output in
		 * /var/adm/messages as before. The property "fm-scsi-log"
		 * controls whether the FM telemetry will be logged in
		 * /var/adm/messages.
		 */
7895		int fm_scsi_log;
7896		fm_scsi_log = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7897		    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "fm-scsi-log", 0);
7898
7899		if (fm_scsi_log)
7900			sfip->fm_log_level = SD_FM_LOG_EREPORT;
7901		else
7902			sfip->fm_log_level = SD_FM_LOG_SILENT;
7903	}
7904
7905	/*
7906	 * At this point in the attach, we have enough info in the
7907	 * soft state to be able to issue commands to the target.
7908	 *
7909	 * All command paths used below MUST issue their commands as
7910	 * SD_PATH_DIRECT. This is important as intermediate layers
7911	 * are not all initialized yet (such as PM).
7912	 */
7913
7914	/*
7915	 * Send a TEST UNIT READY command to the device. This should clear
7916	 * any outstanding UNIT ATTENTION that may be present.
7917	 *
7918	 * Note: Don't check for success, just track if there is a reservation,
7919	 * this is a throw away command to clear any unit attentions.
7920	 *
7921	 * Note: This MUST be the first command issued to the target during
7922	 * attach to ensure power on UNIT ATTENTIONS are cleared.
7923	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
7924	 * with attempts at spinning up a device with no media.
7925	 */
7926	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
7927	if (status != 0) {
7928		if (status == EACCES)
7929			reservation_flag = SD_TARGET_IS_RESERVED;
7930		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7931	}
7932
7933	/*
7934	 * If the device is NOT a removable media device, attempt to spin
7935	 * it up (using the START_STOP_UNIT command) and read its capacity
7936	 * (using the READ CAPACITY command).  Note, however, that either
7937	 * of these could fail and in some cases we would continue with
7938	 * the attach despite the failure (see below).
7939	 */
7940	if (un->un_f_descr_format_supported) {
7941
7942		switch (sd_spin_up_unit(ssc)) {
7943		case 0:
7944			/*
7945			 * Spin-up was successful; now try to read the
7946			 * capacity.  If successful then save the results
7947			 * and mark the capacity & lbasize as valid.
7948			 */
7949			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7950			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
7951
7952			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
7953			    &lbasize, SD_PATH_DIRECT);
7954
7955			switch (status) {
7956			case 0: {
7957				if (capacity > DK_MAX_BLOCKS) {
7958#ifdef _LP64
7959					if ((capacity + 1) >
7960					    SD_GROUP1_MAX_ADDRESS) {
7961						/*
7962						 * Enable descriptor format
7963						 * sense data so that we can
7964						 * get 64 bit sense data
7965						 * fields.
7966						 */
7967						sd_enable_descr_sense(ssc);
7968					}
7969#else
7970					/* 32-bit kernels can't handle this */
7971					scsi_log(SD_DEVINFO(un),
7972					    sd_label, CE_WARN,
7973					    "disk has %llu blocks, which "
7974					    "is too large for a 32-bit "
7975					    "kernel", capacity);
7976
7977#if defined(__i386) || defined(__amd64)
					/*
					 * A 1TB disk was treated as
					 * (1T - 512)B in the past, so it
					 * might have a valid VTOC and
					 * Solaris partitions; we have to
					 * allow it to continue to work.
					 */
					if (capacity - 1 > DK_MAX_BLOCKS)
7986#endif
7987					goto spinup_failed;
7988#endif
7989				}
7990
				/*
				 * It is not necessary to check here whether
				 * the device capacity is bigger than what
				 * the max HBA cdb can support, because
				 * sd_send_scsi_READ_CAPACITY retrieves the
				 * capacity via a USCSI command that is
				 * itself constrained by the max HBA cdb.
				 * sd_send_scsi_READ_CAPACITY returns EINVAL
				 * when a bigger cdb than the supported cdb
				 * length would be required; that case is
				 * handled in "case EINVAL" below.
				 */
8003
8004				/*
8005				 * The following relies on
8006				 * sd_send_scsi_READ_CAPACITY never
8007				 * returning 0 for capacity and/or lbasize.
8008				 */
8009				sd_update_block_info(un, lbasize, capacity);
8010
8011				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8012				    "sd_unit_attach: un:0x%p capacity = %ld "
8013				    "blocks; lbasize= %ld.\n", un,
8014				    un->un_blockcount, un->un_tgt_blocksize);
8015
8016				break;
8017			}
8018			case EINVAL:
8019				/*
8020				 * In the case where the max-cdb-length property
8021				 * is smaller than the required CDB length for
8022				 * a SCSI device, a target driver can fail to
8023				 * attach to that device.
8024				 */
8025				scsi_log(SD_DEVINFO(un),
8026				    sd_label, CE_WARN,
8027				    "disk capacity is too large "
8028				    "for current cdb length");
8029				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8030
8031				goto spinup_failed;
8032			case EACCES:
8033				/*
8034				 * Should never get here if the spin-up
8035				 * succeeded, but code it in anyway.
8036				 * From here, just continue with the attach...
8037				 */
8038				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8039				    "sd_unit_attach: un:0x%p "
8040				    "sd_send_scsi_READ_CAPACITY "
8041				    "returned reservation conflict\n", un);
8042				reservation_flag = SD_TARGET_IS_RESERVED;
8043				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8044				break;
8045			default:
8046				/*
8047				 * Likewise, should never get here if the
8048				 * spin-up succeeded. Just continue with
8049				 * the attach...
8050				 */
8051				if (status == EIO)
8052					sd_ssc_assessment(ssc,
8053					    SD_FMT_STATUS_CHECK);
8054				else
8055					sd_ssc_assessment(ssc,
8056					    SD_FMT_IGNORE);
8057				break;
8058			}
8059			break;
8060		case EACCES:
8061			/*
8062			 * Device is reserved by another host.  In this case
8063			 * we could not spin it up or read the capacity, but
8064			 * we continue with the attach anyway.
8065			 */
8066			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8067			    "sd_unit_attach: un:0x%p spin-up reservation "
8068			    "conflict.\n", un);
8069			reservation_flag = SD_TARGET_IS_RESERVED;
8070			break;
8071		default:
8072			/* Fail the attach if the spin-up failed. */
8073			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8074			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8075			goto spinup_failed;
8076		}
8077
8078	}
8079
8080	/*
	 * Check to see if this is an MMC drive.
8082	 */
8083	if (ISCD(un)) {
8084		sd_set_mmc_caps(ssc);
8085	}
8086
8087	/*
8088	 * Add a zero-length attribute to tell the world we support
8089	 * kernel ioctls (for layered drivers)
8090	 */
8091	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8092	    DDI_KERNEL_IOCTL, NULL, 0);
8093
8094	/*
8095	 * Add a boolean property to tell the world we support
8096	 * the B_FAILFAST flag (for layered drivers)
8097	 */
8098	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8099	    "ddi-failfast-supported", NULL, 0);
8100
8101	/*
8102	 * Initialize power management
8103	 */
8104	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8105	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8106	sd_setup_pm(ssc, devi);
8107	if (un->un_f_pm_is_enabled == FALSE) {
8108		/*
8109		 * For performance, point to a jump table that does
8110		 * not include pm.
8111		 * The direct and priority chains don't change with PM.
8112		 *
8113		 * Note: this is currently done based on individual device
8114		 * capabilities. When an interface for determining system
8115		 * power enabled state becomes available, or when additional
8116		 * layers are added to the command chain, these values will
8117		 * have to be re-evaluated for correctness.
8118		 */
8119		if (un->un_f_non_devbsize_supported) {
8120			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8121		} else {
8122			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8123		}
8124		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8125	}
8126
8127	/*
8128	 * This property is set to 0 by HA software to avoid retries
8129	 * on a reserved disk. (The preferred property name is
8130	 * "retry-on-reservation-conflict") (1189689)
8131	 *
	 * Note: The use of a global here can have unintended consequences. A
	 * per-instance variable is preferable to match the capabilities of
	 * different underlying HBAs (4402600).
8135	 */
8136	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8137	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8138	    sd_retry_on_reservation_conflict);
8139	if (sd_retry_on_reservation_conflict != 0) {
8140		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8141		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8142		    sd_retry_on_reservation_conflict);
8143	}
8144
8145	/* Set up options for QFULL handling. */
8146	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8147	    "qfull-retries", -1)) != -1) {
8148		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8149		    rval, 1);
8150	}
8151	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8152	    "qfull-retry-interval", -1)) != -1) {
8153		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8154		    rval, 1);
8155	}
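	/*
	 * Both "qfull-retries" and "qfull-retry-interval" are HBA
	 * capabilities (see scsi_ifsetcap(9F)); when the properties are
	 * present in the .conf file, their values are simply passed
	 * through to the HBA here.
	 */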
8156
8157	/*
8158	 * This just prints a message that announces the existence of the
8159	 * device. The message is always printed in the system logfile, but
8160	 * only appears on the console if the system is booted with the
8161	 * -v (verbose) argument.
8162	 */
8163	ddi_report_dev(devi);
8164
8165	un->un_mediastate = DKIO_NONE;
8166
8167	/*
	 * Check if this is an SSD (Solid State Drive).
8169	 */
8170	sd_check_solid_state(ssc);
8171
8172	/*
8173	 * Check whether the drive is in emulation mode.
8174	 */
8175	sd_check_emulation_mode(ssc);
8176
8177	cmlb_alloc_handle(&un->un_cmlbhandle);
8178
8179#if defined(__i386) || defined(__amd64)
8180	/*
8181	 * On x86, compensate for off-by-1 legacy error
8182	 */
8183	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
8184	    (lbasize == un->un_sys_blocksize))
8185		offbyone = CMLB_OFF_BY_ONE;
8186#endif
8187
8188	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
8189	    VOID2BOOLEAN(un->un_f_has_removable_media != 0),
8190	    VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
8191	    un->un_node_type, offbyone, un->un_cmlbhandle,
8192	    (void *)SD_PATH_DIRECT) != 0) {
8193		goto cmlb_attach_failed;
8194	}
8195
8196
8197	/*
	 * Read and validate the device's geometry (i.e., the disk label).
8199	 * A new unformatted drive will not have a valid geometry, but
8200	 * the driver needs to successfully attach to this device so
8201	 * the drive can be formatted via ioctls.
8202	 */
	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
	    (void *)SD_PATH_DIRECT) == 0) ? 1 : 0;
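	/*
	 * Note: geom_label_valid is consulted again below; partition kstats
	 * are only set up (sd_set_pstats) when a valid label was found.
	 */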
8205
8206	mutex_enter(SD_MUTEX(un));
8207
8208	/*
8209	 * Read and initialize the devid for the unit.
8210	 */
8211	if (un->un_f_devid_supported) {
8212		sd_register_devid(ssc, devi, reservation_flag);
8213	}
8214	mutex_exit(SD_MUTEX(un));
8215
8216#if (defined(__fibre))
8217	/*
8218	 * Register callbacks for fibre only.  You can't do this solely
8219	 * on the basis of the devid_type because this is hba specific.
8220	 * We need to query our hba capabilities to find out whether to
8221	 * register or not.
8222	 */
8223	if (un->un_f_is_fibre) {
8224		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8225			sd_init_event_callbacks(un);
8226			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8227			    "sd_unit_attach: un:0x%p event callbacks inserted",
8228			    un);
8229		}
8230	}
8231#endif
8232
8233	if (un->un_f_opt_disable_cache == TRUE) {
8234		/*
8235		 * Disable both read cache and write cache.  This is
8236		 * the historic behavior of the keywords in the config file.
8237		 */
8238		if (sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8239		    0) {
8240			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8241			    "sd_unit_attach: un:0x%p Could not disable "
8242			    "caching", un);
8243			goto devid_failed;
8244		}
8245	}
8246
8247	/*
8248	 * Check the value of the WCE bit now and
8249	 * set un_f_write_cache_enabled accordingly.
8250	 */
8251	(void) sd_get_write_cache_enabled(ssc, &wc_enabled);
8252	mutex_enter(SD_MUTEX(un));
8253	un->un_f_write_cache_enabled = (wc_enabled != 0);
8254	mutex_exit(SD_MUTEX(un));
8255
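	/*
	 * When the target block size differs from DEV_BSIZE (or
	 * read-modify-write is explicitly enabled), create the write-map
	 * kmem cache used to track in-flight RMW ranges.
	 */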
8256	if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
8257	    un->un_tgt_blocksize != DEV_BSIZE) ||
8258	    un->un_f_enable_rmw) {
8259		if (!(un->un_wm_cache)) {
8260			(void) snprintf(name_str, sizeof (name_str),
8261			    "%s%d_cache",
8262			    ddi_driver_name(SD_DEVINFO(un)),
8263			    ddi_get_instance(SD_DEVINFO(un)));
8264			un->un_wm_cache = kmem_cache_create(
8265			    name_str, sizeof (struct sd_w_map),
8266			    8, sd_wm_cache_constructor,
8267			    sd_wm_cache_destructor, NULL,
8268			    (void *)un, NULL, 0);
8269			if (!(un->un_wm_cache)) {
8270				goto wm_cache_failed;
8271			}
8272		}
8273	}
8274
8275	/*
8276	 * Check the value of the NV_SUP bit and set
8277	 * un_f_suppress_cache_flush accordingly.
8278	 */
8279	sd_get_nv_sup(ssc);
8280
8281	/*
8282	 * Find out what type of reservation this disk supports.
8283	 */
8284	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS, 0, NULL);
8285
8286	switch (status) {
8287	case 0:
8288		/*
8289		 * SCSI-3 reservations are supported.
8290		 */
8291		un->un_reservation_type = SD_SCSI3_RESERVATION;
8292		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8293		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8294		break;
8295	case ENOTSUP:
8296		/*
8297		 * The PERSISTENT RESERVE IN command would not be recognized by
8298		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8299		 */
8300		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8301		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8302		un->un_reservation_type = SD_SCSI2_RESERVATION;
8303
8304		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8305		break;
8306	default:
8307		/*
8308		 * default to SCSI-3 reservations
8309		 */
8310		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8311		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8312		un->un_reservation_type = SD_SCSI3_RESERVATION;
8313
8314		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8315		break;
8316	}
8317
8318	/*
8319	 * Set the pstat and error stat values here, so data obtained during the
8320	 * previous attach-time routines is available.
8321	 *
8322	 * Note: This is a critical sequence that needs to be maintained:
8323	 *	1) Instantiate the kstats before any routines using the iopath
8324	 *	   (i.e. sd_send_scsi_cmd).
	 *	2) Initialize the error stats (sd_set_errstats) and partition
	 *	   stats (sd_set_pstats) here, following
8327	 *	   cmlb_validate_geometry(), sd_register_devid(), and
8328	 *	   sd_cache_control().
8329	 */
8330
8331	if (un->un_f_pkstats_enabled && geom_label_valid) {
8332		sd_set_pstats(un);
8333		SD_TRACE(SD_LOG_IO_PARTITION, un,
8334		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8335	}
8336
8337	sd_set_errstats(un);
8338	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8339	    "sd_unit_attach: un:0x%p errstats set\n", un);
8340
8341
8342	/*
	 * After successfully attaching an instance, we record how many luns
	 * have been attached on the corresponding target and controller for
	 * parallel SCSI. This information is used when sd tries to set the
	 * tagged queuing capability in the HBA.
8347	 */
8348	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8349		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
8350	}
8351
8352	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8353	    "sd_unit_attach: un:0x%p exit success\n", un);
8354
8355	/* Uninitialize sd_ssc_t pointer */
8356	sd_ssc_fini(ssc);
8357
8358	return (DDI_SUCCESS);
8359
8360	/*
8361	 * An error occurred during the attach; clean up & return failure.
8362	 */
8363wm_cache_failed:
8364devid_failed:
8365
8366setup_pm_failed:
8367	ddi_remove_minor_node(devi, NULL);
8368
8369cmlb_attach_failed:
8370	/*
8371	 * Cleanup from the scsi_ifsetcap() calls (437868)
8372	 */
8373	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8374	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8375
8376	/*
	 * Refer to the comments on setting tagged-qing at the beginning of
8378	 * sd_unit_attach. We can only disable tagged queuing when there is
8379	 * no lun attached on the target.
8380	 */
8381	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
8382		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8383	}
8384
8385	if (un->un_f_is_fibre == FALSE) {
8386		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8387	}
8388
8389spinup_failed:
8390
8391	/* Uninitialize sd_ssc_t pointer */
8392	sd_ssc_fini(ssc);
8393
8394	mutex_enter(SD_MUTEX(un));
8395
8396	/* Deallocate SCSI FMA memory spaces */
8397	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8398
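	/*
	 * Timeout cancellation pattern used below: snapshot the timeout
	 * id and NULL the field while holding SD_MUTEX, then drop the
	 * mutex around the untimeout(9F) call, since untimeout() waits
	 * for a running callback and the callbacks themselves may need
	 * to acquire SD_MUTEX.
	 */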
8399	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8400	if (un->un_direct_priority_timeid != NULL) {
8401		timeout_id_t temp_id = un->un_direct_priority_timeid;
8402		un->un_direct_priority_timeid = NULL;
8403		mutex_exit(SD_MUTEX(un));
8404		(void) untimeout(temp_id);
8405		mutex_enter(SD_MUTEX(un));
8406	}
8407
8408	/* Cancel any pending start/stop timeouts */
8409	if (un->un_startstop_timeid != NULL) {
8410		timeout_id_t temp_id = un->un_startstop_timeid;
8411		un->un_startstop_timeid = NULL;
8412		mutex_exit(SD_MUTEX(un));
8413		(void) untimeout(temp_id);
8414		mutex_enter(SD_MUTEX(un));
8415	}
8416
8417	/* Cancel any pending reset-throttle timeouts */
8418	if (un->un_reset_throttle_timeid != NULL) {
8419		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8420		un->un_reset_throttle_timeid = NULL;
8421		mutex_exit(SD_MUTEX(un));
8422		(void) untimeout(temp_id);
8423		mutex_enter(SD_MUTEX(un));
8424	}
8425
8426	/* Cancel rmw warning message timeouts */
8427	if (un->un_rmw_msg_timeid != NULL) {
8428		timeout_id_t temp_id = un->un_rmw_msg_timeid;
8429		un->un_rmw_msg_timeid = NULL;
8430		mutex_exit(SD_MUTEX(un));
8431		(void) untimeout(temp_id);
8432		mutex_enter(SD_MUTEX(un));
8433	}
8434
8435	/* Cancel any pending retry timeouts */
8436	if (un->un_retry_timeid != NULL) {
8437		timeout_id_t temp_id = un->un_retry_timeid;
8438		un->un_retry_timeid = NULL;
8439		mutex_exit(SD_MUTEX(un));
8440		(void) untimeout(temp_id);
8441		mutex_enter(SD_MUTEX(un));
8442	}
8443
8444	/* Cancel any pending delayed cv broadcast timeouts */
8445	if (un->un_dcvb_timeid != NULL) {
8446		timeout_id_t temp_id = un->un_dcvb_timeid;
8447		un->un_dcvb_timeid = NULL;
8448		mutex_exit(SD_MUTEX(un));
8449		(void) untimeout(temp_id);
8450		mutex_enter(SD_MUTEX(un));
8451	}
8452
8453	mutex_exit(SD_MUTEX(un));
8454
	/* There should not be any in-progress I/O, so ASSERT that here */
8456	ASSERT(un->un_ncmds_in_transport == 0);
8457	ASSERT(un->un_ncmds_in_driver == 0);
8458
8459	/* Do not free the softstate if the callback routine is active */
8460	sd_sync_with_callback(un);
8461
8462	/*
8463	 * Partition stats apparently are not used with removables. These would
8464	 * not have been created during attach, so no need to clean them up...
8465	 */
8466	if (un->un_errstats != NULL) {
8467		kstat_delete(un->un_errstats);
8468		un->un_errstats = NULL;
8469	}
8470
8471create_errstats_failed:
8472
8473	if (un->un_stats != NULL) {
8474		kstat_delete(un->un_stats);
8475		un->un_stats = NULL;
8476	}
8477
8478	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8479	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8480
8481	ddi_prop_remove_all(devi);
8482	sema_destroy(&un->un_semoclose);
8483	cv_destroy(&un->un_state_cv);
8484
8485getrbuf_failed:
8486
8487	sd_free_rqs(un);
8488
8489alloc_rqs_failed:
8490
8491	devp->sd_private = NULL;
8492	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8493
8494get_softstate_failed:
8495	/*
8496	 * Note: the man pages are unclear as to whether or not doing a
8497	 * ddi_soft_state_free(sd_state, instance) is the right way to
8498	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8499	 * ddi_get_soft_state() fails.  The implication seems to be
8500	 * that the get_soft_state cannot fail if the zalloc succeeds.
8501	 */
8502#ifndef XPV_HVM_DRIVER
8503	ddi_soft_state_free(sd_state, instance);
8504#endif /* !XPV_HVM_DRIVER */
8505
8506probe_failed:
8507	scsi_unprobe(devp);
8508
8509	return (DDI_FAILURE);
8510}
8511
8512
8513/*
8514 *    Function: sd_unit_detach
8515 *
8516 * Description: Performs DDI_DETACH processing for sddetach().
8517 *
8518 * Return Code: DDI_SUCCESS
8519 *		DDI_FAILURE
8520 *
8521 *     Context: Kernel thread context
8522 */
8523
8524static int
8525sd_unit_detach(dev_info_t *devi)
8526{
8527	struct scsi_device	*devp;
8528	struct sd_lun		*un;
8529	int			i;
8530	int			tgt;
8531	dev_t			dev;
8532	dev_info_t		*pdip = ddi_get_parent(devi);
8533#ifndef XPV_HVM_DRIVER
8534	int			instance = ddi_get_instance(devi);
8535#endif /* !XPV_HVM_DRIVER */
8536
8537	mutex_enter(&sd_detach_mutex);
8538
8539	/*
8540	 * Fail the detach for any of the following:
8541	 *  - Unable to get the sd_lun struct for the instance
8542	 *  - A layered driver has an outstanding open on the instance
8543	 *  - Another thread is already detaching this instance
8544	 *  - Another thread is currently performing an open
8545	 */
8546	devp = ddi_get_driver_private(devi);
8547	if ((devp == NULL) ||
8548	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8549	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8550	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8551		mutex_exit(&sd_detach_mutex);
8552		return (DDI_FAILURE);
8553	}
8554
8555	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8556
8557	/*
8558	 * Mark this instance as currently in a detach, to inhibit any
8559	 * opens from a layered driver.
8560	 */
8561	un->un_detach_count++;
8562	mutex_exit(&sd_detach_mutex);
8563
8564	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8565	    SCSI_ADDR_PROP_TARGET, -1);
8566
8567	dev = sd_make_device(SD_DEVINFO(un));
8568
8569#ifndef lint
8570	_NOTE(COMPETING_THREADS_NOW);
8571#endif
8572
8573	mutex_enter(SD_MUTEX(un));
8574
8575	/*
8576	 * Fail the detach if there are any outstanding layered
8577	 * opens on this device.
8578	 */
8579	for (i = 0; i < NDKMAP; i++) {
8580		if (un->un_ocmap.lyropen[i] != 0) {
8581			goto err_notclosed;
8582		}
8583	}
8584
8585	/*
8586	 * Verify there are NO outstanding commands issued to this device.
8587	 * ie, un_ncmds_in_transport == 0.
8588	 * It's possible to have outstanding commands through the physio
8589	 * code path, even though everything's closed.
8590	 */
8591	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8592	    (un->un_direct_priority_timeid != NULL) ||
8593	    (un->un_state == SD_STATE_RWAIT)) {
8594		mutex_exit(SD_MUTEX(un));
8595		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8596		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8597		goto err_stillbusy;
8598	}
8599
8600	/*
8601	 * If we have the device reserved, release the reservation.
8602	 */
8603	if ((un->un_resvd_status & SD_RESERVE) &&
8604	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8605		mutex_exit(SD_MUTEX(un));
8606		/*
8607		 * Note: sd_reserve_release sends a command to the device
8608		 * via the sd_ioctlcmd() path, and can sleep.
8609		 */
8610		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8611			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8612			    "sd_dr_detach: Cannot release reservation \n");
8613		}
8614	} else {
8615		mutex_exit(SD_MUTEX(un));
8616	}
8617
8618	/*
8619	 * Untimeout any reserve recover, throttle reset, restart unit
8620	 * and delayed broadcast timeout threads. Protect the timeout pointer
8621	 * from getting nulled by their callback functions.
8622	 */
8623	mutex_enter(SD_MUTEX(un));
8624	if (un->un_resvd_timeid != NULL) {
8625		timeout_id_t temp_id = un->un_resvd_timeid;
8626		un->un_resvd_timeid = NULL;
8627		mutex_exit(SD_MUTEX(un));
8628		(void) untimeout(temp_id);
8629		mutex_enter(SD_MUTEX(un));
8630	}
8631
8632	if (un->un_reset_throttle_timeid != NULL) {
8633		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8634		un->un_reset_throttle_timeid = NULL;
8635		mutex_exit(SD_MUTEX(un));
8636		(void) untimeout(temp_id);
8637		mutex_enter(SD_MUTEX(un));
8638	}
8639
8640	if (un->un_startstop_timeid != NULL) {
8641		timeout_id_t temp_id = un->un_startstop_timeid;
8642		un->un_startstop_timeid = NULL;
8643		mutex_exit(SD_MUTEX(un));
8644		(void) untimeout(temp_id);
8645		mutex_enter(SD_MUTEX(un));
8646	}
8647
8648	if (un->un_rmw_msg_timeid != NULL) {
8649		timeout_id_t temp_id = un->un_rmw_msg_timeid;
8650		un->un_rmw_msg_timeid = NULL;
8651		mutex_exit(SD_MUTEX(un));
8652		(void) untimeout(temp_id);
8653		mutex_enter(SD_MUTEX(un));
8654	}
8655
8656	if (un->un_dcvb_timeid != NULL) {
8657		timeout_id_t temp_id = un->un_dcvb_timeid;
8658		un->un_dcvb_timeid = NULL;
8659		mutex_exit(SD_MUTEX(un));
8660		(void) untimeout(temp_id);
8661	} else {
8662		mutex_exit(SD_MUTEX(un));
8663	}
8664
8665	/* Remove any pending reservation reclaim requests for this device */
8666	sd_rmv_resv_reclaim_req(dev);
8667
8668	mutex_enter(SD_MUTEX(un));
8669
8670	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8671	if (un->un_direct_priority_timeid != NULL) {
8672		timeout_id_t temp_id = un->un_direct_priority_timeid;
8673		un->un_direct_priority_timeid = NULL;
8674		mutex_exit(SD_MUTEX(un));
8675		(void) untimeout(temp_id);
8676		mutex_enter(SD_MUTEX(un));
8677	}
8678
8679	/* Cancel any active multi-host disk watch thread requests */
8680	if (un->un_mhd_token != NULL) {
8681		mutex_exit(SD_MUTEX(un));
		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8683		if (scsi_watch_request_terminate(un->un_mhd_token,
8684		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8685			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8686			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8687			/*
8688			 * Note: We are returning here after having removed
8689			 * some driver timeouts above. This is consistent with
8690			 * the legacy implementation but perhaps the watch
8691			 * terminate call should be made with the wait flag set.
8692			 */
8693			goto err_stillbusy;
8694		}
8695		mutex_enter(SD_MUTEX(un));
8696		un->un_mhd_token = NULL;
8697	}
8698
8699	if (un->un_swr_token != NULL) {
8700		mutex_exit(SD_MUTEX(un));
8701		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8702		if (scsi_watch_request_terminate(un->un_swr_token,
8703		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8704			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8705			    "sd_dr_detach: Cannot cancel swr watch request\n");
8706			/*
8707			 * Note: We are returning here after having removed
8708			 * some driver timeouts above. This is consistent with
8709			 * the legacy implementation but perhaps the watch
8710			 * terminate call should be made with the wait flag set.
8711			 */
8712			goto err_stillbusy;
8713		}
8714		mutex_enter(SD_MUTEX(un));
8715		un->un_swr_token = NULL;
8716	}
8717
8718	mutex_exit(SD_MUTEX(un));
8719
8720	/*
	 * Clear any scsi_reset_notifies. This is safe to call even if
	 * no reset notify was registered for this instance.
	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8724	 */
8725	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8726	    sd_mhd_reset_notify_cb, (caddr_t)un);
8727
8728	/*
	 * Protect the timeout pointers from getting nulled by their
	 * callback functions during the cancellation process, in which
	 * case untimeout could be invoked with a null value.
8732	 */
8733	_NOTE(NO_COMPETING_THREADS_NOW);
8734
8735	mutex_enter(&un->un_pm_mutex);
8736	if (un->un_pm_idle_timeid != NULL) {
8737		timeout_id_t temp_id = un->un_pm_idle_timeid;
8738		un->un_pm_idle_timeid = NULL;
8739		mutex_exit(&un->un_pm_mutex);
8740
8741		/*
8742		 * Timeout is active; cancel it.
		 * Note that it'll never be active on a device
		 * that does not support PM, so we don't have
		 * to check before calling pm_idle_component.
8746		 */
8747		(void) untimeout(temp_id);
8748		(void) pm_idle_component(SD_DEVINFO(un), 0);
8749		mutex_enter(&un->un_pm_mutex);
8750	}
8751
8752	/*
	 * Check whether there is already a timeout scheduled for power
	 * management. If yes, then don't lower the power here; that's
	 * the timeout handler's job.
8756	 */
8757	if (un->un_pm_timeid != NULL) {
8758		timeout_id_t temp_id = un->un_pm_timeid;
8759		un->un_pm_timeid = NULL;
8760		mutex_exit(&un->un_pm_mutex);
8761		/*
8762		 * Timeout is active; cancel it.
		 * Note that it'll never be active on a device
		 * that does not support PM, so we don't have
		 * to check before calling pm_idle_component.
8766		 */
8767		(void) untimeout(temp_id);
8768		(void) pm_idle_component(SD_DEVINFO(un), 0);
8769
8770	} else {
8771		mutex_exit(&un->un_pm_mutex);
8772		if ((un->un_f_pm_is_enabled == TRUE) &&
8773		    (pm_lower_power(SD_DEVINFO(un), 0, SD_PM_STATE_STOPPED(un))
8774		    != DDI_SUCCESS)) {
8775			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8776		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8777			/*
8778			 * Fix for bug: 4297749, item # 13
8779			 * The above test now includes a check to see if PM is
			 * supported by this device before calling
			 * pm_lower_power().
8782			 * Note, the following is not dead code. The call to
8783			 * pm_lower_power above will generate a call back into
8784			 * our sdpower routine which might result in a timeout
8785			 * handler getting activated. Therefore the following
8786			 * code is valid and necessary.
8787			 */
8788			mutex_enter(&un->un_pm_mutex);
8789			if (un->un_pm_timeid != NULL) {
8790				timeout_id_t temp_id = un->un_pm_timeid;
8791				un->un_pm_timeid = NULL;
8792				mutex_exit(&un->un_pm_mutex);
8793				(void) untimeout(temp_id);
8794				(void) pm_idle_component(SD_DEVINFO(un), 0);
8795			} else {
8796				mutex_exit(&un->un_pm_mutex);
8797			}
8798		}
8799	}
8800
8801	/*
8802	 * Cleanup from the scsi_ifsetcap() calls (437868)
8803	 * Relocated here from above to be after the call to
8804	 * pm_lower_power, which was getting errors.
8805	 */
8806	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8807	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8808
8809	/*
	 * Currently, tagged queuing is supported per target by the HBA.
	 * Setting it on one lun instance actually sets the capability of
	 * the whole target in the HBA, which affects the other luns already
	 * attached on the same target. So during detach, we can only disable
	 * this capability when this is the last lun left on the target. By
	 * doing this, we assume a target has the same tagged queuing
	 * capability for every lun. The condition can be removed when HBAs
	 * are changed to support per-lun tagged queuing.
8818	 */
8819	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
8820		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8821	}
8822
8823	if (un->un_f_is_fibre == FALSE) {
8824		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8825	}
8826
8827	/*
8828	 * Remove any event callbacks, fibre only
8829	 */
8830	if (un->un_f_is_fibre == TRUE) {
8831		if ((un->un_insert_event != NULL) &&
8832		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
8833		    DDI_SUCCESS)) {
8834			/*
8835			 * Note: We are returning here after having done
8836			 * substantial cleanup above. This is consistent
8837			 * with the legacy implementation but this may not
8838			 * be the right thing to do.
8839			 */
8840			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8841			    "sd_dr_detach: Cannot cancel insert event\n");
8842			goto err_remove_event;
8843		}
8844		un->un_insert_event = NULL;
8845
8846		if ((un->un_remove_event != NULL) &&
8847		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
8848		    DDI_SUCCESS)) {
8849			/*
8850			 * Note: We are returning here after having done
8851			 * substantial cleanup above. This is consistent
8852			 * with the legacy implementation but this may not
8853			 * be the right thing to do.
8854			 */
8855			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8856			    "sd_dr_detach: Cannot cancel remove event\n");
8857			goto err_remove_event;
8858		}
8859		un->un_remove_event = NULL;
8860	}
8861
8862	/* Do not free the softstate if the callback routine is active */
8863	sd_sync_with_callback(un);
8864
8865	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
8866	cmlb_free_handle(&un->un_cmlbhandle);
8867
8868	/*
	 * Hold the detach mutex here, to make sure that no other thread
	 * can ever access a (partially) freed soft state structure.
8871	 */
8872	mutex_enter(&sd_detach_mutex);
8873
8874	/*
8875	 * Clean up the soft state struct.
8876	 * Cleanup is done in reverse order of allocs/inits.
8877	 * At this point there should be no competing threads anymore.
8878	 */
8879
8880	scsi_fm_fini(devp);
8881
8882	/*
8883	 * Deallocate memory for SCSI FMA.
8884	 */
8885	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8886
8887	/*
8888	 * Unregister and free device id if it was not registered
8889	 * by the transport.
8890	 */
8891	if (un->un_f_devid_transport_defined == FALSE)
8892		ddi_devid_unregister(devi);
8893
8894	/*
	 * Free the devid structure if it was allocated earlier (by
	 * ddi_devid_init() or ddi_devid_get()).
8897	 */
8898	if (un->un_devid) {
8899		ddi_devid_free(un->un_devid);
8900		un->un_devid = NULL;
8901	}
8902
8903	/*
8904	 * Destroy wmap cache if it exists.
8905	 */
8906	if (un->un_wm_cache != NULL) {
8907		kmem_cache_destroy(un->un_wm_cache);
8908		un->un_wm_cache = NULL;
8909	}
8910
8911	/*
8912	 * kstat cleanup is done in detach for all device types (4363169).
8913	 * We do not want to fail detach if the device kstats are not deleted
	 * since there is confusion about the devo_refcnt for the device.
8915	 * We just delete the kstats and let detach complete successfully.
8916	 */
8917	if (un->un_stats != NULL) {
8918		kstat_delete(un->un_stats);
8919		un->un_stats = NULL;
8920	}
8921	if (un->un_errstats != NULL) {
8922		kstat_delete(un->un_errstats);
8923		un->un_errstats = NULL;
8924	}
8925
8926	/* Remove partition stats */
8927	if (un->un_f_pkstats_enabled) {
8928		for (i = 0; i < NSDMAP; i++) {
8929			if (un->un_pstats[i] != NULL) {
8930				kstat_delete(un->un_pstats[i]);
8931				un->un_pstats[i] = NULL;
8932			}
8933		}
8934	}
8935
8936	/* Remove xbuf registration */
8937	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8938	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8939
8940	/* Remove driver properties */
8941	ddi_prop_remove_all(devi);
8942
8943	mutex_destroy(&un->un_pm_mutex);
8944	cv_destroy(&un->un_pm_busy_cv);
8945
8946	cv_destroy(&un->un_wcc_cv);
8947
8948	/* Open/close semaphore */
8949	sema_destroy(&un->un_semoclose);
8950
8951	/* Removable media condvar. */
8952	cv_destroy(&un->un_state_cv);
8953
8954	/* Suspend/resume condvar. */
8955	cv_destroy(&un->un_suspend_cv);
8956	cv_destroy(&un->un_disk_busy_cv);
8957
8958	sd_free_rqs(un);
8959
8960	/* Free up soft state */
8961	devp->sd_private = NULL;
8962
8963	bzero(un, sizeof (struct sd_lun));
8964#ifndef XPV_HVM_DRIVER
8965	ddi_soft_state_free(sd_state, instance);
8966#endif /* !XPV_HVM_DRIVER */
8967
8968	mutex_exit(&sd_detach_mutex);
8969
8970	/* This frees up the INQUIRY data associated with the device. */
8971	scsi_unprobe(devp);
8972
8973	/*
	 * After successfully detaching an instance, we update the count of
	 * how many luns are attached on the corresponding target and
	 * controller for parallel SCSI. This information is used when sd
	 * tries to set the tagged queuing capability in the HBA.
	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un)
	 * to check if the device is parallel SCSI. However, we don't need
	 * to check here because we've already checked during attach. No
	 * device that is not parallel SCSI is in the chain.
8982	 */
8983	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8984		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
8985	}
8986
8987	return (DDI_SUCCESS);
8988
8989err_notclosed:
8990	mutex_exit(SD_MUTEX(un));
8991
8992err_stillbusy:
8993	_NOTE(NO_COMPETING_THREADS_NOW);
8994
8995err_remove_event:
8996	mutex_enter(&sd_detach_mutex);
8997	un->un_detach_count--;
8998	mutex_exit(&sd_detach_mutex);
8999
9000	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9001	return (DDI_FAILURE);
9002}
9003
9004
9005/*
9006 *    Function: sd_create_errstats
9007 *
9008 * Description: This routine instantiates the device error stats.
9009 *
9010 *		Note: During attach the stats are instantiated first so they are
9011 *		available for attach-time routines that utilize the driver
9012 *		iopath to send commands to the device. The stats are initialized
9013 *		separately so data obtained during some attach-time routines is
9014 *		available. (4362483)
9015 *
9016 *   Arguments: un - driver soft state (unit) structure
9017 *		instance - driver instance
9018 *
9019 *     Context: Kernel thread context
9020 */
9021
9022static void
9023sd_create_errstats(struct sd_lun *un, int instance)
9024{
9025	struct	sd_errstats	*stp;
9026	char	kstatmodule_err[KSTAT_STRLEN];
9027	char	kstatname[KSTAT_STRLEN];
9028	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9029
9030	ASSERT(un != NULL);
9031
9032	if (un->un_errstats != NULL) {
9033		return;
9034	}
9035
9036	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9037	    "%serr", sd_label);
9038	(void) snprintf(kstatname, sizeof (kstatname),
9039	    "%s%d,err", sd_label, instance);
9040
9041	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9042	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9043
9044	if (un->un_errstats == NULL) {
9045		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9046		    "sd_create_errstats: Failed kstat_create\n");
9047		return;
9048	}
9049
9050	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9051	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9052	    KSTAT_DATA_UINT32);
9053	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9054	    KSTAT_DATA_UINT32);
9055	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9056	    KSTAT_DATA_UINT32);
9057	kstat_named_init(&stp->sd_vid,		"Vendor",
9058	    KSTAT_DATA_CHAR);
9059	kstat_named_init(&stp->sd_pid,		"Product",
9060	    KSTAT_DATA_CHAR);
9061	kstat_named_init(&stp->sd_revision,	"Revision",
9062	    KSTAT_DATA_CHAR);
9063	kstat_named_init(&stp->sd_serial,	"Serial No",
9064	    KSTAT_DATA_CHAR);
9065	kstat_named_init(&stp->sd_capacity,	"Size",
9066	    KSTAT_DATA_ULONGLONG);
9067	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9068	    KSTAT_DATA_UINT32);
9069	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9070	    KSTAT_DATA_UINT32);
9071	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9072	    KSTAT_DATA_UINT32);
9073	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9074	    KSTAT_DATA_UINT32);
9075	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9076	    KSTAT_DATA_UINT32);
9077	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9078	    KSTAT_DATA_UINT32);
9079
9080	un->un_errstats->ks_private = un;
9081	un->un_errstats->ks_update  = nulldev;
9082
9083	kstat_install(un->un_errstats);
9084}
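
/*
 * For reference: the "device_error" kstats created above are what
 * utilities such as iostat(1M) with the -E option report; e.g.
 * instance 2 would show up under the name "sd2,err" (assuming
 * sd_label is "sd").
 */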
9085
9086
9087/*
9088 *    Function: sd_set_errstats
9089 *
9090 * Description: This routine sets the value of the vendor id, product id,
9091 *		revision, serial number, and capacity device error stats.
9092 *
9093 *		Note: During attach the stats are instantiated first so they are
9094 *		available for attach-time routines that utilize the driver
9095 *		iopath to send commands to the device. The stats are initialized
9096 *		separately so data obtained during some attach-time routines is
9097 *		available. (4362483)
9098 *
9099 *   Arguments: un - driver soft state (unit) structure
9100 *
9101 *     Context: Kernel thread context
9102 */
9103
9104static void
9105sd_set_errstats(struct sd_lun *un)
9106{
9107	struct	sd_errstats	*stp;
9108
9109	ASSERT(un != NULL);
9110	ASSERT(un->un_errstats != NULL);
9111	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9112	ASSERT(stp != NULL);
9113	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9114	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9115	(void) strncpy(stp->sd_revision.value.c,
9116	    un->un_sd->sd_inq->inq_revision, 4);
9117
9118	/*
	 * All the errstats are persistent across detach/attach, so reset
	 * them all here to handle hot replacement of disk drives, except
	 * when the drive is an unchanged Sun-qualified drive (same "SUN"
	 * pid tag and same serial number).
9123	 */
9124	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
9125	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9126	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
9127		stp->sd_softerrs.value.ui32 = 0;
9128		stp->sd_harderrs.value.ui32 = 0;
9129		stp->sd_transerrs.value.ui32 = 0;
9130		stp->sd_rq_media_err.value.ui32 = 0;
9131		stp->sd_rq_ntrdy_err.value.ui32 = 0;
9132		stp->sd_rq_nodev_err.value.ui32 = 0;
9133		stp->sd_rq_recov_err.value.ui32 = 0;
9134		stp->sd_rq_illrq_err.value.ui32 = 0;
9135		stp->sd_rq_pfa_err.value.ui32 = 0;
9136	}
9137
9138	/*
9139	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9140	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9141	 * (4376302))
9142	 */
9143	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9144		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9145		    sizeof (SD_INQUIRY(un)->inq_serial));
9146	}
9147
9148	if (un->un_f_blockcount_is_valid != TRUE) {
9149		/*
9150		 * Set capacity error stat to 0 for no media. This ensures
9151		 * a valid capacity is displayed in response to 'iostat -E'
9152		 * when no media is present in the device.
9153		 */
9154		stp->sd_capacity.value.ui64 = 0;
9155	} else {
9156		/*
9157		 * Multiply un_blockcount by un->un_sys_blocksize to get
9158		 * capacity.
9159		 *
9160		 * Note: for non-512 blocksize devices "un_blockcount" has been
9161		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9162		 * (un_tgt_blocksize / un->un_sys_blocksize).
9163		 */
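		/*
		 * Worked example (assuming un_sys_blocksize == 512): a
		 * device with 4096-byte target blocks and 1000 of them
		 * reports a scaled un_blockcount of 8000, so the value
		 * computed here is 8000 * 512 = 4096000 bytes.
		 */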
9164		stp->sd_capacity.value.ui64 = (uint64_t)
9165		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9166	}
9167}
9168
9169
9170/*
9171 *    Function: sd_set_pstats
9172 *
9173 * Description: This routine instantiates and initializes the partition
9174 *              stats for each partition with more than zero blocks.
9175 *		(4363169)
9176 *
9177 *   Arguments: un - driver soft state (unit) structure
9178 *
9179 *     Context: Kernel thread context
9180 */
9181
9182static void
9183sd_set_pstats(struct sd_lun *un)
9184{
9185	char	kstatname[KSTAT_STRLEN];
9186	int	instance;
9187	int	i;
9188	diskaddr_t	nblks = 0;
9189	char	*partname = NULL;
9190
9191	ASSERT(un != NULL);
9192
9193	instance = ddi_get_instance(SD_DEVINFO(un));
9194
9195	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9196	for (i = 0; i < NSDMAP; i++) {
9197
9198		if (cmlb_partinfo(un->un_cmlbhandle, i,
9199		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
9200			continue;
9201		mutex_enter(SD_MUTEX(un));
9202
9203		if ((un->un_pstats[i] == NULL) &&
9204		    (nblks != 0)) {
9205
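			/*
			 * e.g. "sd3,a" for instance 3, slice "a"
			 * (assuming sd_label is "sd").
			 */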
9206			(void) snprintf(kstatname, sizeof (kstatname),
9207			    "%s%d,%s", sd_label, instance,
9208			    partname);
9209
9210			un->un_pstats[i] = kstat_create(sd_label,
9211			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9212			    1, KSTAT_FLAG_PERSISTENT);
9213			if (un->un_pstats[i] != NULL) {
9214				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9215				kstat_install(un->un_pstats[i]);
9216			}
9217		}
9218		mutex_exit(SD_MUTEX(un));
9219	}
9220}
9221
9222
9223#if (defined(__fibre))
9224/*
9225 *    Function: sd_init_event_callbacks
9226 *
9227 * Description: This routine initializes the insertion and removal event
9228 *		callbacks. (fibre only)
9229 *
9230 *   Arguments: un - driver soft state (unit) structure
9231 *
9232 *     Context: Kernel thread context
9233 */
9234
9235static void
9236sd_init_event_callbacks(struct sd_lun *un)
9237{
9238	ASSERT(un != NULL);
9239
9240	if ((un->un_insert_event == NULL) &&
9241	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9242	    &un->un_insert_event) == DDI_SUCCESS)) {
9243		/*
9244		 * Add the callback for an insertion event
9245		 */
9246		(void) ddi_add_event_handler(SD_DEVINFO(un),
9247		    un->un_insert_event, sd_event_callback, (void *)un,
9248		    &(un->un_insert_cb_id));
9249	}
9250
9251	if ((un->un_remove_event == NULL) &&
9252	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9253	    &un->un_remove_event) == DDI_SUCCESS)) {
9254		/*
9255		 * Add the callback for a removal event
9256		 */
9257		(void) ddi_add_event_handler(SD_DEVINFO(un),
9258		    un->un_remove_event, sd_event_callback, (void *)un,
9259		    &(un->un_remove_cb_id));
9260	}
9261}
9262
9263
9264/*
9265 *    Function: sd_event_callback
9266 *
9267 * Description: This routine handles insert/remove events (photon). The
 *		state is changed to OFFLINE which can be used to suppress
9269 *		error msgs. (fibre only)
9270 *
9271 *   Arguments: un - driver soft state (unit) structure
9272 *
9273 *     Context: Callout thread context
9274 */
9275/* ARGSUSED */
9276static void
9277sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9278    void *bus_impldata)
9279{
9280	struct sd_lun *un = (struct sd_lun *)arg;
9281
9282	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9283	if (event == un->un_insert_event) {
9284		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9285		mutex_enter(SD_MUTEX(un));
9286		if (un->un_state == SD_STATE_OFFLINE) {
9287			if (un->un_last_state != SD_STATE_SUSPENDED) {
9288				un->un_state = un->un_last_state;
9289			} else {
9290				/*
9291				 * We have gone through SUSPEND/RESUME while
9292				 * we were offline. Restore the last state
9293				 */
9294				un->un_state = un->un_save_state;
9295			}
9296		}
9297		mutex_exit(SD_MUTEX(un));
9298
9299	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9300	} else if (event == un->un_remove_event) {
9301		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9302		mutex_enter(SD_MUTEX(un));
9303		/*
9304		 * We need to handle an event callback that occurs during
9305		 * the suspend operation, since we don't prevent it.
9306		 */
9307		if (un->un_state != SD_STATE_OFFLINE) {
9308			if (un->un_state != SD_STATE_SUSPENDED) {
9309				New_state(un, SD_STATE_OFFLINE);
9310			} else {
9311				un->un_last_state = SD_STATE_OFFLINE;
9312			}
9313		}
9314		mutex_exit(SD_MUTEX(un));
9315	} else {
9316		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9317		    "!Unknown event\n");
9318	}
9319
9320}
9321#endif
9322
9323/*
9324 *    Function: sd_cache_control()
9325 *
9326 * Description: This routine is the driver entry point for setting
9327 *		read and write caching by modifying the WCE (write cache
9328 *		enable) and RCD (read cache disable) bits of mode
9329 *		page 8 (MODEPAGE_CACHING).
9330 *
9331 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
9332 *                      structure for this target.
9333 *		rcd_flag - flag for controlling the read cache
9334 *		wce_flag - flag for controlling the write cache
9335 *
9336 * Return Code: EIO
9337 *		code returned by sd_send_scsi_MODE_SENSE and
9338 *		sd_send_scsi_MODE_SELECT
9339 *
9340 *     Context: Kernel Thread
9341 */
9342
9343static int
9344sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag)
9345{
9346	struct mode_caching	*mode_caching_page;
9347	uchar_t			*header;
9348	size_t			buflen;
9349	int			hdrlen;
9350	int			bd_len;
9351	int			rval = 0;
9352	struct mode_header_grp2	*mhp;
9353	struct sd_lun		*un;
9354	int			status;
9355
9356	ASSERT(ssc != NULL);
9357	un = ssc->ssc_un;
9358	ASSERT(un != NULL);
9359
9360	/*
9361	 * Do a test unit ready, otherwise a mode sense may not work if this
9362	 * is the first command sent to the device after boot.
9363	 */
9364	status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9365	if (status != 0)
9366		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9367
9368	if (un->un_f_cfg_is_atapi == TRUE) {
9369		hdrlen = MODE_HEADER_LENGTH_GRP2;
9370	} else {
9371		hdrlen = MODE_HEADER_LENGTH;
9372	}
9373
9374	/*
	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
	 * we get all of the mode sense data; otherwise, the mode select
	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9379	 */
9380	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9381	    sizeof (struct mode_cache_scsi3);
9382
9383	header = kmem_zalloc(buflen, KM_SLEEP);
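
	/*
	 * Layout of the buffer after a successful MODE SENSE:
	 *
	 *	header                     mode header (hdrlen bytes)
	 *	header + hdrlen            block descriptor(s), bd_len
	 *	                           bytes (0 on ATAPI devices)
	 *	header + hdrlen + bd_len   the caching mode page itself
	 */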
9384
9385	/* Get the information from the device. */
9386	if (un->un_f_cfg_is_atapi == TRUE) {
9387		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, header, buflen,
9388		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9389	} else {
9390		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
9391		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9392	}
9393
9394	if (rval != 0) {
9395		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9396		    "sd_cache_control: Mode Sense Failed\n");
9397		goto mode_sense_failed;
9398	}
9399
9400	/*
9401	 * Determine size of Block Descriptors in order to locate
9402	 * the mode page data. ATAPI devices return 0, SCSI devices
9403	 * should return MODE_BLK_DESC_LENGTH.
9404	 */
9405	if (un->un_f_cfg_is_atapi == TRUE) {
9406		mhp	= (struct mode_header_grp2 *)header;
9407		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9408	} else {
9409		bd_len  = ((struct mode_header *)header)->bdesc_length;
9410	}
9411
9412	if (bd_len > MODE_BLK_DESC_LENGTH) {
9413		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
9414		    "sd_cache_control: Mode Sense returned invalid block "
9415		    "descriptor length\n");
9416		rval = EIO;
9417		goto mode_sense_failed;
9418	}
9419
9420	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9421	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9422		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
9423		    "sd_cache_control: Mode Sense caching page code mismatch "
9424		    "%d\n", mode_caching_page->mode_page.code);
9425		rval = EIO;
9426		goto mode_sense_failed;
9427	}
9428
9429	/* Check the relevant bits on successful mode sense. */
9430	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9431	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9432	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9433	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9434
9435		size_t sbuflen;
9436		uchar_t save_pg;
9437
9438		/*
9439		 * Construct select buffer length based on the
9440		 * length of the sense data returned.
9441		 */
9442		sbuflen =  hdrlen + bd_len +
9443		    sizeof (struct mode_page) +
9444		    (int)mode_caching_page->mode_page.length;
9445
9446		/*
9447		 * Set the caching bits as requested.
9448		 */
9449		if (rcd_flag == SD_CACHE_ENABLE)
9450			mode_caching_page->rcd = 0;
9451		else if (rcd_flag == SD_CACHE_DISABLE)
9452			mode_caching_page->rcd = 1;
9453
9454		if (wce_flag == SD_CACHE_ENABLE)
9455			mode_caching_page->wce = 1;
9456		else if (wce_flag == SD_CACHE_DISABLE)
9457			mode_caching_page->wce = 0;
9458
9459		/*
9460		 * Save the page if the mode sense says the
9461		 * drive supports it.
9462		 */
9463		save_pg = mode_caching_page->mode_page.ps ?
9464		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9465
9466		/* Clear reserved bits before mode select. */
9467		mode_caching_page->mode_page.ps = 0;
9468
9469		/*
9470		 * Clear out mode header for mode select.
9471		 * The rest of the retrieved page will be reused.
9472		 */
9473		bzero(header, hdrlen);
9474
9475		if (un->un_f_cfg_is_atapi == TRUE) {
9476			mhp = (struct mode_header_grp2 *)header;
9477			mhp->bdesc_length_hi = bd_len >> 8;
9478			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9479		} else {
9480			((struct mode_header *)header)->bdesc_length = bd_len;
9481		}
9482
9483		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9484
9485		/* Issue mode select to change the cache settings */
9486		if (un->un_f_cfg_is_atapi == TRUE) {
9487			rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, header,
9488			    sbuflen, save_pg, SD_PATH_DIRECT);
9489		} else {
9490			rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
9491			    sbuflen, save_pg, SD_PATH_DIRECT);
9492		}
9493
9494	}
9495
9496
9497mode_sense_failed:
9498
9499	kmem_free(header, buflen);
9500
9501	if (rval != 0) {
9502		if (rval == EIO)
9503			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9504		else
9505			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9506	}
9507	return (rval);
9508}
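
/*
 * Example invocation (as in sd_unit_attach above), disabling both the
 * read and the write cache:
 *
 *	rval = sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE);
 *
 * A flag value other than SD_CACHE_ENABLE/SD_CACHE_DISABLE leaves the
 * corresponding bit unchanged.
 */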
9509
9510
9511/*
9512 *    Function: sd_get_write_cache_enabled()
9513 *
9514 * Description: This routine is the driver entry point for determining if
9515 *		write caching is enabled.  It examines the WCE (write cache
9516 *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9517 *
9518 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
9519 *                      structure for this target.
9520 *		is_enabled - pointer to int where write cache enabled state
9521 *		is returned (non-zero -> write cache enabled)
9522 *
9523 *
9524 * Return Code: EIO
9525 *		code returned by sd_send_scsi_MODE_SENSE
9526 *
9527 *     Context: Kernel Thread
9528 *
 * NOTE: If an ioctl is added to disable the write cache, this sequence
 * should be followed so that no locking is required for accesses to
9531 * un->un_f_write_cache_enabled:
9532 * 	do mode select to clear wce
9533 * 	do synchronize cache to flush cache
9534 * 	set un->un_f_write_cache_enabled = FALSE
9535 *
9536 * Conversely, an ioctl to enable the write cache should be done
9537 * in this order:
9538 * 	set un->un_f_write_cache_enabled = TRUE
9539 * 	do mode select to set wce
9540 */
9541
9542static int
9543sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled)
9544{
9545	struct mode_caching	*mode_caching_page;
9546	uchar_t			*header;
9547	size_t			buflen;
9548	int			hdrlen;
9549	int			bd_len;
9550	int			rval = 0;
9551	struct sd_lun		*un;
9552	int			status;
9553
9554	ASSERT(ssc != NULL);
9555	un = ssc->ssc_un;
9556	ASSERT(un != NULL);
9557	ASSERT(is_enabled != NULL);
9558
9559	/* in case of error, flag as enabled */
9560	*is_enabled = TRUE;
9561
9562	/*
9563	 * Do a test unit ready, otherwise a mode sense may not work if this
9564	 * is the first command sent to the device after boot.
9565	 */
9566	status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9567
9568	if (status != 0)
9569		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9570
9571	if (un->un_f_cfg_is_atapi == TRUE) {
9572		hdrlen = MODE_HEADER_LENGTH_GRP2;
9573	} else {
9574		hdrlen = MODE_HEADER_LENGTH;
9575	}
9576
9577	/*
9578	 * Allocate memory for the retrieved mode page and its headers.  Set
9579	 * a pointer to the page itself.
9580	 */
9581	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
9582	header = kmem_zalloc(buflen, KM_SLEEP);
9583
9584	/* Get the information from the device. */
9585	if (un->un_f_cfg_is_atapi == TRUE) {
9586		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, header, buflen,
9587		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9588	} else {
9589		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
9590		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9591	}
9592
9593	if (rval != 0) {
9594		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9595		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
9596		goto mode_sense_failed;
9597	}
9598
9599	/*
9600	 * Determine size of Block Descriptors in order to locate
9601	 * the mode page data. ATAPI devices return 0, SCSI devices
9602	 * should return MODE_BLK_DESC_LENGTH.
9603	 */
9604	if (un->un_f_cfg_is_atapi == TRUE) {
9605		struct mode_header_grp2	*mhp;
9606		mhp	= (struct mode_header_grp2 *)header;
9607		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9608	} else {
9609		bd_len  = ((struct mode_header *)header)->bdesc_length;
9610	}
9611
9612	if (bd_len > MODE_BLK_DESC_LENGTH) {
9613		/* FMA should make upset complain here */
9614		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
9615		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
9616		    "block descriptor length\n");
9617		rval = EIO;
9618		goto mode_sense_failed;
9619	}
9620
9621	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9622	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9623		/* FMA could make upset complain here */
9624		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
9625		    "sd_get_write_cache_enabled: Mode Sense caching page "
9626		    "code mismatch %d\n", mode_caching_page->mode_page.code);
9627		rval = EIO;
9628		goto mode_sense_failed;
9629	}
9630	*is_enabled = mode_caching_page->wce;
9631
9632mode_sense_failed:
9633	if (rval == 0) {
9634		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
9635	} else if (rval == EIO) {
9636		/*
		 * Some disks do not support mode sense(6); we should
		 * ignore this kind of error (sense key is 0x5 -
		 * illegal request).
9640		 */
9641		uint8_t *sensep;
9642		int senlen;
9643
9644		sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
9645		senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
9646		    ssc->ssc_uscsi_cmd->uscsi_rqresid);
9647
9648		if (senlen > 0 &&
9649		    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
9650			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
9651		} else {
9652			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9653		}
9654	} else {
9655		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9656	}
9657	kmem_free(header, buflen);
9658	return (rval);
9659}
9660
9661/*
9662 *    Function: sd_get_nv_sup()
9663 *
9664 * Description: This routine is the driver entry point for
9665 * determining whether non-volatile cache is supported. This
9666 * determination process works as follows:
9667 *
 * 1. sd first queries sd.conf to see whether the
 * suppress_cache_flush bit is set for this device.
 *
 * 2. If it is not set there, sd queries the internal disk table.
 *
 * 3. If either sd.conf or the internal disk table specifies
 * that cache flush be suppressed, sd does not bother checking
 * the NV_SUP bit.
9676 *
9677 * If SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
9678 * the optional INQUIRY VPD page 0x86. If the device
9679 * supports VPD page 0x86, sd examines the NV_SUP
9680 * (non-volatile cache support) bit in the INQUIRY VPD page
9681 * 0x86:
 *   o If NV_SUP bit is set, sd assumes the device has a
 *   non-volatile cache and sets un_f_sync_nv_supported
 *   to TRUE.
 *   o Otherwise the cache is treated as volatile and
 *   un_f_sync_nv_supported is set to FALSE.
9687 *
9688 * Arguments: un - driver soft state (unit) structure
9689 *
9690 * Return Code:
9691 *
9692 *     Context: Kernel Thread
9693 */
9694
9695static void
9696sd_get_nv_sup(sd_ssc_t *ssc)
9697{
9698	int		rval		= 0;
9699	uchar_t		*inq86		= NULL;
9700	size_t		inq86_len	= MAX_INQUIRY_SIZE;
9701	size_t		inq86_resid	= 0;
9702	struct		dk_callback *dkc;
9703	struct sd_lun	*un;
9704
9705	ASSERT(ssc != NULL);
9706	un = ssc->ssc_un;
9707	ASSERT(un != NULL);
9708
9709	mutex_enter(SD_MUTEX(un));
9710
9711	/*
9712	 * Be conservative on the device's support of
9713	 * SYNC_NV bit: un_f_sync_nv_supported is
9714	 * initialized to be false.
9715	 */
9716	un->un_f_sync_nv_supported = FALSE;
9717
9718	/*
9719	 * If either sd.conf or internal disk table
9720	 * specifies cache flush be suppressed, then
9721	 * we don't bother checking NV_SUP bit.
9722	 */
9723	if (un->un_f_suppress_cache_flush == TRUE) {
9724		mutex_exit(SD_MUTEX(un));
9725		return;
9726	}
9727
9728	if (sd_check_vpd_page_support(ssc) == 0 &&
9729	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
9730		mutex_exit(SD_MUTEX(un));
9731		/* collect page 86 data if available */
9732		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);
9733
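		/*
		 * INQUIRY with EVPD set (0x01) and page code 0x86
		 * (Extended INQUIRY Data VPD page); the NV_SUP flag
		 * is checked in byte 6 of the returned page below.
		 */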
9734		rval = sd_send_scsi_INQUIRY(ssc, inq86, inq86_len,
9735		    0x01, 0x86, &inq86_resid);
9736
9737		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
9738			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_get_nv_sup: successfully got VPD page: %x "
			    "PAGE LENGTH: %x BYTE 6: %x\n",
9742			    inq86[1], inq86[3], inq86[6]);
9743
9744			mutex_enter(SD_MUTEX(un));
9745			/*
			 * Check the value of the NV_SUP bit: only if the
			 * device reports NV_SUP as 1 is
			 * un_f_sync_nv_supported set to TRUE.
9749			 */
9750			if (inq86[6] & SD_VPD_NV_SUP) {
9751				un->un_f_sync_nv_supported = TRUE;
9752			}
9753			mutex_exit(SD_MUTEX(un));
9754		} else if (rval != 0) {
9755			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9756		}
9757
9758		kmem_free(inq86, inq86_len);
9759	} else {
9760		mutex_exit(SD_MUTEX(un));
9761	}
9762
9763	/*
	 * Send a SYNC CACHE command to check whether the SYNC_NV
	 * bit is supported; un_f_sync_nv_supported must already be
	 * set to its correct value before this command is issued.
9767	 */
9768	mutex_enter(SD_MUTEX(un));
9769	if (un->un_f_sync_nv_supported) {
9770		mutex_exit(SD_MUTEX(un));
9771		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
9772		dkc->dkc_flag = FLUSH_VOLATILE;
9773		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
9774
9775		/*
9776		 * Send a TEST UNIT READY command to the device. This should
9777		 * clear any outstanding UNIT ATTENTION that may be present.
9778		 */
9779		rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
9780		if (rval != 0)
9781			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9782
9783		kmem_free(dkc, sizeof (struct dk_callback));
9784	} else {
9785		mutex_exit(SD_MUTEX(un));
9786	}
9787
	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_get_nv_sup: un_f_suppress_cache_flush is set to %d\n",
	    un->un_f_suppress_cache_flush);
9791}
9792
9793/*
9794 *    Function: sd_make_device
9795 *
9796 * Description: Utility routine to return the Solaris device number from
9797 *		the data in the device's dev_info structure.
9798 *
9799 * Return Code: The Solaris device number
9800 *
9801 *     Context: Any
9802 */
9803
9804static dev_t
9805sd_make_device(dev_info_t *devi)
9806{
9807	return (makedevice(ddi_driver_major(devi),
9808	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9809}
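
/*
 * Note: the minor number encodes both the instance and the partition:
 * the instance occupies the bits above SDUNIT_SHIFT and the partition
 * the bits below it, so SDUNIT(dev) and SDPART(dev) (used in sdopen()
 * below) recover the two fields.  The exact field widths are assumed
 * to be defined with these macros in sddef.h.
 */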
9810
9811
9812/*
9813 *    Function: sd_pm_entry
9814 *
9815 * Description: Called at the start of a new command to manage power
9816 *		and busy status of a device. This includes determining whether
9817 *		the current power state of the device is sufficient for
9818 *		performing the command or whether it must be changed.
9819 *		The PM framework is notified appropriately.
 *		Only with a return status of DDI_SUCCESS will the
 *		component be marked busy to the framework.
9822 *
9823 *		All callers of sd_pm_entry must check the return status
 *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
9825 *		of DDI_FAILURE indicates the device failed to power up.
9826 *		In this case un_pm_count has been adjusted so the result
9827 *		on exit is still powered down, ie. count is less than 0.
9828 *		Calling sd_pm_exit with this count value hits an ASSERT.
9829 *
9830 * Return Code: DDI_SUCCESS or DDI_FAILURE
9831 *
9832 *     Context: Kernel thread context.
9833 */
9834
9835static int
9836sd_pm_entry(struct sd_lun *un)
9837{
9838	int return_status = DDI_SUCCESS;
9839
9840	ASSERT(!mutex_owned(SD_MUTEX(un)));
9841	ASSERT(!mutex_owned(&un->un_pm_mutex));
9842
9843	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9844
9845	if (un->un_f_pm_is_enabled == FALSE) {
9846		SD_TRACE(SD_LOG_IO_PM, un,
9847		    "sd_pm_entry: exiting, PM not enabled\n");
9848		return (return_status);
9849	}
9850
9851	/*
9852	 * Just increment a counter if PM is enabled. On the transition from
9853	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9854	 * the count with each IO and mark the device as idle when the count
9855	 * hits 0.
9856	 *
9857	 * If the count is less than 0 the device is powered down. If a powered
9858	 * down device is successfully powered up then the count must be
9859	 * incremented to reflect the power up. Note that it'll get incremented
9860	 * a second time to become busy.
9861	 *
9862	 * Because the following has the potential to change the device state
9863	 * and must release the un_pm_mutex to do so, only one thread can be
9864	 * allowed through at a time.
9865	 */
9866
9867	mutex_enter(&un->un_pm_mutex);
9868	while (un->un_pm_busy == TRUE) {
9869		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
9870	}
9871	un->un_pm_busy = TRUE;
9872
9873	if (un->un_pm_count < 1) {
9874
9875		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
9876
9877		/*
9878		 * Indicate we are now busy so the framework won't attempt to
9879		 * power down the device. This call will only fail if either
9880		 * we passed a bad component number or the device has no
9881		 * components. Neither of these should ever happen.
9882		 */
9883		mutex_exit(&un->un_pm_mutex);
9884		return_status = pm_busy_component(SD_DEVINFO(un), 0);
9885		ASSERT(return_status == DDI_SUCCESS);
9886
9887		mutex_enter(&un->un_pm_mutex);
9888
9889		if (un->un_pm_count < 0) {
9890			mutex_exit(&un->un_pm_mutex);
9891
9892			SD_TRACE(SD_LOG_IO_PM, un,
9893			    "sd_pm_entry: power up component\n");
9894
9895			/*
9896			 * pm_raise_power will cause sdpower to be called
9897			 * which brings the device power level to the
			 * desired state. If successful, un_pm_count and
9899			 * un_power_level will be updated appropriately.
9900			 */
9901			return_status = pm_raise_power(SD_DEVINFO(un), 0,
9902			    SD_PM_STATE_ACTIVE(un));
9903
9904			mutex_enter(&un->un_pm_mutex);
9905
9906			if (return_status != DDI_SUCCESS) {
9907				/*
9908				 * Power up failed.
9909				 * Idle the device and adjust the count
9910				 * so the result on exit is that we're
9911				 * still powered down, ie. count is less than 0.
9912				 */
9913				SD_TRACE(SD_LOG_IO_PM, un,
9914				    "sd_pm_entry: power up failed,"
9915				    " idle the component\n");
9916
9917				(void) pm_idle_component(SD_DEVINFO(un), 0);
9918				un->un_pm_count--;
9919			} else {
9920				/*
9921				 * Device is powered up, verify the
9922				 * count is non-negative.
9923				 * This is debug only.
9924				 */
9925				ASSERT(un->un_pm_count == 0);
9926			}
9927		}
9928
9929		if (return_status == DDI_SUCCESS) {
9930			/*
9931			 * For performance, now that the device has been tagged
9932			 * as busy, and it's known to be powered up, update the
9933			 * chain types to use jump tables that do not include
9934			 * pm. This significantly lowers the overhead and
9935			 * therefore improves performance.
9936			 */
9937
9938			mutex_exit(&un->un_pm_mutex);
9939			mutex_enter(SD_MUTEX(un));
9940			SD_TRACE(SD_LOG_IO_PM, un,
9941			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
9942			    un->un_uscsi_chain_type);
9943
9944			if (un->un_f_non_devbsize_supported) {
9945				un->un_buf_chain_type =
9946				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
9947			} else {
9948				un->un_buf_chain_type =
9949				    SD_CHAIN_INFO_DISK_NO_PM;
9950			}
9951			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
9952
9953			SD_TRACE(SD_LOG_IO_PM, un,
9954			    "             changed  uscsi_chain_type to   %d\n",
9955			    un->un_uscsi_chain_type);
9956			mutex_exit(SD_MUTEX(un));
9957			mutex_enter(&un->un_pm_mutex);
9958
9959			if (un->un_pm_idle_timeid == NULL) {
9960				/* 300 ms. */
9961				un->un_pm_idle_timeid =
9962				    timeout(sd_pm_idletimeout_handler, un,
9963				    (drv_usectohz((clock_t)300000)));
9964				/*
9965				 * Include an extra call to busy which keeps the
9966				 * device busy with-respect-to the PM layer
9967				 * until the timer fires, at which time it'll
9968				 * get the extra idle call.
9969				 */
9970				(void) pm_busy_component(SD_DEVINFO(un), 0);
9971			}
9972		}
9973	}
9974	un->un_pm_busy = FALSE;
9975	/* Next... */
9976	cv_signal(&un->un_pm_busy_cv);
9977
9978	un->un_pm_count++;
9979
9980	SD_TRACE(SD_LOG_IO_PM, un,
9981	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
9982
9983	mutex_exit(&un->un_pm_mutex);
9984
9985	return (return_status);
9986}
9987
9988
9989/*
9990 *    Function: sd_pm_exit
9991 *
9992 * Description: Called at the completion of a command to manage busy
9993 *		status for the device. If the device becomes idle the
9994 *		PM framework is notified.
9995 *
9996 *     Context: Kernel thread context
9997 */
9998
9999static void
10000sd_pm_exit(struct sd_lun *un)
10001{
10002	ASSERT(!mutex_owned(SD_MUTEX(un)));
10003	ASSERT(!mutex_owned(&un->un_pm_mutex));
10004
10005	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10006
10007	/*
10008	 * After attach the following flag is only read, so don't
10009	 * take the penalty of acquiring a mutex for it.
10010	 */
10011	if (un->un_f_pm_is_enabled == TRUE) {
10012
10013		mutex_enter(&un->un_pm_mutex);
10014		un->un_pm_count--;
10015
10016		SD_TRACE(SD_LOG_IO_PM, un,
10017		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10018
10019		ASSERT(un->un_pm_count >= 0);
10020		if (un->un_pm_count == 0) {
10021			mutex_exit(&un->un_pm_mutex);
10022
10023			SD_TRACE(SD_LOG_IO_PM, un,
10024			    "sd_pm_exit: idle component\n");
10025
10026			(void) pm_idle_component(SD_DEVINFO(un), 0);
10027
10028		} else {
10029			mutex_exit(&un->un_pm_mutex);
10030		}
10031	}
10032
10033	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10034}
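
/*
 * Typical usage pattern for the two routines above (a sketch; the
 * real callers live in the I/O processing chains):
 *
 *	if (sd_pm_entry(un) == DDI_SUCCESS) {
 *		(issue the command to the device)
 *		sd_pm_exit(un);
 *	}
 *	(on DDI_FAILURE, fail the command; do NOT call sd_pm_exit())
 */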
10035
10036
10037/*
10038 *    Function: sdopen
10039 *
10040 * Description: Driver's open(9e) entry point function.
10041 *
 *   Arguments: dev_p   - pointer to device number
10043 *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10044 *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10045 *		cred_p  - user credential pointer
10046 *
10047 * Return Code: EINVAL
10048 *		ENXIO
10049 *		EIO
10050 *		EROFS
10051 *		EBUSY
10052 *
10053 *     Context: Kernel thread context
10054 */
10055/* ARGSUSED */
10056static int
10057sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10058{
10059	struct sd_lun	*un;
10060	int		nodelay;
10061	int		part;
10062	uint64_t	partmask;
10063	int		instance;
10064	dev_t		dev;
10065	int		rval = EIO;
10066	diskaddr_t	nblks = 0;
10067	diskaddr_t	label_cap;
10068
10069	/* Validate the open type */
10070	if (otyp >= OTYPCNT) {
10071		return (EINVAL);
10072	}
10073
10074	dev = *dev_p;
10075	instance = SDUNIT(dev);
10076	mutex_enter(&sd_detach_mutex);
10077
10078	/*
10079	 * Fail the open if there is no softstate for the instance, or
10080	 * if another thread somewhere is trying to detach the instance.
10081	 */
10082	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10083	    (un->un_detach_count != 0)) {
10084		mutex_exit(&sd_detach_mutex);
		/*
		 * The probe cache only needs to be cleared when open(9e)
		 * fails with ENXIO (4238046).  Unconditionally clearing it
		 * is fine with separate sd/ssd binaries; on x86, where
		 * parallel SCSI and fibre can share one binary, this could
		 * be an issue.
		 */
10095		sd_scsi_clear_probe_cache();
10096		return (ENXIO);
10097	}
10098
10099	/*
10100	 * The un_layer_count is to prevent another thread in specfs from
10101	 * trying to detach the instance, which can happen when we are
10102	 * called from a higher-layer driver instead of thru specfs.
10103	 * This will not be needed when DDI provides a layered driver
10104	 * interface that allows specfs to know that an instance is in
10105	 * use by a layered driver & should not be detached.
10106	 *
10107	 * Note: the semantics for layered driver opens are exactly one
10108	 * close for every open.
10109	 */
10110	if (otyp == OTYP_LYR) {
10111		un->un_layer_count++;
10112	}
10113
	/*
	 * Keep a count of the current # of opens in progress. This is
	 * because some layered drivers try to call us as a regular open.
	 * This can cause problems that we cannot prevent; however, by
	 * keeping this count we can at least keep our open and detach
	 * routines from racing against each other under such conditions.
	 */
10121	un->un_opens_in_progress++;
10122	mutex_exit(&sd_detach_mutex);
10123
10124	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10125	part	 = SDPART(dev);
10126	partmask = 1 << part;
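
	/*
	 * For example (illustrative only): an open of slice 2 gives
	 * part == 2 and partmask == 0x4; un_exclopen and the
	 * un_ocmap.regopen[] entries are per-partition bitmasks that are
	 * tested against this mask below.
	 */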
10127
10128	/*
10129	 * We use a semaphore here in order to serialize
10130	 * open and close requests on the device.
10131	 */
10132	sema_p(&un->un_semoclose);
10133
10134	mutex_enter(SD_MUTEX(un));
10135
10136	/*
	 * All device accesses go thru sdstrategy(), where we check
	 * the suspend status; but a scsi_poll command can bypass
	 * sdstrategy(), so we need to check the pm status here as
	 * well.
10141	 */
10142
10143	if (!nodelay) {
10144		while ((un->un_state == SD_STATE_SUSPENDED) ||
10145		    (un->un_state == SD_STATE_PM_CHANGING)) {
10146			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10147		}
10148
10149		mutex_exit(SD_MUTEX(un));
10150		if (sd_pm_entry(un) != DDI_SUCCESS) {
10151			rval = EIO;
10152			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10153			    "sdopen: sd_pm_entry failed\n");
10154			goto open_failed_with_pm;
10155		}
10156		mutex_enter(SD_MUTEX(un));
10157	}
10158
10159	/* check for previous exclusive open */
10160	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10161	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10162	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10163	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10164
10165	if (un->un_exclopen & (partmask)) {
10166		goto excl_open_fail;
10167	}
10168
10169	if (flag & FEXCL) {
10170		int i;
10171		if (un->un_ocmap.lyropen[part]) {
10172			goto excl_open_fail;
10173		}
10174		for (i = 0; i < (OTYPCNT - 1); i++) {
10175			if (un->un_ocmap.regopen[i] & (partmask)) {
10176				goto excl_open_fail;
10177			}
10178		}
10179	}
10180
10181	/*
10182	 * Check the write permission if this is a removable media device,
10183	 * NDELAY has not been set, and writable permission is requested.
10184	 *
10185	 * Note: If NDELAY was set and this is write-protected media the WRITE
10186	 * attempt will fail with EIO as part of the I/O processing. This is a
10187	 * more permissive implementation that allows the open to succeed and
10188	 * WRITE attempts to fail when appropriate.
10189	 */
10190	if (un->un_f_chk_wp_open) {
10191		if ((flag & FWRITE) && (!nodelay)) {
10192			mutex_exit(SD_MUTEX(un));
			/*
			 * Defer the check for write permission on a writable
			 * DVD drive until sdstrategy, and do not fail the
			 * open even if FWRITE is set, as the device can be
			 * writable depending upon the media, and the media
			 * can change after the call to open().
			 */
			if (un->un_f_dvdram_writable_device == FALSE) {
				if (ISCD(un) || sr_check_wp(dev)) {
					rval = EROFS;
					mutex_enter(SD_MUTEX(un));
					SD_ERROR(SD_LOG_OPEN_CLOSE, un,
					    "sdopen: write to cd or write "
					    "protected media\n");
					goto open_fail;
				}
			}
10209			mutex_enter(SD_MUTEX(un));
10210		}
10211	}
10212
	/*
	 * If opening in NDELAY/NONBLOCK mode, just return; the check
	 * that the disk is ready and has a valid geometry is deferred.
	 */
10217	if (!nodelay) {
10218		sd_ssc_t	*ssc;
10219
10220		mutex_exit(SD_MUTEX(un));
10221		ssc = sd_ssc_init(un);
10222		rval = sd_ready_and_valid(ssc, part);
10223		sd_ssc_fini(ssc);
10224		mutex_enter(SD_MUTEX(un));
		/*
		 * Fail if the device is not ready, or if the number of disk
		 * blocks is zero or negative for non-CD devices.
		 */
10229
10230		nblks = 0;
10231
10232		if (rval == SD_READY_VALID && (!ISCD(un))) {
10233			/* if cmlb_partinfo fails, nblks remains 0 */
10234			mutex_exit(SD_MUTEX(un));
10235			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
10236			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
10237			mutex_enter(SD_MUTEX(un));
10238		}
10239
10240		if ((rval != SD_READY_VALID) ||
10241		    (!ISCD(un) && nblks <= 0)) {
10242			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10243			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10244			    "device not ready or invalid disk block value\n");
10245			goto open_fail;
10246		}
10247#if defined(__i386) || defined(__amd64)
10248	} else {
10249		uchar_t *cp;
10250		/*
10251		 * x86 requires special nodelay handling, so that p0 is
10252		 * always defined and accessible.
10253		 * Invalidate geometry only if device is not already open.
10254		 */
10255		cp = &un->un_ocmap.chkd[0];
10256		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10257			if (*cp != (uchar_t)0) {
10258				break;
10259			}
10260			cp++;
10261		}
10262		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10263			mutex_exit(SD_MUTEX(un));
10264			cmlb_invalidate(un->un_cmlbhandle,
10265			    (void *)SD_PATH_DIRECT);
10266			mutex_enter(SD_MUTEX(un));
10267		}
10268
10269#endif
10270	}
10271
10272	if (otyp == OTYP_LYR) {
10273		un->un_ocmap.lyropen[part]++;
10274	} else {
10275		un->un_ocmap.regopen[otyp] |= partmask;
10276	}
10277
10278	/* Set up open and exclusive open flags */
10279	if (flag & FEXCL) {
10280		un->un_exclopen |= (partmask);
10281	}
10282
10283	/*
10284	 * If the lun is EFI labeled and lun capacity is greater than the
10285	 * capacity contained in the label, log a sys-event to notify the
10286	 * interested module.
10287	 * To avoid an infinite loop of logging sys-event, we only log the
10288	 * event when the lun is not opened in NDELAY mode. The event handler
10289	 * should open the lun in NDELAY mode.
10290	 */
10291	if (!nodelay) {
10292		mutex_exit(SD_MUTEX(un));
10293		if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
10294		    (void*)SD_PATH_DIRECT) == 0) {
10295			mutex_enter(SD_MUTEX(un));
10296			if (un->un_f_blockcount_is_valid &&
10297			    un->un_blockcount > label_cap &&
10298			    un->un_f_expnevent == B_FALSE) {
10299				un->un_f_expnevent = B_TRUE;
10300				mutex_exit(SD_MUTEX(un));
10301				sd_log_lun_expansion_event(un,
10302				    (nodelay ? KM_NOSLEEP : KM_SLEEP));
10303				mutex_enter(SD_MUTEX(un));
10304			}
10305		} else {
10306			mutex_enter(SD_MUTEX(un));
10307		}
10308	}
10309
10310	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10311	    "open of part %d type %d\n", part, otyp);
10312
10313	mutex_exit(SD_MUTEX(un));
10314	if (!nodelay) {
10315		sd_pm_exit(un);
10316	}
10317
10318	sema_v(&un->un_semoclose);
10319
10320	mutex_enter(&sd_detach_mutex);
10321	un->un_opens_in_progress--;
10322	mutex_exit(&sd_detach_mutex);
10323
10324	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10325	return (DDI_SUCCESS);
10326
10327excl_open_fail:
10328	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10329	rval = EBUSY;
10330
10331open_fail:
10332	mutex_exit(SD_MUTEX(un));
10333
10334	/*
10335	 * On a failed open we must exit the pm management.
10336	 */
10337	if (!nodelay) {
10338		sd_pm_exit(un);
10339	}
10340open_failed_with_pm:
10341	sema_v(&un->un_semoclose);
10342
10343	mutex_enter(&sd_detach_mutex);
10344	un->un_opens_in_progress--;
10345	if (otyp == OTYP_LYR) {
10346		un->un_layer_count--;
10347	}
10348	mutex_exit(&sd_detach_mutex);
10349
10350	return (rval);
10351}
10352
10353
10354/*
10355 *    Function: sdclose
10356 *
10357 * Description: Driver's close(9e) entry point function.
10358 *
10359 *   Arguments: dev    - device number
10360 *		flag   - file status flag, informational only
10361 *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10362 *		cred_p - user credential pointer
10363 *
10364 * Return Code: ENXIO
10365 *
10366 *     Context: Kernel thread context
10367 */
10368/* ARGSUSED */
10369static int
10370sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10371{
10372	struct sd_lun	*un;
10373	uchar_t		*cp;
10374	int		part;
10375	int		nodelay;
10376	int		rval = 0;
10377
10378	/* Validate the open type */
10379	if (otyp >= OTYPCNT) {
10380		return (ENXIO);
10381	}
10382
10383	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10384		return (ENXIO);
10385	}
10386
10387	part = SDPART(dev);
10388	nodelay = flag & (FNDELAY | FNONBLOCK);
10389
10390	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10391	    "sdclose: close of part %d type %d\n", part, otyp);
10392
10393	/*
10394	 * We use a semaphore here in order to serialize
10395	 * open and close requests on the device.
10396	 */
10397	sema_p(&un->un_semoclose);
10398
10399	mutex_enter(SD_MUTEX(un));
10400
10401	/* Don't proceed if power is being changed. */
10402	while (un->un_state == SD_STATE_PM_CHANGING) {
10403		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10404	}
10405
10406	if (un->un_exclopen & (1 << part)) {
10407		un->un_exclopen &= ~(1 << part);
10408	}
10409
10410	/* Update the open partition map */
10411	if (otyp == OTYP_LYR) {
10412		un->un_ocmap.lyropen[part] -= 1;
10413	} else {
10414		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10415	}
10416
10417	cp = &un->un_ocmap.chkd[0];
10418	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
		if (*cp != (uchar_t)0) {
10420			break;
10421		}
10422		cp++;
10423	}
10424
10425	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10426		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10427
10428		/*
		 * We avoid persistence upon the last close, and set
10430		 * the throttle back to the maximum.
10431		 */
10432		un->un_throttle = un->un_saved_throttle;
10433
10434		if (un->un_state == SD_STATE_OFFLINE) {
10435			if (un->un_f_is_fibre == FALSE) {
10436				scsi_log(SD_DEVINFO(un), sd_label,
10437				    CE_WARN, "offline\n");
10438			}
10439			mutex_exit(SD_MUTEX(un));
10440			cmlb_invalidate(un->un_cmlbhandle,
10441			    (void *)SD_PATH_DIRECT);
10442			mutex_enter(SD_MUTEX(un));
10443
10444		} else {
10445			/*
10446			 * Flush any outstanding writes in NVRAM cache.
			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
			 * cmd; it may not work for non-Pluto devices.
10449			 * SYNCHRONIZE CACHE is not required for removables,
10450			 * except DVD-RAM drives.
10451			 *
10452			 * Also note: because SYNCHRONIZE CACHE is currently
10453			 * the only command issued here that requires the
10454			 * drive be powered up, only do the power up before
10455			 * sending the Sync Cache command. If additional
10456			 * commands are added which require a powered up
10457			 * drive, the following sequence may have to change.
10458			 *
10459			 * And finally, note that parallel SCSI on SPARC
10460			 * only issues a Sync Cache to DVD-RAM, a newly
10461			 * supported device.
10462			 */
10463#if defined(__i386) || defined(__amd64)
10464			if ((un->un_f_sync_cache_supported &&
10465			    un->un_f_sync_cache_required) ||
10466			    un->un_f_dvdram_writable_device == TRUE) {
10467#else
10468			if (un->un_f_dvdram_writable_device == TRUE) {
10469#endif
10470				mutex_exit(SD_MUTEX(un));
10471				if (sd_pm_entry(un) == DDI_SUCCESS) {
10472					rval =
10473					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10474					    NULL);
10475					/* ignore error if not supported */
10476					if (rval == ENOTSUP) {
10477						rval = 0;
10478					} else if (rval != 0) {
10479						rval = EIO;
10480					}
10481					sd_pm_exit(un);
10482				} else {
10483					rval = EIO;
10484				}
10485				mutex_enter(SD_MUTEX(un));
10486			}
10487
			/*
			 * For devices that support DOOR_LOCK, send an ALLOW
			 * MEDIA REMOVAL command, but don't get upset if it
			 * fails. We need to raise the power of the drive
			 * before we can call sd_send_scsi_DOORLOCK().
			 */
10494			if (un->un_f_doorlock_supported) {
10495				mutex_exit(SD_MUTEX(un));
10496				if (sd_pm_entry(un) == DDI_SUCCESS) {
10497					sd_ssc_t	*ssc;
10498
10499					ssc = sd_ssc_init(un);
10500					rval = sd_send_scsi_DOORLOCK(ssc,
10501					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10502					if (rval != 0)
10503						sd_ssc_assessment(ssc,
10504						    SD_FMT_IGNORE);
10505					sd_ssc_fini(ssc);
10506
10507					sd_pm_exit(un);
10508					if (ISCD(un) && (rval != 0) &&
10509					    (nodelay != 0)) {
10510						rval = ENXIO;
10511					}
10512				} else {
10513					rval = EIO;
10514				}
10515				mutex_enter(SD_MUTEX(un));
10516			}
10517
10518			/*
10519			 * If a device has removable media, invalidate all
10520			 * parameters related to media, such as geometry,
10521			 * blocksize, and blockcount.
10522			 */
10523			if (un->un_f_has_removable_media) {
10524				sr_ejected(un);
10525			}
10526
10527			/*
10528			 * Destroy the cache (if it exists) which was
10529			 * allocated for the write maps since this is
10530			 * the last close for this media.
10531			 */
10532			if (un->un_wm_cache) {
				/*
				 * Check if there are pending commands;
				 * if there are, give a warning and
				 * do not destroy the cache.
				 */
10538				if (un->un_ncmds_in_driver > 0) {
10539					scsi_log(SD_DEVINFO(un),
10540					    sd_label, CE_WARN,
10541					    "Unable to clean up memory "
10542					    "because of pending I/O\n");
10543				} else {
10544					kmem_cache_destroy(
10545					    un->un_wm_cache);
10546					un->un_wm_cache = NULL;
10547				}
10548			}
10549		}
10550	}
10551
10552	mutex_exit(SD_MUTEX(un));
10553	sema_v(&un->un_semoclose);
10554
10555	if (otyp == OTYP_LYR) {
10556		mutex_enter(&sd_detach_mutex);
10557		/*
10558		 * The detach routine may run when the layer count
10559		 * drops to zero.
10560		 */
10561		un->un_layer_count--;
10562		mutex_exit(&sd_detach_mutex);
10563	}
10564
10565	return (rval);
10566}
10567
10568
10569/*
10570 *    Function: sd_ready_and_valid
10571 *
10572 * Description: Test if device is ready and has a valid geometry.
10573 *
 *   Arguments: ssc  - sd_ssc_t struct; ssc->ssc_un is the driver soft
 *		       state (unit) structure
 *		part - partition number to validate
10576 *
10577 * Return Code: SD_READY_VALID		ready and valid label
10578 *		SD_NOT_READY_VALID	not ready, no label
10579 *		SD_RESERVED_BY_OTHERS	reservation conflict
10580 *
10581 *     Context: Never called at interrupt context.
10582 */
10583
10584static int
10585sd_ready_and_valid(sd_ssc_t *ssc, int part)
10586{
10587	struct sd_errstats	*stp;
10588	uint64_t		capacity;
10589	uint_t			lbasize;
10590	int			rval = SD_READY_VALID;
10591	char			name_str[48];
10592	boolean_t		is_valid;
10593	struct sd_lun		*un;
10594	int			status;
10595
10596	ASSERT(ssc != NULL);
10597	un = ssc->ssc_un;
10598	ASSERT(un != NULL);
10599	ASSERT(!mutex_owned(SD_MUTEX(un)));
10600
10601	mutex_enter(SD_MUTEX(un));
10602	/*
10603	 * If a device has removable media, we must check if media is
10604	 * ready when checking if this device is ready and valid.
10605	 */
10606	if (un->un_f_has_removable_media) {
10607		mutex_exit(SD_MUTEX(un));
10608		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10609
10610		if (status != 0) {
10611			rval = SD_NOT_READY_VALID;
10612			mutex_enter(SD_MUTEX(un));
10613
			/* Ignore all failed status for removable media */
10615			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10616
10617			goto done;
10618		}
10619
10620		is_valid = SD_IS_VALID_LABEL(un);
10621		mutex_enter(SD_MUTEX(un));
10622		if (!is_valid ||
10623		    (un->un_f_blockcount_is_valid == FALSE) ||
10624		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10625
			/* Capacity has to be read on every open. */
10627			mutex_exit(SD_MUTEX(un));
10628			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
10629			    &lbasize, SD_PATH_DIRECT);
10630
10631			if (status != 0) {
10632				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10633
10634				cmlb_invalidate(un->un_cmlbhandle,
10635				    (void *)SD_PATH_DIRECT);
10636				mutex_enter(SD_MUTEX(un));
10637				rval = SD_NOT_READY_VALID;
10638
10639				goto done;
10640			} else {
10641				mutex_enter(SD_MUTEX(un));
10642				sd_update_block_info(un, lbasize, capacity);
10643			}
10644		}
10645
10646		/*
		 * Check whether the media in the device is writable.
10648		 */
10649		if (!is_valid && ISCD(un)) {
10650			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
10651		}
10652
10653	} else {
10654		/*
10655		 * Do a test unit ready to clear any unit attention from non-cd
10656		 * devices.
10657		 */
10658		mutex_exit(SD_MUTEX(un));
10659
10660		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10661		if (status != 0) {
10662			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10663		}
10664
10665		mutex_enter(SD_MUTEX(un));
10666	}
10667
10668
	/*
	 * If this is a non-512-byte block device, allocate space for
	 * the wmap cache. This is done here because this routine is
	 * called every time the media is changed, and the block size is
	 * a function of the media rather than the device.
	 */
10675	if (((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
10676	    un->un_f_non_devbsize_supported) &&
10677	    un->un_tgt_blocksize != DEV_BSIZE) ||
10678	    un->un_f_enable_rmw) {
10679		if (!(un->un_wm_cache)) {
10680			(void) snprintf(name_str, sizeof (name_str),
10681			    "%s%d_cache",
10682			    ddi_driver_name(SD_DEVINFO(un)),
10683			    ddi_get_instance(SD_DEVINFO(un)));
10684			un->un_wm_cache = kmem_cache_create(
10685			    name_str, sizeof (struct sd_w_map),
10686			    8, sd_wm_cache_constructor,
10687			    sd_wm_cache_destructor, NULL,
10688			    (void *)un, NULL, 0);
10689			if (!(un->un_wm_cache)) {
10690				rval = ENOMEM;
10691				goto done;
10692			}
10693		}
10694	}
10695
10696	if (un->un_state == SD_STATE_NORMAL) {
10697		/*
10698		 * If the target is not yet ready here (defined by a TUR
10699		 * failure), invalidate the geometry and print an 'offline'
10700		 * message. This is a legacy message, as the state of the
10701		 * target is not actually changed to SD_STATE_OFFLINE.
10702		 *
10703		 * If the TUR fails for EACCES (Reservation Conflict),
10704		 * SD_RESERVED_BY_OTHERS will be returned to indicate
10705		 * reservation conflict. If the TUR fails for other
10706		 * reasons, SD_NOT_READY_VALID will be returned.
10707		 */
10708		int err;
10709
10710		mutex_exit(SD_MUTEX(un));
10711		err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10712		mutex_enter(SD_MUTEX(un));
10713
10714		if (err != 0) {
10715			mutex_exit(SD_MUTEX(un));
10716			cmlb_invalidate(un->un_cmlbhandle,
10717			    (void *)SD_PATH_DIRECT);
10718			mutex_enter(SD_MUTEX(un));
10719			if (err == EACCES) {
10720				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10721				    "reservation conflict\n");
10722				rval = SD_RESERVED_BY_OTHERS;
10723				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10724			} else {
10725				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10726				    "drive offline\n");
10727				rval = SD_NOT_READY_VALID;
10728				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
10729			}
10730			goto done;
10731		}
10732	}
10733
10734	if (un->un_f_format_in_progress == FALSE) {
10735		mutex_exit(SD_MUTEX(un));
10736
10737		(void) cmlb_validate(un->un_cmlbhandle, 0,
10738		    (void *)SD_PATH_DIRECT);
10739		if (cmlb_partinfo(un->un_cmlbhandle, part, NULL, NULL, NULL,
10740		    NULL, (void *) SD_PATH_DIRECT) != 0) {
10741			rval = SD_NOT_READY_VALID;
10742			mutex_enter(SD_MUTEX(un));
10743
10744			goto done;
10745		}
10746		if (un->un_f_pkstats_enabled) {
10747			sd_set_pstats(un);
10748			SD_TRACE(SD_LOG_IO_PARTITION, un,
10749			    "sd_ready_and_valid: un:0x%p pstats created and "
10750			    "set\n", un);
10751		}
10752		mutex_enter(SD_MUTEX(un));
10753	}
10754
	/*
	 * If this device supports the DOOR_LOCK command, try to send a
	 * PREVENT MEDIA REMOVAL command, but don't get upset if it
	 * fails. For a CD, however, a failure is an error.
	 */
10760	if (un->un_f_doorlock_supported) {
10761		mutex_exit(SD_MUTEX(un));
10762		status = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
10763		    SD_PATH_DIRECT);
10764
10765		if ((status != 0) && ISCD(un)) {
10766			rval = SD_NOT_READY_VALID;
10767			mutex_enter(SD_MUTEX(un));
10768
10769			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10770
10771			goto done;
10772		} else if (status != 0)
10773			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10774		mutex_enter(SD_MUTEX(un));
10775	}
10776
10777	/* The state has changed, inform the media watch routines */
10778	un->un_mediastate = DKIO_INSERTED;
10779	cv_broadcast(&un->un_state_cv);
10780	rval = SD_READY_VALID;
10781
10782done:
10783
	/*
	 * Initialize the capacity kstat value, if there was no media
	 * previously (capacity kstat is 0) and media has been inserted
	 * (un_blockcount > 0).
	 */
10789	if (un->un_errstats != NULL) {
10790		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10791		if ((stp->sd_capacity.value.ui64 == 0) &&
10792		    (un->un_f_blockcount_is_valid == TRUE)) {
10793			stp->sd_capacity.value.ui64 =
10794			    (uint64_t)((uint64_t)un->un_blockcount *
10795			    un->un_sys_blocksize);
10796		}
10797	}
10798
10799	mutex_exit(SD_MUTEX(un));
10800	return (rval);
10801}
10802
10803
10804/*
10805 *    Function: sdmin
10806 *
10807 * Description: Routine to limit the size of a data transfer. Used in
10808 *		conjunction with physio(9F).
10809 *
10810 *   Arguments: bp - pointer to the indicated buf(9S) struct.
10811 *
10812 *     Context: Kernel thread context.
10813 */
10814
10815static void
10816sdmin(struct buf *bp)
10817{
10818	struct sd_lun	*un;
10819	int		instance;
10820
10821	instance = SDUNIT(bp->b_edev);
10822
10823	un = ddi_get_soft_state(sd_state, instance);
10824	ASSERT(un != NULL);
10825
10826	/*
10827	 * We depend on buf breakup to restrict
10828	 * IO size if it is enabled.
10829	 */
10830	if (un->un_buf_breakup_supported) {
10831		return;
10832	}
10833
10834	if (bp->b_bcount > un->un_max_xfer_size) {
10835		bp->b_bcount = un->un_max_xfer_size;
10836	}
10837}
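
/*
 * Note: sdmin() is the transfer-limit routine handed to physio(9F) and
 * aphysio(9F); see, for example, the physio() call in sdread() below.
 */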
10838
10839
10840/*
10841 *    Function: sdread
10842 *
10843 * Description: Driver's read(9e) entry point function.
10844 *
10845 *   Arguments: dev   - device number
10846 *		uio   - structure pointer describing where data is to be stored
10847 *			in user's space
10848 *		cred_p  - user credential pointer
10849 *
10850 * Return Code: ENXIO
10851 *		EIO
10852 *		EINVAL
10853 *		value returned by physio
10854 *
10855 *     Context: Kernel thread context.
10856 */
10857/* ARGSUSED */
10858static int
10859sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10860{
10861	struct sd_lun	*un = NULL;
10862	int		secmask;
10863	int		err = 0;
10864	sd_ssc_t	*ssc;
10865
10866	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10867		return (ENXIO);
10868	}
10869
10870	ASSERT(!mutex_owned(SD_MUTEX(un)));
10871
10872
10873	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10874		mutex_enter(SD_MUTEX(un));
10875		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * its power level is changing.
10879		 */
10880		while ((un->un_state == SD_STATE_SUSPENDED) ||
10881		    (un->un_state == SD_STATE_PM_CHANGING)) {
10882			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10883		}
10884		un->un_ncmds_in_driver++;
10885		mutex_exit(SD_MUTEX(un));
10886
10887		/* Initialize sd_ssc_t for internal uscsi commands */
10888		ssc = sd_ssc_init(un);
10889		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10890			err = EIO;
10891		} else {
10892			err = 0;
10893		}
10894		sd_ssc_fini(ssc);
10895
10896		mutex_enter(SD_MUTEX(un));
10897		un->un_ncmds_in_driver--;
10898		ASSERT(un->un_ncmds_in_driver >= 0);
10899		mutex_exit(SD_MUTEX(un));
10900		if (err != 0)
10901			return (err);
10902	}
10903
10904	/*
10905	 * Read requests are restricted to multiples of the system block size.
10906	 */
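	/*
	 * Worked example (illustrative only): with a 512-byte block size,
	 * secmask below is 0x1FF; an offset of 1024 passes the alignment
	 * checks (1024 & 0x1FF == 0), while an offset of 1000 fails them
	 * with EINVAL (1000 & 0x1FF == 488).
	 */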
10907	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10908	    !un->un_f_enable_rmw)
10909		secmask = un->un_tgt_blocksize - 1;
10910	else
10911		secmask = DEV_BSIZE - 1;
10912
10913	if (uio->uio_loffset & ((offset_t)(secmask))) {
10914		SD_ERROR(SD_LOG_READ_WRITE, un,
10915		    "sdread: file offset not modulo %d\n",
10916		    secmask + 1);
10917		err = EINVAL;
10918	} else if (uio->uio_iov->iov_len & (secmask)) {
10919		SD_ERROR(SD_LOG_READ_WRITE, un,
10920		    "sdread: transfer length not modulo %d\n",
10921		    secmask + 1);
10922		err = EINVAL;
10923	} else {
10924		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10925	}
10926
10927	return (err);
10928}
10929
10930
10931/*
10932 *    Function: sdwrite
10933 *
10934 * Description: Driver's write(9e) entry point function.
10935 *
10936 *   Arguments: dev   - device number
10937 *		uio   - structure pointer describing where data is stored in
10938 *			user's space
10939 *		cred_p  - user credential pointer
10940 *
10941 * Return Code: ENXIO
10942 *		EIO
10943 *		EINVAL
10944 *		value returned by physio
10945 *
10946 *     Context: Kernel thread context.
10947 */
10948/* ARGSUSED */
10949static int
10950sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10951{
10952	struct sd_lun	*un = NULL;
10953	int		secmask;
10954	int		err = 0;
10955	sd_ssc_t	*ssc;
10956
10957	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10958		return (ENXIO);
10959	}
10960
10961	ASSERT(!mutex_owned(SD_MUTEX(un)));
10962
10963	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10964		mutex_enter(SD_MUTEX(un));
10965		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * its power level is changing.
10969		 */
10970		while ((un->un_state == SD_STATE_SUSPENDED) ||
10971		    (un->un_state == SD_STATE_PM_CHANGING)) {
10972			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10973		}
10974		un->un_ncmds_in_driver++;
10975		mutex_exit(SD_MUTEX(un));
10976
10977		/* Initialize sd_ssc_t for internal uscsi commands */
10978		ssc = sd_ssc_init(un);
10979		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10980			err = EIO;
10981		} else {
10982			err = 0;
10983		}
10984		sd_ssc_fini(ssc);
10985
10986		mutex_enter(SD_MUTEX(un));
10987		un->un_ncmds_in_driver--;
10988		ASSERT(un->un_ncmds_in_driver >= 0);
10989		mutex_exit(SD_MUTEX(un));
10990		if (err != 0)
10991			return (err);
10992	}
10993
10994	/*
10995	 * Write requests are restricted to multiples of the system block size.
10996	 */
10997	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
10998	    !un->un_f_enable_rmw)
10999		secmask = un->un_tgt_blocksize - 1;
11000	else
11001		secmask = DEV_BSIZE - 1;
11002
11003	if (uio->uio_loffset & ((offset_t)(secmask))) {
11004		SD_ERROR(SD_LOG_READ_WRITE, un,
11005		    "sdwrite: file offset not modulo %d\n",
11006		    secmask + 1);
11007		err = EINVAL;
11008	} else if (uio->uio_iov->iov_len & (secmask)) {
11009		SD_ERROR(SD_LOG_READ_WRITE, un,
11010		    "sdwrite: transfer length not modulo %d\n",
11011		    secmask + 1);
11012		err = EINVAL;
11013	} else {
11014		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
11015	}
11016
11017	return (err);
11018}
11019
11020
11021/*
11022 *    Function: sdaread
11023 *
11024 * Description: Driver's aread(9e) entry point function.
11025 *
11026 *   Arguments: dev   - device number
11027 *		aio   - structure pointer describing where data is to be stored
11028 *		cred_p  - user credential pointer
11029 *
11030 * Return Code: ENXIO
11031 *		EIO
11032 *		EINVAL
11033 *		value returned by aphysio
11034 *
11035 *     Context: Kernel thread context.
11036 */
11037/* ARGSUSED */
11038static int
11039sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11040{
11041	struct sd_lun	*un = NULL;
11042	struct uio	*uio = aio->aio_uio;
11043	int		secmask;
11044	int		err = 0;
11045	sd_ssc_t	*ssc;
11046
11047	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11048		return (ENXIO);
11049	}
11050
11051	ASSERT(!mutex_owned(SD_MUTEX(un)));
11052
11053	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11054		mutex_enter(SD_MUTEX(un));
11055		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * its power level is changing.
11059		 */
11060		while ((un->un_state == SD_STATE_SUSPENDED) ||
11061		    (un->un_state == SD_STATE_PM_CHANGING)) {
11062			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11063		}
11064		un->un_ncmds_in_driver++;
11065		mutex_exit(SD_MUTEX(un));
11066
11067		/* Initialize sd_ssc_t for internal uscsi commands */
11068		ssc = sd_ssc_init(un);
11069		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11070			err = EIO;
11071		} else {
11072			err = 0;
11073		}
11074		sd_ssc_fini(ssc);
11075
11076		mutex_enter(SD_MUTEX(un));
11077		un->un_ncmds_in_driver--;
11078		ASSERT(un->un_ncmds_in_driver >= 0);
11079		mutex_exit(SD_MUTEX(un));
11080		if (err != 0)
11081			return (err);
11082	}
11083
11084	/*
11085	 * Read requests are restricted to multiples of the system block size.
11086	 */
11087	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11088	    !un->un_f_enable_rmw)
11089		secmask = un->un_tgt_blocksize - 1;
11090	else
11091		secmask = DEV_BSIZE - 1;
11092
11093	if (uio->uio_loffset & ((offset_t)(secmask))) {
11094		SD_ERROR(SD_LOG_READ_WRITE, un,
11095		    "sdaread: file offset not modulo %d\n",
11096		    secmask + 1);
11097		err = EINVAL;
11098	} else if (uio->uio_iov->iov_len & (secmask)) {
11099		SD_ERROR(SD_LOG_READ_WRITE, un,
11100		    "sdaread: transfer length not modulo %d\n",
11101		    secmask + 1);
11102		err = EINVAL;
11103	} else {
11104		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11105	}
11106
11107	return (err);
11108}
11109
11110
11111/*
11112 *    Function: sdawrite
11113 *
11114 * Description: Driver's awrite(9e) entry point function.
11115 *
11116 *   Arguments: dev   - device number
11117 *		aio   - structure pointer describing where data is stored
11118 *		cred_p  - user credential pointer
11119 *
11120 * Return Code: ENXIO
11121 *		EIO
11122 *		EINVAL
11123 *		value returned by aphysio
11124 *
11125 *     Context: Kernel thread context.
11126 */
11127/* ARGSUSED */
11128static int
11129sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11130{
11131	struct sd_lun	*un = NULL;
11132	struct uio	*uio = aio->aio_uio;
11133	int		secmask;
11134	int		err = 0;
11135	sd_ssc_t	*ssc;
11136
11137	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11138		return (ENXIO);
11139	}
11140
11141	ASSERT(!mutex_owned(SD_MUTEX(un)));
11142
11143	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11144		mutex_enter(SD_MUTEX(un));
11145		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * its power level is changing.
11149		 */
11150		while ((un->un_state == SD_STATE_SUSPENDED) ||
11151		    (un->un_state == SD_STATE_PM_CHANGING)) {
11152			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11153		}
11154		un->un_ncmds_in_driver++;
11155		mutex_exit(SD_MUTEX(un));
11156
11157		/* Initialize sd_ssc_t for internal uscsi commands */
11158		ssc = sd_ssc_init(un);
11159		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11160			err = EIO;
11161		} else {
11162			err = 0;
11163		}
11164		sd_ssc_fini(ssc);
11165
11166		mutex_enter(SD_MUTEX(un));
11167		un->un_ncmds_in_driver--;
11168		ASSERT(un->un_ncmds_in_driver >= 0);
11169		mutex_exit(SD_MUTEX(un));
11170		if (err != 0)
11171			return (err);
11172	}
11173
11174	/*
11175	 * Write requests are restricted to multiples of the system block size.
11176	 */
11177	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11178	    !un->un_f_enable_rmw)
11179		secmask = un->un_tgt_blocksize - 1;
11180	else
11181		secmask = DEV_BSIZE - 1;
11182
11183	if (uio->uio_loffset & ((offset_t)(secmask))) {
11184		SD_ERROR(SD_LOG_READ_WRITE, un,
11185		    "sdawrite: file offset not modulo %d\n",
11186		    secmask + 1);
11187		err = EINVAL;
11188	} else if (uio->uio_iov->iov_len & (secmask)) {
11189		SD_ERROR(SD_LOG_READ_WRITE, un,
11190		    "sdawrite: transfer length not modulo %d\n",
11191		    secmask + 1);
11192		err = EINVAL;
11193	} else {
11194		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11195	}
11196
11197	return (err);
11198}
11199
11200
11201
11202
11203
11204/*
11205 * Driver IO processing follows the following sequence:
11206 *
11207 *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11208 *         |                |                     ^
11209 *         v                v                     |
11210 * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11211 *         |                |                     |                   |
11212 *         v                |                     |                   |
11213 * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11214 *         |                |                     ^                   ^
11215 *         v                v                     |                   |
11216 * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11217 *         |                |                     |                   |
11218 *     +---+                |                     +------------+      +-------+
11219 *     |                    |                                  |              |
11220 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11221 *     |                    v                                  |              |
11222 *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11223 *     |                    |                                  ^              |
11224 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11225 *     |                    v                                  |              |
11226 *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11227 *     |                    |                                  ^              |
11228 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11229 *     |                    v                                  |              |
11230 *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11231 *     |                    |                                  ^              |
11232 *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11233 *     |                    v                                  |              |
11234 *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11235 *     |                    |                                  ^              |
11236 *     |                    |                                  |              |
11237 *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11238 *                          |                           ^
11239 *                          v                           |
11240 *                   sd_core_iostart()                  |
11241 *                          |                           |
11242 *                          |                           +------>(*destroypkt)()
11243 *                          +-> sd_start_cmds() <-+     |           |
11244 *                          |                     |     |           v
11245 *                          |                     |     |  scsi_destroy_pkt(9F)
11246 *                          |                     |     |
11247 *                          +->(*initpkt)()       +- sdintr()
11248 *                          |  |                        |  |
11249 *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11250 *                          |  +-> scsi_setup_cdb(9F)   |
11251 *                          |                           |
11252 *                          +--> scsi_transport(9F)     |
11253 *                                     |                |
11254 *                                     +----> SCSA ---->+
11255 *
11256 *
11257 * This code is based upon the following presumptions:
11258 *
11259 *   - iostart and iodone functions operate on buf(9S) structures. These
11260 *     functions perform the necessary operations on the buf(9S) and pass
11261 *     them along to the next function in the chain by using the macros
11262 *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11263 *     (for iodone side functions).
11264 *
11265 *   - The iostart side functions may sleep. The iodone side functions
11266 *     are called under interrupt context and may NOT sleep. Therefore
11267 *     iodone side functions also may not call iostart side functions.
11268 *     (NOTE: iostart side functions should NOT sleep for memory, as
11269 *     this could result in deadlock.)
11270 *
11271 *   - An iostart side function may call its corresponding iodone side
11272 *     function directly (if necessary).
11273 *
11274 *   - In the event of an error, an iostart side function can return a buf(9S)
11275 *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11276 *     b_error in the usual way of course).
11277 *
11278 *   - The taskq mechanism may be used by the iodone side functions to dispatch
11279 *     requests to the iostart side functions.  The iostart side functions in
11280 *     this case would be called under the context of a taskq thread, so it's
11281 *     OK for them to block/sleep/spin in this case.
11282 *
11283 *   - iostart side functions may allocate "shadow" buf(9S) structs and
11284 *     pass them along to the next function in the chain.  The corresponding
11285 *     iodone side functions must coalesce the "shadow" bufs and return
11286 *     the "original" buf to the next higher layer.
11287 *
11288 *   - The b_private field of the buf(9S) struct holds a pointer to
11289 *     an sd_xbuf struct, which contains information needed to
11290 *     construct the scsi_pkt for the command.
11291 *
11292 *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11293 *     layer must acquire & release the SD_MUTEX(un) as needed.
11294 */
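
/*
 * Illustrative sketch only (sd_example_iostart is hypothetical, not a
 * function in this driver): the general shape of an iostart-side
 * function under the conventions above, with <...> as a placeholder.
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		struct sd_xbuf *xp = SD_GET_XBUF(bp);
 *
 *		if (<per-layer setup on xp/bp fails>) {
 *			bioerror(bp, EIO);
 *			bp->b_resid = bp->b_bcount;
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 */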
11295
11296
11297/*
11298 * Create taskq for all targets in the system. This is created at
11299 * _init(9E) and destroyed at _fini(9E).
11300 *
 * Note: here we set the minalloc to a reasonably high number to ensure that
 * we will have an adequate supply of task entries available at interrupt time.
 * This is used in conjunction with the TASKQ_PREPOPULATE flag in
 * sd_taskq_create().  Since we do not want to sleep for allocations at
 * interrupt time, set maxalloc equal to minalloc. That way we will just fail
 * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
 * requests at any one instant in time.
11308 */
11309#define	SD_TASKQ_NUMTHREADS	8
11310#define	SD_TASKQ_MINALLOC	256
11311#define	SD_TASKQ_MAXALLOC	256
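
/*
 * Illustrative sketch only (sd_example_task is a hypothetical task
 * function): an iodone-side function would dispatch deferred work to
 * sd_tq without sleeping, roughly:
 *
 *	if (taskq_dispatch(sd_tq, sd_example_task, un, KM_NOSLEEP) == NULL) {
 *		<dispatch failed; fail or retry the command>
 *	}
 *
 * With TASKQ_PREPOPULATE and maxalloc == minalloc, such a failure is the
 * defined behavior once SD_TASKQ_MAXALLOC entries are outstanding.
 */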
11312
11313static taskq_t	*sd_tq = NULL;
11314_NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11315
11316static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11317static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11318
/*
 * The following task queue is created for the write part of
 * read-modify-write on non-512 block size devices.
 * Limit the number of threads to 1 for now; this number was chosen
 * considering that it currently applies only to DVD-RAM and MO drives,
 * for which performance is not the main criterion at this stage.
 * Note: It needs to be explored whether we can use a single taskq in future.
 */
11327#define	SD_WMR_TASKQ_NUMTHREADS	1
11328static taskq_t	*sd_wmr_tq = NULL;
11329_NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11330
11331/*
11332 *    Function: sd_taskq_create
11333 *
11334 * Description: Create taskq thread(s) and preallocate task entries
11335 *
11336 * Return Code: Returns a pointer to the allocated taskq_t.
11337 *
11338 *     Context: Can sleep. Requires blockable context.
11339 *
11340 *       Notes: - The taskq() facility currently is NOT part of the DDI.
 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11342 *		- taskq_create() will block for memory, also it will panic
11343 *		  if it cannot create the requested number of threads.
11344 *		- Currently taskq_create() creates threads that cannot be
11345 *		  swapped.
11346 *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11347 *		  supply of taskq entries at interrupt time (ie, so that we
11348 *		  do not have to sleep for memory)
11349 */
11350
11351static void
11352sd_taskq_create(void)
11353{
11354	char	taskq_name[TASKQ_NAMELEN];
11355
11356	ASSERT(sd_tq == NULL);
11357	ASSERT(sd_wmr_tq == NULL);
11358
11359	(void) snprintf(taskq_name, sizeof (taskq_name),
11360	    "%s_drv_taskq", sd_label);
11361	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11362	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11363	    TASKQ_PREPOPULATE));
11364
11365	(void) snprintf(taskq_name, sizeof (taskq_name),
11366	    "%s_rmw_taskq", sd_label);
11367	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11368	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11369	    TASKQ_PREPOPULATE));
11370}
11371
11372
11373/*
11374 *    Function: sd_taskq_delete
11375 *
11376 * Description: Complementary cleanup routine for sd_taskq_create().
11377 *
11378 *     Context: Kernel thread context.
11379 */
11380
11381static void
11382sd_taskq_delete(void)
11383{
11384	ASSERT(sd_tq != NULL);
11385	ASSERT(sd_wmr_tq != NULL);
11386	taskq_destroy(sd_tq);
11387	taskq_destroy(sd_wmr_tq);
11388	sd_tq = NULL;
11389	sd_wmr_tq = NULL;
11390}
11391
11392
11393/*
11394 *    Function: sdstrategy
11395 *
11396 * Description: Driver's strategy (9E) entry point function.
11397 *
11398 *   Arguments: bp - pointer to buf(9S)
11399 *
11400 * Return Code: Always returns zero
11401 *
11402 *     Context: Kernel thread context.
11403 */
11404
11405static int
11406sdstrategy(struct buf *bp)
11407{
11408	struct sd_lun *un;
11409
11410	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11411	if (un == NULL) {
11412		bioerror(bp, EIO);
11413		bp->b_resid = bp->b_bcount;
11414		biodone(bp);
11415		return (0);
11416	}
11417
	/* As was done in the past, fail new cmds if state is dumping. */
11419	if (un->un_state == SD_STATE_DUMPING) {
11420		bioerror(bp, ENXIO);
11421		bp->b_resid = bp->b_bcount;
11422		biodone(bp);
11423		return (0);
11424	}
11425
11426	ASSERT(!mutex_owned(SD_MUTEX(un)));
11427
	/*
	 * Commands may sneak in while we release the mutex in
	 * DDI_SUSPEND; we should block new commands. However, old
	 * commands that are still in the driver at this point should
	 * still be allowed to drain.
	 */
11434	mutex_enter(SD_MUTEX(un));
11435	/*
11436	 * Must wait here if either the device is suspended or
	 * its power level is changing.
11438	 */
11439	while ((un->un_state == SD_STATE_SUSPENDED) ||
11440	    (un->un_state == SD_STATE_PM_CHANGING)) {
11441		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11442	}
11443
11444	un->un_ncmds_in_driver++;
11445
	/*
	 * atapi: Since we are running the CD in PIO mode for now, we need
	 * to call bp_mapin here to avoid bp_mapin being called in interrupt
	 * context under the HBA's init_pkt routine.
	 */
11451	if (un->un_f_cfg_is_atapi == TRUE) {
11452		mutex_exit(SD_MUTEX(un));
11453		bp_mapin(bp);
11454		mutex_enter(SD_MUTEX(un));
11455	}
11456	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11457	    un->un_ncmds_in_driver);
11458
11459	if (bp->b_flags & B_WRITE)
11460		un->un_f_sync_cache_required = TRUE;
11461
11462	mutex_exit(SD_MUTEX(un));
11463
	/*
	 * This will (eventually) allocate the sd_xbuf area and
	 * call sd_xbuf_strategy().  We just want to return the
	 * result of ddi_xbuf_qstrategy so that we have an
	 * optimized tail call, which saves us a stack frame.
	 */
11470	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11471}
11472
11473
11474/*
11475 *    Function: sd_xbuf_strategy
11476 *
11477 * Description: Function for initiating IO operations via the
11478 *		ddi_xbuf_qstrategy() mechanism.
11479 *
11480 *     Context: Kernel thread context.
11481 */
11482
11483static void
11484sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11485{
11486	struct sd_lun *un = arg;
11487
11488	ASSERT(bp != NULL);
11489	ASSERT(xp != NULL);
11490	ASSERT(un != NULL);
11491	ASSERT(!mutex_owned(SD_MUTEX(un)));
11492
11493	/*
11494	 * Initialize the fields in the xbuf and save a pointer to the
11495	 * xbuf in bp->b_private.
11496	 */
11497	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11498
11499	/* Send the buf down the iostart chain */
11500	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11501}
11502
11503
11504/*
11505 *    Function: sd_xbuf_init
11506 *
11507 * Description: Prepare the given sd_xbuf struct for use.
11508 *
11509 *   Arguments: un - ptr to softstate
11510 *		bp - ptr to associated buf(9S)
11511 *		xp - ptr to associated sd_xbuf
11512 *		chain_type - IO chain type to use:
11513 *			SD_CHAIN_NULL
11514 *			SD_CHAIN_BUFIO
11515 *			SD_CHAIN_USCSI
11516 *			SD_CHAIN_DIRECT
11517 *			SD_CHAIN_DIRECT_PRIORITY
11518 *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11519 *			initialization; may be NULL if none.
11520 *
11521 *     Context: Kernel thread context
11522 */
11523
11524static void
11525sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11526	uchar_t chain_type, void *pktinfop)
11527{
11528	int index;
11529
11530	ASSERT(un != NULL);
11531	ASSERT(bp != NULL);
11532	ASSERT(xp != NULL);
11533
11534	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11535	    bp, chain_type);
11536
11537	xp->xb_un	= un;
11538	xp->xb_pktp	= NULL;
11539	xp->xb_pktinfo	= pktinfop;
11540	xp->xb_private	= bp->b_private;
11541	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11542
11543	/*
11544	 * Set up the iostart and iodone chain indexes in the xbuf, based
11545	 * upon the specified chain type to use.
11546	 */
11547	switch (chain_type) {
11548	case SD_CHAIN_NULL:
11549		/*
		 * Fall thru to just use the values for the buf type, even
		 * though for the NULL chain these values will never be used.
11552		 */
11553		/* FALLTHRU */
11554	case SD_CHAIN_BUFIO:
11555		index = un->un_buf_chain_type;
11556		if ((!un->un_f_has_removable_media) &&
11557		    (un->un_tgt_blocksize != 0) &&
11558		    (un->un_tgt_blocksize != DEV_BSIZE ||
11559		    un->un_f_enable_rmw)) {
11560			int secmask = 0, blknomask = 0;
11561			if (un->un_f_enable_rmw) {
11562				blknomask =
11563				    (un->un_phy_blocksize / DEV_BSIZE) - 1;
11564				secmask = un->un_phy_blocksize - 1;
11565			} else {
11566				blknomask =
11567				    (un->un_tgt_blocksize / DEV_BSIZE) - 1;
11568				secmask = un->un_tgt_blocksize - 1;
11569			}
11570
11571			if ((bp->b_lblkno & (blknomask)) ||
11572			    (bp->b_bcount & (secmask))) {
11573				if ((un->un_f_rmw_type !=
11574				    SD_RMW_TYPE_RETURN_ERROR) ||
11575				    un->un_f_enable_rmw) {
11576					if (un->un_f_pm_is_enabled == FALSE)
11577						index =
11578						    SD_CHAIN_INFO_MSS_DSK_NO_PM;
11579					else
11580						index =
11581						    SD_CHAIN_INFO_MSS_DISK;
11582				}
11583			}
11584		}
11585		break;
11586	case SD_CHAIN_USCSI:
11587		index = un->un_uscsi_chain_type;
11588		break;
11589	case SD_CHAIN_DIRECT:
11590		index = un->un_direct_chain_type;
11591		break;
11592	case SD_CHAIN_DIRECT_PRIORITY:
11593		index = un->un_priority_chain_type;
11594		break;
11595	default:
11596		/* We're really broken if we ever get here... */
11597		panic("sd_xbuf_init: illegal chain type!");
11598		/*NOTREACHED*/
11599	}
11600
11601	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11602	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11603
	/*
	 * It might be a bit easier to simply bzero the entire xbuf above,
	 * but it turns out that since we init a fair number of members
	 * anyway, we save a fair number of cycles by doing explicit
	 * assignment of zero.
	 */
11609	xp->xb_pkt_flags	= 0;
11610	xp->xb_dma_resid	= 0;
11611	xp->xb_retry_count	= 0;
11612	xp->xb_victim_retry_count = 0;
11613	xp->xb_ua_retry_count	= 0;
11614	xp->xb_nr_retry_count	= 0;
11615	xp->xb_sense_bp		= NULL;
11616	xp->xb_sense_status	= 0;
11617	xp->xb_sense_state	= 0;
11618	xp->xb_sense_resid	= 0;
11619	xp->xb_ena		= 0;
11620
11621	bp->b_private	= xp;
11622	bp->b_flags	&= ~(B_DONE | B_ERROR);
11623	bp->b_resid	= 0;
11624	bp->av_forw	= NULL;
11625	bp->av_back	= NULL;
11626	bioerror(bp, 0);
11627
11628	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11629}
11630
11631
11632/*
11633 *    Function: sd_uscsi_strategy
11634 *
11635 * Description: Wrapper for calling into the USCSI chain via physio(9F)
11636 *
11637 *   Arguments: bp - buf struct ptr
11638 *
11639 * Return Code: Always returns 0
11640 *
11641 *     Context: Kernel thread context
11642 */
11643
11644static int
11645sd_uscsi_strategy(struct buf *bp)
11646{
11647	struct sd_lun		*un;
11648	struct sd_uscsi_info	*uip;
11649	struct sd_xbuf		*xp;
11650	uchar_t			chain_type;
11651	uchar_t			cmd;
11652
11653	ASSERT(bp != NULL);
11654
11655	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11656	if (un == NULL) {
11657		bioerror(bp, EIO);
11658		bp->b_resid = bp->b_bcount;
11659		biodone(bp);
11660		return (0);
11661	}
11662
11663	ASSERT(!mutex_owned(SD_MUTEX(un)));
11664
11665	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11666
11667	/*
11668	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11669	 */
11670	ASSERT(bp->b_private != NULL);
11671	uip = (struct sd_uscsi_info *)bp->b_private;
11672	cmd = ((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_cdb[0];
11673
11674	mutex_enter(SD_MUTEX(un));
	/*
	 * atapi: Since we are running the CD in PIO mode for now, we need
	 * to call bp_mapin here to avoid bp_mapin being called in interrupt
	 * context under the HBA's init_pkt routine.
	 */
11680	if (un->un_f_cfg_is_atapi == TRUE) {
11681		mutex_exit(SD_MUTEX(un));
11682		bp_mapin(bp);
11683		mutex_enter(SD_MUTEX(un));
11684	}
11685	un->un_ncmds_in_driver++;
11686	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11687	    un->un_ncmds_in_driver);
11688
11689	if ((bp->b_flags & B_WRITE) && (bp->b_bcount != 0) &&
11690	    (cmd != SCMD_MODE_SELECT) && (cmd != SCMD_MODE_SELECT_G1))
11691		un->un_f_sync_cache_required = TRUE;
11692
11693	mutex_exit(SD_MUTEX(un));
11694
11695	switch (uip->ui_flags) {
11696	case SD_PATH_DIRECT:
11697		chain_type = SD_CHAIN_DIRECT;
11698		break;
11699	case SD_PATH_DIRECT_PRIORITY:
11700		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11701		break;
11702	default:
11703		chain_type = SD_CHAIN_USCSI;
11704		break;
11705	}
11706
	/*
	 * We may allocate an extra buf for external USCSI commands. If the
	 * application asks for more than 20 bytes of sense data via USCSI,
	 * the SCSA layer will allocate a 252-byte sense buf for that
	 * command.
	 */
11712	if (((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_rqlen >
11713	    SENSE_LENGTH) {
11714		xp = kmem_zalloc(sizeof (struct sd_xbuf) - SENSE_LENGTH +
11715		    MAX_SENSE_LENGTH, KM_SLEEP);
11716	} else {
11717		xp = kmem_zalloc(sizeof (struct sd_xbuf), KM_SLEEP);
11718	}
11719
11720	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11721
11722	/* Use the index obtained within xbuf_init */
11723	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11724
11725	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11726
11727	return (0);
11728}
11729
11730/*
11731 *    Function: sd_send_scsi_cmd
11732 *
11733 * Description: Runs a USCSI command for user (when called thru sdioctl),
11734 *		or for the driver
11735 *
11736 *   Arguments: dev - the dev_t for the device
11737 *		incmd - ptr to a valid uscsi_cmd struct
11738 *		flag - bit flag, indicating open settings, 32/64 bit type
11739 *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11740 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11741 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11742 *			to use the USCSI "direct" chain and bypass the normal
11743 *			command waitq.
11744 *
11745 * Return Code: 0 -  successful completion of the given command
11746 *		EIO - scsi_uscsi_handle_command() failed
11747 *		ENXIO  - soft state not found for specified dev
11748 *		EINVAL
11749 *		EFAULT - copyin/copyout error
11750 *		return code of scsi_uscsi_handle_command():
11751 *			EIO
11752 *			ENXIO
11753 *			EACCES
11754 *
11755 *     Context: Waits for command to complete. Can sleep.
11756 */
11757
11758static int
11759sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
11760	enum uio_seg dataspace, int path_flag)
11761{
11762	struct sd_lun	*un;
11763	sd_ssc_t	*ssc;
11764	int		rval;
11765
11766	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11767	if (un == NULL) {
11768		return (ENXIO);
11769	}
11770
11771	/*
	 * Use sd_ssc_send to handle the uscsi cmd.
11773	 */
11774	ssc = sd_ssc_init(un);
11775	rval = sd_ssc_send(ssc, incmd, flag, dataspace, path_flag);
11776	sd_ssc_fini(ssc);
11777
11778	return (rval);
11779}
11780
11781/*
11782 *    Function: sd_ssc_init
11783 *
 * Description: Uscsi end-users call this function to initialize the
 *              necessary fields, such as uscsi_cmd and sd_uscsi_info.
11786 *
 *              The return value of sd_send_scsi_cmd will be treated as a
 *              fault in various conditions. Even when it is non-zero, some
 *              callers may ignore the return value. That is to say, we can
 *              not make an accurate assessment in sdintr, since a command
 *              failing in sdintr does not mean the caller of
 *              sd_send_scsi_cmd will treat it as a real failure.
 *
 *              To avoid printing too many error logs for a failed uscsi
 *              packet that the caller may not treat as a failure, sd will
 *              keep silent when handling all uscsi commands.
11797 *
 *              During detach->attach and attach->open, for some types of
 *              problems, the driver should be providing information about
 *              the problem encountered. These paths use USCSI_SILENT,
 *              which suppresses all driver information. The result is that
 *              no information about the problem is available. Being
 *              completely silent during this time is inappropriate. The
 *              driver needs a more selective filter than USCSI_SILENT, so
 *              that information related to faults is provided.
11806 *
 *              To make an accurate assessment, the caller of
 *              sd_send_scsi_USCSI_CMD should take ownership and gather
 *              the information necessary to print error messages.
 *
 *              If we want to print the necessary info for a uscsi command,
 *              we need to keep the uscsi_cmd and sd_uscsi_info until we
 *              can make the assessment. We use sd_ssc_init to alloc the
 *              necessary structs for sending a uscsi command, and we are
 *              also responsible for freeing the memory by calling
 *              sd_ssc_fini.
11817 *
 *              The calling sequence will look like:
11819 *              sd_ssc_init->
11820 *
11821 *                  ...
11822 *
11823 *                  sd_send_scsi_USCSI_CMD->
11824 *                      sd_ssc_send-> - - - sdintr
11825 *                  ...
11826 *
 *                  if we think the return value should be treated as a
 *                  failure, we make the assessment here and print out the
 *                  necessary info by retrieving uscsi_cmd and sd_uscsi_info
11830 *
11831 *                  ...
11832 *
11833 *              sd_ssc_fini
11834 *
11835 *
11836 *   Arguments: un - pointer to driver soft state (unit) structure for this
11837 *                   target.
11838 *
 * Return code: sd_ssc_t - pointer to the allocated sd_ssc_t struct; it
 *                         contains uscsi_cmd and sd_uscsi_info.
 *                  NULL - if memory cannot be allocated for the sd_ssc_t
 *                         struct
11842 *
11843 *     Context: Kernel Thread.
11844 */
11845static sd_ssc_t *
11846sd_ssc_init(struct sd_lun *un)
11847{
11848	sd_ssc_t		*ssc;
11849	struct uscsi_cmd	*ucmdp;
11850	struct sd_uscsi_info	*uip;
11851
11852	ASSERT(un != NULL);
11853	ASSERT(!mutex_owned(SD_MUTEX(un)));
11854
11855	/*
11856	 * Allocate sd_ssc_t structure
11857	 */
11858	ssc = kmem_zalloc(sizeof (sd_ssc_t), KM_SLEEP);
11859
11860	/*
11861	 * Allocate uscsi_cmd by calling scsi_uscsi_alloc common routine
11862	 */
11863	ucmdp = scsi_uscsi_alloc();
11864
11865	/*
11866	 * Allocate sd_uscsi_info structure
11867	 */
11868	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11869
11870	ssc->ssc_uscsi_cmd = ucmdp;
11871	ssc->ssc_uscsi_info = uip;
11872	ssc->ssc_un = un;
11873
11874	return (ssc);
11875}
11876
11877/*
11878 * Function: sd_ssc_fini
11879 *
 * Description: Free the sd_ssc_t struct and the structures hanging
 *              off it.
 *
 * Arguments: ssc - pointer to the sd_ssc_t struct to be freed.
11883 */
11884static void
11885sd_ssc_fini(sd_ssc_t *ssc)
11886{
11887	scsi_uscsi_free(ssc->ssc_uscsi_cmd);
11888
11889	if (ssc->ssc_uscsi_info != NULL) {
11890		kmem_free(ssc->ssc_uscsi_info, sizeof (struct sd_uscsi_info));
11891		ssc->ssc_uscsi_info = NULL;
11892	}
11893
11894	kmem_free(ssc, sizeof (sd_ssc_t));
11895	ssc = NULL;
11896}
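
/*
 * Taken together, sd_ssc_init/sd_ssc_send/sd_ssc_assessment/sd_ssc_fini
 * form the lifecycle of an sd_ssc_t. The following is a rough,
 * hypothetical sketch of an internal caller (error handling elided, and
 * the TEST UNIT READY setup shown purely as an illustration):
 *
 *	struct uscsi_cmd	ucmd;
 *	union scsi_cdb		cdb;
 *	sd_ssc_t		*ssc;
 *	int			rval;
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	bzero(&cdb, sizeof (cdb));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	ucmd.uscsi_timeout = un->un_uscsi_timeout;
 *
 *	ssc = sd_ssc_init(un);
 *	rval = sd_ssc_send(ssc, &ucmd, FKIOCTL, UIO_SYSSPACE,
 *	    SD_PATH_DIRECT);
 *	if (rval == 0)
 *		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
 *	else
 *		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
 *	sd_ssc_fini(ssc);
 */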
11897
11898/*
11899 * Function: sd_ssc_send
11900 *
11901 * Description: Runs a USCSI command for user when called through sdioctl,
11902 *              or for the driver.
11903 *
 *   Arguments: ssc - pointer to the sd_ssc_t struct that carries the
 *                    uscsi_cmd and sd_uscsi_info structs.
11906 *		incmd - ptr to a valid uscsi_cmd struct
11907 *		flag - bit flag, indicating open settings, 32/64 bit type
11908 *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11909 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11910 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11911 *			to use the USCSI "direct" chain and bypass the normal
11912 *			command waitq.
11913 *
11914 * Return Code: 0 -  successful completion of the given command
 *		EIO - scsi_uscsi_handle_cmd() failed
11916 *		ENXIO  - soft state not found for specified dev
11917 *		ECANCELED - command cancelled due to low power
11918 *		EINVAL
11919 *		EFAULT - copyin/copyout error
 *		return code of scsi_uscsi_handle_cmd():
11921 *			EIO
11922 *			ENXIO
11923 *			EACCES
11924 *
11925 *     Context: Kernel Thread;
11926 *              Waits for command to complete. Can sleep.
11927 */
11928static int
11929sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd, int flag,
11930	enum uio_seg dataspace, int path_flag)
11931{
11932	struct sd_uscsi_info	*uip;
11933	struct uscsi_cmd	*uscmd;
11934	struct sd_lun		*un;
11935	dev_t			dev;
11936
11937	int	format = 0;
11938	int	rval;
11939
11940	ASSERT(ssc != NULL);
11941	un = ssc->ssc_un;
11942	ASSERT(un != NULL);
11943	uscmd = ssc->ssc_uscsi_cmd;
11944	ASSERT(uscmd != NULL);
11945	ASSERT(!mutex_owned(SD_MUTEX(un)));
11946	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
11947		/*
		 * If we enter here, it indicates that the previous uscsi
		 * command has not been processed by sd_ssc_assessment.
		 * This violates our rules for FMA telemetry processing.
		 * We should print this message and the last undisposed
11952		 * uscsi command.
11953		 */
11954		if (uscmd->uscsi_cdb != NULL) {
11955			SD_INFO(SD_LOG_SDTEST, un,
11956			    "sd_ssc_send is missing the alternative "
11957			    "sd_ssc_assessment when running command 0x%x.\n",
11958			    uscmd->uscsi_cdb[0]);
11959		}
11960		/*
11961		 * Set the ssc_flags to SSC_FLAGS_UNKNOWN, which should be
11962		 * the initial status.
11963		 */
11964		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11965	}
11966
11967	/*
	 * We need to make sure each sd_ssc_send is followed by a
	 * sd_ssc_assessment, to avoid losing FMA telemetry.
11970	 */
11971	ssc->ssc_flags |= SSC_FLAGS_NEED_ASSESSMENT;
11972
11973	/*
11974	 * if USCSI_PMFAILFAST is set and un is in low power, fail the
11975	 * command immediately.
11976	 */
11977	mutex_enter(SD_MUTEX(un));
11978	mutex_enter(&un->un_pm_mutex);
11979	if ((uscmd->uscsi_flags & USCSI_PMFAILFAST) &&
11980	    SD_DEVICE_IS_IN_LOW_POWER(un)) {
11981		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send:"
11982		    "un:0x%p is in low power\n", un);
11983		mutex_exit(&un->un_pm_mutex);
11984		mutex_exit(SD_MUTEX(un));
11985		return (ECANCELED);
11986	}
11987	mutex_exit(&un->un_pm_mutex);
11988	mutex_exit(SD_MUTEX(un));
11989
11990#ifdef SDDEBUG
11991	switch (dataspace) {
11992	case UIO_USERSPACE:
11993		SD_TRACE(SD_LOG_IO, un,
11994		    "sd_ssc_send: entry: un:0x%p UIO_USERSPACE\n", un);
11995		break;
11996	case UIO_SYSSPACE:
11997		SD_TRACE(SD_LOG_IO, un,
11998		    "sd_ssc_send: entry: un:0x%p UIO_SYSSPACE\n", un);
11999		break;
12000	default:
12001		SD_TRACE(SD_LOG_IO, un,
12002		    "sd_ssc_send: entry: un:0x%p UNEXPECTED SPACE\n", un);
12003		break;
12004	}
12005#endif
12006
12007	rval = scsi_uscsi_copyin((intptr_t)incmd, flag,
12008	    SD_ADDRESS(un), &uscmd);
12009	if (rval != 0) {
		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send: "
		    "scsi_uscsi_copyin failed\n");
12012		return (rval);
12013	}
12014
12015	if ((uscmd->uscsi_cdb != NULL) &&
12016	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
12017		mutex_enter(SD_MUTEX(un));
12018		un->un_f_format_in_progress = TRUE;
12019		mutex_exit(SD_MUTEX(un));
12020		format = 1;
12021	}
12022
12023	/*
12024	 * Allocate an sd_uscsi_info struct and fill it with the info
12025	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
12026	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
12027	 * since we allocate the buf here in this function, we do not
12028	 * need to preserve the prior contents of b_private.
12029	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
12030	 */
12031	uip = ssc->ssc_uscsi_info;
12032	uip->ui_flags = path_flag;
12033	uip->ui_cmdp = uscmd;
12034
12035	/*
12036	 * Commands sent with priority are intended for error recovery
12037	 * situations, and do not have retries performed.
12038	 */
12039	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12040		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12041	}
12042	uscmd->uscsi_flags &= ~USCSI_NOINTR;
12043
12044	dev = SD_GET_DEV(un);
12045	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
12046	    sd_uscsi_strategy, NULL, uip);
12047
12048	/*
	 * Mark ssc_flags right after handle_cmd to record that the
	 * uscsi command has been sent.
12051	 */
12052	ssc->ssc_flags |= SSC_FLAGS_CMD_ISSUED;
12053
12054#ifdef SDDEBUG
12055	SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
12056	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12057	    uscmd->uscsi_status, uscmd->uscsi_resid);
12058	if (uscmd->uscsi_bufaddr != NULL) {
12059		SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
12060		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12061		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12062		if (dataspace == UIO_SYSSPACE) {
12063			SD_DUMP_MEMORY(un, SD_LOG_IO,
12064			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12065			    uscmd->uscsi_buflen, SD_LOG_HEX);
12066		}
12067	}
12068#endif
12069
12070	if (format == 1) {
12071		mutex_enter(SD_MUTEX(un));
12072		un->un_f_format_in_progress = FALSE;
12073		mutex_exit(SD_MUTEX(un));
12074	}
12075
12076	(void) scsi_uscsi_copyout((intptr_t)incmd, uscmd);
12077
12078	return (rval);
12079}
12080
12081/*
12082 *     Function: sd_ssc_print
12083 *
 * Description: Print information about the uscsi command and any
 *              available sense data to the console/system log.
 *
 * Arguments: ssc - pointer to the sd_ssc_t struct that carries the
 *                  uscsi_cmd and sd_uscsi_info structs.
 *            sd_severity - log level.
12089 *     Context: Kernel thread or interrupt context.
12090 */
12091static void
12092sd_ssc_print(sd_ssc_t *ssc, int sd_severity)
12093{
12094	struct uscsi_cmd	*ucmdp;
12095	struct scsi_device	*devp;
12096	dev_info_t 		*devinfo;
12097	uchar_t			*sensep;
12098	int			senlen;
12099	union scsi_cdb		*cdbp;
12100	uchar_t			com;
12101	extern struct scsi_key_strings scsi_cmds[];
12102
12103	ASSERT(ssc != NULL);
12104	ASSERT(ssc->ssc_un != NULL);
12105
12106	if (SD_FM_LOG(ssc->ssc_un) != SD_FM_LOG_EREPORT)
12107		return;
12108	ucmdp = ssc->ssc_uscsi_cmd;
12109	devp = SD_SCSI_DEVP(ssc->ssc_un);
12110	devinfo = SD_DEVINFO(ssc->ssc_un);
12111	ASSERT(ucmdp != NULL);
12112	ASSERT(devp != NULL);
12113	ASSERT(devinfo != NULL);
12114	sensep = (uint8_t *)ucmdp->uscsi_rqbuf;
12115	senlen = ucmdp->uscsi_rqlen - ucmdp->uscsi_rqresid;
12116	cdbp = (union scsi_cdb *)ucmdp->uscsi_cdb;
12117
	/* In certain cases (like DOORLOCK), the cdb could be NULL. */
12119	if (cdbp == NULL)
12120		return;
	/* We don't print the log if no sense data is available. */
12122	if (senlen == 0)
12123		sensep = NULL;
12124	com = cdbp->scc_cmd;
12125	scsi_generic_errmsg(devp, sd_label, sd_severity, 0, 0, com,
12126	    scsi_cmds, sensep, ssc->ssc_un->un_additional_codes, NULL);
12127}
12128
12129/*
12130 *     Function: sd_ssc_assessment
12131 *
 * Description: We use this function to make an assessment at the point
 *              where the SD driver may encounter a potential error.
 *
 * Arguments: ssc - pointer to the sd_ssc_t struct that carries the
 *                  uscsi_cmd and sd_uscsi_info structs.
 *            tp_assess - a hint of the strategy for ereport posting.
 *            Possible values of tp_assess include:
 *                SD_FMT_IGNORE - we don't post any ereport because we're
 *                sure that it is OK to ignore the underlying problems.
 *                SD_FMT_IGNORE_COMPROMISE - we don't post any ereport for
 *                now, but it might not be correct to ignore the underlying
 *                hardware error.
 *                SD_FMT_STATUS_CHECK - we will post an ereport with the
 *                payload driver-assessment of value "fail" or "fatal"
 *                (depending on what information we have here). This
 *                assessment value is usually set when the SD driver thinks
 *                a potential error occurred (typically, when the return
 *                value of the SCSI command is EIO).
 *                SD_FMT_STANDARD - we will post an ereport with the payload
 *                driver-assessment of value "info". This assessment value
 *                is set when the SCSI command returns successfully but
 *                with sense data sent back.
12154 *
12155 *     Context: Kernel thread.
12156 */
12157static void
12158sd_ssc_assessment(sd_ssc_t *ssc, enum sd_type_assessment tp_assess)
12159{
12160	int senlen = 0;
12161	struct uscsi_cmd *ucmdp = NULL;
12162	struct sd_lun *un;
12163
12164	ASSERT(ssc != NULL);
12165	un = ssc->ssc_un;
12166	ASSERT(un != NULL);
12167	ucmdp = ssc->ssc_uscsi_cmd;
12168	ASSERT(ucmdp != NULL);
12169
12170	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
12171		ssc->ssc_flags &= ~SSC_FLAGS_NEED_ASSESSMENT;
12172	} else {
12173		/*
		 * If we enter here, it indicates a wrong calling sequence
		 * of sd_ssc_send and sd_ssc_assessment; the two should be
		 * called as a pair, to avoid losing FMA telemetry.
12178		 */
12179		if (ucmdp->uscsi_cdb != NULL) {
12180			SD_INFO(SD_LOG_SDTEST, un,
12181			    "sd_ssc_assessment is missing the "
12182			    "alternative sd_ssc_send when running 0x%x, "
12183			    "or there are superfluous sd_ssc_assessment for "
12184			    "the same sd_ssc_send.\n",
12185			    ucmdp->uscsi_cdb[0]);
12186		}
12187		/*
12188		 * Set the ssc_flags to the initial value to avoid passing
12189		 * down dirty flags to the following sd_ssc_send function.
12190		 */
12191		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12192		return;
12193	}
12194
12195	/*
	 * Only handle an issued command which is waiting for assessment.
	 * A command which has not been issued will not have
	 * SSC_FLAGS_INVALID_DATA set, so it is OK to just return here.
12199	 */
12200	if (!(ssc->ssc_flags & SSC_FLAGS_CMD_ISSUED)) {
12201		sd_ssc_print(ssc, SCSI_ERR_INFO);
12202		return;
12203	} else {
12204		/*
		 * For an issued command, we should clear this flag so
		 * that the sd_ssc_t structure can be reused for
		 * multiple uscsi commands.
12208		 */
12209		ssc->ssc_flags &= ~SSC_FLAGS_CMD_ISSUED;
12210	}
12211
12212	/*
	 * We will not deal with non-retryable commands (flag USCSI_DIAGNOSE
	 * set) here, and we should clear the ssc_flags before returning.
12215	 */
12216	if (ucmdp->uscsi_flags & USCSI_DIAGNOSE) {
12217		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12218		return;
12219	}
12220
12221	switch (tp_assess) {
12222	case SD_FMT_IGNORE:
12223	case SD_FMT_IGNORE_COMPROMISE:
12224		break;
12225	case SD_FMT_STATUS_CHECK:
12226		/*
		 * For a failed command (including a command that succeeded
		 * but sent back invalid data).
12229		 */
12230		sd_ssc_post(ssc, SD_FM_DRV_FATAL);
12231		break;
12232	case SD_FMT_STANDARD:
12233		/*
		 * Always used for commands that succeed, possibly with
		 * sense data sent back.
12236		 * Limitation:
12237		 *	We can only handle a succeeded command with sense
12238		 *	data sent back when auto-request-sense is enabled.
12239		 */
12240		senlen = ssc->ssc_uscsi_cmd->uscsi_rqlen -
12241		    ssc->ssc_uscsi_cmd->uscsi_rqresid;
12242		if ((ssc->ssc_uscsi_info->ui_pkt_state & STATE_ARQ_DONE) &&
12243		    (un->un_f_arq_enabled == TRUE) &&
12244		    senlen > 0 &&
12245		    ssc->ssc_uscsi_cmd->uscsi_rqbuf != NULL) {
12246			sd_ssc_post(ssc, SD_FM_DRV_NOTICE);
12247		}
12248		break;
12249	default:
12250		/*
		 * We should not see any other type of assessment.
12252		 */
12253		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
12254		    "sd_ssc_assessment got wrong "
12255		    "sd_type_assessment %d.\n", tp_assess);
12256		break;
12257	}
12258	/*
	 * Clear the ssc_flags before returning.
12260	 */
12261	ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12262}
12263
12264/*
12265 *    Function: sd_ssc_post
12266 *
 * Description: 1. map the given driver assessment to a log severity.
 *              2. print the log via sd_ssc_print.
 *              3. call sd_ssc_ereport_post to post an ereport if possible.
12270 *
12271 *    Context: May be called from kernel thread or interrupt context.
12272 */
12273static void
12274sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess)
12275{
12276	struct sd_lun	*un;
12277	int		sd_severity;
12278
12279	ASSERT(ssc != NULL);
12280	un = ssc->ssc_un;
12281	ASSERT(un != NULL);
12282
12283	/*
	 * We may enter here from sd_ssc_assessment (for a USCSI command)
	 * or directly from sdintr context.
	 * We don't handle non-disk drives (CD-ROM, removable media).
	 * Clear the ssc_flags before returning in case we've set
	 * SSC_FLAGS_INVALID_XXX, which should be skipped for a non-disk
	 * drive.
12290	 */
12291	if (ISCD(un) || un->un_f_has_removable_media) {
12292		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12293		return;
12294	}
12295
12296	switch (sd_assess) {
12297		case SD_FM_DRV_FATAL:
12298			sd_severity = SCSI_ERR_FATAL;
12299			break;
12300		case SD_FM_DRV_RECOVERY:
12301			sd_severity = SCSI_ERR_RECOVERED;
12302			break;
12303		case SD_FM_DRV_RETRY:
12304			sd_severity = SCSI_ERR_RETRYABLE;
12305			break;
12306		case SD_FM_DRV_NOTICE:
12307			sd_severity = SCSI_ERR_INFO;
12308			break;
12309		default:
12310			sd_severity = SCSI_ERR_UNKNOWN;
12311	}
12312	/* print log */
12313	sd_ssc_print(ssc, sd_severity);
12314
12315	/* always post ereport */
12316	sd_ssc_ereport_post(ssc, sd_assess);
12317}
12318
12319/*
12320 *    Function: sd_ssc_set_info
12321 *
 * Description: Mark ssc_flags and set ssc_info, which will become the
 *              payload of a uderr ereport. This function causes
 *              sd_ssc_ereport_post to post a uderr ereport only.
 *              In addition, when ssc_flags == SSC_FLAGS_INVALID_DATA
 *              (USCSI), the function also calls SD_ERROR or scsi_log for
 *              a CDROM/removable-media/DDI_FM_NOT_CAPABLE device.
 *
 * Arguments: ssc - pointer to the sd_ssc_t struct that carries the
 *                  uscsi_cmd and sd_uscsi_info structs.
 *            ssc_flags - indicates the sub-category of a uderr.
 *            comp - this argument is meaningful only when
 *                   ssc_flags == SSC_FLAGS_INVALID_DATA, and its possible
 *                   values include:
 *                   > 0, SD_ERROR is used with comp as the driver logging
 *                   component;
 *                   = 0, scsi_log is used to log error telemetry;
 *                   < 0, no log is available for this telemetry.
12339 *
12340 *    Context: Kernel thread or interrupt context
12341 */
12342static void
12343sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp, const char *fmt, ...)
12344{
12345	va_list	ap;
12346
12347	ASSERT(ssc != NULL);
12348	ASSERT(ssc->ssc_un != NULL);
12349
12350	ssc->ssc_flags |= ssc_flags;
12351	va_start(ap, fmt);
12352	(void) vsnprintf(ssc->ssc_info, sizeof (ssc->ssc_info), fmt, ap);
12353	va_end(ap);
12354
12355	/*
12356	 * If SSC_FLAGS_INVALID_DATA is set, it should be a uscsi command
	 * with invalid data sent back. For a non-uscsi command, the
	 * following code is bypassed.
12359	 */
12360	if (ssc_flags & SSC_FLAGS_INVALID_DATA) {
12361		if (SD_FM_LOG(ssc->ssc_un) == SD_FM_LOG_NSUP) {
12362			/*
			 * If the error belongs to a certain component and
			 * we do not want it to show up on the console, we
			 * use SD_ERROR; otherwise scsi_log is
			 * preferred.
12367			 */
12368			if (comp > 0) {
12369				SD_ERROR(comp, ssc->ssc_un, ssc->ssc_info);
12370			} else if (comp == 0) {
12371				scsi_log(SD_DEVINFO(ssc->ssc_un), sd_label,
12372				    CE_WARN, ssc->ssc_info);
12373			}
12374		}
12375	}
12376}
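
/*
 * As a hypothetical illustration of the comp argument (the message,
 * lengths, and logging component shown are made up): a caller that
 * received a truncated MODE SENSE header might record the telemetry
 * under a driver logging component,
 *
 *	sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
 *	    "mode sense: got %d bytes, expected %d\n", got, expected);
 *
 * whereas passing 0 for comp would route the same message through
 * scsi_log(), and a negative comp would record the telemetry with no
 * log at all.
 */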
12377
12378/*
12379 *    Function: sd_buf_iodone
12380 *
12381 * Description: Frees the sd_xbuf & returns the buf to its originator.
12382 *
12383 *     Context: May be called from interrupt context.
12384 */
12385/* ARGSUSED */
12386static void
12387sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12388{
12389	struct sd_xbuf *xp;
12390
12391	ASSERT(un != NULL);
12392	ASSERT(bp != NULL);
12393	ASSERT(!mutex_owned(SD_MUTEX(un)));
12394
12395	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12396
12397	xp = SD_GET_XBUF(bp);
12398	ASSERT(xp != NULL);
12399
12400	/* xbuf is gone after this */
12401	if (ddi_xbuf_done(bp, un->un_xbuf_attr)) {
12402		mutex_enter(SD_MUTEX(un));
12403
12404		/*
		 * Record the time when the command completed.
		 * This is used to determine whether the system has been
		 * idle long enough to report it idle to the PM framework.
		 * This lowers the overhead, and therefore improves
		 * per-I/O performance.
12410		 */
12411		un->un_pm_idle_time = ddi_get_time();
12412
12413		un->un_ncmds_in_driver--;
12414		ASSERT(un->un_ncmds_in_driver >= 0);
12415		SD_INFO(SD_LOG_IO, un,
12416		    "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12417		    un->un_ncmds_in_driver);
12418
12419		mutex_exit(SD_MUTEX(un));
12420	}
12421
12422	biodone(bp);				/* bp is gone after this */
12423
12424	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12425}
12426
12427
12428/*
12429 *    Function: sd_uscsi_iodone
12430 *
12431 * Description: Frees the sd_xbuf & returns the buf to its originator.
12432 *
12433 *     Context: May be called from interrupt context.
12434 */
12435/* ARGSUSED */
12436static void
12437sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12438{
12439	struct sd_xbuf *xp;
12440
12441	ASSERT(un != NULL);
12442	ASSERT(bp != NULL);
12443
12444	xp = SD_GET_XBUF(bp);
12445	ASSERT(xp != NULL);
12446	ASSERT(!mutex_owned(SD_MUTEX(un)));
12447
12448	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12449
12450	bp->b_private = xp->xb_private;
12451
12452	mutex_enter(SD_MUTEX(un));
12453
12454	/*
	 * Record the time when the command completed.
	 * This is used to determine whether the system has been
	 * idle long enough to report it idle to the PM framework.
	 * This lowers the overhead, and therefore improves
	 * per-I/O performance.
12460	 */
12461	un->un_pm_idle_time = ddi_get_time();
12462
12463	un->un_ncmds_in_driver--;
12464	ASSERT(un->un_ncmds_in_driver >= 0);
12465	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12466	    un->un_ncmds_in_driver);
12467
12468	mutex_exit(SD_MUTEX(un));
12469
12470	if (((struct uscsi_cmd *)(xp->xb_pktinfo))->uscsi_rqlen >
12471	    SENSE_LENGTH) {
12472		kmem_free(xp, sizeof (struct sd_xbuf) - SENSE_LENGTH +
12473		    MAX_SENSE_LENGTH);
12474	} else {
12475		kmem_free(xp, sizeof (struct sd_xbuf));
12476	}
12477
12478	biodone(bp);
12479
12480	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12481}
12482
12483
12484/*
12485 *    Function: sd_mapblockaddr_iostart
12486 *
 * Description: Verify that the request lies within the partition limits
 *		for the indicated minor device.  Issue an "overrun" buf if
 *		the request would exceed the partition range.  Converts the
 *		partition-relative block address to an absolute address.
 *
 *              Upon exit of this function:
 *              1. If the I/O is aligned,
 *                 xp->xb_blkno represents the absolute sector address.
 *              2. If the I/O is misaligned,
 *                 xp->xb_blkno represents the absolute logical block
 *                 address based on DEV_BSIZE. The logical block address
 *                 will be converted to a physical sector address in
 *                 sd_mapblocksize_iostart.
 *              3. If the I/O is misaligned but is aligned in the
 *                 "overrun" buf,
 *                 xp->xb_blkno represents the absolute logical block
 *                 address based on DEV_BSIZE. The logical block address
 *                 will be converted to a physical sector address in
 *                 sd_mapblocksize_iostart, but no RMW will be issued in
 *                 this case.
12505 *
12506 *     Context: Can sleep
12507 *
12508 *      Issues: This follows what the old code did, in terms of accessing
12509 *		some of the partition info in the unit struct without holding
 *		the mutex.  This is a general issue: if the partition info
 *		can be altered while IO is in progress, then as soon as we
 *		send a buf, its partitioning can be invalid before it gets
 *		to the device.  Probably the right fix is to move
 *		partitioning out of the driver entirely.
12515 */
12516
12517static void
12518sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12519{
12520	diskaddr_t	nblocks;	/* #blocks in the given partition */
12521	daddr_t	blocknum;	/* Block number specified by the buf */
12522	size_t	requested_nblocks;
12523	size_t	available_nblocks;
12524	int	partition;
12525	diskaddr_t	partition_offset;
12526	struct sd_xbuf *xp;
12527	int secmask = 0, blknomask = 0;
12528	ushort_t is_aligned = TRUE;
12529
12530	ASSERT(un != NULL);
12531	ASSERT(bp != NULL);
12532	ASSERT(!mutex_owned(SD_MUTEX(un)));
12533
12534	SD_TRACE(SD_LOG_IO_PARTITION, un,
12535	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12536
12537	xp = SD_GET_XBUF(bp);
12538	ASSERT(xp != NULL);
12539
12540	/*
12541	 * If the geometry is not indicated as valid, attempt to access
12542	 * the unit & verify the geometry/label. This can be the case for
	 * removable-media devices, or if the device was opened in
12544	 * NDELAY/NONBLOCK mode.
12545	 */
12546	partition = SDPART(bp->b_edev);
12547
12548	if (!SD_IS_VALID_LABEL(un)) {
12549		sd_ssc_t *ssc;
12550		/*
		 * Initialize sd_ssc_t for internal uscsi commands.
		 * To avoid a potential performance issue, we allocate
		 * this memory only if the label is invalid.
12554		 */
12555		ssc = sd_ssc_init(un);
12556
12557		if (sd_ready_and_valid(ssc, partition) != SD_READY_VALID) {
12558			/*
12559			 * For removable devices it is possible to start an
12560			 * I/O without a media by opening the device in nodelay
12561			 * mode. Also for writable CDs there can be many
12562			 * scenarios where there is no geometry yet but volume
12563			 * manager is trying to issue a read() just because
12564			 * it can see TOC on the CD. So do not print a message
12565			 * for removables.
12566			 */
12567			if (!un->un_f_has_removable_media) {
12568				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12569				    "i/o to invalid geometry\n");
12570			}
12571			bioerror(bp, EIO);
12572			bp->b_resid = bp->b_bcount;
12573			SD_BEGIN_IODONE(index, un, bp);
12574
12575			sd_ssc_fini(ssc);
12576			return;
12577		}
12578		sd_ssc_fini(ssc);
12579	}
12580
12581	nblocks = 0;
12582	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
12583	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
12584
12585	if (un->un_f_enable_rmw) {
12586		blknomask = (un->un_phy_blocksize / DEV_BSIZE) - 1;
12587		secmask = un->un_phy_blocksize - 1;
12588	} else {
12589		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
12590		secmask = un->un_tgt_blocksize - 1;
12591	}
12592
12593	if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) {
12594		is_aligned = FALSE;
12595	}
12596
12597	if (!(NOT_DEVBSIZE(un)) || un->un_f_enable_rmw) {
12598		/*
		 * If the I/O is aligned, there is no need to involve RMW
		 * (Read-Modify-Write); convert the logical block number
		 * to the target's physical sector number.
12602		 */
12603		if (is_aligned) {
12604			xp->xb_blkno = SD_SYS2TGTBLOCK(un, xp->xb_blkno);
12605		} else {
12606			switch (un->un_f_rmw_type) {
12607			case SD_RMW_TYPE_RETURN_ERROR:
12608				if (un->un_f_enable_rmw)
12609					break;
12610				else {
12611					bp->b_flags |= B_ERROR;
12612					goto error_exit;
12613				}
12614
12615			case SD_RMW_TYPE_DEFAULT:
12616				mutex_enter(SD_MUTEX(un));
12617				if (!un->un_f_enable_rmw &&
12618				    un->un_rmw_msg_timeid == NULL) {
12619					scsi_log(SD_DEVINFO(un), sd_label,
12620					    CE_WARN, "I/O request is not "
12621					    "aligned with %d disk sector size. "
12622					    "It is handled through Read Modify "
12623					    "Write but the performance is "
12624					    "very low.\n",
12625					    un->un_tgt_blocksize);
12626					un->un_rmw_msg_timeid =
12627					    timeout(sd_rmw_msg_print_handler,
12628					    un, SD_RMW_MSG_PRINT_TIMEOUT);
12629				} else {
					un->un_rmw_incre_count++;
12631				}
12632				mutex_exit(SD_MUTEX(un));
12633				break;
12634
12635			case SD_RMW_TYPE_NO_WARNING:
12636			default:
12637				break;
12638			}
12639
12640			nblocks = SD_TGT2SYSBLOCK(un, nblocks);
12641			partition_offset = SD_TGT2SYSBLOCK(un,
12642			    partition_offset);
12643		}
12644	}
12645
12646	/*
12647	 * blocknum is the starting block number of the request. At this
12648	 * point it is still relative to the start of the minor device.
12649	 */
12650	blocknum = xp->xb_blkno;
12651
12652	/*
12653	 * Legacy: If the starting block number is one past the last block
12654	 * in the partition, do not set B_ERROR in the buf.
12655	 */
12656	if (blocknum == nblocks)  {
12657		goto error_exit;
12658	}
12659
12660	/*
12661	 * Confirm that the first block of the request lies within the
12662	 * partition limits. Also the requested number of bytes must be
12663	 * a multiple of the system block size.
12664	 */
12665	if ((blocknum < 0) || (blocknum >= nblocks) ||
12666	    ((bp->b_bcount & (DEV_BSIZE - 1)) != 0)) {
12667		bp->b_flags |= B_ERROR;
12668		goto error_exit;
12669	}
12670
12671	/*
	 * If the requested # blocks exceeds the available # blocks, that
12673	 * is an overrun of the partition.
12674	 */
12675	if ((!NOT_DEVBSIZE(un)) && is_aligned) {
12676		requested_nblocks = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
12677	} else {
12678		requested_nblocks = SD_BYTES2SYSBLOCKS(bp->b_bcount);
12679	}
12680
12681	available_nblocks = (size_t)(nblocks - blocknum);
12682	ASSERT(nblocks >= blocknum);
12683
12684	if (requested_nblocks > available_nblocks) {
12685		size_t resid;
12686
12687		/*
12688		 * Allocate an "overrun" buf to allow the request to proceed
12689		 * for the amount of space available in the partition. The
12690		 * amount not transferred will be added into the b_resid
12691		 * when the operation is complete. The overrun buf
12692		 * replaces the original buf here, and the original buf
12693		 * is saved inside the overrun buf, for later use.
12694		 */
12695		if ((!NOT_DEVBSIZE(un)) && is_aligned) {
12696			resid = SD_TGTBLOCKS2BYTES(un,
12697			    (offset_t)(requested_nblocks - available_nblocks));
12698		} else {
12699			resid = SD_SYSBLOCKS2BYTES(
12700			    (offset_t)(requested_nblocks - available_nblocks));
12701		}
12702
12703		size_t count = bp->b_bcount - resid;
12704		/*
		 * Note: count is an unsigned entity, thus it will NEVER
		 * be less than 0, so ASSERT that the original values are
		 * correct.
12708		 */
12709		ASSERT(bp->b_bcount >= resid);
12710
12711		bp = sd_bioclone_alloc(bp, count, blocknum,
12712		    (int (*)(struct buf *)) sd_mapblockaddr_iodone);
12713		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12714		ASSERT(xp != NULL);
12715	}
12716
12717	/* At this point there should be no residual for this buf. */
12718	ASSERT(bp->b_resid == 0);
12719
12720	/* Convert the block number to an absolute address. */
12721	xp->xb_blkno += partition_offset;
12722
12723	SD_NEXT_IOSTART(index, un, bp);
12724
12725	SD_TRACE(SD_LOG_IO_PARTITION, un,
12726	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12727
12728	return;
12729
12730error_exit:
12731	bp->b_resid = bp->b_bcount;
12732	SD_BEGIN_IODONE(index, un, bp);
12733	SD_TRACE(SD_LOG_IO_PARTITION, un,
12734	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12735}
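
/*
 * To illustrate the checks above with made-up numbers: with
 * un->un_tgt_blocksize = 4096 (and RMW not enabled), blknomask = 7 and
 * secmask = 4095, so a buf with b_lblkno = 3 or b_bcount = 512 is
 * treated as misaligned. For the overrun case, a partition with
 * nblocks = 100 and a buf at blocknum = 96 requesting
 * requested_nblocks = 8 leaves available_nblocks = 4; an overrun buf is
 * cloned for the first 4 blocks' worth of data, and the other 4 blocks
 * show up in b_resid once sd_mapblockaddr_iodone restores the original
 * buf.
 */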
12736
12737
12738/*
12739 *    Function: sd_mapblockaddr_iodone
12740 *
12741 * Description: Completion-side processing for partition management.
12742 *
12743 *     Context: May be called under interrupt context
12744 */
12745
12746static void
12747sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12748{
12749	/* int	partition; */	/* Not used, see below. */
12750	ASSERT(un != NULL);
12751	ASSERT(bp != NULL);
12752	ASSERT(!mutex_owned(SD_MUTEX(un)));
12753
12754	SD_TRACE(SD_LOG_IO_PARTITION, un,
12755	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12756
12757	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12758		/*
12759		 * We have an "overrun" buf to deal with...
12760		 */
12761		struct sd_xbuf	*xp;
12762		struct buf	*obp;	/* ptr to the original buf */
12763
12764		xp = SD_GET_XBUF(bp);
12765		ASSERT(xp != NULL);
12766
12767		/* Retrieve the pointer to the original buf */
12768		obp = (struct buf *)xp->xb_private;
12769		ASSERT(obp != NULL);
12770
12771		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12772		bioerror(obp, bp->b_error);
12773
12774		sd_bioclone_free(bp);
12775
12776		/*
12777		 * Get back the original buf.
12778		 * Note that since the restoration of xb_blkno below
12779		 * was removed, the sd_xbuf is not needed.
12780		 */
12781		bp = obp;
12782		/*
12783		 * xp = SD_GET_XBUF(bp);
12784		 * ASSERT(xp != NULL);
12785		 */
12786	}
12787
12788	/*
	 * Convert xp->xb_blkno back to a minor-device relative value.
12790	 * Note: this has been commented out, as it is not needed in the
12791	 * current implementation of the driver (ie, since this function
12792	 * is at the top of the layering chains, so the info will be
12793	 * discarded) and it is in the "hot" IO path.
12794	 *
12795	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12796	 * xp->xb_blkno -= un->un_offset[partition];
12797	 */
12798
12799	SD_NEXT_IODONE(index, un, bp);
12800
12801	SD_TRACE(SD_LOG_IO_PARTITION, un,
12802	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12803}
12804
12805
12806/*
12807 *    Function: sd_mapblocksize_iostart
12808 *
12809 * Description: Convert between system block size (un->un_sys_blocksize)
12810 *		and target block size (un->un_tgt_blocksize).
12811 *
12812 *     Context: Can sleep to allocate resources.
12813 *
12814 * Assumptions: A higher layer has already performed any partition validation,
12815 *		and converted the xp->xb_blkno to an absolute value relative
12816 *		to the start of the device.
12817 *
12818 *		It is also assumed that the higher layer has implemented
12819 *		an "overrun" mechanism for the case where the request would
12820 *		read/write beyond the end of a partition.  In this case we
12821 *		assume (and ASSERT) that bp->b_resid == 0.
12822 *
12823 *		Note: The implementation for this routine assumes the target
12824 *		block size remains constant between allocation and transport.
12825 */
12826
12827static void
12828sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12829{
12830	struct sd_mapblocksize_info	*bsp;
12831	struct sd_xbuf			*xp;
12832	offset_t first_byte;
12833	daddr_t	start_block, end_block;
12834	daddr_t	request_bytes;
12835	ushort_t is_aligned = FALSE;
12836
12837	ASSERT(un != NULL);
12838	ASSERT(bp != NULL);
12839	ASSERT(!mutex_owned(SD_MUTEX(un)));
12840	ASSERT(bp->b_resid == 0);
12841
12842	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12843	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12844
12845	/*
12846	 * For a non-writable CD, a write request is an error
12847	 */
12848	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12849	    (un->un_f_mmc_writable_media == FALSE)) {
12850		bioerror(bp, EIO);
12851		bp->b_resid = bp->b_bcount;
12852		SD_BEGIN_IODONE(index, un, bp);
12853		return;
12854	}
12855
12856	/*
12857	 * We do not need a shadow buf if the device is using
12858	 * un->un_sys_blocksize as its block size or if bcount == 0.
12859	 * In this case there is no layer-private data block allocated.
12860	 */
12861	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
12862	    (bp->b_bcount == 0)) {
12863		goto done;
12864	}
12865
12866#if defined(__i386) || defined(__amd64)
12867	/* We do not support non-block-aligned transfers for ROD devices */
12868	ASSERT(!ISROD(un));
12869#endif
12870
12871	xp = SD_GET_XBUF(bp);
12872	ASSERT(xp != NULL);
12873
12874	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12875	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12876	    un->un_tgt_blocksize, DEV_BSIZE);
12877	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12878	    "request start block:0x%x\n", xp->xb_blkno);
12879	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12880	    "request len:0x%x\n", bp->b_bcount);
12881
12882	/*
12883	 * Allocate the layer-private data area for the mapblocksize layer.
12884	 * Layers are allowed to use the xp_private member of the sd_xbuf
12885	 * struct to store the pointer to their layer-private data block, but
12886	 * each layer also has the responsibility of restoring the prior
12887	 * contents of xb_private before returning the buf/xbuf to the
12888	 * higher layer that sent it.
12889	 *
12890	 * Here we save the prior contents of xp->xb_private into the
12891	 * bsp->mbs_oprivate field of our layer-private data area. This value
12892	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12893	 * the layer-private area and returning the buf/xbuf to the layer
12894	 * that sent it.
12895	 *
12896	 * Note that here we use kmem_zalloc for the allocation as there are
12897	 * parts of the mapblocksize code that expect certain fields to be
12898	 * zero unless explicitly set to a required value.
12899	 */
12900	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12901	bsp->mbs_oprivate = xp->xb_private;
12902	xp->xb_private = bsp;
12903
12904	/*
12905	 * This treats the data on the disk (target) as an array of bytes.
12906	 * first_byte is the byte offset, from the beginning of the device,
12907	 * to the location of the request. This is converted from a
12908	 * un->un_sys_blocksize block address to a byte offset, and then back
12909	 * to a block address based upon a un->un_tgt_blocksize block size.
12910	 *
12911	 * xp->xb_blkno should be absolute upon entry into this function,
	 * but it is based upon partitions that use the "system"
12913	 * block size. It must be adjusted to reflect the block size of
12914	 * the target.
12915	 *
12916	 * Note that end_block is actually the block that follows the last
12917	 * block of the request, but that's what is needed for the computation.
12918	 */
12919	first_byte  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
12920	if (un->un_f_enable_rmw) {
12921		start_block = xp->xb_blkno =
12922		    (first_byte / un->un_phy_blocksize) *
12923		    (un->un_phy_blocksize / DEV_BSIZE);
12924		end_block   = ((first_byte + bp->b_bcount +
12925		    un->un_phy_blocksize - 1) / un->un_phy_blocksize) *
12926		    (un->un_phy_blocksize / DEV_BSIZE);
12927	} else {
12928		start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12929		end_block   = (first_byte + bp->b_bcount +
12930		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
12931	}
12932
12933	/* request_bytes is rounded up to a multiple of the target block size */
12934	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
12935
12936	/*
12937	 * See if the starting address of the request and the request
12938	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12939	 * then we do not need to allocate a shadow buf to handle the request.
12940	 */
12941	if (un->un_f_enable_rmw) {
12942		if (((first_byte % un->un_phy_blocksize) == 0) &&
12943		    ((bp->b_bcount % un->un_phy_blocksize) == 0)) {
12944			is_aligned = TRUE;
12945		}
12946	} else {
12947		if (((first_byte % un->un_tgt_blocksize) == 0) &&
12948		    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12949			is_aligned = TRUE;
12950		}
12951	}
12952
12953	if ((bp->b_flags & B_READ) == 0) {
12954		/*
12955		 * Lock the range for a write operation. An aligned request is
12956		 * considered a simple write; otherwise the request must be a
12957		 * read-modify-write.
12958		 */
12959		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12960		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12961	}
12962
12963	/*
12964	 * Alloc a shadow buf if the request is not aligned. Also, this is
12965	 * where the READ command is generated for a read-modify-write. (The
12966	 * write phase is deferred until after the read completes.)
12967	 */
12968	if (is_aligned == FALSE) {
12969
12970		struct sd_mapblocksize_info	*shadow_bsp;
12971		struct sd_xbuf	*shadow_xp;
12972		struct buf	*shadow_bp;
12973
12974		/*
		 * Allocate the shadow buf and its associated xbuf. Note that
		 * after this call the xb_blkno value in both the original
		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
		 * same: absolute (relative to the start of the device), and
		 * adjusted for the target block size. The b_blkno in the
		 * shadow buf will also be set to this value. We should never
		 * change b_blkno in the original bp, however.
12982		 *
12983		 * Note also that the shadow buf will always need to be a
12984		 * READ command, regardless of whether the incoming command
12985		 * is a READ or a WRITE.
12986		 */
12987		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12988		    xp->xb_blkno,
12989		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12990
12991		shadow_xp = SD_GET_XBUF(shadow_bp);
12992
12993		/*
12994		 * Allocate the layer-private data for the shadow buf.
12995		 * (No need to preserve xb_private in the shadow xbuf.)
12996		 */
12997		shadow_xp->xb_private = shadow_bsp =
12998		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12999
13000		/*
13001		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
13002		 * to figure out where the start of the user data is (based upon
13003		 * the system block size) in the data returned by the READ
13004		 * command (which will be based upon the target blocksize). Note
13005		 * that this is only really used if the request is unaligned.
13006		 */
13007		if (un->un_f_enable_rmw) {
13008			bsp->mbs_copy_offset = (ssize_t)(first_byte -
13009			    ((offset_t)xp->xb_blkno * un->un_sys_blocksize));
13010			ASSERT((bsp->mbs_copy_offset >= 0) &&
13011			    (bsp->mbs_copy_offset < un->un_phy_blocksize));
13012		} else {
13013			bsp->mbs_copy_offset = (ssize_t)(first_byte -
13014			    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
13015			ASSERT((bsp->mbs_copy_offset >= 0) &&
13016			    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
13017		}
13018
13019		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
13020
13021		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
13022
13023		/* Transfer the wmap (if any) to the shadow buf */
13024		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
13025		bsp->mbs_wmp = NULL;
13026
13027		/*
13028		 * The shadow buf goes on from here in place of the
13029		 * original buf.
13030		 */
13031		shadow_bsp->mbs_orig_bp = bp;
13032		bp = shadow_bp;
13033	}
13034
13035	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13036	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
13037	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13038	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
13039	    request_bytes);
13040	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13041	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
13042
13043done:
13044	SD_NEXT_IOSTART(index, un, bp);
13045
13046	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
13047	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
13048}
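
/*
 * A worked example of the block-size math above, with illustrative
 * numbers (RMW not enabled): given un->un_tgt_blocksize = 2048,
 * DEV_BSIZE = 512, xp->xb_blkno = 5 and bp->b_bcount = 1024:
 *
 *	first_byte    = 5 * 512                        = 2560
 *	start_block   = 2560 / 2048                    = 1
 *	end_block     = (2560 + 1024 + 2047) / 2048    = 2
 *	request_bytes = (2 - 1) * 2048                 = 2048
 *	copy_offset   = 2560 - (1 * 2048)              = 512
 *
 * first_byte is not a multiple of 2048, so the request is unaligned: a
 * single 2048-byte shadow READ of target block 1 is issued, and the
 * 1024 bytes of user data start at offset 512 within it.
 */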
13049
13050
13051/*
13052 *    Function: sd_mapblocksize_iodone
13053 *
13054 * Description: Completion side processing for block-size mapping.
13055 *
13056 *     Context: May be called under interrupt context
13057 */
13058
13059static void
13060sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
13061{
13062	struct sd_mapblocksize_info	*bsp;
13063	struct sd_xbuf	*xp;
13064	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
13065	struct buf	*orig_bp;	/* ptr to the original buf */
13066	offset_t	shadow_end;
13067	offset_t	request_end;
13068	offset_t	shadow_start;
13069	ssize_t		copy_offset;
13070	size_t		copy_length;
13071	size_t		shortfall;
13072	uint_t		is_write;	/* TRUE if this bp is a WRITE */
	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
13074
13075	ASSERT(un != NULL);
13076	ASSERT(bp != NULL);
13077
13078	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
13079	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
13080
13081	/*
13082	 * There is no shadow buf or layer-private data if the target is
13083	 * using un->un_sys_blocksize as its block size or if bcount == 0.
13084	 */
13085	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
13086	    (bp->b_bcount == 0)) {
13087		goto exit;
13088	}
13089
13090	xp = SD_GET_XBUF(bp);
13091	ASSERT(xp != NULL);
13092
13093	/* Retrieve the pointer to the layer-private data area from the xbuf. */
13094	bsp = xp->xb_private;
13095
13096	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
13097	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
13098
13099	if (is_write) {
13100		/*
13101		 * For a WRITE request we must free up the block range that
13102		 * we have locked up.  This holds regardless of whether this is
13103		 * an aligned write request or a read-modify-write request.
13104		 */
13105		sd_range_unlock(un, bsp->mbs_wmp);
13106		bsp->mbs_wmp = NULL;
13107	}
13108
13109	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
13110		/*
13111		 * An aligned read or write command will have no shadow buf;
13112		 * there is not much else to do with it.
13113		 */
13114		goto done;
13115	}
13116
13117	orig_bp = bsp->mbs_orig_bp;
13118	ASSERT(orig_bp != NULL);
13119	orig_xp = SD_GET_XBUF(orig_bp);
13120	ASSERT(orig_xp != NULL);
13121	ASSERT(!mutex_owned(SD_MUTEX(un)));
13122
13123	if (!is_write && has_wmap) {
13124		/*
13125		 * A READ with a wmap means this is the READ phase of a
13126		 * read-modify-write. If an error occurred on the READ then
13127		 * we do not proceed with the WRITE phase or copy any data.
13128		 * Just release the write maps and return with an error.
13129		 */
13130		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
13131			orig_bp->b_resid = orig_bp->b_bcount;
13132			bioerror(orig_bp, bp->b_error);
13133			sd_range_unlock(un, bsp->mbs_wmp);
13134			goto freebuf_done;
13135		}
13136	}
13137
13138	/*
13139	 * Here is where we set up to copy the data from the shadow buf
13140	 * into the space associated with the original buf.
13141	 *
13142	 * To deal with the conversion between block sizes, these
13143	 * computations treat the data as an array of bytes, with the
13144	 * first byte (byte 0) corresponding to the first byte in the
13145	 * first block on the disk.
13146	 */
13147
13148	/*
13149	 * shadow_start and shadow_len indicate the location and size of
13150	 * the data returned with the shadow IO request.
13151	 */
13152	if (un->un_f_enable_rmw) {
13153		shadow_start  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
13154	} else {
13155		shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
13156	}
13157	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
13158
13159	/*
13160	 * copy_offset gives the offset (in bytes) from the start of the first
	 * block of the READ request to the beginning of the data.  We
	 * retrieve this value from the layer-private data area, where it was
	 * saved by sd_mapblocksize_iostart(). copy_length gives the amount
	 * of data to be copied (in bytes).
13165	 */
13166	copy_offset  = bsp->mbs_copy_offset;
13167	if (un->un_f_enable_rmw) {
13168		ASSERT((copy_offset >= 0) &&
13169		    (copy_offset < un->un_phy_blocksize));
13170	} else {
13171		ASSERT((copy_offset >= 0) &&
13172		    (copy_offset < un->un_tgt_blocksize));
13173	}
13174
13175	copy_length  = orig_bp->b_bcount;
13176	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
13177
13178	/*
13179	 * Set up the resid and error fields of orig_bp as appropriate.
13180	 */
13181	if (shadow_end >= request_end) {
13182		/* We got all the requested data; set resid to zero */
13183		orig_bp->b_resid = 0;
13184	} else {
13185		/*
13186		 * We failed to get enough data to fully satisfy the original
13187		 * request. Just copy back whatever data we got and set
13188		 * up the residual and error code as required.
13189		 *
13190		 * 'shortfall' is the amount by which the data received with the
13191		 * shadow buf has "fallen short" of the requested amount.
13192		 */
13193		shortfall = (size_t)(request_end - shadow_end);
13194
13195		if (shortfall > orig_bp->b_bcount) {
13196			/*
13197			 * We did not get enough data to even partially
13198			 * fulfill the original request.  The residual is
13199			 * equal to the amount requested.
13200			 */
13201			orig_bp->b_resid = orig_bp->b_bcount;
13202		} else {
13203			/*
13204			 * We did not get all the data that we requested
13205			 * from the device, but we will try to return what
13206			 * portion we did get.
13207			 */
13208			orig_bp->b_resid = shortfall;
13209		}
13210		ASSERT(copy_length >= orig_bp->b_resid);
13211		copy_length  -= orig_bp->b_resid;
13212	}
13213
13214	/* Propagate the error code from the shadow buf to the original buf */
13215	bioerror(orig_bp, bp->b_error);
13216
13217	if (is_write) {
13218		goto freebuf_done;	/* No data copying for a WRITE */
13219	}
13220
13221	if (has_wmap) {
13222		/*
13223		 * This is a READ command from the READ phase of a
13224		 * read-modify-write request. We have to copy the data given
13225		 * by the user OVER the data returned by the READ command,
13226		 * then convert the command from a READ to a WRITE and send
13227		 * it back to the target.
13228		 */
13229		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
13230		    copy_length);
13231
13232		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
13233
13234		/*
13235		 * Dispatch the WRITE command to the taskq thread, which
13236		 * will in turn send the command to the target. When the
13237		 * WRITE command completes, we (sd_mapblocksize_iodone())
13238		 * will get called again as part of the iodone chain
13239		 * processing for it. Note that we will still be dealing
13240		 * with the shadow buf at that point.
13241		 */
13242		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
13243		    KM_NOSLEEP) != 0) {
13244			/*
13245			 * Dispatch was successful so we are done. Return
13246			 * without going any higher up the iodone chain. Do
13247			 * not free up any layer-private data until after the
13248			 * WRITE completes.
13249			 */
13250			return;
13251		}
13252
13253		/*
13254		 * Dispatch of the WRITE command failed; set up the error
13255		 * condition and send this IO back up the iodone chain.
13256		 */
13257		bioerror(orig_bp, EIO);
13258		orig_bp->b_resid = orig_bp->b_bcount;
13259
13260	} else {
13261		/*
13262		 * This is a regular READ request (ie, not a RMW). Copy the
13263		 * data from the shadow buf into the original buf. The
13264		 * copy_offset compensates for any "misalignment" between the
13265		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
13266		 * original buf (with its un->un_sys_blocksize blocks).
13267		 */
13268		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
13269		    copy_length);
13270	}
13271
13272freebuf_done:
13273
13274	/*
13275	 * At this point we still have both the shadow buf AND the original
13276	 * buf to deal with, as well as the layer-private data area in each.
13277	 * Local variables are as follows:
13278	 *
13279	 * bp -- points to shadow buf
13280	 * xp -- points to xbuf of shadow buf
13281	 * bsp -- points to layer-private data area of shadow buf
13282	 * orig_bp -- points to original buf
13283	 *
13284	 * First free the shadow buf and its associated xbuf, then free the
13285	 * layer-private data area from the shadow buf. There is no need to
13286	 * restore xb_private in the shadow xbuf.
13287	 */
13288	sd_shadow_buf_free(bp);
13289	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
13290
13291	/*
13292	 * Now update the local variables to point to the original buf, xbuf,
13293	 * and layer-private area.
13294	 */
13295	bp = orig_bp;
13296	xp = SD_GET_XBUF(bp);
13297	ASSERT(xp != NULL);
13298	ASSERT(xp == orig_xp);
13299	bsp = xp->xb_private;
13300	ASSERT(bsp != NULL);
13301
13302done:
13303	/*
13304	 * Restore xb_private to whatever it was set to by the next higher
13305	 * layer in the chain, then free the layer-private data area.
13306	 */
13307	xp->xb_private = bsp->mbs_oprivate;
13308	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
13309
13310exit:
13311	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
13312	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
13313
13314	SD_NEXT_IODONE(index, un, bp);
13315}
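
/*
 * Continuing the illustrative example from sd_mapblocksize_iostart: if
 * the 2048-byte shadow READ there came back with bp->b_resid = 1024,
 * then shadow_start = 2048, shadow_end = 2048 + 2048 - 1024 = 3072 and
 * request_end = 2048 + 512 + 1024 = 3584, giving shortfall = 512. Since
 * 512 <= orig_bp->b_bcount, the original buf completes with
 * b_resid = 512, and copy_length shrinks to 1024 - 512 = 512 bytes.
 */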
13316
13317
13318/*
13319 *    Function: sd_checksum_iostart
13320 *
13321 * Description: A stub function for a layer that's currently not used.
13322 *		For now just a placeholder.
13323 *
13324 *     Context: Kernel thread context
13325 */
13326
13327static void
13328sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13329{
13330	ASSERT(un != NULL);
13331	ASSERT(bp != NULL);
13332	ASSERT(!mutex_owned(SD_MUTEX(un)));
13333	SD_NEXT_IOSTART(index, un, bp);
13334}
13335
13336
13337/*
13338 *    Function: sd_checksum_iodone
13339 *
13340 * Description: A stub function for a layer that's currently not used.
13341 *		For now just a placeholder.
13342 *
13343 *     Context: May be called under interrupt context
13344 */
13345
13346static void
13347sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13348{
13349	ASSERT(un != NULL);
13350	ASSERT(bp != NULL);
13351	ASSERT(!mutex_owned(SD_MUTEX(un)));
13352	SD_NEXT_IODONE(index, un, bp);
13353}
13354
13355
13356/*
13357 *    Function: sd_checksum_uscsi_iostart
13358 *
13359 * Description: A stub function for a layer that's currently not used.
13360 *		For now just a placeholder.
13361 *
13362 *     Context: Kernel thread context
13363 */
13364
13365static void
13366sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13367{
13368	ASSERT(un != NULL);
13369	ASSERT(bp != NULL);
13370	ASSERT(!mutex_owned(SD_MUTEX(un)));
13371	SD_NEXT_IOSTART(index, un, bp);
13372}
13373
13374
13375/*
13376 *    Function: sd_checksum_uscsi_iodone
13377 *
13378 * Description: A stub function for a layer that's currently not used.
13379 *		For now just a placeholder.
13380 *
13381 *     Context: May be called under interrupt context
13382 */
13383
13384static void
13385sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13386{
13387	ASSERT(un != NULL);
13388	ASSERT(bp != NULL);
13389	ASSERT(!mutex_owned(SD_MUTEX(un)));
13390	SD_NEXT_IODONE(index, un, bp);
13391}
13392
13393
13394/*
13395 *    Function: sd_pm_iostart
13396 *
 * Description: iostart-side routine for power management.
13398 *
13399 *     Context: Kernel thread context
13400 */
13401
13402static void
13403sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13404{
13405	ASSERT(un != NULL);
13406	ASSERT(bp != NULL);
13407	ASSERT(!mutex_owned(SD_MUTEX(un)));
13408	ASSERT(!mutex_owned(&un->un_pm_mutex));
13409
13410	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13411
13412	if (sd_pm_entry(un) != DDI_SUCCESS) {
13413		/*
13414		 * Set up to return the failed buf back up the 'iodone'
13415		 * side of the calling chain.
13416		 */
13417		bioerror(bp, EIO);
13418		bp->b_resid = bp->b_bcount;
13419
13420		SD_BEGIN_IODONE(index, un, bp);
13421
13422		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13423		return;
13424	}
13425
13426	SD_NEXT_IOSTART(index, un, bp);
13427
13428	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13429}
13430
13431
13432/*
13433 *    Function: sd_pm_iodone
13434 *
 * Description: iodone-side routine for power management.
13436 *
13437 *     Context: may be called from interrupt context
13438 */
13439
13440static void
13441sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13442{
13443	ASSERT(un != NULL);
13444	ASSERT(bp != NULL);
13445	ASSERT(!mutex_owned(&un->un_pm_mutex));
13446
13447	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13448
13449	/*
13450	 * After attach the following flag is only read, so don't
13451	 * take the penalty of acquiring a mutex for it.
13452	 */
13453	if (un->un_f_pm_is_enabled == TRUE) {
13454		sd_pm_exit(un);
13455	}
13456
13457	SD_NEXT_IODONE(index, un, bp);
13458
13459	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13460}
13461
13462
13463/*
13464 *    Function: sd_core_iostart
13465 *
13466 * Description: Primary driver function for enqueuing buf(9S) structs from
13467 *		the system and initiating IO to the target device
13468 *
13469 *     Context: Kernel thread context. Can sleep.
13470 *
13471 * Assumptions:  - The given xp->xb_blkno is absolute
13472 *		   (ie, relative to the start of the device).
13473 *		 - The IO is to be done using the native blocksize of
13474 *		   the device, as specified in un->un_tgt_blocksize.
13475 */
13476/* ARGSUSED */
13477static void
13478sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13479{
13480	struct sd_xbuf *xp;
13481
13482	ASSERT(un != NULL);
13483	ASSERT(bp != NULL);
13484	ASSERT(!mutex_owned(SD_MUTEX(un)));
13485	ASSERT(bp->b_resid == 0);
13486
13487	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13488
13489	xp = SD_GET_XBUF(bp);
13490	ASSERT(xp != NULL);
13491
13492	mutex_enter(SD_MUTEX(un));
13493
13494	/*
13495	 * If we are currently in the failfast state, fail any new IO
13496	 * that has B_FAILFAST set, then return.
13497	 */
13498	if ((bp->b_flags & B_FAILFAST) &&
13499	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13500		mutex_exit(SD_MUTEX(un));
13501		bioerror(bp, EIO);
13502		bp->b_resid = bp->b_bcount;
13503		SD_BEGIN_IODONE(index, un, bp);
13504		return;
13505	}
13506
13507	if (SD_IS_DIRECT_PRIORITY(xp)) {
13508		/*
13509		 * Priority command -- transport it immediately.
13510		 *
13511		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13512		 * because all direct priority commands should be associated
13513		 * with error recovery actions which we don't want to retry.
13514		 */
13515		sd_start_cmds(un, bp);
13516	} else {
13517		/*
13518		 * Normal command -- add it to the wait queue, then start
13519		 * transporting commands from the wait queue.
13520		 */
13521		sd_add_buf_to_waitq(un, bp);
13522		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13523		sd_start_cmds(un, NULL);
13524	}
13525
13526	mutex_exit(SD_MUTEX(un));
13527
13528	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13529}
13530
13531
13532/*
13533 *    Function: sd_init_cdb_limits
13534 *
13535 * Description: This handles scsi_pkt initialization differences
13536 *		between the driver platforms.
13537 *
13538 *		Legacy behaviors:
13539 *
13540 *		If the block number or the sector count exceeds the
13541 *		capabilities of a Group 0 command, shift over to a
13542 *		Group 1 command. We don't blindly use Group 1
13543 *		commands because a) some drives (CDC Wren IVs) get a
13544 *		bit confused, and b) there is probably a fair amount
13545 *		of speed difference for a target to receive and decode
13546 *		a 10 byte command instead of a 6 byte command.
13547 *
13548 *		The transfer time difference between 6 and 10 byte CDBs
13549 *		is still significant, so this code remains worthwhile.
13550 *		10 byte CDBs are very inefficient with the fas HBA driver
13551 *		and older disks: each CDB byte took 1 usec with some
13552 *		popular disks.
13553 *
13554 *     Context: Must be called at attach time
13555 */
13556
13557static void
13558sd_init_cdb_limits(struct sd_lun *un)
13559{
13560	int hba_cdb_limit;
13561
13562	/*
13563	 * Use CDB_GROUP1 commands for most devices except for
13564	 * parallel SCSI fixed drives, in which case we get better
13565	 * performance using CDB_GROUP0 commands (where applicable).
13566	 */
13567	un->un_mincdb = SD_CDB_GROUP1;
13568#if !defined(__fibre)
13569	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13570	    !un->un_f_has_removable_media) {
13571		un->un_mincdb = SD_CDB_GROUP0;
13572	}
13573#endif
13574
13575	/*
13576	 * Try to read the max-cdb-length supported by HBA.
13577	 */
13578	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13579	if (0 >= un->un_max_hba_cdb) {
13580		un->un_max_hba_cdb = CDB_GROUP4;
13581		hba_cdb_limit = SD_CDB_GROUP4;
13582	} else if (0 < un->un_max_hba_cdb &&
13583	    un->un_max_hba_cdb < CDB_GROUP1) {
13584		hba_cdb_limit = SD_CDB_GROUP0;
13585	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13586	    un->un_max_hba_cdb < CDB_GROUP5) {
13587		hba_cdb_limit = SD_CDB_GROUP1;
13588	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13589	    un->un_max_hba_cdb < CDB_GROUP4) {
13590		hba_cdb_limit = SD_CDB_GROUP5;
13591	} else {
13592		hba_cdb_limit = SD_CDB_GROUP4;
13593	}
13594
13595	/*
13596	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13597	 * commands for fixed disks unless we are building for a 32 bit
13598	 * kernel.
13599	 */
13600#ifdef _LP64
13601	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13602	    min(hba_cdb_limit, SD_CDB_GROUP4);
13603#else
13604	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13605	    min(hba_cdb_limit, SD_CDB_GROUP1);
13606#endif
13607
13608	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13609	    ? sizeof (struct scsi_arq_status) : 1);
13610	un->un_cmd_timeout = (ushort_t)sd_io_time;
13611	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13612}
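
/*
 * Illustrative summary of the hba_cdb_limit selection above, assuming
 * the usual CDB_GROUP* sizes of 6 (GROUP0), 10 (GROUP1), 12 (GROUP5),
 * and 16 (GROUP4) bytes:
 *
 *	"max-cdb-length" capability	resulting hba_cdb_limit
 *	---------------------------	------------------------
 *	<= 0 (cap not supported)	SD_CDB_GROUP4
 *	1 - 9				SD_CDB_GROUP0
 *	10 - 11				SD_CDB_GROUP1
 *	12 - 15				SD_CDB_GROUP5
 *	>= 16				SD_CDB_GROUP4
 *
 * un_maxcdb is then capped by whether the media is removable and
 * whether this is a 32 bit kernel, as the code above shows.
 */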
13613
13614
13615/*
13616 *    Function: sd_initpkt_for_buf
13617 *
13618 * Description: Allocate and initialize for transport a scsi_pkt struct,
13619 *		based upon the info specified in the given buf struct.
13620 *
13621 *		Assumes the xb_blkno in the request is absolute (i.e.,
13622 *		relative to the start of the device, NOT the partition).
13623 *		Also assumes that the request is using the native block
13624 *		size of the device (as returned by the READ CAPACITY
13625 *		command).
13626 *
13627 * Return Code: SD_PKT_ALLOC_SUCCESS
13628 *		SD_PKT_ALLOC_FAILURE
13629 *		SD_PKT_ALLOC_FAILURE_NO_DMA
13630 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13631 *
13632 *     Context: Kernel thread and may be called from software interrupt context
13633 *		as part of a sdrunout callback. This function may not block or
13634 *		call routines that block
13635 */
13636
13637static int
13638sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13639{
13640	struct sd_xbuf	*xp;
13641	struct scsi_pkt *pktp = NULL;
13642	struct sd_lun	*un;
13643	size_t		blockcount;
13644	daddr_t		startblock;
13645	int		rval;
13646	int		cmd_flags;
13647
13648	ASSERT(bp != NULL);
13649	ASSERT(pktpp != NULL);
13650	xp = SD_GET_XBUF(bp);
13651	ASSERT(xp != NULL);
13652	un = SD_GET_UN(bp);
13653	ASSERT(un != NULL);
13654	ASSERT(mutex_owned(SD_MUTEX(un)));
13655	ASSERT(bp->b_resid == 0);
13656
13657	SD_TRACE(SD_LOG_IO_CORE, un,
13658	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13659
13660	mutex_exit(SD_MUTEX(un));
13661
13662#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13663	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13664		/*
13665		 * Already have a scsi_pkt -- just need DMA resources.
13666		 * We must recompute the CDB in case the mapping returns
13667		 * a nonzero pkt_resid.
13668		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13669		 * that is being retried, the unmap/remap of the DMA resources
13670		 * will result in the entire transfer starting over again
13671		 * from the very first block.
13672		 */
13673		ASSERT(xp->xb_pktp != NULL);
13674		pktp = xp->xb_pktp;
13675	} else {
13676		pktp = NULL;
13677	}
13678#endif /* __i386 || __amd64 */
13679
13680	startblock = xp->xb_blkno;	/* Absolute block num. */
13681	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13682
13683	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13684
13685	/*
13686	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13687	 * call scsi_init_pkt, and build the CDB.
13688	 */
13689	rval = sd_setup_rw_pkt(un, &pktp, bp,
13690	    cmd_flags, sdrunout, (caddr_t)un,
13691	    startblock, blockcount);
13692
13693	if (rval == 0) {
13694		/*
13695		 * Success.
13696		 *
13697		 * If partial DMA is being used and required for this transfer,
13698		 * set it up here.
13699		 */
13700		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13701		    (pktp->pkt_resid != 0)) {
13702
13703			/*
13704			 * Save the pkt_resid for the next portion of
13705			 * the transfer.
13706			 */
13707			xp->xb_dma_resid = pktp->pkt_resid;
13708
13709			/* rezero resid */
13710			pktp->pkt_resid = 0;
13711
13712		} else {
13713			xp->xb_dma_resid = 0;
13714		}
13715
13716		pktp->pkt_flags = un->un_tagflags;
13717		pktp->pkt_time  = un->un_cmd_timeout;
13718		pktp->pkt_comp  = sdintr;
13719
13720		pktp->pkt_private = bp;
13721		*pktpp = pktp;
13722
13723		SD_TRACE(SD_LOG_IO_CORE, un,
13724		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13725
13726#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13727		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13728#endif
13729
13730		mutex_enter(SD_MUTEX(un));
13731		return (SD_PKT_ALLOC_SUCCESS);
13732
13733	}
13734
13735	/*
13736	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13737	 * from sd_setup_rw_pkt.
13738	 */
13739	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13740
13741	if (rval == SD_PKT_ALLOC_FAILURE) {
13742		*pktpp = NULL;
13743		/*
13744		 * Set the driver state to RWAIT to indicate the driver
13745		 * is waiting on resource allocations. The driver will not
13746		 * suspend, pm_suspend, or detach while the state is RWAIT.
13747		 */
13748		mutex_enter(SD_MUTEX(un));
13749		New_state(un, SD_STATE_RWAIT);
13750
13751		SD_ERROR(SD_LOG_IO_CORE, un,
13752		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13753
13754		if ((bp->b_flags & B_ERROR) != 0) {
13755			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13756		}
13757		return (SD_PKT_ALLOC_FAILURE);
13758	} else {
13759		/*
13760		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13761		 *
13762		 * This should never happen.  Maybe someone messed with the
13763		 * kernel's minphys?
13764		 */
13765		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13766		    "Request rejected: too large for CDB: "
13767		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13768		SD_ERROR(SD_LOG_IO_CORE, un,
13769		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13770		mutex_enter(SD_MUTEX(un));
13771		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13772
13773	}
13774}
13775
13776
13777/*
13778 *    Function: sd_destroypkt_for_buf
13779 *
13780 * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13781 *
13782 *     Context: Kernel thread or interrupt context
13783 */
13784
13785static void
13786sd_destroypkt_for_buf(struct buf *bp)
13787{
13788	ASSERT(bp != NULL);
13789	ASSERT(SD_GET_UN(bp) != NULL);
13790
13791	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13792	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13793
13794	ASSERT(SD_GET_PKTP(bp) != NULL);
13795	scsi_destroy_pkt(SD_GET_PKTP(bp));
13796
13797	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13798	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13799}
13800
13801/*
13802 *    Function: sd_setup_rw_pkt
13803 *
13804 * Description: Determines appropriate CDB group for the requested LBA
13805 *		and transfer length, calls scsi_init_pkt, and builds
13806 *		the CDB.  Do not use for partial DMA transfers except
13807 *		for the initial transfer since the CDB size must
13808 *		remain constant.
13809 *
13810 *     Context: Kernel thread and may be called from software interrupt
13811 *		context as part of a sdrunout callback. This function may not
13812 *		block or call routines that block
13813 */
13814
13815
13816int
13817sd_setup_rw_pkt(struct sd_lun *un,
13818    struct scsi_pkt **pktpp, struct buf *bp, int flags,
13819    int (*callback)(caddr_t), caddr_t callback_arg,
13820    diskaddr_t lba, uint32_t blockcount)
13821{
13822	struct scsi_pkt *return_pktp;
13823	union scsi_cdb *cdbp;
13824	struct sd_cdbinfo *cp = NULL;
13825	int i;
13826
13827	/*
13828	 * See which size CDB to use, based upon the request.
13829	 */
13830	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13831
13832		/*
13833		 * Check lba and block count against sd_cdbtab limits.
13834		 * In the partial DMA case, we have to use the same size
13835		 * CDB for all the transfers.  Check lba + blockcount
13836		 * against the max LBA so we know that segment of the
13837		 * transfer can use the CDB we select.
13838		 */
13839		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13840		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13841
13842			/*
13843			 * The command will fit into the CDB type
13844			 * specified by sd_cdbtab[i].
13845			 */
13846			cp = sd_cdbtab + i;
13847
13848			/*
13849			 * Call scsi_init_pkt so we can fill in the
13850			 * CDB.
13851			 */
13852			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13853			    bp, cp->sc_grpcode, un->un_status_len, 0,
13854			    flags, callback, callback_arg);
13855
13856			if (return_pktp != NULL) {
13857
13858				/*
13859				 * Return new value of pkt
13860				 */
13861				*pktpp = return_pktp;
13862
13863				/*
13864				 * To be safe, zero the CDB, ensuring there is
13865				 * no leftover data from a previous command.
13866				 */
13867				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13868
13869				/*
13870				 * Handle partial DMA mapping
13871				 */
13872				if (return_pktp->pkt_resid != 0) {
13873
13874					/*
13875					 * Not going to xfer as many blocks as
13876					 * originally expected
13877					 */
13878					blockcount -=
13879					    SD_BYTES2TGTBLOCKS(un,
13880					    return_pktp->pkt_resid);
13881				}
13882
13883				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13884
13885				/*
13886				 * Set command byte based on the CDB
13887				 * type we matched.
13888				 */
13889				cdbp->scc_cmd = cp->sc_grpmask |
13890				    ((bp->b_flags & B_READ) ?
13891				    SCMD_READ : SCMD_WRITE);
13892
13893				SD_FILL_SCSI1_LUN(un, return_pktp);
13894
13895				/*
13896				 * Fill in LBA and length
13897				 */
13898				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13899				    (cp->sc_grpcode == CDB_GROUP4) ||
13900				    (cp->sc_grpcode == CDB_GROUP0) ||
13901				    (cp->sc_grpcode == CDB_GROUP5));
13902
13903				if (cp->sc_grpcode == CDB_GROUP1) {
13904					FORMG1ADDR(cdbp, lba);
13905					FORMG1COUNT(cdbp, blockcount);
13906					return (0);
13907				} else if (cp->sc_grpcode == CDB_GROUP4) {
13908					FORMG4LONGADDR(cdbp, lba);
13909					FORMG4COUNT(cdbp, blockcount);
13910					return (0);
13911				} else if (cp->sc_grpcode == CDB_GROUP0) {
13912					FORMG0ADDR(cdbp, lba);
13913					FORMG0COUNT(cdbp, blockcount);
13914					return (0);
13915				} else if (cp->sc_grpcode == CDB_GROUP5) {
13916					FORMG5ADDR(cdbp, lba);
13917					FORMG5COUNT(cdbp, blockcount);
13918					return (0);
13919				}
13920
13921				/*
13922				 * It should be impossible to not match one
13923				 * of the CDB types above, so we should never
13924				 * reach this point.  Set the CDB command byte
13925				 * to test-unit-ready to avoid writing
13926				 * to somewhere we don't intend.
13927				 */
13928				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13929				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13930			} else {
13931				/*
13932				 * Couldn't get scsi_pkt
13933				 */
13934				return (SD_PKT_ALLOC_FAILURE);
13935			}
13936		}
13937	}
13938
13939	/*
13940	 * None of the available CDB types were suitable.  This really
13941	 * should never happen:  on a 64 bit system we support
13942	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13943	 * and on a 32 bit system we will refuse to bind to a device
13944	 * larger than 2TB so addresses will never be larger than 32 bits.
13945	 */
13946	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13947}
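
/*
 * A hedged sketch of how the sd_cdbtab scan above selects a CDB
 * group, assuming the table reflects the standard SCSI READ/WRITE
 * limits (Group 0: 21-bit LBA, up to 256 blocks; Group 1: 32-bit LBA,
 * 16-bit count; Group 5: 32-bit LBA, 32-bit count; Group 4: 64-bit
 * LBA, 32-bit count) and that un_mincdb permits Group 0:
 *
 *	lba = 0x1FFFFF, blockcount = 1
 *		-> fits Group 0; a 6-byte CDB is built.
 *	lba = 0x200000, blockcount = 1
 *		-> LBA needs more than 21 bits; 10-byte Group 1 CDB.
 *	lba = 0x1000, blockcount = 0x20000
 *		-> count needs more than 16 bits; 12-byte Group 5 CDB.
 *	lba = 0x100000000, blockcount = 1
 *		-> LBA needs more than 32 bits; 16-byte Group 4 CDB.
 *
 * Because the scan starts at un->un_mincdb, a device restricted to
 * 10-byte CDBs never sees a Group 0 command, even for small requests.
 */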
13948
13949/*
13950 *    Function: sd_setup_next_rw_pkt
13951 *
13952 * Description: Set up the packet for a partial DMA transfer, except for
13953 *		the initial transfer.  sd_setup_rw_pkt should be used for
13954 *		the initial transfer.
13955 *
13956 *     Context: Kernel thread and may be called from interrupt context.
13957 */
13958
13959int
13960sd_setup_next_rw_pkt(struct sd_lun *un,
13961    struct scsi_pkt *pktp, struct buf *bp,
13962    diskaddr_t lba, uint32_t blockcount)
13963{
13964	uchar_t com;
13965	union scsi_cdb *cdbp;
13966	uchar_t cdb_group_id;
13967
13968	ASSERT(pktp != NULL);
13969	ASSERT(pktp->pkt_cdbp != NULL);
13970
13971	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13972	com = cdbp->scc_cmd;
13973	cdb_group_id = CDB_GROUPID(com);
13974
13975	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13976	    (cdb_group_id == CDB_GROUPID_1) ||
13977	    (cdb_group_id == CDB_GROUPID_4) ||
13978	    (cdb_group_id == CDB_GROUPID_5));
13979
13980	/*
13981	 * Move pkt to the next portion of the xfer.
13982	 * func is NULL_FUNC so we do not have to release
13983	 * the disk mutex here.
13984	 */
13985	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13986	    NULL_FUNC, NULL) == pktp) {
13987		/* Success.  Handle partial DMA */
13988		if (pktp->pkt_resid != 0) {
13989			blockcount -=
13990			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13991		}
13992
13993		cdbp->scc_cmd = com;
13994		SD_FILL_SCSI1_LUN(un, pktp);
13995		if (cdb_group_id == CDB_GROUPID_1) {
13996			FORMG1ADDR(cdbp, lba);
13997			FORMG1COUNT(cdbp, blockcount);
13998			return (0);
13999		} else if (cdb_group_id == CDB_GROUPID_4) {
14000			FORMG4LONGADDR(cdbp, lba);
14001			FORMG4COUNT(cdbp, blockcount);
14002			return (0);
14003		} else if (cdb_group_id == CDB_GROUPID_0) {
14004			FORMG0ADDR(cdbp, lba);
14005			FORMG0COUNT(cdbp, blockcount);
14006			return (0);
14007		} else if (cdb_group_id == CDB_GROUPID_5) {
14008			FORMG5ADDR(cdbp, lba);
14009			FORMG5COUNT(cdbp, blockcount);
14010			return (0);
14011		}
14012
14013		/* Unreachable */
14014		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
14015	}
14016
14017	/*
14018	 * Error setting up next portion of cmd transfer.
14019	 * Something is definitely very wrong and this
14020	 * should not happen.
14021	 */
14022	return (SD_PKT_ALLOC_FAILURE);
14023}
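
/*
 * Illustrative window arithmetic for the routine above (the numbers
 * are made up): if the original request started at lba 1000 for 1024
 * blocks and the first window moved 256 blocks, the caller re-invokes
 * this routine with lba = 1256 and blockcount = 768.  If
 * scsi_init_pkt() again maps only part of that range, pkt_resid
 * reflects the shortfall and blockcount is trimmed accordingly before
 * the CDB is rebuilt with the same group code as the initial CDB.
 */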
14024
14025/*
14026 *    Function: sd_initpkt_for_uscsi
14027 *
14028 * Description: Allocate and initialize for transport a scsi_pkt struct,
14029 *		based upon the info specified in the given uscsi_cmd struct.
14030 *
14031 * Return Code: SD_PKT_ALLOC_SUCCESS
14032 *		SD_PKT_ALLOC_FAILURE
14033 *		SD_PKT_ALLOC_FAILURE_NO_DMA
14034 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
14035 *
14036 *     Context: Kernel thread and may be called from software interrupt context
14037 *		as part of a sdrunout callback. This function may not block or
14038 *		call routines that block
14039 */
14040
14041static int
14042sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
14043{
14044	struct uscsi_cmd *uscmd;
14045	struct sd_xbuf	*xp;
14046	struct scsi_pkt	*pktp;
14047	struct sd_lun	*un;
14048	uint32_t	flags = 0;
14049
14050	ASSERT(bp != NULL);
14051	ASSERT(pktpp != NULL);
14052	xp = SD_GET_XBUF(bp);
14053	ASSERT(xp != NULL);
14054	un = SD_GET_UN(bp);
14055	ASSERT(un != NULL);
14056	ASSERT(mutex_owned(SD_MUTEX(un)));
14057
14058	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
14059	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
14060	ASSERT(uscmd != NULL);
14061
14062	SD_TRACE(SD_LOG_IO_CORE, un,
14063	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
14064
14065	/*
14066	 * Allocate the scsi_pkt for the command.
14067	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
14068	 *	 during scsi_init_pkt time and will continue to use the
14069	 *	 same path as long as the same scsi_pkt is used without
14070	 *	 intervening scsi_dmafree(). Since a uscsi command does
14071	 *	 not call scsi_dmafree() before retrying a failed command,
14072	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
14073	 *	 NOT set, so that scsi_vhci can use another available path
14074	 *	 for the retry. Besides, a uscsi command does not allow DMA
14075	 *	 breakup, so there is no need to set the PKT_DMA_PARTIAL flag.
14076	 */
14077	if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
14078		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
14079		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
14080		    ((int)(uscmd->uscsi_rqlen) + sizeof (struct scsi_arq_status)
14081		    - sizeof (struct scsi_extended_sense)), 0,
14082		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL) | PKT_XARQ,
14083		    sdrunout, (caddr_t)un);
14084	} else {
14085		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
14086		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
14087		    sizeof (struct scsi_arq_status), 0,
14088		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
14089		    sdrunout, (caddr_t)un);
14090	}
14091
14092	if (pktp == NULL) {
14093		*pktpp = NULL;
14094		/*
14095		 * Set the driver state to RWAIT to indicate the driver
14096		 * is waiting on resource allocations. The driver will not
14097	 * suspend, pm_suspend, or detach while the state is RWAIT.
14098		 */
14099		New_state(un, SD_STATE_RWAIT);
14100
14101		SD_ERROR(SD_LOG_IO_CORE, un,
14102		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
14103
14104		if ((bp->b_flags & B_ERROR) != 0) {
14105			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
14106		}
14107		return (SD_PKT_ALLOC_FAILURE);
14108	}
14109
14110	/*
14111	 * We do not do DMA breakup for USCSI commands, so return failure
14112	 * here if all the needed DMA resources were not allocated.
14113	 */
14114	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
14115	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
14116		scsi_destroy_pkt(pktp);
14117		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
14118		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
14119		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
14120	}
14121
14122	/* Init the cdb from the given uscsi struct */
14123	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
14124	    uscmd->uscsi_cdb[0], 0, 0, 0);
14125
14126	SD_FILL_SCSI1_LUN(un, pktp);
14127
14128	/*
14129	 * Set up the optional USCSI flags. See the uscsi(7I) man page
14130	 * for a listing of the supported flags.
14131	 */
14132
14133	if (uscmd->uscsi_flags & USCSI_SILENT) {
14134		flags |= FLAG_SILENT;
14135	}
14136
14137	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
14138		flags |= FLAG_DIAGNOSE;
14139	}
14140
14141	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
14142		flags |= FLAG_ISOLATE;
14143	}
14144
14145	if (un->un_f_is_fibre == FALSE) {
14146		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
14147			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
14148		}
14149	}
14150
14151	/*
14152	 * Set the pkt flags here so we save time later.
14153	 * Note: These flags are NOT in the uscsi man page!!!
14154	 */
14155	if (uscmd->uscsi_flags & USCSI_HEAD) {
14156		flags |= FLAG_HEAD;
14157	}
14158
14159	if (uscmd->uscsi_flags & USCSI_NOINTR) {
14160		flags |= FLAG_NOINTR;
14161	}
14162
14163	/*
14164	 * For tagged queueing, things get a bit complicated.
14165	 * Check first for head of queue and last for ordered queue.
14166	 * If neither head nor ordered, use the default driver tag flags.
14167	 */
14168	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
14169		if (uscmd->uscsi_flags & USCSI_HTAG) {
14170			flags |= FLAG_HTAG;
14171		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
14172			flags |= FLAG_OTAG;
14173		} else {
14174			flags |= un->un_tagflags & FLAG_TAGMASK;
14175		}
14176	}
14177
14178	if (uscmd->uscsi_flags & USCSI_NODISCON) {
14179		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
14180	}
14181
14182	pktp->pkt_flags = flags;
14183
14184	/* Transfer uscsi information to scsi_pkt */
14185	(void) scsi_uscsi_pktinit(uscmd, pktp);
14186
14187	/* Copy the caller's CDB into the pkt... */
14188	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
14189
14190	if (uscmd->uscsi_timeout == 0) {
14191		pktp->pkt_time = un->un_uscsi_timeout;
14192	} else {
14193		pktp->pkt_time = uscmd->uscsi_timeout;
14194	}
14195
14196	/* need it later to identify USCSI request in sdintr */
14197	xp->xb_pkt_flags |= SD_XB_USCSICMD;
14198
14199	xp->xb_sense_resid = uscmd->uscsi_rqresid;
14200
14201	pktp->pkt_private = bp;
14202	pktp->pkt_comp = sdintr;
14203	*pktpp = pktp;
14204
14205	SD_TRACE(SD_LOG_IO_CORE, un,
14206	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
14207
14208	return (SD_PKT_ALLOC_SUCCESS);
14209}
14210
14211
14212/*
14213 *    Function: sd_destroypkt_for_uscsi
14214 *
14215 * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
14216 *		IOs. Also saves relevant info into the associated uscsi_cmd
14217 *		struct.
14218 *
14219 *     Context: May be called under interrupt context
14220 */
14221
14222static void
14223sd_destroypkt_for_uscsi(struct buf *bp)
14224{
14225	struct uscsi_cmd *uscmd;
14226	struct sd_xbuf	*xp;
14227	struct scsi_pkt	*pktp;
14228	struct sd_lun	*un;
14229	struct sd_uscsi_info *suip;
14230
14231	ASSERT(bp != NULL);
14232	xp = SD_GET_XBUF(bp);
14233	ASSERT(xp != NULL);
14234	un = SD_GET_UN(bp);
14235	ASSERT(un != NULL);
14236	ASSERT(!mutex_owned(SD_MUTEX(un)));
14237	pktp = SD_GET_PKTP(bp);
14238	ASSERT(pktp != NULL);
14239
14240	SD_TRACE(SD_LOG_IO_CORE, un,
14241	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
14242
14243	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
14244	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
14245	ASSERT(uscmd != NULL);
14246
14247	/* Save the status and the residual into the uscsi_cmd struct */
14248	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
14249	uscmd->uscsi_resid  = bp->b_resid;
14250
14251	/* Transfer scsi_pkt information to uscsi */
14252	(void) scsi_uscsi_pktfini(pktp, uscmd);
14253
14254	/*
14255	 * If enabled, copy any saved sense data into the area specified
14256	 * by the uscsi command.
14257	 */
14258	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
14259	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
14260		/*
14261		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
14262		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
14263		 */
14264		uscmd->uscsi_rqstatus = xp->xb_sense_status;
14265		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
14266		if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
14267			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
14268			    MAX_SENSE_LENGTH);
14269		} else {
14270			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
14271			    SENSE_LENGTH);
14272		}
14273	}
14274	/*
14275	 * The following assignments are for SCSI FMA.
14276	 */
14277	ASSERT(xp->xb_private != NULL);
14278	suip = (struct sd_uscsi_info *)xp->xb_private;
14279	suip->ui_pkt_reason = pktp->pkt_reason;
14280	suip->ui_pkt_state = pktp->pkt_state;
14281	suip->ui_pkt_statistics = pktp->pkt_statistics;
14282	suip->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
14283
14284	/* We are done with the scsi_pkt; free it now */
14285	ASSERT(SD_GET_PKTP(bp) != NULL);
14286	scsi_destroy_pkt(SD_GET_PKTP(bp));
14287
14288	SD_TRACE(SD_LOG_IO_CORE, un,
14289	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
14290}
14291
14292
14293/*
14294 *    Function: sd_bioclone_alloc
14295 *
14296 * Description: Allocate a buf(9S) and init it as per the given buf
14297 *		and the various arguments.  The associated sd_xbuf
14298 *		struct is (nearly) duplicated.  The struct buf *bp
14299 *		argument is saved in new_xp->xb_private.
14300 *
14301 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14302 *		datalen - size of data area for the shadow bp
14303 *		blkno - starting LBA
14304 *		func - function pointer for b_iodone in the shadow buf. (May
14305 *			be NULL if none.)
14306 *
14307 * Return Code: Pointer to the allocated buf(9S) struct
14308 *
14309 *     Context: Can sleep.
14310 */
14311
14312static struct buf *
14313sd_bioclone_alloc(struct buf *bp, size_t datalen,
14314	daddr_t blkno, int (*func)(struct buf *))
14315{
14316	struct	sd_lun	*un;
14317	struct	sd_xbuf	*xp;
14318	struct	sd_xbuf	*new_xp;
14319	struct	buf	*new_bp;
14320
14321	ASSERT(bp != NULL);
14322	xp = SD_GET_XBUF(bp);
14323	ASSERT(xp != NULL);
14324	un = SD_GET_UN(bp);
14325	ASSERT(un != NULL);
14326	ASSERT(!mutex_owned(SD_MUTEX(un)));
14327
14328	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
14329	    NULL, KM_SLEEP);
14330
14331	new_bp->b_lblkno	= blkno;
14332
14333	/*
14334	 * Allocate an xbuf for the shadow bp and copy the contents of the
14335	 * original xbuf into it.
14336	 */
14337	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14338	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14339
14340	/*
14341	 * The given bp is automatically saved in the xb_private member
14342	 * of the new xbuf.  Callers are allowed to depend on this.
14343	 */
14344	new_xp->xb_private = bp;
14345
14346	new_bp->b_private  = new_xp;
14347
14348	return (new_bp);
14349}
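
/*
 * A minimal sketch of how a shadow buf's b_iodone routine typically
 * recovers the original buf via xb_private; the sd_demo_iodone name
 * and body below are hypothetical, for illustration only:
 *
 *	static int
 *	sd_demo_iodone(struct buf *shadow_bp)
 *	{
 *		struct sd_xbuf	*shadow_xp = SD_GET_XBUF(shadow_bp);
 *		struct buf	*orig_bp = shadow_xp->xb_private;
 *
 *		(propagate b_error and b_resid to orig_bp here, then:)
 *		sd_bioclone_free(shadow_bp);
 *		biodone(orig_bp);
 *		return (0);
 *	}
 */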
14350
14351/*
14352 *    Function: sd_shadow_buf_alloc
14353 *
14354 * Description: Allocate a buf(9S) and init it as per the given buf
14355 *		and the various arguments.  The associated sd_xbuf
14356 *		struct is (nearly) duplicated.  The struct buf *bp
14357 *		argument is saved in new_xp->xb_private.
14358 *
14359 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14360 *		datalen - size of data area for the shadow bp
14361 *		bflags - B_READ or B_WRITE (pseudo flag)
14362 *		blkno - starting LBA
14363 *		func - function pointer for b_iodone in the shadow buf. (May
14364 *			be NULL if none.)
14365 *
14366 * Return Code: Pointer to the allocated buf(9S) struct
14367 *
14368 *     Context: Can sleep.
14369 */
14370
14371static struct buf *
14372sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
14373	daddr_t blkno, int (*func)(struct buf *))
14374{
14375	struct	sd_lun	*un;
14376	struct	sd_xbuf	*xp;
14377	struct	sd_xbuf	*new_xp;
14378	struct	buf	*new_bp;
14379
14380	ASSERT(bp != NULL);
14381	xp = SD_GET_XBUF(bp);
14382	ASSERT(xp != NULL);
14383	un = SD_GET_UN(bp);
14384	ASSERT(un != NULL);
14385	ASSERT(!mutex_owned(SD_MUTEX(un)));
14386
14387	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
14388		bp_mapin(bp);
14389	}
14390
14391	bflags &= (B_READ | B_WRITE);
14392#if defined(__i386) || defined(__amd64)
14393	new_bp = getrbuf(KM_SLEEP);
14394	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
14395	new_bp->b_bcount = datalen;
14396	new_bp->b_flags = bflags |
14397	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
14398#else
14399	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
14400	    datalen, bflags, SLEEP_FUNC, NULL);
14401#endif
14402	new_bp->av_forw	= NULL;
14403	new_bp->av_back	= NULL;
14404	new_bp->b_dev	= bp->b_dev;
14405	new_bp->b_blkno	= blkno;
14406	new_bp->b_iodone = func;
14407	new_bp->b_edev	= bp->b_edev;
14408	new_bp->b_resid	= 0;
14409
14410	/* We need to preserve the B_FAILFAST flag */
14411	if (bp->b_flags & B_FAILFAST) {
14412		new_bp->b_flags |= B_FAILFAST;
14413	}
14414
14415	/*
14416	 * Allocate an xbuf for the shadow bp and copy the contents of the
14417	 * original xbuf into it.
14418	 */
14419	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14420	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14421
14422	/* Needed later to copy data between the shadow buf & original buf */
14423	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
14424
14425	/*
14426	 * The given bp is automatically saved in the xb_private member
14427	 * of the new xbuf.  Callers are allowed to depend on this.
14428	 */
14429	new_xp->xb_private = bp;
14430
14431	new_bp->b_private  = new_xp;
14432
14433	return (new_bp);
14434}
14435
14436/*
14437 *    Function: sd_bioclone_free
14438 *
14439 * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14440 *		in the larger-than-partition operation.
14441 *
14442 *     Context: May be called under interrupt context
14443 */
14444
14445static void
14446sd_bioclone_free(struct buf *bp)
14447{
14448	struct sd_xbuf	*xp;
14449
14450	ASSERT(bp != NULL);
14451	xp = SD_GET_XBUF(bp);
14452	ASSERT(xp != NULL);
14453
14454	/*
14455	 * Call bp_mapout() before freeing the buf, in case a lower
14456	 * layer or HBA had done a bp_mapin().  We must do this here
14457	 * as we are the "originator" of the shadow buf.
14458	 */
14459	bp_mapout(bp);
14460
14461	/*
14462	 * Null out b_iodone before freeing the bp, to ensure that the driver
14463	 * never gets confused by a stale value in this field. (Just a little
14464	 * extra defensiveness here.)
14465	 */
14466	bp->b_iodone = NULL;
14467
14468	freerbuf(bp);
14469
14470	kmem_free(xp, sizeof (struct sd_xbuf));
14471}
14472
14473/*
14474 *    Function: sd_shadow_buf_free
14475 *
14476 * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14477 *
14478 *     Context: May be called under interrupt context
14479 */
14480
14481static void
14482sd_shadow_buf_free(struct buf *bp)
14483{
14484	struct sd_xbuf	*xp;
14485
14486	ASSERT(bp != NULL);
14487	xp = SD_GET_XBUF(bp);
14488	ASSERT(xp != NULL);
14489
14490#if defined(__sparc)
14491	/*
14492	 * Call bp_mapout() before freeing the buf, in case a lower
14493	 * layer or HBA had done a bp_mapin().  We must do this here
14494	 * as we are the "originator" of the shadow buf.
14495	 */
14496	bp_mapout(bp);
14497#endif
14498
14499	/*
14500	 * Null out b_iodone before freeing the bp, to ensure that the driver
14501	 * never gets confused by a stale value in this field. (Just a little
14502	 * extra defensiveness here.)
14503	 */
14504	bp->b_iodone = NULL;
14505
14506#if defined(__i386) || defined(__amd64)
14507	kmem_free(bp->b_un.b_addr, bp->b_bcount);
14508	freerbuf(bp);
14509#else
14510	scsi_free_consistent_buf(bp);
14511#endif
14512
14513	kmem_free(xp, sizeof (struct sd_xbuf));
14514}
14515
14516
14517/*
14518 *    Function: sd_print_transport_rejected_message
14519 *
14520 * Description: This implements the ludicrously complex rules for printing
14521 *		a "transport rejected" message.  This is to address the
14522 *		specific problem of having a flood of this error message
14523 *		produced when a failover occurs.
14524 *
14525 *     Context: Any.
14526 */
14527
14528static void
14529sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14530	int code)
14531{
14532	ASSERT(un != NULL);
14533	ASSERT(mutex_owned(SD_MUTEX(un)));
14534	ASSERT(xp != NULL);
14535
14536	/*
14537	 * Print the "transport rejected" message under the following
14538	 * conditions:
14539	 *
14540	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14541	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14542	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14543	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14544	 *   scsi_transport(9F) (which indicates that the target might have
14545	 *   gone off-line).  This uses the un->un_tran_fatal_count
14546	 *   counter, which is incremented whenever a TRAN_FATAL_ERROR is
14547	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
14548	 *   from scsi_transport().
14549	 *
14550	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14551	 * the preceding cases in order for the message to be printed.
14552	 */
14553	if (((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) &&
14554	    (SD_FM_LOG(un) == SD_FM_LOG_NSUP)) {
14555		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14556		    (code != TRAN_FATAL_ERROR) ||
14557		    (un->un_tran_fatal_count == 1)) {
14558			switch (code) {
14559			case TRAN_BADPKT:
14560				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14561				    "transport rejected bad packet\n");
14562				break;
14563			case TRAN_FATAL_ERROR:
14564				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14565				    "transport rejected fatal error\n");
14566				break;
14567			default:
14568				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14569				    "transport rejected (%d)\n", code);
14570				break;
14571			}
14572		}
14573	}
14574}
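
/*
 * Example of the rules above in action: with SD_LOGMASK_DIAG clear in
 * sd_level_mask, a burst of TRAN_FATAL_ERROR returns during a
 * failover logs only one "transport rejected fatal error" message (on
 * the first failure, when un_tran_fatal_count == 1), while every
 * TRAN_BADPKT return is logged.  In all cases FLAG_SILENT must be
 * clear in the pkt and SD_FM_LOG(un) must be SD_FM_LOG_NSUP.
 */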
14575
14576
14577/*
14578 *    Function: sd_add_buf_to_waitq
14579 *
14580 * Description: Add the given buf(9S) struct to the wait queue for the
14581 *		instance.  If sorting is enabled, then the buf is added
14582 *		to the queue via an elevator sort algorithm (a la
14583 *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14584 *		If sorting is not enabled, then the buf is just added
14585 *		to the end of the wait queue.
14586 *
14587 * Return Code: void
14588 *
14589 *     Context: Does not sleep/block, therefore technically can be called
14590 *		from any context.  However if sorting is enabled then the
14591 *		execution time is indeterminate, and may take a long time if
14592 *		the wait queue grows large.
14593 */
14594
14595static void
14596sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14597{
14598	struct buf *ap;
14599
14600	ASSERT(bp != NULL);
14601	ASSERT(un != NULL);
14602	ASSERT(mutex_owned(SD_MUTEX(un)));
14603
14604	/* If the queue is empty, add the buf as the only entry & return. */
14605	if (un->un_waitq_headp == NULL) {
14606		ASSERT(un->un_waitq_tailp == NULL);
14607		un->un_waitq_headp = un->un_waitq_tailp = bp;
14608		bp->av_forw = NULL;
14609		return;
14610	}
14611
14612	ASSERT(un->un_waitq_tailp != NULL);
14613
14614	/*
14615	 * If sorting is disabled, just add the buf to the tail end of
14616	 * the wait queue and return.
14617	 */
14618	if (un->un_f_disksort_disabled || un->un_f_enable_rmw) {
14619		un->un_waitq_tailp->av_forw = bp;
14620		un->un_waitq_tailp = bp;
14621		bp->av_forw = NULL;
14622		return;
14623	}
14624
14625	/*
14626	 * Sort through the list of requests currently on the wait queue
14627	 * and add the new buf request at the appropriate position.
14628	 *
14629	 * The un->un_waitq_headp is an activity chain pointer on which
14630	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14631	 * first queue holds those requests which are positioned after
14632	 * the current SD_GET_BLKNO() (in the first request); the second holds
14633	 * requests which came in after their SD_GET_BLKNO() number was passed.
14634	 * Thus we implement a one-way scan, retracting after reaching
14635	 * the end of the drive to the first request on the second
14636	 * queue, at which time it becomes the first queue.
14637	 * A one-way scan is natural because of the way UNIX read-ahead
14638	 * blocks are allocated.
14639	 *
14640	 * If we lie behind the first request (our block number is
14641	 * lower), we must locate the second list and add ourselves to it.
14642	 */
14643	ap = un->un_waitq_headp;
14644	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14645		while (ap->av_forw != NULL) {
14646			/*
14647			 * Look for an "inversion" in the (normally
14648			 * ascending) block numbers. This indicates
14649			 * the start of the second request list.
14650			 */
14651			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14652				/*
14653				 * Search the second request list for the
14654				 * first request at a larger block number.
14655				 * We go before that; however if there is
14656				 * no such request, we go at the end.
14657				 */
14658				do {
14659					if (SD_GET_BLKNO(bp) <
14660					    SD_GET_BLKNO(ap->av_forw)) {
14661						goto insert;
14662					}
14663					ap = ap->av_forw;
14664				} while (ap->av_forw != NULL);
14665				goto insert;		/* after last */
14666			}
14667			ap = ap->av_forw;
14668		}
14669
14670		/*
14671		 * No inversions... we will go after the last, and
14672		 * be the first request in the second request list.
14673		 */
14674		goto insert;
14675	}
14676
14677	/*
14678	 * Request is at/after the current request...
14679	 * sort in the first request list.
14680	 */
14681	while (ap->av_forw != NULL) {
14682		/*
14683		 * We want to go after the current request (1) if
14684		 * there is an inversion after it (i.e. it is the end
14685		 * of the first request list), or (2) if the next
14686		 * request is a larger block no. than our request.
14687		 */
14688		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14689		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14690			goto insert;
14691		}
14692		ap = ap->av_forw;
14693	}
14694
14695	/*
14696	 * Neither a second list nor a larger request, therefore
14697	 * we go at the end of the first list (which is the same
14698	 * as the end of the whole shebang).
14699	 */
14700insert:
14701	bp->av_forw = ap->av_forw;
14702	ap->av_forw = bp;
14703
14704	/*
14705	 * If we inserted onto the tail end of the waitq, make sure the
14706	 * tail pointer is updated.
14707	 */
14708	if (ap == un->un_waitq_tailp) {
14709		un->un_waitq_tailp = bp;
14710	}
14711}
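
/*
 * Worked example of the one-way scan above (block numbers are
 * illustrative):
 *
 *	waitq:	100 -> 300 -> 700 -> 50 -> 80
 *		[--- first list ---]  [2nd list]
 *
 *	add blkno 500: at/after the head (100), so it sorts into the
 *		first list; 500 < 700, so it lands between 300 and 700.
 *	add blkno 900: first list, but no larger request before the
 *		inversion at 700 -> 50, so it goes after 700 at the end
 *		of the first list.
 *	add blkno 60: below the head, so it belongs on the second
 *		list; the first larger request there is 80, so it lands
 *		between 50 and 80.
 */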
14712
14713
14714/*
14715 *    Function: sd_start_cmds
14716 *
14717 * Description: Remove and transport cmds from the driver queues.
14718 *
14719 *   Arguments: un - pointer to the unit (soft state) struct for the target.
14720 *
14721 *		immed_bp - ptr to a buf to be transported immediately. Only
14722 *		the immed_bp is transported; bufs on the waitq are not
14723 *		processed and the un_retry_bp is not checked.  If immed_bp is
14724 *		NULL, then normal queue processing is performed.
14725 *
14726 *     Context: May be called from kernel thread context, interrupt context,
14727 *		or runout callback context. This function may not block or
14728 *		call routines that block.
14729 */
14730
14731static void
14732sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14733{
14734	struct	sd_xbuf	*xp;
14735	struct	buf	*bp;
14736	void	(*statp)(kstat_io_t *);
14737#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14738	void	(*saved_statp)(kstat_io_t *);
14739#endif
14740	int	rval;
14741	struct sd_fm_internal *sfip = NULL;
14742
14743	ASSERT(un != NULL);
14744	ASSERT(mutex_owned(SD_MUTEX(un)));
14745	ASSERT(un->un_ncmds_in_transport >= 0);
14746	ASSERT(un->un_throttle >= 0);
14747
14748	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14749
14750	do {
14751#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14752		saved_statp = NULL;
14753#endif
14754
14755		/*
14756		 * If we are syncing or dumping, fail the command to
14757		 * avoid recursively calling back into scsi_transport().
14758		 * The dump I/O itself uses a separate code path so this
14759		 * only prevents non-dump I/O from being sent while dumping.
14760		 * File system sync takes place before dumping begins.
14761		 * During panic, filesystem I/O is allowed provided
14762		 * un_in_callback is <= 1.  This is to prevent recursion
14763		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14764		 * sd_start_cmds and so on.  See panic.c for more information
14765		 * about the states the system can be in during panic.
14766		 */
14767		if ((un->un_state == SD_STATE_DUMPING) ||
14768		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14769			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14770			    "sd_start_cmds: panicking\n");
14771			goto exit;
14772		}
14773
14774		if ((bp = immed_bp) != NULL) {
14775			/*
14776			 * We have a bp that must be transported immediately.
14777			 * It's OK to transport the immed_bp here without doing
14778			 * the throttle limit check because the immed_bp is
14779			 * always used in a retry/recovery case. This means
14780			 * that we know we are not at the throttle limit by
14781			 * virtue of the fact that to get here we must have
14782			 * already gotten a command back via sdintr(). This also
14783			 * relies on (1) the command on un_retry_bp preventing
14784			 * further commands from the waitq from being issued;
14785			 * and (2) the code in sd_retry_command checking the
14786			 * throttle limit before issuing a delayed or immediate
14787			 * retry. This holds even if the throttle limit is
14788			 * currently ratcheted down from its maximum value.
14789			 */
14790			statp = kstat_runq_enter;
14791			if (bp == un->un_retry_bp) {
14792				ASSERT((un->un_retry_statp == NULL) ||
14793				    (un->un_retry_statp == kstat_waitq_enter) ||
14794				    (un->un_retry_statp ==
14795				    kstat_runq_back_to_waitq));
14796				/*
14797				 * If the waitq kstat was incremented when
14798				 * sd_set_retry_bp() queued this bp for a retry,
14799				 * then we must set up statp so that the waitq
14800				 * count will get decremented correctly below.
14801				 * Also we must clear un->un_retry_statp to
14802				 * ensure that we do not act on a stale value
14803				 * in this field.
14804				 */
14805				if ((un->un_retry_statp == kstat_waitq_enter) ||
14806				    (un->un_retry_statp ==
14807				    kstat_runq_back_to_waitq)) {
14808					statp = kstat_waitq_to_runq;
14809				}
14810#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14811				saved_statp = un->un_retry_statp;
14812#endif
14813				un->un_retry_statp = NULL;
14814
14815				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14816				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14817				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14818				    un, un->un_retry_bp, un->un_throttle,
14819				    un->un_ncmds_in_transport);
14820			} else {
14821				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14822				    "processing priority bp:0x%p\n", bp);
14823			}
14824
14825		} else if ((bp = un->un_waitq_headp) != NULL) {
14826			/*
14827			 * A command on the waitq is ready to go, but do not
14828			 * send it if:
14829			 *
14830			 * (1) the throttle limit has been reached, or
14831			 * (2) a retry is pending, or
14832			 * (3) a START_STOP_UNIT callback is pending, or
14833			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14834			 *	command is pending.
14835			 *
14836			 * For all of these conditions, IO processing will
14837			 * restart after the condition is cleared.
14838			 */
14839			if (un->un_ncmds_in_transport >= un->un_throttle) {
14840				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14841				    "sd_start_cmds: exiting, "
14842				    "throttle limit reached!\n");
14843				goto exit;
14844			}
14845			if (un->un_retry_bp != NULL) {
14846				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14847				    "sd_start_cmds: exiting, retry pending!\n");
14848				goto exit;
14849			}
14850			if (un->un_startstop_timeid != NULL) {
14851				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14852				    "sd_start_cmds: exiting, "
14853				    "START_STOP pending!\n");
14854				goto exit;
14855			}
14856			if (un->un_direct_priority_timeid != NULL) {
14857				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14858				    "sd_start_cmds: exiting, "
14859				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14860				goto exit;
14861			}
14862
14863			/* Dequeue the command */
14864			un->un_waitq_headp = bp->av_forw;
14865			if (un->un_waitq_headp == NULL) {
14866				un->un_waitq_tailp = NULL;
14867			}
14868			bp->av_forw = NULL;
14869			statp = kstat_waitq_to_runq;
14870			SD_TRACE(SD_LOG_IO_CORE, un,
14871			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14872
14873		} else {
14874			/* No work to do so bail out now */
14875			SD_TRACE(SD_LOG_IO_CORE, un,
14876			    "sd_start_cmds: no more work, exiting!\n");
14877			goto exit;
14878		}
14879
14880		/*
14881		 * Reset the state to normal. This is the mechanism by which
14882		 * the state transitions from either SD_STATE_RWAIT or
14883		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14884		 * If state is SD_STATE_PM_CHANGING then this command is
14885		 * part of the device power control and the state must
14886		 * not be put back to normal. Doing so would allow new
14887		 * commands to proceed when they shouldn't, as the
14888		 * device may be powering off.
14889		 */
14890		if ((un->un_state != SD_STATE_SUSPENDED) &&
14891		    (un->un_state != SD_STATE_PM_CHANGING)) {
14892			New_state(un, SD_STATE_NORMAL);
14893		}
14894
14895		xp = SD_GET_XBUF(bp);
14896		ASSERT(xp != NULL);
14897
14898#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14899		/*
14900		 * Allocate the scsi_pkt if we need one, or attach DMA
14901		 * resources if we have a scsi_pkt that needs them. The
14902		 * latter should only occur for commands that are being
14903		 * retried.
14904		 */
14905		if ((xp->xb_pktp == NULL) ||
14906		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14907#else
14908		if (xp->xb_pktp == NULL) {
14909#endif
14910			/*
14911			 * There is no scsi_pkt allocated for this buf. Call
14912			 * the initpkt function to allocate & init one.
14913			 *
14914			 * The scsi_init_pkt runout callback functionality is
14915			 * implemented as follows:
14916			 *
14917			 * 1) The initpkt function always calls
14918			 *    scsi_init_pkt(9F) with sdrunout specified as the
14919			 *    callback routine.
14920			 * 2) A successfully allocated packet is initialized
14921			 *    and the I/O is transported.
14922			 * 3) The I/O associated with a resource allocation
14923			 *    failure is left on its queue to be retried via
14924			 *    runout or the next I/O.
14925			 * 4) The I/O associated with a DMA error is removed
14926			 *    from the queue and failed with EIO. Processing of
14927			 *    the transport queues is also halted to be
14928			 *    restarted via runout or the next I/O.
14929			 * 5) The I/O associated with a CDB size or packet
14930			 *    size error is removed from the queue and failed
14931			 *    with EIO. Processing of the transport queues is
14932			 *    continued.
14933			 *
14934			 * Note: there is no interface for canceling a runout
14935			 * callback. To prevent the driver from detaching or
14936			 * suspending while a runout is pending, the driver
14937			 * state is set to SD_STATE_RWAIT.
14938			 *
14939			 * Note: using the scsi_init_pkt callback facility can
14940			 * result in an I/O request persisting at the head of
14941			 * the list which cannot be satisfied even after
14942			 * multiple retries. In the future the driver may
14943			 * implement some kind of maximum runout count before
14944			 * failing an I/O.
14945			 *
14946			 * Note: the use of funcp below may seem superfluous,
14947			 * but it helps warlock figure out the correct
14948			 * initpkt function calls (see [s]sd.wlcmd).
14949			 */
14950			struct scsi_pkt	*pktp;
14951			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14952
14953			ASSERT(bp != un->un_rqs_bp);
14954
14955			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14956			switch ((*funcp)(bp, &pktp)) {
14957			case  SD_PKT_ALLOC_SUCCESS:
14958				xp->xb_pktp = pktp;
14959				SD_TRACE(SD_LOG_IO_CORE, un,
14960				    "sd_start_cmds: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14961				    pktp);
14962				goto got_pkt;
14963
14964			case SD_PKT_ALLOC_FAILURE:
14965				/*
14966				 * Temporary (hopefully) resource depletion.
14967				 * Since retries and RQS commands always have a
14968				 * scsi_pkt allocated, these cases should never
14969				 * get here. So the only cases this needs to
14970				 * handle are a bp from the waitq (which we put
14971				 * back onto the waitq for sdrunout), or a bp
14972				 * sent as an immed_bp (which we just fail).
14973				 */
14974				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14975				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14976
14977#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14978
14979				if (bp == immed_bp) {
14980					/*
14981					 * If SD_XB_DMA_FREED is clear, then
14982					 * this is a failure to allocate a
14983					 * scsi_pkt, and we must fail the
14984					 * command.
14985					 */
14986					if ((xp->xb_pkt_flags &
14987					    SD_XB_DMA_FREED) == 0) {
14988						break;
14989					}
14990
14991					/*
14992					 * If this immediate command is NOT our
14993					 * un_retry_bp, then we must fail it.
14994					 */
14995					if (bp != un->un_retry_bp) {
14996						break;
14997					}
14998
14999					/*
15000					 * We get here if this cmd is our
15001					 * un_retry_bp that was DMAFREED, but
15002					 * scsi_init_pkt() failed to reallocate
15003					 * DMA resources when we attempted to
15004					 * retry it. This can happen when an
15005					 * mpxio failover is in progress, but
15006					 * we don't want to just fail the
15007					 * command in this case.
15008					 *
15009					 * Use timeout(9F) to restart it after
15010					 * a 100ms delay.  We don't want to
15011					 * let sdrunout() restart it, because
15012					 * sdrunout() is just supposed to start
15013					 * commands that are sitting on the
15014					 * wait queue.  The un_retry_bp stays
15015					 * set until the command completes, but
15016					 * sdrunout can be called many times
15017					 * before that happens.  Since sdrunout
15018					 * cannot tell if the un_retry_bp is
15019					 * already in the transport, it could
15020					 * end up calling scsi_transport() for
15021					 * the un_retry_bp multiple times.
15022					 *
15023					 * Also: don't schedule the callback
15024					 * if some other callback is already
15025					 * pending.
15026					 */
15027					if (un->un_retry_statp == NULL) {
15028						/*
15029						 * restore the kstat pointer to
15030						 * keep kstat counts coherent
15031						 * when we do retry the command.
15032						 */
15033						un->un_retry_statp =
15034						    saved_statp;
15035					}
15036
15037					if ((un->un_startstop_timeid == NULL) &&
15038					    (un->un_retry_timeid == NULL) &&
15039					    (un->un_direct_priority_timeid ==
15040					    NULL)) {
15041
15042						un->un_retry_timeid =
15043						    timeout(
15044						    sd_start_retry_command,
15045						    un, SD_RESTART_TIMEOUT);
15046					}
15047					goto exit;
15048				}
15049
15050#else
15051				if (bp == immed_bp) {
15052					break;	/* Just fail the command */
15053				}
15054#endif
15055
15056				/* Add the buf back to the head of the waitq */
15057				bp->av_forw = un->un_waitq_headp;
15058				un->un_waitq_headp = bp;
15059				if (un->un_waitq_tailp == NULL) {
15060					un->un_waitq_tailp = bp;
15061				}
15062				goto exit;
15063
15064			case SD_PKT_ALLOC_FAILURE_NO_DMA:
15065				/*
15066				 * HBA DMA resource failure. Fail the command
15067				 * and continue processing of the queues.
15068				 */
15069				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15070				    "sd_start_cmds: "
15071				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
15072				break;
15073
15074			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
15075				/*
15076				 * Note:x86: Partial DMA mapping not supported
15077				 * for USCSI commands, and all the needed DMA
15078				 * resources were not allocated.
15079				 */
15080				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15081				    "sd_start_cmds: "
15082				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
15083				break;
15084
15085			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
15086				/*
15087				 * Note:x86: Request cannot fit into CDB based
15088				 * on lba and len.
15089				 */
15090				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15091				    "sd_start_cmds: "
15092				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
15093				break;
15094
15095			default:
15096				/* Should NEVER get here! */
15097				panic("scsi_initpkt error");
15098				/*NOTREACHED*/
15099			}
15100
15101			/*
15102			 * Fatal error in allocating a scsi_pkt for this buf.
15103			 * Update kstats & return the buf with an error code.
15104			 * We must use sd_return_failed_command_no_restart() to
15105			 * avoid a recursive call back into sd_start_cmds().
15106			 * However this also means that we must keep processing
15107			 * the waitq here in order to avoid stalling.
15108			 */
15109			if (statp == kstat_waitq_to_runq) {
15110				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
15111			}
15112			sd_return_failed_command_no_restart(un, bp, EIO);
15113			if (bp == immed_bp) {
15114				/* immed_bp is gone by now, so clear this */
15115				immed_bp = NULL;
15116			}
15117			continue;
15118		}
15119got_pkt:
15120		if (bp == immed_bp) {
15121			/* go to the head of the class.... */
15122			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15123		}
15124
15125		un->un_ncmds_in_transport++;
15126		SD_UPDATE_KSTATS(un, statp, bp);
15127
15128		/*
15129		 * Call scsi_transport() to send the command to the target.
15130		 * According to SCSA architecture, we must drop the mutex here
15131		 * before calling scsi_transport() in order to avoid deadlock.
15132		 * Note that the scsi_pkt's completion routine can be executed
15133		 * (from interrupt context) even before the call to
15134		 * scsi_transport() returns.
15135		 */
15136		SD_TRACE(SD_LOG_IO_CORE, un,
15137		    "sd_start_cmds: calling scsi_transport()\n");
15138		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
15139
15140		mutex_exit(SD_MUTEX(un));
15141		rval = scsi_transport(xp->xb_pktp);
15142		mutex_enter(SD_MUTEX(un));
15143
15144		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15145		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
15146
15147		switch (rval) {
15148		case TRAN_ACCEPT:
15149			/* Clear this with every pkt accepted by the HBA */
15150			un->un_tran_fatal_count = 0;
15151			break;	/* Success; try the next cmd (if any) */
15152
15153		case TRAN_BUSY:
15154			un->un_ncmds_in_transport--;
15155			ASSERT(un->un_ncmds_in_transport >= 0);
15156
15157			/*
15158			 * Don't retry request sense; the sense data
15159			 * is lost when another request is sent.
15160			 * Free up the rqs buf and retry
15161			 * the original failed cmd.  Update kstat.
15162			 */
15163			if (bp == un->un_rqs_bp) {
15164				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15165				bp = sd_mark_rqs_idle(un, xp);
15166				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15167				    NULL, NULL, EIO, un->un_busy_timeout / 500,
15168				    kstat_waitq_enter);
15169				goto exit;
15170			}
15171
15172#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
15173			/*
15174			 * Free the DMA resources for the scsi_pkt. This will
15175			 * allow mpxio to select another path the next time
15176			 * we call scsi_transport() with this scsi_pkt.
15177			 * See sdintr() for the rationale behind this.
15178			 */
15179			if ((un->un_f_is_fibre == TRUE) &&
15180			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
15181			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
15182				scsi_dmafree(xp->xb_pktp);
15183				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
15184			}
15185#endif
15186
15187			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
15188				/*
15189				 * Commands that are SD_PATH_DIRECT_PRIORITY
15190				 * are for error recovery situations. These do
15191				 * not use the normal command waitq, so if they
15192				 * get a TRAN_BUSY we cannot put them back onto
15193				 * the waitq for later retry. One possible
15194				 * problem is that there could already be some
15195				 * other command on un_retry_bp that is waiting
15196				 * for this one to complete, so we would be
15197				 * deadlocked if we put this command back onto
15198				 * the waitq for later retry (since un_retry_bp
15199				 * must complete before the driver gets back to
15200				 * commands on the waitq).
15201				 *
15202				 * To avoid deadlock we must schedule a callback
15203				 * that will restart this command after a set
15204				 * interval.  This should keep retrying for as
15205				 * long as the underlying transport keeps
15206				 * returning TRAN_BUSY (just like for other
15207				 * commands).  Use the same timeout interval as
15208				 * for the ordinary TRAN_BUSY retry.
15209				 */
15210				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15211				    "sd_start_cmds: scsi_transport() returned "
15212				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
15213
15214				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15215				un->un_direct_priority_timeid =
15216				    timeout(sd_start_direct_priority_command,
15217				    bp, un->un_busy_timeout / 500);
15218
15219				goto exit;
15220			}
15221
15222			/*
15223			 * For TRAN_BUSY, we want to reduce the throttle value,
15224			 * unless we are retrying a command.
15225			 */
15226			if (bp != un->un_retry_bp) {
15227				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
15228			}
15229
15230			/*
15231			 * Set up the bp to be tried again 10 ms later.
15232			 * Note:x86: Is there a timeout value in the sd_lun
15233			 * for this condition?
15234			 */
15235			sd_set_retry_bp(un, bp, un->un_busy_timeout / 500,
15236			    kstat_runq_back_to_waitq);
15237			goto exit;
15238
15239		case TRAN_FATAL_ERROR:
15240			un->un_tran_fatal_count++;
15241			/* FALLTHRU */
15242
15243		case TRAN_BADPKT:
15244		default:
15245			un->un_ncmds_in_transport--;
15246			ASSERT(un->un_ncmds_in_transport >= 0);
15247
15248			/*
15249			 * If this is our REQUEST SENSE command with a
15250			 * transport error, we must get back the pointers
15251			 * to the original buf, and mark the REQUEST
15252			 * SENSE command as "available".
15253			 */
15254			if (bp == un->un_rqs_bp) {
15255				bp = sd_mark_rqs_idle(un, xp);
15256				xp = SD_GET_XBUF(bp);
15257			} else {
15258				/*
15259				 * Legacy behavior: do not update transport
15260				 * error count for request sense commands.
15261				 */
15262				SD_UPDATE_ERRSTATS(un, sd_transerrs);
15263			}
15264
15265			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15266			sd_print_transport_rejected_message(un, xp, rval);
15267
15268			/*
15269			 * This command will be terminated by SD driver due
15270			 * to a fatal transport error. We should post
15271			 * ereport.io.scsi.cmd.disk.tran with driver-assessment
15272			 * of "fail" for any command to indicate this
15273			 * situation.
15274			 */
15275			if (xp->xb_ena > 0) {
15276				ASSERT(un->un_fm_private != NULL);
15277				sfip = un->un_fm_private;
15278				sfip->fm_ssc.ssc_flags |= SSC_FLAGS_TRAN_ABORT;
15279				sd_ssc_extract_info(&sfip->fm_ssc, un,
15280				    xp->xb_pktp, bp, xp);
15281				sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
15282			}
15283
15284			/*
15285			 * We must use sd_return_failed_command_no_restart() to
15286			 * avoid a recursive call back into sd_start_cmds().
15287			 * However this also means that we must keep processing
15288			 * the waitq here in order to avoid stalling.
15289			 */
15290			sd_return_failed_command_no_restart(un, bp, EIO);
15291
15292			/*
15293			 * Notify any threads waiting in sd_ddi_suspend() that
15294			 * a command completion has occurred.
15295			 */
15296			if (un->un_state == SD_STATE_SUSPENDED) {
15297				cv_broadcast(&un->un_disk_busy_cv);
15298			}
15299
15300			if (bp == immed_bp) {
15301				/* immed_bp is gone by now, so clear this */
15302				immed_bp = NULL;
15303			}
15304			break;
15305		}
15306
15307	} while (immed_bp == NULL);
15308
15309exit:
15310	ASSERT(mutex_owned(SD_MUTEX(un)));
15311	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
15312}
15313
15314
15315/*
15316 *    Function: sd_return_command
15317 *
15318 * Description: Returns a command to its originator (with or without an
15319 *		error).  Also starts commands waiting to be transported
15320 *		to the target.
15321 *
15322 *     Context: May be called from interrupt, kernel, or timeout context
15323 */
15324
15325static void
15326sd_return_command(struct sd_lun *un, struct buf *bp)
15327{
15328	struct sd_xbuf *xp;
15329	struct scsi_pkt *pktp;
15330	struct sd_fm_internal *sfip;
15331
15332	ASSERT(bp != NULL);
15333	ASSERT(un != NULL);
15334	ASSERT(mutex_owned(SD_MUTEX(un)));
15335	ASSERT(bp != un->un_rqs_bp);
15336	xp = SD_GET_XBUF(bp);
15337	ASSERT(xp != NULL);
15338
15339	pktp = SD_GET_PKTP(bp);
15340	sfip = (struct sd_fm_internal *)un->un_fm_private;
15341	ASSERT(sfip != NULL);
15342
15343	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
15344
15345	/*
15346	 * Note: check for the "sdrestart failed" case.
15347	 */
15348	if ((un->un_partial_dma_supported == 1) &&
15349	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
15350	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
15351	    (xp->xb_pktp->pkt_resid == 0)) {
15352
15353		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
15354			/*
15355			 * Successfully set up next portion of cmd
15356			 * transfer, try sending it
15357			 */
15358			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15359			    NULL, NULL, 0, (clock_t)0, NULL);
15360			sd_start_cmds(un, NULL);
15361			return;	/* Note:x86: need a return here? */
15362		}
15363	}
15364
15365	/*
15366	 * If this is the failfast bp, clear it from un_failfast_bp. This
15367	 * can happen if upon being re-tried the failfast bp either
15368	 * succeeded or encountered another error (possibly even a different
15369	 * error than the one that precipitated the failfast state, but in
15370	 * that case it would have had to exhaust retries as well). Regardless,
15371	 * this should not occur whenever the instance is in the active
15372	 * failfast state.
15373	 */
15374	if (bp == un->un_failfast_bp) {
15375		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15376		un->un_failfast_bp = NULL;
15377	}
15378
15379	/*
15380	 * Clear the failfast state upon successful completion of ANY cmd.
15381	 */
15382	if (bp->b_error == 0) {
15383		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15384		/*
15385		 * If this is a successful command, but used to be retried,
15386		 * we will take it as a recovered command and post an
15387		 * ereport with driver-assessment of "recovered".
15388		 */
15389		if (xp->xb_ena > 0) {
15390			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
15391			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RECOVERY);
15392		}
15393	} else {
15394		/*
15395		 * If this is a failed non-USCSI command we will post an
15396		 * ereport with driver-assessment set accordingly("fail" or
15397		 * "fatal").
15398		 */
15399		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
15400			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
15401			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
15402		}
15403	}
15404
15405	/*
15406	 * This is used if the command was retried one or more times. Show that
15407	 * we are done with it, and allow processing of the waitq to resume.
15408	 */
15409	if (bp == un->un_retry_bp) {
15410		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15411		    "sd_return_command: un:0x%p: "
15412		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15413		un->un_retry_bp = NULL;
15414		un->un_retry_statp = NULL;
15415	}
15416
15417	SD_UPDATE_RDWR_STATS(un, bp);
15418	SD_UPDATE_PARTITION_STATS(un, bp);
15419
15420	switch (un->un_state) {
15421	case SD_STATE_SUSPENDED:
15422		/*
15423		 * Notify any threads waiting in sd_ddi_suspend() that
15424		 * a command completion has occurred.
15425		 */
15426		cv_broadcast(&un->un_disk_busy_cv);
15427		break;
15428	default:
15429		sd_start_cmds(un, NULL);
15430		break;
15431	}
15432
15433	/* Return this command up the iodone chain to its originator. */
15434	mutex_exit(SD_MUTEX(un));
15435
15436	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15437	xp->xb_pktp = NULL;
15438
15439	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15440
15441	ASSERT(!mutex_owned(SD_MUTEX(un)));
15442	mutex_enter(SD_MUTEX(un));
15443
15444	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
15445}
15446
15447
15448/*
15449 *    Function: sd_return_failed_command
15450 *
15451 * Description: Command completion when an error occurred.
15452 *
15453 *     Context: May be called from interrupt context
15454 */
15455
15456static void
15457sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15458{
15459	ASSERT(bp != NULL);
15460	ASSERT(un != NULL);
15461	ASSERT(mutex_owned(SD_MUTEX(un)));
15462
15463	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15464	    "sd_return_failed_command: entry\n");
15465
15466	/*
15467	 * b_resid could already be nonzero due to a partial data
15468	 * transfer, so do not change it here.
15469	 */
15470	SD_BIOERROR(bp, errcode);
15471
15472	sd_return_command(un, bp);
15473	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15474	    "sd_return_failed_command: exit\n");
15475}
15476
15477
15478/*
15479 *    Function: sd_return_failed_command_no_restart
15480 *
15481 * Description: Same as sd_return_failed_command, but ensures that no
15482 *		call back into sd_start_cmds will be issued.
15483 *
15484 *     Context: May be called from interrupt context
15485 */
15486
15487static void
15488sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
15489	int errcode)
15490{
15491	struct sd_xbuf *xp;
15492
15493	ASSERT(bp != NULL);
15494	ASSERT(un != NULL);
15495	ASSERT(mutex_owned(SD_MUTEX(un)));
15496	xp = SD_GET_XBUF(bp);
15497	ASSERT(xp != NULL);
15498	ASSERT(errcode != 0);
15499
15500	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15501	    "sd_return_failed_command_no_restart: entry\n");
15502
15503	/*
15504	 * b_resid could already be nonzero due to a partial data
15505	 * transfer, so do not change it here.
15506	 */
15507	SD_BIOERROR(bp, errcode);
15508
15509	/*
15510	 * If this is the failfast bp, clear it. This can happen if the
15511	 * failfast bp encounterd a fatal error when we attempted to
15512	 * re-try it (such as a scsi_transport(9F) failure).  However
15513	 * we should NOT be in an active failfast state if the failfast
15514	 * bp is not NULL.
15515	 */
15516	if (bp == un->un_failfast_bp) {
15517		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15518		un->un_failfast_bp = NULL;
15519	}
15520
15521	if (bp == un->un_retry_bp) {
15522		/*
15523		 * This command was retried one or more times. Show that we are
15524		 * done with it, and allow processing of the waitq to resume.
15525		 */
15526		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15527		    "sd_return_failed_command_no_restart: "
15528		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15529		un->un_retry_bp = NULL;
15530		un->un_retry_statp = NULL;
15531	}
15532
15533	SD_UPDATE_RDWR_STATS(un, bp);
15534	SD_UPDATE_PARTITION_STATS(un, bp);
15535
15536	mutex_exit(SD_MUTEX(un));
15537
15538	if (xp->xb_pktp != NULL) {
15539		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15540		xp->xb_pktp = NULL;
15541	}
15542
15543	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15544
15545	mutex_enter(SD_MUTEX(un));
15546
15547	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15548	    "sd_return_failed_command_no_restart: exit\n");
15549}
15550
15551
15552/*
15553 *    Function: sd_retry_command
15554 *
 * Description: Queue up a command for retry, or (optionally) fail it
15556 *		if retry counts are exhausted.
15557 *
15558 *   Arguments: un - Pointer to the sd_lun struct for the target.
15559 *
15560 *		bp - Pointer to the buf for the command to be retried.
15561 *
15562 *		retry_check_flag - Flag to see which (if any) of the retry
15563 *		   counts should be decremented/checked. If the indicated
15564 *		   retry count is exhausted, then the command will not be
15565 *		   retried; it will be failed instead. This should use a
15566 *		   value equal to one of the following:
15567 *
15568 *			SD_RETRIES_NOCHECK
15569 *			SD_RESD_RETRIES_STANDARD
15570 *			SD_RETRIES_VICTIM
15571 *
15572 *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15573 *		   if the check should be made to see of FLAG_ISOLATE is set
15574 *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15575 *		   not retried, it is simply failed.
15576 *
15577 *		user_funcp - Ptr to function to call before dispatching the
15578 *		   command. May be NULL if no action needs to be performed.
15579 *		   (Primarily intended for printing messages.)
15580 *
15581 *		user_arg - Optional argument to be passed along to
15582 *		   the user_funcp call.
15583 *
15584 *		failure_code - errno return code to set in the bp if the
15585 *		   command is going to be failed.
15586 *
15587 *		retry_delay - Retry delay interval in (clock_t) units. May
15588 *		   be zero which indicates that the retry should be retried
15589 *		   immediately (ie, without an intervening delay).
15590 *
15591 *		statp - Ptr to kstat function to be updated if the command
15592 *		   is queued for a delayed retry. May be NULL if no kstat
15593 *		   update is desired.
15594 *
15595 *     Context: May be called from interrupt context.
15596 */
15597
15598static void
15599sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15600	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15601	code), void *user_arg, int failure_code,  clock_t retry_delay,
15602	void (*statp)(kstat_io_t *))
15603{
15604	struct sd_xbuf	*xp;
15605	struct scsi_pkt	*pktp;
15606	struct sd_fm_internal *sfip;
15607
15608	ASSERT(un != NULL);
15609	ASSERT(mutex_owned(SD_MUTEX(un)));
15610	ASSERT(bp != NULL);
15611	xp = SD_GET_XBUF(bp);
15612	ASSERT(xp != NULL);
15613	pktp = SD_GET_PKTP(bp);
15614	ASSERT(pktp != NULL);
15615
15616	sfip = (struct sd_fm_internal *)un->un_fm_private;
15617	ASSERT(sfip != NULL);
15618
15619	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15620	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15621
15622	/*
15623	 * If we are syncing or dumping, fail the command to avoid
15624	 * recursively calling back into scsi_transport().
15625	 */
15626	if (ddi_in_panic()) {
15627		goto fail_command_no_log;
15628	}
15629
15630	/*
	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15632	 * log an error and fail the command.
15633	 */
15634	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15635		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15636		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15637		sd_dump_memory(un, SD_LOG_IO, "CDB",
15638		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15639		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15640		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15641		goto fail_command;
15642	}
15643
15644	/*
15645	 * If we are suspended, then put the command onto head of the
15646	 * wait queue since we don't want to start more commands, and
15647	 * clear the un_retry_bp. Next time when we are resumed, will
15648	 * handle the command in the wait queue.
15649	 */
15650	switch (un->un_state) {
15651	case SD_STATE_SUSPENDED:
15652	case SD_STATE_DUMPING:
15653		bp->av_forw = un->un_waitq_headp;
15654		un->un_waitq_headp = bp;
15655		if (un->un_waitq_tailp == NULL) {
15656			un->un_waitq_tailp = bp;
15657		}
15658		if (bp == un->un_retry_bp) {
15659			un->un_retry_bp = NULL;
15660			un->un_retry_statp = NULL;
15661		}
15662		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15663		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15664		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15665		return;
15666	default:
15667		break;
15668	}
15669
15670	/*
15671	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15672	 * is set; if it is then we do not want to retry the command.
15673	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15674	 */
15675	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15676		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15677			goto fail_command;
15678		}
15679	}
15680
15681
15682	/*
15683	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15684	 * command timeout or a selection timeout has occurred. This means
15685	 * that we were unable to establish an kind of communication with
15686	 * the target, and subsequent retries and/or commands are likely
15687	 * to encounter similar results and take a long time to complete.
15688	 *
15689	 * If this is a failfast error condition, we need to update the
15690	 * failfast state, even if this bp does not have B_FAILFAST set.
15691	 */
15692	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15693		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15694			ASSERT(un->un_failfast_bp == NULL);
15695			/*
15696			 * If we are already in the active failfast state, and
15697			 * another failfast error condition has been detected,
15698			 * then fail this command if it has B_FAILFAST set.
15699			 * If B_FAILFAST is clear, then maintain the legacy
			 * behavior of retrying heroically, even though this
			 * will take a lot more time to fail the command.
15702			 */
15703			if (bp->b_flags & B_FAILFAST) {
15704				goto fail_command;
15705			}
15706		} else {
15707			/*
15708			 * We're not in the active failfast state, but we
15709			 * have a failfast error condition, so we must begin
15710			 * transition to the next state. We do this regardless
15711			 * of whether or not this bp has B_FAILFAST set.
15712			 */
15713			if (un->un_failfast_bp == NULL) {
15714				/*
15715				 * This is the first bp to meet a failfast
15716				 * condition so save it on un_failfast_bp &
15717				 * do normal retry processing. Do not enter
15718				 * active failfast state yet. This marks
15719				 * entry into the "failfast pending" state.
15720				 */
15721				un->un_failfast_bp = bp;
15722
15723			} else if (un->un_failfast_bp == bp) {
15724				/*
15725				 * This is the second time *this* bp has
15726				 * encountered a failfast error condition,
15727				 * so enter active failfast state & flush
15728				 * queues as appropriate.
15729				 */
15730				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15731				un->un_failfast_bp = NULL;
15732				sd_failfast_flushq(un);
15733
15734				/*
15735				 * Fail this bp now if B_FAILFAST set;
15736				 * otherwise continue with retries. (It would
15737				 * be pretty ironic if this bp succeeded on a
15738				 * subsequent retry after we just flushed all
15739				 * the queues).
15740				 */
15741				if (bp->b_flags & B_FAILFAST) {
15742					goto fail_command;
15743				}
15744
15745#if !defined(lint) && !defined(__lint)
15746			} else {
15747				/*
				 * If neither of the preceding conditionals
15749				 * was true, it means that there is some
				 * *other* bp that has met an initial failfast
15751				 * condition and is currently either being
15752				 * retried or is waiting to be retried. In
15753				 * that case we should perform normal retry
15754				 * processing on *this* bp, since there is a
15755				 * chance that the current failfast condition
15756				 * is transient and recoverable. If that does
15757				 * not turn out to be the case, then retries
15758				 * will be cleared when the wait queue is
15759				 * flushed anyway.
15760				 */
15761#endif
15762			}
15763		}
15764	} else {
15765		/*
15766		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15767		 * likely were able to at least establish some level of
15768		 * communication with the target and subsequent commands
15769		 * and/or retries are likely to get through to the target,
15770		 * In this case we want to be aggressive about clearing
15771		 * the failfast state. Note that this does not affect
15772		 * the "failfast pending" condition.
15773		 */
15774		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15775	}
15776
15777
15778	/*
15779	 * Check the specified retry count to see if we can still do
15780	 * any retries with this pkt before we should fail it.
15781	 */
15782	switch (retry_check_flag & SD_RETRIES_MASK) {
15783	case SD_RETRIES_VICTIM:
15784		/*
15785		 * Check the victim retry count. If exhausted, then fall
15786		 * thru & check against the standard retry count.
15787		 */
15788		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15789			/* Increment count & proceed with the retry */
15790			xp->xb_victim_retry_count++;
15791			break;
15792		}
15793		/* Victim retries exhausted, fall back to std. retries... */
15794		/* FALLTHRU */
15795
15796	case SD_RETRIES_STANDARD:
15797		if (xp->xb_retry_count >= un->un_retry_count) {
15798			/* Retries exhausted, fail the command */
15799			SD_TRACE(SD_LOG_IO_CORE, un,
15800			    "sd_retry_command: retries exhausted!\n");
15801			/*
15802			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15803			 * commands with nonzero pkt_resid.
15804			 */
15805			if ((pktp->pkt_reason == CMD_CMPLT) &&
15806			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15807			    (pktp->pkt_resid != 0)) {
15808				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15809				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15810					SD_UPDATE_B_RESID(bp, pktp);
15811				}
15812			}
15813			goto fail_command;
15814		}
15815		xp->xb_retry_count++;
15816		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15817		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15818		break;
15819
15820	case SD_RETRIES_UA:
15821		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15822			/* Retries exhausted, fail the command */
15823			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15824			    "Unit Attention retries exhausted. "
15825			    "Check the target.\n");
15826			goto fail_command;
15827		}
15828		xp->xb_ua_retry_count++;
15829		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15830		    "sd_retry_command: retry count:%d\n",
15831		    xp->xb_ua_retry_count);
15832		break;
15833
15834	case SD_RETRIES_BUSY:
15835		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15836			/* Retries exhausted, fail the command */
15837			SD_TRACE(SD_LOG_IO_CORE, un,
15838			    "sd_retry_command: retries exhausted!\n");
15839			goto fail_command;
15840		}
15841		xp->xb_retry_count++;
15842		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15843		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15844		break;
15845
15846	case SD_RETRIES_NOCHECK:
15847	default:
15848		/* No retry count to check. Just proceed with the retry */
15849		break;
15850	}
15851
15852	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15853
15854	/*
15855	 * If this is a non-USCSI command being retried
15856	 * during execution last time, we should post an ereport with
15857	 * driver-assessment of the value "retry".
15858	 * For partial DMA, request sense and STATUS_QFULL, there are no
15859	 * hardware errors, we bypass ereport posting.
15860	 */
15861	if (failure_code != 0) {
15862		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
15863			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
15864			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RETRY);
15865		}
15866	}
15867
15868	/*
15869	 * If we were given a zero timeout, we must attempt to retry the
15870	 * command immediately (ie, without a delay).
15871	 */
15872	if (retry_delay == 0) {
15873		/*
15874		 * Check some limiting conditions to see if we can actually
15875		 * do the immediate retry.  If we cannot, then we must
15876		 * fall back to queueing up a delayed retry.
15877		 */
15878		if (un->un_ncmds_in_transport >= un->un_throttle) {
15879			/*
15880			 * We are at the throttle limit for the target,
15881			 * fall back to delayed retry.
15882			 */
15883			retry_delay = un->un_busy_timeout;
15884			statp = kstat_waitq_enter;
15885			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15886			    "sd_retry_command: immed. retry hit "
15887			    "throttle!\n");
15888		} else {
15889			/*
15890			 * We're clear to proceed with the immediate retry.
15891			 * First call the user-provided function (if any)
15892			 */
15893			if (user_funcp != NULL) {
15894				(*user_funcp)(un, bp, user_arg,
15895				    SD_IMMEDIATE_RETRY_ISSUED);
15896#ifdef __lock_lint
15897				sd_print_incomplete_msg(un, bp, user_arg,
15898				    SD_IMMEDIATE_RETRY_ISSUED);
15899				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15900				    SD_IMMEDIATE_RETRY_ISSUED);
15901				sd_print_sense_failed_msg(un, bp, user_arg,
15902				    SD_IMMEDIATE_RETRY_ISSUED);
15903#endif
15904			}
15905
15906			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15907			    "sd_retry_command: issuing immediate retry\n");
15908
15909			/*
15910			 * Call sd_start_cmds() to transport the command to
15911			 * the target.
15912			 */
15913			sd_start_cmds(un, bp);
15914
15915			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15916			    "sd_retry_command exit\n");
15917			return;
15918		}
15919	}
15920
15921	/*
15922	 * Set up to retry the command after a delay.
15923	 * First call the user-provided function (if any)
15924	 */
15925	if (user_funcp != NULL) {
15926		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15927	}
15928
15929	sd_set_retry_bp(un, bp, retry_delay, statp);
15930
15931	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15932	return;
15933
15934fail_command:
15935
15936	if (user_funcp != NULL) {
15937		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15938	}
15939
15940fail_command_no_log:
15941
15942	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15943	    "sd_retry_command: returning failed command\n");
15944
15945	sd_return_failed_command(un, bp, failure_code);
15946
15947	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15948}
15949
15950
15951/*
15952 *    Function: sd_set_retry_bp
15953 *
15954 * Description: Set up the given bp for retry.
15955 *
15956 *   Arguments: un - ptr to associated softstate
15957 *		bp - ptr to buf(9S) for the command
15958 *		retry_delay - time interval before issuing retry (may be 0)
15959 *		statp - optional pointer to kstat function
15960 *
15961 *     Context: May be called under interrupt context
15962 */
15963
15964static void
15965sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15966	void (*statp)(kstat_io_t *))
15967{
15968	ASSERT(un != NULL);
15969	ASSERT(mutex_owned(SD_MUTEX(un)));
15970	ASSERT(bp != NULL);
15971
15972	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15973	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15974
15975	/*
15976	 * Indicate that the command is being retried. This will not allow any
15977	 * other commands on the wait queue to be transported to the target
15978	 * until this command has been completed (success or failure). The
15979	 * "retry command" is not transported to the target until the given
15980	 * time delay expires, unless the user specified a 0 retry_delay.
15981	 *
15982	 * Note: the timeout(9F) callback routine is what actually calls
15983	 * sd_start_cmds() to transport the command, with the exception of a
15984	 * zero retry_delay. The only current implementor of a zero retry delay
15985	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15986	 */
15987	if (un->un_retry_bp == NULL) {
15988		ASSERT(un->un_retry_statp == NULL);
15989		un->un_retry_bp = bp;
15990
15991		/*
15992		 * If the user has not specified a delay the command should
15993		 * be queued and no timeout should be scheduled.
15994		 */
15995		if (retry_delay == 0) {
15996			/*
15997			 * Save the kstat pointer that will be used in the
15998			 * call to SD_UPDATE_KSTATS() below, so that
15999			 * sd_start_cmds() can correctly decrement the waitq
16000			 * count when it is time to transport this command.
16001			 */
16002			un->un_retry_statp = statp;
16003			goto done;
16004		}
16005	}
16006
16007	if (un->un_retry_bp == bp) {
16008		/*
16009		 * Save the kstat pointer that will be used in the call to
16010		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
16011		 * correctly decrement the waitq count when it is time to
16012		 * transport this command.
16013		 */
16014		un->un_retry_statp = statp;
16015
16016		/*
16017		 * Schedule a timeout if:
16018		 *   1) The user has specified a delay.
16019		 *   2) There is not a START_STOP_UNIT callback pending.
16020		 *
16021		 * If no delay has been specified, then it is up to the caller
16022		 * to ensure that IO processing continues without stalling.
16023		 * Effectively, this means that the caller will issue the
16024		 * required call to sd_start_cmds(). The START_STOP_UNIT
16025		 * callback does this after the START STOP UNIT command has
16026		 * completed. In either of these cases we should not schedule
16027		 * a timeout callback here.  Also don't schedule the timeout if
16028		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
16029		 */
16030		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
16031		    (un->un_direct_priority_timeid == NULL)) {
16032			un->un_retry_timeid =
16033			    timeout(sd_start_retry_command, un, retry_delay);
16034			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16035			    "sd_set_retry_bp: setting timeout: un: 0x%p"
16036			    " bp:0x%p un_retry_timeid:0x%p\n",
16037			    un, bp, un->un_retry_timeid);
16038		}
16039	} else {
16040		/*
16041		 * We only get in here if there is already another command
16042		 * waiting to be retried.  In this case, we just put the
16043		 * given command onto the wait queue, so it can be transported
16044		 * after the current retry command has completed.
16045		 *
16046		 * Also we have to make sure that if the command at the head
16047		 * of the wait queue is the un_failfast_bp, that we do not
16048		 * put ahead of it any other commands that are to be retried.
16049		 */
16050		if ((un->un_failfast_bp != NULL) &&
16051		    (un->un_failfast_bp == un->un_waitq_headp)) {
16052			/*
16053			 * Enqueue this command AFTER the first command on
16054			 * the wait queue (which is also un_failfast_bp).
16055			 */
16056			bp->av_forw = un->un_waitq_headp->av_forw;
16057			un->un_waitq_headp->av_forw = bp;
16058			if (un->un_waitq_headp == un->un_waitq_tailp) {
16059				un->un_waitq_tailp = bp;
16060			}
16061		} else {
16062			/* Enqueue this command at the head of the waitq. */
16063			bp->av_forw = un->un_waitq_headp;
16064			un->un_waitq_headp = bp;
16065			if (un->un_waitq_tailp == NULL) {
16066				un->un_waitq_tailp = bp;
16067			}
16068		}
16069
16070		if (statp == NULL) {
16071			statp = kstat_waitq_enter;
16072		}
16073		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16074		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
16075	}
16076
16077done:
16078	if (statp != NULL) {
16079		SD_UPDATE_KSTATS(un, statp, bp);
16080	}
16081
16082	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16083	    "sd_set_retry_bp: exit un:0x%p\n", un);
16084}
16085
16086
16087/*
16088 *    Function: sd_start_retry_command
16089 *
16090 * Description: Start the command that has been waiting on the target's
16091 *		retry queue.  Called from timeout(9F) context after the
16092 *		retry delay interval has expired.
16093 *
16094 *   Arguments: arg - pointer to associated softstate for the device.
16095 *
16096 *     Context: timeout(9F) thread context.  May not sleep.
16097 */
16098
16099static void
16100sd_start_retry_command(void *arg)
16101{
16102	struct sd_lun *un = arg;
16103
16104	ASSERT(un != NULL);
16105	ASSERT(!mutex_owned(SD_MUTEX(un)));
16106
16107	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16108	    "sd_start_retry_command: entry\n");
16109
16110	mutex_enter(SD_MUTEX(un));
16111
16112	un->un_retry_timeid = NULL;
16113
16114	if (un->un_retry_bp != NULL) {
16115		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16116		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
16117		    un, un->un_retry_bp);
16118		sd_start_cmds(un, un->un_retry_bp);
16119	}
16120
16121	mutex_exit(SD_MUTEX(un));
16122
16123	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16124	    "sd_start_retry_command: exit\n");
16125}
16126
16127/*
16128 *    Function: sd_rmw_msg_print_handler
16129 *
 * Description: If RMW mode is enabled and the warning message is
 *              triggered, print the I/O count accumulated during a
 *              fixed interval.
16132 *
16133 *   Arguments: arg - pointer to associated softstate for the device.
16134 *
16135 *     Context: timeout(9F) thread context. May not sleep.
16136 */
16137static void
16138sd_rmw_msg_print_handler(void *arg)
16139{
16140	struct sd_lun *un = arg;
16141
16142	ASSERT(un != NULL);
16143	ASSERT(!mutex_owned(SD_MUTEX(un)));
16144
16145	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16146	    "sd_rmw_msg_print_handler: entry\n");
16147
16148	mutex_enter(SD_MUTEX(un));
16149
16150	if (un->un_rmw_incre_count > 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "%"PRIu64" I/O requests were not aligned to the %d-byte "
		    "disk sector size in the past %ld seconds. They were "
		    "handled through Read-Modify-Write, but performance is "
		    "very low!\n",
		    un->un_rmw_incre_count, un->un_tgt_blocksize,
		    drv_hztousec(SD_RMW_MSG_PRINT_TIMEOUT) / 1000000);
16157		un->un_rmw_incre_count = 0;
16158		un->un_rmw_msg_timeid = timeout(sd_rmw_msg_print_handler,
16159		    un, SD_RMW_MSG_PRINT_TIMEOUT);
16160	} else {
16161		un->un_rmw_msg_timeid = NULL;
16162	}
16163
16164	mutex_exit(SD_MUTEX(un));
16165
16166	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16167	    "sd_rmw_msg_print_handler: exit\n");
16168}
16169
16170/*
16171 *    Function: sd_start_direct_priority_command
16172 *
16173 * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
16174 *		received TRAN_BUSY when we called scsi_transport() to send it
16175 *		to the underlying HBA. This function is called from timeout(9F)
16176 *		context after the delay interval has expired.
16177 *
16178 *   Arguments: arg - pointer to associated buf(9S) to be restarted.
16179 *
16180 *     Context: timeout(9F) thread context.  May not sleep.
16181 */
16182
16183static void
16184sd_start_direct_priority_command(void *arg)
16185{
16186	struct buf	*priority_bp = arg;
16187	struct sd_lun	*un;
16188
16189	ASSERT(priority_bp != NULL);
16190	un = SD_GET_UN(priority_bp);
16191	ASSERT(un != NULL);
16192	ASSERT(!mutex_owned(SD_MUTEX(un)));
16193
16194	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16195	    "sd_start_direct_priority_command: entry\n");
16196
16197	mutex_enter(SD_MUTEX(un));
16198	un->un_direct_priority_timeid = NULL;
16199	sd_start_cmds(un, priority_bp);
16200	mutex_exit(SD_MUTEX(un));
16201
16202	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16203	    "sd_start_direct_priority_command: exit\n");
16204}
16205
16206
16207/*
16208 *    Function: sd_send_request_sense_command
16209 *
16210 * Description: Sends a REQUEST SENSE command to the target
16211 *
16212 *     Context: May be called from interrupt context.
16213 */
16214
16215static void
16216sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
16217	struct scsi_pkt *pktp)
16218{
16219	ASSERT(bp != NULL);
16220	ASSERT(un != NULL);
16221	ASSERT(mutex_owned(SD_MUTEX(un)));
16222
16223	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
16224	    "entry: buf:0x%p\n", bp);
16225
16226	/*
16227	 * If we are syncing or dumping, then fail the command to avoid a
16228	 * recursive callback into scsi_transport(). Also fail the command
16229	 * if we are suspended (legacy behavior).
16230	 */
16231	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
16232	    (un->un_state == SD_STATE_DUMPING)) {
16233		sd_return_failed_command(un, bp, EIO);
16234		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16235		    "sd_send_request_sense_command: syncing/dumping, exit\n");
16236		return;
16237	}
16238
16239	/*
16240	 * Retry the failed command and don't issue the request sense if:
16241	 *    1) the sense buf is busy
16242	 *    2) we have 1 or more outstanding commands on the target
16243	 *    (the sense data will be cleared or invalidated any way)
16244	 *
16245	 * Note: There could be an issue with not checking a retry limit here,
16246	 * the problem is determining which retry limit to check.
16247	 */
16248	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
16249		/* Don't retry if the command is flagged as non-retryable */
16250		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16251			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
16252			    NULL, NULL, 0, un->un_busy_timeout,
16253			    kstat_waitq_enter);
16254			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16255			    "sd_send_request_sense_command: "
16256			    "at full throttle, retrying exit\n");
16257		} else {
16258			sd_return_failed_command(un, bp, EIO);
16259			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16260			    "sd_send_request_sense_command: "
16261			    "at full throttle, non-retryable exit\n");
16262		}
16263		return;
16264	}
16265
16266	sd_mark_rqs_busy(un, bp);
16267	sd_start_cmds(un, un->un_rqs_bp);
16268
16269	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16270	    "sd_send_request_sense_command: exit\n");
16271}
16272
16273
16274/*
16275 *    Function: sd_mark_rqs_busy
16276 *
16277 * Description: Indicate that the request sense bp for this instance is
16278 *		in use.
16279 *
16280 *     Context: May be called under interrupt context
16281 */
16282
16283static void
16284sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
16285{
16286	struct sd_xbuf	*sense_xp;
16287
16288	ASSERT(un != NULL);
16289	ASSERT(bp != NULL);
16290	ASSERT(mutex_owned(SD_MUTEX(un)));
16291	ASSERT(un->un_sense_isbusy == 0);
16292
16293	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
16294	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
16295
16296	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
16297	ASSERT(sense_xp != NULL);
16298
16299	SD_INFO(SD_LOG_IO, un,
16300	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
16301
16302	ASSERT(sense_xp->xb_pktp != NULL);
16303	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
16304	    == (FLAG_SENSING | FLAG_HEAD));
16305
16306	un->un_sense_isbusy = 1;
16307	un->un_rqs_bp->b_resid = 0;
16308	sense_xp->xb_pktp->pkt_resid  = 0;
16309	sense_xp->xb_pktp->pkt_reason = 0;
16310
16311	/* So we can get back the bp at interrupt time! */
16312	sense_xp->xb_sense_bp = bp;
16313
16314	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
16315
16316	/*
16317	 * Mark this buf as awaiting sense data. (This is already set in
16318	 * the pkt_flags for the RQS packet.)
16319	 */
16320	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
16321
16322	/* Request sense down same path */
16323	if (scsi_pkt_allocated_correctly((SD_GET_XBUF(bp))->xb_pktp) &&
16324	    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance)
16325		sense_xp->xb_pktp->pkt_path_instance =
16326		    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance;
16327
16328	sense_xp->xb_retry_count	= 0;
16329	sense_xp->xb_victim_retry_count = 0;
16330	sense_xp->xb_ua_retry_count	= 0;
16331	sense_xp->xb_nr_retry_count 	= 0;
16332	sense_xp->xb_dma_resid  = 0;
16333
16334	/* Clean up the fields for auto-request sense */
16335	sense_xp->xb_sense_status = 0;
16336	sense_xp->xb_sense_state  = 0;
16337	sense_xp->xb_sense_resid  = 0;
16338	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
16339
16340	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
16341}
16342
16343
16344/*
16345 *    Function: sd_mark_rqs_idle
16346 *
 * Description: Mark the request sense bp for this instance as idle and
 *		return the original bp.  SD_MUTEX must be held continuously
 *		through this routine to prevent reuse of the rqs struct
 *		before the caller can complete its processing.
16350 *
16351 * Return Code: Pointer to the RQS buf
16352 *
16353 *     Context: May be called under interrupt context
16354 */
16355
16356static struct buf *
16357sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
16358{
16359	struct buf *bp;
16360	ASSERT(un != NULL);
16361	ASSERT(sense_xp != NULL);
16362	ASSERT(mutex_owned(SD_MUTEX(un)));
16363	ASSERT(un->un_sense_isbusy != 0);
16364
16365	un->un_sense_isbusy = 0;
16366	bp = sense_xp->xb_sense_bp;
16367	sense_xp->xb_sense_bp = NULL;
16368
16369	/* This pkt is no longer interested in getting sense data */
16370	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
16371
16372	return (bp);
16373}
16374
16375
16376
16377/*
16378 *    Function: sd_alloc_rqs
16379 *
16380 * Description: Set up the unit to receive auto request sense data
16381 *
16382 * Return Code: DDI_SUCCESS or DDI_FAILURE
16383 *
16384 *     Context: Called under attach(9E) context
16385 */
16386
16387static int
16388sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
16389{
16390	struct sd_xbuf *xp;
16391
16392	ASSERT(un != NULL);
16393	ASSERT(!mutex_owned(SD_MUTEX(un)));
16394	ASSERT(un->un_rqs_bp == NULL);
16395	ASSERT(un->un_rqs_pktp == NULL);
16396
16397	/*
16398	 * First allocate the required buf and scsi_pkt structs, then set up
16399	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
16400	 */
16401	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
16402	    MAX_SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
16403	if (un->un_rqs_bp == NULL) {
16404		return (DDI_FAILURE);
16405	}
16406
16407	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
16408	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
16409
16410	if (un->un_rqs_pktp == NULL) {
16411		sd_free_rqs(un);
16412		return (DDI_FAILURE);
16413	}
16414
16415	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
16416	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
16417	    SCMD_REQUEST_SENSE, 0, MAX_SENSE_LENGTH, 0);
16418
16419	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
16420
16421	/* Set up the other needed members in the ARQ scsi_pkt. */
16422	un->un_rqs_pktp->pkt_comp   = sdintr;
16423	un->un_rqs_pktp->pkt_time   = sd_io_time;
16424	un->un_rqs_pktp->pkt_flags |=
16425	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
16426
16427	/*
16428	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
16429	 * provide any intpkt, destroypkt routines as we take care of
16430	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
16431	 */
16432	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
16433	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
16434	xp->xb_pktp = un->un_rqs_pktp;
16435	SD_INFO(SD_LOG_ATTACH_DETACH, un,
16436	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
16437	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
16438
16439	/*
16440	 * Save the pointer to the request sense private bp so it can
16441	 * be retrieved in sdintr.
16442	 */
16443	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
16444	ASSERT(un->un_rqs_bp->b_private == xp);
16445
16446	/*
16447	 * See if the HBA supports auto-request sense for the specified
16448	 * target/lun. If it does, then try to enable it (if not already
16449	 * enabled).
16450	 *
16451	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
16452	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
16453	 * return success.  However, in both of these cases ARQ is always
16454	 * enabled and scsi_ifgetcap will always return true. The best approach
16455	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
16456	 *
	 * The 3rd case is an HBA (adp) that always returns enabled on
	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
	 * is to issue a scsi_ifsetcap then a scsi_ifgetcap.
	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
16461	 */
16462
16463	if (un->un_f_is_fibre == TRUE) {
16464		un->un_f_arq_enabled = TRUE;
16465	} else {
16466#if defined(__i386) || defined(__amd64)
16467		/*
16468		 * Circumvent the Adaptec bug, remove this code when
16469		 * the bug is fixed
16470		 */
16471		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
16472#endif
16473		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
16474		case 0:
16475			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16476			    "sd_alloc_rqs: HBA supports ARQ\n");
16477			/*
16478			 * ARQ is supported by this HBA but currently is not
16479			 * enabled. Attempt to enable it and if successful then
16480			 * mark this instance as ARQ enabled.
16481			 */
16482			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
16483			    == 1) {
16484				/* Successfully enabled ARQ in the HBA */
16485				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16486				    "sd_alloc_rqs: ARQ enabled\n");
16487				un->un_f_arq_enabled = TRUE;
16488			} else {
16489				/* Could not enable ARQ in the HBA */
16490				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16491				    "sd_alloc_rqs: failed ARQ enable\n");
16492				un->un_f_arq_enabled = FALSE;
16493			}
16494			break;
16495		case 1:
16496			/*
16497			 * ARQ is supported by this HBA and is already enabled.
16498			 * Just mark ARQ as enabled for this instance.
16499			 */
16500			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16501			    "sd_alloc_rqs: ARQ already enabled\n");
16502			un->un_f_arq_enabled = TRUE;
16503			break;
16504		default:
16505			/*
16506			 * ARQ is not supported by this HBA; disable it for this
16507			 * instance.
16508			 */
16509			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16510			    "sd_alloc_rqs: HBA does not support ARQ\n");
16511			un->un_f_arq_enabled = FALSE;
16512			break;
16513		}
16514	}
16515
16516	return (DDI_SUCCESS);
16517}
16518
16519
16520/*
16521 *    Function: sd_free_rqs
16522 *
16523 * Description: Cleanup for the pre-instance RQS command.
16524 *
16525 *     Context: Kernel thread context
16526 */
16527
16528static void
16529sd_free_rqs(struct sd_lun *un)
16530{
16531	ASSERT(un != NULL);
16532
16533	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16534
16535	/*
16536	 * If consistent memory is bound to a scsi_pkt, the pkt
16537	 * has to be destroyed *before* freeing the consistent memory.
	 * Don't change the sequence of these operations.
16539	 * scsi_destroy_pkt() might access memory, which isn't allowed,
16540	 * after it was freed in scsi_free_consistent_buf().
16541	 */
16542	if (un->un_rqs_pktp != NULL) {
16543		scsi_destroy_pkt(un->un_rqs_pktp);
16544		un->un_rqs_pktp = NULL;
16545	}
16546
16547	if (un->un_rqs_bp != NULL) {
16548		struct sd_xbuf *xp = SD_GET_XBUF(un->un_rqs_bp);
16549		if (xp != NULL) {
16550			kmem_free(xp, sizeof (struct sd_xbuf));
16551		}
16552		scsi_free_consistent_buf(un->un_rqs_bp);
16553		un->un_rqs_bp = NULL;
16554	}
16555	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16556}
16557
16558
16559
16560/*
16561 *    Function: sd_reduce_throttle
16562 *
16563 * Description: Reduces the maximum # of outstanding commands on a
16564 *		target to the current number of outstanding commands.
16565 *		Queues a tiemout(9F) callback to restore the limit
16566 *		after a specified interval has elapsed.
16567 *		Typically used when we get a TRAN_BUSY return code
16568 *		back from scsi_transport().
16569 *
16570 *   Arguments: un - ptr to the sd_lun softstate struct
16571 *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16572 *
16573 *     Context: May be called from interrupt context
16574 */
16575
16576static void
16577sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16578{
16579	ASSERT(un != NULL);
16580	ASSERT(mutex_owned(SD_MUTEX(un)));
16581	ASSERT(un->un_ncmds_in_transport >= 0);
16582
16583	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16584	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16585	    un, un->un_throttle, un->un_ncmds_in_transport);
16586
16587	if (un->un_throttle > 1) {
16588		if (un->un_f_use_adaptive_throttle == TRUE) {
16589			switch (throttle_type) {
16590			case SD_THROTTLE_TRAN_BUSY:
16591				if (un->un_busy_throttle == 0) {
16592					un->un_busy_throttle = un->un_throttle;
16593				}
16594				break;
16595			case SD_THROTTLE_QFULL:
16596				un->un_busy_throttle = 0;
16597				break;
16598			default:
16599				ASSERT(FALSE);
16600			}
16601
16602			if (un->un_ncmds_in_transport > 0) {
16603				un->un_throttle = un->un_ncmds_in_transport;
16604			}
16605
16606		} else {
16607			if (un->un_ncmds_in_transport == 0) {
16608				un->un_throttle = 1;
16609			} else {
16610				un->un_throttle = un->un_ncmds_in_transport;
16611			}
16612		}
16613	}
16614
16615	/* Reschedule the timeout if none is currently active */
16616	if (un->un_reset_throttle_timeid == NULL) {
16617		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16618		    un, SD_THROTTLE_RESET_INTERVAL);
16619		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16620		    "sd_reduce_throttle: timeout scheduled!\n");
16621	}
16622
16623	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16624	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16625}
16626
16627
16628
16629/*
16630 *    Function: sd_restore_throttle
16631 *
16632 * Description: Callback function for timeout(9F).  Resets the current
16633 *		value of un->un_throttle to its default.
16634 *
16635 *   Arguments: arg - pointer to associated softstate for the device.
16636 *
16637 *     Context: May be called from interrupt context
16638 */
16639
16640static void
16641sd_restore_throttle(void *arg)
16642{
16643	struct sd_lun	*un = arg;
16644
16645	ASSERT(un != NULL);
16646	ASSERT(!mutex_owned(SD_MUTEX(un)));
16647
16648	mutex_enter(SD_MUTEX(un));
16649
16650	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16651	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16652
16653	un->un_reset_throttle_timeid = NULL;
16654
16655	if (un->un_f_use_adaptive_throttle == TRUE) {
16656		/*
16657		 * If un_busy_throttle is nonzero, then it contains the
16658		 * value that un_throttle was when we got a TRAN_BUSY back
16659		 * from scsi_transport(). We want to revert back to this
16660		 * value.
16661		 *
16662		 * In the QFULL case, the throttle limit will incrementally
16663		 * increase until it reaches max throttle.
16664		 */
16665		if (un->un_busy_throttle > 0) {
16666			un->un_throttle = un->un_busy_throttle;
16667			un->un_busy_throttle = 0;
16668		} else {
16669			/*
16670			 * increase throttle by 10% open gate slowly, schedule
16671			 * another restore if saved throttle has not been
16672			 * reached
16673			 */
16674			short throttle;
16675			if (sd_qfull_throttle_enable) {
16676				throttle = un->un_throttle +
16677				    max((un->un_throttle / 10), 1);
16678				un->un_throttle =
16679				    (throttle < un->un_saved_throttle) ?
16680				    throttle : un->un_saved_throttle;
16681				if (un->un_throttle < un->un_saved_throttle) {
16682					un->un_reset_throttle_timeid =
16683					    timeout(sd_restore_throttle,
16684					    un,
16685					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16686				}
16687			}
16688		}
16689
16690		/*
16691		 * If un_throttle has fallen below the low-water mark, we
16692		 * restore the maximum value here (and allow it to ratchet
16693		 * down again if necessary).
16694		 */
16695		if (un->un_throttle < un->un_min_throttle) {
16696			un->un_throttle = un->un_saved_throttle;
16697		}
16698	} else {
16699		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16700		    "restoring limit from 0x%x to 0x%x\n",
16701		    un->un_throttle, un->un_saved_throttle);
16702		un->un_throttle = un->un_saved_throttle;
16703	}
16704
16705	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16706	    "sd_restore_throttle: calling sd_start_cmds!\n");
16707
16708	sd_start_cmds(un, NULL);
16709
16710	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16711	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16712	    un, un->un_throttle);
16713
16714	mutex_exit(SD_MUTEX(un));
16715
16716	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16717}
16718
16719/*
16720 *    Function: sdrunout
16721 *
16722 * Description: Callback routine for scsi_init_pkt when a resource allocation
16723 *		fails.
16724 *
16725 *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16726 *		soft state instance.
16727 *
16728 * Return Code: The scsi_init_pkt routine allows for the callback function to
16729 *		return a 0 indicating the callback should be rescheduled or a 1
16730 *		indicating not to reschedule. This routine always returns 1
16731 *		because the driver always provides a callback function to
16732 *		scsi_init_pkt. This results in a callback always being scheduled
16733 *		(via the scsi_init_pkt callback implementation) if a resource
16734 *		failure occurs.
16735 *
16736 *     Context: This callback function may not block or call routines that block
16737 *
16738 *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16739 *		request persisting at the head of the list which cannot be
16740 *		satisfied even after multiple retries. In the future the driver
16741 *		may implement some time of maximum runout count before failing
16742 *		an I/O.
16743 */
16744
16745static int
16746sdrunout(caddr_t arg)
16747{
16748	struct sd_lun	*un = (struct sd_lun *)arg;
16749
16750	ASSERT(un != NULL);
16751	ASSERT(!mutex_owned(SD_MUTEX(un)));
16752
16753	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16754
16755	mutex_enter(SD_MUTEX(un));
16756	sd_start_cmds(un, NULL);
16757	mutex_exit(SD_MUTEX(un));
16758	/*
16759	 * This callback routine always returns 1 (i.e. do not reschedule)
16760	 * because we always specify sdrunout as the callback handler for
16761	 * scsi_init_pkt inside the call to sd_start_cmds.
16762	 */
16763	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16764	return (1);
16765}
16766
16767
16768/*
16769 *    Function: sdintr
16770 *
16771 * Description: Completion callback routine for scsi_pkt(9S) structs
16772 *		sent to the HBA driver via scsi_transport(9F).
16773 *
16774 *     Context: Interrupt context
16775 */
16776
16777static void
16778sdintr(struct scsi_pkt *pktp)
16779{
16780	struct buf	*bp;
16781	struct sd_xbuf	*xp;
16782	struct sd_lun	*un;
16783	size_t		actual_len;
16784	sd_ssc_t	*sscp;
16785
16786	ASSERT(pktp != NULL);
16787	bp = (struct buf *)pktp->pkt_private;
16788	ASSERT(bp != NULL);
16789	xp = SD_GET_XBUF(bp);
16790	ASSERT(xp != NULL);
16791	ASSERT(xp->xb_pktp != NULL);
16792	un = SD_GET_UN(bp);
16793	ASSERT(un != NULL);
16794	ASSERT(!mutex_owned(SD_MUTEX(un)));
16795
16796#ifdef SD_FAULT_INJECTION
16797
16798	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16799	/* SD FaultInjection */
16800	sd_faultinjection(pktp);
16801
16802#endif /* SD_FAULT_INJECTION */
16803
16804	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16805	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16806
16807	mutex_enter(SD_MUTEX(un));
16808
16809	ASSERT(un->un_fm_private != NULL);
16810	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
16811	ASSERT(sscp != NULL);
16812
16813	/* Reduce the count of the #commands currently in transport */
16814	un->un_ncmds_in_transport--;
16815	ASSERT(un->un_ncmds_in_transport >= 0);
16816
16817	/* Increment counter to indicate that the callback routine is active */
16818	un->un_in_callback++;
16819
16820	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16821
16822#ifdef	SDDEBUG
16823	if (bp == un->un_retry_bp) {
16824		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16825		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16826		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16827	}
16828#endif
16829
16830	/*
16831	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
16832	 * state if needed.
16833	 */
16834	if (pktp->pkt_reason == CMD_DEV_GONE) {
16835		/* Prevent multiple console messages for the same failure. */
16836		if (un->un_last_pkt_reason != CMD_DEV_GONE) {
16837			un->un_last_pkt_reason = CMD_DEV_GONE;
16838			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16839			    "Command failed to complete...Device is gone\n");
16840		}
16841		if (un->un_mediastate != DKIO_DEV_GONE) {
16842			un->un_mediastate = DKIO_DEV_GONE;
16843			cv_broadcast(&un->un_state_cv);
16844		}
16845		/*
16846		 * If the command happens to be the REQUEST SENSE command,
16847		 * free up the rqs buf and fail the original command.
16848		 */
16849		if (bp == un->un_rqs_bp) {
16850			bp = sd_mark_rqs_idle(un, xp);
16851		}
16852		sd_return_failed_command(un, bp, EIO);
16853		goto exit;
16854	}
16855
16856	if (pktp->pkt_state & STATE_XARQ_DONE) {
16857		SD_TRACE(SD_LOG_COMMON, un,
16858		    "sdintr: extra sense data received. pkt=%p\n", pktp);
16859	}
16860
16861	/*
16862	 * First see if the pkt has auto-request sense data with it....
16863	 * Look at the packet state first so we don't take a performance
16864	 * hit looking at the arq enabled flag unless absolutely necessary.
16865	 */
	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
	    (un->un_f_arq_enabled == TRUE)) {
		/*
		 * The HBA did an auto request sense for this command so check
		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
		 * driver command that should not be retried.
		 */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
			/*
			 * Save the relevant sense info into the xp for the
			 * original cmd.
			 */
			struct scsi_arq_status *asp;
			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
			xp->xb_sense_status =
			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
			xp->xb_sense_state  = asp->sts_rqpkt_state;
			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
			if (pktp->pkt_state & STATE_XARQ_DONE) {
				actual_len = MAX_SENSE_LENGTH -
				    xp->xb_sense_resid;
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    MAX_SENSE_LENGTH);
			} else {
				if (xp->xb_sense_resid > SENSE_LENGTH) {
					actual_len = MAX_SENSE_LENGTH -
					    xp->xb_sense_resid;
				} else {
					actual_len = SENSE_LENGTH -
					    xp->xb_sense_resid;
				}
				if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
					if ((((struct uscsi_cmd *)
					    (xp->xb_pktinfo))->uscsi_rqlen) >
					    actual_len) {
						xp->xb_sense_resid =
						    (((struct uscsi_cmd *)
						    (xp->xb_pktinfo))->
						    uscsi_rqlen) - actual_len;
					} else {
						xp->xb_sense_resid = 0;
					}
				}
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    SENSE_LENGTH);
			}
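			/*
			 * For example (values are illustrative; SENSE_LENGTH
			 * and MAX_SENSE_LENGTH are defined in the SCSI
			 * headers): with a SENSE_LENGTH of 20 and an HBA
			 * that transferred 14 sense bytes, xb_sense_resid is
			 * 6 and actual_len = 20 - 6 = 14.  For a USCSI
			 * caller that asked for a uscsi_rqlen of 32, the
			 * resid reported back is recomputed as 32 - 14 = 18,
			 * i.e. relative to what the caller requested rather
			 * than to the driver's own sense buffer.
			 */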
			/* fail the command */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
			sd_return_failed_command(un, bp, EIO);
			goto exit;
		}

#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
		/*
		 * We want to either retry or fail this command, so free
		 * the DMA resources here.  If we retry the command then
		 * the DMA resources will be reallocated in sd_start_cmds().
		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
		 * causes the *entire* transfer to start over again from the
		 * beginning of the request, even for PARTIAL chunks that
		 * have already transferred successfully.
		 */
		if ((un->un_f_is_fibre == TRUE) &&
		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
			scsi_dmafree(pktp);
			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
		}
#endif

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: arq done, sd_handle_auto_request_sense\n");

		sd_handle_auto_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/* Next see if this is the REQUEST SENSE pkt for the instance */
	if (pktp->pkt_flags & FLAG_SENSING)  {
		/* This pktp is from the unit's REQUEST_SENSE command */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: sd_handle_request_sense\n");
		sd_handle_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/*
	 * Check to see if the command successfully completed as requested;
	 * this is the most common case (and also the hot performance path).
	 *
	 * Requirements for successful completion are:
	 * pkt_reason is CMD_CMPLT and packet status is status good.
	 * In addition:
	 * - A residual of zero indicates successful completion no matter what
	 *   the command is.
	 * - If the residual is not zero and the command is not a read or
	 *   write, then it's still defined as successful completion. In other
	 *   words, if the command is a read or write the residual must be
	 *   zero for successful completion.
	 * - If the residual is not zero and the command is a read or
	 *   write, and it's a USCSICMD, then it's still defined as
	 *   successful completion.
	 */
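	/*
	 * In tabular form (CMD_CMPLT with STATUS_GOOD assumed throughout):
	 *
	 *	resid == 0, any command		-> success
	 *	resid != 0, not a read/write	-> success (resid reported)
	 *	resid != 0, read/write, USCSI	-> success (resid reported)
	 *	resid != 0, read/write, buf I/O	-> not successful; fall
	 *					   through to error handling
	 */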
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {

		/*
		 * Since this command is returned with a good status, we
		 * can reset the count for Sonoma failover.
		 */
		un->un_sonoma_failure_count = 0;

		/*
		 * Return all USCSI commands on good status
		 */
		if (pktp->pkt_resid == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid == 0\n");
		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid != 0\n");
		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning uscsi command\n");
		} else {
			goto not_successful;
		}
		sd_return_command(un, bp);

		/*
		 * Decrement counter to indicate that the callback routine
		 * is done.
		 */
		un->un_in_callback--;
		ASSERT(un->un_in_callback >= 0);
		mutex_exit(SD_MUTEX(un));

		return;
	}

not_successful:

#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
	/*
	 * The following is based upon knowledge of the underlying transport
	 * and its use of DMA resources.  This code should be removed when
	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
	 * and sd_start_cmds().
	 *
	 * Free any DMA resources associated with this command if there
	 * is a chance it could be retried or enqueued for later retry.
	 * If we keep the DMA binding then mpxio cannot reissue the
	 * command on another path whenever a path failure occurs.
	 *
	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
	 * causes the *entire* transfer to start over again from the
	 * beginning of the request, even for PARTIAL chunks that
	 * have already transferred successfully.
	 *
	 * This is only done for non-uscsi commands (and also skipped for the
	 * driver's internal RQS command). Also just do this for Fibre Channel
	 * devices as these are the only ones that support mpxio.
	 */
	if ((un->un_f_is_fibre == TRUE) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
		scsi_dmafree(pktp);
		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
	}
#endif

	/*
	 * The command did not successfully complete as requested so check
	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
	 * driver command that should not be retried so just return. If
	 * FLAG_DIAGNOSE is not set the error will be processed below.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
		/*
		 * Issue a request sense if a check condition caused the error
		 * (we handle the auto request sense case above), otherwise
		 * just fail the command.
		 */
		if ((pktp->pkt_reason == CMD_CMPLT) &&
		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
			sd_send_request_sense_command(un, bp, pktp);
		} else {
			sd_return_failed_command(un, bp, EIO);
		}
		goto exit;
	}

	/*
	 * The command did not successfully complete as requested so process
	 * the error, retry, and/or attempt recovery.
	 */
	switch (pktp->pkt_reason) {
	case CMD_CMPLT:
		switch (SD_GET_PKT_STATUS(pktp)) {
		case STATUS_GOOD:
			/*
			 * The command completed successfully with a non-zero
			 * residual
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_GOOD \n");
			sd_pkt_status_good(un, bp, xp, pktp);
			break;

		case STATUS_CHECK:
		case STATUS_TERMINATED:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
			sd_pkt_status_check_condition(un, bp, xp, pktp);
			break;

		case STATUS_BUSY:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_BUSY\n");
			sd_pkt_status_busy(un, bp, xp, pktp);
			break;

		case STATUS_RESERVATION_CONFLICT:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
			break;

		case STATUS_QFULL:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_QFULL\n");
			sd_pkt_status_qfull(un, bp, xp, pktp);
			break;

		case STATUS_MET:
		case STATUS_INTERMEDIATE:
		case STATUS_SCSI2:
		case STATUS_INTERMEDIATE_MET:
		case STATUS_ACA_ACTIVE:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unexpected SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			/*
			 * Mark the ssc_flags when an invalid status code is
			 * detected for a non-USCSI command.
			 */
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Invalid SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		}
		break;

	case CMD_INCOMPLETE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr:  CMD_INCOMPLETE\n");
		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
		break;
	case CMD_TRAN_ERR:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TRAN_ERR\n");
		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
		break;
	case CMD_RESET:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_RESET \n");
		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
		break;
	case CMD_ABORTED:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_ABORTED \n");
		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
		break;
	case CMD_TIMEOUT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TIMEOUT\n");
		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
		break;
	case CMD_UNX_BUS_FREE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_UNX_BUS_FREE \n");
		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
		break;
	case CMD_TAG_REJECT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TAG_REJECT\n");
		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
		break;
	default:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: default\n");
		/*
		 * Mark the ssc_flags for detecting an invalid pkt_reason.
		 */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_PKT_REASON,
			    0, "pkt-reason");
		}
		sd_pkt_reason_default(un, bp, xp, pktp);
		break;
	}

exit:
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");

	/* Decrement counter to indicate that the callback routine is done. */
	un->un_in_callback--;
	ASSERT(un->un_in_callback >= 0);

	/*
	 * At this point, the pkt has been dispatched, ie, it is either
	 * being re-tried or has been returned to its caller and should
	 * not be referenced.
	 */

	mutex_exit(SD_MUTEX(un));
}


/*
 *    Function: sd_print_incomplete_msg
 *
 * Description: Prints the error message for a CMD_INCOMPLETE error.
 *
 *   Arguments: un - ptr to associated softstate for the device.
 *		bp - ptr to the buf(9S) for the command.
 *		arg - message string ptr
 *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
 *			or SD_NO_RETRY_ISSUED.
 *
 *     Context: May be called under interrupt context
 */

static void
sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
{
	struct scsi_pkt	*pktp;
	char	*msgp;
	char	*cmdp = arg;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(arg != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	switch (code) {
	case SD_DELAYED_RETRY_ISSUED:
	case SD_IMMEDIATE_RETRY_ISSUED:
		msgp = "retrying";
		break;
	case SD_NO_RETRY_ISSUED:
	default:
		msgp = "giving up";
		break;
	}

	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "incomplete %s- %s\n", cmdp, msgp);
	}
}
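/*
 * For example, a failed read that is being retried logs
 * "incomplete read- retrying", while one whose retries have been
 * exhausted logs "incomplete read- giving up" (cmdp is "read" or
 * "write" as supplied by sd_pkt_status_good() below).
 */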


/*
 *    Function: sd_pkt_status_good
 *
 * Description: Processing for a STATUS_GOOD code in pkt_status.
 *
 *     Context: May be called under interrupt context
 */

static void
sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	char	*cmdp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);
	ASSERT(pktp->pkt_reason == CMD_CMPLT);
	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
	ASSERT(pktp->pkt_resid != 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");

	SD_UPDATE_ERRSTATS(un, sd_harderrs);
	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
	case SCMD_READ:
		cmdp = "read";
		break;
	case SCMD_WRITE:
		cmdp = "write";
		break;
	default:
		SD_UPDATE_B_RESID(bp, pktp);
		sd_return_command(un, bp);
		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
		return;
	}

	/*
	 * See if we can retry the read/write, preferably immediately.
	 * If retries are exhausted, then sd_retry_command() will update
	 * the b_resid count.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
	    cmdp, EIO, (clock_t)0, NULL);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
}
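/*
 * A note on the retry plumbing used above (a sketch of the convention,
 * not a definition): the fourth and fifth arguments to sd_retry_command()
 * are a message callback and its opaque argument.  When the retry is
 * issued (or abandoned), the callback is invoked under SD_MUTEX as,
 * roughly:
 *
 *	(*user_funcp)(un, bp, user_arg, code);
 *
 * where code is one of SD_IMMEDIATE_RETRY_ISSUED,
 * SD_DELAYED_RETRY_ISSUED, or SD_NO_RETRY_ISSUED.  This matches the
 * signatures of sd_print_incomplete_msg(), sd_print_sense_failed_msg(),
 * and sd_print_sense_msg() in this file.
 */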




/*
 *    Function: sd_handle_request_sense
 *
 * Description: Processing for non-auto Request Sense command.
 *
 *   Arguments: un - ptr to associated softstate
 *		sense_bp - ptr to buf(9S) for the RQS command
 *		sense_xp - ptr to the sd_xbuf for the RQS command
 *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
 *
 *     Context: May be called under interrupt context
 */

static void
sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
{
	struct buf	*cmd_bp;	/* buf for the original command */
	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
	size_t		actual_len;	/* actual sense data length */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(sense_bp != NULL);
	ASSERT(sense_xp != NULL);
	ASSERT(sense_pktp != NULL);

	/*
	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
	 * RQS command and not the original command.
	 */
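	/*
	 * The linkage, roughly: the unit has a single dedicated REQUEST
	 * SENSE packet (un->un_rqs_pktp / un->un_rqs_bp).  While it is in
	 * flight, its sd_xbuf points back at the original command's buf via
	 * xb_sense_bp, and both packets carry FLAG_SENSING so that sdintr()
	 * can route the completion here:
	 *
	 *	un->un_rqs_bp -> sense_xp->xb_sense_bp -> original bp
	 *				-> SD_GET_XBUF() -> cmd_xp
	 *				-> SD_GET_PKTP() -> cmd_pktp
	 */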
	ASSERT(sense_pktp == un->un_rqs_pktp);
	ASSERT(sense_bp   == un->un_rqs_bp);
	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
	    (FLAG_SENSING | FLAG_HEAD));
	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
	    FLAG_SENSING) == FLAG_SENSING);

	/* These are the bp, xp, and pktp for the original command */
	cmd_bp = sense_xp->xb_sense_bp;
	cmd_xp = SD_GET_XBUF(cmd_bp);
	cmd_pktp = SD_GET_PKTP(cmd_bp);

	if (sense_pktp->pkt_reason != CMD_CMPLT) {
		/*
		 * The REQUEST SENSE command failed.  Release the REQUEST
		 * SENSE command for re-use, get back the bp for the original
		 * command, and attempt to re-try the original command if
		 * FLAG_DIAGNOSE is not set in the original packet.
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
			    NULL, NULL, EIO, (clock_t)0, NULL);
			return;
		}
	}

	/*
	 * Save the relevant sense info into the xp for the original cmd.
	 *
	 * Note: if the request sense failed the state info will be zero
	 * as set in sd_mark_rqs_busy()
	 */
	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
	actual_len = MAX_SENSE_LENGTH - sense_pktp->pkt_resid;
	if ((cmd_xp->xb_pkt_flags & SD_XB_USCSICMD) &&
	    (((struct uscsi_cmd *)cmd_xp->xb_pktinfo)->uscsi_rqlen >
	    SENSE_LENGTH)) {
		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
		    MAX_SENSE_LENGTH);
		cmd_xp->xb_sense_resid = sense_pktp->pkt_resid;
	} else {
		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
		    SENSE_LENGTH);
		if (actual_len < SENSE_LENGTH) {
			cmd_xp->xb_sense_resid = SENSE_LENGTH - actual_len;
		} else {
			cmd_xp->xb_sense_resid = 0;
		}
	}

	/*
	 *  Free up the RQS command....
	 *  NOTE:
	 *	Must do this BEFORE calling sd_validate_sense_data!
	 *	sd_validate_sense_data may return the original command in
	 *	which case the pkt will be freed and the flags can no
	 *	longer be touched.
	 *	SD_MUTEX is held through this process until the command
	 *	is dispatched based upon the sense data, so there are
	 *	no race conditions.
	 */
	(void) sd_mark_rqs_idle(un, sense_xp);

	/*
	 * For a retryable command see if we have valid sense data, if so then
	 * turn it over to sd_decode_sense() to figure out the right course of
	 * action. Just fail a non-retryable command.
	 */
	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
		if (sd_validate_sense_data(un, cmd_bp, cmd_xp, actual_len) ==
		    SD_SENSE_DATA_IS_VALID) {
			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
		}
	} else {
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		sd_return_failed_command(un, cmd_bp, EIO);
	}
}




/*
 *    Function: sd_handle_auto_request_sense
 *
 * Description: Processing for auto-request sense information.
 *
 *   Arguments: un - ptr to associated softstate
 *		bp - ptr to buf(9S) for the command
 *		xp - ptr to the sd_xbuf for the command
 *		pktp - ptr to the scsi_pkt(9S) for the command
 *
 *     Context: May be called under interrupt context
 */

static void
sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct scsi_arq_status *asp;
	size_t actual_len;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);
	ASSERT(pktp != un->un_rqs_pktp);
	ASSERT(bp   != un->un_rqs_bp);

	/*
	 * For auto-request sense, we get a scsi_arq_status back from
	 * the HBA, with the sense data in the sts_sensedata member.
	 * The pkt_scbp of the packet points to this scsi_arq_status.
	 */
	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);

	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
		/*
		 * The auto REQUEST SENSE failed; see if we can re-try
		 * the original command.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "auto request sense failed (reason=%s)\n",
		    scsi_rname(asp->sts_rqpkt_reason));

		sd_reset_target(un, pktp);

		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
		    NULL, NULL, EIO, (clock_t)0, NULL);
		return;
	}

	/* Save the relevant sense info into the xp for the original cmd. */
	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
	xp->xb_sense_state  = asp->sts_rqpkt_state;
	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
	if (xp->xb_sense_state & STATE_XARQ_DONE) {
		actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		bcopy(&asp->sts_sensedata, xp->xb_sense_data,
		    MAX_SENSE_LENGTH);
	} else {
		if (xp->xb_sense_resid > SENSE_LENGTH) {
			actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		} else {
			actual_len = SENSE_LENGTH - xp->xb_sense_resid;
		}
		if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			if ((((struct uscsi_cmd *)
			    (xp->xb_pktinfo))->uscsi_rqlen) > actual_len) {
				xp->xb_sense_resid = (((struct uscsi_cmd *)
				    (xp->xb_pktinfo))->uscsi_rqlen) -
				    actual_len;
			} else {
				xp->xb_sense_resid = 0;
			}
		}
		bcopy(&asp->sts_sensedata, xp->xb_sense_data, SENSE_LENGTH);
	}

	/*
	 * See if we have valid sense data, if so then turn it over to
	 * sd_decode_sense() to figure out the right course of action.
	 */
	if (sd_validate_sense_data(un, bp, xp, actual_len) ==
	    SD_SENSE_DATA_IS_VALID) {
		sd_decode_sense(un, bp, xp, pktp);
	}
}


/*
 *    Function: sd_print_sense_failed_msg
 *
 * Description: Print log message when RQS has failed.
 *
 *   Arguments: un - ptr to associated softstate
 *		bp - ptr to buf(9S) for the command
 *		arg - generic message string ptr
 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
 *			or SD_NO_RETRY_ISSUED
 *
 *     Context: May be called from interrupt context
 */

static void
sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
	int code)
{
	char	*msgp = arg;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);

	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
	}
}


/*
 *    Function: sd_validate_sense_data
 *
 * Description: Check the given sense data for validity.
 *		If the sense data is not valid, the command will
 *		be either failed or retried!
 *
 * Return Code: SD_SENSE_DATA_IS_INVALID
 *		SD_SENSE_DATA_IS_VALID
 *
 *     Context: May be called from interrupt context
 */

static int
sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	size_t actual_len)
{
	struct scsi_extended_sense *esp;
	struct	scsi_pkt *pktp;
	char	*msgp = NULL;
	sd_ssc_t *sscp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(bp != un->un_rqs_bp);
	ASSERT(xp != NULL);
	ASSERT(un->un_fm_private != NULL);

	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
	ASSERT(sscp != NULL);

	/*
	 * Check the status of the RQS command (auto or manual).
	 */
	switch (xp->xb_sense_status & STATUS_MASK) {
	case STATUS_GOOD:
		break;

	case STATUS_RESERVATION_CONFLICT:
		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_BUSY:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Busy Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_QFULL:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "QFULL Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_CHECK:
	case STATUS_TERMINATED:
		msgp = "Check Condition on REQUEST SENSE\n";
		goto sense_failed;

	default:
		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
		goto sense_failed;
	}

	/*
	 * See if we got the minimum required amount of sense data.
	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
	 * or less.
	 */
	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
	    (actual_len == 0)) {
		msgp = "Request Sense couldn't get sense data\n";
		goto sense_failed;
	}

	if (actual_len < SUN_MIN_SENSE_LENGTH) {
		msgp = "Not enough sense information\n";
		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}
		goto sense_failed;
	}

	/*
	 * We require the extended sense data
	 */
	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
	if (esp->es_class != CLASS_EXTENDED_SENSE) {
		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
			static char tmp[8];
			static char buf[148];
			char *p = (char *)(xp->xb_sense_data);
			int i;

			mutex_enter(&sd_sense_mutex);
			(void) strcpy(buf, "undecodable sense information:");
			for (i = 0; i < actual_len; i++) {
				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
				(void) strcpy(&buf[strlen(buf)], tmp);
			}
			i = strlen(buf);
			(void) strcpy(&buf[i], "-(assumed fatal)\n");

			if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
				scsi_log(SD_DEVINFO(un), sd_label,
				    CE_WARN, buf);
			}
			mutex_exit(&sd_sense_mutex);
		}

		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}

		/* Note: Legacy behavior, fail the command with no retry */
		sd_return_failed_command(un, bp, EIO);
		return (SD_SENSE_DATA_IS_INVALID);
	}

	/*
	 * Check that es_code is valid (es_class concatenated with es_code
	 * makes up the "response code" field).  es_class will always be 7,
	 * so make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
	 * sense data format.
	 */
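	/*
	 * That is, roughly (per the CODE_FMT_* definitions used below):
	 *
	 *	es_code 0x0	fixed format, current error
	 *	es_code 0x1	fixed format, deferred error
	 *	es_code 0x2	descriptor format, current error
	 *	es_code 0x3	descriptor format, deferred error
	 *	es_code 0xf	vendor specific
	 */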
	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}
		goto sense_failed;
	}

	return (SD_SENSE_DATA_IS_VALID);

sense_failed:
	/*
	 * If the request sense failed (for whatever reason), attempt
	 * to retry the original command.
	 */
#if defined(__i386) || defined(__amd64)
	/*
	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in sddef.h
	 * for the SPARC platform, while x86 uses one binary for both SCSI
	 * and FC.  The SD_RETRY_DELAY value therefore needs to be adjusted
	 * here whenever SD_RETRY_DELAY changes in sddef.h.
	 */
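	/*
	 * Hence the open-coded delay below: for fibre channel targets the
	 * retry is delayed by drv_usectohz(100000), i.e. roughly 100ms,
	 * while parallel SCSI targets retry immediately (a delay of 0),
	 * mirroring what SD_RETRY_DELAY would have selected at compile
	 * time on SPARC.
	 */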
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO,
	    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0, NULL);
#else
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
#endif

	return (SD_SENSE_DATA_IS_INVALID);
}

/*
 *    Function: sd_decode_sense
 *
 * Description: Take recovery action(s) when SCSI Sense Data is received.
 *
 *     Context: Interrupt context.
 */

static void
sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	struct scsi_pkt *pktp)
{
	uint8_t sense_key;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(bp != un->un_rqs_bp);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	sense_key = scsi_sense_key(xp->xb_sense_data);

	switch (sense_key) {
	case KEY_NO_SENSE:
		sd_sense_key_no_sense(un, bp, xp, pktp);
		break;
	case KEY_RECOVERABLE_ERROR:
		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
		    bp, xp, pktp);
		break;
	case KEY_NOT_READY:
		sd_sense_key_not_ready(un, xp->xb_sense_data,
		    bp, xp, pktp);
		break;
	case KEY_MEDIUM_ERROR:
	case KEY_HARDWARE_ERROR:
		sd_sense_key_medium_or_hardware_error(un,
		    xp->xb_sense_data, bp, xp, pktp);
		break;
	case KEY_ILLEGAL_REQUEST:
		sd_sense_key_illegal_request(un, bp, xp, pktp);
		break;
	case KEY_UNIT_ATTENTION:
		sd_sense_key_unit_attention(un, xp->xb_sense_data,
		    bp, xp, pktp);
		break;
	case KEY_WRITE_PROTECT:
	case KEY_VOLUME_OVERFLOW:
	case KEY_MISCOMPARE:
		sd_sense_key_fail_command(un, bp, xp, pktp);
		break;
	case KEY_BLANK_CHECK:
		sd_sense_key_blank_check(un, bp, xp, pktp);
		break;
	case KEY_ABORTED_COMMAND:
		sd_sense_key_aborted_command(un, bp, xp, pktp);
		break;
	case KEY_VENDOR_UNIQUE:
	case KEY_COPY_ABORTED:
	case KEY_EQUAL:
	case KEY_RESERVED:
	default:
		sd_sense_key_default(un, xp->xb_sense_data,
		    bp, xp, pktp);
		break;
	}
}


/*
 *    Function: sd_dump_memory
 *
 * Description: Debug logging routine to print the contents of a user provided
 *		buffer. The output of the buffer is broken up into 256 byte
 *		segments due to a size constraint of the scsi_log
 *		implementation.
 *
 *   Arguments: un - ptr to softstate
 *		comp - component mask
 *		title - "title" string to precede data when printed
 *		data - ptr to data block to be printed
 *		len - size of data block to be printed
 *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
 *
 *     Context: May be called from interrupt context
 */

#define	SD_DUMP_MEMORY_BUF_SIZE	256

static char *sd_dump_format_string[] = {
		" 0x%02x",
		" %c"
};

static void
sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
    int len, int fmt)
{
	int	i, j;
	int	avail_count;
	int	start_offset;
	int	end_offset;
	size_t	entry_len;
	char	*bufp;
	char	*local_buf;
	char	*format_string;

	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));

	/*
	 * In the debug version of the driver, this function is called from a
	 * number of places which are NOPs in the release driver.
	 * The debug driver therefore has additional methods of filtering
	 * debug output.
	 */
#ifdef SDDEBUG
	/*
	 * In the debug version of the driver we can reduce the amount of debug
	 * messages by setting sd_error_level to something other than
	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
	 * sd_component_mask.
	 */
	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
	    (sd_error_level != SCSI_ERR_ALL)) {
		return;
	}
	if (((sd_component_mask & comp) == 0) ||
	    (sd_error_level != SCSI_ERR_ALL)) {
		return;
	}
#else
	if (sd_error_level != SCSI_ERR_ALL) {
		return;
	}
#endif

	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
	bufp = local_buf;
	/*
	 * Available length is the length of local_buf[], minus the
	 * length of the title string, minus one for the ":", minus
	 * one for the newline, minus one for the NULL terminator.
	 * This gives the #bytes available for holding the printed
	 * values from the given data buffer.
	 */
	if (fmt == SD_LOG_HEX) {
		format_string = sd_dump_format_string[0];
	} else /* SD_LOG_CHAR */ {
		format_string = sd_dump_format_string[1];
	}
	/*
	 * Available count is the number of elements from the given
	 * data buffer that we can fit into the available length.
	 * This is based upon the size of the format string used.
	 * Make one entry and find its size.
	 */
	(void) sprintf(bufp, format_string, data[0]);
	entry_len = strlen(bufp);
	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
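	/*
	 * For example (illustrative numbers): dumping hex with a title of
	 * "Sense Data" gives entries of 5 characters each (" 0x%02x"
	 * prints e.g. " 0x70"), so avail_count works out to
	 * (256 - 10 - 3) / 5 = 48 bytes per logged line.
	 */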
	j = 0;
	while (j < len) {
		bufp = local_buf;
		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
		start_offset = j;

		end_offset = start_offset + avail_count;

		(void) sprintf(bufp, "%s:", title);
		bufp += strlen(bufp);
		for (i = start_offset; ((i < end_offset) && (j < len));
		    i++, j++) {
			(void) sprintf(bufp, format_string, data[i]);
			bufp += entry_len;
		}
		(void) sprintf(bufp, "\n");

		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
	}
	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
}

/*
 *    Function: sd_print_sense_msg
 *
 * Description: Log a message based upon the given sense data.
 *
 *   Arguments: un - ptr to associated softstate
 *		bp - ptr to buf(9S) for the command
 *		arg - ptr to associate sd_sense_info struct
 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
 *			or SD_NO_RETRY_ISSUED
 *
 *     Context: May be called from interrupt context
 */

static void
sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	uint8_t *sensep;
	daddr_t request_blkno;
	diskaddr_t err_blkno;
	int severity;
	int pfa_flag;
	extern struct scsi_key_strings scsi_cmds[];

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	ASSERT(arg != NULL);

	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;

	if ((code == SD_DELAYED_RETRY_ISSUED) ||
	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
		severity = SCSI_ERR_RETRYABLE;
	}

	/* Use absolute block number for the request block number */
	request_blkno = xp->xb_blkno;

	/*
	 * Now try to get the error block number from the sense data
	 */
	sensep = xp->xb_sense_data;

	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
	    (uint64_t *)&err_blkno)) {
		/*
		 * We retrieved the error block number from the information
		 * portion of the sense data.
		 *
		 * For USCSI commands we are better off using the error
		 * block no. as the requested block no. (This is the best
		 * we can estimate.)
		 */
		if ((SD_IS_BUFIO(xp) == FALSE) &&
		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
			request_blkno = err_blkno;
		}
	} else {
		/*
		 * Without the es_valid bit set (for fixed format) or an
		 * information descriptor (for descriptor format) we cannot
		 * be certain of the error blkno, so just use the
		 * request_blkno.
		 */
		err_blkno = (diskaddr_t)request_blkno;
	}

	/*
	 * The following will log the buffer contents for the release driver
	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
	 * level is set to verbose.
	 */
	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);

	if (pfa_flag == FALSE) {
		/* This is normally only set for USCSI */
		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
			return;
		}

		if ((SD_IS_BUFIO(xp) == TRUE) &&
		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
		    (severity < sd_error_level))) {
			return;
		}
	}
	/*
	 * Check for Sonoma Failover and keep a count of how many failed I/O's
	 */
	if ((SD_IS_LSI(un)) &&
	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
	    (scsi_sense_asc(sensep) == 0x94) &&
	    (scsi_sense_ascq(sensep) == 0x01)) {
		un->un_sonoma_failure_count++;
		if (un->un_sonoma_failure_count > 1) {
			return;
		}
	}

	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP ||
	    ((scsi_sense_key(sensep) == KEY_RECOVERABLE_ERROR) &&
	    (pktp->pkt_resid == 0))) {
		scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
		    request_blkno, err_blkno, scsi_cmds,
		    (struct scsi_extended_sense *)sensep,
		    un->un_additional_codes, NULL);
	}
}

/*
 *    Function: sd_sense_key_no_sense
 *
 * Description: Recovery action when sense data was not received.
 *
 *     Context: May be called from interrupt context
 */

static void
sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	SD_UPDATE_ERRSTATS(un, sd_softerrs);

	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}


/*
 *    Function: sd_sense_key_recoverable_error
 *
 * Description: Recovery actions for a SCSI "Recovered Error" sense key.
 *
 *     Context: May be called from interrupt context
 */

static void
sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t asc = scsi_sense_asc(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
	 */
	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
		si.ssi_severity = SCSI_ERR_INFO;
		si.ssi_pfa_flag = TRUE;
	} else {
		SD_UPDATE_ERRSTATS(un, sd_softerrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
		si.ssi_severity = SCSI_ERR_RECOVERED;
		si.ssi_pfa_flag = FALSE;
	}

	if (pktp->pkt_resid == 0) {
		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
		sd_return_command(un, bp);
		return;
	}

	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}




/*
 *    Function: sd_sense_key_not_ready
 *
 * Description: Recovery actions for a SCSI "Not Ready" sense key.
 *
 *     Context: May be called from interrupt context
 */

static void
sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t asc = scsi_sense_asc(sense_datap);
	uint8_t ascq = scsi_sense_ascq(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/*
	 * Update error stats after first NOT READY error. Disks may have
	 * been powered down and may need to be restarted.  For CDROMs,
	 * report NOT READY errors only if media is present.
	 */
	if ((ISCD(un) && (asc == 0x3A)) ||
	    (xp->xb_nr_retry_count > 0)) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
	}

	/*
	 * Just fail if the "not ready" retry limit has been reached.
	 */
	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
		/* Special check for error message printing for removables. */
		if (un->un_f_has_removable_media && (asc == 0x04) &&
		    (ascq >= 0x04)) {
			si.ssi_severity = SCSI_ERR_ALL;
		}
		goto fail_command;
	}

	/*
	 * Check the ASC and ASCQ in the sense data as needed, to determine
	 * what to do.
	 */
	switch (asc) {
	case 0x04:	/* LOGICAL UNIT NOT READY */
		/*
		 * Disk drives that don't spin up result in a very long delay
		 * in format without warning messages. We will log a message
		 * if the error level is set to verbose.
		 */
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "logical unit not ready, resetting disk\n");
		}

		/*
		 * There are different requirements for CDROMs and disks for
		 * the number of retries.  If a CD-ROM is giving this, it is
		 * probably reading TOC and is in the process of getting
		 * ready, so we should keep on trying for a long time to make
		 * sure that all types of media are taken into account (for
		 * some media the drive takes a long time to read TOC).  For
		 * disks we do not want to retry this too many times as this
		 * can cause a long hang in format when the drive refuses to
		 * spin up (a very common failure).
		 */
		switch (ascq) {
		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
			/*
			 * Disk drives frequently refuse to spin up which
			 * results in a very long hang in format without
			 * warning messages.
			 *
			 * Note: This code preserves the legacy behavior of
			 * comparing xb_nr_retry_count against zero for fibre
			 * channel targets instead of comparing against the
			 * un_reset_retry_count value.  The reason for this
			 * discrepancy has been so utterly lost beneath the
			 * Sands of Time that even Indiana Jones could not
			 * find it.
			 */
			if (un->un_f_is_fibre == TRUE) {
				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
				    (xp->xb_nr_retry_count > 0)) &&
				    (un->un_startstop_timeid == NULL)) {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "logical unit not ready, "
					    "resetting disk\n");
					sd_reset_target(un, pktp);
				}
			} else {
				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
				    (xp->xb_nr_retry_count >
				    un->un_reset_retry_count)) &&
				    (un->un_startstop_timeid == NULL)) {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "logical unit not ready, "
					    "resetting disk\n");
					sd_reset_target(un, pktp);
				}
			}
			break;

		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
			/*
			 * If the target is in the process of becoming
			 * ready, just proceed with the retry. This can
			 * happen with CD-ROMs that take a long time to
			 * read TOC after a power cycle or reset.
			 */
			goto do_retry;

		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
			break;

		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
			/*
			 * Retries cannot help here so just fail right away.
			 */
			goto fail_command;

		case 0x88:
			/*
			 * Vendor-unique code for T3/T4: it indicates a
			 * path problem in a multipathed config, but as far as
			 * the target driver is concerned it equates to a fatal
			 * error, so we should just fail the command right away
			 * (without printing anything to the console). If this
			 * is not a T3/T4, fall thru to the default recovery
			 * action.
			 * T3/T4 is FC only, don't need to check is_fibre
			 */
			if (SD_IS_T3(un) || SD_IS_T4(un)) {
				sd_return_failed_command(un, bp, EIO);
				return;
			}
			/* FALLTHRU */

		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
		default:    /* Possible future codes in SCSI spec? */
			/*
			 * For removable-media devices, do not retry if
			 * ASCQ > 2 as these result mostly from USCSI commands
			 * on MMC devices issued to check status of an
			 * operation initiated in immediate mode.  Also for
			 * ASCQ >= 4 do not print console messages as these
			 * mainly represent a user-initiated operation
			 * instead of a system failure.
			 */
			if (un->un_f_has_removable_media) {
				si.ssi_severity = SCSI_ERR_ALL;
				goto fail_command;
			}
			break;
		}

		/*
		 * As part of our recovery attempt for the NOT READY
		 * condition, we issue a START STOP UNIT command. However
		 * we want to wait for a short delay before attempting this
		 * as there may still be more commands coming back from the
		 * target with the check condition. To do this we use
		 * timeout(9F) to call sd_start_stop_unit_callback() after
		 * the delay interval expires. (sd_start_stop_unit_callback()
		 * dispatches sd_start_stop_unit_task(), which will issue
		 * the actual START STOP UNIT command.) The delay interval
		 * is one-half of the delay that we will use to retry the
		 * command that generated the NOT READY condition.
		 *
		 * Note that we could just dispatch sd_start_stop_unit_task()
		 * from here and allow it to sleep for the delay interval,
		 * but then we would be tying up the taskq thread
		 * unnecessarily for the duration of the delay.
		 *
		 * Do not issue the START STOP UNIT if the current command
		 * is already a START STOP UNIT.
		 */
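		/*
		 * The resulting sequence is, roughly:
		 *
		 *	t0			NOT READY received here
		 *	t0 + busy_timeout/2	timeout fires; the taskq
		 *				issues START STOP UNIT
		 *	START STOP completes	sd_start_stop_unit_task()
		 *				kicks off the queued retry
		 */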
		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
			break;
		}

		/*
		 * Do not schedule the timeout if one is already pending.
		 */
		if (un->un_startstop_timeid != NULL) {
			SD_INFO(SD_LOG_ERROR, un,
			    "sd_sense_key_not_ready: restart already issued to"
			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
			    ddi_get_instance(SD_DEVINFO(un)));
			break;
		}

		/*
		 * Schedule the START STOP UNIT command, then queue the command
		 * for a retry.
		 *
		 * Note: A timeout is not scheduled for this retry because we
		 * want the retry to be serial with the START_STOP_UNIT. The
		 * retry will be started when the START_STOP_UNIT is completed
		 * in sd_start_stop_unit_task.
		 */
		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
		    un, un->un_busy_timeout / 2);
		xp->xb_nr_retry_count++;
		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
		return;

	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "unit does not respond to selection\n");
		}
		break;

	case 0x3A:	/* MEDIUM NOT PRESENT */
		if (sd_error_level >= SCSI_ERR_FATAL) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Caddy not inserted in drive\n");
		}

		sr_ejected(un);
		un->un_mediastate = DKIO_EJECTED;
		/* The state has changed, inform the media watch routines */
		cv_broadcast(&un->un_state_cv);
		/* Just fail if no media is present in the drive. */
		goto fail_command;

	default:
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
			    "Unit not Ready. Additional sense code 0x%x\n",
			    asc);
		}
		break;
	}

do_retry:

	/*
	 * Retry the command, as some targets may report NOT READY for
	 * several seconds after being reset.
	 */
	xp->xb_nr_retry_count++;
	si.ssi_severity = SCSI_ERR_RETRYABLE;
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
	    &si, EIO, un->un_busy_timeout, NULL);

	return;

fail_command:
	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
	sd_return_failed_command(un, bp, EIO);
}



/*
 *    Function: sd_sense_key_medium_or_hardware_error
 *
 * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
 *		sense key.
 *
 *     Context: May be called from interrupt context
 */

static void
sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t sense_key = scsi_sense_key(sense_datap);
	uint8_t asc = scsi_sense_asc(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	if (sense_key == KEY_MEDIUM_ERROR) {
		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
	}

	SD_UPDATE_ERRSTATS(un, sd_harderrs);

	if ((un->un_reset_retry_count != 0) &&
	    (xp->xb_retry_count == un->un_reset_retry_count)) {
		mutex_exit(SD_MUTEX(un));
		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
		if (un->un_f_allow_bus_device_reset == TRUE) {

			boolean_t try_resetting_target = B_TRUE;

			/*
			 * We need to be able to handle specific ASC values
			 * when we are handling a KEY_HARDWARE_ERROR. In
			 * particular, taking the default action of resetting
			 * the target may not be the appropriate way to attempt
			 * recovery. Resetting a target because of a single LUN
			 * failure victimizes all LUNs on that target.
			 *
			 * This is true for the LSI arrays: if an LSI array
			 * controller returns an ASC of 0x84 (LUN Dead), we
			 * should trust it.
			 */

			if (sense_key == KEY_HARDWARE_ERROR) {
				switch (asc) {
				case 0x84:
					if (SD_IS_LSI(un)) {
						try_resetting_target = B_FALSE;
					}
					break;
				default:
					break;
				}
			}

			if (try_resetting_target == B_TRUE) {
				int reset_retval = 0;
				if (un->un_f_lun_reset_enabled == TRUE) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_LUN\n");
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_TARGET\n");
					(void) scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * This really ought to be a fatal error, but we will retry anyway
	 * as some drives report this as a spurious error.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}



/*
 *    Function: sd_sense_key_illegal_request
 *
 * Description: Recovery actions for a SCSI "Illegal Request" sense key.
 *
 *     Context: May be called from interrupt context
 */

static void
sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;

	/* Pointless to retry if the target thinks it's an illegal request */
	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
	sd_return_failed_command(un, bp, EIO);
}




/*
 *    Function: sd_sense_key_unit_attention
 *
 * Description: Recovery actions for a SCSI "Unit Attention" sense key.
 *
 *     Context: May be called from interrupt context
 */

static void
sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	/*
	 * For UNIT ATTENTION we allow retries for one minute. Devices
	 * like Sonoma can return UNIT ATTENTION close to a minute
	 * under certain conditions.
	 */
	int	retry_check_flag = SD_RETRIES_UA;
	boolean_t	kstat_updated = B_FALSE;
	struct	sd_sense_info		si;
	uint8_t asc = scsi_sense_asc(sense_datap);
	uint8_t	ascq = scsi_sense_ascq(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;


	switch (asc) {
	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
		if (sd_report_pfa != 0) {
			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
			si.ssi_pfa_flag = TRUE;
			retry_check_flag = SD_RETRIES_STANDARD;
			goto do_retry;
		}

		break;

	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
		}
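		/*
		 * For devices too large to address with Group 1 (10-byte)
		 * CDBs (roughly what the SD_GROUP1_MAX_ADDRESS comparison
		 * below tests), descriptor-format sense data is needed to
		 * report 64-bit error LBAs, so after the reset indicated by
		 * this unit attention we attempt to re-enable descriptor
		 * sense.
		 */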
18555#ifdef _LP64
18556		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
18557			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
18558			    un, KM_NOSLEEP) == 0) {
18559				/*
18560				 * If we can't dispatch the task we'll just
18561				 * live without descriptor sense.  We can
18562				 * try again on the next "unit attention"
18563				 */
18564				SD_ERROR(SD_LOG_ERROR, un,
18565				    "sd_sense_key_unit_attention: "
18566				    "Could not dispatch "
18567				    "sd_reenable_dsense_task\n");
18568			}
18569		}
18570#endif /* _LP64 */
18571		/* FALLTHRU */
18572
18573	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
18574		if (!un->un_f_has_removable_media) {
18575			break;
18576		}
18577
18578		/*
18579		 * When we get a unit attention from a removable-media device,
18580		 * it may be in a state that will take a long time to recover
18581		 * (e.g., from a reset).  Since we are executing in interrupt
18582		 * context here, we cannot wait around for the device to come
18583		 * back. So hand this command off to sd_media_change_task()
18584		 * for deferred processing under taskq thread context. (Note
18585		 * that the command still may be failed if a problem is
18586		 * encountered at a later time.)
18587		 */
18588		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
18589		    KM_NOSLEEP) == 0) {
18590			/*
18591			 * Cannot dispatch the request so fail the command.
18592			 */
18593			SD_UPDATE_ERRSTATS(un, sd_harderrs);
18594			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18595			si.ssi_severity = SCSI_ERR_FATAL;
18596			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18597			sd_return_failed_command(un, bp, EIO);
18598		}
18599
	/*
	 * If we failed to dispatch sd_media_change_task(), the kstats
	 * were already updated above.  If the dispatch succeeded, the
	 * kstats will be updated later if an error is encountered, so
	 * just note here that they have been taken care of.
	 */
18606		kstat_updated = B_TRUE;
18607
18608		/*
18609		 * Either the command has been successfully dispatched to a
18610		 * task Q for retrying, or the dispatch failed. In either case
18611		 * do NOT retry again by calling sd_retry_command. This sets up
18612		 * two retries of the same command and when one completes and
18613		 * frees the resources the other will access freed memory,
18614		 * a bad thing.
18615		 */
18616		return;
18617
18618	default:
18619		break;
18620	}
18621
18622	/*
18623	 * ASC  ASCQ
18624	 *  2A   09	Capacity data has changed
18625	 *  2A   01	Mode parameters changed
18626	 *  3F   0E	Reported luns data has changed
	 * Arrays that support logical unit expansion should report
	 * capacity changes (2Ah/09).  "Mode parameters changed" and
	 * "reported luns data has changed" are approximations that
	 * some arrays report instead.
18630	 */
18631	if (((asc == 0x2a) && (ascq == 0x09)) ||
18632	    ((asc == 0x2a) && (ascq == 0x01)) ||
18633	    ((asc == 0x3f) && (ascq == 0x0e))) {
18634		if (taskq_dispatch(sd_tq, sd_target_change_task, un,
18635		    KM_NOSLEEP) == 0) {
18636			SD_ERROR(SD_LOG_ERROR, un,
18637			    "sd_sense_key_unit_attention: "
18638			    "Could not dispatch sd_target_change_task\n");
18639		}
18640	}
18641
18642	/*
18643	 * Update kstat if we haven't done that.
18644	 */
18645	if (!kstat_updated) {
18646		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18647		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18648	}
18649
18650do_retry:
18651	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
18652	    EIO, SD_UA_RETRY_DELAY, NULL);
18653}
18654
18655
18656
18657/*
18658 *    Function: sd_sense_key_fail_command
18659 *
 * Description: Used to fail a command when we don't like the sense key that
18661 *		was returned.
18662 *
18663 *     Context: May be called from interrupt context
18664 */
18665
18666static void
18667sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18668	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18669{
18670	struct sd_sense_info	si;
18671
18672	ASSERT(un != NULL);
18673	ASSERT(mutex_owned(SD_MUTEX(un)));
18674	ASSERT(bp != NULL);
18675	ASSERT(xp != NULL);
18676	ASSERT(pktp != NULL);
18677
18678	si.ssi_severity = SCSI_ERR_FATAL;
18679	si.ssi_pfa_flag = FALSE;
18680
18681	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18682	sd_return_failed_command(un, bp, EIO);
18683}
18684
18685
18686
18687/*
18688 *    Function: sd_sense_key_blank_check
18689 *
18690 * Description: Recovery actions for a SCSI "Blank Check" sense key.
18691 *		Has no monetary connotation.
18692 *
18693 *     Context: May be called from interrupt context
18694 */
18695
18696static void
18697sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18698	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18699{
18700	struct sd_sense_info	si;
18701
18702	ASSERT(un != NULL);
18703	ASSERT(mutex_owned(SD_MUTEX(un)));
18704	ASSERT(bp != NULL);
18705	ASSERT(xp != NULL);
18706	ASSERT(pktp != NULL);
18707
18708	/*
18709	 * Blank check is not fatal for removable devices, therefore
18710	 * it does not require a console message.
18711	 */
18712	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18713	    SCSI_ERR_FATAL;
18714	si.ssi_pfa_flag = FALSE;
18715
18716	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18717	sd_return_failed_command(un, bp, EIO);
18718}
18719
18720
18721
18722
18723/*
18724 *    Function: sd_sense_key_aborted_command
18725 *
18726 * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18727 *
18728 *     Context: May be called from interrupt context
18729 */
18730
18731static void
18732sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18733	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18734{
18735	struct sd_sense_info	si;
18736
18737	ASSERT(un != NULL);
18738	ASSERT(mutex_owned(SD_MUTEX(un)));
18739	ASSERT(bp != NULL);
18740	ASSERT(xp != NULL);
18741	ASSERT(pktp != NULL);
18742
18743	si.ssi_severity = SCSI_ERR_FATAL;
18744	si.ssi_pfa_flag = FALSE;
18745
18746	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18747
18748	/*
18749	 * This really ought to be a fatal error, but we will retry anyway
18750	 * as some drives report this as a spurious error.
18751	 */
18752	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18753	    &si, EIO, drv_usectohz(100000), NULL);
18754}
18755
18756
18757
18758/*
18759 *    Function: sd_sense_key_default
18760 *
18761 * Description: Default recovery action for several SCSI sense keys (basically
18762 *		attempts a retry).
18763 *
18764 *     Context: May be called from interrupt context
18765 */
18766
18767static void
18768sd_sense_key_default(struct sd_lun *un,
18769	uint8_t *sense_datap,
18770	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18771{
18772	struct sd_sense_info	si;
18773	uint8_t sense_key = scsi_sense_key(sense_datap);
18774
18775	ASSERT(un != NULL);
18776	ASSERT(mutex_owned(SD_MUTEX(un)));
18777	ASSERT(bp != NULL);
18778	ASSERT(xp != NULL);
18779	ASSERT(pktp != NULL);
18780
18781	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18782
18783	/*
	 * Undecoded sense key.  Attempt retries and hope that they will
	 * fix the problem.  Otherwise, we're dead.
18786	 */
18787	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18788		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18789		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18790	}
18791
18792	si.ssi_severity = SCSI_ERR_FATAL;
18793	si.ssi_pfa_flag = FALSE;
18794
18795	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18796	    &si, EIO, (clock_t)0, NULL);
18797}
18798
18799
18800
18801/*
18802 *    Function: sd_print_retry_msg
18803 *
18804 * Description: Print a message indicating the retry action being taken.
18805 *
18806 *   Arguments: un - ptr to associated softstate
18807 *		bp - ptr to buf(9S) for the command
18808 *		arg - not used.
18809 *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18810 *			or SD_NO_RETRY_ISSUED
18811 *
18812 *     Context: May be called from interrupt context
18813 */
18814/* ARGSUSED */
18815static void
18816sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18817{
18818	struct sd_xbuf	*xp;
18819	struct scsi_pkt *pktp;
18820	char *reasonp;
18821	char *msgp;
18822
18823	ASSERT(un != NULL);
18824	ASSERT(mutex_owned(SD_MUTEX(un)));
18825	ASSERT(bp != NULL);
18826	pktp = SD_GET_PKTP(bp);
18827	ASSERT(pktp != NULL);
18828	xp = SD_GET_XBUF(bp);
18829	ASSERT(xp != NULL);
18830
18831	ASSERT(!mutex_owned(&un->un_pm_mutex));
18832	mutex_enter(&un->un_pm_mutex);
18833	if ((un->un_state == SD_STATE_SUSPENDED) ||
18834	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18835	    (pktp->pkt_flags & FLAG_SILENT)) {
18836		mutex_exit(&un->un_pm_mutex);
18837		goto update_pkt_reason;
18838	}
18839	mutex_exit(&un->un_pm_mutex);
18840
18841	/*
	 * Suppress messages if they are all for the same pkt_reason;
	 * with tagged queuing (TQ), many commands (up to 256) can
	 * complete with the same pkt_reason.  If we are panicking,
	 * suppress the retry messages as well.
18845	 */
18846	switch (flag) {
18847	case SD_NO_RETRY_ISSUED:
18848		msgp = "giving up";
18849		break;
18850	case SD_IMMEDIATE_RETRY_ISSUED:
18851	case SD_DELAYED_RETRY_ISSUED:
18852		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18853		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18854		    (sd_error_level != SCSI_ERR_ALL))) {
18855			return;
18856		}
18857		msgp = "retrying command";
18858		break;
18859	default:
18860		goto update_pkt_reason;
18861	}
18862
18863	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18864	    scsi_rname(pktp->pkt_reason));
18865
18866	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
18867		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18868		    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18869	}
18870
18871update_pkt_reason:
18872	/*
18873	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18874	 * This is to prevent multiple console messages for the same failure
18875	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18876	 * when the command is retried successfully because there still may be
18877	 * more commands coming back with the same value of pktp->pkt_reason.
18878	 */
18879	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18880		un->un_last_pkt_reason = pktp->pkt_reason;
18881	}
18882}
18883
18884
18885/*
18886 *    Function: sd_print_cmd_incomplete_msg
18887 *
18888 * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18889 *
18890 *   Arguments: un - ptr to associated softstate
18891 *		bp - ptr to buf(9S) for the command
18892 *		arg - passed to sd_print_retry_msg()
18893 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18894 *			or SD_NO_RETRY_ISSUED
18895 *
18896 *     Context: May be called from interrupt context
18897 */
18898
18899static void
18900sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18901	int code)
18902{
18903	dev_info_t	*dip;
18904
18905	ASSERT(un != NULL);
18906	ASSERT(mutex_owned(SD_MUTEX(un)));
18907	ASSERT(bp != NULL);
18908
18909	switch (code) {
18910	case SD_NO_RETRY_ISSUED:
18911		/* Command was failed. Someone turned off this target? */
18912		if (un->un_state != SD_STATE_OFFLINE) {
			/*
			 * Suppress the message if we are detaching and
			 * the device has been disconnected.  Note that
			 * DEVI_IS_DEVICE_REMOVED is a consolidation-
			 * private interface and not part of the DDI.
			 */
18919			dip = un->un_sd->sd_dev;
18920			if (!(DEVI_IS_DETACHING(dip) &&
18921			    DEVI_IS_DEVICE_REMOVED(dip))) {
18922				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "disk not responding to selection\n");
18924			}
18925			New_state(un, SD_STATE_OFFLINE);
18926		}
18927		break;
18928
18929	case SD_DELAYED_RETRY_ISSUED:
18930	case SD_IMMEDIATE_RETRY_ISSUED:
18931	default:
18932		/* Command was successfully queued for retry */
18933		sd_print_retry_msg(un, bp, arg, code);
18934		break;
18935	}
18936}
18937
18938
18939/*
18940 *    Function: sd_pkt_reason_cmd_incomplete
18941 *
18942 * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18943 *
18944 *     Context: May be called from interrupt context
18945 */
18946
18947static void
18948sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18949	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18950{
18951	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18952
18953	ASSERT(un != NULL);
18954	ASSERT(mutex_owned(SD_MUTEX(un)));
18955	ASSERT(bp != NULL);
18956	ASSERT(xp != NULL);
18957	ASSERT(pktp != NULL);
18958
18959	/* Do not do a reset if selection did not complete */
18960	/* Note: Should this not just check the bit? */
18961	if (pktp->pkt_state != STATE_GOT_BUS) {
18962		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18963		sd_reset_target(un, pktp);
18964	}
18965
18966	/*
18967	 * If the target was not successfully selected, then set
18968	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18969	 * with the target, and further retries and/or commands are
18970	 * likely to take a long time.
18971	 */
18972	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18973		flag |= SD_RETRIES_FAILFAST;
18974	}
18975
18976	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18977
18978	sd_retry_command(un, bp, flag,
18979	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18980}
18981
18982
18983
18984/*
18985 *    Function: sd_pkt_reason_cmd_tran_err
18986 *
18987 * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18988 *
18989 *     Context: May be called from interrupt context
18990 */
18991
18992static void
18993sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18994	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18995{
18996	ASSERT(un != NULL);
18997	ASSERT(mutex_owned(SD_MUTEX(un)));
18998	ASSERT(bp != NULL);
18999	ASSERT(xp != NULL);
19000	ASSERT(pktp != NULL);
19001
19002	/*
19003	 * Do not reset if we got a parity error, or if
19004	 * selection did not complete.
19005	 */
19006	SD_UPDATE_ERRSTATS(un, sd_harderrs);
19007	/* Note: Should this not just check the bit for pkt_state? */
19008	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
19009	    (pktp->pkt_state != STATE_GOT_BUS)) {
19010		SD_UPDATE_ERRSTATS(un, sd_transerrs);
19011		sd_reset_target(un, pktp);
19012	}
19013
19014	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19015
19016	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19017	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19018}
19019
19020
19021
19022/*
19023 *    Function: sd_pkt_reason_cmd_reset
19024 *
19025 * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
19026 *
19027 *     Context: May be called from interrupt context
19028 */
19029
19030static void
19031sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
19032	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19033{
19034	ASSERT(un != NULL);
19035	ASSERT(mutex_owned(SD_MUTEX(un)));
19036	ASSERT(bp != NULL);
19037	ASSERT(xp != NULL);
19038	ASSERT(pktp != NULL);
19039
19040	/* The target may still be running the command, so try to reset. */
19041	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19042	sd_reset_target(un, pktp);
19043
19044	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19045
19046	/*
19047	 * If pkt_reason is CMD_RESET chances are that this pkt got
19048	 * reset because another target on this bus caused it. The target
19049	 * that caused it should get CMD_TIMEOUT with pkt_statistics
19050	 * of STAT_TIMEOUT/STAT_DEV_RESET.
19051	 */
19052
19053	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
19054	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19055}
19056
19057
19058
19059
19060/*
19061 *    Function: sd_pkt_reason_cmd_aborted
19062 *
19063 * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
19064 *
19065 *     Context: May be called from interrupt context
19066 */
19067
19068static void
19069sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
19070	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19071{
19072	ASSERT(un != NULL);
19073	ASSERT(mutex_owned(SD_MUTEX(un)));
19074	ASSERT(bp != NULL);
19075	ASSERT(xp != NULL);
19076	ASSERT(pktp != NULL);
19077
19078	/* The target may still be running the command, so try to reset. */
19079	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19080	sd_reset_target(un, pktp);
19081
19082	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19083
19084	/*
19085	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
19086	 * aborted because another target on this bus caused it. The target
19087	 * that caused it should get CMD_TIMEOUT with pkt_statistics
19088	 * of STAT_TIMEOUT/STAT_DEV_RESET.
19089	 */
19090
19091	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
19092	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19093}
19094
19095
19096
19097/*
19098 *    Function: sd_pkt_reason_cmd_timeout
19099 *
19100 * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
19101 *
19102 *     Context: May be called from interrupt context
19103 */
19104
19105static void
19106sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
19107	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19108{
19109	ASSERT(un != NULL);
19110	ASSERT(mutex_owned(SD_MUTEX(un)));
19111	ASSERT(bp != NULL);
19112	ASSERT(xp != NULL);
19113	ASSERT(pktp != NULL);
19114
19115
19116	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19117	sd_reset_target(un, pktp);
19118
19119	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19120
19121	/*
19122	 * A command timeout indicates that we could not establish
19123	 * communication with the target, so set SD_RETRIES_FAILFAST
19124	 * as further retries/commands are likely to take a long time.
19125	 */
19126	sd_retry_command(un, bp,
19127	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
19128	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19129}
19130
19131
19132
19133/*
19134 *    Function: sd_pkt_reason_cmd_unx_bus_free
19135 *
19136 * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
19137 *
19138 *     Context: May be called from interrupt context
19139 */
19140
19141static void
19142sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
19143	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19144{
19145	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
19146
19147	ASSERT(un != NULL);
19148	ASSERT(mutex_owned(SD_MUTEX(un)));
19149	ASSERT(bp != NULL);
19150	ASSERT(xp != NULL);
19151	ASSERT(pktp != NULL);
19152
19153	SD_UPDATE_ERRSTATS(un, sd_harderrs);
19154	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19155
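	/*
	 * Suppress the retry message if the unexpected bus free was
	 * due to a parity error (STAT_PERR set in pkt_statistics).
	 */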
19156	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
19157	    sd_print_retry_msg : NULL;
19158
19159	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19160	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19161}
19162
19163
19164/*
19165 *    Function: sd_pkt_reason_cmd_tag_reject
19166 *
19167 * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
19168 *
19169 *     Context: May be called from interrupt context
19170 */
19171
19172static void
19173sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
19174	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19175{
19176	ASSERT(un != NULL);
19177	ASSERT(mutex_owned(SD_MUTEX(un)));
19178	ASSERT(bp != NULL);
19179	ASSERT(xp != NULL);
19180	ASSERT(pktp != NULL);
19181
19182	SD_UPDATE_ERRSTATS(un, sd_harderrs);
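	/*
	 * The target rejected the tag, so fall back to untagged
	 * operation: clear the tag flags, clamp the throttle, and
	 * disable tagged queuing at the HBA.
	 */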
19183	pktp->pkt_flags = 0;
19184	un->un_tagflags = 0;
19185	if (un->un_f_opt_queueing == TRUE) {
19186		un->un_throttle = min(un->un_throttle, 3);
19187	} else {
19188		un->un_throttle = 1;
19189	}
19190	mutex_exit(SD_MUTEX(un));
19191	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
19192	mutex_enter(SD_MUTEX(un));
19193
19194	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19195
19196	/* Legacy behavior not to check retry counts here. */
19197	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
19198	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19199}
19200
19201
19202/*
19203 *    Function: sd_pkt_reason_default
19204 *
19205 * Description: Default recovery actions for SCSA pkt_reason values that
19206 *		do not have more explicit recovery actions.
19207 *
19208 *     Context: May be called from interrupt context
19209 */
19210
19211static void
19212sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
19213	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19214{
19215	ASSERT(un != NULL);
19216	ASSERT(mutex_owned(SD_MUTEX(un)));
19217	ASSERT(bp != NULL);
19218	ASSERT(xp != NULL);
19219	ASSERT(pktp != NULL);
19220
19221	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19222	sd_reset_target(un, pktp);
19223
19224	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19225
19226	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19227	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19228}
19229
19230
19231
19232/*
19233 *    Function: sd_pkt_status_check_condition
19234 *
19235 * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
19236 *
19237 *     Context: May be called from interrupt context
19238 */
19239
19240static void
19241sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
19242	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19243{
19244	ASSERT(un != NULL);
19245	ASSERT(mutex_owned(SD_MUTEX(un)));
19246	ASSERT(bp != NULL);
19247	ASSERT(xp != NULL);
19248	ASSERT(pktp != NULL);
19249
19250	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
19251	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
19252
19253	/*
19254	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
19255	 * command will be retried after the request sense). Otherwise, retry
19256	 * the command. Note: we are issuing the request sense even though the
19257	 * retry limit may have been reached for the failed command.
19258	 */
19259	if (un->un_f_arq_enabled == FALSE) {
19260		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
19261		    "no ARQ, sending request sense command\n");
19262		sd_send_request_sense_command(un, bp, pktp);
19263	} else {
19264		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "ARQ, retrying request sense command\n");
19266#if defined(__i386) || defined(__amd64)
		/*
		 * The SD_RETRY_DELAY value used here needs to be
		 * adjusted whenever SD_RETRY_DELAY changes in sddef.h.
		 */
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
		    NULL);
19274#else
19275		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
19276		    EIO, SD_RETRY_DELAY, NULL);
19277#endif
19278	}
19279
19280	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
19281}
19282
19283
19284/*
19285 *    Function: sd_pkt_status_busy
19286 *
19287 * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
19288 *
19289 *     Context: May be called from interrupt context
19290 */
19291
19292static void
19293sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19294	struct scsi_pkt *pktp)
19295{
19296	ASSERT(un != NULL);
19297	ASSERT(mutex_owned(SD_MUTEX(un)));
19298	ASSERT(bp != NULL);
19299	ASSERT(xp != NULL);
19300	ASSERT(pktp != NULL);
19301
19302	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19303	    "sd_pkt_status_busy: entry\n");
19304
19305	/* If retries are exhausted, just fail the command. */
19306	if (xp->xb_retry_count >= un->un_busy_retry_count) {
19307		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
19308		    "device busy too long\n");
19309		sd_return_failed_command(un, bp, EIO);
19310		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19311		    "sd_pkt_status_busy: exit\n");
19312		return;
19313	}
19314	xp->xb_retry_count++;
19315
19316	/*
19317	 * Try to reset the target. However, we do not want to perform
19318	 * more than one reset if the device continues to fail. The reset
19319	 * will be performed when the retry count reaches the reset
19320	 * threshold.  This threshold should be set such that at least
19321	 * one retry is issued before the reset is performed.
19322	 */
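	/*
	 * For example, with un_reset_retry_count set to 5 the reset is
	 * attempted on the fifth busy retry; the clamp to a minimum of
	 * 2 below guarantees at least one plain retry precedes it.
	 */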
19323	if (xp->xb_retry_count ==
19324	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
19325		int rval = 0;
19326		mutex_exit(SD_MUTEX(un));
19327		if (un->un_f_allow_bus_device_reset == TRUE) {
19328			/*
19329			 * First try to reset the LUN; if we cannot then
19330			 * try to reset the target.
19331			 */
19332			if (un->un_f_lun_reset_enabled == TRUE) {
19333				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19334				    "sd_pkt_status_busy: RESET_LUN\n");
19335				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
19336			}
19337			if (rval == 0) {
19338				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19339				    "sd_pkt_status_busy: RESET_TARGET\n");
19340				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
19341			}
19342		}
19343		if (rval == 0) {
19344			/*
19345			 * If the RESET_LUN and/or RESET_TARGET failed,
19346			 * try RESET_ALL
19347			 */
19348			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19349			    "sd_pkt_status_busy: RESET_ALL\n");
19350			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
19351		}
19352		mutex_enter(SD_MUTEX(un));
19353		if (rval == 0) {
19354			/*
19355			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
19356			 * At this point we give up & fail the command.
19357			 */
19358			sd_return_failed_command(un, bp, EIO);
19359			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19360			    "sd_pkt_status_busy: exit (failed cmd)\n");
19361			return;
19362		}
19363	}
19364
19365	/*
19366	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
19367	 * we have already checked the retry counts above.
19368	 */
19369	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
19370	    EIO, un->un_busy_timeout, NULL);
19371
19372	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19373	    "sd_pkt_status_busy: exit\n");
19374}
19375
19376
19377/*
19378 *    Function: sd_pkt_status_reservation_conflict
19379 *
19380 * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
19381 *		command status.
19382 *
19383 *     Context: May be called from interrupt context
19384 */
19385
19386static void
19387sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
19388	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19389{
19390	ASSERT(un != NULL);
19391	ASSERT(mutex_owned(SD_MUTEX(un)));
19392	ASSERT(bp != NULL);
19393	ASSERT(xp != NULL);
19394	ASSERT(pktp != NULL);
19395
	/*
	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then the
	 * reservation conflict could be due to various reasons, such as
	 * incorrect keys, or the initiator not being registered or not
	 * holding the reservation.  So, we return EACCES to the caller.
	 */
19401	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
19402		int cmd = SD_GET_PKT_OPCODE(pktp);
19403		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
19404		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
19405			sd_return_failed_command(un, bp, EACCES);
19406			return;
19407		}
19408	}
19409
19410	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
19411
19412	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
19413		if (sd_failfast_enable != 0) {
19414			/* By definition, we must panic here.... */
19415			sd_panic_for_res_conflict(un);
19416			/*NOTREACHED*/
19417		}
		SD_ERROR(SD_LOG_IO, un,
		    "sd_pkt_status_reservation_conflict: Disk Reserved\n");
19420		sd_return_failed_command(un, bp, EACCES);
19421		return;
19422	}
19423
19424	/*
19425	 * 1147670: retry only if sd_retry_on_reservation_conflict
19426	 * property is set (default is 1). Retries will not succeed
19427	 * on a disk reserved by another initiator. HA systems
19428	 * may reset this via sd.conf to avoid these retries.
19429	 *
19430	 * Note: The legacy return code for this failure is EIO, however EACCES
19431	 * seems more appropriate for a reservation conflict.
19432	 */
19433	if (sd_retry_on_reservation_conflict == 0) {
		SD_ERROR(SD_LOG_IO, un,
		    "sd_pkt_status_reservation_conflict: Device Reserved\n");
19436		sd_return_failed_command(un, bp, EIO);
19437		return;
19438	}
19439
19440	/*
19441	 * Retry the command if we can.
19442	 *
19443	 * Note: The legacy return code for this failure is EIO, however EACCES
19444	 * seems more appropriate for a reservation conflict.
19445	 */
19446	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
19447	    (clock_t)2, NULL);
19448}
19449
19450
19451
19452/*
19453 *    Function: sd_pkt_status_qfull
19454 *
19455 * Description: Handle a QUEUE FULL condition from the target.  This can
19456 *		occur if the HBA does not handle the queue full condition.
19457 *		(Basically this means third-party HBAs as Sun HBAs will
19458 *		handle the queue full condition.)  Note that if there are
19459 *		some commands already in the transport, then the queue full
19460 *		has occurred because the queue for this nexus is actually
19461 *		full. If there are no commands in the transport, then the
19462 *		queue full is resulting from some other initiator or lun
19463 *		consuming all the resources at the target.
19464 *
19465 *     Context: May be called from interrupt context
19466 */
19467
19468static void
19469sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
19470	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19471{
19472	ASSERT(un != NULL);
19473	ASSERT(mutex_owned(SD_MUTEX(un)));
19474	ASSERT(bp != NULL);
19475	ASSERT(xp != NULL);
19476	ASSERT(pktp != NULL);
19477
19478	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19479	    "sd_pkt_status_qfull: entry\n");
19480
19481	/*
19482	 * Just lower the QFULL throttle and retry the command.  Note that
19483	 * we do not limit the number of retries here.
19484	 */
19485	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
19486	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
19487	    SD_RESTART_TIMEOUT, NULL);
19488
19489	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19490	    "sd_pkt_status_qfull: exit\n");
19491}
19492
19493
19494/*
19495 *    Function: sd_reset_target
19496 *
19497 * Description: Issue a scsi_reset(9F), with either RESET_LUN,
19498 *		RESET_TARGET, or RESET_ALL.
19499 *
19500 *     Context: May be called under interrupt context.
19501 */
19502
19503static void
19504sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
19505{
19506	int rval = 0;
19507
19508	ASSERT(un != NULL);
19509	ASSERT(mutex_owned(SD_MUTEX(un)));
19510	ASSERT(pktp != NULL);
19511
19512	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
19513
19514	/*
19515	 * No need to reset if the transport layer has already done so.
19516	 */
19517	if ((pktp->pkt_statistics &
19518	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
19519		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19520		    "sd_reset_target: no reset\n");
19521		return;
19522	}
19523
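	/*
	 * Escalate: try a LUN reset first (if enabled), then a target
	 * reset, and finally RESET_ALL.  Note that the softstate mutex
	 * is dropped across the scsi_reset(9F) calls.
	 */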
19524	mutex_exit(SD_MUTEX(un));
19525
19526	if (un->un_f_allow_bus_device_reset == TRUE) {
19527		if (un->un_f_lun_reset_enabled == TRUE) {
19528			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19529			    "sd_reset_target: RESET_LUN\n");
19530			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
19531		}
19532		if (rval == 0) {
19533			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19534			    "sd_reset_target: RESET_TARGET\n");
19535			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
19536		}
19537	}
19538
19539	if (rval == 0) {
19540		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19541		    "sd_reset_target: RESET_ALL\n");
19542		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
19543	}
19544
19545	mutex_enter(SD_MUTEX(un));
19546
19547	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
19548}
19549
19550/*
19551 *    Function: sd_target_change_task
19552 *
19553 * Description: Handle dynamic target change
19554 *
19555 *     Context: Executes in a taskq() thread context
19556 */
19557static void
19558sd_target_change_task(void *arg)
19559{
19560	struct sd_lun		*un = arg;
19561	uint64_t		capacity;
19562	diskaddr_t		label_cap;
19563	uint_t			lbasize;
19564	sd_ssc_t		*ssc;
19565
19566	ASSERT(un != NULL);
19567	ASSERT(!mutex_owned(SD_MUTEX(un)));
19568
19569	if ((un->un_f_blockcount_is_valid == FALSE) ||
19570	    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
19571		return;
19572	}
19573
19574	ssc = sd_ssc_init(un);
19575
19576	if (sd_send_scsi_READ_CAPACITY(ssc, &capacity,
19577	    &lbasize, SD_PATH_DIRECT) != 0) {
19578		SD_ERROR(SD_LOG_ERROR, un,
		    "sd_target_change_task: failed to read capacity\n");
19580		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19581		goto task_exit;
19582	}
19583
19584	mutex_enter(SD_MUTEX(un));
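	/* Only a capacity increase is of interest here. */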
19585	if (capacity <= un->un_blockcount) {
19586		mutex_exit(SD_MUTEX(un));
19587		goto task_exit;
19588	}
19589
19590	sd_update_block_info(un, lbasize, capacity);
19591	mutex_exit(SD_MUTEX(un));
19592
19593	/*
19594	 * If lun is EFI labeled and lun capacity is greater than the
19595	 * capacity contained in the label, log a sys event.
19596	 */
19597	if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
19598	    (void*)SD_PATH_DIRECT) == 0) {
19599		mutex_enter(SD_MUTEX(un));
19600		if (un->un_f_blockcount_is_valid &&
19601		    un->un_blockcount > label_cap) {
19602			mutex_exit(SD_MUTEX(un));
19603			sd_log_lun_expansion_event(un, KM_SLEEP);
19604		} else {
19605			mutex_exit(SD_MUTEX(un));
19606		}
19607	}
19608
19609task_exit:
19610	sd_ssc_fini(ssc);
19611}
19612
19613
19614/*
19615 *    Function: sd_log_dev_status_event
19616 *
19617 * Description: Log EC_dev_status sysevent
19618 *
19619 *     Context: Never called from interrupt context
19620 */
19621static void
19622sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag)
19623{
19624	int err;
19625	char			*path;
19626	nvlist_t		*attr_list;
19627
19628	/* Allocate and build sysevent attribute list */
19629	err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, km_flag);
19630	if (err != 0) {
19631		SD_ERROR(SD_LOG_ERROR, un,
		    "sd_log_dev_status_event: failed to allocate space\n");
19633		return;
19634	}
19635
19636	path = kmem_alloc(MAXPATHLEN, km_flag);
19637	if (path == NULL) {
19638		nvlist_free(attr_list);
19639		SD_ERROR(SD_LOG_ERROR, un,
		    "sd_log_dev_status_event: failed to allocate space\n");
19641		return;
19642	}
	/*
	 * Add a path attribute to identify the lun.  We use minor
	 * node 'a' to form the sysevent attribute, e.g.
	 * "/devices/<device path>:a".
	 */
19647	(void) snprintf(path, MAXPATHLEN, "/devices");
19648	(void) ddi_pathname(SD_DEVINFO(un), path + strlen(path));
19649	(void) snprintf(path + strlen(path), MAXPATHLEN - strlen(path),
19650	    ":a");
19651
19652	err = nvlist_add_string(attr_list, DEV_PHYS_PATH, path);
19653	if (err != 0) {
19654		nvlist_free(attr_list);
19655		kmem_free(path, MAXPATHLEN);
19656		SD_ERROR(SD_LOG_ERROR, un,
		    "sd_log_dev_status_event: failed to add attribute\n");
19658		return;
19659	}
19660
19661	/* Log dynamic lun expansion sysevent */
19662	err = ddi_log_sysevent(SD_DEVINFO(un), SUNW_VENDOR, EC_DEV_STATUS,
19663	    esc, attr_list, NULL, km_flag);
19664	if (err != DDI_SUCCESS) {
19665		SD_ERROR(SD_LOG_ERROR, un,
		    "sd_log_dev_status_event: failed to log sysevent\n");
19667	}
19668
19669	nvlist_free(attr_list);
19670	kmem_free(path, MAXPATHLEN);
19671}
19672
19673
19674/*
19675 *    Function: sd_log_lun_expansion_event
19676 *
19677 * Description: Log lun expansion sys event
19678 *
19679 *     Context: Never called from interrupt context
19680 */
19681static void
19682sd_log_lun_expansion_event(struct sd_lun *un, int km_flag)
19683{
19684	sd_log_dev_status_event(un, ESC_DEV_DLE, km_flag);
19685}
19686
19687
19688/*
19689 *    Function: sd_log_eject_request_event
19690 *
19691 * Description: Log eject request sysevent
19692 *
19693 *     Context: Never called from interrupt context
19694 */
19695static void
19696sd_log_eject_request_event(struct sd_lun *un, int km_flag)
19697{
19698	sd_log_dev_status_event(un, ESC_DEV_EJECT_REQUEST, km_flag);
19699}
19700
19701
19702/*
19703 *    Function: sd_media_change_task
19704 *
19705 * Description: Recovery action for CDROM to become available.
19706 *
19707 *     Context: Executes in a taskq() thread context
19708 */
19709
19710static void
19711sd_media_change_task(void *arg)
19712{
19713	struct	scsi_pkt	*pktp = arg;
19714	struct	sd_lun		*un;
19715	struct	buf		*bp;
19716	struct	sd_xbuf		*xp;
19717	int	err		= 0;
19718	int	retry_count	= 0;
	int	retry_limit	= SD_UNIT_ATTENTION_RETRY / 10;
19720	struct	sd_sense_info	si;
19721
19722	ASSERT(pktp != NULL);
19723	bp = (struct buf *)pktp->pkt_private;
19724	ASSERT(bp != NULL);
19725	xp = SD_GET_XBUF(bp);
19726	ASSERT(xp != NULL);
19727	un = SD_GET_UN(bp);
19728	ASSERT(un != NULL);
19729	ASSERT(!mutex_owned(SD_MUTEX(un)));
19730	ASSERT(un->un_f_monitor_media_state);
19731
19732	si.ssi_severity = SCSI_ERR_INFO;
19733	si.ssi_pfa_flag = FALSE;
19734
	/*
	 * When a reset is issued on a CDROM, it takes a long time to
	 * recover.  The first few attempts to read capacity and other
	 * things related to handling the unit attention fail (with an
	 * ASC of 0x4 and an ASCQ of 0x1).  In that case we want to allow
	 * enough retries, while limiting the retries in other cases of
	 * genuine failure, such as no media in the drive.
	 */
19743	while (retry_count++ < retry_limit) {
19744		if ((err = sd_handle_mchange(un)) == 0) {
19745			break;
19746		}
19747		if (err == EAGAIN) {
19748			retry_limit = SD_UNIT_ATTENTION_RETRY;
19749		}
19750		/* Sleep for 0.5 sec. & try again */
19751		delay(drv_usectohz(500000));
19752	}
19753
19754	/*
19755	 * Dispatch (retry or fail) the original command here,
19756	 * along with appropriate console messages....
19757	 *
19758	 * Must grab the mutex before calling sd_retry_command,
19759	 * sd_print_sense_msg and sd_return_failed_command.
19760	 */
19761	mutex_enter(SD_MUTEX(un));
19762	if (err != SD_CMD_SUCCESS) {
19763		SD_UPDATE_ERRSTATS(un, sd_harderrs);
19764		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
19765		si.ssi_severity = SCSI_ERR_FATAL;
19766		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
19767		sd_return_failed_command(un, bp, EIO);
19768	} else {
19769		sd_retry_command(un, bp, SD_RETRIES_UA, sd_print_sense_msg,
19770		    &si, EIO, (clock_t)0, NULL);
19771	}
19772	mutex_exit(SD_MUTEX(un));
19773}
19774
19775
19776
19777/*
19778 *    Function: sd_handle_mchange
19779 *
19780 * Description: Perform geometry validation & other recovery when CDROM
19781 *		has been removed from drive.
19782 *
19783 * Return Code: 0 for success
19784 *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19785 *		sd_send_scsi_READ_CAPACITY()
19786 *
19787 *     Context: Executes in a taskq() thread context
19788 */
19789
19790static int
19791sd_handle_mchange(struct sd_lun *un)
19792{
19793	uint64_t	capacity;
19794	uint32_t	lbasize;
19795	int		rval;
19796	sd_ssc_t	*ssc;
19797
19798	ASSERT(!mutex_owned(SD_MUTEX(un)));
19799	ASSERT(un->un_f_monitor_media_state);
19800
19801	ssc = sd_ssc_init(un);
19802	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
19803	    SD_PATH_DIRECT_PRIORITY);
19804
19805	if (rval != 0)
19806		goto failed;
19807
19808	mutex_enter(SD_MUTEX(un));
19809	sd_update_block_info(un, lbasize, capacity);
19810
19811	if (un->un_errstats != NULL) {
19812		struct	sd_errstats *stp =
19813		    (struct sd_errstats *)un->un_errstats->ks_data;
19814		stp->sd_capacity.value.ui64 = (uint64_t)
19815		    ((uint64_t)un->un_blockcount *
19816		    (uint64_t)un->un_tgt_blocksize);
19817	}
19818
19819	/*
19820	 * Check if the media in the device is writable or not
19821	 */
19822	if (ISCD(un)) {
19823		sd_check_for_writable_cd(ssc, SD_PATH_DIRECT_PRIORITY);
19824	}
19825
19826	/*
19827	 * Note: Maybe let the strategy/partitioning chain worry about getting
19828	 * valid geometry.
19829	 */
19830	mutex_exit(SD_MUTEX(un));
19831	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
19832
19833
19834	if (cmlb_validate(un->un_cmlbhandle, 0,
19835	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
19836		sd_ssc_fini(ssc);
19837		return (EIO);
19838	} else {
19839		if (un->un_f_pkstats_enabled) {
19840			sd_set_pstats(un);
19841			SD_TRACE(SD_LOG_IO_PARTITION, un,
19842			    "sd_handle_mchange: un:0x%p pstats created and "
19843			    "set\n", un);
19844		}
19845	}
19846
19847	/*
19848	 * Try to lock the door
19849	 */
19850	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
19851	    SD_PATH_DIRECT_PRIORITY);
19852failed:
19853	if (rval != 0)
19854		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19855	sd_ssc_fini(ssc);
19856	return (rval);
19857}
19858
19859
19860/*
19861 *    Function: sd_send_scsi_DOORLOCK
19862 *
19863 * Description: Issue the scsi DOOR LOCK command
19864 *
19865 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19866 *                      structure for this target.
19867 *		flag  - SD_REMOVAL_ALLOW
19868 *			SD_REMOVAL_PREVENT
19869 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19870 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19871 *			to use the USCSI "direct" chain and bypass the normal
19872 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19873 *			command is issued as part of an error recovery action.
19874 *
19875 * Return Code: 0   - Success
19876 *		errno return code from sd_ssc_send()
19877 *
19878 *     Context: Can sleep.
19879 */
19880
19881static int
19882sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag)
19883{
19884	struct scsi_extended_sense	sense_buf;
19885	union scsi_cdb		cdb;
19886	struct uscsi_cmd	ucmd_buf;
19887	int			status;
19888	struct sd_lun		*un;
19889
19890	ASSERT(ssc != NULL);
19891	un = ssc->ssc_un;
19892	ASSERT(un != NULL);
19893	ASSERT(!mutex_owned(SD_MUTEX(un)));
19894
19895	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19896
19897	/* already determined doorlock is not supported, fake success */
19898	if (un->un_f_doorlock_supported == FALSE) {
19899		return (0);
19900	}
19901
19902	/*
	 * If we are ejecting and see an SD_REMOVAL_PREVENT,
	 * ignore the command so that we can complete the eject
	 * operation.
19906	 */
19907	if (flag == SD_REMOVAL_PREVENT) {
19908		mutex_enter(SD_MUTEX(un));
19909		if (un->un_f_ejecting == TRUE) {
19910			mutex_exit(SD_MUTEX(un));
19911			return (EAGAIN);
19912		}
19913		mutex_exit(SD_MUTEX(un));
19914	}
19915
19916	bzero(&cdb, sizeof (cdb));
19917	bzero(&ucmd_buf, sizeof (ucmd_buf));
19918
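	/*
	 * SCMD_DOORLOCK is the PREVENT ALLOW MEDIUM REMOVAL opcode;
	 * the flag value goes in CDB byte 4, whose low-order bit is
	 * the PREVENT bit.
	 */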
19919	cdb.scc_cmd = SCMD_DOORLOCK;
19920	cdb.cdb_opaque[4] = (uchar_t)flag;
19921
19922	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19923	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19924	ucmd_buf.uscsi_bufaddr	= NULL;
19925	ucmd_buf.uscsi_buflen	= 0;
19926	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19927	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19928	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19929	ucmd_buf.uscsi_timeout	= 15;
19930
19931	SD_TRACE(SD_LOG_IO, un,
19932	    "sd_send_scsi_DOORLOCK: returning sd_ssc_send\n");
19933
19934	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
19935	    UIO_SYSSPACE, path_flag);
19936
19937	if (status == 0)
19938		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
19939
19940	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19941	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19942	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19943		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19944
19945		/* fake success and skip subsequent doorlock commands */
19946		un->un_f_doorlock_supported = FALSE;
19947		return (0);
19948	}
19949
19950	return (status);
19951}
19952
19953/*
19954 *    Function: sd_send_scsi_READ_CAPACITY
19955 *
19956 * Description: This routine uses the scsi READ CAPACITY command to determine
19957 *		the device capacity in number of blocks and the device native
19958 *		block size. If this function returns a failure, then the
19959 *		values in *capp and *lbap are undefined.  If the capacity
19960 *		returned is 0xffffffff then the lun is too large for a
19961 *		normal READ CAPACITY command and the results of a
19962 *		READ CAPACITY 16 will be used instead.
19963 *
19964 *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19965 *		capp - ptr to unsigned 64-bit variable to receive the
19966 *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
19968 *			block size value from the command
19969 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19970 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19971 *			to use the USCSI "direct" chain and bypass the normal
19972 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19973 *			command is issued as part of an error recovery action.
19974 *
19975 * Return Code: 0   - Success
19976 *		EIO - IO error
19977 *		EACCES - Reservation conflict detected
19978 *		EAGAIN - Device is becoming ready
19979 *		errno return code from sd_ssc_send()
19980 *
19981 *     Context: Can sleep.  Blocks until command completes.
19982 */
19983
19984#define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19985
19986static int
19987sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
19988	int path_flag)
19989{
19990	struct	scsi_extended_sense	sense_buf;
19991	struct	uscsi_cmd	ucmd_buf;
19992	union	scsi_cdb	cdb;
19993	uint32_t		*capacity_buf;
19994	uint64_t		capacity;
19995	uint32_t		lbasize;
19996	uint32_t		pbsize;
19997	int			status;
19998	struct sd_lun		*un;
19999
20000	ASSERT(ssc != NULL);
20001
20002	un = ssc->ssc_un;
20003	ASSERT(un != NULL);
20004	ASSERT(!mutex_owned(SD_MUTEX(un)));
20005	ASSERT(capp != NULL);
20006	ASSERT(lbap != NULL);
20007
20008	SD_TRACE(SD_LOG_IO, un,
20009	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
20010
20011	/*
20012	 * First send a READ_CAPACITY command to the target.
20013	 * (This command is mandatory under SCSI-2.)
20014	 *
20015	 * Set up the CDB for the READ_CAPACITY command.  The Partial
20016	 * Medium Indicator bit is cleared.  The address field must be
20017	 * zero if the PMI bit is zero.
20018	 */
20019	bzero(&cdb, sizeof (cdb));
20020	bzero(&ucmd_buf, sizeof (ucmd_buf));
20021
20022	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
20023
20024	cdb.scc_cmd = SCMD_READ_CAPACITY;
20025
20026	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20027	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20028	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
20029	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
20030	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20031	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
20032	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20033	ucmd_buf.uscsi_timeout	= 60;
20034
20035	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20036	    UIO_SYSSPACE, path_flag);
20037
20038	switch (status) {
20039	case 0:
20040		/* Return failure if we did not get valid capacity data. */
20041		if (ucmd_buf.uscsi_resid != 0) {
20042			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20043			    "sd_send_scsi_READ_CAPACITY received invalid "
20044			    "capacity data");
20045			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
20046			return (EIO);
20047		}
20048		/*
20049		 * Read capacity and block size from the READ CAPACITY 10 data.
20050		 * This data may be adjusted later due to device specific
20051		 * issues.
20052		 *
20053		 * According to the SCSI spec, the READ CAPACITY 10
20054		 * command returns the following:
20055		 *
20056		 *  bytes 0-3: Maximum logical block address available.
20057		 *		(MSB in byte:0 & LSB in byte:3)
20058		 *
20059		 *  bytes 4-7: Block length in bytes
20060		 *		(MSB in byte:4 & LSB in byte:7)
20061		 *
20062		 */
20063		capacity = BE_32(capacity_buf[0]);
20064		lbasize = BE_32(capacity_buf[1]);
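		/*
		 * For example, returned data 00 00 0F FF 00 00 02 00
		 * decodes to a last LBA of 0xFFF and a 512-byte (0x200)
		 * block length, i.e. 0x1000 blocks after the +1
		 * adjustment applied below.
		 */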
20065
20066		/*
20067		 * Done with capacity_buf
20068		 */
20069		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
20070
20071		/*
20072		 * if the reported capacity is set to all 0xf's, then
20073		 * this disk is too large and requires SBC-2 commands.
20074		 * Reissue the request using READ CAPACITY 16.
20075		 */
20076		if (capacity == 0xffffffff) {
20077			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
20078			status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
20079			    &lbasize, &pbsize, path_flag);
20080			if (status != 0) {
20081				return (status);
20082			} else {
20083				goto rc16_done;
20084			}
20085		}
20086		break;	/* Success! */
20087	case EIO:
20088		switch (ucmd_buf.uscsi_status) {
20089		case STATUS_RESERVATION_CONFLICT:
20090			status = EACCES;
20091			break;
20092		case STATUS_CHECK:
20093			/*
20094			 * Check condition; look for ASC/ASCQ of 0x04/0x01
20095			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
20096			 */
20097			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20098			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
20099			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
20100				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
20101				return (EAGAIN);
20102			}
20103			break;
20104		default:
20105			break;
20106		}
20107		/* FALLTHRU */
20108	default:
20109		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
20110		return (status);
20111	}
20112
20113	/*
20114	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
20115	 * (2352 and 0 are common) so for these devices always force the value
20116	 * to 2048 as required by the ATAPI specs.
20117	 */
20118	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
20119		lbasize = 2048;
20120	}
20121
20122	/*
20123	 * Get the maximum LBA value from the READ CAPACITY data.
20124	 * Here we assume that the Partial Medium Indicator (PMI) bit
20125	 * was cleared when issuing the command. This means that the LBA
20126	 * returned from the device is the LBA of the last logical block
20127	 * on the logical unit.  The actual logical block count will be
20128	 * this value plus one.
20129	 */
20130	capacity += 1;
20131
20132	/*
20133	 * Currently, for removable media, the capacity is saved in terms
20134	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
20135	 */
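	/*
	 * For example, a CD-ROM reporting 2048-byte blocks on a system
	 * with a 512-byte un_sys_blocksize scales the block count by 4.
	 */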
20136	if (un->un_f_has_removable_media)
20137		capacity *= (lbasize / un->un_sys_blocksize);
20138
20139rc16_done:
20140
20141	/*
20142	 * Copy the values from the READ CAPACITY command into the space
20143	 * provided by the caller.
20144	 */
20145	*capp = capacity;
20146	*lbap = lbasize;
20147
20148	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
20149	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
20150
20151	/*
20152	 * Both the lbasize and capacity from the device must be nonzero,
20153	 * otherwise we assume that the values are not valid and return
20154	 * failure to the caller. (4203735)
20155	 */
20156	if ((capacity == 0) || (lbasize == 0)) {
20157		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20158		    "sd_send_scsi_READ_CAPACITY received invalid value "
20159		    "capacity %llu lbasize %d", capacity, lbasize);
20160		return (EIO);
20161	}
20162	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20163	return (0);
20164}
20165
20166/*
20167 *    Function: sd_send_scsi_READ_CAPACITY_16
20168 *
20169 * Description: This routine uses the scsi READ CAPACITY 16 command to
20170 *		determine the device capacity in number of blocks and the
20171 *		device native block size.  If this function returns a failure,
20172 *		then the values in *capp and *lbap are undefined.
20173 *		This routine should be called by sd_send_scsi_READ_CAPACITY
20174 *              which will apply any device specific adjustments to capacity
20175 *              and lbasize. One exception is it is also called by
20176 *              sd_get_media_info_ext. In that function, there is no need to
20177 *              adjust the capacity and lbasize.
20178 *
20179 *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
20180 *		capp - ptr to unsigned 64-bit variable to receive the
20181 *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
20183 *			block size value from the command
20184 *              psp  - ptr to unsigned 32-bit variable to receive the
20185 *                      physical block size value from the command
20186 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20187 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20188 *			to use the USCSI "direct" chain and bypass the normal
20189 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
20190 *			this command is issued as part of an error recovery
20191 *			action.
20192 *
20193 * Return Code: 0   - Success
20194 *		EIO - IO error
20195 *		EACCES - Reservation conflict detected
20196 *		EAGAIN - Device is becoming ready
20197 *		errno return code from sd_ssc_send()
20198 *
20199 *     Context: Can sleep.  Blocks until command completes.
20200 */
20201
20202#define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
20203
20204static int
20205sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
20206	uint32_t *lbap, uint32_t *psp, int path_flag)
20207{
20208	struct	scsi_extended_sense	sense_buf;
20209	struct	uscsi_cmd	ucmd_buf;
20210	union	scsi_cdb	cdb;
20211	uint64_t		*capacity16_buf;
20212	uint64_t		capacity;
20213	uint32_t		lbasize;
20214	uint32_t		pbsize;
20215	uint32_t		lbpb_exp;
20216	int			status;
20217	struct sd_lun		*un;
20218
20219	ASSERT(ssc != NULL);
20220
20221	un = ssc->ssc_un;
20222	ASSERT(un != NULL);
20223	ASSERT(!mutex_owned(SD_MUTEX(un)));
20224	ASSERT(capp != NULL);
20225	ASSERT(lbap != NULL);
20226
20227	SD_TRACE(SD_LOG_IO, un,
20228	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
20229
20230	/*
20231	 * First send a READ_CAPACITY_16 command to the target.
20232	 *
20233	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
20234	 * Medium Indicator bit is cleared.  The address field must be
20235	 * zero if the PMI bit is zero.
20236	 */
20237	bzero(&cdb, sizeof (cdb));
20238	bzero(&ucmd_buf, sizeof (ucmd_buf));
20239
20240	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
20241
20242	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20243	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
20244	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
20245	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
20246	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20247	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
20248	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20249	ucmd_buf.uscsi_timeout	= 60;
20250
20251	/*
20252	 * Read Capacity (16) is a Service Action In command.  One
20253	 * command byte (0x9E) is overloaded for multiple operations,
20254	 * with the second CDB byte specifying the desired operation
20255	 */
20256	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
20257	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
20258
20259	/*
20260	 * Fill in allocation length field
20261	 */
20262	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
20263
20264	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20265	    UIO_SYSSPACE, path_flag);
20266
20267	switch (status) {
20268	case 0:
20269		/* Return failure if we did not get valid capacity data. */
20270		if (ucmd_buf.uscsi_resid > 20) {
20271			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20272			    "sd_send_scsi_READ_CAPACITY_16 received invalid "
20273			    "capacity data");
20274			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20275			return (EIO);
20276		}
20277
20278		/*
20279		 * Read capacity and block size from the READ CAPACITY 16 data.
20280		 * This data may be adjusted later due to device specific
20281		 * issues.
20282		 *
20283		 * According to the SCSI spec, the READ CAPACITY 16
20284		 * command returns the following:
20285		 *
20286		 *  bytes 0-7: Maximum logical block address available.
20287		 *		(MSB in byte:0 & LSB in byte:7)
20288		 *
20289		 *  bytes 8-11: Block length in bytes
20290		 *		(MSB in byte:8 & LSB in byte:11)
20291		 *
20292		 *  byte 13: LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT
20293		 */
20294		capacity = BE_64(capacity16_buf[0]);
20295		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
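		/*
		 * capacity16_buf[1] holds bytes 8-15 of the response;
		 * after the byte swap, byte 13 sits at bits 16-23, and
		 * its low nibble is the LOGICAL BLOCKS PER PHYSICAL
		 * BLOCK EXPONENT field.
		 */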
20296		lbpb_exp = (BE_64(capacity16_buf[1]) >> 16) & 0x0f;
20297
20298		pbsize = lbasize << lbpb_exp;
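		/*
		 * For example, a 512e drive reporting an lbasize of 512
		 * and an exponent of 3 yields pbsize = 512 << 3 = 4096.
		 */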
20299
20300		/*
20301		 * Done with capacity16_buf
20302		 */
20303		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20304
20305		/*
20306		 * if the reported capacity is set to all 0xf's, then
20307		 * this disk is too large.  This could only happen with
		 * a device that supports LBAs larger than 64 bits, which
20309		 * are not defined by any current T10 standards.
20310		 */
20311		if (capacity == 0xffffffffffffffff) {
20312			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20313			    "disk is too large");
20314			return (EIO);
20315		}
20316		break;	/* Success! */
20317	case EIO:
20318		switch (ucmd_buf.uscsi_status) {
20319		case STATUS_RESERVATION_CONFLICT:
20320			status = EACCES;
20321			break;
20322		case STATUS_CHECK:
20323			/*
20324			 * Check condition; look for ASC/ASCQ of 0x04/0x01
20325			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
20326			 */
20327			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20328			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
20329			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
20330				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20331				return (EAGAIN);
20332			}
20333			break;
20334		default:
20335			break;
20336		}
20337		/* FALLTHRU */
20338	default:
20339		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20340		return (status);
20341	}
20342
20343	/*
20344	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
20345	 * (2352 and 0 are common) so for these devices always force the value
20346	 * to 2048 as required by the ATAPI specs.
20347	 */
20348	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
20349		lbasize = 2048;
20350	}
20351
20352	/*
20353	 * Get the maximum LBA value from the READ CAPACITY 16 data.
20354	 * Here we assume that the Partial Medium Indicator (PMI) bit
20355	 * was cleared when issuing the command. This means that the LBA
20356	 * returned from the device is the LBA of the last logical block
20357	 * on the logical unit.  The actual logical block count will be
20358	 * this value plus one.
20359	 */
20360	capacity += 1;
20361
20362	/*
20363	 * Currently, for removable media, the capacity is saved in terms
20364	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
20365	 */
20366	if (un->un_f_has_removable_media)
20367		capacity *= (lbasize / un->un_sys_blocksize);
20368
20369	*capp = capacity;
20370	*lbap = lbasize;
20371	*psp = pbsize;
20372
20373	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
20374	    "capacity:0x%llx  lbasize:0x%x, pbsize: 0x%x\n",
20375	    capacity, lbasize, pbsize);
20376
20377	if ((capacity == 0) || (lbasize == 0) || (pbsize == 0)) {
20378		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20379		    "sd_send_scsi_READ_CAPACITY_16 received invalid value "
		    "capacity %llu lbasize %d pbsize %d",
		    capacity, lbasize, pbsize);
20381		return (EIO);
20382	}
20383
20384	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20385	return (0);
20386}
20387
20388
20389/*
20390 *    Function: sd_send_scsi_START_STOP_UNIT
20391 *
20392 * Description: Issue a scsi START STOP UNIT command to the target.
20393 *
 *   Arguments: ssc    - ssc contains pointer to driver soft state (unit)
 *                       structure for this target.
 *		pc_flag - SD_POWER_CONDITION
 *			  SD_START_STOP
 *		flag  - SD_TARGET_START
 *			SD_TARGET_STOP
 *			SD_TARGET_EJECT
 *			SD_TARGET_CLOSE
20402 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20403 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20404 *			to use the USCSI "direct" chain and bypass the normal
20405 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
20406 *			command is issued as part of an error recovery action.
20407 *
20408 * Return Code: 0   - Success
20409 *		EIO - IO error
20410 *		EACCES - Reservation conflict detected
20411 *		ENXIO  - Not Ready, medium not present
20412 *		errno return code from sd_ssc_send()
20413 *
20414 *     Context: Can sleep.
20415 */
20416
20417static int
20418sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag, int flag,
20419    int path_flag)
20420{
20421	struct	scsi_extended_sense	sense_buf;
20422	union scsi_cdb		cdb;
20423	struct uscsi_cmd	ucmd_buf;
20424	int			status;
20425	struct sd_lun		*un;
20426
20427	ASSERT(ssc != NULL);
20428	un = ssc->ssc_un;
20429	ASSERT(un != NULL);
20430	ASSERT(!mutex_owned(SD_MUTEX(un)));
20431
20432	SD_TRACE(SD_LOG_IO, un,
20433	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
20434
20435	if (un->un_f_check_start_stop &&
20436	    (pc_flag == SD_START_STOP) &&
20437	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
20438	    (un->un_f_start_stop_supported != TRUE)) {
20439		return (0);
20440	}
20441
20442	/*
	 * If an eject operation is already in progress, any command
	 * other than SD_TARGET_EJECT must return EAGAIN immediately.
20446	 */
20447	if (flag != SD_TARGET_EJECT) {
20448		mutex_enter(SD_MUTEX(un));
20449		if (un->un_f_ejecting == TRUE) {
20450			mutex_exit(SD_MUTEX(un));
20451			return (EAGAIN);
20452		}
20453		mutex_exit(SD_MUTEX(un));
20454	}
20455
20456	bzero(&cdb, sizeof (cdb));
20457	bzero(&ucmd_buf, sizeof (ucmd_buf));
20458	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20459
20460	cdb.scc_cmd = SCMD_START_STOP;
20461	cdb.cdb_opaque[4] = (pc_flag == SD_POWER_CONDITION) ?
20462	    (uchar_t)(flag << 4) : (uchar_t)flag;
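	/*
	 * For reference (illustrative, per the SBC START STOP UNIT CDB
	 * layout): the START/LOEJ bits occupy the low nibble of byte 4
	 * and a power condition code occupies the high nibble. Assuming
	 * SD_TARGET_START == 1 and an ACTIVE power condition code of 0x1,
	 * the two cases above would produce:
	 *
	 *	SD_START_STOP:      cdb[4] = 0x01	(START bit set)
	 *	SD_POWER_CONDITION: cdb[4] = 0x10	(ACTIVE condition)
	 */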
20463
20464	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20465	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20466	ucmd_buf.uscsi_bufaddr	= NULL;
20467	ucmd_buf.uscsi_buflen	= 0;
20468	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20469	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20470	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
20471	ucmd_buf.uscsi_timeout	= 200;
20472
20473	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20474	    UIO_SYSSPACE, path_flag);
20475
20476	switch (status) {
20477	case 0:
20478		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20479		break;	/* Success! */
20480	case EIO:
20481		switch (ucmd_buf.uscsi_status) {
20482		case STATUS_RESERVATION_CONFLICT:
20483			status = EACCES;
20484			break;
20485		case STATUS_CHECK:
20486			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
20487				switch (scsi_sense_key(
20488				    (uint8_t *)&sense_buf)) {
20489				case KEY_ILLEGAL_REQUEST:
20490					status = ENOTSUP;
20491					break;
20492				case KEY_NOT_READY:
20493					if (scsi_sense_asc(
20494					    (uint8_t *)&sense_buf)
20495					    == 0x3A) {
20496						status = ENXIO;
20497					}
20498					break;
20499				default:
20500					break;
20501				}
20502			}
20503			break;
20504		default:
20505			break;
20506		}
20507		break;
20508	default:
20509		break;
20510	}
20511
20512	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
20513
20514	return (status);
20515}
20516
20517
20518/*
20519 *    Function: sd_start_stop_unit_callback
20520 *
20521 * Description: timeout(9F) callback to begin recovery process for a
20522 *		device that has spun down.
20523 *
20524 *   Arguments: arg - pointer to associated softstate struct.
20525 *
20526 *     Context: Executes in a timeout(9F) thread context
20527 */
20528
20529static void
20530sd_start_stop_unit_callback(void *arg)
20531{
20532	struct sd_lun	*un = arg;
20533	ASSERT(un != NULL);
20534	ASSERT(!mutex_owned(SD_MUTEX(un)));
20535
20536	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
20537
20538	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
20539}
20540
20541
20542/*
20543 *    Function: sd_start_stop_unit_task
20544 *
20545 * Description: Recovery procedure when a drive is spun down.
20546 *
20547 *   Arguments: arg - pointer to associated softstate struct.
20548 *
20549 *     Context: Executes in a taskq() thread context
20550 */
20551
20552static void
20553sd_start_stop_unit_task(void *arg)
20554{
20555	struct sd_lun	*un = arg;
20556	sd_ssc_t	*ssc;
20557	int		power_level;
20558	int		rval;
20559
20560	ASSERT(un != NULL);
20561	ASSERT(!mutex_owned(SD_MUTEX(un)));
20562
20563	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
20564
20565	/*
	 * Some unformatted drives report a not-ready error; there is no
	 * need to restart if a format has been initiated.
20568	 */
20569	mutex_enter(SD_MUTEX(un));
20570	if (un->un_f_format_in_progress == TRUE) {
20571		mutex_exit(SD_MUTEX(un));
20572		return;
20573	}
20574	mutex_exit(SD_MUTEX(un));
20575
20576	ssc = sd_ssc_init(un);
20577	/*
20578	 * When a START STOP command is issued from here, it is part of a
20579	 * failure recovery operation and must be issued before any other
20580	 * commands, including any pending retries. Thus it must be sent
20581	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
20582	 * succeeds or not, we will start I/O after the attempt.
20583	 * If power condition is supported and the current power level
20584	 * is capable of performing I/O, we should set the power condition
20585	 * to that level. Otherwise, set the power condition to ACTIVE.
20586	 */
20587	if (un->un_f_power_condition_supported) {
20588		mutex_enter(SD_MUTEX(un));
20589		ASSERT(SD_PM_IS_LEVEL_VALID(un, un->un_power_level));
20590		power_level = sd_pwr_pc.ran_perf[un->un_power_level]
20591		    > 0 ? un->un_power_level : SD_SPINDLE_ACTIVE;
20592		mutex_exit(SD_MUTEX(un));
20593		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
20594		    sd_pl2pc[power_level], SD_PATH_DIRECT_PRIORITY);
20595	} else {
20596		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
20597		    SD_TARGET_START, SD_PATH_DIRECT_PRIORITY);
20598	}
20599
20600	if (rval != 0)
20601		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
20602	sd_ssc_fini(ssc);
20603	/*
20604	 * The above call blocks until the START_STOP_UNIT command completes.
20605	 * Now that it has completed, we must re-try the original IO that
20606	 * received the NOT READY condition in the first place. There are
20607	 * three possible conditions here:
20608	 *
20609	 *  (1) The original IO is on un_retry_bp.
20610	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
20611	 *	is NULL.
20612	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
20613	 *	points to some other, unrelated bp.
20614	 *
20615	 * For each case, we must call sd_start_cmds() with un_retry_bp
20616	 * as the argument. If un_retry_bp is NULL, this will initiate
20617	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
20618	 * then this will process the bp on un_retry_bp. That may or may not
20619	 * be the original IO, but that does not matter: the important thing
20620	 * is to keep the IO processing going at this point.
20621	 *
20622	 * Note: This is a very specific error recovery sequence associated
20623	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
20624	 * serialize the I/O with completion of the spin-up.
20625	 */
20626	mutex_enter(SD_MUTEX(un));
20627	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
20628	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
20629	    un, un->un_retry_bp);
20630	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
20631	sd_start_cmds(un, un->un_retry_bp);
20632	mutex_exit(SD_MUTEX(un));
20633
20634	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
20635}
20636
20637
20638/*
20639 *    Function: sd_send_scsi_INQUIRY
20640 *
20641 * Description: Issue the scsi INQUIRY command.
20642 *
20643 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20644 *                      structure for this target.
20645 *		bufaddr
20646 *		buflen
20647 *		evpd
20648 *		page_code
 *		residp - pointer to return the command residual (optional)
20650 *
20651 * Return Code: 0   - Success
20652 *		errno return code from sd_ssc_send()
20653 *
20654 *     Context: Can sleep. Does not return until command is completed.
20655 */
20656
20657static int
20658sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, size_t buflen,
20659	uchar_t evpd, uchar_t page_code, size_t *residp)
20660{
20661	union scsi_cdb		cdb;
20662	struct uscsi_cmd	ucmd_buf;
20663	int			status;
20664	struct sd_lun		*un;
20665
20666	ASSERT(ssc != NULL);
20667	un = ssc->ssc_un;
20668	ASSERT(un != NULL);
20669	ASSERT(!mutex_owned(SD_MUTEX(un)));
20670	ASSERT(bufaddr != NULL);
20671
20672	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
20673
20674	bzero(&cdb, sizeof (cdb));
20675	bzero(&ucmd_buf, sizeof (ucmd_buf));
20676	bzero(bufaddr, buflen);
20677
20678	cdb.scc_cmd = SCMD_INQUIRY;
20679	cdb.cdb_opaque[1] = evpd;
20680	cdb.cdb_opaque[2] = page_code;
20681	FORMG0COUNT(&cdb, buflen);
20682
20683	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20684	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20685	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20686	ucmd_buf.uscsi_buflen	= buflen;
20687	ucmd_buf.uscsi_rqbuf	= NULL;
20688	ucmd_buf.uscsi_rqlen	= 0;
20689	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
20690	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
20691
20692	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20693	    UIO_SYSSPACE, SD_PATH_DIRECT);
20694
20695	/*
	 * Only handle status == 0; the upper-level caller
	 * will make a different assessment based on the context.
20698	 */
20699	if (status == 0)
20700		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20701
20702	if ((status == 0) && (residp != NULL)) {
20703		*residp = ucmd_buf.uscsi_resid;
20704	}
20705
20706	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
20707
20708	return (status);
20709}
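/*
 * Usage sketch for the routine above (illustrative only): fetching the
 * Unit Serial Number VPD page (0x80). The EVPD bit in CDB byte 1 selects
 * vital product data pages; evpd == 0 with page_code == 0 returns the
 * standard INQUIRY data. On success, buf[4] holds the serial number
 * length and buf[5] onward the ASCII serial number.
 *
 *	uchar_t	buf[0xff];
 *	size_t	resid;
 *
 *	status = sd_send_scsi_INQUIRY(ssc, buf, sizeof (buf),
 *	    0x01, 0x80, &resid);
 */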
20710
20711
20712/*
20713 *    Function: sd_send_scsi_TEST_UNIT_READY
20714 *
20715 * Description: Issue the scsi TEST UNIT READY command.
20716 *		This routine can be told to set the flag USCSI_DIAGNOSE to
20717 *		prevent retrying failed commands. Use this when the intent
20718 *		is either to check for device readiness, to clear a Unit
20719 *		Attention, or to clear any outstanding sense data.
20720 *		However under specific conditions the expected behavior
20721 *		is for retries to bring a device ready, so use the flag
20722 *		with caution.
20723 *
20724 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20725 *                      structure for this target.
20726 *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
20727 *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
 *			0: don't check for media present, do retries on cmd.
20729 *
20730 * Return Code: 0   - Success
20731 *		EIO - IO error
20732 *		EACCES - Reservation conflict detected
20733 *		ENXIO  - Not Ready, medium not present
20734 *		errno return code from sd_ssc_send()
20735 *
20736 *     Context: Can sleep. Does not return until command is completed.
20737 */
20738
20739static int
20740sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag)
20741{
20742	struct	scsi_extended_sense	sense_buf;
20743	union scsi_cdb		cdb;
20744	struct uscsi_cmd	ucmd_buf;
20745	int			status;
20746	struct sd_lun		*un;
20747
20748	ASSERT(ssc != NULL);
20749	un = ssc->ssc_un;
20750	ASSERT(un != NULL);
20751	ASSERT(!mutex_owned(SD_MUTEX(un)));
20752
20753	SD_TRACE(SD_LOG_IO, un,
20754	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
20755
20756	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are pending commands, return success; this is a bit
	 * arbitrary, but is OK for non-removables (i.e. the elite1 disks)
	 * and non-clustering configurations.
20765	 */
20766	if (un->un_f_cfg_tur_check == TRUE) {
20767		mutex_enter(SD_MUTEX(un));
20768		if (un->un_ncmds_in_transport != 0) {
20769			mutex_exit(SD_MUTEX(un));
20770			return (0);
20771		}
20772		mutex_exit(SD_MUTEX(un));
20773	}
20774
20775	bzero(&cdb, sizeof (cdb));
20776	bzero(&ucmd_buf, sizeof (ucmd_buf));
20777	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20778
20779	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
20780
20781	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20782	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20783	ucmd_buf.uscsi_bufaddr	= NULL;
20784	ucmd_buf.uscsi_buflen	= 0;
20785	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20786	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20787	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
20788
20789	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
20790	if ((flag & SD_DONT_RETRY_TUR) != 0) {
20791		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
20792	}
20793	ucmd_buf.uscsi_timeout	= 60;
20794
20795	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20796	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
20797	    SD_PATH_STANDARD));
20798
20799	switch (status) {
20800	case 0:
20801		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20802		break;	/* Success! */
20803	case EIO:
20804		switch (ucmd_buf.uscsi_status) {
20805		case STATUS_RESERVATION_CONFLICT:
20806			status = EACCES;
20807			break;
20808		case STATUS_CHECK:
20809			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
20810				break;
20811			}
20812			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20813			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20814			    KEY_NOT_READY) &&
20815			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
20816				status = ENXIO;
20817			}
20818			break;
20819		default:
20820			break;
20821		}
20822		break;
20823	default:
20824		break;
20825	}
20826
20827	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
20828
20829	return (status);
20830}
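/*
 * Usage sketch for the routine above (illustrative only): a caller
 * probing for media would typically do
 *
 *	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
 *	if (status == ENXIO)
 *		... medium not present (NOT READY, ASC 0x3A) ...
 *
 * while a caller that only wants to clear a pending Unit Attention would
 * pass SD_DONT_RETRY_TUR so that a failing TUR is not retried.
 */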
20831
20832/*
20833 *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20834 *
20835 * Description: Issue the scsi PERSISTENT RESERVE IN command.
20836 *
20837 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20838 *                      structure for this target.
20839 *
20840 * Return Code: 0   - Success
20841 *		EACCES
20842 *		ENOTSUP
20843 *		errno return code from sd_ssc_send()
20844 *
20845 *     Context: Can sleep. Does not return until command is completed.
20846 */
20847
20848static int
20849sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc, uchar_t  usr_cmd,
20850	uint16_t data_len, uchar_t *data_bufp)
20851{
20852	struct scsi_extended_sense	sense_buf;
20853	union scsi_cdb		cdb;
20854	struct uscsi_cmd	ucmd_buf;
20855	int			status;
20856	int			no_caller_buf = FALSE;
20857	struct sd_lun		*un;
20858
20859	ASSERT(ssc != NULL);
20860	un = ssc->ssc_un;
20861	ASSERT(un != NULL);
20862	ASSERT(!mutex_owned(SD_MUTEX(un)));
20863	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
20864
20865	SD_TRACE(SD_LOG_IO, un,
20866	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
20867
20868	bzero(&cdb, sizeof (cdb));
20869	bzero(&ucmd_buf, sizeof (ucmd_buf));
20870	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20871	if (data_bufp == NULL) {
20872		/* Allocate a default buf if the caller did not give one */
20873		ASSERT(data_len == 0);
20874		data_len  = MHIOC_RESV_KEY_SIZE;
20875		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
20876		no_caller_buf = TRUE;
20877	}
20878
20879	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
20880	cdb.cdb_opaque[1] = usr_cmd;
20881	FORMG1COUNT(&cdb, data_len);
20882
20883	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20884	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20885	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
20886	ucmd_buf.uscsi_buflen	= data_len;
20887	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20888	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20889	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20890	ucmd_buf.uscsi_timeout	= 60;
20891
20892	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20893	    UIO_SYSSPACE, SD_PATH_STANDARD);
20894
20895	switch (status) {
20896	case 0:
20897		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20898
20899		break;	/* Success! */
20900	case EIO:
20901		switch (ucmd_buf.uscsi_status) {
20902		case STATUS_RESERVATION_CONFLICT:
20903			status = EACCES;
20904			break;
20905		case STATUS_CHECK:
20906			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20907			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20908			    KEY_ILLEGAL_REQUEST)) {
20909				status = ENOTSUP;
20910			}
20911			break;
20912		default:
20913			break;
20914		}
20915		break;
20916	default:
20917		break;
20918	}
20919
20920	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
20921
20922	if (no_caller_buf == TRUE) {
20923		kmem_free(data_bufp, data_len);
20924	}
20925
20926	return (status);
20927}
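/*
 * Usage sketch for the routine above (illustrative only): reading the
 * registered keys. Per SPC-3, the returned READ KEYS data starts with an
 * 8-byte header (4-byte PRgeneration, 4-byte additional length), followed
 * by the 8-byte reservation keys themselves:
 *
 *	uchar_t	buf[MHIOC_RESV_KEY_SIZE * 8];
 *
 *	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
 *	    sizeof (buf), buf);
 */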
20928
20929
20930/*
20931 *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20932 *
 * Description: This routine is the driver entry point for handling
 *		multi-host persistent reservation requests (e.g.
 *		MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
 *		MHIOCGRP_PREEMPTANDABORT) by sending the SCSI-3 PROUT
 *		commands to the device.
20937 *
20938 *   Arguments: ssc  -  ssc contains un - pointer to soft state struct
20939 *                      for the target.
 *		usr_cmd SCSI-3 reservation facility command (one of
 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
 *			SD_SCSI3_PREEMPTANDABORT,
 *			SD_SCSI3_REGISTERANDIGNOREKEY)
 *		usr_bufp - user provided pointer to a register, reserve
 *			descriptor, or preempt and abort structure
 *			(mhioc_register_t, mhioc_resv_desc_t,
 *			mhioc_preemptandabort_t,
 *			mhioc_registerandignorekey_t)
20946 *
20947 * Return Code: 0   - Success
20948 *		EACCES
20949 *		ENOTSUP
20950 *		errno return code from sd_ssc_send()
20951 *
20952 *     Context: Can sleep. Does not return until command is completed.
20953 */
20954
20955static int
20956sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc, uchar_t usr_cmd,
20957	uchar_t	*usr_bufp)
20958{
20959	struct scsi_extended_sense	sense_buf;
20960	union scsi_cdb		cdb;
20961	struct uscsi_cmd	ucmd_buf;
20962	int			status;
20963	uchar_t			data_len = sizeof (sd_prout_t);
20964	sd_prout_t		*prp;
20965	struct sd_lun		*un;
20966
20967	ASSERT(ssc != NULL);
20968	un = ssc->ssc_un;
20969	ASSERT(un != NULL);
20970	ASSERT(!mutex_owned(SD_MUTEX(un)));
20971	ASSERT(data_len == 24);	/* required by scsi spec */
20972
20973	SD_TRACE(SD_LOG_IO, un,
20974	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20975
20976	if (usr_bufp == NULL) {
20977		return (EINVAL);
20978	}
20979
20980	bzero(&cdb, sizeof (cdb));
20981	bzero(&ucmd_buf, sizeof (ucmd_buf));
20982	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20983	prp = kmem_zalloc(data_len, KM_SLEEP);
20984
20985	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20986	cdb.cdb_opaque[1] = usr_cmd;
20987	FORMG1COUNT(&cdb, data_len);
20988
20989	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20990	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20991	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20992	ucmd_buf.uscsi_buflen	= data_len;
20993	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20994	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20995	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20996	ucmd_buf.uscsi_timeout	= 60;
20997
20998	switch (usr_cmd) {
20999	case SD_SCSI3_REGISTER: {
21000		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
21001
21002		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
21003		bcopy(ptr->newkey.key, prp->service_key,
21004		    MHIOC_RESV_KEY_SIZE);
21005		prp->aptpl = ptr->aptpl;
21006		break;
21007	}
21008	case SD_SCSI3_RESERVE:
21009	case SD_SCSI3_RELEASE: {
21010		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
21011
21012		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
21013		prp->scope_address = BE_32(ptr->scope_specific_addr);
21014		cdb.cdb_opaque[2] = ptr->type;
21015		break;
21016	}
21017	case SD_SCSI3_PREEMPTANDABORT: {
21018		mhioc_preemptandabort_t *ptr =
21019		    (mhioc_preemptandabort_t *)usr_bufp;
21020
21021		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
21022		bcopy(ptr->victim_key.key, prp->service_key,
21023		    MHIOC_RESV_KEY_SIZE);
21024		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
21025		cdb.cdb_opaque[2] = ptr->resvdesc.type;
21026		ucmd_buf.uscsi_flags |= USCSI_HEAD;
21027		break;
21028	}
21029	case SD_SCSI3_REGISTERANDIGNOREKEY:
21030	{
21031		mhioc_registerandignorekey_t *ptr;
21032		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
21033		bcopy(ptr->newkey.key,
21034		    prp->service_key, MHIOC_RESV_KEY_SIZE);
21035		prp->aptpl = ptr->aptpl;
21036		break;
21037	}
21038	default:
21039		ASSERT(FALSE);
21040		break;
21041	}
21042
21043	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21044	    UIO_SYSSPACE, SD_PATH_STANDARD);
21045
21046	switch (status) {
21047	case 0:
21048		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21049		break;	/* Success! */
21050	case EIO:
21051		switch (ucmd_buf.uscsi_status) {
21052		case STATUS_RESERVATION_CONFLICT:
21053			status = EACCES;
21054			break;
21055		case STATUS_CHECK:
21056			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
21057			    (scsi_sense_key((uint8_t *)&sense_buf) ==
21058			    KEY_ILLEGAL_REQUEST)) {
21059				status = ENOTSUP;
21060			}
21061			break;
21062		default:
21063			break;
21064		}
21065		break;
21066	default:
21067		break;
21068	}
21069
21070	kmem_free(prp, data_len);
21071	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
21072	return (status);
21073}
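/*
 * For reference: the 24-byte PROUT parameter list built above follows
 * the SPC-3 layout, which the sd_prout_t fields are assumed to mirror:
 *
 *	bytes  0-7:	reservation key			(prp->res_key)
 *	bytes  8-15:	service action key		(prp->service_key)
 *	bytes 16-19:	scope-specific address		(prp->scope_address)
 *	byte  20:	flags, bit 0 = APTPL		(prp->aptpl)
 *	bytes 21-23:	reserved/obsolete
 */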
21074
21075
21076/*
21077 *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
21078 *
21079 * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
21080 *
21081 *   Arguments: un - pointer to the target's soft state struct
21082 *              dkc - pointer to the callback structure
21083 *
21084 * Return Code: 0 - success
21085 *		errno-type error code
21086 *
21087 *     Context: kernel thread context only.
21088 *
21089 *  _______________________________________________________________
21090 * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
21091 * |FLUSH_VOLATILE|              | operation                       |
21092 * |______________|______________|_________________________________|
21093 * | 0            | NULL         | Synchronous flush on both       |
21094 * |              |              | volatile and non-volatile cache |
21095 * |______________|______________|_________________________________|
21096 * | 1            | NULL         | Synchronous flush on volatile   |
21097 * |              |              | cache; disk drivers may suppress|
21098 * |              |              | flush if disk table indicates   |
21099 * |              |              | non-volatile cache              |
21100 * |______________|______________|_________________________________|
21101 * | 0            | !NULL        | Asynchronous flush on both      |
21102 * |              |              | volatile and non-volatile cache;|
21103 * |______________|______________|_________________________________|
21104 * | 1            | !NULL        | Asynchronous flush on volatile  |
21105 * |              |              | cache; disk drivers may suppress|
21106 * |              |              | flush if disk table indicates   |
21107 * |              |              | non-volatile cache              |
21108 * |______________|______________|_________________________________|
21109 *
21110 */
21111
21112static int
21113sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
21114{
21115	struct sd_uscsi_info	*uip;
21116	struct uscsi_cmd	*uscmd;
21117	union scsi_cdb		*cdb;
21118	struct buf		*bp;
21119	int			rval = 0;
21120	int			is_async;
21121
21122	SD_TRACE(SD_LOG_IO, un,
21123	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
21124
21125	ASSERT(un != NULL);
21126	ASSERT(!mutex_owned(SD_MUTEX(un)));
21127
21128	if (dkc == NULL || dkc->dkc_callback == NULL) {
21129		is_async = FALSE;
21130	} else {
21131		is_async = TRUE;
21132	}
21133
21134	mutex_enter(SD_MUTEX(un));
21135	/* check whether cache flush should be suppressed */
21136	if (un->un_f_suppress_cache_flush == TRUE) {
21137		mutex_exit(SD_MUTEX(un));
21138		/*
21139		 * suppress the cache flush if the device is told to do
21140		 * so by sd.conf or disk table
21141		 */
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: "
		    "skip the cache flush since suppress_cache_flush is %d!\n",
		    un->un_f_suppress_cache_flush);
21145
21146		if (is_async == TRUE) {
21147			/* invoke callback for asynchronous flush */
21148			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
21149		}
21150		return (rval);
21151	}
21152	mutex_exit(SD_MUTEX(un));
21153
21154	/*
21155	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
21156	 * set properly
21157	 */
21158	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
21159	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
21160
21161	mutex_enter(SD_MUTEX(un));
21162	if (dkc != NULL && un->un_f_sync_nv_supported &&
21163	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
21164		/*
21165		 * if the device supports SYNC_NV bit, turn on
21166		 * the SYNC_NV bit to only flush volatile cache
21167		 */
21168		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
21169	}
21170	mutex_exit(SD_MUTEX(un));
21171
21172	/*
21173	 * First get some memory for the uscsi_cmd struct and cdb
21174	 * and initialize for SYNCHRONIZE_CACHE cmd.
21175	 */
21176	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
21177	uscmd->uscsi_cdblen = CDB_GROUP1;
21178	uscmd->uscsi_cdb = (caddr_t)cdb;
21179	uscmd->uscsi_bufaddr = NULL;
21180	uscmd->uscsi_buflen = 0;
21181	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
21182	uscmd->uscsi_rqlen = SENSE_LENGTH;
21183	uscmd->uscsi_rqresid = SENSE_LENGTH;
21184	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
21185	uscmd->uscsi_timeout = sd_io_time;
21186
21187	/*
21188	 * Allocate an sd_uscsi_info struct and fill it with the info
21189	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
21190	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
21191	 * since we allocate the buf here in this function, we do not
21192	 * need to preserve the prior contents of b_private.
21193	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
21194	 */
21195	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
21196	uip->ui_flags = SD_PATH_DIRECT;
21197	uip->ui_cmdp  = uscmd;
21198
21199	bp = getrbuf(KM_SLEEP);
21200	bp->b_private = uip;
21201
21202	/*
21203	 * Setup buffer to carry uscsi request.
21204	 */
21205	bp->b_flags  = B_BUSY;
21206	bp->b_bcount = 0;
21207	bp->b_blkno  = 0;
21208
21209	if (is_async == TRUE) {
21210		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
21211		uip->ui_dkc = *dkc;
21212	}
21213
21214	bp->b_edev = SD_GET_DEV(un);
21215	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
21216
21217	/*
21218	 * Unset un_f_sync_cache_required flag
21219	 */
21220	mutex_enter(SD_MUTEX(un));
21221	un->un_f_sync_cache_required = FALSE;
21222	mutex_exit(SD_MUTEX(un));
21223
21224	(void) sd_uscsi_strategy(bp);
21225
21226	/*
21227	 * If synchronous request, wait for completion
21228	 * If async just return and let b_iodone callback
21229	 * cleanup.
	 * NOTE: On return, un_ncmds_in_driver will be decremented,
21231	 * but it was also incremented in sd_uscsi_strategy(), so
21232	 * we should be ok.
21233	 */
21234	if (is_async == FALSE) {
21235		(void) biowait(bp);
21236		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
21237	}
21238
21239	return (rval);
21240}
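/*
 * Usage sketch for the routine above (illustrative only): requesting an
 * asynchronous flush of the volatile cache only. The callback fires from
 * biodone context with the final errno-style status. The callback and
 * cookie names here are hypothetical:
 *
 *	struct dk_callback dkc;
 *
 *	dkc.dkc_callback = my_flush_done;
 *	dkc.dkc_cookie = my_state;
 *	dkc.dkc_flag = FLUSH_VOLATILE;
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
 */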
21241
21242
21243static int
21244sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
21245{
21246	struct sd_uscsi_info *uip;
21247	struct uscsi_cmd *uscmd;
21248	uint8_t *sense_buf;
21249	struct sd_lun *un;
21250	int status;
21251	union scsi_cdb *cdb;
21252
21253	uip = (struct sd_uscsi_info *)(bp->b_private);
21254	ASSERT(uip != NULL);
21255
21256	uscmd = uip->ui_cmdp;
21257	ASSERT(uscmd != NULL);
21258
21259	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
21260	ASSERT(sense_buf != NULL);
21261
21262	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
21263	ASSERT(un != NULL);
21264
21265	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;
21266
21267	status = geterror(bp);
21268	switch (status) {
21269	case 0:
21270		break;	/* Success! */
21271	case EIO:
21272		switch (uscmd->uscsi_status) {
21273		case STATUS_RESERVATION_CONFLICT:
21274			/* Ignore reservation conflict */
21275			status = 0;
21276			goto done;
21277
21278		case STATUS_CHECK:
21279			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
21280			    (scsi_sense_key(sense_buf) ==
21281			    KEY_ILLEGAL_REQUEST)) {
21282				/* Ignore Illegal Request error */
				if (cdb->cdb_un.tag & SD_SYNC_NV_BIT) {
21284					mutex_enter(SD_MUTEX(un));
21285					un->un_f_sync_nv_supported = FALSE;
21286					mutex_exit(SD_MUTEX(un));
21287					status = 0;
21288					SD_TRACE(SD_LOG_IO, un,
					    "un_f_sync_nv_supported "
					    "is set to false.\n");
21291					goto done;
21292				}
21293
21294				mutex_enter(SD_MUTEX(un));
21295				un->un_f_sync_cache_supported = FALSE;
21296				mutex_exit(SD_MUTEX(un));
21297				SD_TRACE(SD_LOG_IO, un,
				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: "
				    "un_f_sync_cache_supported set to false "
				    "with asc = %x, ascq = %x\n",
21301				    scsi_sense_asc(sense_buf),
21302				    scsi_sense_ascq(sense_buf));
21303				status = ENOTSUP;
21304				goto done;
21305			}
21306			break;
21307		default:
21308			break;
21309		}
21310		/* FALLTHRU */
21311	default:
21312		/*
21313		 * Turn on the un_f_sync_cache_required flag
21314		 * since the SYNC CACHE command failed
21315		 */
21316		mutex_enter(SD_MUTEX(un));
21317		un->un_f_sync_cache_required = TRUE;
21318		mutex_exit(SD_MUTEX(un));
21319
21320		/*
21321		 * Don't log an error message if this device
21322		 * has removable media.
21323		 */
21324		if (!un->un_f_has_removable_media) {
21325			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
21326			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
21327		}
21328		break;
21329	}
21330
21331done:
21332	if (uip->ui_dkc.dkc_callback != NULL) {
21333		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
21334	}
21335
21336	ASSERT((bp->b_flags & B_REMAPPED) == 0);
21337	freerbuf(bp);
21338	kmem_free(uip, sizeof (struct sd_uscsi_info));
21339	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
21340	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
21341	kmem_free(uscmd, sizeof (struct uscsi_cmd));
21342
21343	return (status);
21344}
21345
21346
21347/*
21348 *    Function: sd_send_scsi_GET_CONFIGURATION
21349 *
21350 * Description: Issues the get configuration command to the device.
 *		Called from sd_check_for_writable_cd & sd_get_media_info;
 *		the caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN.
21353 *   Arguments: ssc
21354 *		ucmdbuf
21355 *		rqbuf
21356 *		rqbuflen
21357 *		bufaddr
21358 *		buflen
21359 *		path_flag
21360 *
21361 * Return Code: 0   - Success
21362 *		errno return code from sd_ssc_send()
21363 *
21364 *     Context: Can sleep. Does not return until command is completed.
21365 *
21366 */
21367
21368static int
21369sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
21370	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
21371	int path_flag)
21372{
21373	char	cdb[CDB_GROUP1];
21374	int	status;
21375	struct sd_lun	*un;
21376
21377	ASSERT(ssc != NULL);
21378	un = ssc->ssc_un;
21379	ASSERT(un != NULL);
21380	ASSERT(!mutex_owned(SD_MUTEX(un)));
21381	ASSERT(bufaddr != NULL);
21382	ASSERT(ucmdbuf != NULL);
21383	ASSERT(rqbuf != NULL);
21384
21385	SD_TRACE(SD_LOG_IO, un,
21386	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
21387
21388	bzero(cdb, sizeof (cdb));
21389	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
21390	bzero(rqbuf, rqbuflen);
21391	bzero(bufaddr, buflen);
21392
21393	/*
21394	 * Set up cdb field for the get configuration command.
21395	 */
21396	cdb[0] = SCMD_GET_CONFIGURATION;
21397	cdb[1] = 0x02;  /* Requested Type */
21398	cdb[8] = SD_PROFILE_HEADER_LEN;
21399	ucmdbuf->uscsi_cdb = cdb;
21400	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
21401	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
21402	ucmdbuf->uscsi_buflen = buflen;
21403	ucmdbuf->uscsi_timeout = sd_io_time;
21404	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
21405	ucmdbuf->uscsi_rqlen = rqbuflen;
21406	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
21407
21408	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
21409	    UIO_SYSSPACE, path_flag);
21410
21411	switch (status) {
21412	case 0:
21413		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21414		break;  /* Success! */
21415	case EIO:
21416		switch (ucmdbuf->uscsi_status) {
21417		case STATUS_RESERVATION_CONFLICT:
21418			status = EACCES;
21419			break;
21420		default:
21421			break;
21422		}
21423		break;
21424	default:
21425		break;
21426	}
21427
21428	if (status == 0) {
21429		SD_DUMP_MEMORY(un, SD_LOG_IO,
21430		    "sd_send_scsi_GET_CONFIGURATION: data",
21431		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
21432	}
21433
21434	SD_TRACE(SD_LOG_IO, un,
21435	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
21436
21437	return (status);
21438}
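/*
 * For reference (MMC feature header, illustrative): the
 * SD_PROFILE_HEADER_LEN bytes returned above begin with
 *
 *	bytes 0-3: data length (MSB first)
 *	bytes 4-5: reserved
 *	bytes 6-7: current profile, e.g. 0x0008 CD-ROM, 0x0010 DVD-ROM
 *
 * Callers such as sd_get_media_info can use the current profile field
 * to classify the medium.
 */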
21439
21440/*
21441 *    Function: sd_send_scsi_feature_GET_CONFIGURATION
21442 *
21443 * Description: Issues the get configuration command to the device to
21444 *              retrieve a specific feature. Called from
21445 *		sd_check_for_writable_cd & sd_set_mmc_caps.
21446 *   Arguments: ssc
21447 *              ucmdbuf
21448 *              rqbuf
21449 *              rqbuflen
21450 *              bufaddr
21451 *              buflen
21452 *		feature
21453 *
21454 * Return Code: 0   - Success
21455 *              errno return code from sd_ssc_send()
21456 *
21457 *     Context: Can sleep. Does not return until command is completed.
21458 *
21459 */
21460static int
21461sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
21462	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
21463	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
21464{
21465	char    cdb[CDB_GROUP1];
21466	int	status;
21467	struct sd_lun	*un;
21468
21469	ASSERT(ssc != NULL);
21470	un = ssc->ssc_un;
21471	ASSERT(un != NULL);
21472	ASSERT(!mutex_owned(SD_MUTEX(un)));
21473	ASSERT(bufaddr != NULL);
21474	ASSERT(ucmdbuf != NULL);
21475	ASSERT(rqbuf != NULL);
21476
21477	SD_TRACE(SD_LOG_IO, un,
21478	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
21479
21480	bzero(cdb, sizeof (cdb));
21481	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
21482	bzero(rqbuf, rqbuflen);
21483	bzero(bufaddr, buflen);
21484
21485	/*
21486	 * Set up cdb field for the get configuration command.
21487	 */
21488	cdb[0] = SCMD_GET_CONFIGURATION;
21489	cdb[1] = 0x02;  /* Requested Type */
21490	cdb[3] = feature;
21491	cdb[8] = buflen;
21492	ucmdbuf->uscsi_cdb = cdb;
21493	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
21494	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
21495	ucmdbuf->uscsi_buflen = buflen;
21496	ucmdbuf->uscsi_timeout = sd_io_time;
21497	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
21498	ucmdbuf->uscsi_rqlen = rqbuflen;
21499	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
21500
21501	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
21502	    UIO_SYSSPACE, path_flag);
21503
21504	switch (status) {
21505	case 0:
21506
21507		break;  /* Success! */
21508	case EIO:
21509		switch (ucmdbuf->uscsi_status) {
21510		case STATUS_RESERVATION_CONFLICT:
21511			status = EACCES;
21512			break;
21513		default:
21514			break;
21515		}
21516		break;
21517	default:
21518		break;
21519	}
21520
21521	if (status == 0) {
21522		SD_DUMP_MEMORY(un, SD_LOG_IO,
21523		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
21524		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
21525	}
21526
21527	SD_TRACE(SD_LOG_IO, un,
21528	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
21529
21530	return (status);
21531}
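/*
 * Usage sketch for the routine above (illustrative only; buffer names
 * and sizes are hypothetical): probing a single feature, e.g. the MMC
 * "Random Writable" feature (0x20), to decide whether a medium is
 * writable:
 *
 *	struct uscsi_cmd com;
 *	uchar_t	rqbuf[SENSE_LENGTH];
 *	uchar_t	out[32];
 *
 *	status = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf,
 *	    sizeof (rqbuf), out, sizeof (out), 0x20, SD_PATH_STANDARD);
 */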
21532
21533
21534/*
21535 *    Function: sd_send_scsi_MODE_SENSE
21536 *
21537 * Description: Utility function for issuing a scsi MODE SENSE command.
21538 *		Note: This routine uses a consistent implementation for Group0,
21539 *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
21541 *
21542 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21543 *                      structure for this target.
 *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
21546 *		bufaddr - buffer for page data retrieved from the target.
21547 *		buflen - size of page to be retrieved.
21548 *		page_code - page code of data to be retrieved from the target.
21549 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21550 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21551 *			to use the USCSI "direct" chain and bypass the normal
21552 *			command waitq.
21553 *
21554 * Return Code: 0   - Success
21555 *		errno return code from sd_ssc_send()
21556 *
21557 *     Context: Can sleep. Does not return until command is completed.
21558 */
21559
21560static int
21561sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
21562	size_t buflen,  uchar_t page_code, int path_flag)
21563{
21564	struct	scsi_extended_sense	sense_buf;
21565	union scsi_cdb		cdb;
21566	struct uscsi_cmd	ucmd_buf;
21567	int			status;
21568	int			headlen;
21569	struct sd_lun		*un;
21570
21571	ASSERT(ssc != NULL);
21572	un = ssc->ssc_un;
21573	ASSERT(un != NULL);
21574	ASSERT(!mutex_owned(SD_MUTEX(un)));
21575	ASSERT(bufaddr != NULL);
21576	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21577	    (cdbsize == CDB_GROUP2));
21578
21579	SD_TRACE(SD_LOG_IO, un,
21580	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
21581
21582	bzero(&cdb, sizeof (cdb));
21583	bzero(&ucmd_buf, sizeof (ucmd_buf));
21584	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21585	bzero(bufaddr, buflen);
21586
21587	if (cdbsize == CDB_GROUP0) {
21588		cdb.scc_cmd = SCMD_MODE_SENSE;
21589		cdb.cdb_opaque[2] = page_code;
21590		FORMG0COUNT(&cdb, buflen);
21591		headlen = MODE_HEADER_LENGTH;
21592	} else {
21593		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
21594		cdb.cdb_opaque[2] = page_code;
21595		FORMG1COUNT(&cdb, buflen);
21596		headlen = MODE_HEADER_LENGTH_GRP2;
21597	}
21598
21599	ASSERT(headlen <= buflen);
21600	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21601
21602	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21603	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21604	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21605	ucmd_buf.uscsi_buflen	= buflen;
21606	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21607	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21608	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21609	ucmd_buf.uscsi_timeout	= 60;
21610
21611	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21612	    UIO_SYSSPACE, path_flag);
21613
21614	switch (status) {
21615	case 0:
21616		/*
		 * sr_check_wp() uses the 0x3f page code and checks the mode
		 * page header to determine if the target device is
		 * write-protected. But some USB devices return 0 bytes for
		 * the 0x3f page code. For this case, make sure that at least
		 * the mode page header is returned.
21622		 */
21623		if (buflen - ucmd_buf.uscsi_resid <  headlen) {
21624			status = EIO;
21625			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
21626			    "mode page header is not returned");
21627		}
21628		break;	/* Success! */
21629	case EIO:
21630		switch (ucmd_buf.uscsi_status) {
21631		case STATUS_RESERVATION_CONFLICT:
21632			status = EACCES;
21633			break;
21634		default:
21635			break;
21636		}
21637		break;
21638	default:
21639		break;
21640	}
21641
21642	if (status == 0) {
21643		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
21644		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21645	}
21646	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
21647
21648	return (status);
21649}
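/*
 * Usage sketch for the routine above (illustrative only; the page buffer
 * size is hypothetical): retrieving the caching mode page (0x08) with a
 * 10-byte CDB. MODE_HEADER_LENGTH_GRP2 accounts for the larger group-2
 * mode parameter header noted above:
 *
 *	uchar_t	page[MODE_HEADER_LENGTH_GRP2 + 20];
 *
 *	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, page,
 *	    sizeof (page), 0x08, SD_PATH_DIRECT);
 */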
21650
21651
21652/*
21653 *    Function: sd_send_scsi_MODE_SELECT
21654 *
21655 * Description: Utility function for issuing a scsi MODE SELECT command.
21656 *		Note: This routine uses a consistent implementation for Group0,
21657 *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
21659 *
21660 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21661 *                      structure for this target.
 *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
21664 *		bufaddr - buffer for page data retrieved from the target.
21665 *		buflen - size of page to be retrieved.
 *		save_page - boolean to determine if the SP bit should be set.
21667 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21668 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21669 *			to use the USCSI "direct" chain and bypass the normal
21670 *			command waitq.
21671 *
21672 * Return Code: 0   - Success
21673 *		errno return code from sd_ssc_send()
21674 *
21675 *     Context: Can sleep. Does not return until command is completed.
21676 */
21677
21678static int
21679sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
21680	size_t buflen,  uchar_t save_page, int path_flag)
21681{
21682	struct	scsi_extended_sense	sense_buf;
21683	union scsi_cdb		cdb;
21684	struct uscsi_cmd	ucmd_buf;
21685	int			status;
21686	struct sd_lun		*un;
21687
21688	ASSERT(ssc != NULL);
21689	un = ssc->ssc_un;
21690	ASSERT(un != NULL);
21691	ASSERT(!mutex_owned(SD_MUTEX(un)));
21692	ASSERT(bufaddr != NULL);
21693	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21694	    (cdbsize == CDB_GROUP2));
21695
21696	SD_TRACE(SD_LOG_IO, un,
21697	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
21698
21699	bzero(&cdb, sizeof (cdb));
21700	bzero(&ucmd_buf, sizeof (ucmd_buf));
21701	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21702
21703	/* Set the PF bit for many third party drives */
21704	cdb.cdb_opaque[1] = 0x10;
21705
21706	/* Set the savepage(SP) bit if given */
21707	if (save_page == SD_SAVE_PAGE) {
21708		cdb.cdb_opaque[1] |= 0x01;
21709	}
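	/*
	 * Illustrative CDB byte 1 values resulting from the two
	 * assignments above:
	 *
	 *	PF only:	cdb[1] = 0x10
	 *	PF | SP:	cdb[1] = 0x11	(target saves the page to
	 *					non-volatile storage)
	 */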
21710
21711	if (cdbsize == CDB_GROUP0) {
21712		cdb.scc_cmd = SCMD_MODE_SELECT;
21713		FORMG0COUNT(&cdb, buflen);
21714	} else {
21715		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
21716		FORMG1COUNT(&cdb, buflen);
21717	}
21718
21719	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21720
21721	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21722	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21723	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21724	ucmd_buf.uscsi_buflen	= buflen;
21725	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21726	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21727	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
21728	ucmd_buf.uscsi_timeout	= 60;
21729
21730	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21731	    UIO_SYSSPACE, path_flag);
21732
21733	switch (status) {
21734	case 0:
21735		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21736		break;	/* Success! */
21737	case EIO:
21738		switch (ucmd_buf.uscsi_status) {
21739		case STATUS_RESERVATION_CONFLICT:
21740			status = EACCES;
21741			break;
21742		default:
21743			break;
21744		}
21745		break;
21746	default:
21747		break;
21748	}
21749
21750	if (status == 0) {
21751		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
21752		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21753	}
21754	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
21755
21756	return (status);
21757}
21758
21759
21760/*
21761 *    Function: sd_send_scsi_RDWR
21762 *
21763 * Description: Issue a scsi READ or WRITE command with the given parameters.
21764 *
21765 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21766 *                      structure for this target.
21767 *		cmd:	 SCMD_READ or SCMD_WRITE
21768 *		bufaddr: Address of caller's buffer to receive the RDWR data
 *		buflen:  Length of caller's buffer to receive the RDWR data.
 *		start_block: Block number for the start of the RDWR operation.
 *			 (Assumes target-native block size.)
21774 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21775 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21776 *			to use the USCSI "direct" chain and bypass the normal
21777 *			command waitq.
21778 *
21779 * Return Code: 0   - Success
21780 *		errno return code from sd_ssc_send()
21781 *
21782 *     Context: Can sleep. Does not return until command is completed.
21783 */
21784
21785static int
21786sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
21787	size_t buflen, daddr_t start_block, int path_flag)
21788{
21789	struct	scsi_extended_sense	sense_buf;
21790	union scsi_cdb		cdb;
21791	struct uscsi_cmd	ucmd_buf;
21792	uint32_t		block_count;
21793	int			status;
21794	int			cdbsize;
21795	uchar_t			flag;
21796	struct sd_lun		*un;
21797
21798	ASSERT(ssc != NULL);
21799	un = ssc->ssc_un;
21800	ASSERT(un != NULL);
21801	ASSERT(!mutex_owned(SD_MUTEX(un)));
21802	ASSERT(bufaddr != NULL);
21803	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
21804
21805	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
21806
21807	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
21808		return (EINVAL);
21809	}
21810
21811	mutex_enter(SD_MUTEX(un));
21812	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
21813	mutex_exit(SD_MUTEX(un));
21814
21815	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
21816
21817	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
21818	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
21819	    bufaddr, buflen, start_block, block_count);
21820
21821	bzero(&cdb, sizeof (cdb));
21822	bzero(&ucmd_buf, sizeof (ucmd_buf));
21823	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21824
21825	/* Compute CDB size to use */
21826	if (start_block > 0xffffffff)
21827		cdbsize = CDB_GROUP4;
21828	else if ((start_block & 0xFFE00000) ||
21829	    (un->un_f_cfg_is_atapi == TRUE))
21830		cdbsize = CDB_GROUP1;
21831	else
21832		cdbsize = CDB_GROUP0;
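	/*
	 * Rationale (illustrative): a 6-byte CDB carries only a 21-bit
	 * LBA, so 0xFFE00000 masks exactly the bits a group-0 command
	 * cannot address; a 10-byte CDB carries a 32-bit LBA; anything
	 * beyond 0xffffffff requires the 16-byte group-4 form. For
	 * example, start_block = 0x00200000 already forces CDB_GROUP1.
	 */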
21833
21834	switch (cdbsize) {
21835	case CDB_GROUP0:	/* 6-byte CDBs */
21836		cdb.scc_cmd = cmd;
21837		FORMG0ADDR(&cdb, start_block);
21838		FORMG0COUNT(&cdb, block_count);
21839		break;
21840	case CDB_GROUP1:	/* 10-byte CDBs */
21841		cdb.scc_cmd = cmd | SCMD_GROUP1;
21842		FORMG1ADDR(&cdb, start_block);
21843		FORMG1COUNT(&cdb, block_count);
21844		break;
21845	case CDB_GROUP4:	/* 16-byte CDBs */
21846		cdb.scc_cmd = cmd | SCMD_GROUP4;
21847		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
21848		FORMG4COUNT(&cdb, block_count);
21849		break;
21850	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
21851	default:
21852		/* All others reserved */
21853		return (EINVAL);
21854	}
21855
21856	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
21857	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21858
21859	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21860	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21861	ucmd_buf.uscsi_bufaddr	= bufaddr;
21862	ucmd_buf.uscsi_buflen	= buflen;
21863	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21864	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21865	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
21866	ucmd_buf.uscsi_timeout	= 60;
21867	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21868	    UIO_SYSSPACE, path_flag);
21869
21870	switch (status) {
21871	case 0:
21872		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21873		break;	/* Success! */
21874	case EIO:
21875		switch (ucmd_buf.uscsi_status) {
21876		case STATUS_RESERVATION_CONFLICT:
21877			status = EACCES;
21878			break;
21879		default:
21880			break;
21881		}
21882		break;
21883	default:
21884		break;
21885	}
21886
21887	if (status == 0) {
21888		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
21889		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21890	}
21891
21892	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
21893
21894	return (status);
21895}
21896
21897
21898/*
21899 *    Function: sd_send_scsi_LOG_SENSE
21900 *
21901 * Description: Issue a scsi LOG_SENSE command with the given parameters.
21902 *
21903 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21904 *                      structure for this target.
21905 *
21906 * Return Code: 0   - Success
21907 *		errno return code from sd_ssc_send()
21908 *
21909 *     Context: Can sleep. Does not return until command is completed.
21910 */
21911
21912static int
21913sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr, uint16_t buflen,
21914	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
21915	int path_flag)
21916
21917{
21918	struct scsi_extended_sense	sense_buf;
21919	union scsi_cdb		cdb;
21920	struct uscsi_cmd	ucmd_buf;
21921	int			status;
21922	struct sd_lun		*un;
21923
21924	ASSERT(ssc != NULL);
21925	un = ssc->ssc_un;
21926	ASSERT(un != NULL);
21927	ASSERT(!mutex_owned(SD_MUTEX(un)));
21928
21929	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
21930
21931	bzero(&cdb, sizeof (cdb));
21932	bzero(&ucmd_buf, sizeof (ucmd_buf));
21933	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21934
21935	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
21936	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
21937	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
21938	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
21939	FORMG1COUNT(&cdb, buflen);
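	/*
	 * Illustrative encoding of CDB byte 2 above: page_control occupies
	 * the top two bits and page_code the low six. E.g. reading the
	 * current cumulative values (PC = 1) of the temperature page (0x0D)
	 * yields cdb[2] = (1 << 6) | 0x0D = 0x4D.
	 */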
21940
21941	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21942	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21943	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21944	ucmd_buf.uscsi_buflen	= buflen;
21945	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21946	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21947	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21948	ucmd_buf.uscsi_timeout	= 60;
21949
21950	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21951	    UIO_SYSSPACE, path_flag);
21952
21953	switch (status) {
21954	case 0:
21955		break;
21956	case EIO:
21957		switch (ucmd_buf.uscsi_status) {
21958		case STATUS_RESERVATION_CONFLICT:
21959			status = EACCES;
21960			break;
21961		case STATUS_CHECK:
21962			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
21963			    (scsi_sense_key((uint8_t *)&sense_buf) ==
21964				KEY_ILLEGAL_REQUEST) &&
21965			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
21966				/*
21967				 * ASC 0x24: INVALID FIELD IN CDB
21968				 */
21969				switch (page_code) {
21970				case START_STOP_CYCLE_PAGE:
21971					/*
21972					 * The start stop cycle counter is
21973					 * implemented as page 0x31 in earlier
21974					 * generation disks. In new generation
21975					 * disks the start stop cycle counter is
21976					 * implemented as page 0xE. To properly
21977					 * handle this case if an attempt for
21978					 * log page 0xE is made and fails we
21979					 * will try again using page 0x31.
21980					 *
21981					 * Network storage BU committed to
21982					 * maintain the page 0x31 for this
21983					 * purpose and will not have any other
21984					 * page implemented with page code 0x31
21985					 * until all disks transition to the
21986					 * standard page.
21987					 */
21988					mutex_enter(SD_MUTEX(un));
21989					un->un_start_stop_cycle_page =
21990					    START_STOP_CYCLE_VU_PAGE;
21991					cdb.cdb_opaque[2] =
21992					    (char)(page_control << 6) |
21993					    un->un_start_stop_cycle_page;
21994					mutex_exit(SD_MUTEX(un));
21995					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
21996					status = sd_ssc_send(
21997					    ssc, &ucmd_buf, FKIOCTL,
21998					    UIO_SYSSPACE, path_flag);
21999
22000					break;
22001				case TEMPERATURE_PAGE:
22002					status = ENOTTY;
22003					break;
22004				default:
22005					break;
22006				}
22007			}
22008			break;
22009		default:
22010			break;
22011		}
22012		break;
22013	default:
22014		break;
22015	}
22016
22017	if (status == 0) {
22018		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22019		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
22020		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
22021	}
22022
22023	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
22024
22025	return (status);
22026}
22027
22028
22029/*
22030 *    Function: sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
22031 *
22032 * Description: Issue the scsi GET EVENT STATUS NOTIFICATION command.
22033 *
22034 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
22035 *                      structure for this target.
22036 *		bufaddr
22037 *		buflen
22038 *		class_req
22039 *
22040 * Return Code: 0   - Success
22041 *		errno return code from sd_ssc_send()
22042 *
22043 *     Context: Can sleep. Does not return until command is completed.
22044 */
22045
22046static int
22047sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc, uchar_t *bufaddr,
22048	size_t buflen, uchar_t class_req)
22049{
22050	union scsi_cdb		cdb;
22051	struct uscsi_cmd	ucmd_buf;
22052	int			status;
22053	struct sd_lun		*un;
22054
22055	ASSERT(ssc != NULL);
22056	un = ssc->ssc_un;
22057	ASSERT(un != NULL);
22058	ASSERT(!mutex_owned(SD_MUTEX(un)));
22059	ASSERT(bufaddr != NULL);
22060
22061	SD_TRACE(SD_LOG_IO, un,
22062	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: entry: un:0x%p\n", un);
22063
22064	bzero(&cdb, sizeof (cdb));
22065	bzero(&ucmd_buf, sizeof (ucmd_buf));
22066	bzero(bufaddr, buflen);
22067
22068	cdb.scc_cmd = SCMD_GET_EVENT_STATUS_NOTIFICATION;
22069	cdb.cdb_opaque[1] = 1; /* polled */
22070	cdb.cdb_opaque[4] = class_req;
22071	FORMG1COUNT(&cdb, buflen);
22072
22073	ucmd_buf.uscsi_cdb	= (char *)&cdb;
22074	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
22075	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
22076	ucmd_buf.uscsi_buflen	= buflen;
22077	ucmd_buf.uscsi_rqbuf	= NULL;
22078	ucmd_buf.uscsi_rqlen	= 0;
22079	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
22080	ucmd_buf.uscsi_timeout	= 60;
22081
22082	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
22083	    UIO_SYSSPACE, SD_PATH_DIRECT);
22084
22085	/*
	 * Only handle status == 0; the upper-level caller
	 * will make a different assessment based on the context.
22088	 */
22089	if (status == 0) {
22090		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22091
22092		if (ucmd_buf.uscsi_resid != 0) {
22093			status = EIO;
22094		}
22095	}
22096
22097	SD_TRACE(SD_LOG_IO, un,
22098	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: exit\n");
22099
22100	return (status);
22101}
22102
22103
22104static boolean_t
22105sd_gesn_media_data_valid(uchar_t *data)
22106{
22107	uint16_t			len;
22108
	/* the event header length field is big-endian (MSB first) */
	len = (data[0] << 8) | data[1];
22110	return ((len >= 6) &&
22111	    ((data[2] & SD_GESN_HEADER_NEA) == 0) &&
22112	    ((data[2] & SD_GESN_HEADER_CLASS) == SD_GESN_MEDIA_CLASS) &&
22113	    ((data[3] & (1 << SD_GESN_MEDIA_CLASS)) != 0));
22114}
22115
22116
22117/*
22118 *    Function: sdioctl
22119 *
22120 * Description: Driver's ioctl(9e) entry point function.
22121 *
22122 *   Arguments: dev     - device number
22123 *		cmd     - ioctl operation to be performed
22124 *		arg     - user argument, contains data to be set or reference
22125 *			  parameter for get
22126 *		flag    - bit flag, indicating open settings, 32/64 bit type
22127 *		cred_p  - user credential pointer
22128 *		rval_p  - calling process return value (OPT)
22129 *
22130 * Return Code: EINVAL
22131 *		ENOTTY
22132 *		ENXIO
22133 *		EIO
22134 *		EFAULT
22135 *		ENOTSUP
22136 *		EPERM
22137 *
22138 *     Context: Called from the device switch at normal priority.
22139 */
22140
22141static int
22142sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
22143{
22144	struct sd_lun	*un = NULL;
22145	int		err = 0;
22146	int		i = 0;
22147	cred_t		*cr;
22148	int		tmprval = EINVAL;
22149	boolean_t	is_valid;
22150	sd_ssc_t	*ssc;
22151
	/*
	 * All device accesses go through sdstrategy, where we check the
	 * suspend status.
	 */
22156	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22157		return (ENXIO);
22158	}
22159
22160	ASSERT(!mutex_owned(SD_MUTEX(un)));
22161
22162	/* Initialize sd_ssc_t for internal uscsi commands */
22163	ssc = sd_ssc_init(un);
22164
22165	is_valid = SD_IS_VALID_LABEL(un);
22166
22167	/*
22168	 * Moved this wait from sd_uscsi_strategy to here for
22169	 * reasons of deadlock prevention. Internal driver commands,
	 * specifically those to change a device's power level, result
22171	 * in a call to sd_uscsi_strategy.
22172	 */
22173	mutex_enter(SD_MUTEX(un));
22174	while ((un->un_state == SD_STATE_SUSPENDED) ||
22175	    (un->un_state == SD_STATE_PM_CHANGING)) {
22176		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
22177	}
	/*
	 * Incrementing the counter here protects commands from now
	 * through to the top of sd_uscsi_strategy. Without the
	 * increment, a power down, for example, could get in
	 * after the above state check is made and before execution
	 * reaches the top of sd_uscsi_strategy, which would cause
	 * problems.
	 */
22186	un->un_ncmds_in_driver++;
22187
22188	if (!is_valid &&
22189	    (flag & (FNDELAY | FNONBLOCK))) {
22190		switch (cmd) {
22191		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
22192		case DKIOCGVTOC:
22193		case DKIOCGEXTVTOC:
22194		case DKIOCGAPART:
22195		case DKIOCPARTINFO:
22196		case DKIOCEXTPARTINFO:
22197		case DKIOCSGEOM:
22198		case DKIOCSAPART:
22199		case DKIOCGETEFI:
22200		case DKIOCPARTITION:
22201		case DKIOCSVTOC:
22202		case DKIOCSEXTVTOC:
22203		case DKIOCSETEFI:
22204		case DKIOCGMBOOT:
22205		case DKIOCSMBOOT:
22206		case DKIOCG_PHYGEOM:
22207		case DKIOCG_VIRTGEOM:
22208#if defined(__i386) || defined(__amd64)
22209		case DKIOCSETEXTPART:
22210#endif
22211			/* let cmlb handle it */
22212			goto skip_ready_valid;
22213
22214		case CDROMPAUSE:
22215		case CDROMRESUME:
22216		case CDROMPLAYMSF:
22217		case CDROMPLAYTRKIND:
22218		case CDROMREADTOCHDR:
22219		case CDROMREADTOCENTRY:
22220		case CDROMSTOP:
22221		case CDROMSTART:
22222		case CDROMVOLCTRL:
22223		case CDROMSUBCHNL:
22224		case CDROMREADMODE2:
22225		case CDROMREADMODE1:
22226		case CDROMREADOFFSET:
22227		case CDROMSBLKMODE:
22228		case CDROMGBLKMODE:
22229		case CDROMGDRVSPEED:
22230		case CDROMSDRVSPEED:
22231		case CDROMCDDA:
22232		case CDROMCDXA:
22233		case CDROMSUBCODE:
22234			if (!ISCD(un)) {
22235				un->un_ncmds_in_driver--;
22236				ASSERT(un->un_ncmds_in_driver >= 0);
22237				mutex_exit(SD_MUTEX(un));
22238				err = ENOTTY;
22239				goto done_without_assess;
22240			}
22241			break;
22242		case FDEJECT:
22243		case DKIOCEJECT:
22244		case CDROMEJECT:
22245			if (!un->un_f_eject_media_supported) {
22246				un->un_ncmds_in_driver--;
22247				ASSERT(un->un_ncmds_in_driver >= 0);
22248				mutex_exit(SD_MUTEX(un));
22249				err = ENOTTY;
22250				goto done_without_assess;
22251			}
22252			break;
22253		case DKIOCFLUSHWRITECACHE:
22254			mutex_exit(SD_MUTEX(un));
22255			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
22256			if (err != 0) {
22257				mutex_enter(SD_MUTEX(un));
22258				un->un_ncmds_in_driver--;
22259				ASSERT(un->un_ncmds_in_driver >= 0);
22260				mutex_exit(SD_MUTEX(un));
22261				err = EIO;
22262				goto done_quick_assess;
22263			}
22264			mutex_enter(SD_MUTEX(un));
22265			/* FALLTHROUGH */
22266		case DKIOCREMOVABLE:
22267		case DKIOCHOTPLUGGABLE:
22268		case DKIOCINFO:
22269		case DKIOCGMEDIAINFO:
22270		case DKIOCGMEDIAINFOEXT:
22271		case MHIOCENFAILFAST:
22272		case MHIOCSTATUS:
22273		case MHIOCTKOWN:
22274		case MHIOCRELEASE:
22275		case MHIOCGRP_INKEYS:
22276		case MHIOCGRP_INRESV:
22277		case MHIOCGRP_REGISTER:
22278		case MHIOCGRP_RESERVE:
22279		case MHIOCGRP_PREEMPTANDABORT:
22280		case MHIOCGRP_REGISTERANDIGNOREKEY:
22281		case CDROMCLOSETRAY:
22282		case USCSICMD:
22283			goto skip_ready_valid;
22284		default:
22285			break;
22286		}
22287
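		/*
		 * sd_ready_and_valid() issues commands to the device, so
		 * drop the mutex across the call.
		 */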
22288		mutex_exit(SD_MUTEX(un));
22289		err = sd_ready_and_valid(ssc, SDPART(dev));
22290		mutex_enter(SD_MUTEX(un));
22291
22292		if (err != SD_READY_VALID) {
22293			switch (cmd) {
22294			case DKIOCSTATE:
22295			case CDROMGDRVSPEED:
22296			case CDROMSDRVSPEED:
22297			case FDEJECT:	/* for eject command */
22298			case DKIOCEJECT:
22299			case CDROMEJECT:
22300			case DKIOCREMOVABLE:
22301			case DKIOCHOTPLUGGABLE:
22302				break;
22303			default:
22304				if (un->un_f_has_removable_media) {
22305					err = ENXIO;
				} else {
					/*
					 * Do not map SD_RESERVED_BY_OTHERS
					 * to EIO.
					 */
					if (err == SD_RESERVED_BY_OTHERS) {
22309						err = EACCES;
22310					} else {
22311						err = EIO;
22312					}
22313				}
22314				un->un_ncmds_in_driver--;
22315				ASSERT(un->un_ncmds_in_driver >= 0);
22316				mutex_exit(SD_MUTEX(un));
22317
22318				goto done_without_assess;
22319			}
22320		}
22321	}
22322
22323skip_ready_valid:
22324	mutex_exit(SD_MUTEX(un));
22325
22326	switch (cmd) {
22327	case DKIOCINFO:
22328		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
22329		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
22330		break;
22331
22332	case DKIOCGMEDIAINFO:
22333		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
22334		err = sd_get_media_info(dev, (caddr_t)arg, flag);
22335		break;
22336
22337	case DKIOCGMEDIAINFOEXT:
22338		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFOEXT\n");
22339		err = sd_get_media_info_ext(dev, (caddr_t)arg, flag);
22340		break;
22341
22342	case DKIOCGGEOM:
22343	case DKIOCGVTOC:
22344	case DKIOCGEXTVTOC:
22345	case DKIOCGAPART:
22346	case DKIOCPARTINFO:
22347	case DKIOCEXTPARTINFO:
22348	case DKIOCSGEOM:
22349	case DKIOCSAPART:
22350	case DKIOCGETEFI:
22351	case DKIOCPARTITION:
22352	case DKIOCSVTOC:
22353	case DKIOCSEXTVTOC:
22354	case DKIOCSETEFI:
22355	case DKIOCGMBOOT:
22356	case DKIOCSMBOOT:
22357	case DKIOCG_PHYGEOM:
22358	case DKIOCG_VIRTGEOM:
22359#if defined(__i386) || defined(__amd64)
22360	case DKIOCSETEXTPART:
22361#endif
22362		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
22363
22364		/* TUR should spin up */
22365
22366		if (un->un_f_has_removable_media)
22367			err = sd_send_scsi_TEST_UNIT_READY(ssc,
22368			    SD_CHECK_FOR_MEDIA);
		else
22371			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
22372
22373		if (err != 0)
22374			goto done_with_assess;
22375
22376		err = cmlb_ioctl(un->un_cmlbhandle, dev,
22377		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
22378
		if ((err == 0) &&
		    ((cmd == DKIOCSETEFI) ||
		    (un->un_f_pkstats_enabled &&
		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC ||
		    cmd == DKIOCSEXTVTOC)))) {
22384
22385			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
22386			    (void *)SD_PATH_DIRECT);
22387			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
22388				sd_set_pstats(un);
22389				SD_TRACE(SD_LOG_IO_PARTITION, un,
22390				    "sd_ioctl: un:0x%p pstats created and "
22391				    "set\n", un);
22392			}
22393		}
22394
22395		if ((cmd == DKIOCSVTOC || cmd == DKIOCSEXTVTOC) ||
22396		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
22397
22398			mutex_enter(SD_MUTEX(un));
22399			if (un->un_f_devid_supported &&
22400			    (un->un_f_opt_fab_devid == TRUE)) {
22401				if (un->un_devid == NULL) {
22402					sd_register_devid(ssc, SD_DEVINFO(un),
22403					    SD_TARGET_IS_UNRESERVED);
22404				} else {
22405					/*
22406					 * The device id for this disk
22407					 * has been fabricated. The
22408					 * device id must be preserved
22409					 * by writing it back out to
22410					 * disk.
22411					 */
22412					if (sd_write_deviceid(ssc) != 0) {
22413						ddi_devid_free(un->un_devid);
22414						un->un_devid = NULL;
22415					}
22416				}
22417			}
22418			mutex_exit(SD_MUTEX(un));
22419		}
22420
22421		break;
22422
22423	case DKIOCLOCK:
22424		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
22425		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
22426		    SD_PATH_STANDARD);
22427		goto done_with_assess;
22428
22429	case DKIOCUNLOCK:
22430		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
22431		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
22432		    SD_PATH_STANDARD);
22433		goto done_with_assess;
22434
22435	case DKIOCSTATE: {
22436		enum dkio_state		state;
22437		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
22438
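		/*
		 * Copy in the expected state, then block in
		 * sd_check_media() until the drive state differs from it;
		 * the resulting state is copied back out to the user.
		 */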
22439		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
22440			err = EFAULT;
22441		} else {
22442			err = sd_check_media(dev, state);
22443			if (err == 0) {
22444				if (ddi_copyout(&un->un_mediastate, (void *)arg,
22445				    sizeof (int), flag) != 0)
22446					err = EFAULT;
22447			}
22448		}
22449		break;
22450	}
22451
22452	case DKIOCREMOVABLE:
22453		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
22454		i = un->un_f_has_removable_media ? 1 : 0;
22455		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22456			err = EFAULT;
22457		} else {
22458			err = 0;
22459		}
22460		break;
22461
22462	case DKIOCHOTPLUGGABLE:
22463		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
22464		i = un->un_f_is_hotpluggable ? 1 : 0;
22465		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22466			err = EFAULT;
22467		} else {
22468			err = 0;
22469		}
22470		break;
22471
22472	case DKIOCREADONLY:
22473		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREADONLY\n");
22474		i = 0;
22475		if ((ISCD(un) && !un->un_f_mmc_writable_media) ||
22476		    (sr_check_wp(dev) != 0)) {
22477			i = 1;
22478		}
22479		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22480			err = EFAULT;
22481		} else {
22482			err = 0;
22483		}
22484		break;
22485
22486	case DKIOCGTEMPERATURE:
22487		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
22488		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
22489		break;
22490
22491	case MHIOCENFAILFAST:
22492		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
22493		if ((err = drv_priv(cred_p)) == 0) {
22494			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
22495		}
22496		break;
22497
22498	case MHIOCTKOWN:
22499		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
22500		if ((err = drv_priv(cred_p)) == 0) {
22501			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
22502		}
22503		break;
22504
22505	case MHIOCRELEASE:
22506		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
22507		if ((err = drv_priv(cred_p)) == 0) {
22508			err = sd_mhdioc_release(dev);
22509		}
22510		break;
22511
22512	case MHIOCSTATUS:
22513		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
22514		if ((err = drv_priv(cred_p)) == 0) {
22515			switch (sd_send_scsi_TEST_UNIT_READY(ssc, 0)) {
22516			case 0:
22517				err = 0;
22518				break;
22519			case EACCES:
22520				*rval_p = 1;
22521				err = 0;
22522				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22523				break;
22524			default:
22525				err = EIO;
22526				goto done_with_assess;
22527			}
22528		}
22529		break;
22530
22531	case MHIOCQRESERVE:
22532		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
22533		if ((err = drv_priv(cred_p)) == 0) {
22534			err = sd_reserve_release(dev, SD_RESERVE);
22535		}
22536		break;
22537
22538	case MHIOCREREGISTERDEVID:
22539		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
22540		if (drv_priv(cred_p) == EPERM) {
22541			err = EPERM;
22542		} else if (!un->un_f_devid_supported) {
22543			err = ENOTTY;
22544		} else {
22545			err = sd_mhdioc_register_devid(dev);
22546		}
22547		break;
22548
22549	case MHIOCGRP_INKEYS:
22550		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
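		/*
		 * The persistent group reservation ioctls are supported
		 * only with SCSI-3 reservations; they are rejected when
		 * SCSI-2 reserve/release is in use.
		 */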
22551		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
22552			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22553				err = ENOTSUP;
22554			} else {
22555				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
22556				    flag);
22557			}
22558		}
22559		break;
22560
22561	case MHIOCGRP_INRESV:
22562		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
22563		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
22564			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22565				err = ENOTSUP;
22566			} else {
22567				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
22568			}
22569		}
22570		break;
22571
22572	case MHIOCGRP_REGISTER:
22573		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
22574		if ((err = drv_priv(cred_p)) != EPERM) {
22575			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22576				err = ENOTSUP;
22577			} else if (arg != NULL) {
22578				mhioc_register_t reg;
22579				if (ddi_copyin((void *)arg, &reg,
22580				    sizeof (mhioc_register_t), flag) != 0) {
22581					err = EFAULT;
22582				} else {
22583					err =
22584					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22585					    ssc, SD_SCSI3_REGISTER,
22586					    (uchar_t *)&reg);
22587					if (err != 0)
22588						goto done_with_assess;
22589				}
22590			}
22591		}
22592		break;
22593
22594	case MHIOCGRP_RESERVE:
22595		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
22596		if ((err = drv_priv(cred_p)) != EPERM) {
22597			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22598				err = ENOTSUP;
22599			} else if (arg != NULL) {
22600				mhioc_resv_desc_t resv_desc;
22601				if (ddi_copyin((void *)arg, &resv_desc,
22602				    sizeof (mhioc_resv_desc_t), flag) != 0) {
22603					err = EFAULT;
22604				} else {
22605					err =
22606					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22607					    ssc, SD_SCSI3_RESERVE,
22608					    (uchar_t *)&resv_desc);
22609					if (err != 0)
22610						goto done_with_assess;
22611				}
22612			}
22613		}
22614		break;
22615
22616	case MHIOCGRP_PREEMPTANDABORT:
22617		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
22618		if ((err = drv_priv(cred_p)) != EPERM) {
22619			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22620				err = ENOTSUP;
22621			} else if (arg != NULL) {
22622				mhioc_preemptandabort_t preempt_abort;
22623				if (ddi_copyin((void *)arg, &preempt_abort,
22624				    sizeof (mhioc_preemptandabort_t),
22625				    flag) != 0) {
22626					err = EFAULT;
22627				} else {
22628					err =
22629					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22630					    ssc, SD_SCSI3_PREEMPTANDABORT,
22631					    (uchar_t *)&preempt_abort);
22632					if (err != 0)
22633						goto done_with_assess;
22634				}
22635			}
22636		}
22637		break;
22638
22639	case MHIOCGRP_REGISTERANDIGNOREKEY:
22640		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
22641		if ((err = drv_priv(cred_p)) != EPERM) {
22642			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22643				err = ENOTSUP;
22644			} else if (arg != NULL) {
22645				mhioc_registerandignorekey_t r_and_i;
22646				if (ddi_copyin((void *)arg, (void *)&r_and_i,
22647				    sizeof (mhioc_registerandignorekey_t),
22648				    flag) != 0) {
22649					err = EFAULT;
22650				} else {
22651					err =
22652					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22653					    ssc, SD_SCSI3_REGISTERANDIGNOREKEY,
22654					    (uchar_t *)&r_and_i);
22655					if (err != 0)
22656						goto done_with_assess;
22657				}
22658			}
22659		}
22660		break;
22661
22662	case USCSICMD:
22663		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
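		/*
		 * Permit the command if either the ioctl credential or the
		 * current thread's credential carries sufficient privilege.
		 */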
22664		cr = ddi_get_cred();
22665		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
22666			err = EPERM;
22667		} else {
22668			enum uio_seg	uioseg;
22669
22670			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
22671			    UIO_USERSPACE;
22672			if (un->un_f_format_in_progress == TRUE) {
22673				err = EAGAIN;
22674				break;
22675			}
22676
22677			err = sd_ssc_send(ssc,
22678			    (struct uscsi_cmd *)arg,
22679			    flag, uioseg, SD_PATH_STANDARD);
22680			if (err != 0)
22681				goto done_with_assess;
22682			else
22683				sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22684		}
22685		break;
22686
22687	case CDROMPAUSE:
22688	case CDROMRESUME:
22689		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
22690		if (!ISCD(un)) {
22691			err = ENOTTY;
22692		} else {
22693			err = sr_pause_resume(dev, cmd);
22694		}
22695		break;
22696
22697	case CDROMPLAYMSF:
22698		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
22699		if (!ISCD(un)) {
22700			err = ENOTTY;
22701		} else {
22702			err = sr_play_msf(dev, (caddr_t)arg, flag);
22703		}
22704		break;
22705
22706	case CDROMPLAYTRKIND:
22707		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
22708#if defined(__i386) || defined(__amd64)
22709		/*
22710		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
22711		 */
22712		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
22713#else
22714		if (!ISCD(un)) {
22715#endif
22716			err = ENOTTY;
22717		} else {
22718			err = sr_play_trkind(dev, (caddr_t)arg, flag);
22719		}
22720		break;
22721
22722	case CDROMREADTOCHDR:
22723		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
22724		if (!ISCD(un)) {
22725			err = ENOTTY;
22726		} else {
22727			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
22728		}
22729		break;
22730
22731	case CDROMREADTOCENTRY:
22732		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
22733		if (!ISCD(un)) {
22734			err = ENOTTY;
22735		} else {
22736			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
22737		}
22738		break;
22739
22740	case CDROMSTOP:
22741		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
22742		if (!ISCD(un)) {
22743			err = ENOTTY;
22744		} else {
22745			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22746			    SD_TARGET_STOP, SD_PATH_STANDARD);
22747			goto done_with_assess;
22748		}
22749		break;
22750
22751	case CDROMSTART:
22752		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
22753		if (!ISCD(un)) {
22754			err = ENOTTY;
22755		} else {
22756			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22757			    SD_TARGET_START, SD_PATH_STANDARD);
22758			goto done_with_assess;
22759		}
22760		break;
22761
22762	case CDROMCLOSETRAY:
22763		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
22764		if (!ISCD(un)) {
22765			err = ENOTTY;
22766		} else {
22767			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22768			    SD_TARGET_CLOSE, SD_PATH_STANDARD);
22769			goto done_with_assess;
22770		}
22771		break;
22772
22773	case FDEJECT:	/* for eject command */
22774	case DKIOCEJECT:
22775	case CDROMEJECT:
22776		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
22777		if (!un->un_f_eject_media_supported) {
22778			err = ENOTTY;
22779		} else {
22780			err = sr_eject(dev);
22781		}
22782		break;
22783
22784	case CDROMVOLCTRL:
22785		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
22786		if (!ISCD(un)) {
22787			err = ENOTTY;
22788		} else {
22789			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
22790		}
22791		break;
22792
22793	case CDROMSUBCHNL:
22794		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
22795		if (!ISCD(un)) {
22796			err = ENOTTY;
22797		} else {
22798			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
22799		}
22800		break;
22801
22802	case CDROMREADMODE2:
22803		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
22804		if (!ISCD(un)) {
22805			err = ENOTTY;
22806		} else if (un->un_f_cfg_is_atapi == TRUE) {
22807			/*
22808			 * If the drive supports READ CD, use that instead of
22809			 * switching the LBA size via a MODE SELECT
22810			 * Block Descriptor
22811			 */
22812			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
22813		} else {
22814			err = sr_read_mode2(dev, (caddr_t)arg, flag);
22815		}
22816		break;
22817
22818	case CDROMREADMODE1:
22819		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
22820		if (!ISCD(un)) {
22821			err = ENOTTY;
22822		} else {
22823			err = sr_read_mode1(dev, (caddr_t)arg, flag);
22824		}
22825		break;
22826
22827	case CDROMREADOFFSET:
22828		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
22829		if (!ISCD(un)) {
22830			err = ENOTTY;
22831		} else {
22832			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
22833			    flag);
22834		}
22835		break;
22836
22837	case CDROMSBLKMODE:
22838		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
		/*
		 * There is no means of changing the block size on ATAPI
		 * drives, so return ENOTTY if the drive type is ATAPI.
		 */
22843		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
22844			err = ENOTTY;
22845		} else if (un->un_f_mmc_cap == TRUE) {
22846
22847			/*
			 * MMC devices do not support changing the
			 * logical block size.
			 *
			 * Note: EINVAL is returned instead of ENOTTY to
			 * maintain consistency with the original mmc
			 * driver update.
22854			 */
22855			err = EINVAL;
22856		} else {
22857			mutex_enter(SD_MUTEX(un));
22858			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
22859			    (un->un_ncmds_in_transport > 0)) {
22860				mutex_exit(SD_MUTEX(un));
22861				err = EINVAL;
22862			} else {
22863				mutex_exit(SD_MUTEX(un));
22864				err = sr_change_blkmode(dev, cmd, arg, flag);
22865			}
22866		}
22867		break;
22868
22869	case CDROMGBLKMODE:
22870		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
22871		if (!ISCD(un)) {
22872			err = ENOTTY;
22873		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
22874		    (un->un_f_blockcount_is_valid != FALSE)) {
			/*
			 * The drive is ATAPI, so return the target block
			 * size; we cannot change the blocksize on ATAPI
			 * drives. This is used primarily to detect whether
			 * an ATAPI cdrom is present.
			 */
22881			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
22882			    sizeof (int), flag) != 0) {
22883				err = EFAULT;
22884			} else {
22885				err = 0;
22886			}
22887
22888		} else {
22889			/*
22890			 * Drive supports changing block sizes via a Mode
22891			 * Select.
22892			 */
22893			err = sr_change_blkmode(dev, cmd, arg, flag);
22894		}
22895		break;
22896
22897	case CDROMGDRVSPEED:
22898	case CDROMSDRVSPEED:
22899		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
22900		if (!ISCD(un)) {
22901			err = ENOTTY;
22902		} else if (un->un_f_mmc_cap == TRUE) {
			/*
			 * Note: In the future the driver implementation for
			 * getting and setting cd speed should entail:
			 * 1) If non-mmc try the Toshiba mode page
			 *    (sr_change_speed)
			 * 2) If mmc but no support for Real Time Streaming
			 *    try the SET CD SPEED (0xBB) command
			 *    (sr_atapi_change_speed)
			 * 3) If mmc and support for Real Time Streaming
			 *    try the GET PERFORMANCE and SET STREAMING
			 *    commands (not yet implemented, 4380808)
			 */
			/*
			 * As per recent MMC spec, CD-ROM speed is variable
			 * and changes with LBA. Since there is no such
			 * thing as drive speed now, fail this ioctl.
			 *
			 * Note: EINVAL is returned for consistency with the
			 * original implementation, which supported getting
			 * the drive speed of mmc devices but not setting
			 * it. Thus EINVAL would be returned if a set request
			 * was made for an mmc device. We no longer support
			 * get or set speed for mmc, but need to remain
			 * consistent with regard to the error code returned.
			 */
22930			err = EINVAL;
22931		} else if (un->un_f_cfg_is_atapi == TRUE) {
22932			err = sr_atapi_change_speed(dev, cmd, arg, flag);
22933		} else {
22934			err = sr_change_speed(dev, cmd, arg, flag);
22935		}
22936		break;
22937
22938	case CDROMCDDA:
22939		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
22940		if (!ISCD(un)) {
22941			err = ENOTTY;
22942		} else {
22943			err = sr_read_cdda(dev, (void *)arg, flag);
22944		}
22945		break;
22946
22947	case CDROMCDXA:
22948		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
22949		if (!ISCD(un)) {
22950			err = ENOTTY;
22951		} else {
22952			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
22953		}
22954		break;
22955
22956	case CDROMSUBCODE:
22957		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
22958		if (!ISCD(un)) {
22959			err = ENOTTY;
22960		} else {
22961			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
22962		}
22963		break;
22966#ifdef SDDEBUG
22967/* RESET/ABORTS testing ioctls */
22968	case DKIOCRESET: {
22969		int	reset_level;
22970
22971		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
22972			err = EFAULT;
22973		} else {
22974			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
			    "reset_level = 0x%x\n", reset_level);
22976			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
22977				err = 0;
22978			} else {
22979				err = EIO;
22980			}
22981		}
22982		break;
22983	}
22984
22985	case DKIOCABORT:
22986		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
22987		if (scsi_abort(SD_ADDRESS(un), NULL)) {
22988			err = 0;
22989		} else {
22990			err = EIO;
22991		}
22992		break;
22993#endif
22994
22995#ifdef SD_FAULT_INJECTION
22996/* SDIOC FaultInjection testing ioctls */
22997	case SDIOCSTART:
22998	case SDIOCSTOP:
22999	case SDIOCINSERTPKT:
23000	case SDIOCINSERTXB:
23001	case SDIOCINSERTUN:
23002	case SDIOCINSERTARQ:
23003	case SDIOCPUSH:
23004	case SDIOCRETRIEVE:
23005	case SDIOCRUN:
23006		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
23007		    "SDIOC detected cmd:0x%X:\n", cmd);
23008		/* call error generator */
23009		sd_faultinjection_ioctl(cmd, arg, un);
23010		err = 0;
23011		break;
23012
23013#endif /* SD_FAULT_INJECTION */
23014
23015	case DKIOCFLUSHWRITECACHE:
23016		{
23017			struct dk_callback *dkc = (struct dk_callback *)arg;
23018
23019			mutex_enter(SD_MUTEX(un));
23020			if (!un->un_f_sync_cache_supported ||
23021			    !un->un_f_write_cache_enabled) {
23022				err = un->un_f_sync_cache_supported ?
23023				    0 : ENOTSUP;
23024				mutex_exit(SD_MUTEX(un));
23025				if ((flag & FKIOCTL) && dkc != NULL &&
23026				    dkc->dkc_callback != NULL) {
23027					(*dkc->dkc_callback)(dkc->dkc_cookie,
23028					    err);
23029					/*
23030					 * Did callback and reported error.
23031					 * Since we did a callback, ioctl
23032					 * should return 0.
23033					 */
23034					err = 0;
23035				}
23036				break;
23037			}
23038			mutex_exit(SD_MUTEX(un));
23039
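			/*
			 * The callback structure is a kernel pointer, so
			 * honor it only for in-kernel (FKIOCTL) callers.
			 */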
23040			if ((flag & FKIOCTL) && dkc != NULL &&
23041			    dkc->dkc_callback != NULL) {
23042				/* async SYNC CACHE request */
23043				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
23044			} else {
23045				/* synchronous SYNC CACHE request */
23046				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
23047			}
23048		}
23049		break;
23050
23051	case DKIOCGETWCE: {
23052
23053		int wce;
23054
23055		if ((err = sd_get_write_cache_enabled(ssc, &wce)) != 0) {
23056			break;
23057		}
23058
23059		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
23060			err = EFAULT;
23061		}
23062		break;
23063	}
23064
23065	case DKIOCSETWCE: {
23066
23067		int wce, sync_supported;
23068		int cur_wce = 0;
23069
23070		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
23071			err = EFAULT;
23072			break;
23073		}
23074
23075		/*
23076		 * Synchronize multiple threads trying to enable
23077		 * or disable the cache via the un_f_wcc_cv
23078		 * condition variable.
23079		 */
23080		mutex_enter(SD_MUTEX(un));
23081
23082		/*
23083		 * Don't allow the cache to be enabled if the
23084		 * config file has it disabled.
23085		 */
23086		if (un->un_f_opt_disable_cache && wce) {
23087			mutex_exit(SD_MUTEX(un));
23088			err = EINVAL;
23089			break;
23090		}
23091
23092		/*
23093		 * Wait for write cache change in progress
23094		 * bit to be clear before proceeding.
23095		 */
23096		while (un->un_f_wcc_inprog)
23097			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
23098
23099		un->un_f_wcc_inprog = 1;
23100
23101		mutex_exit(SD_MUTEX(un));
23102
23103		/*
23104		 * Get the current write cache state
23105		 */
23106		if ((err = sd_get_write_cache_enabled(ssc, &cur_wce)) != 0) {
23107			mutex_enter(SD_MUTEX(un));
23108			un->un_f_wcc_inprog = 0;
23109			cv_broadcast(&un->un_wcc_cv);
23110			mutex_exit(SD_MUTEX(un));
23111			break;
23112		}
23113
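		/*
		 * Reconcile the cached write cache state with the value
		 * just read from the device before deciding whether a
		 * change is needed.
		 */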
23114		mutex_enter(SD_MUTEX(un));
23115		un->un_f_write_cache_enabled = (cur_wce != 0);
23116
23117		if (un->un_f_write_cache_enabled && wce == 0) {
23118			/*
23119			 * Disable the write cache.  Don't clear
23120			 * un_f_write_cache_enabled until after
23121			 * the mode select and flush are complete.
23122			 */
23123			sync_supported = un->un_f_sync_cache_supported;
23124
23125			/*
23126			 * If cache flush is suppressed, we assume that the
23127			 * controller firmware will take care of managing the
23128			 * write cache for us: no need to explicitly
23129			 * disable it.
23130			 */
23131			if (!un->un_f_suppress_cache_flush) {
23132				mutex_exit(SD_MUTEX(un));
23133				if ((err = sd_cache_control(ssc,
23134				    SD_CACHE_NOCHANGE,
23135				    SD_CACHE_DISABLE)) == 0 &&
23136				    sync_supported) {
23137					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
23138					    NULL);
23139				}
23140			} else {
23141				mutex_exit(SD_MUTEX(un));
23142			}
23143
23144			mutex_enter(SD_MUTEX(un));
23145			if (err == 0) {
23146				un->un_f_write_cache_enabled = 0;
23147			}
23148
23149		} else if (!un->un_f_write_cache_enabled && wce != 0) {
23150			/*
23151			 * Set un_f_write_cache_enabled first, so there is
23152			 * no window where the cache is enabled, but the
23153			 * bit says it isn't.
23154			 */
23155			un->un_f_write_cache_enabled = 1;
23156
23157			/*
23158			 * If cache flush is suppressed, we assume that the
23159			 * controller firmware will take care of managing the
23160			 * write cache for us: no need to explicitly
23161			 * enable it.
23162			 */
23163			if (!un->un_f_suppress_cache_flush) {
23164				mutex_exit(SD_MUTEX(un));
23165				err = sd_cache_control(ssc, SD_CACHE_NOCHANGE,
23166				    SD_CACHE_ENABLE);
23167			} else {
23168				mutex_exit(SD_MUTEX(un));
23169			}
23170
23171			mutex_enter(SD_MUTEX(un));
23172
23173			if (err) {
23174				un->un_f_write_cache_enabled = 0;
23175			}
23176		}
23177
23178		un->un_f_wcc_inprog = 0;
23179		cv_broadcast(&un->un_wcc_cv);
23180		mutex_exit(SD_MUTEX(un));
23181		break;
23182	}
23183
23184	default:
23185		err = ENOTTY;
23186		break;
23187	}
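	/*
	 * Drop the command count taken at entry; a pending power level
	 * change may now proceed.
	 */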
23188	mutex_enter(SD_MUTEX(un));
23189	un->un_ncmds_in_driver--;
23190	ASSERT(un->un_ncmds_in_driver >= 0);
23191	mutex_exit(SD_MUTEX(un));
23194done_without_assess:
23195	sd_ssc_fini(ssc);
23196
23197	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
23198	return (err);
23199
23200done_with_assess:
23201	mutex_enter(SD_MUTEX(un));
23202	un->un_ncmds_in_driver--;
23203	ASSERT(un->un_ncmds_in_driver >= 0);
23204	mutex_exit(SD_MUTEX(un));
23205
23206done_quick_assess:
23207	if (err != 0)
23208		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23209	/* Uninitialize sd_ssc_t pointer */
23210	sd_ssc_fini(ssc);
23211
23212	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
23213	return (err);
23214}
23215
23216
23217/*
23218 *    Function: sd_dkio_ctrl_info
23219 *
23220 * Description: This routine is the driver entry point for handling controller
23221 *		information ioctl requests (DKIOCINFO).
23222 *
23223 *   Arguments: dev  - the device number
23224 *		arg  - pointer to user provided dk_cinfo structure
23225 *		       specifying the controller type and attributes.
23226 *		flag - this argument is a pass through to ddi_copyxxx()
23227 *		       directly from the mode argument of ioctl().
23228 *
23229 * Return Code: 0
23230 *		EFAULT
23231 *		ENXIO
23232 */
23233
23234static int
23235sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
23236{
23237	struct sd_lun	*un = NULL;
23238	struct dk_cinfo	*info;
23239	dev_info_t	*pdip;
23240	int		lun, tgt;
23241
23242	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23243		return (ENXIO);
23244	}
23245
23246	info = (struct dk_cinfo *)
23247	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
23248
23249	switch (un->un_ctype) {
23250	case CTYPE_CDROM:
23251		info->dki_ctype = DKC_CDROM;
23252		break;
23253	default:
23254		info->dki_ctype = DKC_SCSI_CCS;
23255		break;
23256	}
23257	pdip = ddi_get_parent(SD_DEVINFO(un));
23258	info->dki_cnum = ddi_get_instance(pdip);
23259	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
23260		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
23261	} else {
23262		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
23263		    DK_DEVLEN - 1);
23264	}
23265
23266	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
23267	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
23268	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
23269	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
23270
23271	/* Unit Information */
23272	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
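	/* Legacy slave number: target in the high bits, LUN in the low 3 */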
23273	info->dki_slave = ((tgt << 3) | lun);
23274	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
23275	    DK_DEVLEN - 1);
23276	info->dki_flags = DKI_FMTVOL;
23277	info->dki_partition = SDPART(dev);
23278
23279	/* Max Transfer size of this device in blocks */
23280	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
23281	info->dki_addr = 0;
23282	info->dki_space = 0;
23283	info->dki_prio = 0;
23284	info->dki_vec = 0;
23285
23286	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
23287		kmem_free(info, sizeof (struct dk_cinfo));
23288		return (EFAULT);
23289	} else {
23290		kmem_free(info, sizeof (struct dk_cinfo));
23291		return (0);
23292	}
23293}
23294
23295/*
23296 *    Function: sd_get_media_info_com
23297 *
23298 * Description: This routine returns the information required to populate
23299 *		the fields for the dk_minfo/dk_minfo_ext structures.
23300 *
23301 *   Arguments: dev		- the device number
23302 *		dki_media_type	- media_type
23303 *		dki_lbsize	- logical block size
23304 *		dki_capacity	- capacity in blocks
23305 *		dki_pbsize	- physical block size (if requested)
23306 *
23307 * Return Code: 0
 *		EACCES
23309 *		EFAULT
23310 *		ENXIO
23311 *		EIO
23312 */
23313static int
23314sd_get_media_info_com(dev_t dev, uint_t *dki_media_type, uint_t *dki_lbsize,
23315	diskaddr_t *dki_capacity, uint_t *dki_pbsize)
23316{
23317	struct sd_lun		*un = NULL;
23318	struct uscsi_cmd	com;
23319	struct scsi_inquiry	*sinq;
23320	u_longlong_t		media_capacity;
23321	uint64_t		capacity;
23322	uint_t			lbasize;
23323	uint_t			pbsize;
23324	uchar_t			*out_data;
23325	uchar_t			*rqbuf;
23326	int			rval = 0;
23327	int			rtn;
23328	sd_ssc_t		*ssc;
23329
23330	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
23331	    (un->un_state == SD_STATE_OFFLINE)) {
23332		return (ENXIO);
23333	}
23334
23335	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info_com: entry\n");
23336
23337	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
23338	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
23339	ssc = sd_ssc_init(un);
23340
23341	/* Issue a TUR to determine if the drive is ready with media present */
23342	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
23343	if (rval == ENXIO) {
23344		goto done;
23345	} else if (rval != 0) {
23346		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23347	}
23348
23349	/* Now get configuration data */
23350	if (ISCD(un)) {
23351		*dki_media_type = DK_CDROM;
23352
23353		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
23354		if (un->un_f_mmc_cap == TRUE) {
23355			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
23356			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
23357			    SD_PATH_STANDARD);
23358
23359			if (rtn) {
				/*
				 * We ignore all failures for CD and must
				 * record the assessment before the
				 * processing code below so FMA does not
				 * miss it.
				 */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				/*
				 * Fail if the command failed for anything
				 * other than an illegal request or an
				 * unsupported command.
				 */
23370				if ((com.uscsi_status == STATUS_CHECK) &&
23371				    (com.uscsi_rqstatus == STATUS_GOOD)) {
23372					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
23373					    (rqbuf[12] != 0x20)) {
23374						rval = EIO;
23375						goto no_assessment;
23376					}
23377				}
23378			} else {
23379				/*
23380				 * The GET CONFIGURATION command succeeded
23381				 * so set the media type according to the
23382				 * returned data
23383				 */
23384				*dki_media_type = out_data[6];
23385				*dki_media_type <<= 8;
23386				*dki_media_type |= out_data[7];
23387			}
23388		}
23389	} else {
23390		/*
23391		 * The profile list is not available, so we attempt to identify
23392		 * the media type based on the inquiry data
23393		 */
23394		sinq = un->un_sd->sd_inq;
23395		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
23396		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
23397			/* This is a direct access device  or optical disk */
23398			*dki_media_type = DK_FIXED_DISK;
23399
23400			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
23401			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
23402				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
23403					*dki_media_type = DK_ZIP;
23404				} else if (
23405				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
23406					*dki_media_type = DK_JAZ;
23407				}
23408			}
23409		} else {
			/*
			 * Not a CD, direct access, or optical disk, so
			 * return unknown media.
			 */
23414			*dki_media_type = DK_UNKNOWN;
23415		}
23416	}
23417
23418	/*
23419	 * Now read the capacity so we can provide the lbasize,
23420	 * pbsize and capacity.
23421	 */
23422	if (dki_pbsize && un->un_f_descr_format_supported)
23423		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
23424		    &pbsize, SD_PATH_DIRECT);
23425
23426	if (dki_pbsize == NULL || rval != 0 ||
23427	    !un->un_f_descr_format_supported) {
23428		rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
23429		    SD_PATH_DIRECT);
23430
23431		switch (rval) {
23432		case 0:
23433			if (un->un_f_enable_rmw &&
23434			    un->un_phy_blocksize != 0) {
23435				pbsize = un->un_phy_blocksize;
23436			} else {
23437				pbsize = lbasize;
23438			}
23439			media_capacity = capacity;
23440
			/*
			 * sd_send_scsi_READ_CAPACITY() reports capacity in
			 * un->un_sys_blocksize chunks, so we need to convert
			 * it into lbasize chunks.
			 */
23446			if (un->un_f_has_removable_media) {
23447				media_capacity *= un->un_sys_blocksize;
23448				media_capacity /= lbasize;
23449			}
23450			break;
23451		case EACCES:
23452			rval = EACCES;
23453			goto done;
23454		default:
23455			rval = EIO;
23456			goto done;
23457		}
23458	} else {
23459		if (un->un_f_enable_rmw &&
23460		    !ISP2(pbsize % DEV_BSIZE)) {
23461			pbsize = SSD_SECSIZE;
23462		} else if (!ISP2(lbasize % DEV_BSIZE) ||
23463		    !ISP2(pbsize % DEV_BSIZE)) {
23464			pbsize = lbasize = DEV_BSIZE;
23465		}
23466		media_capacity = capacity;
23467	}
23468
	/*
	 * If the lun has been expanded dynamically, update the un structure.
	 */
23472	mutex_enter(SD_MUTEX(un));
23473	if ((un->un_f_blockcount_is_valid == TRUE) &&
23474	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
23475	    (capacity > un->un_blockcount)) {
23476		un->un_f_expnevent = B_FALSE;
23477		sd_update_block_info(un, lbasize, capacity);
23478	}
23479	mutex_exit(SD_MUTEX(un));
23480
23481	*dki_lbsize = lbasize;
23482	*dki_capacity = media_capacity;
23483	if (dki_pbsize)
23484		*dki_pbsize = pbsize;
23485
23486done:
23487	if (rval != 0) {
23488		if (rval == EIO)
23489			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23490		else
23491			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23492	}
23493no_assessment:
23494	sd_ssc_fini(ssc);
23495	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
23496	kmem_free(rqbuf, SENSE_LENGTH);
23497	return (rval);
23498}
23499
23500/*
23501 *    Function: sd_get_media_info
23502 *
23503 * Description: This routine is the driver entry point for handling ioctl
23504 *		requests for the media type or command set profile used by the
23505 *		drive to operate on the media (DKIOCGMEDIAINFO).
23506 *
23507 *   Arguments: dev	- the device number
23508 *		arg	- pointer to user provided dk_minfo structure
23509 *			  specifying the media type, logical block size and
23510 *			  drive capacity.
23511 *		flag	- this argument is a pass through to ddi_copyxxx()
23512 *			  directly from the mode argument of ioctl().
23513 *
23514 * Return Code: returns the value from sd_get_media_info_com
23515 */
23516static int
23517sd_get_media_info(dev_t dev, caddr_t arg, int flag)
23518{
23519	struct dk_minfo		mi;
23520	int			rval;
23521
23522	rval = sd_get_media_info_com(dev, &mi.dki_media_type,
23523	    &mi.dki_lbsize, &mi.dki_capacity, NULL);
23524
23525	if (rval)
23526		return (rval);
23527	if (ddi_copyout(&mi, arg, sizeof (struct dk_minfo), flag))
23528		rval = EFAULT;
23529	return (rval);
23530}
23531
23532/*
23533 *    Function: sd_get_media_info_ext
23534 *
23535 * Description: This routine is the driver entry point for handling ioctl
23536 *		requests for the media type or command set profile used by the
 *		drive to operate on the media (DKIOCGMEDIAINFOEXT). The
 *		difference between this ioctl and DKIOCGMEDIAINFO is that the
 *		return value of this ioctl contains both the logical block
 *		size and the physical block size.
 *
23543 *   Arguments: dev	- the device number
23544 *		arg	- pointer to user provided dk_minfo_ext structure
23545 *			  specifying the media type, logical block size,
23546 *			  physical block size and disk capacity.
23547 *		flag	- this argument is a pass through to ddi_copyxxx()
23548 *			  directly from the mode argument of ioctl().
23549 *
23550 * Return Code: returns the value from sd_get_media_info_com
23551 */
23552static int
23553sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag)
23554{
23555	struct dk_minfo_ext	mie;
23556	int			rval = 0;
23557
23558	rval = sd_get_media_info_com(dev, &mie.dki_media_type,
23559	    &mie.dki_lbsize, &mie.dki_capacity, &mie.dki_pbsize);
23560
23561	if (rval)
23562		return (rval);
23563	if (ddi_copyout(&mie, arg, sizeof (struct dk_minfo_ext), flag))
23564		rval = EFAULT;
23565	return (rval);
}
23568
23569/*
23570 *    Function: sd_watch_request_submit
23571 *
23572 * Description: Call scsi_watch_request_submit or scsi_mmc_watch_request_submit
23573 *		depending on which is supported by device.
23574 */
23575static opaque_t
23576sd_watch_request_submit(struct sd_lun *un)
23577{
23578	dev_t			dev;
23579
23580	/* All submissions are unified to use same device number */
23581	dev = sd_make_device(SD_DEVINFO(un));
23582
23583	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
23584		return (scsi_mmc_watch_request_submit(SD_SCSI_DEVP(un),
23585		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23586		    (caddr_t)dev));
23587	} else {
23588		return (scsi_watch_request_submit(SD_SCSI_DEVP(un),
23589		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23590		    (caddr_t)dev));
23591	}
23592}
23593
23594
23595/*
23596 *    Function: sd_check_media
23597 *
23598 * Description: This utility routine implements the functionality for the
23599 *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
23600 *		driver state changes from that specified by the user
23601 *		(inserted or ejected). For example, if the user specifies
23602 *		DKIO_EJECTED and the current media state is inserted this
23603 *		routine will immediately return DKIO_INSERTED. However, if the
23604 *		current media state is not inserted the user thread will be
23605 *		blocked until the drive state changes. If DKIO_NONE is specified
23606 *		the user thread will block until a drive state change occurs.
23607 *
23608 *   Arguments: dev  - the device number
 *		state - the media state specified by the user; this routine
 *			blocks until the actual drive state differs from it.
23611 *
23612 * Return Code: ENXIO
23613 *		EIO
23614 *		EAGAIN
23615 *		EINTR
23616 */
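/*
 * A minimal user-level usage sketch (illustrative only; fd is an open
 * descriptor for the device, and handle_insertion() is a hypothetical
 * handler):
 *
 *	enum dkio_state state = DKIO_NONE;
 *
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		if (state == DKIO_INSERTED)
 *			handle_insertion();
 *	}
 *
 * Each call blocks until the drive state differs from the value passed
 * in, then returns the new state through the argument.
 */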
23617
23618static int
23619sd_check_media(dev_t dev, enum dkio_state state)
23620{
23621	struct sd_lun		*un = NULL;
23622	enum dkio_state		prev_state;
23623	opaque_t		token = NULL;
23624	int			rval = 0;
23625	sd_ssc_t		*ssc;
23626
23627	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23628		return (ENXIO);
23629	}
23630
23631	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
23632
23633	ssc = sd_ssc_init(un);
23634
23635	mutex_enter(SD_MUTEX(un));
23636
23637	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
23638	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
23639
23640	prev_state = un->un_mediastate;
23641
23642	/* is there anything to do? */
23643	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
23644		/*
23645		 * submit the request to the scsi_watch service;
23646		 * scsi_media_watch_cb() does the real work
23647		 */
23648		mutex_exit(SD_MUTEX(un));
23649
		/*
		 * Handle the case where a scsi watch request is added to a
		 * device that is powered down. To accomplish this we power
		 * up the device before adding the scsi watch request, since
		 * the scsi watch sends a TUR directly to the device, which
		 * the device cannot handle if it is powered down.
		 */
23657		if (sd_pm_entry(un) != DDI_SUCCESS) {
23658			mutex_enter(SD_MUTEX(un));
23659			goto done;
23660		}
23661
23662		token = sd_watch_request_submit(un);
23663
23664		sd_pm_exit(un);
23665
23666		mutex_enter(SD_MUTEX(un));
23667		if (token == NULL) {
23668			rval = EAGAIN;
23669			goto done;
23670		}
23671
		/*
		 * This is a special case IOCTL that doesn't return
		 * until the media state changes. Routine sdpower
		 * knows about and handles this, so don't count it
		 * as an active cmd in the driver, which would keep
		 * the device busy as far as the pm framework is
		 * concerned. If the count isn't decremented the
		 * device can't be powered down.
		 */
23681		un->un_ncmds_in_driver--;
23682		ASSERT(un->un_ncmds_in_driver >= 0);
23683
23684		/*
23685		 * if a prior request had been made, this will be the same
23686		 * token, as scsi_watch was designed that way.
23687		 */
23688		un->un_swr_token = token;
23689		un->un_specified_mediastate = state;
23690
		/*
		 * Now wait for the media state to change. We will not be
		 * signalled unless the media state changes from the
		 * specified state, but it is still better to test for this
		 * condition, since there is a 2 sec cv_broadcast delay when
		 * mediastate == DKIO_INSERTED.
		 */
23697		SD_TRACE(SD_LOG_COMMON, un,
23698		    "sd_check_media: waiting for media state change\n");
23699		while (un->un_mediastate == state) {
23700			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
23701				SD_TRACE(SD_LOG_COMMON, un,
23702				    "sd_check_media: waiting for media state "
23703				    "was interrupted\n");
23704				un->un_ncmds_in_driver++;
23705				rval = EINTR;
23706				goto done;
23707			}
23708			SD_TRACE(SD_LOG_COMMON, un,
23709			    "sd_check_media: received signal, state=%x\n",
23710			    un->un_mediastate);
23711		}
		/*
		 * Increment the counter to indicate the device once again
		 * has an active outstanding cmd.
		 */
23716		un->un_ncmds_in_driver++;
23717	}
23718
23719	/* invalidate geometry */
23720	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
23721		sr_ejected(un);
23722	}
23723
23724	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
23725		uint64_t	capacity;
23726		uint_t		lbasize;
23727
23728		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
23729		mutex_exit(SD_MUTEX(un));
		/*
		 * Since the following routines use SD_PATH_DIRECT, we must
		 * call PM directly before the upcoming disk accesses. This
		 * may cause the disk to be powered up and spun up.
		 */
23735
23736		if (sd_pm_entry(un) == DDI_SUCCESS) {
23737			rval = sd_send_scsi_READ_CAPACITY(ssc,
23738			    &capacity, &lbasize, SD_PATH_DIRECT);
23739			if (rval != 0) {
23740				sd_pm_exit(un);
23741				if (rval == EIO)
23742					sd_ssc_assessment(ssc,
23743					    SD_FMT_STATUS_CHECK);
23744				else
23745					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23746				mutex_enter(SD_MUTEX(un));
23747				goto done;
23748			}
23749		} else {
23750			rval = EIO;
23751			mutex_enter(SD_MUTEX(un));
23752			goto done;
23753		}
23754		mutex_enter(SD_MUTEX(un));
23755
23756		sd_update_block_info(un, lbasize, capacity);
23757
23758		/*
23759		 *  Check if the media in the device is writable or not
23760		 */
23761		if (ISCD(un)) {
23762			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
23763		}
23764
23765		mutex_exit(SD_MUTEX(un));
23766		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
23767		if ((cmlb_validate(un->un_cmlbhandle, 0,
23768		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
23769			sd_set_pstats(un);
23770			SD_TRACE(SD_LOG_IO_PARTITION, un,
23771			    "sd_check_media: un:0x%p pstats created and "
23772			    "set\n", un);
23773		}
23774
23775		rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
23776		    SD_PATH_DIRECT);
23777
23778		sd_pm_exit(un);
23779
23780		if (rval != 0) {
23781			if (rval == EIO)
23782				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23783			else
23784				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23785		}
23786
23787		mutex_enter(SD_MUTEX(un));
23788	}
23789done:
23790	sd_ssc_fini(ssc);
23791	un->un_f_watcht_stopped = FALSE;
23792	if (token != NULL && un->un_swr_token != NULL) {
23793		/*
23794		 * Use of this local token and the mutex ensures that we avoid
23795		 * some race conditions associated with terminating the
23796		 * scsi watch.
23797		 */
23798		token = un->un_swr_token;
23799		mutex_exit(SD_MUTEX(un));
23800		(void) scsi_watch_request_terminate(token,
23801		    SCSI_WATCH_TERMINATE_WAIT);
23802		if (scsi_watch_get_ref_count(token) == 0) {
23803			mutex_enter(SD_MUTEX(un));
23804			un->un_swr_token = (opaque_t)NULL;
23805		} else {
23806			mutex_enter(SD_MUTEX(un));
23807		}
23808	}
23809
	/*
	 * Update the capacity kstat value, if no media was previously
	 * present (capacity kstat is 0) and media has since been
	 * inserted (un_f_blockcount_is_valid == TRUE).
	 */
23815	if (un->un_errstats) {
23816		struct sd_errstats	*stp = NULL;
23817
23818		stp = (struct sd_errstats *)un->un_errstats->ks_data;
23819		if ((stp->sd_capacity.value.ui64 == 0) &&
23820		    (un->un_f_blockcount_is_valid == TRUE)) {
23821			stp->sd_capacity.value.ui64 =
23822			    (uint64_t)((uint64_t)un->un_blockcount *
23823			    un->un_sys_blocksize);
23824		}
23825	}
23826	mutex_exit(SD_MUTEX(un));
23827	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
23828	return (rval);
23829}
23830
23831
23832/*
23833 *    Function: sd_delayed_cv_broadcast
23834 *
23835 * Description: Delayed cv_broadcast to allow for target to recover from media
23836 *		insertion.
23837 *
23838 *   Arguments: arg - driver soft state (unit) structure
23839 */
23840
23841static void
23842sd_delayed_cv_broadcast(void *arg)
23843{
23844	struct sd_lun *un = arg;
23845
23846	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
23847
23848	mutex_enter(SD_MUTEX(un));
23849	un->un_dcvb_timeid = NULL;
23850	cv_broadcast(&un->un_state_cv);
23851	mutex_exit(SD_MUTEX(un));
23852}
23853
23854
23855/*
23856 *    Function: sd_media_watch_cb
23857 *
23858 * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
23859 *		routine processes the TUR sense data and updates the driver
23860 *		state if a transition has occurred. The user thread
23861 *		(sd_check_media) is then signalled.
23862 *
23863 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
23864 *			among multiple watches that share this callback function
23865 *		resultp - scsi watch facility result packet containing scsi
23866 *			  packet, status byte and sense data
23867 *
23868 * Return Code: 0 for success, -1 for failure
23869 */
23870
23871static int
23872sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
23873{
23874	struct sd_lun			*un;
23875	struct scsi_status		*statusp = resultp->statusp;
23876	uint8_t				*sensep = (uint8_t *)resultp->sensep;
23877	enum dkio_state			state = DKIO_NONE;
23878	dev_t				dev = (dev_t)arg;
23879	uchar_t				actual_sense_length;
23880	uint8_t				skey, asc, ascq;
23881
23882	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23883		return (-1);
23884	}
23885	actual_sense_length = resultp->actual_sense_length;
23886
23887	mutex_enter(SD_MUTEX(un));
23888	SD_TRACE(SD_LOG_COMMON, un,
23889	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
23890	    *((char *)statusp), (void *)sensep, actual_sense_length);
23891
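	/*
	 * If the device has gone away, record that fact and wake any
	 * waiter so the DKIOCSTATE ioctl can return.
	 */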
23892	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
23893		un->un_mediastate = DKIO_DEV_GONE;
23894		cv_broadcast(&un->un_state_cv);
23895		mutex_exit(SD_MUTEX(un));
23896
23897		return (0);
23898	}
23899
23900	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
23901		if (sd_gesn_media_data_valid(resultp->mmc_data)) {
23902			if ((resultp->mmc_data[5] &
23903			    SD_GESN_MEDIA_EVENT_STATUS_PRESENT) != 0) {
23904				state = DKIO_INSERTED;
23905			} else {
23906				state = DKIO_EJECTED;
23907			}
23908			if ((resultp->mmc_data[4] & SD_GESN_MEDIA_EVENT_CODE) ==
23909			    SD_GESN_MEDIA_EVENT_EJECTREQUEST) {
23910				sd_log_eject_request_event(un, KM_NOSLEEP);
23911			}
23912		}
23913	} else if (sensep != NULL) {
23914		/*
23915		 * If there was a check condition then sensep points to valid
23916		 * sense data. If status was not a check condition but a
23917		 * reservation or busy status then the new state is DKIO_NONE.
23918		 */
23919		skey = scsi_sense_key(sensep);
23920		asc = scsi_sense_asc(sensep);
23921		ascq = scsi_sense_ascq(sensep);
23922
23923		SD_INFO(SD_LOG_COMMON, un,
23924		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
23925		    skey, asc, ascq);
23926		/* This routine only uses up to 13 bytes of sense data. */
23927		if (actual_sense_length >= 13) {
23928			if (skey == KEY_UNIT_ATTENTION) {
23929				if (asc == 0x28) {
23930					state = DKIO_INSERTED;
23931				}
23932			} else if (skey == KEY_NOT_READY) {
23933				/*
23934				 * Sense data of 02/06/00 means that the
23935				 * drive could not read the media (No
23936				 * reference position found). In this case
23937				 * to prevent a hang on the DKIOCSTATE IOCTL
23938				 * we set the media state to DKIO_INSERTED.
23939				 */
23940				if (asc == 0x06 && ascq == 0x00)
23941					state = DKIO_INSERTED;
23942
				/*
				 * Sense data of 02/3A/xx (medium not
				 * present) means the media has been
				 * ejected. Sense data of 02/04/02 means
				 * the host should send a START command;
				 * explicitly leave the media state as is
				 * (inserted), since the media is present
				 * and the host has stopped the device
				 * for PM reasons. The next true
				 * read/write to this media will bring
				 * the device to the right state for
				 * media access.
				 */
23954				if (asc == 0x3a) {
23955					state = DKIO_EJECTED;
23956				} else {
23957					/*
23958					 * If the drive is busy with an
23959					 * operation or long write, keep the
23960					 * media in an inserted state.
23961					 */
23962
23963					if ((asc == 0x04) &&
23964					    ((ascq == 0x02) ||
23965					    (ascq == 0x07) ||
23966					    (ascq == 0x08))) {
23967						state = DKIO_INSERTED;
23968					}
23969				}
23970			} else if (skey == KEY_NO_SENSE) {
23971				if ((asc == 0x00) && (ascq == 0x00)) {
23972					/*
23973					 * Sense Data 00/00/00 does not provide
23974					 * any information about the state of
23975					 * the media. Ignore it.
23976					 */
23977					mutex_exit(SD_MUTEX(un));
23978					return (0);
23979				}
23980			}
23981		}
23982	} else if ((*((char *)statusp) == STATUS_GOOD) &&
23983	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
23984		state = DKIO_INSERTED;
23985	}
23986
23987	SD_TRACE(SD_LOG_COMMON, un,
23988	    "sd_media_watch_cb: state=%x, specified=%x\n",
23989	    state, un->un_specified_mediastate);
23990
23991	/*
23992	 * now signal the waiting thread if this is *not* the specified state;
23993	 * delay the signal if the state is DKIO_INSERTED to allow the target
23994	 * to recover
23995	 */
23996	if (state != un->un_specified_mediastate) {
23997		un->un_mediastate = state;
23998		if (state == DKIO_INSERTED) {
23999			/*
24000			 * delay the signal to give the drive a chance
24001			 * to do what it apparently needs to do
24002			 */
24003			SD_TRACE(SD_LOG_COMMON, un,
24004			    "sd_media_watch_cb: delayed cv_broadcast\n");
24005			if (un->un_dcvb_timeid == NULL) {
24006				un->un_dcvb_timeid =
24007				    timeout(sd_delayed_cv_broadcast, un,
24008				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24009			}
24010		} else {
24011			SD_TRACE(SD_LOG_COMMON, un,
24012			    "sd_media_watch_cb: immediate cv_broadcast\n");
24013			cv_broadcast(&un->un_state_cv);
24014		}
24015	}
24016	mutex_exit(SD_MUTEX(un));
24017	return (0);
24018}
24019
24020
24021/*
24022 *    Function: sd_dkio_get_temp
24023 *
24024 * Description: This routine is the driver entry point for handling ioctl
24025 *		requests to get the disk temperature.
24026 *
24027 *   Arguments: dev  - the device number
24028 *		arg  - pointer to user provided dk_temperature structure.
24029 *		flag - this argument is a pass through to ddi_copyxxx()
24030 *		       directly from the mode argument of ioctl().
24031 *
24032 * Return Code: 0
24033 *		EFAULT
24034 *		ENXIO
24035 *		EAGAIN
24036 */
24037
24038static int
24039sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24040{
24041	struct sd_lun		*un = NULL;
24042	struct dk_temperature	*dktemp = NULL;
24043	uchar_t			*temperature_page;
24044	int			rval = 0;
24045	int			path_flag = SD_PATH_STANDARD;
24046	sd_ssc_t		*ssc;
24047
24048	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24049		return (ENXIO);
24050	}
24051
24052	ssc = sd_ssc_init(un);
24053	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24054
24055	/* copyin the disk temp argument to get the user flags */
24056	if (ddi_copyin((void *)arg, dktemp,
24057	    sizeof (struct dk_temperature), flag) != 0) {
24058		rval = EFAULT;
24059		goto done;
24060	}
24061
24062	/* Initialize the temperature to invalid. */
24063	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24064	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24065
24066	/*
24067	 * Note: Investigate removing the "bypass pm" semantic.
24068	 * Can we just bypass PM always?
24069	 */
24070	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24071		path_flag = SD_PATH_DIRECT;
24072		ASSERT(!mutex_owned(&un->un_pm_mutex));
24073		mutex_enter(&un->un_pm_mutex);
24074		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24075			/*
			 * If DKT_BYPASS_PM is set and the drive happens to be
			 * in low-power mode, we cannot wake it up; return
			 * EAGAIN.
24079			 */
24080			mutex_exit(&un->un_pm_mutex);
24081			rval = EAGAIN;
24082			goto done;
24083		} else {
24084			/*
24085			 * Indicate to PM the device is busy. This is required
24086			 * to avoid a race - i.e. the ioctl is issuing a
24087			 * command and the pm framework brings down the device
24088			 * to low power mode (possible power cut-off on some
24089			 * platforms).
24090			 */
24091			mutex_exit(&un->un_pm_mutex);
24092			if (sd_pm_entry(un) != DDI_SUCCESS) {
24093				rval = EAGAIN;
24094				goto done;
24095			}
24096		}
24097	}
24098
24099	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24100
24101	rval = sd_send_scsi_LOG_SENSE(ssc, temperature_page,
24102	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag);
24103	if (rval != 0)
24104		goto done2;
24105
24106	/*
24107	 * For the current temperature verify that the parameter length is 0x02
24108	 * and the parameter code is 0x00
24109	 */
24110	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24111	    (temperature_page[5] == 0x00)) {
24112		if (temperature_page[9] == 0xFF) {
24113			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24114		} else {
24115			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24116		}
24117	}
24118
24119	/*
24120	 * For the reference temperature verify that the parameter
24121	 * length is 0x02 and the parameter code is 0x01
24122	 */
24123	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24124	    (temperature_page[11] == 0x01)) {
24125		if (temperature_page[15] == 0xFF) {
24126			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24127		} else {
24128			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24129		}
24130	}
24131
	/* Do the copyout regardless of the temperature command's status. */
24133	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24134	    flag) != 0) {
24135		rval = EFAULT;
24136		goto done1;
24137	}
24138
24139done2:
24140	if (rval != 0) {
24141		if (rval == EIO)
24142			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24143		else
24144			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24145	}
24146done1:
24147	if (path_flag == SD_PATH_DIRECT) {
24148		sd_pm_exit(un);
24149	}
24150
24151	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24152done:
24153	sd_ssc_fini(ssc);
24154	if (dktemp != NULL) {
24155		kmem_free(dktemp, sizeof (struct dk_temperature));
24156	}
24157
24158	return (rval);
24159}
24160
24161
24162/*
24163 *    Function: sd_log_page_supported
24164 *
24165 * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24166 *		supported log pages.
24167 *
 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
 *                      structure for this target.
 *		log_page - the log page code to search for.
 *
 * Return Code: -1 - on error (log sense is optional and may not be supported).
 *		0  - log page not found.
 *		1  - log page found.
24175 */
24176
24177static int
24178sd_log_page_supported(sd_ssc_t *ssc, int log_page)
24179{
24180	uchar_t *log_page_data;
24181	int	i;
24182	int	match = 0;
24183	int	log_size;
24184	int	status = 0;
24185	struct sd_lun	*un;
24186
24187	ASSERT(ssc != NULL);
24188	un = ssc->ssc_un;
24189	ASSERT(un != NULL);
24190
24191	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24192
24193	status = sd_send_scsi_LOG_SENSE(ssc, log_page_data, 0xFF, 0, 0x01, 0,
24194	    SD_PATH_DIRECT);
24195
24196	if (status != 0) {
24197		if (status == EIO) {
24198			/*
			 * Some disks do not support LOG SENSE; we
			 * should ignore this kind of error (sense key
			 * 0x5 - ILLEGAL REQUEST).
24202			 */
24203			uint8_t *sensep;
24204			int senlen;
24205
24206			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
24207			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
24208			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
24209
24210			if (senlen > 0 &&
24211			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
24212				sd_ssc_assessment(ssc,
24213				    SD_FMT_IGNORE_COMPROMISE);
24214			} else {
24215				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24216			}
24217		} else {
24218			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24219		}
24220
24221		SD_ERROR(SD_LOG_COMMON, un,
24222		    "sd_log_page_supported: failed log page retrieval\n");
24223		kmem_free(log_page_data, 0xFF);
24224		return (-1);
24225	}
24226
24227	log_size = log_page_data[3];
24228
24229	/*
	 * The list of supported log pages starts at byte offset 4. Check
24231	 * until we run out of log pages or a match is found.
24232	 */
24233	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24234		if (log_page_data[i] == log_page) {
24235			match++;
24236		}
24237	}
24238	kmem_free(log_page_data, 0xFF);
24239	return (match);
24240}
24241
24242
24243/*
24244 *    Function: sd_mhdioc_failfast
24245 *
24246 * Description: This routine is the driver entry point for handling ioctl
24247 *		requests to enable/disable the multihost failfast option.
24248 *		(MHIOCENFAILFAST)
24249 *
24250 *   Arguments: dev	- the device number
24251 *		arg	- user specified probing interval.
24252 *		flag	- this argument is a pass through to ddi_copyxxx()
24253 *			  directly from the mode argument of ioctl().
24254 *
24255 * Return Code: 0
24256 *		EFAULT
24257 *		ENXIO
24258 */
24259
24260static int
24261sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24262{
24263	struct sd_lun	*un = NULL;
24264	int		mh_time;
24265	int		rval = 0;
24266
24267	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24268		return (ENXIO);
24269	}
24270
24271	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24272		return (EFAULT);
24273
24274	if (mh_time) {
24275		mutex_enter(SD_MUTEX(un));
24276		un->un_resvd_status |= SD_FAILFAST;
24277		mutex_exit(SD_MUTEX(un));
24278		/*
24279		 * If mh_time is INT_MAX, then this ioctl is being used for
		 * SCSI-3 PGR purposes, and we don't need to spawn a watch
		 * thread.
24281		 */
24282		if (mh_time != INT_MAX) {
24283			rval = sd_check_mhd(dev, mh_time);
24284		}
24285	} else {
24286		(void) sd_check_mhd(dev, 0);
24287		mutex_enter(SD_MUTEX(un));
24288		un->un_resvd_status &= ~SD_FAILFAST;
24289		mutex_exit(SD_MUTEX(un));
24290	}
24291	return (rval);
24292}
24293
24294
24295/*
24296 *    Function: sd_mhdioc_takeown
24297 *
24298 * Description: This routine is the driver entry point for handling ioctl
24299 *		requests to forcefully acquire exclusive access rights to the
24300 *		multihost disk (MHIOCTKOWN).
24301 *
24302 *   Arguments: dev	- the device number
24303 *		arg	- user provided structure specifying the delay
24304 *			  parameters in milliseconds
24305 *		flag	- this argument is a pass through to ddi_copyxxx()
24306 *			  directly from the mode argument of ioctl().
24307 *
24308 * Return Code: 0
24309 *		EFAULT
24310 *		ENXIO
24311 */
24312
24313static int
24314sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24315{
24316	struct sd_lun		*un = NULL;
24317	struct mhioctkown	*tkown = NULL;
24318	int			rval = 0;
24319
24320	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24321		return (ENXIO);
24322	}
24323
24324	if (arg != NULL) {
24325		tkown = (struct mhioctkown *)
24326		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24327		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24328		if (rval != 0) {
24329			rval = EFAULT;
24330			goto error;
24331		}
24332	}
24333
24334	rval = sd_take_ownership(dev, tkown);
24335	mutex_enter(SD_MUTEX(un));
24336	if (rval == 0) {
24337		un->un_resvd_status |= SD_RESERVE;
24338		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24339			sd_reinstate_resv_delay =
24340			    tkown->reinstate_resv_delay * 1000;
24341		} else {
24342			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24343		}
24344		/*
24345		 * Give the scsi_watch routine interval set by
24346		 * the MHIOCENFAILFAST ioctl precedence here.
24347		 */
24348		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24349			mutex_exit(SD_MUTEX(un));
24350			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
24351			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24352			    "sd_mhdioc_takeown : %d\n",
24353			    sd_reinstate_resv_delay);
24354		} else {
24355			mutex_exit(SD_MUTEX(un));
24356		}
24357		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24358		    sd_mhd_reset_notify_cb, (caddr_t)un);
24359	} else {
24360		un->un_resvd_status &= ~SD_RESERVE;
24361		mutex_exit(SD_MUTEX(un));
24362	}
24363
24364error:
24365	if (tkown != NULL) {
24366		kmem_free(tkown, sizeof (struct mhioctkown));
24367	}
24368	return (rval);
24369}
24370
24371
24372/*
24373 *    Function: sd_mhdioc_release
24374 *
24375 * Description: This routine is the driver entry point for handling ioctl
24376 *		requests to release exclusive access rights to the multihost
24377 *		disk (MHIOCRELEASE).
24378 *
24379 *   Arguments: dev	- the device number
24380 *
24381 * Return Code: 0
24382 *		ENXIO
24383 */
24384
24385static int
24386sd_mhdioc_release(dev_t dev)
24387{
24388	struct sd_lun		*un = NULL;
24389	timeout_id_t		resvd_timeid_save;
24390	int			resvd_status_save;
24391	int			rval = 0;
24392
24393	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24394		return (ENXIO);
24395	}
24396
24397	mutex_enter(SD_MUTEX(un));
24398	resvd_status_save = un->un_resvd_status;
24399	un->un_resvd_status &=
24400	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24401	if (un->un_resvd_timeid) {
24402		resvd_timeid_save = un->un_resvd_timeid;
24403		un->un_resvd_timeid = NULL;
24404		mutex_exit(SD_MUTEX(un));
24405		(void) untimeout(resvd_timeid_save);
24406	} else {
24407		mutex_exit(SD_MUTEX(un));
24408	}
24409
24410	/*
24411	 * destroy any pending timeout thread that may be attempting to
24412	 * reinstate reservation on this device.
24413	 */
24414	sd_rmv_resv_reclaim_req(dev);
24415
24416	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24417		mutex_enter(SD_MUTEX(un));
24418		if ((un->un_mhd_token) &&
24419		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24420			mutex_exit(SD_MUTEX(un));
24421			(void) sd_check_mhd(dev, 0);
24422		} else {
24423			mutex_exit(SD_MUTEX(un));
24424		}
24425		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24426		    sd_mhd_reset_notify_cb, (caddr_t)un);
24427	} else {
24428		/*
24429		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
24430		 */
24431		mutex_enter(SD_MUTEX(un));
24432		un->un_resvd_status = resvd_status_save;
24433		mutex_exit(SD_MUTEX(un));
24434	}
24435	return (rval);
24436}
24437
24438
24439/*
24440 *    Function: sd_mhdioc_register_devid
24441 *
24442 * Description: This routine is the driver entry point for handling ioctl
24443 *		requests to register the device id (MHIOCREREGISTERDEVID).
24444 *
24445 *		Note: The implementation for this ioctl has been updated to
24446 *		be consistent with the original PSARC case (1999/357)
24447 *		(4375899, 4241671, 4220005)
24448 *
24449 *   Arguments: dev	- the device number
24450 *
24451 * Return Code: 0
24452 *		ENXIO
24453 */
24454
24455static int
24456sd_mhdioc_register_devid(dev_t dev)
24457{
24458	struct sd_lun	*un = NULL;
24459	int		rval = 0;
24460	sd_ssc_t	*ssc;
24461
24462	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24463		return (ENXIO);
24464	}
24465
24466	ASSERT(!mutex_owned(SD_MUTEX(un)));
24467
24468	mutex_enter(SD_MUTEX(un));
24469
24470	/* If a devid already exists, de-register it */
24471	if (un->un_devid != NULL) {
24472		ddi_devid_unregister(SD_DEVINFO(un));
24473		/*
		 * After unregistering the devid, free the devid memory.
24475		 */
24476		ddi_devid_free(un->un_devid);
24477		un->un_devid = NULL;
24478	}
24479
24480	/* Check for reservation conflict */
24481	mutex_exit(SD_MUTEX(un));
24482	ssc = sd_ssc_init(un);
24483	rval = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
24484	mutex_enter(SD_MUTEX(un));
24485
24486	switch (rval) {
24487	case 0:
24488		sd_register_devid(ssc, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24489		break;
24490	case EACCES:
24491		break;
24492	default:
24493		rval = EIO;
24494	}
24495
24496	mutex_exit(SD_MUTEX(un));
24497	if (rval != 0) {
24498		if (rval == EIO)
24499			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24500		else
24501			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24502	}
24503	sd_ssc_fini(ssc);
24504	return (rval);
24505}
24506
24507
24508/*
24509 *    Function: sd_mhdioc_inkeys
24510 *
24511 * Description: This routine is the driver entry point for handling ioctl
24512 *		requests to issue the SCSI-3 Persistent In Read Keys command
24513 *		to the device (MHIOCGRP_INKEYS).
24514 *
24515 *   Arguments: dev	- the device number
24516 *		arg	- user provided in_keys structure
24517 *		flag	- this argument is a pass through to ddi_copyxxx()
24518 *			  directly from the mode argument of ioctl().
24519 *
24520 * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24521 *		ENXIO
24522 *		EFAULT
24523 */
24524
24525static int
24526sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
24527{
24528	struct sd_lun		*un;
24529	mhioc_inkeys_t		inkeys;
24530	int			rval = 0;
24531
24532	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24533		return (ENXIO);
24534	}
24535
24536#ifdef _MULTI_DATAMODEL
24537	switch (ddi_model_convert_from(flag & FMODELS)) {
24538	case DDI_MODEL_ILP32: {
24539		struct mhioc_inkeys32	inkeys32;
24540
24541		if (ddi_copyin(arg, &inkeys32,
24542		    sizeof (struct mhioc_inkeys32), flag) != 0) {
24543			return (EFAULT);
24544		}
24545		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
24546		if ((rval = sd_persistent_reservation_in_read_keys(un,
24547		    &inkeys, flag)) != 0) {
24548			return (rval);
24549		}
24550		inkeys32.generation = inkeys.generation;
24551		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
24552		    flag) != 0) {
24553			return (EFAULT);
24554		}
24555		break;
24556	}
24557	case DDI_MODEL_NONE:
24558		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
24559		    flag) != 0) {
24560			return (EFAULT);
24561		}
24562		if ((rval = sd_persistent_reservation_in_read_keys(un,
24563		    &inkeys, flag)) != 0) {
24564			return (rval);
24565		}
24566		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
24567		    flag) != 0) {
24568			return (EFAULT);
24569		}
24570		break;
24571	}
24572
24573#else /* ! _MULTI_DATAMODEL */
24574
24575	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
24576		return (EFAULT);
24577	}
24578	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
24579	if (rval != 0) {
24580		return (rval);
24581	}
24582	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
24583		return (EFAULT);
24584	}
24585
24586#endif /* _MULTI_DATAMODEL */
24587
24588	return (rval);
24589}
24590
24591
24592/*
24593 *    Function: sd_mhdioc_inresv
24594 *
24595 * Description: This routine is the driver entry point for handling ioctl
24596 *		requests to issue the SCSI-3 Persistent In Read Reservations
 *		command to the device (MHIOCGRP_INRESV).
24598 *
24599 *   Arguments: dev	- the device number
24600 *		arg	- user provided in_resv structure
24601 *		flag	- this argument is a pass through to ddi_copyxxx()
24602 *			  directly from the mode argument of ioctl().
24603 *
24604 * Return Code: code returned by sd_persistent_reservation_in_read_resv()
24605 *		ENXIO
24606 *		EFAULT
24607 */
24608
24609static int
24610sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
24611{
24612	struct sd_lun		*un;
24613	mhioc_inresvs_t		inresvs;
24614	int			rval = 0;
24615
24616	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24617		return (ENXIO);
24618	}
24619
24620#ifdef _MULTI_DATAMODEL
24621
24622	switch (ddi_model_convert_from(flag & FMODELS)) {
24623	case DDI_MODEL_ILP32: {
24624		struct mhioc_inresvs32	inresvs32;
24625
24626		if (ddi_copyin(arg, &inresvs32,
24627		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24628			return (EFAULT);
24629		}
24630		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
24631		if ((rval = sd_persistent_reservation_in_read_resv(un,
24632		    &inresvs, flag)) != 0) {
24633			return (rval);
24634		}
24635		inresvs32.generation = inresvs.generation;
24636		if (ddi_copyout(&inresvs32, arg,
24637		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24638			return (EFAULT);
24639		}
24640		break;
24641	}
24642	case DDI_MODEL_NONE:
24643		if (ddi_copyin(arg, &inresvs,
24644		    sizeof (mhioc_inresvs_t), flag) != 0) {
24645			return (EFAULT);
24646		}
24647		if ((rval = sd_persistent_reservation_in_read_resv(un,
24648		    &inresvs, flag)) != 0) {
24649			return (rval);
24650		}
24651		if (ddi_copyout(&inresvs, arg,
24652		    sizeof (mhioc_inresvs_t), flag) != 0) {
24653			return (EFAULT);
24654		}
24655		break;
24656	}
24657
24658#else /* ! _MULTI_DATAMODEL */
24659
24660	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
24661		return (EFAULT);
24662	}
24663	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
24664	if (rval != 0) {
24665		return (rval);
24666	}
24667	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
24668		return (EFAULT);
24669	}
24670
24671#endif /* ! _MULTI_DATAMODEL */
24672
24673	return (rval);
24674}
24675
24676
24677/*
24678 * The following routines support the clustering functionality described below
24679 * and implement lost reservation reclaim functionality.
24680 *
24681 * Clustering
24682 * ----------
24683 * The clustering code uses two different, independent forms of SCSI
24684 * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
24685 * Persistent Group Reservations. For any particular disk, it will use either
24686 * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
24687 *
24688 * SCSI-2
24689 * The cluster software takes ownership of a multi-hosted disk by issuing the
24690 * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
24691 * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
24692 * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
24693 * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
24694 * driver. The meaning of failfast is that if the driver (on this host) ever
24695 * encounters the scsi error return code RESERVATION_CONFLICT from the device,
24696 * it should immediately panic the host. The motivation for this ioctl is that
24697 * if this host does encounter reservation conflict, the underlying cause is
24698 * that some other host of the cluster has decided that this host is no longer
24699 * in the cluster and has seized control of the disks for itself. Since this
24700 * host is no longer in the cluster, it ought to panic itself. The
24701 * MHIOCENFAILFAST ioctl does two things:
24702 *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
24703 *      error to panic the host
24704 *      (b) it sets up a periodic timer to test whether this host still has
24705 *      "access" (in that no other host has reserved the device):  if the
24706 *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
24707 *      purpose of that periodic timer is to handle scenarios where the host is
24708 *      otherwise temporarily quiescent, temporarily doing no real i/o.
24709 * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
24710 * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
24711 * the device itself.
24712 *
24713 * SCSI-3 PGR
24714 * A direct semantic implementation of the SCSI-3 Persistent Reservation
24715 * facility is supported through the shared multihost disk ioctls
24716 * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
24717 * MHIOCGRP_PREEMPTANDABORT)
24718 *
24719 * Reservation Reclaim:
24720 * --------------------
 * To support the lost reservation reclaim operations this driver creates a
 * single thread that reinstates reservations on all devices that have lost
 * them. sd_resv_reclaim_requests are logged for all devices that have lost
 * reservations when the scsi watch facility calls back sd_mhd_watch_cb, and
 * the reservation reclaim thread loops through the requests to regain the
 * lost reservations.
24727 */
24728
24729/*
24730 *    Function: sd_check_mhd()
24731 *
24732 * Description: This function sets up and submits a scsi watch request or
24733 *		terminates an existing watch request. This routine is used in
24734 *		support of reservation reclaim.
24735 *
 *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
 *			 among multiple watches that share the callback function
 *		interval - the watch interval, in milliseconds, for issuing
 *			   TEST UNIT READY commands. If set to 0 the watch
 *			   should be terminated. If the interval is set to 0
 *			   and the device is required to hold the reservation
 *			   while disabling failfast, the watch is restarted
 *			   with an interval of reinstate_resv_delay.
24745 *
24746 * Return Code: 0	   - Successful submit/terminate of scsi watch request
24747 *		ENXIO      - Indicates an invalid device was specified
24748 *		EAGAIN     - Unable to submit the scsi watch request
24749 */
24750
24751static int
24752sd_check_mhd(dev_t dev, int interval)
24753{
24754	struct sd_lun	*un;
24755	opaque_t	token;
24756
24757	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24758		return (ENXIO);
24759	}
24760
24761	/* is this a watch termination request? */
24762	if (interval == 0) {
24763		mutex_enter(SD_MUTEX(un));
24764		/* if there is an existing watch task then terminate it */
24765		if (un->un_mhd_token) {
24766			token = un->un_mhd_token;
24767			un->un_mhd_token = NULL;
24768			mutex_exit(SD_MUTEX(un));
24769			(void) scsi_watch_request_terminate(token,
24770			    SCSI_WATCH_TERMINATE_ALL_WAIT);
24771			mutex_enter(SD_MUTEX(un));
24772		} else {
24773			mutex_exit(SD_MUTEX(un));
24774			/*
24775			 * Note: If we return here we don't check for the
24776			 * failfast case. This is the original legacy
24777			 * implementation but perhaps we should be checking
24778			 * the failfast case.
24779			 */
24780			return (0);
24781		}
24782		/*
24783		 * If the device is required to hold reservation while
24784		 * disabling failfast, we need to restart the scsi_watch
24785		 * routine with an interval of reinstate_resv_delay.
24786		 */
24787		if (un->un_resvd_status & SD_RESERVE) {
24788			interval = sd_reinstate_resv_delay/1000;
24789		} else {
24790			/* no failfast so bail */
24791			mutex_exit(SD_MUTEX(un));
24792			return (0);
24793		}
24794		mutex_exit(SD_MUTEX(un));
24795	}
24796
24797	/*
24798	 * adjust minimum time interval to 1 second,
24799	 * and convert from msecs to usecs
24800	 */
24801	if (interval > 0 && interval < 1000) {
24802		interval = 1000;
24803	}
24804	interval *= 1000;
24805
24806	/*
24807	 * submit the request to the scsi_watch service
24808	 */
24809	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
24810	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
24811	if (token == NULL) {
24812		return (EAGAIN);
24813	}
24814
24815	/*
24816	 * save token for termination later on
24817	 */
24818	mutex_enter(SD_MUTEX(un));
24819	un->un_mhd_token = token;
24820	mutex_exit(SD_MUTEX(un));
24821	return (0);
24822}
24823
24824
24825/*
24826 *    Function: sd_mhd_watch_cb()
24827 *
24828 * Description: This function is the call back function used by the scsi watch
24829 *		facility. The scsi watch facility sends the "Test Unit Ready"
24830 *		and processes the status. If applicable (i.e. a "Unit Attention"
24831 *		status and automatic "Request Sense" not used) the scsi watch
24832 *		facility will send a "Request Sense" and retrieve the sense data
24833 *		to be passed to this callback function. In either case the
24834 *		automatic "Request Sense" or the facility submitting one, this
24835 *		callback is passed the status and sense data.
24836 *
24837 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24838 *			among multiple watches that share this callback function
24839 *		resultp - scsi watch facility result packet containing scsi
24840 *			  packet, status byte and sense data
24841 *
24842 * Return Code: 0 - continue the watch task
24843 *		non-zero - terminate the watch task
24844 */
24845
24846static int
24847sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24848{
24849	struct sd_lun			*un;
24850	struct scsi_status		*statusp;
24851	uint8_t				*sensep;
24852	struct scsi_pkt			*pkt;
24853	uchar_t				actual_sense_length;
	dev_t				dev = (dev_t)arg;
24855
24856	ASSERT(resultp != NULL);
24857	statusp			= resultp->statusp;
24858	sensep			= (uint8_t *)resultp->sensep;
24859	pkt			= resultp->pkt;
24860	actual_sense_length	= resultp->actual_sense_length;
24861
24862	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24863		return (ENXIO);
24864	}
24865
24866	SD_TRACE(SD_LOG_IOCTL_MHD, un,
24867	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
24868	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
24869
24870	/* Begin processing of the status and/or sense data */
24871	if (pkt->pkt_reason != CMD_CMPLT) {
24872		/* Handle the incomplete packet */
24873		sd_mhd_watch_incomplete(un, pkt);
24874		return (0);
24875	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
24876		if (*((unsigned char *)statusp)
24877		    == STATUS_RESERVATION_CONFLICT) {
24878			/*
24879			 * Handle a reservation conflict by panicking if
24880			 * configured for failfast or by logging the conflict
24881			 * and updating the reservation status
24882			 */
24883			mutex_enter(SD_MUTEX(un));
24884			if ((un->un_resvd_status & SD_FAILFAST) &&
24885			    (sd_failfast_enable)) {
24886				sd_panic_for_res_conflict(un);
24887				/*NOTREACHED*/
24888			}
24889			SD_INFO(SD_LOG_IOCTL_MHD, un,
24890			    "sd_mhd_watch_cb: Reservation Conflict\n");
24891			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
24892			mutex_exit(SD_MUTEX(un));
24893		}
24894	}
24895
24896	if (sensep != NULL) {
24897		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
24898			mutex_enter(SD_MUTEX(un));
24899			if ((scsi_sense_asc(sensep) ==
24900			    SD_SCSI_RESET_SENSE_CODE) &&
24901			    (un->un_resvd_status & SD_RESERVE)) {
24902				/*
24903				 * The additional sense code indicates a power
24904				 * on or bus device reset has occurred; update
24905				 * the reservation status.
24906				 */
24907				un->un_resvd_status |=
24908				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
24909				SD_INFO(SD_LOG_IOCTL_MHD, un,
24910				    "sd_mhd_watch_cb: Lost Reservation\n");
24911			}
24912		} else {
24913			return (0);
24914		}
24915	} else {
24916		mutex_enter(SD_MUTEX(un));
24917	}
24918
24919	if ((un->un_resvd_status & SD_RESERVE) &&
24920	    (un->un_resvd_status & SD_LOST_RESERVE)) {
24921		if (un->un_resvd_status & SD_WANT_RESERVE) {
24922			/*
24923			 * A reset occurred in between the last probe and this
24924			 * one so if a timeout is pending cancel it.
24925			 */
24926			if (un->un_resvd_timeid) {
24927				timeout_id_t temp_id = un->un_resvd_timeid;
24928				un->un_resvd_timeid = NULL;
24929				mutex_exit(SD_MUTEX(un));
24930				(void) untimeout(temp_id);
24931				mutex_enter(SD_MUTEX(un));
24932			}
24933			un->un_resvd_status &= ~SD_WANT_RESERVE;
24934		}
24935		if (un->un_resvd_timeid == 0) {
24936			/* Schedule a timeout to handle the lost reservation */
24937			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
24938			    (void *)dev,
24939			    drv_usectohz(sd_reinstate_resv_delay));
24940		}
24941	}
24942	mutex_exit(SD_MUTEX(un));
24943	return (0);
24944}
24945
24946
24947/*
24948 *    Function: sd_mhd_watch_incomplete()
24949 *
24950 * Description: This function is used to find out why a scsi pkt sent by the
24951 *		scsi watch facility was not completed. Under some scenarios this
24952 *		routine will return. Otherwise it will send a bus reset to see
24953 *		if the drive is still online.
24954 *
24955 *   Arguments: un  - driver soft state (unit) structure
24956 *		pkt - incomplete scsi pkt
24957 */
24958
24959static void
24960sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
24961{
24962	int	be_chatty;
24963	int	perr;
24964
24965	ASSERT(pkt != NULL);
24966	ASSERT(un != NULL);
24967	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
24968	perr		= (pkt->pkt_statistics & STAT_PERR);
24969
24970	mutex_enter(SD_MUTEX(un));
24971	if (un->un_state == SD_STATE_DUMPING) {
24972		mutex_exit(SD_MUTEX(un));
24973		return;
24974	}
24975
24976	switch (pkt->pkt_reason) {
24977	case CMD_UNX_BUS_FREE:
24978		/*
24979		 * If we had a parity error that caused the target to drop BSY*,
24980		 * don't be chatty about it.
24981		 */
24982		if (perr && be_chatty) {
24983			be_chatty = 0;
24984		}
24985		break;
24986	case CMD_TAG_REJECT:
24987		/*
24988		 * The SCSI-2 spec states that a tag reject will be sent by the
24989		 * target if tagged queuing is not supported. A tag reject may
24990		 * also be sent during certain initialization periods or to
24991		 * control internal resources. For the latter case the target
24992		 * may also return Queue Full.
24993		 *
24994		 * If this driver receives a tag reject from a target that is
24995		 * going through an init period or controlling internal
24996		 * resources tagged queuing will be disabled. This is a less
24997		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queuing is not
		 * supported.
24999		 */
25000		pkt->pkt_flags = 0;
25001		un->un_tagflags = 0;
25002
25003		if (un->un_f_opt_queueing == TRUE) {
25004			un->un_throttle = min(un->un_throttle, 3);
25005		} else {
25006			un->un_throttle = 1;
25007		}
25008		mutex_exit(SD_MUTEX(un));
25009		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25010		mutex_enter(SD_MUTEX(un));
25011		break;
25012	case CMD_INCOMPLETE:
25013		/*
		 * The transport stopped in an abnormal state; fall through and
25015		 * reset the target and/or bus unless selection did not complete
25016		 * (indicated by STATE_GOT_BUS) in which case we don't want to
25017		 * go through a target/bus reset
25018		 */
25019		if (pkt->pkt_state == STATE_GOT_BUS) {
25020			break;
25021		}
25022		/*FALLTHROUGH*/
25023
25024	case CMD_TIMEOUT:
25025	default:
25026		/*
25027		 * The lun may still be running the command, so a lun reset
25028		 * should be attempted. If the lun reset fails or cannot be
		 * issued, then try a target reset. Lastly, try a bus reset.
25030		 */
25031		if ((pkt->pkt_statistics &
25032		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25033			int reset_retval = 0;
25034			mutex_exit(SD_MUTEX(un));
25035			if (un->un_f_allow_bus_device_reset == TRUE) {
25036				if (un->un_f_lun_reset_enabled == TRUE) {
25037					reset_retval =
25038					    scsi_reset(SD_ADDRESS(un),
25039					    RESET_LUN);
25040				}
25041				if (reset_retval == 0) {
25042					reset_retval =
25043					    scsi_reset(SD_ADDRESS(un),
25044					    RESET_TARGET);
25045				}
25046			}
25047			if (reset_retval == 0) {
25048				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25049			}
25050			mutex_enter(SD_MUTEX(un));
25051		}
25052		break;
25053	}
25054
25055	/* A device/bus reset has occurred; update the reservation status. */
25056	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25057	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25058		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25059			un->un_resvd_status |=
25060			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25061			SD_INFO(SD_LOG_IOCTL_MHD, un,
25062			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25063		}
25064	}
25065
25066	/*
	 * The disk has been turned off; update the device state.
25068	 *
25069	 * Note: Should we be offlining the disk here?
25070	 */
25071	if (pkt->pkt_state == STATE_GOT_BUS) {
25072		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25073		    "Disk not responding to selection\n");
25074		if (un->un_state != SD_STATE_OFFLINE) {
25075			New_state(un, SD_STATE_OFFLINE);
25076		}
25077	} else if (be_chatty) {
25078		/*
25079		 * suppress messages if they are all the same pkt reason;
25080		 * with TQ, many (up to 256) are returned with the same
25081		 * pkt_reason
25082		 */
25083		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25084			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25085			    "sd_mhd_watch_incomplete: "
25086			    "SCSI transport failed: reason '%s'\n",
25087			    scsi_rname(pkt->pkt_reason));
25088		}
25089	}
25090	un->un_last_pkt_reason = pkt->pkt_reason;
25091	mutex_exit(SD_MUTEX(un));
25092}
25093
25094
25095/*
25096 *    Function: sd_sname()
25097 *
25098 * Description: This is a simple little routine to return a string containing
25099 *		a printable description of command status byte for use in
25100 *		logging.
25101 *
25102 *   Arguments: status - pointer to a status byte
25103 *
25104 * Return Code: char * - string containing status description.
25105 */
25106
25107static char *
25108sd_sname(uchar_t status)
25109{
25110	switch (status & STATUS_MASK) {
25111	case STATUS_GOOD:
25112		return ("good status");
25113	case STATUS_CHECK:
25114		return ("check condition");
25115	case STATUS_MET:
25116		return ("condition met");
25117	case STATUS_BUSY:
25118		return ("busy");
25119	case STATUS_INTERMEDIATE:
25120		return ("intermediate");
25121	case STATUS_INTERMEDIATE_MET:
25122		return ("intermediate - condition met");
25123	case STATUS_RESERVATION_CONFLICT:
25124		return ("reservation_conflict");
25125	case STATUS_TERMINATED:
25126		return ("command terminated");
25127	case STATUS_QFULL:
25128		return ("queue full");
25129	default:
25130		return ("<unknown status>");
25131	}
25132}
25133
25134
25135/*
25136 *    Function: sd_mhd_resvd_recover()
25137 *
25138 * Description: This function adds a reservation entry to the
25139 *		sd_resv_reclaim_request list and signals the reservation
25140 *		reclaim thread that there is work pending. If the reservation
25141 *		reclaim thread has not been previously created this function
25142 *		will kick it off.
25143 *
25144 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25145 *			among multiple watches that share this callback function
25146 *
25147 *     Context: This routine is called by timeout() and is run in interrupt
25148 *		context. It must not sleep or call other functions which may
25149 *		sleep.
25150 */
25151
25152static void
25153sd_mhd_resvd_recover(void *arg)
25154{
25155	dev_t			dev = (dev_t)arg;
25156	struct sd_lun		*un;
25157	struct sd_thr_request	*sd_treq = NULL;
25158	struct sd_thr_request	*sd_cur = NULL;
25159	struct sd_thr_request	*sd_prev = NULL;
25160	int			already_there = 0;
25161
25162	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25163		return;
25164	}
25165
25166	mutex_enter(SD_MUTEX(un));
25167	un->un_resvd_timeid = NULL;
25168	if (un->un_resvd_status & SD_WANT_RESERVE) {
25169		/*
25170		 * There was a reset so don't issue the reserve, allow the
25171		 * sd_mhd_watch_cb callback function to notice this and
25172		 * reschedule the timeout for reservation.
25173		 */
25174		mutex_exit(SD_MUTEX(un));
25175		return;
25176	}
25177	mutex_exit(SD_MUTEX(un));
25178
25179	/*
25180	 * Add this device to the sd_resv_reclaim_request list and the
25181	 * sd_resv_reclaim_thread should take care of the rest.
25182	 *
	 * Note: We can't sleep in this context, so if the memory allocation
	 * fails, allow the sd_mhd_watch_cb callback function to notice this
	 * and reschedule the timeout for reservation.  (4378460)
25186	 */
25187	sd_treq = (struct sd_thr_request *)
25188	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25189	if (sd_treq == NULL) {
25190		return;
25191	}
25192
25193	sd_treq->sd_thr_req_next = NULL;
25194	sd_treq->dev = dev;
25195	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25196	if (sd_tr.srq_thr_req_head == NULL) {
25197		sd_tr.srq_thr_req_head = sd_treq;
25198	} else {
25199		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25200		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25201			if (sd_cur->dev == dev) {
25202				/*
				 * already in the queue, so don't log
				 * another request for the device.
25205				 */
25206				already_there = 1;
25207				break;
25208			}
25209			sd_prev = sd_cur;
25210		}
25211		if (!already_there) {
25212			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25213			    "logging request for %lx\n", dev);
25214			sd_prev->sd_thr_req_next = sd_treq;
25215		} else {
25216			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25217		}
25218	}
25219
25220	/*
	 * Create a kernel thread to do the reservation reclaim if one does
	 * not already exist. This (timeout) thread cannot block while the
	 * reclaim is performed, so the work is handed off.
25224	 */
25225	if (sd_tr.srq_resv_reclaim_thread == NULL)
25226		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25227		    sd_resv_reclaim_thread, NULL,
25228		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
25229
25230	/* Tell the reservation reclaim thread that it has work to do */
25231	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25232	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25233}
25234
25235/*
25236 *    Function: sd_resv_reclaim_thread()
25237 *
25238 * Description: This function implements the reservation reclaim operations
25239 *
 *   Arguments: none. Reclaim requests are taken from the global sd_tr
 *		request queue.
25242 */
25243
25244static void
25245sd_resv_reclaim_thread()
25246{
25247	struct sd_lun		*un;
25248	struct sd_thr_request	*sd_mhreq;
25249
25250	/* Wait for work */
25251	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25252	if (sd_tr.srq_thr_req_head == NULL) {
25253		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25254		    &sd_tr.srq_resv_reclaim_mutex);
25255	}
25256
25257	/* Loop while we have work */
25258	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25259		un = ddi_get_soft_state(sd_state,
25260		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25261		if (un == NULL) {
25262			/*
25263			 * softstate structure is NULL so just
25264			 * dequeue the request and continue
25265			 */
25266			sd_tr.srq_thr_req_head =
25267			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25268			kmem_free(sd_tr.srq_thr_cur_req,
25269			    sizeof (struct sd_thr_request));
25270			continue;
25271		}
25272
25273		/* dequeue the request */
25274		sd_mhreq = sd_tr.srq_thr_cur_req;
25275		sd_tr.srq_thr_req_head =
25276		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25277		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25278
25279		/*
25280		 * Reclaim reservation only if SD_RESERVE is still set. There
25281		 * may have been a call to MHIOCRELEASE before we got here.
25282		 */
25283		mutex_enter(SD_MUTEX(un));
25284		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25285			/*
25286			 * Note: The SD_LOST_RESERVE flag is cleared before
			 * reclaiming the reservation. If this is done after
			 * the call to sd_reserve_release, a reservation loss
			 * in the window between pkt completion of the reserve
			 * cmd and the mutex_enter below may not be recognized.
25291			 */
25292			un->un_resvd_status &= ~SD_LOST_RESERVE;
25293			mutex_exit(SD_MUTEX(un));
25294
25295			if (sd_reserve_release(sd_mhreq->dev,
25296			    SD_RESERVE) == 0) {
25297				mutex_enter(SD_MUTEX(un));
25298				un->un_resvd_status |= SD_RESERVE;
25299				mutex_exit(SD_MUTEX(un));
25300				SD_INFO(SD_LOG_IOCTL_MHD, un,
25301				    "sd_resv_reclaim_thread: "
25302				    "Reservation Recovered\n");
25303			} else {
25304				mutex_enter(SD_MUTEX(un));
25305				un->un_resvd_status |= SD_LOST_RESERVE;
25306				mutex_exit(SD_MUTEX(un));
25307				SD_INFO(SD_LOG_IOCTL_MHD, un,
25308				    "sd_resv_reclaim_thread: Failed "
25309				    "Reservation Recovery\n");
25310			}
25311		} else {
25312			mutex_exit(SD_MUTEX(un));
25313		}
25314		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25315		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25316		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25317		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25318		/*
		 * Wake up the destroy thread if anyone is waiting on
25320		 * us to complete.
25321		 */
25322		cv_signal(&sd_tr.srq_inprocess_cv);
25323		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25324		    "sd_resv_reclaim_thread: cv_signalling current request \n");
25325	}
25326
25327	/*
25328	 * cleanup the sd_tr structure now that this thread will not exist
25329	 */
25330	ASSERT(sd_tr.srq_thr_req_head == NULL);
25331	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25332	sd_tr.srq_resv_reclaim_thread = NULL;
25333	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25334	thread_exit();
25335}
25336
25337
25338/*
25339 *    Function: sd_rmv_resv_reclaim_req()
25340 *
25341 * Description: This function removes any pending reservation reclaim requests
25342 *		for the specified device.
25343 *
25344 *   Arguments: dev - the device 'dev_t'
25345 */
25346
25347static void
25348sd_rmv_resv_reclaim_req(dev_t dev)
25349{
25350	struct sd_thr_request *sd_mhreq;
25351	struct sd_thr_request *sd_prev;
25352
25353	/* Remove a reservation reclaim request from the list */
25354	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25355	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25356		/*
25357		 * We are attempting to reinstate reservation for
25358		 * this device. We wait for sd_reserve_release()
25359		 * to return before we return.
25360		 */
25361		cv_wait(&sd_tr.srq_inprocess_cv,
25362		    &sd_tr.srq_resv_reclaim_mutex);
25363	} else {
25364		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25365		if (sd_mhreq && sd_mhreq->dev == dev) {
25366			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25367			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25368			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25369			return;
25370		}
25371		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25372			if (sd_mhreq && sd_mhreq->dev == dev) {
25373				break;
25374			}
25375			sd_prev = sd_mhreq;
25376		}
25377		if (sd_mhreq != NULL) {
25378			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25379			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25380		}
25381	}
25382	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25383}
25384
25385
25386/*
25387 *    Function: sd_mhd_reset_notify_cb()
25388 *
25389 * Description: This is a call back function for scsi_reset_notify. This
25390 *		function updates the softstate reserved status and logs the
25391 *		reset. The driver scsi watch facility callback function
25392 *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25393 *		will reclaim the reservation.
25394 *
25395 *   Arguments: arg  - driver soft state (unit) structure
25396 */
25397
25398static void
25399sd_mhd_reset_notify_cb(caddr_t arg)
25400{
25401	struct sd_lun *un = (struct sd_lun *)arg;
25402
25403	mutex_enter(SD_MUTEX(un));
25404	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25405		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25406		SD_INFO(SD_LOG_IOCTL_MHD, un,
25407		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25408	}
25409	mutex_exit(SD_MUTEX(un));
25410}
25411
25412
25413/*
25414 *    Function: sd_take_ownership()
25415 *
25416 * Description: This routine implements an algorithm to achieve a stable
25417 *		reservation on disks which don't implement priority reserve,
25418 *		and makes sure that other host lose re-reservation attempts.
25419 *		This algorithm contains of a loop that keeps issuing the RESERVE
25420 *		for some period of time (min_ownership_delay, default 6 seconds)
25421 *		During that loop, it looks to see if there has been a bus device
25422 *		reset or bus reset (both of which cause an existing reservation
25423 *		to be lost). If the reservation is lost issue RESERVE until a
25424 *		period of min_ownership_delay with no resets has gone by, or
25425 *		until max_ownership_delay has expired. This loop ensures that
25426 *		the host really did manage to reserve the device, in spite of
25427 *		resets. The looping for min_ownership_delay (default six
25428 *		seconds) is important to early generation clustering products,
25429 *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25430 *		MHIOCENFAILFAST periodic timer of two seconds. By having
25431 *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25432 *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25433 *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25434 *		have already noticed, via the MHIOCENFAILFAST polling, that it
25435 *		no longer "owns" the disk and will have panicked itself.  Thus,
25436 *		the host issuing the MHIOCTKOWN is assured (with timing
25437 *		dependencies) that by the time it actually starts to use the
25438 *		disk for real work, the old owner is no longer accessing it.
25439 *
25440 *		min_ownership_delay is the minimum amount of time for which the
25441 *		disk must be reserved continuously devoid of resets before the
25442 *		MHIOCTKOWN ioctl will return success.
25443 *
25444 *		max_ownership_delay indicates the amount of time by which the
25445 *		take ownership should succeed or timeout with an error.
25446 *
25447 *   Arguments: dev - the device 'dev_t'
25448 *		*p  - struct containing timing info.
25449 *
25450 * Return Code: 0 for success or error code
25451 */
25452
25453static int
25454sd_take_ownership(dev_t dev, struct mhioctkown *p)
25455{
25456	struct sd_lun	*un;
25457	int		rval;
25458	int		err;
25459	int		reservation_count   = 0;
25460	int		min_ownership_delay =  6000000; /* in usec */
25461	int		max_ownership_delay = 30000000; /* in usec */
25462	clock_t		start_time;	/* starting time of this algorithm */
25463	clock_t		end_time;	/* time limit for giving up */
25464	clock_t		ownership_time;	/* time limit for stable ownership */
25465	clock_t		current_time;
25466	clock_t		previous_current_time;
25467
25468	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25469		return (ENXIO);
25470	}
25471
25472	/*
25473	 * Attempt a device reservation. A priority reservation is requested.
25474	 */
25475	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
25476	    != SD_SUCCESS) {
25477		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25478		    "sd_take_ownership: return(1)=%d\n", rval);
25479		return (rval);
25480	}
25481
25482	/* Update the softstate reserved status to indicate the reservation */
25483	mutex_enter(SD_MUTEX(un));
25484	un->un_resvd_status |= SD_RESERVE;
25485	un->un_resvd_status &=
25486	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
25487	mutex_exit(SD_MUTEX(un));
25488
25489	if (p != NULL) {
25490		if (p->min_ownership_delay != 0) {
25491			min_ownership_delay = p->min_ownership_delay * 1000;
25492		}
25493		if (p->max_ownership_delay != 0) {
25494			max_ownership_delay = p->max_ownership_delay * 1000;
25495		}
25496	}
25497	SD_INFO(SD_LOG_IOCTL_MHD, un,
25498	    "sd_take_ownership: min, max delays: %d, %d\n",
25499	    min_ownership_delay, max_ownership_delay);
25500
25501	start_time = ddi_get_lbolt();
25502	current_time	= start_time;
25503	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
25504	end_time	= start_time + drv_usectohz(max_ownership_delay);
25505
25506	while (current_time - end_time < 0) {
25507		delay(drv_usectohz(500000));
25508
25509		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
25510			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
25511				mutex_enter(SD_MUTEX(un));
25512				rval = (un->un_resvd_status &
25513				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
25514				mutex_exit(SD_MUTEX(un));
25515				break;
25516			}
25517		}
25518		previous_current_time = current_time;
25519		current_time = ddi_get_lbolt();
25520		mutex_enter(SD_MUTEX(un));
25521		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
25522			ownership_time = ddi_get_lbolt() +
25523			    drv_usectohz(min_ownership_delay);
25524			reservation_count = 0;
25525		} else {
25526			reservation_count++;
25527		}
25528		un->un_resvd_status |= SD_RESERVE;
25529		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
25530		mutex_exit(SD_MUTEX(un));
25531
25532		SD_INFO(SD_LOG_IOCTL_MHD, un,
25533		    "sd_take_ownership: ticks for loop iteration=%ld, "
25534		    "reservation=%s\n", (current_time - previous_current_time),
25535		    reservation_count ? "ok" : "reclaimed");
25536
25537		if (current_time - ownership_time >= 0 &&
25538		    reservation_count >= 4) {
25539			rval = 0; /* Achieved a stable ownership */
25540			break;
25541		}
25542		if (current_time - end_time >= 0) {
25543			rval = EACCES; /* No ownership in max possible time */
25544			break;
25545		}
25546	}
25547	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25548	    "sd_take_ownership: return(2)=%d\n", rval);
25549	return (rval);
25550}
25551
25552
25553/*
25554 *    Function: sd_reserve_release()
25555 *
25556 * Description: This function builds and sends scsi RESERVE, RELEASE, and
25557 *		PRIORITY RESERVE commands based on a user specified command type
25558 *
25559 *   Arguments: dev - the device 'dev_t'
25560 *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25561 *		      SD_RESERVE, SD_RELEASE
25562 *
25563 * Return Code: 0 or Error Code
25564 */
25565
25566static int
25567sd_reserve_release(dev_t dev, int cmd)
25568{
25569	struct uscsi_cmd	*com = NULL;
25570	struct sd_lun		*un = NULL;
25571	char			cdb[CDB_GROUP0];
25572	int			rval;
25573
25574	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
25575	    (cmd == SD_PRIORITY_RESERVE));
25576
25577	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25578		return (ENXIO);
25579	}
25580
25581	/* instantiate and initialize the command and cdb */
25582	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25583	bzero(cdb, CDB_GROUP0);
25584	com->uscsi_flags   = USCSI_SILENT;
25585	com->uscsi_timeout = un->un_reserve_release_time;
25586	com->uscsi_cdblen  = CDB_GROUP0;
25587	com->uscsi_cdb	   = cdb;
25588	if (cmd == SD_RELEASE) {
25589		cdb[0] = SCMD_RELEASE;
25590	} else {
25591		cdb[0] = SCMD_RESERVE;
25592	}
25593
25594	/* Send the command. */
25595	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25596	    SD_PATH_STANDARD);
25597
25598	/*
25599	 * "break" a reservation that is held by another host, by issuing a
25600	 * reset if priority reserve is desired, and we could not get the
25601	 * device.
25602	 */
25603	if ((cmd == SD_PRIORITY_RESERVE) &&
25604	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25605		/*
25606		 * First try to reset the LUN. If we cannot, then try a target
25607		 * reset, followed by a bus reset if the target reset fails.
25608		 */
25609		int reset_retval = 0;
25610		if (un->un_f_lun_reset_enabled == TRUE) {
25611			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
25612		}
25613		if (reset_retval == 0) {
25614			/* The LUN reset either failed or was not issued */
25615			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25616		}
25617		if ((reset_retval == 0) &&
25618		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
25619			rval = EIO;
25620			kmem_free(com, sizeof (*com));
25621			return (rval);
25622		}
25623
25624		bzero(com, sizeof (struct uscsi_cmd));
25625		com->uscsi_flags   = USCSI_SILENT;
25626		com->uscsi_cdb	   = cdb;
25627		com->uscsi_cdblen  = CDB_GROUP0;
25628		com->uscsi_timeout = 5;
25629
25630		/*
25631		 * Reissue the last reserve command, this time without request
25632		 * sense.  Assume that it is just a regular reserve command.
25633		 */
25634		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25635		    SD_PATH_STANDARD);
25636	}
25637
25638	/* Return an error if still getting a reservation conflict. */
25639	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25640		rval = EACCES;
25641	}
25642
25643	kmem_free(com, sizeof (*com));
25644	return (rval);
25645}
25646
25647
25648#define	SD_NDUMP_RETRIES	12
25649/*
25650 *	System Crash Dump routine
25651 */
25652
25653static int
25654sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
25655{
25656	int		instance;
25657	int		partition;
25658	int		i;
25659	int		err;
25660	struct sd_lun	*un;
25661	struct scsi_pkt *wr_pktp;
25662	struct buf	*wr_bp;
25663	struct buf	wr_buf;
25664	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
25665	daddr_t		tgt_blkno;	/* rmw - blkno for target */
25666	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
25667	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
25668	size_t		io_start_offset;
25669	int		doing_rmw = FALSE;
25670	int		rval;
25671	ssize_t		dma_resid;
25672	daddr_t		oblkno;
25673	diskaddr_t	nblks = 0;
25674	diskaddr_t	start_block;
25675
25676	instance = SDUNIT(dev);
25677	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
25678	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
25679		return (ENXIO);
25680	}
25681
25682	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
25683
25684	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
25685
25686	partition = SDPART(dev);
25687	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
25688
25689	if (!(NOT_DEVBSIZE(un))) {
25690		int secmask = 0;
25691		int blknomask = 0;
25692
25693		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
25694		secmask = un->un_tgt_blocksize - 1;
25695
25696		if (blkno & blknomask) {
25697			SD_TRACE(SD_LOG_DUMP, un,
25698			    "sddump: dump start block not modulo %d\n",
25699			    un->un_tgt_blocksize);
25700			return (EINVAL);
25701		}
25702
25703		if ((nblk * DEV_BSIZE) & secmask) {
25704			SD_TRACE(SD_LOG_DUMP, un,
25705			    "sddump: dump length not modulo %d\n",
25706			    un->un_tgt_blocksize);
25707			return (EINVAL);
25708		}
25709
25710	}
25711
	/* Validate the blocks to dump against the partition size. */
25713
25714	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
25715	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
25716
25717	if (NOT_DEVBSIZE(un)) {
25718		if ((blkno + nblk) > nblks) {
25719			SD_TRACE(SD_LOG_DUMP, un,
25720			    "sddump: dump range larger than partition: "
25721			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
25722			    blkno, nblk, nblks);
25723			return (EINVAL);
25724		}
25725	} else {
25726		if (((blkno / (un->un_tgt_blocksize / DEV_BSIZE)) +
25727		    (nblk / (un->un_tgt_blocksize / DEV_BSIZE))) > nblks) {
25728			SD_TRACE(SD_LOG_DUMP, un,
25729			    "sddump: dump range larger than partition: "
25730			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
25731			    blkno, nblk, nblks);
25732			return (EINVAL);
25733		}
25734	}
25735
25736	mutex_enter(&un->un_pm_mutex);
25737	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
25738		struct scsi_pkt *start_pktp;
25739
25740		mutex_exit(&un->un_pm_mutex);
25741
25742		/*
25743		 * use pm framework to power on HBA 1st
25744		 */
25745		(void) pm_raise_power(SD_DEVINFO(un), 0,
25746		    SD_PM_STATE_ACTIVE(un));
25747
25748		/*
25749		 * Dump no long uses sdpower to power on a device, it's
25750		 * in-line here so it can be done in polled mode.
25751		 */
25752
25753		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
25754
25755		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
25756		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
25757
25758		if (start_pktp == NULL) {
25759			/* We were not given a SCSI packet, fail. */
25760			return (EIO);
25761		}
25762		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
25763		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
25764		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
25765		start_pktp->pkt_flags = FLAG_NOINTR;
25766
25767		mutex_enter(SD_MUTEX(un));
25768		SD_FILL_SCSI1_LUN(un, start_pktp);
25769		mutex_exit(SD_MUTEX(un));
25770		/*
25771		 * Scsi_poll returns 0 (success) if the command completes and
25772		 * the status block is STATUS_GOOD.
25773		 */
25774		if (sd_scsi_poll(un, start_pktp) != 0) {
25775			scsi_destroy_pkt(start_pktp);
25776			return (EIO);
25777		}
25778		scsi_destroy_pkt(start_pktp);
25779		(void) sd_pm_state_change(un, SD_PM_STATE_ACTIVE(un),
25780		    SD_PM_STATE_CHANGE);
25781	} else {
25782		mutex_exit(&un->un_pm_mutex);
25783	}
25784
25785	mutex_enter(SD_MUTEX(un));
25786	un->un_throttle = 0;
25787
25788	/*
25789	 * The first time through, reset the specific target device.
25790	 * However, when cpr calls sddump we know that sd is in a
25791	 * a good state so no bus reset is required.
25792	 * Clear sense data via Request Sense cmd.
25793	 * In sddump we don't care about allow_bus_device_reset anymore
25794	 */
25795
25796	if ((un->un_state != SD_STATE_SUSPENDED) &&
25797	    (un->un_state != SD_STATE_DUMPING)) {
25798
25799		New_state(un, SD_STATE_DUMPING);
25800
25801		if (un->un_f_is_fibre == FALSE) {
25802			mutex_exit(SD_MUTEX(un));
25803			/*
25804			 * Attempt a bus reset for parallel scsi.
25805			 *
25806			 * Note: A bus reset is required because on some host
25807			 * systems (i.e. E420R) a bus device reset is
25808			 * insufficient to reset the state of the target.
25809			 *
25810			 * Note: Don't issue the reset for fibre-channel,
25811			 * because this tends to hang the bus (loop) for
25812			 * too long while everyone is logging out and in
25813			 * and the deadman timer for dumping will fire
25814			 * before the dump is complete.
25815			 */
25816			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
25817				mutex_enter(SD_MUTEX(un));
25818				Restore_state(un);
25819				mutex_exit(SD_MUTEX(un));
25820				return (EIO);
25821			}
25822
25823			/* Delay to give the device some recovery time. */
25824			drv_usecwait(10000);
25825
25826			if (sd_send_polled_RQS(un) == SD_FAILURE) {
25827				SD_INFO(SD_LOG_DUMP, un,
25828				    "sddump: sd_send_polled_RQS failed\n");
25829			}
25830			mutex_enter(SD_MUTEX(un));
25831		}
25832	}
25833
25834	/*
25835	 * Convert the partition-relative block number to a
25836	 * disk physical block number.
25837	 */
25838	if (NOT_DEVBSIZE(un)) {
25839		blkno += start_block;
25840	} else {
25841		blkno = blkno / (un->un_tgt_blocksize / DEV_BSIZE);
25842		blkno += start_block;
25843	}
25844
25845	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
25846
25847
25848	/*
25849	 * Check if the device has a non-512 block size.
25850	 */
25851	wr_bp = NULL;
25852	if (NOT_DEVBSIZE(un)) {
25853		tgt_byte_offset = blkno * un->un_sys_blocksize;
25854		tgt_byte_count = nblk * un->un_sys_blocksize;
25855		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
25856		    (tgt_byte_count % un->un_tgt_blocksize)) {
25857			doing_rmw = TRUE;
25858			/*
			 * Calculate the block number and number of blocks
			 * in terms of the media (target) block size.
25861			 */
25862			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
25863			tgt_nblk =
25864			    ((tgt_byte_offset + tgt_byte_count +
25865			    (un->un_tgt_blocksize - 1)) /
25866			    un->un_tgt_blocksize) - tgt_blkno;
25867
25868			/*
25869			 * Invoke the routine which is going to do read part
25870			 * of read-modify-write.
25871			 * Note that this routine returns a pointer to
25872			 * a valid bp in wr_bp.
25873			 */
25874			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
25875			    &wr_bp);
25876			if (err) {
25877				mutex_exit(SD_MUTEX(un));
25878				return (err);
25879			}
25880			/*
25881			 * Offset is being calculated as -
25882			 * (original block # * system block size) -
25883			 * (new block # * target block size)
25884			 */
25885			io_start_offset =
25886			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
25887			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
25888
25889			ASSERT((io_start_offset >= 0) &&
25890			    (io_start_offset < un->un_tgt_blocksize));
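			/*
			 * Worked example (illustrative numbers): with
			 * un_sys_blocksize = 512, un_tgt_blocksize = 4096,
			 * blkno = 9 and nblk = 2:
			 *	tgt_byte_offset = 9 * 512 = 4608
			 *	tgt_byte_count  = 2 * 512 = 1024
			 *	tgt_blkno       = 4608 / 4096 = 1
			 *	tgt_nblk        = roundup(5632, 4096) / 4096 - 1 = 1
			 *	io_start_offset = 4608 - 4096 = 512
			 * i.e. the 1024 bytes are copied at offset 512 into
			 * the single 4096-byte block read back above.
			 */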
25891			/*
25892			 * Do the modify portion of read modify write.
25893			 */
25894			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
25895			    (size_t)nblk * un->un_sys_blocksize);
25896		} else {
25897			doing_rmw = FALSE;
25898			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
25899			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
25900		}
25901
25902		/* Convert blkno and nblk to target blocks */
25903		blkno = tgt_blkno;
25904		nblk = tgt_nblk;
25905	} else {
25906		wr_bp = &wr_buf;
25907		bzero(wr_bp, sizeof (struct buf));
25908		wr_bp->b_flags		= B_BUSY;
25909		wr_bp->b_un.b_addr	= addr;
25910		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
25911		wr_bp->b_resid		= 0;
25912	}
25913
25914	mutex_exit(SD_MUTEX(un));
25915
25916	/*
25917	 * Obtain a SCSI packet for the write command.
25918	 * It should be safe to call the allocator here without
25919	 * worrying about being locked for DVMA mapping because
25920	 * the address we're passed is already a DVMA mapping
25921	 *
25922	 * We are also not going to worry about semaphore ownership
25923	 * in the dump buffer. Dumping is single threaded at present.
25924	 */
25925
25926	wr_pktp = NULL;
25927
25928	dma_resid = wr_bp->b_bcount;
25929	oblkno = blkno;
25930
25931	if (!(NOT_DEVBSIZE(un))) {
25932		nblk = nblk / (un->un_tgt_blocksize / DEV_BSIZE);
25933	}
25934
25935	while (dma_resid != 0) {
25936
25937	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
25938		wr_bp->b_flags &= ~B_ERROR;
25939
25940		if (un->un_partial_dma_supported == 1) {
25941			blkno = oblkno +
25942			    ((wr_bp->b_bcount - dma_resid) /
25943			    un->un_tgt_blocksize);
25944			nblk = dma_resid / un->un_tgt_blocksize;
25945
25946			if (wr_pktp) {
25947				/*
25948				 * Partial DMA transfers after initial transfer
25949				 */
25950				rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
25951				    blkno, nblk);
25952			} else {
25953				/* Initial transfer */
25954				rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
25955				    un->un_pkt_flags, NULL_FUNC, NULL,
25956				    blkno, nblk);
25957			}
25958		} else {
25959			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
25960			    0, NULL_FUNC, NULL, blkno, nblk);
25961		}
25962
25963		if (rval == 0) {
25964			/* We were given a SCSI packet, continue. */
25965			break;
25966		}
25967
25968		if (i == 0) {
25969			if (wr_bp->b_flags & B_ERROR) {
25970				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25971				    "no resources for dumping; "
25972				    "error code: 0x%x, retrying",
25973				    geterror(wr_bp));
25974			} else {
25975				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
25976				    "no resources for dumping; retrying");
25977			}
25978		} else if (i != (SD_NDUMP_RETRIES - 1)) {
25979			if (wr_bp->b_flags & B_ERROR) {
25980				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25981				    "no resources for dumping; error code: "
25982				    "0x%x, retrying\n", geterror(wr_bp));
25983			}
25984		} else {
25985			if (wr_bp->b_flags & B_ERROR) {
25986				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25987				    "no resources for dumping; "
25988				    "error code: 0x%x, retries failed, "
25989				    "giving up.\n", geterror(wr_bp));
25990			} else {
25991				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
25992				    "no resources for dumping; "
25993				    "retries failed, giving up.\n");
25994			}
25995			mutex_enter(SD_MUTEX(un));
25996			Restore_state(un);
25997			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
25998				mutex_exit(SD_MUTEX(un));
25999				scsi_free_consistent_buf(wr_bp);
26000			} else {
26001				mutex_exit(SD_MUTEX(un));
26002			}
26003			return (EIO);
26004		}
26005		drv_usecwait(10000);
26006	}
26007
26008	if (un->un_partial_dma_supported == 1) {
26009		/*
26010		 * save the resid from PARTIAL_DMA
26011		 */
26012		dma_resid = wr_pktp->pkt_resid;
26013		if (dma_resid != 0)
26014			nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26015		wr_pktp->pkt_resid = 0;
26016	} else {
26017		dma_resid = 0;
26018	}
26019
26020	/* SunBug 1222170 */
26021	wr_pktp->pkt_flags = FLAG_NOINTR;
26022
26023	err = EIO;
26024	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26025
26026		/*
26027		 * Scsi_poll returns 0 (success) if the command completes and
26028		 * the status block is STATUS_GOOD.  We should only check
26029		 * errors if this condition is not true.  Even then we should
26030		 * send our own request sense packet only if we have a check
26031		 * condition and auto request sense has not been performed by
26032		 * the hba.
26033		 */
26034		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26035
26036		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26037		    (wr_pktp->pkt_resid == 0)) {
26038			err = SD_SUCCESS;
26039			break;
26040		}
26041
26042		/*
		 * Check CMD_DEV_GONE first; give up if the device is gone.
26044		 */
26045		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26046			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26047			    "Error while dumping state...Device is gone\n");
26048			break;
26049		}
26050
26051		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26052			SD_INFO(SD_LOG_DUMP, un,
26053			    "sddump: write failed with CHECK, try # %d\n", i);
26054			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26055				(void) sd_send_polled_RQS(un);
26056			}
26057
26058			continue;
26059		}
26060
26061		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26062			int reset_retval = 0;
26063
26064			SD_INFO(SD_LOG_DUMP, un,
26065			    "sddump: write failed with BUSY, try # %d\n", i);
26066
26067			if (un->un_f_lun_reset_enabled == TRUE) {
26068				reset_retval = scsi_reset(SD_ADDRESS(un),
26069				    RESET_LUN);
26070			}
26071			if (reset_retval == 0) {
26072				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26073			}
26074			(void) sd_send_polled_RQS(un);
26075
26076		} else {
26077			SD_INFO(SD_LOG_DUMP, un,
26078			    "sddump: write failed with 0x%x, try # %d\n",
26079			    SD_GET_PKT_STATUS(wr_pktp), i);
26080			mutex_enter(SD_MUTEX(un));
26081			sd_reset_target(un, wr_pktp);
26082			mutex_exit(SD_MUTEX(un));
26083		}
26084
26085		/*
26086		 * If we are not getting anywhere with lun/target resets,
26087		 * let's reset the bus.
26088		 */
		if (i == SD_NDUMP_RETRIES / 2) {
26090			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26091			(void) sd_send_polled_RQS(un);
26092		}
26093	}
26094	}
26095
26096	scsi_destroy_pkt(wr_pktp);
26097	mutex_enter(SD_MUTEX(un));
26098	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26099		mutex_exit(SD_MUTEX(un));
26100		scsi_free_consistent_buf(wr_bp);
26101	} else {
26102		mutex_exit(SD_MUTEX(un));
26103	}
26104	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26105	return (err);
26106}
26107
26108/*
26109 *    Function: sd_scsi_poll()
26110 *
26111 * Description: This is a wrapper for the scsi_poll call.
26112 *
26113 *   Arguments: sd_lun - The unit structure
26114 *              scsi_pkt - The scsi packet being sent to the device.
26115 *
26116 * Return Code: 0 - Command completed successfully with good status
26117 *             -1 - Command failed.  This could indicate a check condition
26118 *                  or other status value requiring recovery action.
26119 *
26120 * NOTE: This code is only called off sddump().
26121 */
26122
26123static int
26124sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26125{
26126	int status;
26127
26128	ASSERT(un != NULL);
26129	ASSERT(!mutex_owned(SD_MUTEX(un)));
26130	ASSERT(pktp != NULL);
26131
26132	status = SD_SUCCESS;
26133
26134	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26135		pktp->pkt_flags |= un->un_tagflags;
26136		pktp->pkt_flags &= ~FLAG_NODISCON;
26137	}
26138
26139	status = sd_ddi_scsi_poll(pktp);
26140	/*
26141	 * Scsi_poll returns 0 (success) if the command completes and the
26142	 * status block is STATUS_GOOD.  We should only check errors if this
26143	 * condition is not true.  Even then we should send our own request
26144	 * sense packet only if we have a check condition and auto
26145	 * request sense has not been performed by the hba.
26146	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26147	 */
26148	if ((status != SD_SUCCESS) &&
26149	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26150	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26151	    (pktp->pkt_reason != CMD_DEV_GONE))
26152		(void) sd_send_polled_RQS(un);
26153
26154	return (status);
26155}
26156
26157/*
26158 *    Function: sd_send_polled_RQS()
26159 *
26160 * Description: This sends the request sense command to a device.
26161 *
26162 *   Arguments: sd_lun - The unit structure
26163 *
26164 * Return Code: 0 - Command completed successfully with good status
26165 *             -1 - Command failed.
26166 *
26167 */
26168
26169static int
26170sd_send_polled_RQS(struct sd_lun *un)
26171{
26172	int	ret_val;
26173	struct	scsi_pkt	*rqs_pktp;
26174	struct	buf		*rqs_bp;
26175
26176	ASSERT(un != NULL);
26177	ASSERT(!mutex_owned(SD_MUTEX(un)));
26178
26179	ret_val = SD_SUCCESS;
26180
26181	rqs_pktp = un->un_rqs_pktp;
26182	rqs_bp	 = un->un_rqs_bp;
26183
26184	mutex_enter(SD_MUTEX(un));
26185
26186	if (un->un_sense_isbusy) {
26187		ret_val = SD_FAILURE;
26188		mutex_exit(SD_MUTEX(un));
26189		return (ret_val);
26190	}
26191
26192	/*
26193	 * If the request sense buffer (and packet) is not in use,
	 * let's set un_sense_isbusy and send our packet.
26195	 */
26196	un->un_sense_isbusy 	= 1;
26197	rqs_pktp->pkt_resid  	= 0;
26198	rqs_pktp->pkt_reason 	= 0;
26199	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26200	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26201
26202	mutex_exit(SD_MUTEX(un));
26203
26204	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26205	    " 0x%p\n", rqs_bp->b_un.b_addr);
26206
26207	/*
26208	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26209	 * axle - it has a call into us!
26210	 */
26211	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26212		SD_INFO(SD_LOG_COMMON, un,
26213		    "sd_send_polled_RQS: RQS failed\n");
26214	}
26215
26216	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26217	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26218
26219	mutex_enter(SD_MUTEX(un));
26220	un->un_sense_isbusy = 0;
26221	mutex_exit(SD_MUTEX(un));
26222
26223	return (ret_val);
26224}
26225
26226/*
26227 * Defines needed for localized version of the scsi_poll routine.
26228 */
26229#define	CSEC		10000			/* usecs */
26230#define	SEC_TO_CSEC	(1000000/CSEC)
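
/*
 * Example: CSEC is one poll interval (10 msec) expressed in microseconds,
 * and SEC_TO_CSEC (100) is the number of such intervals per second.  A
 * packet with pkt_time = 60 thus allows timeout = 60 * 100 = 6000 loop
 * iterations below; a retry that waits a full second bumps busy_count by
 * (SEC_TO_CSEC - 1), so it consumes 100 iterations' worth of the budget.
 */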
26231
26232/*
26233 *    Function: sd_ddi_scsi_poll()
26234 *
 * Description: Localized version of the scsi_poll routine.  The purpose is to
 *		send a scsi_pkt to a device as a polled command.  This version
 *		provides more robust handling of transport errors.
 *		Specifically, this routine handles the not-ready to ready
 *		transition on power-up or reset of Sonoma arrays; this can
 *		take up to 45 seconds for power-on and 20 seconds for reset
 *		of a Sonoma LUN.
26242 *
26243 *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26244 *
26245 * Return Code: 0 - Command completed successfully with good status
26246 *             -1 - Command failed.
26247 *
26248 * NOTE: This code is almost identical to scsi_poll, however before 6668774 can
26249 * be fixed (removing this code), we need to determine how to handle the
26250 * KEY_UNIT_ATTENTION condition below in conditions not as limited as sddump().
26251 *
26252 * NOTE: This code is only called off sddump().
26253 */
26254static int
26255sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26256{
26257	int			rval = -1;
26258	int			savef;
26259	long			savet;
26260	void			(*savec)();
26261	int			timeout;
26262	int			busy_count;
26263	int			poll_delay;
26264	int			rc;
26265	uint8_t			*sensep;
26266	struct scsi_arq_status	*arqstat;
26267	extern int		do_polled_io;
26268
26269	ASSERT(pkt->pkt_scbp);
26270
26271	/*
26272	 * save old flags..
26273	 */
26274	savef = pkt->pkt_flags;
26275	savec = pkt->pkt_comp;
26276	savet = pkt->pkt_time;
26277
26278	pkt->pkt_flags |= FLAG_NOINTR;
26279
26280	/*
26281	 * XXX there is nothing in the SCSA spec that states that we should not
26282	 * do a callback for polled cmds; however, removing this will break sd
26283	 * and probably other target drivers
26284	 */
26285	pkt->pkt_comp = NULL;
26286
26287	/*
26288	 * we don't like a polled command without timeout.
26289	 * 60 seconds seems long enough.
26290	 */
26291	if (pkt->pkt_time == 0)
26292		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26293
26294	/*
26295	 * Send polled cmd.
26296	 *
	 * We do some error recovery for various errors.  Tran_busy,
	 * queue full, and non-dispatched commands are retried every 10 msec,
	 * as they are typically transient failures.  Busy status and Not
	 * Ready are retried every second, as this status takes a while to
	 * change.
26302	 */
26303	timeout = pkt->pkt_time * SEC_TO_CSEC;
26304
26305	for (busy_count = 0; busy_count < timeout; busy_count++) {
26306		/*
26307		 * Initialize pkt status variables.
26308		 */
26309		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26310
26311		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26312			if (rc != TRAN_BUSY) {
26313				/* Transport failed - give up. */
26314				break;
26315			} else {
26316				/* Transport busy - try again. */
26317				poll_delay = 1 * CSEC;		/* 10 msec. */
26318			}
26319		} else {
26320			/*
26321			 * Transport accepted - check pkt status.
26322			 */
26323			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26324			if ((pkt->pkt_reason == CMD_CMPLT) &&
26325			    (rc == STATUS_CHECK) &&
26326			    (pkt->pkt_state & STATE_ARQ_DONE)) {
26327				arqstat =
26328				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26329				sensep = (uint8_t *)&arqstat->sts_sensedata;
26330			} else {
26331				sensep = NULL;
26332			}
26333
26334			if ((pkt->pkt_reason == CMD_CMPLT) &&
26335			    (rc == STATUS_GOOD)) {
26336				/* No error - we're done */
26337				rval = 0;
26338				break;
26339
26340			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26341				/* Lost connection - give up */
26342				break;
26343
26344			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26345			    (pkt->pkt_state == 0)) {
26346				/* Pkt not dispatched - try again. */
26347				poll_delay = 1 * CSEC;		/* 10 msec. */
26348
26349			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26350			    (rc == STATUS_QFULL)) {
26351				/* Queue full - try again. */
26352				poll_delay = 1 * CSEC;		/* 10 msec. */
26353
26354			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26355			    (rc == STATUS_BUSY)) {
26356				/* Busy - try again. */
26357				poll_delay = 100 * CSEC;	/* 1 sec. */
26358				busy_count += (SEC_TO_CSEC - 1);
26359
26360			} else if ((sensep != NULL) &&
26361			    (scsi_sense_key(sensep) == KEY_UNIT_ATTENTION)) {
26362				/*
26363				 * Unit Attention - try again.
26364				 * Pretend it took 1 sec.
26365				 * NOTE: 'continue' avoids poll_delay
26366				 */
26367				busy_count += (SEC_TO_CSEC - 1);
26368				continue;
26369
26370			} else if ((sensep != NULL) &&
26371			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
26372			    (scsi_sense_asc(sensep) == 0x04) &&
26373			    (scsi_sense_ascq(sensep) == 0x01)) {
26374				/*
26375				 * Not ready -> ready - try again.
26376				 * 04h/01h: LUN IS IN PROCESS OF BECOMING READY
26377				 * ...same as STATUS_BUSY
26378				 */
26379				poll_delay = 100 * CSEC;	/* 1 sec. */
26380				busy_count += (SEC_TO_CSEC - 1);
26381
26382			} else {
26383				/* BAD status - give up. */
26384				break;
26385			}
26386		}
26387
26388		if (((curthread->t_flag & T_INTR_THREAD) == 0) &&
26389		    !do_polled_io) {
26390			delay(drv_usectohz(poll_delay));
26391		} else {
26392			/* we busy wait during cpr_dump or interrupt threads */
26393			drv_usecwait(poll_delay);
26394		}
26395	}
26396
26397	pkt->pkt_flags = savef;
26398	pkt->pkt_comp = savec;
26399	pkt->pkt_time = savet;
26400
26401	/* return on error */
26402	if (rval)
26403		return (rval);
26404
26405	/*
26406	 * This is not a performance critical code path.
26407	 *
26408	 * As an accommodation for scsi_poll callers, to avoid ddi_dma_sync()
26409	 * issues associated with looking at DMA memory prior to
26410	 * scsi_pkt_destroy(), we scsi_sync_pkt() prior to return.
26411	 */
26412	scsi_sync_pkt(pkt);
26413	return (0);
26414}
26415
26416
26417
26418/*
26419 *    Function: sd_persistent_reservation_in_read_keys
26420 *
26421 * Description: This routine is the driver entry point for handling CD-ROM
26422 *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26423 *		by sending the SCSI-3 PRIN commands to the device.
26424 *		Processes the read keys command response by copying the
26425 *		reservation key information into the user provided buffer.
26426 *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26427 *
26428 *   Arguments: un   -  Pointer to soft state struct for the target.
26429 *		usrp -	user provided pointer to multihost Persistent In Read
26430 *			Keys structure (mhioc_inkeys_t)
26431 *		flag -	this argument is a pass through to ddi_copyxxx()
26432 *			directly from the mode argument of ioctl().
26433 *
26434 * Return Code: 0   - Success
26435 *		EACCES
26436 *		ENOTSUP
26437 *		errno return code from sd_send_scsi_cmd()
26438 *
26439 *     Context: Can sleep. Does not return until command is completed.
26440 */
26441
26442static int
26443sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26444    mhioc_inkeys_t *usrp, int flag)
26445{
26446#ifdef _MULTI_DATAMODEL
26447	struct mhioc_key_list32	li32;
26448#endif
26449	sd_prin_readkeys_t	*in;
26450	mhioc_inkeys_t		*ptr;
26451	mhioc_key_list_t	li;
26452	uchar_t			*data_bufp;
26453	int 			data_len;
26454	int			rval = 0;
26455	size_t			copysz;
26456	sd_ssc_t		*ssc;
26457
26458	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26459		return (EINVAL);
26460	}
26461	bzero(&li, sizeof (mhioc_key_list_t));
26462
26463	ssc = sd_ssc_init(un);
26464
26465	/*
26466	 * Get the listsize from user
26467	 */
26468#ifdef _MULTI_DATAMODEL
26469
26470	switch (ddi_model_convert_from(flag & FMODELS)) {
26471	case DDI_MODEL_ILP32:
26472		copysz = sizeof (struct mhioc_key_list32);
26473		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26474			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26475			    "sd_persistent_reservation_in_read_keys: "
26476			    "failed ddi_copyin: mhioc_key_list32_t\n");
26477			rval = EFAULT;
26478			goto done;
26479		}
26480		li.listsize = li32.listsize;
26481		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26482		break;
26483
26484	case DDI_MODEL_NONE:
26485		copysz = sizeof (mhioc_key_list_t);
26486		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26487			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26488			    "sd_persistent_reservation_in_read_keys: "
26489			    "failed ddi_copyin: mhioc_key_list_t\n");
26490			rval = EFAULT;
26491			goto done;
26492		}
26493		break;
26494	}
26495
26496#else /* ! _MULTI_DATAMODEL */
26497	copysz = sizeof (mhioc_key_list_t);
26498	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26499		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26500		    "sd_persistent_reservation_in_read_keys: "
26501		    "failed ddi_copyin: mhioc_key_list_t\n");
26502		rval = EFAULT;
26503		goto done;
26504	}
26505#endif
26506
26507	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26508	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26509	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
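
	/*
	 * Example (assuming the usual 8-byte MHIOC_RESV_KEY_SIZE): a
	 * caller with listsize = 4 yields data_len = 4 * 8 = 32 bytes of
	 * key space plus the PRIN header, where sizeof (caddr_t) backs
	 * out the trailing keylist placeholder of sd_prin_readkeys_t.
	 */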
26510
26511	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
26512	    data_len, data_bufp);
26513	if (rval != 0) {
26514		if (rval == EIO)
26515			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
26516		else
26517			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
26518		goto done;
26519	}
26520	in = (sd_prin_readkeys_t *)data_bufp;
26521	ptr->generation = BE_32(in->generation);
26522	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26523
26524	/*
26525	 * Return the min(listsize, listlen) keys
26526	 */
26527#ifdef _MULTI_DATAMODEL
26528
26529	switch (ddi_model_convert_from(flag & FMODELS)) {
26530	case DDI_MODEL_ILP32:
26531		li32.listlen = li.listlen;
26532		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26533			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26534			    "sd_persistent_reservation_in_read_keys: "
26535			    "failed ddi_copyout: mhioc_key_list32_t\n");
26536			rval = EFAULT;
26537			goto done;
26538		}
26539		break;
26540
26541	case DDI_MODEL_NONE:
26542		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26543			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26544			    "sd_persistent_reservation_in_read_keys: "
26545			    "failed ddi_copyout: mhioc_key_list_t\n");
26546			rval = EFAULT;
26547			goto done;
26548		}
26549		break;
26550	}
26551
26552#else /* ! _MULTI_DATAMODEL */
26553
26554	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26555		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26556		    "sd_persistent_reservation_in_read_keys: "
26557		    "failed ddi_copyout: mhioc_key_list_t\n");
26558		rval = EFAULT;
26559		goto done;
26560	}
26561
26562#endif /* _MULTI_DATAMODEL */
26563
26564	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
26565	    li.listsize * MHIOC_RESV_KEY_SIZE);
26566	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
26567		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26568		    "sd_persistent_reservation_in_read_keys: "
26569		    "failed ddi_copyout: keylist\n");
26570		rval = EFAULT;
26571	}
26572done:
26573	sd_ssc_fini(ssc);
26574	kmem_free(data_bufp, data_len);
26575	return (rval);
26576}
26577
26578
26579/*
26580 *    Function: sd_persistent_reservation_in_read_resv
26581 *
26582 * Description: This routine is the driver entry point for handling CD-ROM
26583 *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26584 *		by sending the SCSI-3 PRIN commands to the device.
26585 *		Process the read persistent reservations command response by
26586 *		copying the reservation information into the user provided
26587 *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26588 *
 *   Arguments: un   -  Pointer to soft state struct for the target.
 *		usrp -	user provided pointer to multihost Persistent In Read
 *			Reservations structure (mhioc_inresvs_t)
26592 *		flag -	this argument is a pass through to ddi_copyxxx()
26593 *			directly from the mode argument of ioctl().
26594 *
26595 * Return Code: 0   - Success
26596 *		EACCES
26597 *		ENOTSUP
26598 *		errno return code from sd_send_scsi_cmd()
26599 *
26600 *     Context: Can sleep. Does not return until command is completed.
26601 */
26602
26603static int
26604sd_persistent_reservation_in_read_resv(struct sd_lun *un,
26605    mhioc_inresvs_t *usrp, int flag)
26606{
26607#ifdef _MULTI_DATAMODEL
26608	struct mhioc_resv_desc_list32 resvlist32;
26609#endif
26610	sd_prin_readresv_t	*in;
26611	mhioc_inresvs_t		*ptr;
26612	sd_readresv_desc_t	*readresv_ptr;
26613	mhioc_resv_desc_list_t	resvlist;
26614	mhioc_resv_desc_t 	resvdesc;
26615	uchar_t			*data_bufp = NULL;
26616	int 			data_len;
26617	int			rval = 0;
26618	int			i;
26619	size_t			copysz;
26620	mhioc_resv_desc_t	*bufp;
26621	sd_ssc_t		*ssc;
26622
26623	if ((ptr = usrp) == NULL) {
26624		return (EINVAL);
26625	}
26626
26627	ssc = sd_ssc_init(un);
26628
26629	/*
26630	 * Get the listsize from user
26631	 */
26632#ifdef _MULTI_DATAMODEL
26633	switch (ddi_model_convert_from(flag & FMODELS)) {
26634	case DDI_MODEL_ILP32:
26635		copysz = sizeof (struct mhioc_resv_desc_list32);
26636		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
26637			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26638			    "sd_persistent_reservation_in_read_resv: "
26639			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26640			rval = EFAULT;
26641			goto done;
26642		}
26643		resvlist.listsize = resvlist32.listsize;
26644		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
26645		break;
26646
26647	case DDI_MODEL_NONE:
26648		copysz = sizeof (mhioc_resv_desc_list_t);
26649		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26650			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26651			    "sd_persistent_reservation_in_read_resv: "
26652			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26653			rval = EFAULT;
26654			goto done;
26655		}
26656		break;
26657	}
26658#else /* ! _MULTI_DATAMODEL */
26659	copysz = sizeof (mhioc_resv_desc_list_t);
26660	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26661		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26662		    "sd_persistent_reservation_in_read_resv: "
26663		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26664		rval = EFAULT;
26665		goto done;
26666	}
26667#endif /* ! _MULTI_DATAMODEL */
26668
26669	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
26670	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
26671	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26672
26673	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_RESV,
26674	    data_len, data_bufp);
26675	if (rval != 0) {
26676		if (rval == EIO)
26677			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
26678		else
26679			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
26680		goto done;
26681	}
26682	in = (sd_prin_readresv_t *)data_bufp;
26683	ptr->generation = BE_32(in->generation);
26684	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
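
	/*
	 * Example: assuming the customary 16-byte SCSI-3 reservation
	 * descriptor for SCSI3_RESV_DESC_LEN, a PRIN additional length
	 * (in->len) of 32 yields listlen = 2 reservation descriptors.
	 */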
26685
26686	/*
	 * Return the min(listsize, listlen) keys
26688	 */
26689#ifdef _MULTI_DATAMODEL
26690
26691	switch (ddi_model_convert_from(flag & FMODELS)) {
26692	case DDI_MODEL_ILP32:
26693		resvlist32.listlen = resvlist.listlen;
26694		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
26695			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26696			    "sd_persistent_reservation_in_read_resv: "
26697			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26698			rval = EFAULT;
26699			goto done;
26700		}
26701		break;
26702
26703	case DDI_MODEL_NONE:
26704		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26705			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26706			    "sd_persistent_reservation_in_read_resv: "
26707			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26708			rval = EFAULT;
26709			goto done;
26710		}
26711		break;
26712	}
26713
26714#else /* ! _MULTI_DATAMODEL */
26715
26716	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26717		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26718		    "sd_persistent_reservation_in_read_resv: "
26719		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26720		rval = EFAULT;
26721		goto done;
26722	}
26723
26724#endif /* ! _MULTI_DATAMODEL */
26725
26726	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
26727	bufp = resvlist.list;
26728	copysz = sizeof (mhioc_resv_desc_t);
26729	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
26730	    i++, readresv_ptr++, bufp++) {
26731
26732		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
26733		    MHIOC_RESV_KEY_SIZE);
26734		resvdesc.type  = readresv_ptr->type;
26735		resvdesc.scope = readresv_ptr->scope;
26736		resvdesc.scope_specific_addr =
26737		    BE_32(readresv_ptr->scope_specific_addr);
26738
26739		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
26740			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26741			    "sd_persistent_reservation_in_read_resv: "
26742			    "failed ddi_copyout: resvlist\n");
26743			rval = EFAULT;
26744			goto done;
26745		}
26746	}
26747done:
26748	sd_ssc_fini(ssc);
	/* Free data_bufp only if it was actually allocated. */
26750	if (data_bufp) {
26751		kmem_free(data_bufp, data_len);
26752	}
26753	return (rval);
26754}
26755
26756
26757/*
26758 *    Function: sr_change_blkmode()
26759 *
26760 * Description: This routine is the driver entry point for handling CD-ROM
26761 *		block mode ioctl requests. Support for returning and changing
26762 *		the current block size in use by the device is implemented. The
26763 *		LBA size is changed via a MODE SELECT Block Descriptor.
26764 *
26765 *		This routine issues a mode sense with an allocation length of
26766 *		12 bytes for the mode page header and a single block descriptor.
26767 *
26768 *   Arguments: dev - the device 'dev_t'
26769 *		cmd - the request type; one of CDROMGBLKMODE (get) or
26770 *		      CDROMSBLKMODE (set)
26771 *		data - current block size or requested block size
26772 *		flag - this argument is a pass through to ddi_copyxxx() directly
26773 *		       from the mode argument of ioctl().
26774 *
26775 * Return Code: the code returned by sd_send_scsi_cmd()
26776 *		EINVAL if invalid arguments are provided
26777 *		EFAULT if ddi_copyxxx() fails
26778 *		ENXIO if fail ddi_get_soft_state
26779 *		EIO if invalid mode sense block descriptor length
26780 *
26781 */
26782
26783static int
26784sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
26785{
26786	struct sd_lun			*un = NULL;
26787	struct mode_header		*sense_mhp, *select_mhp;
26788	struct block_descriptor		*sense_desc, *select_desc;
26789	int				current_bsize;
26790	int				rval = EINVAL;
26791	uchar_t				*sense = NULL;
26792	uchar_t				*select = NULL;
26793	sd_ssc_t			*ssc;
26794
26795	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
26796
26797	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26798		return (ENXIO);
26799	}
26800
26801	/*
26802	 * The block length is changed via the Mode Select block descriptor, the
26803	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
26804	 * required as part of this routine. Therefore the mode sense allocation
26805	 * length is specified to be the length of a mode page header and a
26806	 * block descriptor.
26807	 */
26808	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26809
26810	ssc = sd_ssc_init(un);
26811	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
26812	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
26813	sd_ssc_fini(ssc);
26814	if (rval != 0) {
26815		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26816		    "sr_change_blkmode: Mode Sense Failed\n");
26817		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26818		return (rval);
26819	}
26820
26821	/* Check the block descriptor len to handle only 1 block descriptor */
26822	sense_mhp = (struct mode_header *)sense;
26823	if ((sense_mhp->bdesc_length == 0) ||
26824	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
26825		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26826		    "sr_change_blkmode: Mode Sense returned invalid block"
26827		    " descriptor length\n");
26828		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26829		return (EIO);
26830	}
26831	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
26832	current_bsize = ((sense_desc->blksize_hi << 16) |
26833	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
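
	/*
	 * Example: descriptor bytes blksize_hi/mid/lo of 0x00, 0x08, 0x00
	 * assemble to (0x00 << 16) | (0x08 << 8) | 0x00 = 2048, the
	 * common CD-ROM logical block size.
	 */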
26834
26835	/* Process command */
26836	switch (cmd) {
26837	case CDROMGBLKMODE:
26838		/* Return the block size obtained during the mode sense */
26839		if (ddi_copyout(&current_bsize, (void *)data,
26840		    sizeof (int), flag) != 0)
26841			rval = EFAULT;
26842		break;
26843	case CDROMSBLKMODE:
26844		/* Validate the requested block size */
26845		switch (data) {
26846		case CDROM_BLK_512:
26847		case CDROM_BLK_1024:
26848		case CDROM_BLK_2048:
26849		case CDROM_BLK_2056:
26850		case CDROM_BLK_2336:
26851		case CDROM_BLK_2340:
26852		case CDROM_BLK_2352:
26853		case CDROM_BLK_2368:
26854		case CDROM_BLK_2448:
26855		case CDROM_BLK_2646:
26856		case CDROM_BLK_2647:
26857			break;
26858		default:
26859			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26860			    "sr_change_blkmode: "
26861			    "Block Size '%ld' Not Supported\n", data);
26862			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26863			return (EINVAL);
26864		}
26865
26866		/*
26867		 * The current block size matches the requested block size so
26868		 * there is no need to send the mode select to change the size
26869		 */
26870		if (current_bsize == data) {
26871			break;
26872		}
26873
26874		/* Build the select data for the requested block size */
26875		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26876		select_mhp = (struct mode_header *)select;
26877		select_desc =
26878		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
26879		/*
26880		 * The LBA size is changed via the block descriptor, so the
26881		 * descriptor is built according to the user data
26882		 */
26883		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
26884		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
26885		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
26886		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
26887
26888		/* Send the mode select for the requested block size */
26889		ssc = sd_ssc_init(un);
26890		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
26891		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26892		    SD_PATH_STANDARD);
26893		sd_ssc_fini(ssc);
26894		if (rval != 0) {
26895			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26896			    "sr_change_blkmode: Mode Select Failed\n");
26897			/*
26898			 * The mode select failed for the requested block size,
26899			 * so reset the data for the original block size and
26900			 * send it to the target. The error is indicated by the
26901			 * return value for the failed mode select.
26902			 */
26903			select_desc->blksize_hi  = sense_desc->blksize_hi;
26904			select_desc->blksize_mid = sense_desc->blksize_mid;
26905			select_desc->blksize_lo  = sense_desc->blksize_lo;
26906			ssc = sd_ssc_init(un);
26907			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
26908			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26909			    SD_PATH_STANDARD);
26910			sd_ssc_fini(ssc);
26911		} else {
26912			ASSERT(!mutex_owned(SD_MUTEX(un)));
26913			mutex_enter(SD_MUTEX(un));
26914			sd_update_block_info(un, (uint32_t)data, 0);
26915			mutex_exit(SD_MUTEX(un));
26916		}
26917		break;
26918	default:
26919		/* should not reach here, but check anyway */
26920		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26921		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
26922		rval = EINVAL;
26923		break;
26924	}
26925
26926	if (select) {
26927		kmem_free(select, BUFLEN_CHG_BLK_MODE);
26928	}
26929	if (sense) {
26930		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26931	}
26932	return (rval);
26933}
26934
26935
26936/*
26937 * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
26938 * implement driver support for getting and setting the CD speed. The command
26939 * set used will be based on the device type. If the device has not been
26940 * identified as MMC the Toshiba vendor specific mode page will be used. If
26941 * the device is MMC but does not support the Real Time Streaming feature
26942 * the SET CD SPEED command will be used to set speed and mode page 0x2A will
26943 * be used to read the speed.
26944 */
26945
26946/*
26947 *    Function: sr_change_speed()
26948 *
26949 * Description: This routine is the driver entry point for handling CD-ROM
26950 *		drive speed ioctl requests for devices supporting the Toshiba
26951 *		vendor specific drive speed mode page. Support for returning
26952 *		and changing the current drive speed in use by the device is
26953 *		implemented.
26954 *
26955 *   Arguments: dev - the device 'dev_t'
26956 *		cmd - the request type; one of CDROMGDRVSPEED (get) or
26957 *		      CDROMSDRVSPEED (set)
26958 *		data - current drive speed or requested drive speed
26959 *		flag - this argument is a pass through to ddi_copyxxx() directly
26960 *		       from the mode argument of ioctl().
26961 *
26962 * Return Code: the code returned by sd_send_scsi_cmd()
26963 *		EINVAL if invalid arguments are provided
26964 *		EFAULT if ddi_copyxxx() fails
26965 *		ENXIO if fail ddi_get_soft_state
26966 *		EIO if invalid mode sense block descriptor length
26967 */
26968
26969static int
26970sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
26971{
26972	struct sd_lun			*un = NULL;
26973	struct mode_header		*sense_mhp, *select_mhp;
26974	struct mode_speed		*sense_page, *select_page;
26975	int				current_speed;
26976	int				rval = EINVAL;
26977	int				bd_len;
26978	uchar_t				*sense = NULL;
26979	uchar_t				*select = NULL;
26980	sd_ssc_t			*ssc;
26981
26982	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
26983	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26984		return (ENXIO);
26985	}
26986
26987	/*
26988	 * Note: The drive speed is being modified here according to a Toshiba
26989	 * vendor specific mode page (0x31).
26990	 */
26991	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26992
26993	ssc = sd_ssc_init(un);
26994	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
26995	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
26996	    SD_PATH_STANDARD);
26997	sd_ssc_fini(ssc);
26998	if (rval != 0) {
26999		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27000		    "sr_change_speed: Mode Sense Failed\n");
27001		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27002		return (rval);
27003	}
27004	sense_mhp  = (struct mode_header *)sense;
27005
27006	/* Check the block descriptor len to handle only 1 block descriptor */
27007	bd_len = sense_mhp->bdesc_length;
27008	if (bd_len > MODE_BLK_DESC_LENGTH) {
27009		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27010		    "sr_change_speed: Mode Sense returned invalid block "
27011		    "descriptor length\n");
27012		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27013		return (EIO);
27014	}
27015
27016	sense_page = (struct mode_speed *)
27017	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27018	current_speed = sense_page->speed;
27019
27020	/* Process command */
27021	switch (cmd) {
27022	case CDROMGDRVSPEED:
27023		/* Return the drive speed obtained during the mode sense */
27024		if (current_speed == 0x2) {
27025			current_speed = CDROM_TWELVE_SPEED;
27026		}
27027		if (ddi_copyout(&current_speed, (void *)data,
27028		    sizeof (int), flag) != 0) {
27029			rval = EFAULT;
27030		}
27031		break;
27032	case CDROMSDRVSPEED:
27033		/* Validate the requested drive speed */
27034		switch ((uchar_t)data) {
27035		case CDROM_TWELVE_SPEED:
27036			data = 0x2;
27037			/*FALLTHROUGH*/
27038		case CDROM_NORMAL_SPEED:
27039		case CDROM_DOUBLE_SPEED:
27040		case CDROM_QUAD_SPEED:
27041		case CDROM_MAXIMUM_SPEED:
27042			break;
27043		default:
27044			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27045			    "sr_change_speed: "
27046			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27047			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27048			return (EINVAL);
27049		}
27050
27051		/*
27052		 * The current drive speed matches the requested drive speed so
27053		 * there is no need to send the mode select to change the speed
27054		 */
27055		if (current_speed == data) {
27056			break;
27057		}
27058
27059		/* Build the select data for the requested drive speed */
27060		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27061		select_mhp = (struct mode_header *)select;
27062		select_mhp->bdesc_length = 0;
		select_page =
		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27067		select_page->mode_page.code = CDROM_MODE_SPEED;
27068		select_page->mode_page.length = 2;
27069		select_page->speed = (uchar_t)data;
27070
		/* Send the mode select for the requested drive speed */
27072		ssc = sd_ssc_init(un);
27073		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27074		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27075		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27076		sd_ssc_fini(ssc);
27077		if (rval != 0) {
27078			/*
27079			 * The mode select failed for the requested drive speed,
27080			 * so reset the data for the original drive speed and
27081			 * send it to the target. The error is indicated by the
27082			 * return value for the failed mode select.
27083			 */
27084			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27085			    "sr_drive_speed: Mode Select Failed\n");
27086			select_page->speed = sense_page->speed;
27087			ssc = sd_ssc_init(un);
27088			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27089			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27090			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27091			sd_ssc_fini(ssc);
27092		}
27093		break;
27094	default:
27095		/* should not reach here, but check anyway */
27096		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27097		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27098		rval = EINVAL;
27099		break;
27100	}
27101
27102	if (select) {
27103		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27104	}
27105	if (sense) {
27106		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27107	}
27108
27109	return (rval);
27110}
27111
27112
27113/*
27114 *    Function: sr_atapi_change_speed()
27115 *
27116 * Description: This routine is the driver entry point for handling CD-ROM
27117 *		drive speed ioctl requests for MMC devices that do not support
27118 *		the Real Time Streaming feature (0x107).
27119 *
27120 *		Note: This routine will use the SET SPEED command which may not
27121 *		be supported by all devices.
27122 *
27123 *   Arguments: dev- the device 'dev_t'
27124 *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27125 *		     CDROMSDRVSPEED (set)
27126 *		data- current drive speed or requested drive speed
27127 *		flag- this argument is a pass through to ddi_copyxxx() directly
27128 *		      from the mode argument of ioctl().
27129 *
27130 * Return Code: the code returned by sd_send_scsi_cmd()
27131 *		EINVAL if invalid arguments are provided
27132 *		EFAULT if ddi_copyxxx() fails
27133 *		ENXIO if fail ddi_get_soft_state
27134 *		EIO if invalid mode sense block descriptor length
27135 */
27136
27137static int
27138sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27139{
27140	struct sd_lun			*un;
27141	struct uscsi_cmd		*com = NULL;
27142	struct mode_header_grp2		*sense_mhp;
27143	uchar_t				*sense_page;
27144	uchar_t				*sense = NULL;
27145	char				cdb[CDB_GROUP5];
27146	int				bd_len;
27147	int				current_speed = 0;
27148	int				max_speed = 0;
27149	int				rval;
27150	sd_ssc_t			*ssc;
27151
27152	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27153
27154	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27155		return (ENXIO);
27156	}
27157
27158	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27159
27160	ssc = sd_ssc_init(un);
27161	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
27162	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27163	    SD_PATH_STANDARD);
27164	sd_ssc_fini(ssc);
27165	if (rval != 0) {
27166		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27167		    "sr_atapi_change_speed: Mode Sense Failed\n");
27168		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27169		return (rval);
27170	}
27171
27172	/* Check the block descriptor len to handle only 1 block descriptor */
27173	sense_mhp = (struct mode_header_grp2 *)sense;
27174	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27175	if (bd_len > MODE_BLK_DESC_LENGTH) {
27176		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27177		    "sr_atapi_change_speed: Mode Sense returned invalid "
27178		    "block descriptor length\n");
27179		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27180		return (EIO);
27181	}
27182
27183	/* Calculate the current and maximum drive speeds */
27184	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27185	current_speed = (sense_page[14] << 8) | sense_page[15];
27186	max_speed = (sense_page[8] << 8) | sense_page[9];
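
	/*
	 * Example (assuming the canonical 176 KB/sec for SD_SPEED_1X):
	 * capabilities-page bytes 14-15 of 0x02 0xC2 decode to a current
	 * speed of 706 KB/sec, which CDROMGDRVSPEED below reports as
	 * 706 / 176 = 4 (i.e. a 4x drive).
	 */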
27187
27188	/* Process the command */
27189	switch (cmd) {
27190	case CDROMGDRVSPEED:
27191		current_speed /= SD_SPEED_1X;
27192		if (ddi_copyout(&current_speed, (void *)data,
27193		    sizeof (int), flag) != 0)
27194			rval = EFAULT;
27195		break;
27196	case CDROMSDRVSPEED:
27197		/* Convert the speed code to KB/sec */
27198		switch ((uchar_t)data) {
27199		case CDROM_NORMAL_SPEED:
27200			current_speed = SD_SPEED_1X;
27201			break;
27202		case CDROM_DOUBLE_SPEED:
27203			current_speed = 2 * SD_SPEED_1X;
27204			break;
27205		case CDROM_QUAD_SPEED:
27206			current_speed = 4 * SD_SPEED_1X;
27207			break;
27208		case CDROM_TWELVE_SPEED:
27209			current_speed = 12 * SD_SPEED_1X;
27210			break;
27211		case CDROM_MAXIMUM_SPEED:
27212			current_speed = 0xffff;
27213			break;
27214		default:
27215			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27216			    "sr_atapi_change_speed: invalid drive speed %d\n",
27217			    (uchar_t)data);
27218			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27219			return (EINVAL);
27220		}
27221
27222		/* Check the request against the drive's max speed. */
27223		if (current_speed != 0xffff) {
27224			if (current_speed > max_speed) {
27225				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27226				return (EINVAL);
27227			}
27228		}
27229
27230		/*
27231		 * Build and send the SET SPEED command
27232		 *
27233		 * Note: The SET SPEED (0xBB) command used in this routine is
27234		 * obsolete per the SCSI MMC spec but still supported in the
		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI,
		 * so the command is still implemented in this routine.
27237		 */
27238		bzero(cdb, sizeof (cdb));
27239		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
27240		cdb[2] = (uchar_t)(current_speed >> 8);
27241		cdb[3] = (uchar_t)current_speed;
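
		/*
		 * Example: a requested speed of 706 KB/sec (4x) is encoded
		 * big-endian as cdb[2] = 0x02, cdb[3] = 0xC2, while
		 * CDROM_MAXIMUM_SPEED yields 0xff/0xff, the conventional
		 * request for the drive's best speed.
		 */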
27242		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27243		com->uscsi_cdb	   = (caddr_t)cdb;
27244		com->uscsi_cdblen  = CDB_GROUP5;
27245		com->uscsi_bufaddr = NULL;
27246		com->uscsi_buflen  = 0;
27247		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
27248		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
27249		break;
27250	default:
27251		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27252		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27253		rval = EINVAL;
27254	}
27255
27256	if (sense) {
27257		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27258	}
27259	if (com) {
27260		kmem_free(com, sizeof (*com));
27261	}
27262	return (rval);
27263}
27264
27265
27266/*
27267 *    Function: sr_pause_resume()
27268 *
27269 * Description: This routine is the driver entry point for handling CD-ROM
27270 *		pause/resume ioctl requests. This only affects the audio play
27271 *		operation.
27272 *
27273 *   Arguments: dev - the device 'dev_t'
27274 *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27275 *		      for setting the resume bit of the cdb.
27276 *
27277 * Return Code: the code returned by sd_send_scsi_cmd()
27278 *		EINVAL if invalid mode specified
27279 *
27280 */
27281
27282static int
27283sr_pause_resume(dev_t dev, int cmd)
27284{
27285	struct sd_lun		*un;
27286	struct uscsi_cmd	*com;
27287	char			cdb[CDB_GROUP1];
27288	int			rval;
27289
27290	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27291		return (ENXIO);
27292	}
27293
27294	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27295	bzero(cdb, CDB_GROUP1);
27296	cdb[0] = SCMD_PAUSE_RESUME;
27297	switch (cmd) {
27298	case CDROMRESUME:
27299		cdb[8] = 1;
27300		break;
27301	case CDROMPAUSE:
27302		cdb[8] = 0;
27303		break;
27304	default:
27305		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27306		    " Command '%x' Not Supported\n", cmd);
27307		rval = EINVAL;
27308		goto done;
27309	}
27310
27311	com->uscsi_cdb    = cdb;
27312	com->uscsi_cdblen = CDB_GROUP1;
27313	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27314
27315	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27316	    SD_PATH_STANDARD);
27317
27318done:
27319	kmem_free(com, sizeof (*com));
27320	return (rval);
27321}
27322
27323
27324/*
27325 *    Function: sr_play_msf()
27326 *
27327 * Description: This routine is the driver entry point for handling CD-ROM
27328 *		ioctl requests to output the audio signals at the specified
27329 *		starting address and continue the audio play until the specified
27330 *		ending address (CDROMPLAYMSF) The address is in Minute Second
27331 *		Frame (MSF) format.
27332 *
27333 *   Arguments: dev	- the device 'dev_t'
27334 *		data	- pointer to user provided audio msf structure,
27335 *		          specifying start/end addresses.
27336 *		flag	- this argument is a pass through to ddi_copyxxx()
27337 *		          directly from the mode argument of ioctl().
27338 *
27339 * Return Code: the code returned by sd_send_scsi_cmd()
27340 *		EFAULT if ddi_copyxxx() fails
27341 *		ENXIO if fail ddi_get_soft_state
27342 *		EINVAL if data pointer is NULL
27343 */
27344
27345static int
27346sr_play_msf(dev_t dev, caddr_t data, int flag)
27347{
27348	struct sd_lun		*un;
27349	struct uscsi_cmd	*com;
27350	struct cdrom_msf	msf_struct;
27351	struct cdrom_msf	*msf = &msf_struct;
27352	char			cdb[CDB_GROUP1];
27353	int			rval;
27354
27355	if (data == NULL) {
27356		return (EINVAL);
27357	}
27358
27359	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27360		return (ENXIO);
27361	}
27362
27363	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27364		return (EFAULT);
27365	}
27366
27367	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27368	bzero(cdb, CDB_GROUP1);
27369	cdb[0] = SCMD_PLAYAUDIO_MSF;
27370	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27371		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27372		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27373		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27374		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27375		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27376		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27377	} else {
27378		cdb[3] = msf->cdmsf_min0;
27379		cdb[4] = msf->cdmsf_sec0;
27380		cdb[5] = msf->cdmsf_frame0;
27381		cdb[6] = msf->cdmsf_min1;
27382		cdb[7] = msf->cdmsf_sec1;
27383		cdb[8] = msf->cdmsf_frame1;
27384	}
27385	com->uscsi_cdb    = cdb;
27386	com->uscsi_cdblen = CDB_GROUP1;
27387	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27388	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27389	    SD_PATH_STANDARD);
27390	kmem_free(com, sizeof (*com));
27391	return (rval);
27392}
27393
27394
27395/*
27396 *    Function: sr_play_trkind()
27397 *
27398 * Description: This routine is the driver entry point for handling CD-ROM
27399 *		ioctl requests to output the audio signals at the specified
27400 *		starting address and continue the audio play until the specified
27401 *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27402 *		format.
27403 *
27404 *   Arguments: dev	- the device 'dev_t'
27405 *		data	- pointer to user provided audio track/index structure,
27406 *		          specifying start/end addresses.
27407 *		flag	- this argument is a pass through to ddi_copyxxx()
27408 *		          directly from the mode argument of ioctl().
27409 *
27410 * Return Code: the code returned by sd_send_scsi_cmd()
27411 *		EFAULT if ddi_copyxxx() fails
27412 *		ENXIO if fail ddi_get_soft_state
27413 *		EINVAL if data pointer is NULL
27414 */
27415
27416static int
27417sr_play_trkind(dev_t dev, caddr_t data, int flag)
27418{
27419	struct cdrom_ti		ti_struct;
27420	struct cdrom_ti		*ti = &ti_struct;
27421	struct uscsi_cmd	*com = NULL;
27422	char			cdb[CDB_GROUP1];
27423	int			rval;
27424
27425	if (data == NULL) {
27426		return (EINVAL);
27427	}
27428
27429	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27430		return (EFAULT);
27431	}
27432
27433	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27434	bzero(cdb, CDB_GROUP1);
27435	cdb[0] = SCMD_PLAYAUDIO_TI;
27436	cdb[4] = ti->cdti_trk0;
27437	cdb[5] = ti->cdti_ind0;
27438	cdb[7] = ti->cdti_trk1;
27439	cdb[8] = ti->cdti_ind1;
27440	com->uscsi_cdb    = cdb;
27441	com->uscsi_cdblen = CDB_GROUP1;
27442	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27443	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27444	    SD_PATH_STANDARD);
27445	kmem_free(com, sizeof (*com));
27446	return (rval);
27447}
27448
27449
27450/*
27451 *    Function: sr_read_all_subcodes()
27452 *
27453 * Description: This routine is the driver entry point for handling CD-ROM
27454 *		ioctl requests to return raw subcode data while the target is
27455 *		playing audio (CDROMSUBCODE).
27456 *
27457 *   Arguments: dev	- the device 'dev_t'
27458 *		data	- pointer to user provided cdrom subcode structure,
27459 *		          specifying the transfer length and address.
27460 *		flag	- this argument is a pass through to ddi_copyxxx()
27461 *		          directly from the mode argument of ioctl().
27462 *
27463 * Return Code: the code returned by sd_send_scsi_cmd()
27464 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
27466 *		EINVAL if data pointer is NULL
27467 */
27468
27469static int
27470sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27471{
27472	struct sd_lun		*un = NULL;
27473	struct uscsi_cmd	*com = NULL;
27474	struct cdrom_subcode	*subcode = NULL;
27475	int			rval;
27476	size_t			buflen;
27477	char			cdb[CDB_GROUP5];
27478
27479#ifdef _MULTI_DATAMODEL
27480	/* To support ILP32 applications in an LP64 world */
27481	struct cdrom_subcode32		cdrom_subcode32;
27482	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27483#endif
27484	if (data == NULL) {
27485		return (EINVAL);
27486	}
27487
27488	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27489		return (ENXIO);
27490	}
27491
27492	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27493
27494#ifdef _MULTI_DATAMODEL
27495	switch (ddi_model_convert_from(flag & FMODELS)) {
27496	case DDI_MODEL_ILP32:
27497		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27498			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27499			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27500			kmem_free(subcode, sizeof (struct cdrom_subcode));
27501			return (EFAULT);
27502		}
27503		/* Convert the ILP32 uscsi data from the application to LP64 */
27504		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27505		break;
27506	case DDI_MODEL_NONE:
27507		if (ddi_copyin(data, subcode,
27508		    sizeof (struct cdrom_subcode), flag)) {
27509			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27510			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27511			kmem_free(subcode, sizeof (struct cdrom_subcode));
27512			return (EFAULT);
27513		}
27514		break;
27515	}
27516#else /* ! _MULTI_DATAMODEL */
27517	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27518		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27519		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27520		kmem_free(subcode, sizeof (struct cdrom_subcode));
27521		return (EFAULT);
27522	}
27523#endif /* _MULTI_DATAMODEL */
27524
27525	/*
	 * MMC-2 allows at most 3 bytes for the transfer length, so reject
	 * any request whose length does not fit in 24 bits.
27528	 */
27529	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27530		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27531		    "sr_read_all_subcodes: "
27532		    "cdrom transfer length too large: %d (limit %d)\n",
27533		    subcode->cdsc_length, 0xFFFFFF);
27534		kmem_free(subcode, sizeof (struct cdrom_subcode));
27535		return (EINVAL);
27536	}
27537
27538	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
27539	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27540	bzero(cdb, CDB_GROUP5);
27541
27542	if (un->un_f_mmc_cap == TRUE) {
27543		cdb[0] = (char)SCMD_READ_CD;
27544		cdb[2] = (char)0xff;
27545		cdb[3] = (char)0xff;
27546		cdb[4] = (char)0xff;
27547		cdb[5] = (char)0xff;
27548		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27549		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27550		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
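		/* Sub-channel selection: 001b requests raw P-W sub-channel data */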
27551		cdb[10] = 1;
27552	} else {
27553		/*
27554		 * Note: A vendor specific command (0xDF) is being used her to
27555		 * request a read of all subcodes.
27556		 */
27557		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
27558		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
27559		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27560		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27561		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
27562	}
27563	com->uscsi_cdb	   = cdb;
27564	com->uscsi_cdblen  = CDB_GROUP5;
27565	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
27566	com->uscsi_buflen  = buflen;
27567	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27568	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
27569	    SD_PATH_STANDARD);
27570	kmem_free(subcode, sizeof (struct cdrom_subcode));
27571	kmem_free(com, sizeof (*com));
27572	return (rval);
27573}
27574
27575
27576/*
27577 *    Function: sr_read_subchannel()
27578 *
27579 * Description: This routine is the driver entry point for handling CD-ROM
27580 *		ioctl requests to return the Q sub-channel data of the CD
 *		current position block (CDROMSUBCHNL). The data includes the
 *		track number, index number, absolute CD-ROM address (LBA or MSF
 *		format per the user), track-relative CD-ROM address (LBA or MSF
 *		format per the user), control data, and audio status.
27585 *
27586 *   Arguments: dev	- the device 'dev_t'
27587 *		data	- pointer to user provided cdrom sub-channel structure
27588 *		flag	- this argument is a pass through to ddi_copyxxx()
27589 *		          directly from the mode argument of ioctl().
27590 *
27591 * Return Code: the code returned by sd_send_scsi_cmd()
27592 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
27594 *		EINVAL if data pointer is NULL
27595 */
27596
27597static int
27598sr_read_subchannel(dev_t dev, caddr_t data, int flag)
27599{
27600	struct sd_lun		*un;
27601	struct uscsi_cmd	*com;
	struct cdrom_subchnl	subchannel;
	struct cdrom_subchnl	*subchnl = &subchannel;
27604	char			cdb[CDB_GROUP1];
27605	caddr_t			buffer;
27606	int			rval;
27607
27608	if (data == NULL) {
27609		return (EINVAL);
27610	}
27611
27612	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27613	    (un->un_state == SD_STATE_OFFLINE)) {
27614		return (ENXIO);
27615	}
27616
27617	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
27618		return (EFAULT);
27619	}
27620
27621	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
27622	bzero(cdb, CDB_GROUP1);
27623	cdb[0] = SCMD_READ_SUBCHANNEL;
27624	/* Set the MSF bit based on the user requested address format */
27625	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
27626	/*
27627	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
27628	 * returned
27629	 */
27630	cdb[2] = 0x40;
27631	/*
27632	 * Set byte 3 to specify the return data format. A value of 0x01
27633	 * indicates that the CD-ROM current position should be returned.
27634	 */
27635	cdb[3] = 0x01;
27636	cdb[8] = 0x10;
27637	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27638	com->uscsi_cdb	   = cdb;
27639	com->uscsi_cdblen  = CDB_GROUP1;
27640	com->uscsi_bufaddr = buffer;
27641	com->uscsi_buflen  = 16;
27642	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27643	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27644	    SD_PATH_STANDARD);
27645	if (rval != 0) {
27646		kmem_free(buffer, 16);
27647		kmem_free(com, sizeof (*com));
27648		return (rval);
27649	}
27650
27651	/* Process the returned Q sub-channel data */
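	/*
	 * Response layout: byte 1 = audio status, byte 5 = ADR/CTRL,
	 * byte 6 = track, byte 7 = index, bytes 8-11 = absolute address,
	 * bytes 12-15 = track-relative address.
	 */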
27652	subchnl->cdsc_audiostatus = buffer[1];
27653	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
27654	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
27655	subchnl->cdsc_trk	= buffer[6];
27656	subchnl->cdsc_ind	= buffer[7];
27657	if (subchnl->cdsc_format & CDROM_LBA) {
27658		subchnl->cdsc_absaddr.lba =
27659		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27660		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27661		subchnl->cdsc_reladdr.lba =
27662		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
27663		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
27664	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
27665		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
27666		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
27667		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
27668		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
27669		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
27670		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
27671	} else {
27672		subchnl->cdsc_absaddr.msf.minute = buffer[9];
27673		subchnl->cdsc_absaddr.msf.second = buffer[10];
27674		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
27675		subchnl->cdsc_reladdr.msf.minute = buffer[13];
27676		subchnl->cdsc_reladdr.msf.second = buffer[14];
27677		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
27678	}
27679	kmem_free(buffer, 16);
27680	kmem_free(com, sizeof (*com));
27681	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
27682	    != 0) {
27683		return (EFAULT);
27684	}
27685	return (rval);
27686}
27687
27688
27689/*
27690 *    Function: sr_read_tocentry()
27691 *
27692 * Description: This routine is the driver entry point for handling CD-ROM
27693 *		ioctl requests to read from the Table of Contents (TOC)
27694 *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27695 *		fields, the starting address (LBA or MSF format per the user)
27696 *		and the data mode if the user specified track is a data track.
27697 *
27698 *		Note: The READ HEADER (0x44) command used in this routine is
27699 *		obsolete per the SCSI MMC spec but still supported in the
 *		MT FUJI vendor spec. Most equipment still adheres to MT FUJI,
 *		so the command remains implemented in this routine.
27702 *
27703 *   Arguments: dev	- the device 'dev_t'
27704 *		data	- pointer to user provided toc entry structure,
27705 *			  specifying the track # and the address format
27706 *			  (LBA or MSF).
27707 *		flag	- this argument is a pass through to ddi_copyxxx()
27708 *		          directly from the mode argument of ioctl().
27709 *
27710 * Return Code: the code returned by sd_send_scsi_cmd()
27711 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
27713 *		EINVAL if data pointer is NULL
27714 */
27715
27716static int
27717sr_read_tocentry(dev_t dev, caddr_t data, int flag)
27718{
27719	struct sd_lun		*un = NULL;
27720	struct uscsi_cmd	*com;
27721	struct cdrom_tocentry	toc_entry;
27722	struct cdrom_tocentry	*entry = &toc_entry;
27723	caddr_t			buffer;
27724	int			rval;
27725	char			cdb[CDB_GROUP1];
27726
27727	if (data == NULL) {
27728		return (EINVAL);
27729	}
27730
27731	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27732	    (un->un_state == SD_STATE_OFFLINE)) {
27733		return (ENXIO);
27734	}
27735
27736	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
27737		return (EFAULT);
27738	}
27739
27740	/* Validate the requested track and address format */
27741	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
27742		return (EINVAL);
27743	}
27744
27745	if (entry->cdte_track == 0) {
27746		return (EINVAL);
27747	}
27748
27749	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
27750	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27751	bzero(cdb, CDB_GROUP1);
27752
27753	cdb[0] = SCMD_READ_TOC;
27754	/* Set the MSF bit based on the user requested address format  */
27755	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
27756	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27757		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
27758	} else {
27759		cdb[6] = entry->cdte_track;
27760	}
27761
27762	/*
27763	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
27764	 * (4 byte TOC response header + 8 byte track descriptor)
27765	 */
27766	cdb[8] = 12;
27767	com->uscsi_cdb	   = cdb;
27768	com->uscsi_cdblen  = CDB_GROUP1;
27769	com->uscsi_bufaddr = buffer;
27770	com->uscsi_buflen  = 0x0C;
27771	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
27772	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27773	    SD_PATH_STANDARD);
27774	if (rval != 0) {
27775		kmem_free(buffer, 12);
27776		kmem_free(com, sizeof (*com));
27777		return (rval);
27778	}
27779
27780	/* Process the toc entry */
27781	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
27782	entry->cdte_ctrl	= (buffer[5] & 0x0F);
27783	if (entry->cdte_format & CDROM_LBA) {
27784		entry->cdte_addr.lba =
27785		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27786		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27787	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
27788		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
27789		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
27790		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
27791		/*
27792		 * Send a READ TOC command using the LBA address format to get
27793		 * the LBA for the track requested so it can be used in the
27794		 * READ HEADER request
27795		 *
27796		 * Note: The MSF bit of the READ HEADER command specifies the
27797		 * output format. The block address specified in that command
27798		 * must be in LBA format.
27799		 */
27800		cdb[1] = 0;
27801		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27802		    SD_PATH_STANDARD);
27803		if (rval != 0) {
27804			kmem_free(buffer, 12);
27805			kmem_free(com, sizeof (*com));
27806			return (rval);
27807		}
27808	} else {
27809		entry->cdte_addr.msf.minute	= buffer[9];
27810		entry->cdte_addr.msf.second	= buffer[10];
27811		entry->cdte_addr.msf.frame	= buffer[11];
27812		/*
27813		 * Send a READ TOC command using the LBA address format to get
27814		 * the LBA for the track requested so it can be used in the
27815		 * READ HEADER request
27816		 *
27817		 * Note: The MSF bit of the READ HEADER command specifies the
27818		 * output format. The block address specified in that command
27819		 * must be in LBA format.
27820		 */
27821		cdb[1] = 0;
27822		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27823		    SD_PATH_STANDARD);
27824		if (rval != 0) {
27825			kmem_free(buffer, 12);
27826			kmem_free(com, sizeof (*com));
27827			return (rval);
27828		}
27829	}
27830
27831	/*
27832	 * Build and send the READ HEADER command to determine the data mode of
27833	 * the user specified track.
27834	 */
27835	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
27836	    (entry->cdte_track != CDROM_LEADOUT)) {
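		/*
		 * Bytes 8-11 of the READ TOC response still hold the LBA of
		 * the requested track; use it as the READ HEADER address.
		 * Byte 0 of the 8-byte READ HEADER response is the data mode.
		 */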
27837		bzero(cdb, CDB_GROUP1);
27838		cdb[0] = SCMD_READ_HEADER;
27839		cdb[2] = buffer[8];
27840		cdb[3] = buffer[9];
27841		cdb[4] = buffer[10];
27842		cdb[5] = buffer[11];
27843		cdb[8] = 0x08;
27844		com->uscsi_buflen = 0x08;
27845		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27846		    SD_PATH_STANDARD);
27847		if (rval == 0) {
27848			entry->cdte_datamode = buffer[0];
27849		} else {
27850			/*
			 * The READ HEADER command failed. Since the command
			 * is obsolete in one spec, it's better to return -1
			 * as an invalid data mode so that we can still
			 * receive the rest of the TOC data.
27855			 */
27856			entry->cdte_datamode = (uchar_t)-1;
27857		}
27858	} else {
27859		entry->cdte_datamode = (uchar_t)-1;
27860	}
27861
27862	kmem_free(buffer, 12);
27863	kmem_free(com, sizeof (*com));
27864	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
27865		return (EFAULT);
27866
27867	return (rval);
27868}
27869
27870
27871/*
27872 *    Function: sr_read_tochdr()
27873 *
27874 * Description: This routine is the driver entry point for handling CD-ROM
27875 * 		ioctl requests to read the Table of Contents (TOC) header
 *		(CDROMREADTOCHDR). The TOC header consists of the disk's
 *		starting and ending track numbers.
27878 *
27879 *   Arguments: dev	- the device 'dev_t'
27880 *		data	- pointer to user provided toc header structure,
27881 *			  specifying the starting and ending track numbers.
27882 *		flag	- this argument is a pass through to ddi_copyxxx()
27883 *			  directly from the mode argument of ioctl().
27884 *
27885 * Return Code: the code returned by sd_send_scsi_cmd()
27886 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
27888 *		EINVAL if data pointer is NULL
27889 */
27890
27891static int
27892sr_read_tochdr(dev_t dev, caddr_t data, int flag)
27893{
27894	struct sd_lun		*un;
27895	struct uscsi_cmd	*com;
27896	struct cdrom_tochdr	toc_header;
27897	struct cdrom_tochdr	*hdr = &toc_header;
27898	char			cdb[CDB_GROUP1];
27899	int			rval;
27900	caddr_t			buffer;
27901
27902	if (data == NULL) {
27903		return (EINVAL);
27904	}
27905
27906	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27907	    (un->un_state == SD_STATE_OFFLINE)) {
27908		return (ENXIO);
27909	}
27910
27911	buffer = kmem_zalloc(4, KM_SLEEP);
27912	bzero(cdb, CDB_GROUP1);
27913	cdb[0] = SCMD_READ_TOC;
27914	/*
27915	 * Specifying a track number of 0x00 in the READ TOC command indicates
27916	 * that the TOC header should be returned
27917	 */
27918	cdb[6] = 0x00;
27919	/*
27920	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
27921	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
27922	 */
27923	cdb[8] = 0x04;
27924	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27925	com->uscsi_cdb	   = cdb;
27926	com->uscsi_cdblen  = CDB_GROUP1;
27927	com->uscsi_bufaddr = buffer;
27928	com->uscsi_buflen  = 0x04;
27929	com->uscsi_timeout = 300;
27930	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27931
27932	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27933	    SD_PATH_STANDARD);
27934	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27935		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
27936		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
27937	} else {
27938		hdr->cdth_trk0 = buffer[2];
27939		hdr->cdth_trk1 = buffer[3];
27940	}
27941	kmem_free(buffer, 4);
27942	kmem_free(com, sizeof (*com));
27943	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
27944		return (EFAULT);
27945	}
27946	return (rval);
27947}
27948
27949
27950/*
27951 * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
 * sr_read_cdda(), and sr_read_cdxa() routines implement driver support for
27953 * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
27954 * digital audio and extended architecture digital audio. These modes are
27955 * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
27956 * MMC specs.
27957 *
27958 * In addition to support for the various data formats these routines also
27959 * include support for devices that implement only the direct access READ
27960 * commands (0x08, 0x28), devices that implement the READ_CD commands
27961 * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
27962 * READ CDXA commands (0xD8, 0xDB)
27963 */
27964
27965/*
27966 *    Function: sr_read_mode1()
27967 *
27968 * Description: This routine is the driver entry point for handling CD-ROM
27969 *		ioctl read mode1 requests (CDROMREADMODE1).
27970 *
27971 *   Arguments: dev	- the device 'dev_t'
27972 *		data	- pointer to user provided cd read structure specifying
27973 *			  the lba buffer address and length.
27974 *		flag	- this argument is a pass through to ddi_copyxxx()
27975 *			  directly from the mode argument of ioctl().
27976 *
27977 * Return Code: the code returned by sd_send_scsi_cmd()
27978 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
27980 *		EINVAL if data pointer is NULL
27981 */
27982
27983static int
27984sr_read_mode1(dev_t dev, caddr_t data, int flag)
27985{
27986	struct sd_lun		*un;
27987	struct cdrom_read	mode1_struct;
27988	struct cdrom_read	*mode1 = &mode1_struct;
27989	int			rval;
27990	sd_ssc_t		*ssc;
27991
27992#ifdef _MULTI_DATAMODEL
27993	/* To support ILP32 applications in an LP64 world */
27994	struct cdrom_read32	cdrom_read32;
27995	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27996#endif /* _MULTI_DATAMODEL */
27997
27998	if (data == NULL) {
27999		return (EINVAL);
28000	}
28001
28002	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28003	    (un->un_state == SD_STATE_OFFLINE)) {
28004		return (ENXIO);
28005	}
28006
28007	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28008	    "sd_read_mode1: entry: un:0x%p\n", un);
28009
28010#ifdef _MULTI_DATAMODEL
28011	switch (ddi_model_convert_from(flag & FMODELS)) {
28012	case DDI_MODEL_ILP32:
28013		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28014			return (EFAULT);
28015		}
28016		/* Convert the ILP32 uscsi data from the application to LP64 */
28017		cdrom_read32tocdrom_read(cdrd32, mode1);
28018		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
			return (EFAULT);
		}
		break;
	}
28024#else /* ! _MULTI_DATAMODEL */
28025	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28026		return (EFAULT);
28027	}
28028#endif /* _MULTI_DATAMODEL */
28029
28030	ssc = sd_ssc_init(un);
28031	rval = sd_send_scsi_READ(ssc, mode1->cdread_bufaddr,
28032	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28033	sd_ssc_fini(ssc);
28034
28035	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28036	    "sd_read_mode1: exit: un:0x%p\n", un);
28037
28038	return (rval);
28039}
28040
28041
28042/*
28043 *    Function: sr_read_cd_mode2()
28044 *
28045 * Description: This routine is the driver entry point for handling CD-ROM
28046 *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28047 *		support the READ CD (0xBE) command or the 1st generation
28048 *		READ CD (0xD4) command.
28049 *
28050 *   Arguments: dev	- the device 'dev_t'
28051 *		data	- pointer to user provided cd read structure specifying
28052 *			  the lba buffer address and length.
28053 *		flag	- this argument is a pass through to ddi_copyxxx()
28054 *			  directly from the mode argument of ioctl().
28055 *
28056 * Return Code: the code returned by sd_send_scsi_cmd()
28057 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
28059 *		EINVAL if data pointer is NULL
28060 */
28061
28062static int
28063sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28064{
28065	struct sd_lun		*un;
28066	struct uscsi_cmd	*com;
28067	struct cdrom_read	mode2_struct;
28068	struct cdrom_read	*mode2 = &mode2_struct;
28069	uchar_t			cdb[CDB_GROUP5];
28070	int			nblocks;
28071	int			rval;
28072#ifdef _MULTI_DATAMODEL
28073	/*  To support ILP32 applications in an LP64 world */
28074	struct cdrom_read32	cdrom_read32;
28075	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28076#endif /* _MULTI_DATAMODEL */
28077
28078	if (data == NULL) {
28079		return (EINVAL);
28080	}
28081
28082	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28083	    (un->un_state == SD_STATE_OFFLINE)) {
28084		return (ENXIO);
28085	}
28086
28087#ifdef _MULTI_DATAMODEL
28088	switch (ddi_model_convert_from(flag & FMODELS)) {
28089	case DDI_MODEL_ILP32:
28090		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28091			return (EFAULT);
28092		}
28093		/* Convert the ILP32 uscsi data from the application to LP64 */
28094		cdrom_read32tocdrom_read(cdrd32, mode2);
28095		break;
28096	case DDI_MODEL_NONE:
28097		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28098			return (EFAULT);
28099		}
28100		break;
28101	}
28102
28103#else /* ! _MULTI_DATAMODEL */
28104	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28105		return (EFAULT);
28106	}
28107#endif /* _MULTI_DATAMODEL */
28108
28109	bzero(cdb, sizeof (cdb));
28110	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28111		/* Read command supported by 1st generation atapi drives */
28112		cdb[0] = SCMD_READ_CDD4;
28113	} else {
28114		/* Universal CD Access Command */
28115		cdb[0] = SCMD_READ_CD;
28116	}
28117
28118	/*
	 * Set the expected sector type to 2336-byte Mode 2 (Yellow Book)
28120	 */
28121	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28122
28123	/* set the start address */
28124	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28125	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28126	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28127	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28128
28129	/* set the transfer length */
28130	nblocks = mode2->cdread_buflen / 2336;
28131	cdb[6] = (uchar_t)(nblocks >> 16);
28132	cdb[7] = (uchar_t)(nblocks >> 8);
28133	cdb[8] = (uchar_t)nblocks;
28134
28135	/* set the filter bits */
28136	cdb[9] = CDROM_READ_CD_USERDATA;
28137
28138	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28139	com->uscsi_cdb = (caddr_t)cdb;
28140	com->uscsi_cdblen = sizeof (cdb);
28141	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28142	com->uscsi_buflen = mode2->cdread_buflen;
28143	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28144
28145	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28146	    SD_PATH_STANDARD);
28147	kmem_free(com, sizeof (*com));
28148	return (rval);
28149}
28150
28151
28152/*
28153 *    Function: sr_read_mode2()
28154 *
28155 * Description: This routine is the driver entry point for handling CD-ROM
28156 *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28157 *		do not support the READ CD (0xBE) command.
28158 *
28159 *   Arguments: dev	- the device 'dev_t'
28160 *		data	- pointer to user provided cd read structure specifying
28161 *			  the lba buffer address and length.
28162 *		flag	- this argument is a pass through to ddi_copyxxx()
28163 *			  directly from the mode argument of ioctl().
28164 *
28165 * Return Code: the code returned by sd_send_scsi_cmd()
28166 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
28168 *		EINVAL if data pointer is NULL
28169 *		EIO if fail to reset block size
28170 *		EAGAIN if commands are in progress in the driver
28171 */
28172
28173static int
28174sr_read_mode2(dev_t dev, caddr_t data, int flag)
28175{
28176	struct sd_lun		*un;
28177	struct cdrom_read	mode2_struct;
28178	struct cdrom_read	*mode2 = &mode2_struct;
28179	int			rval;
28180	uint32_t		restore_blksize;
28181	struct uscsi_cmd	*com;
28182	uchar_t			cdb[CDB_GROUP0];
28183	int			nblocks;
28184
28185#ifdef _MULTI_DATAMODEL
28186	/* To support ILP32 applications in an LP64 world */
28187	struct cdrom_read32	cdrom_read32;
28188	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28189#endif /* _MULTI_DATAMODEL */
28190
28191	if (data == NULL) {
28192		return (EINVAL);
28193	}
28194
28195	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28196	    (un->un_state == SD_STATE_OFFLINE)) {
28197		return (ENXIO);
28198	}
28199
28200	/*
28201	 * Because this routine will update the device and driver block size
	 * being used, we want to make sure there are no commands in progress.
	 * If commands are in progress, the user will have to try again.
28204	 *
28205	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28206	 * in sdioctl to protect commands from sdioctl through to the top of
28207	 * sd_uscsi_strategy. See sdioctl for details.
28208	 */
28209	mutex_enter(SD_MUTEX(un));
28210	if (un->un_ncmds_in_driver != 1) {
28211		mutex_exit(SD_MUTEX(un));
28212		return (EAGAIN);
28213	}
28214	mutex_exit(SD_MUTEX(un));
28215
28216	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28217	    "sd_read_mode2: entry: un:0x%p\n", un);
28218
28219#ifdef _MULTI_DATAMODEL
28220	switch (ddi_model_convert_from(flag & FMODELS)) {
28221	case DDI_MODEL_ILP32:
28222		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28223			return (EFAULT);
28224		}
28225		/* Convert the ILP32 uscsi data from the application to LP64 */
28226		cdrom_read32tocdrom_read(cdrd32, mode2);
28227		break;
28228	case DDI_MODEL_NONE:
28229		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28230			return (EFAULT);
28231		}
28232		break;
28233	}
28234#else /* ! _MULTI_DATAMODEL */
28235	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28236		return (EFAULT);
28237	}
28238#endif /* _MULTI_DATAMODEL */
28239
28240	/* Store the current target block size for restoration later */
28241	restore_blksize = un->un_tgt_blocksize;
28242
28243	/* Change the device and soft state target block size to 2336 */
28244	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28245		rval = EIO;
28246		goto done;
28247	}
28248
28249
28250	bzero(cdb, sizeof (cdb));
28251
28252	/* set READ operation */
28253	cdb[0] = SCMD_READ;
28254
28255	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28256	mode2->cdread_lba >>= 2;
28257
28258	/* set the start address */
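	/*
	 * Group 0 READ carries a 21-bit LBA; its high five bits share
	 * byte 1 with the legacy LUN field, hence the 0x1F mask.
	 */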
28259	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
28260	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28261	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28262
28263	/* set the transfer length */
28264	nblocks = mode2->cdread_buflen / 2336;
28265	cdb[4] = (uchar_t)nblocks & 0xFF;
28266
28267	/* build command */
28268	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28269	com->uscsi_cdb = (caddr_t)cdb;
28270	com->uscsi_cdblen = sizeof (cdb);
28271	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28272	com->uscsi_buflen = mode2->cdread_buflen;
28273	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28274
28275	/*
28276	 * Issue SCSI command with user space address for read buffer.
28277	 *
	 * This sends the command through the main channel in the driver.
28279	 *
28280	 * Since this is accessed via an IOCTL call, we go through the
28281	 * standard path, so that if the device was powered down, then
28282	 * it would be 'awakened' to handle the command.
28283	 */
28284	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28285	    SD_PATH_STANDARD);
28286
28287	kmem_free(com, sizeof (*com));
28288
28289	/* Restore the device and soft state target block size */
28290	if (sr_sector_mode(dev, restore_blksize) != 0) {
28291		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28292		    "can't do switch back to mode 1\n");
28293		/*
28294		 * If sd_send_scsi_READ succeeded we still need to report
28295		 * an error because we failed to reset the block size
28296		 */
28297		if (rval == 0) {
28298			rval = EIO;
28299		}
28300	}
28301
28302done:
28303	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28304	    "sd_read_mode2: exit: un:0x%p\n", un);
28305
28306	return (rval);
28307}
28308
28309
28310/*
28311 *    Function: sr_sector_mode()
28312 *
28313 * Description: This utility function is used by sr_read_mode2 to set the target
28314 *		block size based on the user specified size. This is a legacy
 *		implementation based upon a vendor specific mode page.
 *
 *   Arguments: dev	- the device 'dev_t'
 *		blksize	- the target block size to set: 2336 (mode 2) or
 *			  512.
 *
 * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
 *		sd_send_scsi_MODE_SELECT()
 *		ENXIO if ddi_get_soft_state() fails
28325 */
28326
28327static int
28328sr_sector_mode(dev_t dev, uint32_t blksize)
28329{
28330	struct sd_lun	*un;
28331	uchar_t		*sense;
28332	uchar_t		*select;
28333	int		rval;
28334	sd_ssc_t	*ssc;
28335
28336	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28337	    (un->un_state == SD_STATE_OFFLINE)) {
28338		return (ENXIO);
28339	}
28340
28341	sense = kmem_zalloc(20, KM_SLEEP);
28342
28343	/* Note: This is a vendor specific mode page (0x81) */
28344	ssc = sd_ssc_init(un);
28345	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, 20, 0x81,
28346	    SD_PATH_STANDARD);
28347	sd_ssc_fini(ssc);
28348	if (rval != 0) {
28349		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28350		    "sr_sector_mode: Mode Sense failed\n");
28351		kmem_free(sense, 20);
28352		return (rval);
28353	}
28354	select = kmem_zalloc(20, KM_SLEEP);
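	/*
	 * Build the mode select data: a 4-byte header, an 8-byte block
	 * descriptor whose low block-length bytes (offsets 10-11) carry
	 * the new block size, and the 6-byte vendor page (page code 0x01,
	 * i.e. 0x81 with the PS bit cleared). Bit 0 of byte 14 apparently
	 * selects the 2336-byte mode on these drives.
	 */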
28355	select[3] = 0x08;
28356	select[10] = ((blksize >> 8) & 0xff);
28357	select[11] = (blksize & 0xff);
28358	select[12] = 0x01;
28359	select[13] = 0x06;
28360	select[14] = sense[14];
28361	select[15] = sense[15];
28362	if (blksize == SD_MODE2_BLKSIZE) {
28363		select[14] |= 0x01;
28364	}
28365
28366	ssc = sd_ssc_init(un);
28367	rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select, 20,
28368	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
28369	sd_ssc_fini(ssc);
28370	if (rval != 0) {
28371		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28372		    "sr_sector_mode: Mode Select failed\n");
28373	} else {
28374		/*
28375		 * Only update the softstate block size if we successfully
28376		 * changed the device block mode.
28377		 */
28378		mutex_enter(SD_MUTEX(un));
28379		sd_update_block_info(un, blksize, 0);
28380		mutex_exit(SD_MUTEX(un));
28381	}
28382	kmem_free(sense, 20);
28383	kmem_free(select, 20);
28384	return (rval);
28385}
28386
28387
28388/*
28389 *    Function: sr_read_cdda()
28390 *
28391 * Description: This routine is the driver entry point for handling CD-ROM
 *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
 *		the target supports CDDA these requests are handled via a
 *		vendor specific command (0xD8). If the target does not support
 *		CDDA
28395 *		these requests are handled via the READ CD command (0xBE).
28396 *
28397 *   Arguments: dev	- the device 'dev_t'
28398 *		data	- pointer to user provided CD-DA structure specifying
28399 *			  the track starting address, transfer length, and
28400 *			  subcode options.
28401 *		flag	- this argument is a pass through to ddi_copyxxx()
28402 *			  directly from the mode argument of ioctl().
28403 *
28404 * Return Code: the code returned by sd_send_scsi_cmd()
28405 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
28407 *		EINVAL if invalid arguments are provided
28408 *		ENOTTY
28409 */
28410
28411static int
28412sr_read_cdda(dev_t dev, caddr_t data, int flag)
28413{
28414	struct sd_lun			*un;
28415	struct uscsi_cmd		*com;
28416	struct cdrom_cdda		*cdda;
28417	int				rval;
28418	size_t				buflen;
28419	char				cdb[CDB_GROUP5];
28420
28421#ifdef _MULTI_DATAMODEL
28422	/* To support ILP32 applications in an LP64 world */
28423	struct cdrom_cdda32	cdrom_cdda32;
28424	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28425#endif /* _MULTI_DATAMODEL */
28426
28427	if (data == NULL) {
28428		return (EINVAL);
28429	}
28430
28431	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28432		return (ENXIO);
28433	}
28434
28435	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28436
28437#ifdef _MULTI_DATAMODEL
28438	switch (ddi_model_convert_from(flag & FMODELS)) {
28439	case DDI_MODEL_ILP32:
28440		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28441			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28442			    "sr_read_cdda: ddi_copyin Failed\n");
28443			kmem_free(cdda, sizeof (struct cdrom_cdda));
28444			return (EFAULT);
28445		}
28446		/* Convert the ILP32 uscsi data from the application to LP64 */
28447		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28448		break;
28449	case DDI_MODEL_NONE:
28450		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28451			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28452			    "sr_read_cdda: ddi_copyin Failed\n");
28453			kmem_free(cdda, sizeof (struct cdrom_cdda));
28454			return (EFAULT);
28455		}
28456		break;
28457	}
28458#else /* ! _MULTI_DATAMODEL */
28459	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28460		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28461		    "sr_read_cdda: ddi_copyin Failed\n");
28462		kmem_free(cdda, sizeof (struct cdrom_cdda));
28463		return (EFAULT);
28464	}
28465#endif /* _MULTI_DATAMODEL */
28466
28467	/*
	 * MMC-2 allows at most 3 bytes for the transfer length, so reject
	 * any request whose length does not fit in 24 bits.
28470	 */
28471	if ((cdda->cdda_length & 0xFF000000) != 0) {
28472		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28473		    "cdrom transfer length too large: %d (limit %d)\n",
28474		    cdda->cdda_length, 0xFFFFFF);
28475		kmem_free(cdda, sizeof (struct cdrom_cdda));
28476		return (EINVAL);
28477	}
28478
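	/*
	 * Pick the per-block transfer size implied by the subcode option:
	 * 2352 bytes of raw audio, 2368 with the Q sub-channel appended,
	 * 2448 with all sub-channels, or the sub-channel data alone.
	 */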
28479	switch (cdda->cdda_subcode) {
28480	case CDROM_DA_NO_SUBCODE:
28481		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28482		break;
28483	case CDROM_DA_SUBQ:
28484		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28485		break;
28486	case CDROM_DA_ALL_SUBCODE:
28487		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28488		break;
28489	case CDROM_DA_SUBCODE_ONLY:
28490		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28491		break;
28492	default:
28493		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28494		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28495		    cdda->cdda_subcode);
28496		kmem_free(cdda, sizeof (struct cdrom_cdda));
28497		return (EINVAL);
28498	}
28499
28500	/* Build and send the command */
28501	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28502	bzero(cdb, CDB_GROUP5);
28503
28504	if (un->un_f_cfg_cdda == TRUE) {
28505		cdb[0] = (char)SCMD_READ_CD;
28506		cdb[1] = 0x04;
28507		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28508		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28509		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28510		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28511		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28512		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28513		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
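		/*
		 * Byte 9 (0x10) requests user data only; byte 10 selects
		 * which sub-channel data, if any, is interleaved.
		 */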
28514		cdb[9] = 0x10;
28515		switch (cdda->cdda_subcode) {
28516		case CDROM_DA_NO_SUBCODE :
28517			cdb[10] = 0x0;
28518			break;
28519		case CDROM_DA_SUBQ :
28520			cdb[10] = 0x2;
28521			break;
28522		case CDROM_DA_ALL_SUBCODE :
28523			cdb[10] = 0x1;
28524			break;
28525		case CDROM_DA_SUBCODE_ONLY :
28526			/* FALLTHROUGH */
28527		default :
28528			kmem_free(cdda, sizeof (struct cdrom_cdda));
28529			kmem_free(com, sizeof (*com));
28530			return (ENOTTY);
28531		}
28532	} else {
28533		cdb[0] = (char)SCMD_READ_CDDA;
28534		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28535		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28536		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28537		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28538		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28539		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28540		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28541		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28542		cdb[10] = cdda->cdda_subcode;
28543	}
28544
28545	com->uscsi_cdb = cdb;
28546	com->uscsi_cdblen = CDB_GROUP5;
28547	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28548	com->uscsi_buflen = buflen;
28549	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28550
28551	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28552	    SD_PATH_STANDARD);
28553
28554	kmem_free(cdda, sizeof (struct cdrom_cdda));
28555	kmem_free(com, sizeof (*com));
28556	return (rval);
28557}
28558
28559
28560/*
28561 *    Function: sr_read_cdxa()
28562 *
28563 * Description: This routine is the driver entry point for handling CD-ROM
 *		ioctl requests to return CD-XA (Extended Architecture) data
 *		(CDROMCDXA).
28566 *
28567 *   Arguments: dev	- the device 'dev_t'
28568 *		data	- pointer to user provided CD-XA structure specifying
28569 *			  the data starting address, transfer length, and format
28570 *		flag	- this argument is a pass through to ddi_copyxxx()
28571 *			  directly from the mode argument of ioctl().
28572 *
28573 * Return Code: the code returned by sd_send_scsi_cmd()
28574 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
28576 *		EINVAL if data pointer is NULL
28577 */
28578
28579static int
28580sr_read_cdxa(dev_t dev, caddr_t data, int flag)
28581{
28582	struct sd_lun		*un;
28583	struct uscsi_cmd	*com;
28584	struct cdrom_cdxa	*cdxa;
28585	int			rval;
28586	size_t			buflen;
28587	char			cdb[CDB_GROUP5];
28588	uchar_t			read_flags;
28589
28590#ifdef _MULTI_DATAMODEL
28591	/* To support ILP32 applications in an LP64 world */
28592	struct cdrom_cdxa32		cdrom_cdxa32;
28593	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
28594#endif /* _MULTI_DATAMODEL */
28595
28596	if (data == NULL) {
28597		return (EINVAL);
28598	}
28599
28600	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28601		return (ENXIO);
28602	}
28603
28604	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
28605
28606#ifdef _MULTI_DATAMODEL
28607	switch (ddi_model_convert_from(flag & FMODELS)) {
28608	case DDI_MODEL_ILP32:
28609		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
28610			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28611			return (EFAULT);
28612		}
28613		/*
28614		 * Convert the ILP32 uscsi data from the
28615		 * application to LP64 for internal use.
28616		 */
28617		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
28618		break;
28619	case DDI_MODEL_NONE:
28620		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28621			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28622			return (EFAULT);
28623		}
28624		break;
28625	}
28626#else /* ! _MULTI_DATAMODEL */
28627	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28628		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28629		return (EFAULT);
28630	}
28631#endif /* _MULTI_DATAMODEL */
28632
28633	/*
	 * MMC-2 allows at most 3 bytes for the transfer length, so reject
	 * any request whose length does not fit in 24 bits.
28636	 */
28637	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
28638		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
28639		    "cdrom transfer length too large: %d (limit %d)\n",
28640		    cdxa->cdxa_length, 0xFFFFFF);
28641		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28642		return (EINVAL);
28643	}
28644
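	/*
	 * Map the requested CD-XA format to a per-block transfer size and,
	 * for the MMC path, the READ CD byte-9 flags: 0x10 = user data
	 * only, 0xf8 = full 2352-byte raw sector, 0xfc = raw sector plus
	 * C2 error data (hence the larger 2646-byte blocks).
	 */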
28645	switch (cdxa->cdxa_format) {
28646	case CDROM_XA_DATA:
28647		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
28648		read_flags = 0x10;
28649		break;
28650	case CDROM_XA_SECTOR_DATA:
28651		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
28652		read_flags = 0xf8;
28653		break;
28654	case CDROM_XA_DATA_W_ERROR:
28655		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
28656		read_flags = 0xfc;
28657		break;
28658	default:
28659		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28660		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
28661		    cdxa->cdxa_format);
28662		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28663		return (EINVAL);
28664	}
28665
28666	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28667	bzero(cdb, CDB_GROUP5);
28668	if (un->un_f_mmc_cap == TRUE) {
28669		cdb[0] = (char)SCMD_READ_CD;
28670		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28671		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28672		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28673		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28674		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28675		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28676		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
28677		cdb[9] = (char)read_flags;
28678	} else {
28679		/*
		 * Note: A vendor-specific command (0xDB) is used here to
		 * request a read of CD-XA data.
28682		 */
28683		cdb[0] = (char)SCMD_READ_CDXA;
28684		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28685		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28686		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28687		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28688		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
28689		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28690		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28691		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
28692		cdb[10] = cdxa->cdxa_format;
28693	}
28694	com->uscsi_cdb	   = cdb;
28695	com->uscsi_cdblen  = CDB_GROUP5;
28696	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
28697	com->uscsi_buflen  = buflen;
28698	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28699	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28700	    SD_PATH_STANDARD);
28701	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28702	kmem_free(com, sizeof (*com));
28703	return (rval);
28704}
28705
28706
28707/*
28708 *    Function: sr_eject()
28709 *
28710 * Description: This routine is the driver entry point for handling CD-ROM
28711 *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28712 *
28713 *   Arguments: dev	- the device 'dev_t'
28714 *
28715 * Return Code: the code returned by sd_send_scsi_cmd()
28716 */
28717
28718static int
28719sr_eject(dev_t dev)
28720{
28721	struct sd_lun	*un;
28722	int		rval;
28723	sd_ssc_t	*ssc;
28724
28725	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28726	    (un->un_state == SD_STATE_OFFLINE)) {
28727		return (ENXIO);
28728	}
28729
28730	/*
28731	 * To prevent race conditions with the eject
28732	 * command, keep track of an eject command as
28733	 * it progresses. If we are already handling
28734	 * an eject command in the driver for the given
	 * unit and another request to eject is received,
	 * immediately return EAGAIN so we don't lose
28737	 * the command if the current eject command fails.
28738	 */
28739	mutex_enter(SD_MUTEX(un));
28740	if (un->un_f_ejecting == TRUE) {
28741		mutex_exit(SD_MUTEX(un));
28742		return (EAGAIN);
28743	}
28744	un->un_f_ejecting = TRUE;
28745	mutex_exit(SD_MUTEX(un));
28746
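	/* The door must be unlocked (removal allowed) before the eject */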
28747	ssc = sd_ssc_init(un);
28748	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
28749	    SD_PATH_STANDARD);
28750	sd_ssc_fini(ssc);
28751
28752	if (rval != 0) {
28753		mutex_enter(SD_MUTEX(un));
28754		un->un_f_ejecting = FALSE;
28755		mutex_exit(SD_MUTEX(un));
28756		return (rval);
28757	}
28758
28759	ssc = sd_ssc_init(un);
28760	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
28761	    SD_TARGET_EJECT, SD_PATH_STANDARD);
28762	sd_ssc_fini(ssc);
28763
28764	if (rval == 0) {
28765		mutex_enter(SD_MUTEX(un));
28766		sr_ejected(un);
28767		un->un_mediastate = DKIO_EJECTED;
28768		un->un_f_ejecting = FALSE;
28769		cv_broadcast(&un->un_state_cv);
28770		mutex_exit(SD_MUTEX(un));
28771	} else {
28772		mutex_enter(SD_MUTEX(un));
28773		un->un_f_ejecting = FALSE;
28774		mutex_exit(SD_MUTEX(un));
28775	}
28776	return (rval);
28777}
28778
28779
28780/*
28781 *    Function: sr_ejected()
28782 *
28783 * Description: This routine updates the soft state structure to invalidate the
28784 *		geometry information after the media has been ejected or a
28785 *		media eject has been detected.
28786 *
28787 *   Arguments: un - driver soft state (unit) structure
28788 */
28789
28790static void
28791sr_ejected(struct sd_lun *un)
28792{
28793	struct sd_errstats *stp;
28794
28795	ASSERT(un != NULL);
28796	ASSERT(mutex_owned(SD_MUTEX(un)));
28797
28798	un->un_f_blockcount_is_valid	= FALSE;
28799	un->un_f_tgt_blocksize_is_valid	= FALSE;
28800	mutex_exit(SD_MUTEX(un));
28801	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
28802	mutex_enter(SD_MUTEX(un));
28803
28804	if (un->un_errstats != NULL) {
28805		stp = (struct sd_errstats *)un->un_errstats->ks_data;
28806		stp->sd_capacity.value.ui64 = 0;
28807	}
28808}
28809
28810
28811/*
28812 *    Function: sr_check_wp()
28813 *
28814 * Description: This routine checks the write protection of a removable
28815 *      media disk and hotpluggable devices via the write protect bit of
 *      the Mode Page Header device specific field. Some devices choke
 *      on unsupported mode pages. To work around this issue, this
 *      routine uses the 0x3f mode page (request all pages) for all
 *      device types.
28820 *
28821 *   Arguments: dev             - the device 'dev_t'
28822 *
28823 * Return Code: int indicating if the device is write protected (1) or not (0)
28824 *
28825 *     Context: Kernel thread.
28826 *
28827 */
28828
28829static int
28830sr_check_wp(dev_t dev)
28831{
28832	struct sd_lun	*un;
28833	uchar_t		device_specific;
28834	uchar_t		*sense;
28835	int		hdrlen;
28836	int		rval = FALSE;
28837	int		status;
28838	sd_ssc_t	*ssc;
28839
28840	/*
28841	 * Note: The return codes for this routine should be reworked to
28842	 * properly handle the case of a NULL softstate.
28843	 */
28844	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28845		return (FALSE);
28846	}
28847
28848	if (un->un_f_cfg_is_atapi == TRUE) {
28849		/*
28850		 * The mode page contents are not required; set the allocation
28851		 * length for the mode page header only
28852		 */
28853		hdrlen = MODE_HEADER_LENGTH_GRP2;
28854		sense = kmem_zalloc(hdrlen, KM_SLEEP);
28855		ssc = sd_ssc_init(un);
28856		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense, hdrlen,
28857		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
28858		sd_ssc_fini(ssc);
28859		if (status != 0)
28860			goto err_exit;
28861		device_specific =
28862		    ((struct mode_header_grp2 *)sense)->device_specific;
28863	} else {
28864		hdrlen = MODE_HEADER_LENGTH;
28865		sense = kmem_zalloc(hdrlen, KM_SLEEP);
28866		ssc = sd_ssc_init(un);
28867		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, hdrlen,
28868		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
28869		sd_ssc_fini(ssc);
28870		if (status != 0)
28871			goto err_exit;
28872		device_specific =
28873		    ((struct mode_header *)sense)->device_specific;
28874	}
28875
28876
	/*
	 * Report the device as write protected if the WP bit of the
	 * device-specific byte is set. If the mode sense above failed,
	 * we jumped to err_exit with rval still FALSE, treating devices
	 * that do not understand the query as writable.
	 */
28882	if (device_specific & WRITE_PROTECT) {
28883		rval = TRUE;
28884	}
28885
28886err_exit:
28887	kmem_free(sense, hdrlen);
28888	return (rval);
28889}
28890
28891/*
28892 *    Function: sr_volume_ctrl()
28893 *
28894 * Description: This routine is the driver entry point for handling CD-ROM
28895 *		audio output volume ioctl requests. (CDROMVOLCTRL)
28896 *
28897 *   Arguments: dev	- the device 'dev_t'
28898 *		data	- pointer to user audio volume control structure
28899 *		flag	- this argument is a pass through to ddi_copyxxx()
28900 *			  directly from the mode argument of ioctl().
28901 *
28902 * Return Code: the code returned by sd_send_scsi_cmd()
28903 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
28905 *		EINVAL if data pointer is NULL
28906 *
28907 */
28908
28909static int
28910sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
28911{
28912	struct sd_lun		*un;
28913	struct cdrom_volctrl    volume;
28914	struct cdrom_volctrl    *vol = &volume;
28915	uchar_t			*sense_page;
28916	uchar_t			*select_page;
28917	uchar_t			*sense;
28918	uchar_t			*select;
28919	int			sense_buflen;
28920	int			select_buflen;
28921	int			rval;
28922	sd_ssc_t		*ssc;
28923
28924	if (data == NULL) {
28925		return (EINVAL);
28926	}
28927
28928	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28929	    (un->un_state == SD_STATE_OFFLINE)) {
28930		return (ENXIO);
28931	}
28932
28933	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
28934		return (EFAULT);
28935	}
28936
28937	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
28938		struct mode_header_grp2		*sense_mhp;
28939		struct mode_header_grp2		*select_mhp;
28940		int				bd_len;
28941
28942		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
28943		select_buflen = MODE_HEADER_LENGTH_GRP2 +
28944		    MODEPAGE_AUDIO_CTRL_LEN;
28945		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
28946		select = kmem_zalloc(select_buflen, KM_SLEEP);
28947		ssc = sd_ssc_init(un);
28948		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
28949		    sense_buflen, MODEPAGE_AUDIO_CTRL,
28950		    SD_PATH_STANDARD);
28951		sd_ssc_fini(ssc);
28952
28953		if (rval != 0) {
28954			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28955			    "sr_volume_ctrl: Mode Sense Failed\n");
28956			kmem_free(sense, sense_buflen);
28957			kmem_free(select, select_buflen);
28958			return (rval);
28959		}
28960		sense_mhp = (struct mode_header_grp2 *)sense;
28961		select_mhp = (struct mode_header_grp2 *)select;
28962		bd_len = (sense_mhp->bdesc_length_hi << 8) |
28963		    sense_mhp->bdesc_length_lo;
28964		if (bd_len > MODE_BLK_DESC_LENGTH) {
28965			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28966			    "sr_volume_ctrl: Mode Sense returned invalid "
28967			    "block descriptor length\n");
28968			kmem_free(sense, sense_buflen);
28969			kmem_free(select, select_buflen);
28970			return (EIO);
28971		}
28972		sense_page = (uchar_t *)
28973		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
28974		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
28975		select_mhp->length_msb = 0;
28976		select_mhp->length_lsb = 0;
28977		select_mhp->bdesc_length_hi = 0;
28978		select_mhp->bdesc_length_lo = 0;
28979	} else {
28980		struct mode_header		*sense_mhp, *select_mhp;
28981
28982		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
28983		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
28984		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
28985		select = kmem_zalloc(select_buflen, KM_SLEEP);
28986		ssc = sd_ssc_init(un);
28987		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
28988		    sense_buflen, MODEPAGE_AUDIO_CTRL,
28989		    SD_PATH_STANDARD);
28990		sd_ssc_fini(ssc);
28991
28992		if (rval != 0) {
28993			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28994			    "sr_volume_ctrl: Mode Sense Failed\n");
28995			kmem_free(sense, sense_buflen);
28996			kmem_free(select, select_buflen);
28997			return (rval);
28998		}
28999		sense_mhp  = (struct mode_header *)sense;
29000		select_mhp = (struct mode_header *)select;
29001		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29002			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29003			    "sr_volume_ctrl: Mode Sense returned invalid "
29004			    "block descriptor length\n");
29005			kmem_free(sense, sense_buflen);
29006			kmem_free(select, select_buflen);
29007			return (EIO);
29008		}
29009		sense_page = (uchar_t *)
29010		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29011		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29012		select_mhp->length = 0;
29013		select_mhp->bdesc_length = 0;
29014	}
29015	/*
	 * Note: An audio control data structure could be created and overlaid
29017	 * on the following in place of the array indexing method implemented.
29018	 */
29019
29020	/* Build the select data for the user volume data */
29021	select_page[0] = MODEPAGE_AUDIO_CTRL;
29022	select_page[1] = 0xE;
29023	/* Set the immediate bit */
29024	select_page[2] = 0x04;
29025	/* Zero out reserved fields */
29026	select_page[3] = 0x00;
29027	select_page[4] = 0x00;
29028	/* Return sense data for fields not to be modified */
29029	select_page[5] = sense_page[5];
29030	select_page[6] = sense_page[6];
29031	select_page[7] = sense_page[7];
29032	/* Set the user specified volume levels for channel 0 and 1 */
29033	select_page[8] = 0x01;
29034	select_page[9] = vol->channel0;
29035	select_page[10] = 0x02;
29036	select_page[11] = vol->channel1;
	/* Channels 2 and 3 are currently unsupported, so return the sense data */
29038	select_page[12] = sense_page[12];
29039	select_page[13] = sense_page[13];
29040	select_page[14] = sense_page[14];
29041	select_page[15] = sense_page[15];
29042
29043	ssc = sd_ssc_init(un);
29044	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29045		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, select,
29046		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29047	} else {
29048		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
29049		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29050	}
29051	sd_ssc_fini(ssc);
29052
29053	kmem_free(sense, sense_buflen);
29054	kmem_free(select, select_buflen);
29055	return (rval);
29056}
29057
29058
29059/*
29060 *    Function: sr_read_sony_session_offset()
29061 *
29062 * Description: This routine is the driver entry point for handling CD-ROM
 *		ioctl requests for session offset information (CDROMREADOFFSET).
 *		The address of the first track in the last session of a
 *		multi-session CD-ROM is returned.
29066 *
29067 *		Note: This routine uses a vendor specific key value in the
29068 *		command control field without implementing any vendor check here
29069 *		or in the ioctl routine.
29070 *
29071 *   Arguments: dev	- the device 'dev_t'
29072 *		data	- pointer to an int to hold the requested address
29073 *		flag	- this argument is a pass through to ddi_copyxxx()
29074 *			  directly from the mode argument of ioctl().
29075 *
29076 * Return Code: the code returned by sd_send_scsi_cmd()
29077 *		EFAULT if ddi_copyxxx() fails
 *		ENXIO if ddi_get_soft_state() fails
29079 *		EINVAL if data pointer is NULL
29080 */
29081
29082static int
29083sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29084{
29085	struct sd_lun		*un;
29086	struct uscsi_cmd	*com;
29087	caddr_t			buffer;
29088	char			cdb[CDB_GROUP1];
29089	int			session_offset = 0;
29090	int			rval;
29091
29092	if (data == NULL) {
29093		return (EINVAL);
29094	}
29095
29096	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29097	    (un->un_state == SD_STATE_OFFLINE)) {
29098		return (ENXIO);
29099	}
29100
29101	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29102	bzero(cdb, CDB_GROUP1);
29103	cdb[0] = SCMD_READ_TOC;
	/*
	 * Bytes 7 & 8 hold the allocation length: 12 bytes for a single
	 * entry (4-byte TOC response header + 8 bytes of response data).
	 */
29108	cdb[8] = SONY_SESSION_OFFSET_LEN;
29109	/* Byte 9 is the control byte. A vendor specific value is used */
29110	cdb[9] = SONY_SESSION_OFFSET_KEY;
29111	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29112	com->uscsi_cdb = cdb;
29113	com->uscsi_cdblen = CDB_GROUP1;
29114	com->uscsi_bufaddr = buffer;
29115	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29116	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29117
29118	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
29119	    SD_PATH_STANDARD);
29120	if (rval != 0) {
29121		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29122		kmem_free(com, sizeof (*com));
29123		return (rval);
29124	}
29125	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29126		session_offset =
29127		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29128		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		/*
		 * The drive returns the offset in units of the current
		 * lbasize blocks. Convert to 2K blocks before returning
		 * it to the user.
		 */
29133		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29134			session_offset >>= 2;
29135		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29136			session_offset >>= 1;
29137		}
29138	}
29139
29140	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29141		rval = EFAULT;
29142	}
29143
29144	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29145	kmem_free(com, sizeof (*com));
29146	return (rval);
29147}
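
/*
 * A worked example for the conversion above (hypothetical values): if the
 * drive reports a session offset of 6000 and the medium uses 512-byte
 * blocks, the value returned to the user is 6000 >> 2 == 1500 2K blocks;
 * with 1024-byte blocks it would be 6000 >> 1 == 3000.
 */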
29148
29149
29150/*
29151 *    Function: sd_wm_cache_constructor()
29152 *
29153 * Description: Cache Constructor for the wmap cache for the read/modify/write
29154 * 		devices.
29155 *
29156 *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29157 *		un	- sd_lun structure for the device.
29158 *		flag	- the km flags passed to constructor
29159 *
29160 * Return Code: 0 on success.
29161 *		-1 on failure.
29162 */
29163
29164/*ARGSUSED*/
29165static int
29166sd_wm_cache_constructor(void *wm, void *un, int flags)
29167{
29168	bzero(wm, sizeof (struct sd_w_map));
29169	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29170	return (0);
29171}
29172
29173
29174/*
29175 *    Function: sd_wm_cache_destructor()
29176 *
29177 * Description: Cache destructor for the wmap cache for the read/modify/write
29178 * 		devices.
29179 *
 *   Arguments: wm      - A pointer to the sd_w_map to be deinitialized.
29181 *		un	- sd_lun structure for the device.
29182 */
29183/*ARGSUSED*/
29184static void
29185sd_wm_cache_destructor(void *wm, void *un)
29186{
29187	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29188}
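
/*
 * A minimal sketch of how the constructor/destructor pair above plugs into
 * the wmap kmem cache (the cache-name string here is hypothetical; the
 * actual cache is created during unit attach):
 *
 *	un->un_wm_cache = kmem_cache_create("sd_wmap_cache",
 *	    sizeof (struct sd_w_map), 8, sd_wm_cache_constructor,
 *	    sd_wm_cache_destructor, NULL, (void *)un, NULL, 0);
 */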
29189
29190
29191/*
29192 *    Function: sd_range_lock()
29193 *
 * Description: Lock the specified range of blocks to ensure that a
 *		read-modify-write is atomic and that no other I/O writes
 *		to the same location. The range is specified in terms of
 *		start and end blocks. Block numbers are actual media block
 *		numbers, not system block numbers.
29199 *
29200 *   Arguments: un	- sd_lun structure for the device.
29201 *		startb - The starting block number
29202 *		endb - The end block number
29203 *		typ - type of i/o - simple/read_modify_write
29204 *
29205 * Return Code: wm  - pointer to the wmap structure.
29206 *
29207 *     Context: This routine can sleep.
29208 */
29209
29210static struct sd_w_map *
29211sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29212{
29213	struct sd_w_map *wmp = NULL;
29214	struct sd_w_map *sl_wmp = NULL;
29215	struct sd_w_map *tmp_wmp;
29216	wm_state state = SD_WM_CHK_LIST;
29217
29218
29219	ASSERT(un != NULL);
29220	ASSERT(!mutex_owned(SD_MUTEX(un)));
29221
29222	mutex_enter(SD_MUTEX(un));
29223
29224	while (state != SD_WM_DONE) {
29225
29226		switch (state) {
29227		case SD_WM_CHK_LIST:
29228			/*
29229			 * This is the starting state. Check the wmap list
29230			 * to see if the range is currently available.
29231			 */
29232			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29233				/*
29234				 * If this is a simple write and no rmw
29235				 * i/o is pending then try to lock the
29236				 * range as the range should be available.
29237				 */
29238				state = SD_WM_LOCK_RANGE;
29239			} else {
29240				tmp_wmp = sd_get_range(un, startb, endb);
29241				if (tmp_wmp != NULL) {
29242					if ((wmp != NULL) && ONLIST(un, wmp)) {
						/*
						 * We should not keep an
						 * on-list wmp while waiting;
						 * this macro also sets
						 * wmp = NULL.
						 */
29248						FREE_ONLIST_WMAP(un, wmp);
29249					}
					/*
					 * sl_wmp is the wmap on which the
					 * wait is done. Since tmp_wmp points
					 * to the in-use wmap, set sl_wmp to
					 * tmp_wmp and change the state to
					 * SD_WM_WAIT_MAP.
					 */
29256					sl_wmp = tmp_wmp;
29257					state = SD_WM_WAIT_MAP;
29258				} else {
29259					state = SD_WM_LOCK_RANGE;
29260				}
29261
29262			}
29263			break;
29264
29265		case SD_WM_LOCK_RANGE:
29266			ASSERT(un->un_wm_cache);
			/*
			 * The range needs to be locked, so try to get a wmap.
			 * First attempt the allocation with KM_NOSLEEP; we
			 * want to avoid sleeping if possible, since we would
			 * have to release the sd mutex in order to sleep.
			 */
29273			if (wmp == NULL)
29274				wmp = kmem_cache_alloc(un->un_wm_cache,
29275				    KM_NOSLEEP);
29276			if (wmp == NULL) {
29277				mutex_exit(SD_MUTEX(un));
29278				_NOTE(DATA_READABLE_WITHOUT_LOCK
29279				    (sd_lun::un_wm_cache))
29280				wmp = kmem_cache_alloc(un->un_wm_cache,
29281				    KM_SLEEP);
29282				mutex_enter(SD_MUTEX(un));
				/*
				 * We released the mutex, so recheck and go
				 * back to the check-list state.
				 */
29287				state = SD_WM_CHK_LIST;
29288			} else {
				/*
				 * We exit the state machine since we have
				 * the wmap. Do the housekeeping first:
				 * place the wmap on the wmap list if it is
				 * not already there, then set the state
				 * to done.
				 */
29295				wmp->wm_start = startb;
29296				wmp->wm_end = endb;
29297				wmp->wm_flags = typ | SD_WM_BUSY;
29298				if (typ & SD_WTYPE_RMW) {
29299					un->un_rmw_count++;
29300				}
				/*
				 * If not already on the list, link it in.
				 */
29304				if (!ONLIST(un, wmp)) {
29305					wmp->wm_next = un->un_wm;
29306					wmp->wm_prev = NULL;
29307					if (wmp->wm_next)
29308						wmp->wm_next->wm_prev = wmp;
29309					un->un_wm = wmp;
29310				}
29311				state = SD_WM_DONE;
29312			}
29313			break;
29314
29315		case SD_WM_WAIT_MAP:
29316			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29317			/*
29318			 * Wait is done on sl_wmp, which is set in the
29319			 * check_list state.
29320			 */
29321			sl_wmp->wm_wanted_count++;
29322			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29323			sl_wmp->wm_wanted_count--;
			/*
			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if no one is
			 * waiting for it.
			 */
29329			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
29330			if (sl_wmp->wm_wanted_count == 0) {
29331				if (wmp != NULL)
29332					CHK_N_FREEWMP(un, wmp);
29333				wmp = sl_wmp;
29334			}
29335			sl_wmp = NULL;
			/*
			 * After waking up, we need to recheck the
			 * availability of the range.
			 */
29340			state = SD_WM_CHK_LIST;
29341			break;
29342
29343		default:
29344			panic("sd_range_lock: "
29345			    "Unknown state %d in sd_range_lock", state);
29346			/*NOTREACHED*/
29347		} /* switch(state) */
29348
29349	} /* while(state != SD_WM_DONE) */
29350
29351	mutex_exit(SD_MUTEX(un));
29352
29353	ASSERT(wmp != NULL);
29354
29355	return (wmp);
29356}
29357
29358
29359/*
29360 *    Function: sd_get_range()
29361 *
 * Description: Determine whether any outstanding I/O overlaps this one.
 *		Returns the write map of the first such I/O, NULL otherwise.
29364 *
29365 *   Arguments: un	- sd_lun structure for the device.
29366 *		startb - The starting block number
29367 *		endb - The end block number
29368 *
29369 * Return Code: wm  - pointer to the wmap structure.
29370 */
29371
29372static struct sd_w_map *
29373sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29374{
29375	struct sd_w_map *wmp;
29376
29377	ASSERT(un != NULL);
29378
29379	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29380		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29381			continue;
29382		}
29383		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29384			break;
29385		}
29386		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29387			break;
29388		}
29389	}
29390
29391	return (wmp);
29392}
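
/*
 * For example (hypothetical block numbers): given a busy map covering
 * blocks [100, 107], a request for [104, 111] matches the first test above
 * (its start falls inside the map), and a request for [96, 103] matches
 * the second (its end falls inside the map); a request for [108, 115]
 * matches neither, so the scan continues.
 */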
29393
29394
29395/*
29396 *    Function: sd_free_inlist_wmap()
29397 *
29398 * Description: Unlink and free a write map struct.
29399 *
29400 *   Arguments: un      - sd_lun structure for the device.
29401 *		wmp	- sd_w_map which needs to be unlinked.
29402 */
29403
29404static void
29405sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29406{
29407	ASSERT(un != NULL);
29408
29409	if (un->un_wm == wmp) {
29410		un->un_wm = wmp->wm_next;
29411	} else {
29412		wmp->wm_prev->wm_next = wmp->wm_next;
29413	}
29414
29415	if (wmp->wm_next) {
29416		wmp->wm_next->wm_prev = wmp->wm_prev;
29417	}
29418
29419	wmp->wm_next = wmp->wm_prev = NULL;
29420
29421	kmem_cache_free(un->un_wm_cache, wmp);
29422}
29423
29424
29425/*
29426 *    Function: sd_range_unlock()
29427 *
29428 * Description: Unlock the range locked by wm.
29429 *		Free write map if nobody else is waiting on it.
29430 *
29431 *   Arguments: un      - sd_lun structure for the device.
 *              wm      - sd_w_map whose range is to be unlocked.
29433 */
29434
29435static void
29436sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29437{
29438	ASSERT(un != NULL);
29439	ASSERT(wm != NULL);
29440	ASSERT(!mutex_owned(SD_MUTEX(un)));
29441
29442	mutex_enter(SD_MUTEX(un));
29443
29444	if (wm->wm_flags & SD_WTYPE_RMW) {
29445		un->un_rmw_count--;
29446	}
29447
29448	if (wm->wm_wanted_count) {
29449		wm->wm_flags = 0;
29450		/*
29451		 * Broadcast that the wmap is available now.
29452		 */
29453		cv_broadcast(&wm->wm_avail);
29454	} else {
		/*
		 * If no one is waiting on the map, it should be freed.
		 */
29458		sd_free_inlist_wmap(un, wm);
29459	}
29460
29461	mutex_exit(SD_MUTEX(un));
29462}
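
/*
 * A minimal sketch of the caller pattern for sd_range_lock() and
 * sd_range_unlock() above (the block numbers are hypothetical):
 *
 *	struct sd_w_map *wm;
 *
 *	wm = sd_range_lock(un, start_blk, end_blk, SD_WTYPE_RMW);
 *	... perform the read, modify, and write phases ...
 *	sd_range_unlock(un, wm);
 *
 * sd_range_lock() can sleep, so it must not be called with SD_MUTEX held.
 */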
29463
29464
29465/*
29466 *    Function: sd_read_modify_write_task
29467 *
29468 * Description: Called from a taskq thread to initiate the write phase of
29469 *		a read-modify-write request.  This is used for targets where
29470 *		un->un_sys_blocksize != un->un_tgt_blocksize.
29471 *
29472 *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29473 *
29474 *     Context: Called under taskq thread context.
29475 */
29476
29477static void
29478sd_read_modify_write_task(void *arg)
29479{
29480	struct sd_mapblocksize_info	*bsp;
29481	struct buf	*bp;
29482	struct sd_xbuf	*xp;
29483	struct sd_lun	*un;
29484
29485	bp = arg;	/* The bp is given in arg */
29486	ASSERT(bp != NULL);
29487
29488	/* Get the pointer to the layer-private data struct */
29489	xp = SD_GET_XBUF(bp);
29490	ASSERT(xp != NULL);
29491	bsp = xp->xb_private;
29492	ASSERT(bsp != NULL);
29493
29494	un = SD_GET_UN(bp);
29495	ASSERT(un != NULL);
29496	ASSERT(!mutex_owned(SD_MUTEX(un)));
29497
29498	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29499	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29500
	/*
	 * This is the write phase of a read-modify-write request, called
	 * under the context of a taskq thread in response to the read
	 * portion of the rmw request completing in interrupt context. The
	 * write request must be sent from here down the iostart chain as
	 * if it were being sent from sd_mapblocksize_iostart(), so we use
	 * the layer index saved in the layer-private data area.
	 */
29509	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29510
29511	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29512	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29513}
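
/*
 * A minimal sketch, assuming a taskq such as the driver's sd_wmr_tq, of
 * how the read-completion path would hand the write phase off to the
 * routine above:
 *
 *	if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
 *	    KM_NOSLEEP) == 0) {
 *		... dispatch failed; fail or retry the request ...
 *	}
 */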
29514
29515
29516/*
29517 *    Function: sddump_do_read_of_rmw()
29518 *
 * Description: This routine is called from sddump. If sddump is called
 *		with an I/O that is not aligned on a device blocksize
 *		boundary, then the write has to be converted to a
 *		read-modify-write. Do the read part here to keep sddump
 *		simple. Note that the sd_mutex is held across the call to
 *		this routine.
29525 *
29526 *   Arguments: un	- sd_lun
29527 *		blkno	- block number in terms of media block size.
29528 *		nblk	- number of blocks.
29529 *		bpp	- pointer to pointer to the buf structure. On return
29530 *			from this function, *bpp points to the valid buffer
29531 *			to which the write has to be done.
29532 *
29533 * Return Code: 0 for success or errno-type return code
29534 */
29535
29536static int
29537sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29538	struct buf **bpp)
29539{
29540	int err;
29541	int i;
29542	int rval;
29543	struct buf *bp;
29544	struct scsi_pkt *pkt = NULL;
29545	uint32_t target_blocksize;
29546
29547	ASSERT(un != NULL);
29548	ASSERT(mutex_owned(SD_MUTEX(un)));
29549
29550	target_blocksize = un->un_tgt_blocksize;
29551
29552	mutex_exit(SD_MUTEX(un));
29553
29554	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29555	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29556	if (bp == NULL) {
29557		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29558		    "no resources for dumping; giving up");
29559		err = ENOMEM;
29560		goto done;
29561	}
29562
29563	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29564	    blkno, nblk);
29565	if (rval != 0) {
29566		scsi_free_consistent_buf(bp);
29567		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29568		    "no resources for dumping; giving up");
29569		err = ENOMEM;
29570		goto done;
29571	}
29572
29573	pkt->pkt_flags |= FLAG_NOINTR;
29574
29575	err = EIO;
29576	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29577
		/*
		 * sd_scsi_poll() returns 0 (success) if the command completes
		 * and the status block is STATUS_GOOD. We should check for
		 * errors only if this condition does not hold. Even then we
		 * should send our own request sense packet only if we have a
		 * check condition and auto request sense has not been
		 * performed by the hba.
		 */
29586		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29587
29588		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29589			err = 0;
29590			break;
29591		}
29592
		/*
		 * Check for CMD_DEV_GONE first; if the device is gone, give
		 * up, since there is no need to read RQS data.
		 */
29597		if (pkt->pkt_reason == CMD_DEV_GONE) {
29598			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29599			    "Error while dumping state with rmw..."
29600			    "Device is gone\n");
29601			break;
29602		}
29603
29604		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29605			SD_INFO(SD_LOG_DUMP, un,
29606			    "sddump: read failed with CHECK, try # %d\n", i);
29607			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29608				(void) sd_send_polled_RQS(un);
29609			}
29610
29611			continue;
29612		}
29613
29614		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
29615			int reset_retval = 0;
29616
29617			SD_INFO(SD_LOG_DUMP, un,
29618			    "sddump: read failed with BUSY, try # %d\n", i);
29619
29620			if (un->un_f_lun_reset_enabled == TRUE) {
29621				reset_retval = scsi_reset(SD_ADDRESS(un),
29622				    RESET_LUN);
29623			}
29624			if (reset_retval == 0) {
29625				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
29626			}
29627			(void) sd_send_polled_RQS(un);
29628
29629		} else {
29630			SD_INFO(SD_LOG_DUMP, un,
29631			    "sddump: read failed with 0x%x, try # %d\n",
29632			    SD_GET_PKT_STATUS(pkt), i);
29633			mutex_enter(SD_MUTEX(un));
29634			sd_reset_target(un, pkt);
29635			mutex_exit(SD_MUTEX(un));
29636		}
29637
29638		/*
29639		 * If we are not getting anywhere with lun/target resets,
29640		 * let's reset the bus.
29641		 */
29642		if (i > SD_NDUMP_RETRIES/2) {
29643			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
29644			(void) sd_send_polled_RQS(un);
29645		}
29646
29647	}
29648	scsi_destroy_pkt(pkt);
29649
29650	if (err != 0) {
29651		scsi_free_consistent_buf(bp);
29652		*bpp = NULL;
29653	} else {
29654		*bpp = bp;
29655	}
29656
29657done:
29658	mutex_enter(SD_MUTEX(un));
29659	return (err);
29660}
29661
29662
29663/*
29664 *    Function: sd_failfast_flushq
29665 *
29666 * Description: Take all bp's on the wait queue that have B_FAILFAST set
29667 *		in b_flags and move them onto the failfast queue, then kick
29668 *		off a thread to return all bp's on the failfast queue to
29669 *		their owners with an error set.
29670 *
29671 *   Arguments: un - pointer to the soft state struct for the instance.
29672 *
29673 *     Context: may execute in interrupt context.
29674 */
29675
29676static void
29677sd_failfast_flushq(struct sd_lun *un)
29678{
29679	struct buf *bp;
29680	struct buf *next_waitq_bp;
29681	struct buf *prev_waitq_bp = NULL;
29682
29683	ASSERT(un != NULL);
29684	ASSERT(mutex_owned(SD_MUTEX(un)));
29685	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
29686	ASSERT(un->un_failfast_bp == NULL);
29687
29688	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29689	    "sd_failfast_flushq: entry: un:0x%p\n", un);
29690
29691	/*
29692	 * Check if we should flush all bufs when entering failfast state, or
29693	 * just those with B_FAILFAST set.
29694	 */
29695	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
29696		/*
29697		 * Move *all* bp's on the wait queue to the failfast flush
29698		 * queue, including those that do NOT have B_FAILFAST set.
29699		 */
29700		if (un->un_failfast_headp == NULL) {
29701			ASSERT(un->un_failfast_tailp == NULL);
29702			un->un_failfast_headp = un->un_waitq_headp;
29703		} else {
29704			ASSERT(un->un_failfast_tailp != NULL);
29705			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
29706		}
29707
29708		un->un_failfast_tailp = un->un_waitq_tailp;
29709
29710		/* update kstat for each bp moved out of the waitq */
29711		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
29712			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29713		}
29714
29715		/* empty the waitq */
29716		un->un_waitq_headp = un->un_waitq_tailp = NULL;
29717
29718	} else {
		/*
		 * Go through the wait queue, pick off all entries with
		 * B_FAILFAST set, and move these onto the failfast queue.
		 */
29723		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
29724			/*
29725			 * Save the pointer to the next bp on the wait queue,
29726			 * so we get to it on the next iteration of this loop.
29727			 */
29728			next_waitq_bp = bp->av_forw;
29729
29730			/*
29731			 * If this bp from the wait queue does NOT have
29732			 * B_FAILFAST set, just move on to the next element
29733			 * in the wait queue. Note, this is the only place
29734			 * where it is correct to set prev_waitq_bp.
29735			 */
29736			if ((bp->b_flags & B_FAILFAST) == 0) {
29737				prev_waitq_bp = bp;
29738				continue;
29739			}
29740
29741			/*
29742			 * Remove the bp from the wait queue.
29743			 */
29744			if (bp == un->un_waitq_headp) {
29745				/* The bp is the first element of the waitq. */
29746				un->un_waitq_headp = next_waitq_bp;
29747				if (un->un_waitq_headp == NULL) {
29748					/* The wait queue is now empty */
29749					un->un_waitq_tailp = NULL;
29750				}
29751			} else {
29752				/*
29753				 * The bp is either somewhere in the middle
29754				 * or at the end of the wait queue.
29755				 */
29756				ASSERT(un->un_waitq_headp != NULL);
29757				ASSERT(prev_waitq_bp != NULL);
29758				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
29759				    == 0);
29760				if (bp == un->un_waitq_tailp) {
29761					/* bp is the last entry on the waitq. */
29762					ASSERT(next_waitq_bp == NULL);
29763					un->un_waitq_tailp = prev_waitq_bp;
29764				}
29765				prev_waitq_bp->av_forw = next_waitq_bp;
29766			}
29767			bp->av_forw = NULL;
29768
29769			/*
29770			 * update kstat since the bp is moved out of
29771			 * the waitq
29772			 */
29773			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29774
29775			/*
29776			 * Now put the bp onto the failfast queue.
29777			 */
29778			if (un->un_failfast_headp == NULL) {
29779				/* failfast queue is currently empty */
29780				ASSERT(un->un_failfast_tailp == NULL);
29781				un->un_failfast_headp =
29782				    un->un_failfast_tailp = bp;
29783			} else {
29784				/* Add the bp to the end of the failfast q */
29785				ASSERT(un->un_failfast_tailp != NULL);
29786				ASSERT(un->un_failfast_tailp->b_flags &
29787				    B_FAILFAST);
29788				un->un_failfast_tailp->av_forw = bp;
29789				un->un_failfast_tailp = bp;
29790			}
29791		}
29792	}
29793
29794	/*
29795	 * Now return all bp's on the failfast queue to their owners.
29796	 */
29797	while ((bp = un->un_failfast_headp) != NULL) {
29798
29799		un->un_failfast_headp = bp->av_forw;
29800		if (un->un_failfast_headp == NULL) {
29801			un->un_failfast_tailp = NULL;
29802		}
29803
29804		/*
29805		 * We want to return the bp with a failure error code, but
29806		 * we do not want a call to sd_start_cmds() to occur here,
29807		 * so use sd_return_failed_command_no_restart() instead of
29808		 * sd_return_failed_command().
29809		 */
29810		sd_return_failed_command_no_restart(un, bp, EIO);
29811	}
29812
29813	/* Flush the xbuf queues if required. */
29814	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
29815		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
29816	}
29817
29818	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29819	    "sd_failfast_flushq: exit: un:0x%p\n", un);
29820}
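
/*
 * The flush policy above is governed by the global sd_failfast_flushctl
 * tunable. As a hedged example, flushing every queued buf rather than only
 * those with B_FAILFAST could be requested from /etc/system; the numeric
 * value shown is hypothetical and must match the SD_FAILFAST_FLUSH_ALL_BUFS
 * definition:
 *
 *	set sd:sd_failfast_flushctl = 0x1
 */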
29821
29822
29823/*
29824 *    Function: sd_failfast_flushq_callback
29825 *
29826 * Description: Return TRUE if the given bp meets the criteria for failfast
29827 *		flushing. Used with ddi_xbuf_flushq(9F).
29828 *
29829 *   Arguments: bp - ptr to buf struct to be examined.
29830 *
29831 *     Context: Any
29832 */
29833
29834static int
29835sd_failfast_flushq_callback(struct buf *bp)
29836{
29837	/*
29838	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
29839	 * state is entered; OR (2) the given bp has B_FAILFAST set.
29840	 */
29841	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
29842	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
29843}
29844
29845
29846
29847/*
29848 * Function: sd_setup_next_xfer
29849 *
29850 * Description: Prepare next I/O operation using DMA_PARTIAL
29851 *
29852 */
29853
29854static int
29855sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
29856    struct scsi_pkt *pkt, struct sd_xbuf *xp)
29857{
29858	ssize_t	num_blks_not_xfered;
29859	daddr_t	strt_blk_num;
29860	ssize_t	bytes_not_xfered;
29861	int	rval;
29862
29863	ASSERT(pkt->pkt_resid == 0);
29864
29865	/*
29866	 * Calculate next block number and amount to be transferred.
29867	 *
	 * How much data has NOT been transferred to the HBA yet.
29869	 */
29870	bytes_not_xfered = xp->xb_dma_resid;
29871
29872	/*
	 * Figure how many blocks have NOT been transferred to the HBA yet.
29874	 */
29875	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
29876
29877	/*
	 * Set the starting block number to the end of what WAS transferred.
29879	 */
29880	strt_blk_num = xp->xb_blkno +
29881	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
29882
29883	/*
	/*
	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
	 * the disk mutex here.
	 */
29888	rval = sd_setup_next_rw_pkt(un, pkt, bp,
29889	    strt_blk_num, num_blks_not_xfered);
29890
29891	if (rval == 0) {
29892
29893		/*
29894		 * Success.
29895		 *
		 * Adjust things if there are still more blocks to be
		 * transferred.
29898		 */
29899		xp->xb_dma_resid = pkt->pkt_resid;
29900		pkt->pkt_resid = 0;
29901
29902		return (1);
29903	}
29904
	/*
	 * There's really only one possible error return from
	 * sd_setup_next_rw_pkt, which occurs when scsi_init_pkt
	 * returns NULL.
	 */
29910	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
29911
29912	bp->b_resid = bp->b_bcount;
29913	bp->b_flags |= B_ERROR;
29914
29915	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29916	    "Error setting up next portion of DMA transfer\n");
29917
29918	return (0);
29919}
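
/*
 * A worked example of the arithmetic above (hypothetical numbers, 512-byte
 * target blocks): for a 1 MB request (b_bcount == 1048576) of which 256 KB
 * have not yet reached the HBA (xb_dma_resid == 262144),
 * num_blks_not_xfered is 512 and the next transfer starts at
 * xb_blkno + 1536, i.e. just past the 768 KB already transferred.
 */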
29920
29921/*
29922 *    Function: sd_panic_for_res_conflict
29923 *
29924 * Description: Call panic with a string formatted with "Reservation Conflict"
29925 *		and a human readable identifier indicating the SD instance
29926 *		that experienced the reservation conflict.
29927 *
29928 *   Arguments: un - pointer to the soft state struct for the instance.
29929 *
29930 *     Context: may execute in interrupt context.
29931 */
29932
29933#define	SD_RESV_CONFLICT_FMT_LEN 40
29934void
29935sd_panic_for_res_conflict(struct sd_lun *un)
29936{
29937	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
29938	char path_str[MAXPATHLEN];
29939
29940	(void) snprintf(panic_str, sizeof (panic_str),
29941	    "Reservation Conflict\nDisk: %s",
29942	    ddi_pathname(SD_DEVINFO(un), path_str));
29943
29944	panic(panic_str);
29945}
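
/*
 * The resulting panic string looks like the following (the device path
 * shown is hypothetical):
 *
 *	Reservation Conflict
 *	Disk: /pci@0,0/pci1000,30@10/sd@1,0
 */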
29946
29947/*
/*
 * Note: The following sd_faultinjection_ioctl() routines implement
 * driver support for fault injection, used for error analysis by
 * injecting faults into multiple layers of the driver.
 */
29953
29954#ifdef SD_FAULT_INJECTION
29955static uint_t   sd_fault_injection_on = 0;
29956
29957/*
29958 *    Function: sd_faultinjection_ioctl()
29959 *
29960 * Description: This routine is the driver entry point for handling
29961 *              faultinjection ioctls to inject errors into the
29962 *              layer model
29963 *
29964 *   Arguments: cmd	- the ioctl cmd received
29965 *		arg	- the arguments from user and returns
29966 */
29967
29968static void
sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
{
29970
29971	uint_t i = 0;
29972	uint_t rval;
29973
29974	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
29975
29976	mutex_enter(SD_MUTEX(un));
29977
29978	switch (cmd) {
29979	case SDIOCRUN:
29980		/* Allow pushed faults to be injected */
29981		SD_INFO(SD_LOG_SDTEST, un,
29982		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
29983
29984		sd_fault_injection_on = 1;
29985
29986		SD_INFO(SD_LOG_IOERR, un,
29987		    "sd_faultinjection_ioctl: run finished\n");
29988		break;
29989
29990	case SDIOCSTART:
29991		/* Start Injection Session */
29992		SD_INFO(SD_LOG_SDTEST, un,
29993		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
29994
29995		sd_fault_injection_on = 0;
29996		un->sd_injection_mask = 0xFFFFFFFF;
29997		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29998			un->sd_fi_fifo_pkt[i] = NULL;
29999			un->sd_fi_fifo_xb[i] = NULL;
30000			un->sd_fi_fifo_un[i] = NULL;
30001			un->sd_fi_fifo_arq[i] = NULL;
30002		}
30003		un->sd_fi_fifo_start = 0;
30004		un->sd_fi_fifo_end = 0;
30005
30006		mutex_enter(&(un->un_fi_mutex));
30007		un->sd_fi_log[0] = '\0';
30008		un->sd_fi_buf_len = 0;
30009		mutex_exit(&(un->un_fi_mutex));
30010
30011		SD_INFO(SD_LOG_IOERR, un,
30012		    "sd_faultinjection_ioctl: start finished\n");
30013		break;
30014
30015	case SDIOCSTOP:
30016		/* Stop Injection Session */
30017		SD_INFO(SD_LOG_SDTEST, un,
30018		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30019		sd_fault_injection_on = 0;
30020		un->sd_injection_mask = 0x0;
30021
		/* Free any stray or unused structs from the fifo */
30023		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30024			if (un->sd_fi_fifo_pkt[i] != NULL) {
30025				kmem_free(un->sd_fi_fifo_pkt[i],
30026				    sizeof (struct sd_fi_pkt));
30027			}
30028			if (un->sd_fi_fifo_xb[i] != NULL) {
30029				kmem_free(un->sd_fi_fifo_xb[i],
30030				    sizeof (struct sd_fi_xb));
30031			}
30032			if (un->sd_fi_fifo_un[i] != NULL) {
30033				kmem_free(un->sd_fi_fifo_un[i],
30034				    sizeof (struct sd_fi_un));
30035			}
30036			if (un->sd_fi_fifo_arq[i] != NULL) {
30037				kmem_free(un->sd_fi_fifo_arq[i],
30038				    sizeof (struct sd_fi_arq));
30039			}
30040			un->sd_fi_fifo_pkt[i] = NULL;
30041			un->sd_fi_fifo_un[i] = NULL;
30042			un->sd_fi_fifo_xb[i] = NULL;
30043			un->sd_fi_fifo_arq[i] = NULL;
30044		}
30045		un->sd_fi_fifo_start = 0;
30046		un->sd_fi_fifo_end = 0;
30047
30048		SD_INFO(SD_LOG_IOERR, un,
30049		    "sd_faultinjection_ioctl: stop finished\n");
30050		break;
30051
30052	case SDIOCINSERTPKT:
30053		/* Store a packet struct to be pushed onto fifo */
30054		SD_INFO(SD_LOG_SDTEST, un,
30055		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30056
30057		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30058
30059		sd_fault_injection_on = 0;
30060
		/* No more than SD_FI_MAX_ERROR entries are allowed in the queue */
30062		if (un->sd_fi_fifo_pkt[i] != NULL) {
30063			kmem_free(un->sd_fi_fifo_pkt[i],
30064			    sizeof (struct sd_fi_pkt));
30065		}
30066		if (arg != NULL) {
30067			un->sd_fi_fifo_pkt[i] =
30068			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30069			if (un->sd_fi_fifo_pkt[i] == NULL) {
				/* Alloc failed; don't store anything */
30071				break;
30072			}
30073			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30074			    sizeof (struct sd_fi_pkt), 0);
30075			if (rval == -1) {
30076				kmem_free(un->sd_fi_fifo_pkt[i],
30077				    sizeof (struct sd_fi_pkt));
30078				un->sd_fi_fifo_pkt[i] = NULL;
30079			}
30080		} else {
30081			SD_INFO(SD_LOG_IOERR, un,
30082			    "sd_faultinjection_ioctl: pkt null\n");
30083		}
30084		break;
30085
30086	case SDIOCINSERTXB:
		/* Store an xb struct to be pushed onto the fifo */
30088		SD_INFO(SD_LOG_SDTEST, un,
30089		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30090
30091		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30092
30093		sd_fault_injection_on = 0;
30094
30095		if (un->sd_fi_fifo_xb[i] != NULL) {
30096			kmem_free(un->sd_fi_fifo_xb[i],
30097			    sizeof (struct sd_fi_xb));
30098			un->sd_fi_fifo_xb[i] = NULL;
30099		}
30100		if (arg != NULL) {
30101			un->sd_fi_fifo_xb[i] =
30102			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30103			if (un->sd_fi_fifo_xb[i] == NULL) {
				/* Alloc failed; don't store anything */
30105				break;
30106			}
30107			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30108			    sizeof (struct sd_fi_xb), 0);
30109
30110			if (rval == -1) {
30111				kmem_free(un->sd_fi_fifo_xb[i],
30112				    sizeof (struct sd_fi_xb));
30113				un->sd_fi_fifo_xb[i] = NULL;
30114			}
30115		} else {
30116			SD_INFO(SD_LOG_IOERR, un,
30117			    "sd_faultinjection_ioctl: xb null\n");
30118		}
30119		break;
30120
30121	case SDIOCINSERTUN:
30122		/* Store a un struct to be pushed onto fifo */
30123		SD_INFO(SD_LOG_SDTEST, un,
30124		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30125
30126		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30127
30128		sd_fault_injection_on = 0;
30129
30130		if (un->sd_fi_fifo_un[i] != NULL) {
30131			kmem_free(un->sd_fi_fifo_un[i],
30132			    sizeof (struct sd_fi_un));
30133			un->sd_fi_fifo_un[i] = NULL;
30134		}
30135		if (arg != NULL) {
30136			un->sd_fi_fifo_un[i] =
30137			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30138			if (un->sd_fi_fifo_un[i] == NULL) {
				/* Alloc failed; don't store anything */
30140				break;
30141			}
30142			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30143			    sizeof (struct sd_fi_un), 0);
30144			if (rval == -1) {
30145				kmem_free(un->sd_fi_fifo_un[i],
30146				    sizeof (struct sd_fi_un));
30147				un->sd_fi_fifo_un[i] = NULL;
30148			}
30149
30150		} else {
30151			SD_INFO(SD_LOG_IOERR, un,
30152			    "sd_faultinjection_ioctl: un null\n");
30153		}
30154
30155		break;
30156
30157	case SDIOCINSERTARQ:
		/* Store an arq struct to be pushed onto the fifo */
30159		SD_INFO(SD_LOG_SDTEST, un,
30160		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30161		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30162
30163		sd_fault_injection_on = 0;
30164
30165		if (un->sd_fi_fifo_arq[i] != NULL) {
30166			kmem_free(un->sd_fi_fifo_arq[i],
30167			    sizeof (struct sd_fi_arq));
30168			un->sd_fi_fifo_arq[i] = NULL;
30169		}
30170		if (arg != NULL) {
30171			un->sd_fi_fifo_arq[i] =
30172			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30173			if (un->sd_fi_fifo_arq[i] == NULL) {
				/* Alloc failed; don't store anything */
30175				break;
30176			}
30177			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30178			    sizeof (struct sd_fi_arq), 0);
30179			if (rval == -1) {
30180				kmem_free(un->sd_fi_fifo_arq[i],
30181				    sizeof (struct sd_fi_arq));
30182				un->sd_fi_fifo_arq[i] = NULL;
30183			}
30184
30185		} else {
30186			SD_INFO(SD_LOG_IOERR, un,
30187			    "sd_faultinjection_ioctl: arq null\n");
30188		}
30189
30190		break;
30191
30192	case SDIOCPUSH:
30193		/* Push stored xb, pkt, un, and arq onto fifo */
30194		sd_fault_injection_on = 0;
30195
30196		if (arg != NULL) {
30197			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30198			if (rval != -1 &&
30199			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30200				un->sd_fi_fifo_end += i;
30201			}
30202		} else {
30203			SD_INFO(SD_LOG_IOERR, un,
30204			    "sd_faultinjection_ioctl: push arg null\n");
30205			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30206				un->sd_fi_fifo_end++;
30207			}
30208		}
30209		SD_INFO(SD_LOG_IOERR, un,
30210		    "sd_faultinjection_ioctl: push to end=%d\n",
30211		    un->sd_fi_fifo_end);
30212		break;
30213
30214	case SDIOCRETRIEVE:
30215		/* Return buffer of log from Injection session */
30216		SD_INFO(SD_LOG_SDTEST, un,
		    "sd_faultinjection_ioctl: Injecting Fault Retrieve");
30218
30219		sd_fault_injection_on = 0;
30220
30221		mutex_enter(&(un->un_fi_mutex));
30222		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30223		    un->sd_fi_buf_len+1, 0);
30224		mutex_exit(&(un->un_fi_mutex));
30225
30226		if (rval == -1) {
			/*
			 * arg is possibly invalid; set it to NULL
			 * for the return.
			 */
30231			arg = NULL;
30232		}
30233		break;
30234	}
30235
30236	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
30239}
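
/*
 * A user-level sketch of a typical injection session driving the ioctls
 * above (the device path and the contents of fi_pkt are hypothetical, and
 * the SD_FI_MAX_BUF definition from sddef.h is assumed to be visible):
 *
 *	struct sd_fi_pkt fi_pkt;
 *	uint_t cnt = 1;
 *	char logbuf[SD_FI_MAX_BUF];
 *	int fd = open("/dev/rdsk/c0t0d0s0", O_RDWR);
 *
 *	(void) ioctl(fd, SDIOCSTART, NULL);		reset fifo and log
 *	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt);	stage a pkt fault
 *	(void) ioctl(fd, SDIOCPUSH, &cnt);		advance fifo end by cnt
 *	(void) ioctl(fd, SDIOCRUN, NULL);		arm the injection
 *	... issue I/O so that sdintr() calls sd_faultinjection() ...
 *	(void) ioctl(fd, SDIOCRETRIEVE, logbuf);	copy out the session log
 */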
30240
30241
30242/*
30243 *    Function: sd_injection_log()
30244 *
 * Description: This routine appends buf to the existing injection log so
 *              that it can later be retrieved via sd_faultinjection_ioctl()
 *              for use in fault detection and recovery.
30248 *
30249 *   Arguments: buf - the string to add to the log
30250 */
30251
30252static void
30253sd_injection_log(char *buf, struct sd_lun *un)
30254{
30255	uint_t len;
30256
30257	ASSERT(un != NULL);
30258	ASSERT(buf != NULL);
30259
30260	mutex_enter(&(un->un_fi_mutex));
30261
30262	len = min(strlen(buf), 255);
30263	/* Add logged value to Injection log to be returned later */
30264	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30265		uint_t	offset = strlen((char *)un->sd_fi_log);
30266		char *destp = (char *)un->sd_fi_log + offset;
30267		int i;
30268		for (i = 0; i < len; i++) {
30269			*destp++ = *buf++;
30270		}
30271		un->sd_fi_buf_len += len;
30272		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30273	}
30274
30275	mutex_exit(&(un->un_fi_mutex));
30276}
30277
30278
30279/*
30280 *    Function: sd_faultinjection()
30281 *
 * Description: This routine takes the pkt and changes its
 *		content based on the error injection scenario.
30284 *
30285 *   Arguments: pktp	- packet to be changed
30286 */
30287
30288static void
30289sd_faultinjection(struct scsi_pkt *pktp)
30290{
30291	uint_t i;
30292	struct sd_fi_pkt *fi_pkt;
30293	struct sd_fi_xb *fi_xb;
30294	struct sd_fi_un *fi_un;
30295	struct sd_fi_arq *fi_arq;
30296	struct buf *bp;
30297	struct sd_xbuf *xb;
30298	struct sd_lun *un;
30299
30300	ASSERT(pktp != NULL);
30301
	/* pull bp, xb, and un from pktp */
30303	bp = (struct buf *)pktp->pkt_private;
30304	xb = SD_GET_XBUF(bp);
30305	un = SD_GET_UN(bp);
30306
30307	ASSERT(un != NULL);
30308
30309	mutex_enter(SD_MUTEX(un));
30310
30311	SD_TRACE(SD_LOG_SDTEST, un,
30312	    "sd_faultinjection: entry Injection from sdintr\n");
30313
30314	/* if injection is off return */
30315	if (sd_fault_injection_on == 0 ||
30316	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30317		mutex_exit(SD_MUTEX(un));
30318		return;
30319	}
30320
	SD_INFO(SD_LOG_SDTEST, un,
	    "sd_faultinjection: injection active, copying fifo entry\n");
30323
30324	/* take next set off fifo */
30325	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30326
30327	fi_pkt = un->sd_fi_fifo_pkt[i];
30328	fi_xb = un->sd_fi_fifo_xb[i];
30329	fi_un = un->sd_fi_fifo_un[i];
30330	fi_arq = un->sd_fi_fifo_arq[i];
30331
30332
30333	/* set variables accordingly */
30334	/* set pkt if it was on fifo */
30335	if (fi_pkt != NULL) {
30336		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30337		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30338		if (fi_pkt->pkt_cdbp != 0xff)
30339			SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30340		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30341		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30342		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30343
30344	}
30345	/* set xb if it was on fifo */
30346	if (fi_xb != NULL) {
30347		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30348		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30349		if (fi_xb->xb_retry_count != 0)
30350			SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30351		SD_CONDSET(xb, xb, xb_victim_retry_count,
30352		    "xb_victim_retry_count");
30353		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30354		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30355		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30356
30357		/* copy in block data from sense */
30358		/*
30359		 * if (fi_xb->xb_sense_data[0] != -1) {
30360		 *	bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30361		 *	SENSE_LENGTH);
30362		 * }
30363		 */
30364		bcopy(fi_xb->xb_sense_data, xb->xb_sense_data, SENSE_LENGTH);
30365
30366		/* copy in extended sense codes */
30367		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30368		    xb, es_code, "es_code");
30369		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30370		    xb, es_key, "es_key");
30371		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30372		    xb, es_add_code, "es_add_code");
30373		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30374		    xb, es_qual_code, "es_qual_code");
30375		struct scsi_extended_sense *esp;
30376		esp = (struct scsi_extended_sense *)xb->xb_sense_data;
30377		esp->es_class = CLASS_EXTENDED_SENSE;
30378	}
30379
30380	/* set un if it was on fifo */
30381	if (fi_un != NULL) {
30382		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30383		SD_CONDSET(un, un, un_ctype, "un_ctype");
30384		SD_CONDSET(un, un, un_reset_retry_count,
30385		    "un_reset_retry_count");
30386		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30387		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30388		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30389		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30390		    "un_f_allow_bus_device_reset");
30391		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30392
30393	}
30394
30395	/* copy in auto request sense if it was on fifo */
30396	if (fi_arq != NULL) {
30397		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30398	}
30399
30400	/* free structs */
30401	if (un->sd_fi_fifo_pkt[i] != NULL) {
30402		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30403	}
30404	if (un->sd_fi_fifo_xb[i] != NULL) {
30405		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30406	}
30407	if (un->sd_fi_fifo_un[i] != NULL) {
30408		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30409	}
30410	if (un->sd_fi_fifo_arq[i] != NULL) {
30411		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30412	}
30413
	/*
	 * kmem_free() does not guarantee that the pointers are set to NULL.
	 * Since we use these pointers to determine whether values were
	 * injected, make sure they are always NULL after being freed.
	 */
30420	un->sd_fi_fifo_pkt[i] = NULL;
30421	un->sd_fi_fifo_un[i] = NULL;
30422	un->sd_fi_fifo_xb[i] = NULL;
30423	un->sd_fi_fifo_arq[i] = NULL;
30424
30425	un->sd_fi_fifo_start++;
30426
30427	mutex_exit(SD_MUTEX(un));
30428
30429	SD_INFO(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30430}
30431
30432#endif /* SD_FAULT_INJECTION */
30433
30434/*
 * This routine is invoked in sd_unit_attach(). Before calling it, the
 * properties in the conf file, including the "hotpluggable" property,
 * should already have been processed.
 *
 * The sd driver distinguishes 3 different types of devices: removable media,
 * non-removable media, and hotpluggable. The differences are defined below:
30441 *
30442 * 1. Device ID
30443 *
30444 *     The device ID of a device is used to identify this device. Refer to
30445 *     ddi_devid_register(9F).
30446 *
 *     For a non-removable media disk device that can provide a 0x80 or 0x83
 *     VPD page (refer to the INQUIRY command in the SCSI SPC specification),
 *     a unique device ID is created to identify the device. For other
 *     non-removable media devices, a default device ID is created only if
 *     the device has at least 2 alternate cylinders. Otherwise, the device
 *     has no devid.
30452 *
30453 *     -------------------------------------------------------
30454 *     removable media   hotpluggable  | Can Have Device ID
30455 *     -------------------------------------------------------
30456 *         false             false     |     Yes
30457 *         false             true      |     Yes
30458 *         true                x       |     No
30459 *     ------------------------------------------------------
30460 *
30461 *
30462 * 2. SCSI group 4 commands
30463 *
 *     In the SCSI specs, only some commands in the group 4 command set can
 *     use 8-byte addresses, which are needed to access storage beyond 2TB.
 *     Other commands have no such capability. Without group 4 support, it
 *     is impossible to make full use of a disk with a capacity larger
 *     than 2TB.
30469 *
30470 *     -----------------------------------------------
30471 *     removable media   hotpluggable   LP64  |  Group
30472 *     -----------------------------------------------
30473 *           false          false       false |   1
30474 *           false          false       true  |   4
30475 *           false          true        false |   1
30476 *           false          true        true  |   4
30477 *           true             x           x   |   5
30478 *     -----------------------------------------------
30479 *
30480 *
30481 * 3. Check for VTOC Label
30482 *
 *     If a direct-access disk has no EFI label, sd will check whether it has
 *     a valid VTOC label. Now, sd also performs that check for removable
 *     media and hotpluggable devices.
30486 *
30487 *     --------------------------------------------------------------
30488 *     Direct-Access   removable media    hotpluggable |  Check Label
30489 *     -------------------------------------------------------------
30490 *         false          false           false        |   No
30491 *         false          false           true         |   No
30492 *         false          true            false        |   Yes
30493 *         false          true            true         |   Yes
30494 *         true            x                x          |   Yes
30495 *     --------------------------------------------------------------
30496 *
30497 *
30498 * 4. Building default VTOC label
30499 *
 *     As section 3 says, sd checks whether certain kinds of devices have a
 *     VTOC label. If those devices have no valid VTOC label, sd(7d) will
 *     attempt to create a default VTOC for them. Currently sd creates a
 *     default VTOC label for all devices on the x86 platform (VTOC_16), but
 *     only for removable media devices on SPARC (VTOC_8).
30505 *
30506 *     -----------------------------------------------------------
30507 *       removable media hotpluggable platform   |   Default Label
30508 *     -----------------------------------------------------------
30509 *             false          false    sparc     |     No
30510 *             false          true      x86      |     Yes
30511 *             false          true     sparc     |     Yes
30512 *             true             x        x       |     Yes
30513 *     ----------------------------------------------------------
30514 *
30515 *
30516 * 5. Supported blocksizes of target devices
30517 *
 *     Sd supports a non-512-byte blocksize for removable media devices only.
 *     For other devices, only a 512-byte blocksize is supported. This may
 *     change in the near future because some RAID devices require a
 *     non-512-byte blocksize.
30522 *
30523 *     -----------------------------------------------------------
30524 *     removable media    hotpluggable    | non-512-byte blocksize
30525 *     -----------------------------------------------------------
30526 *           false          false         |   No
30527 *           false          true          |   No
30528 *           true             x           |   Yes
30529 *     -----------------------------------------------------------
30530 *
30531 *
30532 * 6. Automatic mount & unmount
30533 *
 *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used to
 *     query whether a device is a removable media device. It returns 1 for
 *     removable media devices and 0 for others, as sketched below.
30537 *
30538 *     The automatic mounting subsystem should distinguish between the types
30539 *     of devices and apply automounting policies to each.
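 *
 *     For example (a user-level sketch; the device path is hypothetical):
 *
 *		int rm = 0;
 *		int fd = open("/dev/rdsk/c1t0d0s2", O_RDONLY | O_NDELAY);
 *
 *		if (ioctl(fd, DKIOCREMOVABLE, &rm) == 0 && rm != 0) {
 *			... apply the removable media automount policy ...
 *		}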
30540 *
30541 *
30542 * 7. fdisk partition management
30543 *
 *     Fdisk is the traditional partitioning method on the x86 platform, and
 *     sd(7d) supports fdisk partitions only on x86. On the SPARC platform,
 *     sd doesn't support fdisk partitions at all. Note: pcfs(7fs) can
 *     recognize fdisk partitions on both x86 and SPARC platforms.
30548 *
30549 *     -----------------------------------------------------------
30550 *       platform   removable media  USB/1394  |  fdisk supported
30551 *     -----------------------------------------------------------
30552 *        x86         X               X        |       true
30553 *     ------------------------------------------------------------
30554 *        sparc       X               X        |       false
30555 *     ------------------------------------------------------------
30556 *
30557 *
30558 * 8. MBOOT/MBR
30559 *
 *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
 *     support reading/writing the mboot for removable media devices on SPARC.
30562 *
30563 *     -----------------------------------------------------------
30564 *       platform   removable media  USB/1394  |  mboot supported
30565 *     -----------------------------------------------------------
30566 *        x86         X               X        |       true
30567 *     ------------------------------------------------------------
30568 *        sparc      false           false     |       false
30569 *        sparc      false           true      |       true
30570 *        sparc      true            false     |       true
30571 *        sparc      true            true      |       true
30572 *     ------------------------------------------------------------
30573 *
30574 *
30575 * 9.  error handling during opening device
30576 *
 *     If opening a disk device fails, an errno is returned. For some kinds
 *     of errors, a different errno is returned depending on whether the
 *     device is a removable media device. This brings USB/1394 hard disks
 *     in line with expected hard disk behavior. It is not expected that
 *     this breaks any application.
30582 *
30583 *     ------------------------------------------------------
30584 *       removable media    hotpluggable   |  errno
30585 *     ------------------------------------------------------
30586 *             false          false        |   EIO
30587 *             false          true         |   EIO
30588 *             true             x          |   ENXIO
30589 *     ------------------------------------------------------
30590 *
30591 *
30592 * 11. ioctls: DKIOCEJECT, CDROMEJECT
30593 *
30594 *     These IOCTLs are applicable only to removable media devices.
30595 *
30596 *     -----------------------------------------------------------
30597 *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30598 *     -----------------------------------------------------------
30599 *             false          false        |     No
30600 *             false          true         |     No
30601 *             true            x           |     Yes
30602 *     -----------------------------------------------------------
30603 *
30604 *
30605 * 12. Kstats for partitions
30606 *
 *     sd creates partition kstats for non-removable media devices. USB and
 *     Firewire hard disks now have partition kstats as well.
30609 *
30610 *      ------------------------------------------------------
30611 *       removable media    hotpluggable   |   kstat
30612 *      ------------------------------------------------------
30613 *             false          false        |    Yes
30614 *             false          true         |    Yes
30615 *             true             x          |    No
30616 *       ------------------------------------------------------
30617 *
30618 *
30619 * 13. Removable media & hotpluggable properties
30620 *
 *     The sd driver creates a "removable-media" property for removable media
 *     devices. Parent nexus drivers create a "hotpluggable" property if they
 *     support hotplugging.
30624 *
30625 *     ---------------------------------------------------------------------
 *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
30627 *     ---------------------------------------------------------------------
30628 *       false            false       |    No                   No
30629 *       false            true        |    No                   Yes
30630 *       true             false       |    Yes                  No
30631 *       true             true        |    Yes                  Yes
30632 *     ---------------------------------------------------------------------
30633 *
30634 *
30635 * 14. Power Management
30636 *
 *     sd only power-manages removable media devices or devices that support
 *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
30639 *
30640 *     A parent nexus that supports hotplugging can also set "pm-capable"
30641 *     if the disk can be power managed.
30642 *
30643 *     ------------------------------------------------------------
30644 *       removable media hotpluggable pm-capable  |   power manage
30645 *     ------------------------------------------------------------
30646 *             false          false     false     |     No
30647 *             false          false     true      |     Yes
30648 *             false          true      false     |     No
30649 *             false          true      true      |     Yes
30650 *             true             x        x        |     Yes
30651 *     ------------------------------------------------------------
30652 *
 *      USB and firewire hard disks can now be power-managed independently
 *      of the framebuffer.
30655 *
30656 *
30657 * 15. Support for USB disks with capacity larger than 1TB
30658 *
 *     Currently, sd doesn't permit a fixed disk device with a capacity
 *     larger than 1TB to be used in a 32-bit operating system environment.
 *     However, sd doesn't enforce that restriction for removable media
 *     devices. Instead, it assumes that removable media devices cannot have
 *     a capacity larger than 1TB. Therefore, using those devices on a 32-bit
 *     system is only partially supported, which can cause some unexpected
 *     results.
30665 *
30666 *     ---------------------------------------------------------------------
30667 *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
30668 *     ---------------------------------------------------------------------
30669 *             false          false  |   true         |     no
30670 *             false          true   |   true         |     no
30671 *             true           false  |   true         |     Yes
30672 *             true           true   |   true         |     Yes
30673 *     ---------------------------------------------------------------------
30674 *
30675 *
30676 * 16. Check write-protection at open time
30677 *
 *     When a removable media device is opened for writing without the NDELAY
 *     flag, sd will check whether the device is writable. If a
 *     write-protected device is opened without the NDELAY flag, the open
 *     will fail.
30681 *
30682 *     ------------------------------------------------------------
30683 *       removable media    USB/1394   |   WP Check
30684 *     ------------------------------------------------------------
30685 *             false          false    |     No
30686 *             false          true     |     No
30687 *             true           false    |     Yes
30688 *             true           true     |     Yes
30689 *     ------------------------------------------------------------
30690 *
30691 *
30692 * 17. syslog when corrupted VTOC is encountered
30693 *
 *      Currently, if an invalid VTOC is encountered, sd only prints a
 *      syslog message for fixed SCSI disks.
30696 *     ------------------------------------------------------------
30697 *       removable media    USB/1394   |   print syslog
30698 *     ------------------------------------------------------------
30699 *             false          false    |     Yes
30700 *             false          true     |     No
30701 *             true           false    |     No
30702 *             true           true     |     No
30703 *     ------------------------------------------------------------
30704 */
30705static void
30706sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
30707{
30708	int	pm_cap;
30709
30710	ASSERT(un->un_sd);
30711	ASSERT(un->un_sd->sd_inq);
30712
30713	/*
30714	 * Enable SYNC CACHE support for all devices.
30715	 */
30716	un->un_f_sync_cache_supported = TRUE;
30717
	/*
	 * Set the sync cache required flag to false. This ensures that
	 * no SYNC CACHE command is sent when there have been no writes.
	 */
30723	un->un_f_sync_cache_required = FALSE;
30724
30725	if (un->un_sd->sd_inq->inq_rmb) {
		/*
		 * The media of this device is removable, so it is possible
		 * to change the medium after the device has been opened.
		 * Thus we should support this operation.
		 */
30731		un->un_f_has_removable_media = TRUE;
30732
		/*
		 * Support non-512-byte blocksizes for removable media
		 * devices.
		 */
30736		un->un_f_non_devbsize_supported = TRUE;
30737
30738		/*
30739		 * Assume that all removable media devices support DOOR_LOCK
30740		 */
30741		un->un_f_doorlock_supported = TRUE;
30742
		/*
		 * A removable media device may be opened with the NDELAY flag
		 * when there is no media in the drive; in that case we don't
		 * care whether the device is writable. Without the NDELAY
		 * flag, however, we need to check whether the media is
		 * write-protected.
		 */
30749		un->un_f_chk_wp_open = TRUE;
30750
		/*
		 * We need to start a SCSI watch thread to monitor the media
		 * state; when media is inserted or ejected, notify syseventd.
		 */
30755		un->un_f_monitor_media_state = TRUE;
30756
		/*
		 * Some devices don't support the START_STOP_UNIT command,
		 * so check whether a device supports it before sending it.
		 */
30762		un->un_f_check_start_stop = TRUE;
30763
30764		/*
30765		 * support eject media ioctl:
30766		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
30767		 */
30768		un->un_f_eject_media_supported = TRUE;
30769
		/*
		 * Because many removable media devices don't support
		 * LOG_SENSE, we cannot use that command to check whether a
		 * removable media device supports power management. We assume
		 * that such devices support power management via the
		 * START_STOP_UNIT command and can be spun up and down
		 * without limitation.
		 */
30778		un->un_f_pm_supported = TRUE;
30779
30780		/*
30781		 * Need to create a zero length (Boolean) property
30782		 * removable-media for the removable media devices.
30783		 * Note that the return value of the property is not being
30784		 * checked, since if unable to create the property
30785		 * then do not want the attach to fail altogether. Consistent
30786		 * with other property creation in attach.
30787		 */
30788		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
30789		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
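		/*
		 * Userland consumers (volume management, for example)
		 * detect removable devices by checking for the presence
		 * of this property; its existence, not its value, is
		 * what matters.
		 */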

	} else {
		/*
		 * Create a device ID for the device.
		 */
		un->un_f_devid_supported = TRUE;

		/*
		 * Spin up non-removable-media devices once they are
		 * attached.
		 */
		un->un_f_attach_spinup = TRUE;

		/*
		 * The SCSI specification defines two sense data formats:
		 * fixed format and descriptor format. At present, we don't
		 * support descriptor format sense data for removable
		 * media.
		 */
		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
			un->un_f_descr_format_supported = TRUE;
		}

		/*
		 * kstats are created only for non-removable media devices.
		 *
		 * Set "enable-partition-kstats" to 0 in sd.conf to disable
		 * kstats. The default is 1, so they are enabled by default.
		 */
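		/*
		 * For example, partition kstats can typically be disabled
		 * with a line such as the following in sd.conf (a sketch;
		 * the exact configuration file depends on the platform):
		 *
		 *	enable-partition-kstats=0;
		 */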
		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
		    "enable-partition-kstats", 1));

		/*
		 * Check if the HBA has set the "pm-capable" property.
		 * If "pm-capable" exists and is non-zero then we can
		 * power manage the device without checking the start/stop
		 * cycle count log sense page.
		 *
		 * If "pm-capable" exists and is set to false (0),
		 * then we should not power manage the device.
		 *
		 * If "pm-capable" doesn't exist then pm_cap will
		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
		 * sd will check the start/stop cycle count log sense page
		 * and power manage the device if the cycle count limit has
		 * not been exceeded.
		 */
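		/*
		 * For example, an HBA nexus driver could export this
		 * property on the child device node with something like
		 * the following sketch (child_dip is hypothetical here):
		 *
		 *	(void) ddi_prop_update_int(DDI_DEV_T_NONE,
		 *	    child_dip, "pm-capable", 1);
		 */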
		pm_cap = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
		if (SD_PM_CAPABLE_IS_UNDEFINED(pm_cap)) {
			un->un_f_log_sense_supported = TRUE;
			if (!un->un_f_power_condition_disabled &&
			    SD_INQUIRY(un)->inq_ansi == 6) {
				un->un_f_power_condition_supported = TRUE;
			}
		} else {
			/*
			 * pm-capable property exists.
			 *
			 * Convert "TRUE" values for pm_cap to
			 * SD_PM_CAPABLE_IS_TRUE to make it easier to check
			 * later. "TRUE" values are any values defined in
			 * inquiry.h.
			 */
			if (SD_PM_CAPABLE_IS_FALSE(pm_cap)) {
				un->un_f_log_sense_supported = FALSE;
			} else {
				/* SD_PM_CAPABLE_IS_TRUE case */
				un->un_f_pm_supported = TRUE;
				if (!un->un_f_power_condition_disabled &&
				    SD_PM_CAPABLE_IS_SPC_4(pm_cap)) {
					un->un_f_power_condition_supported =
					    TRUE;
				}
				if (SD_PM_CAP_LOG_SUPPORTED(pm_cap)) {
					un->un_f_log_sense_supported = TRUE;
					un->un_f_pm_log_sense_smart =
					    SD_PM_CAP_SMART_LOG(pm_cap);
				}
			}

			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_unit_attach: un:0x%p pm-capable "
			    "property set to %d.\n", un, un->un_f_pm_supported);
		}
	}

	if (un->un_f_is_hotpluggable) {
		/*
		 * Have to watch hotpluggable devices as well, since
		 * that's the only way for userland applications to
		 * detect hot removal while the device is busy/mounted.
		 */
		un->un_f_monitor_media_state = TRUE;

		un->un_f_check_start_stop = TRUE;
	}
}

/*
 * sd_tg_rdwr:
 * Provides rdwr access for cmlb via sd_tgops. The start_block is
 * in units of the system block size; reqlength is in bytes.
 */
static int
sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
    diskaddr_t start_block, size_t reqlength, void *tg_cookie)
{
	struct sd_lun *un;
	int path_flag = (int)(uintptr_t)tg_cookie;
	char *dkl = NULL;
	diskaddr_t real_addr = start_block;
	diskaddr_t first_byte, end_block;

	size_t	buffer_size = reqlength;
	int rval = 0;
	diskaddr_t	cap;
	uint32_t	lbasize;
	sd_ssc_t	*ssc;

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL)
		return (ENXIO);

	if (cmd != TG_READ && cmd != TG_WRITE)
		return (EINVAL);

	ssc = sd_ssc_init(un);
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
		    &lbasize, path_flag);
		if (rval != 0)
			goto done1;
		mutex_enter(SD_MUTEX(un));
		sd_update_block_info(un, lbasize, cap);
		if (un->un_f_tgt_blocksize_is_valid == FALSE) {
			mutex_exit(SD_MUTEX(un));
			rval = EIO;
			goto done;
		}
	}

	if (NOT_DEVBSIZE(un)) {
		/*
		 * sys_blocksize != tgt_blocksize: re-adjust the block
		 * number and save the byte index to the beginning of
		 * the dk_label within the target block.
		 */
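		/*
		 * A worked example (a sketch, assuming a 512-byte system
		 * block size and a 2048-byte target block size):
		 * start_block = 10 and reqlength = 512 give
		 * first_byte = 5120, real_addr = 5120 / 2048 = 2, and
		 * end_block = (5120 + 512 + 2047) / 2048 = 3, so
		 * buffer_size = (3 - 2) * 2048 = 2048. Since 5120 is not
		 * a multiple of 2048, the request is unaligned and a
		 * bounce buffer (dkl) is allocated.
		 */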
		first_byte = SD_SYSBLOCKS2BYTES(start_block);
		real_addr = first_byte / un->un_tgt_blocksize;

		end_block = (first_byte + reqlength +
		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;

		/* round up buffer size to multiple of target block size */
		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;

		SD_TRACE(SD_LOG_IO_PARTITION, un,
		    "sd_tg_rdwr: label_addr: 0x%x allocation size: 0x%x\n",
		    real_addr, buffer_size);

		if (((first_byte % un->un_tgt_blocksize) != 0) ||
		    (reqlength % un->un_tgt_blocksize) != 0)
			/* the request is not aligned */
			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
	}

	/*
	 * The MMC standard allows READ CAPACITY to be
	 * inaccurate by a bounded amount (in the interest of
	 * response latency).  As a result, failed READs are
	 * commonplace (due to the reading of metadata and not
	 * data). Depending on the per-Vendor/drive Sense data,
	 * the failed READ can cause many (unnecessary) retries.
	 */

	if (ISCD(un) && (cmd == TG_READ) &&
	    (un->un_f_blockcount_is_valid == TRUE) &&
	    ((start_block == (un->un_blockcount - 1)) ||
	    (start_block == (un->un_blockcount - 2)))) {
		path_flag = SD_PATH_DIRECT_PRIORITY;
	}

	mutex_exit(SD_MUTEX(un));
	if (cmd == TG_READ) {
		rval = sd_send_scsi_READ(ssc, (dkl != NULL) ? dkl : bufaddr,
		    buffer_size, real_addr, path_flag);
		if (dkl != NULL)
			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
			    real_addr), bufaddr, reqlength);
	} else {
		if (dkl) {
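			/*
			 * Unaligned write: read the covering target
			 * blocks first, merge the caller's data at the
			 * proper byte offset, then write the merged
			 * buffer back (read-modify-write).
			 */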
			rval = sd_send_scsi_READ(ssc, dkl, buffer_size,
			    real_addr, path_flag);
			if (rval) {
				goto done1;
			}
			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
			    real_addr), reqlength);
		}
		rval = sd_send_scsi_WRITE(ssc, (dkl != NULL) ? dkl : bufaddr,
		    buffer_size, real_addr, path_flag);
	}

done1:
	if (dkl != NULL)
		kmem_free(dkl, buffer_size);

	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
done:
	sd_ssc_fini(ssc);
	return (rval);
}

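/*
 * sd_tg_getinfo:
 * Provides capacity, block size, and disk geometry information, as
 * well as media attributes, for cmlb via sd_tgops.
 */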
static int
sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
{
	struct sd_lun *un;
	diskaddr_t	cap;
	uint32_t	lbasize;
	int		path_flag = (int)(uintptr_t)tg_cookie;
	int		ret = 0;

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL)
		return (ENXIO);

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
	case TG_GETCAPACITY:
	case TG_GETBLOCKSIZE:
		mutex_enter(SD_MUTEX(un));

		if ((un->un_f_blockcount_is_valid == TRUE) &&
		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
			cap = un->un_blockcount;
			lbasize = un->un_tgt_blocksize;
			mutex_exit(SD_MUTEX(un));
		} else {
			sd_ssc_t	*ssc;
			mutex_exit(SD_MUTEX(un));
			ssc = sd_ssc_init(un);
			ret = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
			    &lbasize, path_flag);
			if (ret != 0) {
				if (ret == EIO)
					sd_ssc_assessment(ssc,
					    SD_FMT_STATUS_CHECK);
				else
					sd_ssc_assessment(ssc,
					    SD_FMT_IGNORE);
				sd_ssc_fini(ssc);
				return (ret);
			}
			sd_ssc_fini(ssc);
			mutex_enter(SD_MUTEX(un));
			sd_update_block_info(un, lbasize, cap);
			if ((un->un_f_blockcount_is_valid == FALSE) ||
			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
				mutex_exit(SD_MUTEX(un));
				return (EIO);
			}
			mutex_exit(SD_MUTEX(un));
		}

		if (cmd == TG_GETCAPACITY) {
			*(diskaddr_t *)arg = cap;
			return (0);
		}

		if (cmd == TG_GETBLOCKSIZE) {
			*(uint32_t *)arg = lbasize;
			return (0);
		}

		if (cmd == TG_GETPHYGEOM)
			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
			    cap, lbasize, path_flag);
		else
			/* TG_GETVIRTGEOM */
			ret = sd_get_virtual_geometry(un,
			    (cmlb_geom_t *)arg, cap, lbasize);

		return (ret);

	case TG_GETATTR:
		mutex_enter(SD_MUTEX(un));
		((tg_attribute_t *)arg)->media_is_writable =
		    un->un_f_mmc_writable_media;
		((tg_attribute_t *)arg)->media_is_solid_state =
		    un->un_f_is_solid_state;
		mutex_exit(SD_MUTEX(un));
		return (0);
	default:
		return (ENOTTY);
	}
}

/*
 *    Function: sd_ssc_ereport_post
 *
 * Description: Called when the sd driver needs to post an ereport.
 *
 *    Context: Kernel thread or interrupt context.
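 *
 *              Depending on the driver assessment and the data available,
 *              one of these classes is posted (see below):
 *              ereport.io.scsi.cmd.disk.recovered, .dev.uderr, .tran,
 *              .dev.rqs.merr, .dev.rqs.derr, or .dev.serr.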
 */

#define	DEVID_IF_KNOWN(d) "devid", DATA_TYPE_STRING, (d) ? (d) : "unknown"

static void
sd_ssc_ereport_post(sd_ssc_t *ssc, enum sd_driver_assessment drv_assess)
{
	int uscsi_path_instance = 0;
	uchar_t	uscsi_pkt_reason;
	uint32_t uscsi_pkt_state;
	uint32_t uscsi_pkt_statistics;
	uint64_t uscsi_ena;
	uchar_t op_code;
	uint8_t *sensep;
	union scsi_cdb *cdbp;
	uint_t cdblen = 0;
	uint_t senlen = 0;
	struct sd_lun *un;
	dev_info_t *dip;
	char *devid;
	int ssc_invalid_flags = SSC_FLAGS_INVALID_PKT_REASON |
	    SSC_FLAGS_INVALID_STATUS |
	    SSC_FLAGS_INVALID_SENSE |
	    SSC_FLAGS_INVALID_DATA;
	char assessment[16];

	ASSERT(ssc != NULL);
	ASSERT(ssc->ssc_uscsi_cmd != NULL);
	ASSERT(ssc->ssc_uscsi_info != NULL);

	un = ssc->ssc_un;
	ASSERT(un != NULL);

	dip = un->un_sd->sd_dev;

	/*
	 * Get the devid:
	 *	devid will only be passed to non-transport error reports.
	 */
	devid = DEVI(dip)->devi_devid_str;

	/*
	 * If we are panicking, suspended, or dumping, the command will
	 * not be executed, so do not post an ereport.
	 */
	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_DUMPING))
		return;

	uscsi_pkt_reason = ssc->ssc_uscsi_info->ui_pkt_reason;
	uscsi_path_instance = ssc->ssc_uscsi_cmd->uscsi_path_instance;
	uscsi_pkt_state = ssc->ssc_uscsi_info->ui_pkt_state;
	uscsi_pkt_statistics = ssc->ssc_uscsi_info->ui_pkt_statistics;
	uscsi_ena = ssc->ssc_uscsi_info->ui_ena;

	sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
	cdbp = (union scsi_cdb *)ssc->ssc_uscsi_cmd->uscsi_cdb;

	/* In rare cases (e.g. DOORLOCK) the cdb can be NULL */
	if (cdbp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_ssc_ereport_post meet empty cdb\n");
		return;
	}

	op_code = cdbp->scc_cmd;

	cdblen = (int)ssc->ssc_uscsi_cmd->uscsi_cdblen;
	senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
	    ssc->ssc_uscsi_cmd->uscsi_rqresid);

	if (senlen > 0)
		ASSERT(sensep != NULL);

	/*
	 * Map drv_assess to the corresponding driver-assessment string.
	 * SD_FM_DRV_FATAL will be mapped to "fail" or "fatal" depending
	 * on the sense key returned.
	 */
	switch (drv_assess) {
		case SD_FM_DRV_RECOVERY:
			(void) sprintf(assessment, "%s", "recovered");
			break;
		case SD_FM_DRV_RETRY:
			(void) sprintf(assessment, "%s", "retry");
			break;
		case SD_FM_DRV_NOTICE:
			(void) sprintf(assessment, "%s", "info");
			break;
		case SD_FM_DRV_FATAL:
		default:
			(void) sprintf(assessment, "%s", "unknown");
	}
	/*
	 * If drv_assess == SD_FM_DRV_RECOVERY, this should be a recovered
	 * command, so we post ereport.io.scsi.cmd.disk.recovered.
	 * driver-assessment will always be "recovered" here.
	 */
	if (drv_assess == SD_FM_DRV_RECOVERY) {
		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
		    "cmd.disk.recovered", uscsi_ena, devid, NULL,
		    DDI_NOSLEEP, NULL,
		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
		    DEVID_IF_KNOWN(devid),
		    "driver-assessment", DATA_TYPE_STRING, assessment,
		    "op-code", DATA_TYPE_UINT8, op_code,
		    "cdb", DATA_TYPE_UINT8_ARRAY,
		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
		    NULL);
		return;
	}

	/*
	 * If there is unexpected or un-decodable data, we post
	 * ereport.io.scsi.cmd.disk.dev.uderr.
	 * driver-assessment will be set based on the drv_assess parameter.
	 * SSC_FLAGS_INVALID_SENSE - invalid sense data sent back.
	 * SSC_FLAGS_INVALID_PKT_REASON - invalid pkt-reason encountered.
	 * SSC_FLAGS_INVALID_STATUS - invalid stat-code encountered.
	 * SSC_FLAGS_INVALID_DATA - invalid data sent back.
	 */
	if (ssc->ssc_flags & ssc_invalid_flags) {
		if (ssc->ssc_flags & SSC_FLAGS_INVALID_SENSE) {
			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
			    NULL, "cmd.disk.dev.uderr", uscsi_ena, devid,
			    NULL, DDI_NOSLEEP, NULL,
			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
			    DEVID_IF_KNOWN(devid),
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ?
			    "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb", DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
			    "pkt-stats", DATA_TYPE_UINT32,
			    uscsi_pkt_statistics,
			    "stat-code", DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    "un-decode-info", DATA_TYPE_STRING,
			    ssc->ssc_info,
			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
			    senlen, sensep,
			    NULL);
		} else {
			/*
			 * For other types of invalid data, the
			 * un-decode-value field is left empty because the
			 * un-decodable content can be seen in the upper
			 * level payload or inside un-decode-info.
			 */
			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
			    NULL,
			    "cmd.disk.dev.uderr", uscsi_ena, devid,
			    NULL, DDI_NOSLEEP, NULL,
			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
			    DEVID_IF_KNOWN(devid),
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ?
			    "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb", DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
			    "pkt-stats", DATA_TYPE_UINT32,
			    uscsi_pkt_statistics,
			    "stat-code", DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    "un-decode-info", DATA_TYPE_STRING,
			    ssc->ssc_info,
			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
			    0, NULL,
			    NULL);
		}
		ssc->ssc_flags &= ~ssc_invalid_flags;
		return;
	}

	if (uscsi_pkt_reason != CMD_CMPLT ||
	    (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)) {
		/*
		 * pkt-reason != CMD_CMPLT, or SSC_FLAGS_TRAN_ABORT was
		 * set inside sd_start_cmds due to errors (bad packet or
		 * fatal transport error). Treat it as a transport error
		 * and post ereport.io.scsi.cmd.disk.tran.
		 * driver-assessment will be set based on drv_assess.
		 * devid is set to NULL because this is a transport
		 * error.
		 */
		if (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)
			ssc->ssc_flags &= ~SSC_FLAGS_TRAN_ABORT;

		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
		    "cmd.disk.tran", uscsi_ena, NULL, NULL, DDI_NOSLEEP, NULL,
		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
		    DEVID_IF_KNOWN(devid),
		    "driver-assessment", DATA_TYPE_STRING,
		    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
		    "op-code", DATA_TYPE_UINT8, op_code,
		    "cdb", DATA_TYPE_UINT8_ARRAY,
		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
		    NULL);
	} else {
		/*
		 * If we got here, we have a completed command, and we need
		 * to further investigate the sense data to see what kind
		 * of ereport we should post.
		 * Post ereport.io.scsi.cmd.disk.dev.rqs.merr
		 * if sense-key == 0x3.
		 * Post ereport.io.scsi.cmd.disk.dev.rqs.derr otherwise.
		 * driver-assessment will be set based on the parameter
		 * drv_assess.
		 */
		if (senlen > 0) {
			/*
			 * Here we have sense data available.
			 */
			uint8_t sense_key;
			sense_key = scsi_sense_key(sensep);
			if (sense_key == 0x3) {
				/*
				 * sense-key == 0x3 (medium error);
				 * driver-assessment should be "fatal" if
				 * drv_assess is SD_FM_DRV_FATAL.
				 */
				scsi_fm_ereport_post(un->un_sd,
				    uscsi_path_instance, NULL,
				    "cmd.disk.dev.rqs.merr",
				    uscsi_ena, devid, NULL, DDI_NOSLEEP, NULL,
				    FM_VERSION, DATA_TYPE_UINT8,
				    FM_EREPORT_VERS0,
				    DEVID_IF_KNOWN(devid),
				    "driver-assessment",
				    DATA_TYPE_STRING,
				    drv_assess == SD_FM_DRV_FATAL ?
				    "fatal" : assessment,
				    "op-code",
				    DATA_TYPE_UINT8, op_code,
				    "cdb",
				    DATA_TYPE_UINT8_ARRAY, cdblen,
				    ssc->ssc_uscsi_cmd->uscsi_cdb,
				    "pkt-reason",
				    DATA_TYPE_UINT8, uscsi_pkt_reason,
				    "pkt-state",
				    DATA_TYPE_UINT32, uscsi_pkt_state,
				    "pkt-stats",
				    DATA_TYPE_UINT32,
				    uscsi_pkt_statistics,
				    "stat-code",
				    DATA_TYPE_UINT8,
				    ssc->ssc_uscsi_cmd->uscsi_status,
				    "key",
				    DATA_TYPE_UINT8,
				    scsi_sense_key(sensep),
				    "asc",
				    DATA_TYPE_UINT8,
				    scsi_sense_asc(sensep),
				    "ascq",
				    DATA_TYPE_UINT8,
				    scsi_sense_ascq(sensep),
				    "sense-data",
				    DATA_TYPE_UINT8_ARRAY,
				    senlen, sensep,
				    "lba",
				    DATA_TYPE_UINT64,
				    ssc->ssc_uscsi_info->ui_lba,
				    NULL);
			} else {
				/*
				 * sense-key != 0x3 (e.g. 0x4, hardware
				 * error); driver-assessment should be
				 * "fatal" if drv_assess is SD_FM_DRV_FATAL
				 * and the sense-key is 0x4, and "fail"
				 * otherwise.
				 */
				scsi_fm_ereport_post(un->un_sd,
				    uscsi_path_instance, NULL,
				    "cmd.disk.dev.rqs.derr",
				    uscsi_ena, devid,
				    NULL, DDI_NOSLEEP, NULL,
				    FM_VERSION,
				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
				    DEVID_IF_KNOWN(devid),
				    "driver-assessment",
				    DATA_TYPE_STRING,
				    drv_assess == SD_FM_DRV_FATAL ?
				    (sense_key == 0x4 ?
				    "fatal" : "fail") : assessment,
				    "op-code",
				    DATA_TYPE_UINT8, op_code,
				    "cdb",
				    DATA_TYPE_UINT8_ARRAY, cdblen,
				    ssc->ssc_uscsi_cmd->uscsi_cdb,
				    "pkt-reason",
				    DATA_TYPE_UINT8, uscsi_pkt_reason,
				    "pkt-state",
				    DATA_TYPE_UINT32, uscsi_pkt_state,
				    "pkt-stats",
				    DATA_TYPE_UINT32,
				    uscsi_pkt_statistics,
				    "stat-code",
				    DATA_TYPE_UINT8,
				    ssc->ssc_uscsi_cmd->uscsi_status,
				    "key",
				    DATA_TYPE_UINT8,
				    scsi_sense_key(sensep),
				    "asc",
				    DATA_TYPE_UINT8,
				    scsi_sense_asc(sensep),
				    "ascq",
				    DATA_TYPE_UINT8,
				    scsi_sense_ascq(sensep),
				    "sense-data",
				    DATA_TYPE_UINT8_ARRAY,
				    senlen, sensep,
				    NULL);
			}
		} else {
			/*
			 * If the stat-code is STATUS_GOOD, this is not a
			 * hardware error: nothing to post.
			 */
			if (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD)
				return;

			/*
			 * Post ereport.io.scsi.cmd.disk.dev.serr if we got
			 * a stat-code but sense data is unavailable.
			 * driver-assessment will be set based on the
			 * drv_assess parameter.
			 */
			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
			    NULL,
			    "cmd.disk.dev.serr", uscsi_ena,
			    devid, NULL, DDI_NOSLEEP, NULL,
			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
			    DEVID_IF_KNOWN(devid),
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb",
			    DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason",
			    DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state",
			    DATA_TYPE_UINT32, uscsi_pkt_state,
			    "pkt-stats",
			    DATA_TYPE_UINT32, uscsi_pkt_statistics,
			    "stat-code",
			    DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    NULL);
		}
	}
}

/*
 *     Function: sd_ssc_extract_info
 *
 * Description: Extract information available to help generate an ereport.
 *
 *     Context: Kernel thread or interrupt context.
 */
static void
sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un, struct scsi_pkt *pktp,
    struct buf *bp, struct sd_xbuf *xp)
{
	size_t senlen = 0;
	union scsi_cdb *cdbp;
	int path_instance;
	/*
	 * Need the scsi_cdb_size array to determine the cdb length.
	 */
	extern uchar_t	scsi_cdb_size[];

	ASSERT(un != NULL);
	ASSERT(pktp != NULL);
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(ssc != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Transfer the cdb buffer pointer here.
	 */
	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;

	ssc->ssc_uscsi_cmd->uscsi_cdblen = scsi_cdb_size[GETGROUP(cdbp)];
	ssc->ssc_uscsi_cmd->uscsi_cdb = (caddr_t)cdbp;

	/*
	 * Transfer the sense data buffer pointer if sense data is
	 * available, first calculating the sense data length.
	 */
	if ((xp->xb_sense_state & STATE_XARQ_DONE) ||
	    (xp->xb_sense_state & STATE_ARQ_DONE)) {
		/*
		 * ARQ (auto request sense) case.
		 */
		if (xp->xb_sense_state & STATE_XARQ_DONE) {
			senlen = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		} else {
			senlen = SENSE_LENGTH;
		}
	} else {
		/*
		 * Non-ARQ case.
		 */
		if (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK &&
		    (xp->xb_sense_state & STATE_XFERRED_DATA)) {
			senlen = SENSE_LENGTH - xp->xb_sense_resid;
		}
	}

	ssc->ssc_uscsi_cmd->uscsi_rqlen = (senlen & 0xff);
	ssc->ssc_uscsi_cmd->uscsi_rqresid = 0;
	ssc->ssc_uscsi_cmd->uscsi_rqbuf = (caddr_t)xp->xb_sense_data;

	ssc->ssc_uscsi_cmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);

	/*
	 * Only transfer path_instance when the scsi_pkt was properly
	 * allocated.
	 */
	path_instance = pktp->pkt_path_instance;
	if (scsi_pkt_allocated_correctly(pktp) && path_instance)
		ssc->ssc_uscsi_cmd->uscsi_path_instance = path_instance;
	else
		ssc->ssc_uscsi_cmd->uscsi_path_instance = 0;

	/*
	 * Copy in the other fields we may need when posting an ereport.
	 */
	ssc->ssc_uscsi_info->ui_pkt_reason = pktp->pkt_reason;
	ssc->ssc_uscsi_info->ui_pkt_state = pktp->pkt_state;
	ssc->ssc_uscsi_info->ui_pkt_statistics = pktp->pkt_statistics;
	ssc->ssc_uscsi_info->ui_lba = (uint64_t)SD_GET_BLKNO(bp);

	/*
	 * Do not create an ENA for a command that completed successfully
	 * with no sense data; otherwise a successful command could be
	 * mistakenly reported as recovered.
	 */
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD) &&
	    (senlen == 0)) {
		return;
	}

	/*
	 * To associate the ereports of a single command execution flow, we
	 * need a shared ENA for a specific command.
	 */
	if (xp->xb_ena == 0)
		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
}

/*
 *     Function: sd_check_solid_state
 *
 * Description: Query the optional INQUIRY VPD page 0xb1. If the device
 *              supports VPD page 0xb1, sd examines the MEDIUM ROTATION
 *              RATE. If the MEDIUM ROTATION RATE is 1, sd assumes the
 *              device is a solid state drive.
 *
 *     Context: Kernel thread or interrupt context.
 */

static void
sd_check_solid_state(sd_ssc_t *ssc)
{
	int		rval		= 0;
	uchar_t		*inqb1		= NULL;
	size_t		inqb1_len	= MAX_INQUIRY_SIZE;
	size_t		inqb1_resid	= 0;
	struct sd_lun	*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));
	un->un_f_is_solid_state = FALSE;

	if (ISCD(un)) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	if (sd_check_vpd_page_support(ssc) == 0 &&
	    un->un_vpd_page_mask & SD_VPD_DEV_CHARACTER_PG) {
		mutex_exit(SD_MUTEX(un));
		/* collect page b1 data */
		inqb1 = kmem_zalloc(inqb1_len, KM_SLEEP);

		rval = sd_send_scsi_INQUIRY(ssc, inqb1, inqb1_len,
		    0x01, 0xB1, &inqb1_resid);

		if (rval == 0 && (inqb1_len - inqb1_resid > 5)) {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_check_solid_state: successfully got VPD "
			    "page: %x PAGE LENGTH: %x BYTE 4: %x BYTE 5: %x",
			    inqb1[1], inqb1[3], inqb1[4], inqb1[5]);

			mutex_enter(SD_MUTEX(un));
			/*
			 * Check the MEDIUM ROTATION RATE. If it is set
			 * to 1, the device is a solid state drive.
			 */
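			/*
			 * Bytes 4-5 of page 0xb1 form the 16-bit MEDIUM
			 * ROTATION RATE: 0x0001 means a non-rotating
			 * medium (SSD), while a spinning disk reports its
			 * speed in rpm (e.g. 0x1C20 for 7200 rpm).
			 */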
			if (inqb1[4] == 0 && inqb1[5] == 1) {
				un->un_f_is_solid_state = TRUE;
			}
			mutex_exit(SD_MUTEX(un));
		} else if (rval != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		kmem_free(inqb1, inqb1_len);
	} else {
		mutex_exit(SD_MUTEX(un));
	}
}

/*
 *	Function: sd_check_emulation_mode
 *
 *   Description: Check whether the drive is in emulation mode by issuing
 *		  READ_CAPACITY_16 to see whether we can get the physical
 *		  block size of the drive.
 *
 *	 Context: Kernel thread or interrupt context.
 */

static void
sd_check_emulation_mode(sd_ssc_t *ssc)
{
	int		rval = 0;
	uint64_t	capacity;
	uint_t		lbasize;
	uint_t		pbsize;
	int		i;
	int		devid_len;
	struct sd_lun	*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));
	if (ISCD(un)) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	if (un->un_f_descr_format_supported) {
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
		    &pbsize, SD_PATH_DIRECT);
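		/*
		 * For example, a 512e drive (4 KB physical sectors
		 * emulating 512-byte logical blocks) typically reports
		 * lbasize == 512 and pbsize == 4096 here.
		 */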
		mutex_enter(SD_MUTEX(un));

		if (rval != 0) {
			un->un_phy_blocksize = DEV_BSIZE;
		} else {
			if (pbsize == 0 || (pbsize % DEV_BSIZE) != 0 ||
			    !ISP2(pbsize)) {
				un->un_phy_blocksize = DEV_BSIZE;
			} else {
				un->un_phy_blocksize = pbsize;
			}
		}
	}

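	/*
	 * For devices in the known flash device table, override the
	 * physical block size with SSD_SECSIZE, and enable
	 * read-modify-write on a solid state drive whose physical block
	 * size differs from the target block size.
	 */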
	for (i = 0; i < sd_flash_dev_table_size; i++) {
		devid_len = (int)strlen(sd_flash_dev_table[i]);
		if (sd_sdconf_id_match(un, sd_flash_dev_table[i], devid_len)
		    == SD_SUCCESS) {
			un->un_phy_blocksize = SSD_SECSIZE;
			if (un->un_f_is_solid_state &&
			    un->un_phy_blocksize != un->un_tgt_blocksize)
				un->un_f_enable_rmw = TRUE;
		}
	}

	mutex_exit(SD_MUTEX(un));
}
