sd.c revision 3138:7bbdcbfa4cd5
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>


/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 *
 * Notes for off-by-1 workaround:
 * -----------------------------
 *
 *    The SCSI READ_CAPACITY command returns the LBA of the last logical
 *    block, but sd once treated this number as the disk's capacity on
 *    the x86 platform.  Since LBAs are zero-based, the last block was
 *    lost on x86.
 *
 *    That workaround has now been removed.  So that the present sd
 *    driver still works with disks that were labeled/partitioned by the
 *    previous sd, the following accommodations are made:
 *
 *    1) Locate backup EFI label: sd searches the next-to-last block
 *       for a legacy backup EFI label.  If that fails, it turns to the
 *       last block for the backup EFI label.
 *    2) Clear backup EFI label: sd first searches the last block for
 *       the backup EFI label, and searches the next-to-last block only
 *       if the search of the last block fails.
 *    3) Calculate geometry: refer to sd_convert_geometry().  If
 *       increasing the capacity by 1 causes the disk's capacity to
 *       cross one of the limits in table CHS_values, the geometry info
 *       changes.  This raises an issue: if the primary VTOC label is
 *       destroyed, the format command line can restore it from the
 *       backup VTOC labels, and format locates the backup VTOC labels
 *       using the geometry reported by the sd driver.  Changing the
 *       geometry would therefore prevent format from finding the backup
 *       VTOC labels.  To eliminate this side effect for compatibility,
 *       sd uses (capacity - 1) to calculate the geometry.
 *    4) 1TB disks: some important data structures use 32-bit signed
 *       long/int (for example, daddr_t), so sd does not support a disk
 *       with a capacity larger than 1TB on a 32-bit platform.  However,
 *       a disk of exactly 1TB was treated as (1T - 512)B in the past
 *       and could have valid Solaris partitions.  To work around this,
 *       an exactly-1TB disk that has a Solaris fdisk partition is
 *       allowed to work with sd.
 */
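/*
 * Illustrative sketch only (not part of the driver): the backup-EFI-label
 * search order from item 1) above.  sd_sketch_read_gpt() is a hypothetical
 * helper standing in for the label read performed in sd_use_efi().
 */
#if 0
static int sd_sketch_read_gpt(struct sd_lun *un, diskaddr_t lba,
    efi_gpt_t *gpt);	/* hypothetical: returns 0 on a valid label */

static int
sd_sketch_find_backup_gpt(struct sd_lun *un, diskaddr_t capacity,
    efi_gpt_t *gpt)
{
	/* Disks labeled by the old off-by-one sd carry the backup GPT */
	/* in the next-to-last block (capacity - 2), so probe it first. */
	if (sd_sketch_read_gpt(un, capacity - 2, gpt) == 0)
		return (0);
	/* Otherwise fall back to the standard location, the last block. */
	return (sd_sketch_read_gpt(un, capacity - 1, gpt));
}
#endif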
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
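
/*
 * Illustrative sketch only (not part of the driver): one plausible shape
 * of the interconnect-type check described above, using the SCSA
 * "interconnect-type" capability and falling back to
 * SD_DEFAULT_INTERCONNECT_TYPE when the HBA does not report it.  The
 * helper name is hypothetical; the real check lives in sd_unit_attach().
 */
#if 0
static int
sd_sketch_interconnect_type(struct scsi_address *ap)
{
	switch (scsi_ifgetcap(ap, "interconnect-type", 1)) {
	case INTERCONNECT_FIBRE:
	case INTERCONNECT_SSA:
	case INTERCONNECT_FABRIC:
		/* Assume Fibre Channel (old ssd) behaviors. */
		return (SD_INTERCONNECT_FIBRE);
	default:
		/* Capability absent or unsupported (incl. 1394/USB): */
		/* fall back to the backward-compatibility default. */
		return (SD_DEFAULT_INTERCONNECT_TYPE);
	}
}
#endif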

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in-progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;

/*
 * Structs and globals for recording attached lun information.
 * This maintains a chain; each node in the chain represents a SCSI controller.
 * The structure records the number of luns attached to each target connected
 * to the controller.
 * For parallel SCSI devices only.
 */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;
	dev_info_t			*pdip;
	int				nlun[NTARGETS_WIDE];
};

/*
 * Flag to indicate the lun is attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))
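
/*
 * Illustrative sketch only (not part of the driver): walking the chain
 * above to find the lun count recorded for one target.  A simplified,
 * hypothetical stand-in for sd_scsi_get_target_lun_count(); the target
 * index is assumed to be below NTARGETS_WIDE.
 */
#if 0
static int
sd_sketch_lun_count(dev_info_t *pdip, int target)
{
	struct sd_scsi_hba_tgt_lun	*cp;
	int				cnt = -1;

	mutex_enter(&sd_scsi_target_lun_mutex);
	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			cnt = cp->nlun[target];
			break;
		}
	}
	mutex_exit(&sd_scsi_target_lun_mutex);
	return (cnt);
}
#endif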

/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))

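/*
 * Illustrative sketch only (not part of the driver): the basic shape of
 * a cached probe.  A simplified, hypothetical stand-in for
 * sd_scsi_probe_with_cache(); the assumption that only SCSIPROBE_NORESP
 * results are worth caching is this sketch's, not necessarily the
 * driver's.
 */
#if 0
static int
sd_sketch_probe_with_cache(struct sd_scsi_probe_cache *cp, int target,
    struct scsi_device *devp, int (*waitfn)())
{
	/* A remembered "no response" short-circuits the slow probe. */
	if (cp->cache[target] == SCSIPROBE_NORESP)
		return (SCSIPROBE_NORESP);
	cp->cache[target] = scsi_probe(devp, waitfn);
	return (cp->cache[target]);
}
#endif
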

/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or, perhaps,
 * something else as defined by the flags at a future time).  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets
 * of the parts of the string are as defined by the sizes in the
 * scsi_inquiry structure.  A device is matched against an entry only over
 * the length for which that entry's device_id string is defined.  Flags
 * defines which values are to be set in the driver from the properties
 * list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".  A sketch of this comparison follows the table.
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a legacy device.
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
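
/*
 * Illustrative sketch only (not part of the driver): a blank-collapsing
 * comparison in the spirit of the " NEC CDROM " entries above, where a
 * run of consecutive blanks compares equal to a single blank.  A
 * simplified, hypothetical stand-in for sd_blank_cmp(), not its actual
 * code.
 */
#if 0
static int
sd_sketch_blank_cmp(const char *tbl, const char *inq)
{
	while (*tbl != '\0' && *inq != '\0') {
		if (*tbl == ' ' && *inq == ' ') {
			/* Collapse the run of blanks on both sides */
			while (*tbl == ' ')
				tbl++;
			while (*inq == ' ')
				inq++;
			continue;
		}
		if (*tbl++ != *inq++)
			return (0);
	}
	/* Match only if both strings are exhausted together */
	return (*tbl == '\0' && *inq == '\0');
}
#endif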


/*
 * Return codes of sd_uselabel().
 */
#define	SD_LABEL_IS_VALID		0
#define	SD_LABEL_IS_INVALID		1

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_SATA)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
#define	WD_NODE			7	/* the whole disk minor */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }

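/*
 * Limits per CDB group: CDB length, opcode group bits, maximum
 * addressable LBA, and maximum transfer length in blocks (e.g. Group 0
 * commands are limited to a 21-bit LBA and an 8-bit block count).
 */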
static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
#define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
#define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
#define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties
#define	sd_validate_geometry		ssd_validate_geometry

#if defined(_SUNOS_VTOC_16)
#define	sd_convert_geometry		ssd_convert_geometry
#endif

#define	sd_resync_geom_caches		ssd_resync_geom_caches
#define	sd_read_fdisk			ssd_read_fdisk
#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_swap_efi_gpt			ssd_swap_efi_gpt
#define	sd_swap_efi_gpe			ssd_swap_efi_gpe
#define	sd_validate_efi			ssd_validate_efi
#define	sd_use_efi			ssd_use_efi
#define	sd_uselabel			ssd_uselabel
#define	sd_build_default_label		ssd_build_default_label
#define	sd_has_max_chs_vals		ssd_has_max_chs_vals
#define	sd_inq_fill			ssd_inq_fill
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid_block		ssd_get_devid_block
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_minor_nodes		ssd_create_minor_nodes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_uscsi_ioctl			ssd_uscsi_ioctl
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_dkio_get_geometry		ssd_dkio_get_geometry
#define	sd_dkio_set_geometry		ssd_dkio_set_geometry
#define	sd_dkio_get_partition		ssd_dkio_get_partition
#define	sd_dkio_set_partition		ssd_dkio_set_partition
#define	sd_dkio_partition		ssd_dkio_partition
#define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
#define	sd_dkio_get_efi			ssd_dkio_get_efi
#define	sd_build_user_vtoc		ssd_build_user_vtoc
#define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
#define	sd_dkio_set_efi			ssd_dkio_set_efi
#define	sd_build_label_vtoc		ssd_build_label_vtoc
#define	sd_write_label			ssd_write_label
#define	sd_clear_vtoc			ssd_clear_vtoc
#define	sd_clear_efi			ssd_clear_efi
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_dkio_get_mboot		ssd_dkio_get_mboot
#define	sd_dkio_set_mboot		ssd_dkio_set_mboot
#define	sd_setup_default_geometry	ssd_setup_default_geometry
#define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel SCSI
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

/*
 * Attached luns on target for parallel SCSI
 */
static void sd_scsi_target_lun_init(void);
static void sd_scsi_target_lun_fini(void);
static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);
static int  sd_validate_geometry(struct sd_lun *un, int path_flag);

#if defined(_SUNOS_VTOC_16)
static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
#endif

static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
	int path_flag);
static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
	int path_flag);
static void sd_get_physical_geometry(struct sd_lun *un,
	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
	int lbasize);
static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
static void sd_swap_efi_gpt(efi_gpt_t *);
static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
static int sd_validate_efi(efi_gpt_t *);
static int sd_use_efi(struct sd_lun *, int);
static void sd_build_default_label(struct sd_lun *un);

#if defined(_FIRMWARE_NEEDS_FDISK)
static int  sd_has_max_chs_vals(struct ipart *fdp);
#endif
static void sd_inq_fill(char *p, int l, char *s);


static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static daddr_t  sd_get_devid_block(struct sd_lun *un);
static int  sd_get_devid(struct sd_lun *un);
static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1
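
/*
 * Illustrative usage only (not a call made here): given the prototype
 * below, enabling the write cache while leaving the read-cache setting
 * untouched would be
 *	(void) sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE);
 */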

static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static dev_t sd_make_device(dev_info_t *devi);

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(struct sd_lun *un);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
#if defined(__i386) || defined(__amd64)
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
#endif /* defined(__i386) || defined(__amd64) */

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
	int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code, clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1509static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1510	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1511	uchar_t *bufaddr, uint_t buflen);
1512static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1513	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1514	uchar_t *bufaddr, uint_t buflen, char feature);
1515static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1516	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1517static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1518	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1519static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
1520	size_t buflen, daddr_t start_block, int path_flag);
1521#define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
1522	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
1523	path_flag)
1524#define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
1525	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
1526	path_flag)
1527
1528static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1529	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1530	uint16_t param_ptr, int path_flag);
1531
1532static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1533static void sd_free_rqs(struct sd_lun *un);
1534
1535static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1536	uchar_t *data, int len, int fmt);
1537static void sd_panic_for_res_conflict(struct sd_lun *un);
1538
1539/*
1540 * Disk Ioctl Function Prototypes
1541 */
1542static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1543static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1544static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1545static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1546	int geom_validated);
1547static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1548static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1549	int geom_validated);
1550static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1551static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1552	int geom_validated);
1553static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1554static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1555static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1556static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1557static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1558static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1559static int sd_write_label(dev_t dev);
1560static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1561static void sd_clear_vtoc(struct sd_lun *un);
1562static void sd_clear_efi(struct sd_lun *un);
1563static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1564static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1565static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1566static void sd_setup_default_geometry(struct sd_lun *un);
1567#if defined(__i386) || defined(__amd64)
1568static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1569#endif
1570
1571/*
1572 * Multi-host Ioctl Prototypes
1573 */
1574static int sd_check_mhd(dev_t dev, int interval);
1575static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1576static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1577static char *sd_sname(uchar_t status);
1578static void sd_mhd_resvd_recover(void *arg);
1579static void sd_resv_reclaim_thread();
1580static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1581static int sd_reserve_release(dev_t dev, int cmd);
1582static void sd_rmv_resv_reclaim_req(dev_t dev);
1583static void sd_mhd_reset_notify_cb(caddr_t arg);
1584static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1585	mhioc_inkeys_t *usrp, int flag);
1586static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1587	mhioc_inresvs_t *usrp, int flag);
1588static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1589static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1590static int sd_mhdioc_release(dev_t dev);
1591static int sd_mhdioc_register_devid(dev_t dev);
1592static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1593static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1594
1595/*
1596 * SCSI removable prototypes
1597 */
1598static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1599static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1600static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1601static int sr_pause_resume(dev_t dev, int mode);
1602static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1603static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1604static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1605static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1606static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1607static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1608static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1609static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1610static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1611static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1612static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1613static int sr_sector_mode(dev_t dev, uint32_t blksize);
1614static int sr_eject(dev_t dev);
1615static void sr_ejected(register struct sd_lun *un);
1616static int sr_check_wp(dev_t dev);
1617static int sd_check_media(dev_t dev, enum dkio_state state);
1618static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1619static void sd_delayed_cv_broadcast(void *arg);
1620static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1621static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1622
1623static int sd_log_page_supported(struct sd_lun *un, int log_page);
1624
1625/*
1626 * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1627 */
1628static void sd_check_for_writable_cd(struct sd_lun *un);
1629static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1630static void sd_wm_cache_destructor(void *wm, void *un);
1631static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1632	daddr_t endb, ushort_t typ);
1633static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1634	daddr_t endb);
1635static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1636static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
static void sd_read_modify_write_task(void *arg);
1638static int
1639sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1640	struct buf **bpp);
1641
1642
1643/*
1644 * Function prototypes for failfast support.
1645 */
1646static void sd_failfast_flushq(struct sd_lun *un);
1647static int sd_failfast_flushq_callback(struct buf *bp);
1648
1649/*
1650 * Function prototypes to check for lsi devices
1651 */
1652static void sd_is_lsi(struct sd_lun *un);
1653
1654/*
1655 * Function prototypes for x86 support
1656 */
1657#if defined(__i386) || defined(__amd64)
1658static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1659		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1660#endif
1661
1662/*
1663 * Constants for failfast support:
1664 *
1665 * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1666 * failfast processing being performed.
1667 *
1668 * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1669 * failfast processing on all bufs with B_FAILFAST set.
1670 */
1671
1672#define	SD_FAILFAST_INACTIVE		0
1673#define	SD_FAILFAST_ACTIVE		1
1674
1675/*
1676 * Bitmask to control behavior of buf(9S) flushes when a transition to
1677 * the failfast state occurs. Optional bits include:
1678 *
1679 * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1680 * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1681 * be flushed.
1682 *
1683 * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1684 * driver, in addition to the regular wait queue. This includes the xbuf
1685 * queues. When clear, only the driver's wait queue will be flushed.
1686 */
1687#define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1688#define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1689
1690/*
1691 * The default behavior is to only flush bufs that have B_FAILFAST set, but
1692 * to flush all queues within the driver.
1693 */
1694static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
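
/*
 * Tuning sketch (illustrative only, not a recommendation): both flush
 * behaviors could be enabled by OR-ing the bits together, e.g. via
 * /etc/system:
 *
 *	set sd:sd_failfast_flushctl=0x3
 *
 * i.e. (SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES).
 */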
1695
1696
1697/*
1698 * SD Testing Fault Injection
1699 */
1700#ifdef SD_FAULT_INJECTION
1701static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1702static void sd_faultinjection(struct scsi_pkt *pktp);
1703static void sd_injection_log(char *buf, struct sd_lun *un);
1704#endif
1705
1706/*
1707 * Device driver ops vector
1708 */
1709static struct cb_ops sd_cb_ops = {
1710	sdopen,			/* open */
1711	sdclose,		/* close */
1712	sdstrategy,		/* strategy */
1713	nodev,			/* print */
1714	sddump,			/* dump */
1715	sdread,			/* read */
1716	sdwrite,		/* write */
1717	sdioctl,		/* ioctl */
1718	nodev,			/* devmap */
1719	nodev,			/* mmap */
1720	nodev,			/* segmap */
1721	nochpoll,		/* poll */
1722	sd_prop_op,		/* cb_prop_op */
1723	0,			/* streamtab  */
1724	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1725	CB_REV,			/* cb_rev */
1726	sdaread, 		/* async I/O read entry point */
1727	sdawrite		/* async I/O write entry point */
1728};
1729
1730static struct dev_ops sd_ops = {
1731	DEVO_REV,		/* devo_rev, */
1732	0,			/* refcnt  */
1733	sdinfo,			/* info */
1734	nulldev,		/* identify */
1735	sdprobe,		/* probe */
1736	sdattach,		/* attach */
1737	sddetach,		/* detach */
1738	nodev,			/* reset */
1739	&sd_cb_ops,		/* driver operations */
1740	NULL,			/* bus operations */
1741	sdpower			/* power */
1742};
1743
1744
1745/*
1746 * This is the loadable module wrapper.
1747 */
1748#include <sys/modctl.h>
1749
1750static struct modldrv modldrv = {
1751	&mod_driverops,		/* Type of module. This one is a driver */
1752	SD_MODULE_NAME,		/* Module name. */
1753	&sd_ops			/* driver ops */
1754};
1755
1756
1757static struct modlinkage modlinkage = {
1758	MODREV_1,
1759	&modldrv,
1760	NULL
1761};
1762
1763
1764static struct scsi_asq_key_strings sd_additional_codes[] = {
1765	0x81, 0, "Logical Unit is Reserved",
1766	0x85, 0, "Audio Address Not Valid",
1767	0xb6, 0, "Media Load Mechanism Failed",
1768	0xB9, 0, "Audio Play Operation Aborted",
1769	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1770	0x53, 2, "Medium removal prevented",
1771	0x6f, 0, "Authentication failed during key exchange",
1772	0x6f, 1, "Key not present",
1773	0x6f, 2, "Key not established",
1774	0x6f, 3, "Read without proper authentication",
1775	0x6f, 4, "Mismatched region to this logical unit",
1776	0x6f, 5, "Region reset count error",
1777	0xffff, 0x0, NULL
1778};
1779
1780
1781/*
1782 * Struct for passing printing information for sense data messages
1783 */
1784struct sd_sense_info {
1785	int	ssi_severity;
1786	int	ssi_pfa_flag;
1787};
1788
1789/*
 * Table of function pointers for iostart-side routines. Separate "chains"
1791 * of layered function calls are formed by placing the function pointers
1792 * sequentially in the desired order. Functions are called according to an
1793 * incrementing table index ordering. The last function in each chain must
1794 * be sd_core_iostart(). The corresponding iodone-side routines are expected
1795 * in the sd_iodone_chain[] array.
1796 *
1797 * Note: It may seem more natural to organize both the iostart and iodone
1798 * functions together, into an array of structures (or some similar
 * organization) with a common index, rather than two separate arrays which
 * must be maintained in synchronization. The purpose of this division is
 * to achieve improved performance: individual arrays allow for more
1802 * effective cache line utilization on certain platforms.
1803 */
1804
1805typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1806
1807
1808static sd_chain_t sd_iostart_chain[] = {
1809
1810	/* Chain for buf IO for disk drive targets (PM enabled) */
1811	sd_mapblockaddr_iostart,	/* Index: 0 */
1812	sd_pm_iostart,			/* Index: 1 */
1813	sd_core_iostart,		/* Index: 2 */
1814
1815	/* Chain for buf IO for disk drive targets (PM disabled) */
1816	sd_mapblockaddr_iostart,	/* Index: 3 */
1817	sd_core_iostart,		/* Index: 4 */
1818
1819	/* Chain for buf IO for removable-media targets (PM enabled) */
1820	sd_mapblockaddr_iostart,	/* Index: 5 */
1821	sd_mapblocksize_iostart,	/* Index: 6 */
1822	sd_pm_iostart,			/* Index: 7 */
1823	sd_core_iostart,		/* Index: 8 */
1824
1825	/* Chain for buf IO for removable-media targets (PM disabled) */
1826	sd_mapblockaddr_iostart,	/* Index: 9 */
1827	sd_mapblocksize_iostart,	/* Index: 10 */
1828	sd_core_iostart,		/* Index: 11 */
1829
1830	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1831	sd_mapblockaddr_iostart,	/* Index: 12 */
1832	sd_checksum_iostart,		/* Index: 13 */
1833	sd_pm_iostart,			/* Index: 14 */
1834	sd_core_iostart,		/* Index: 15 */
1835
1836	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1837	sd_mapblockaddr_iostart,	/* Index: 16 */
1838	sd_checksum_iostart,		/* Index: 17 */
1839	sd_core_iostart,		/* Index: 18 */
1840
1841	/* Chain for USCSI commands (all targets) */
1842	sd_pm_iostart,			/* Index: 19 */
1843	sd_core_iostart,		/* Index: 20 */
1844
1845	/* Chain for checksumming USCSI commands (all targets) */
1846	sd_checksum_uscsi_iostart,	/* Index: 21 */
1847	sd_pm_iostart,			/* Index: 22 */
1848	sd_core_iostart,		/* Index: 23 */
1849
1850	/* Chain for "direct" USCSI commands (all targets) */
1851	sd_core_iostart,		/* Index: 24 */
1852
1853	/* Chain for "direct priority" USCSI commands (all targets) */
1854	sd_core_iostart,		/* Index: 25 */
1855};
1856
1857/*
1858 * Macros to locate the first function of each iostart chain in the
1859 * sd_iostart_chain[] array. These are located by the index in the array.
1860 */
1861#define	SD_CHAIN_DISK_IOSTART			0
1862#define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1863#define	SD_CHAIN_RMMEDIA_IOSTART		5
1864#define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1865#define	SD_CHAIN_CHKSUM_IOSTART			12
1866#define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1867#define	SD_CHAIN_USCSI_CMD_IOSTART		19
1868#define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1869#define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1870#define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1871
1872
1873/*
1874 * Table of function pointers for the iodone-side routines for the driver-
1875 * internal layering mechanism.  The calling sequence for iodone routines
1876 * uses a decrementing table index, so the last routine called in a chain
1877 * must be at the lowest array index location for that chain.  The last
1878 * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1879 * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1880 * of the functions in an iodone side chain must correspond to the ordering
1881 * of the iostart routines for that chain.  Note that there is no iodone
1882 * side routine that corresponds to sd_core_iostart(), so there is no
1883 * entry in the table for this.
1884 */
1885
1886static sd_chain_t sd_iodone_chain[] = {
1887
1888	/* Chain for buf IO for disk drive targets (PM enabled) */
1889	sd_buf_iodone,			/* Index: 0 */
1890	sd_mapblockaddr_iodone,		/* Index: 1 */
1891	sd_pm_iodone,			/* Index: 2 */
1892
1893	/* Chain for buf IO for disk drive targets (PM disabled) */
1894	sd_buf_iodone,			/* Index: 3 */
1895	sd_mapblockaddr_iodone,		/* Index: 4 */
1896
1897	/* Chain for buf IO for removable-media targets (PM enabled) */
1898	sd_buf_iodone,			/* Index: 5 */
1899	sd_mapblockaddr_iodone,		/* Index: 6 */
1900	sd_mapblocksize_iodone,		/* Index: 7 */
1901	sd_pm_iodone,			/* Index: 8 */
1902
1903	/* Chain for buf IO for removable-media targets (PM disabled) */
1904	sd_buf_iodone,			/* Index: 9 */
1905	sd_mapblockaddr_iodone,		/* Index: 10 */
1906	sd_mapblocksize_iodone,		/* Index: 11 */
1907
1908	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1909	sd_buf_iodone,			/* Index: 12 */
1910	sd_mapblockaddr_iodone,		/* Index: 13 */
1911	sd_checksum_iodone,		/* Index: 14 */
1912	sd_pm_iodone,			/* Index: 15 */
1913
1914	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1915	sd_buf_iodone,			/* Index: 16 */
1916	sd_mapblockaddr_iodone,		/* Index: 17 */
1917	sd_checksum_iodone,		/* Index: 18 */
1918
1919	/* Chain for USCSI commands (non-checksum targets) */
1920	sd_uscsi_iodone,		/* Index: 19 */
1921	sd_pm_iodone,			/* Index: 20 */
1922
1923	/* Chain for USCSI commands (checksum targets) */
1924	sd_uscsi_iodone,		/* Index: 21 */
1925	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */
1927
1928	/* Chain for "direct" USCSI commands (all targets) */
1929	sd_uscsi_iodone,		/* Index: 24 */
1930
1931	/* Chain for "direct priority" USCSI commands (all targets) */
1932	sd_uscsi_iodone,		/* Index: 25 */
1933};
1934
1935
1936/*
1937 * Macros to locate the "first" function in the sd_iodone_chain[] array for
1938 * each iodone-side chain. These are located by the array index, but as the
1939 * iodone side functions are called in a decrementing-index order, the
1940 * highest index number in each chain must be specified (as these correspond
1941 * to the first function in the iodone chain that will be called by the core
1942 * at IO completion time).
1943 */
1944
1945#define	SD_CHAIN_DISK_IODONE			2
1946#define	SD_CHAIN_DISK_IODONE_NO_PM		4
1947#define	SD_CHAIN_RMMEDIA_IODONE			8
1948#define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1949#define	SD_CHAIN_CHKSUM_IODONE			15
1950#define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1951#define	SD_CHAIN_USCSI_CMD_IODONE		20
1952#define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1953#define	SD_CHAIN_DIRECT_CMD_IODONE		24
1954#define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1955
1956
1957
1958
1959/*
1960 * Array to map a layering chain index to the appropriate initpkt routine.
1961 * The redundant entries are present so that the index used for accessing
1962 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1963 * with this table as well.
1964 */
1965typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1966
1967static sd_initpkt_t	sd_initpkt_map[] = {
1968
1969	/* Chain for buf IO for disk drive targets (PM enabled) */
1970	sd_initpkt_for_buf,		/* Index: 0 */
1971	sd_initpkt_for_buf,		/* Index: 1 */
1972	sd_initpkt_for_buf,		/* Index: 2 */
1973
1974	/* Chain for buf IO for disk drive targets (PM disabled) */
1975	sd_initpkt_for_buf,		/* Index: 3 */
1976	sd_initpkt_for_buf,		/* Index: 4 */
1977
1978	/* Chain for buf IO for removable-media targets (PM enabled) */
1979	sd_initpkt_for_buf,		/* Index: 5 */
1980	sd_initpkt_for_buf,		/* Index: 6 */
1981	sd_initpkt_for_buf,		/* Index: 7 */
1982	sd_initpkt_for_buf,		/* Index: 8 */
1983
1984	/* Chain for buf IO for removable-media targets (PM disabled) */
1985	sd_initpkt_for_buf,		/* Index: 9 */
1986	sd_initpkt_for_buf,		/* Index: 10 */
1987	sd_initpkt_for_buf,		/* Index: 11 */
1988
1989	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1990	sd_initpkt_for_buf,		/* Index: 12 */
1991	sd_initpkt_for_buf,		/* Index: 13 */
1992	sd_initpkt_for_buf,		/* Index: 14 */
1993	sd_initpkt_for_buf,		/* Index: 15 */
1994
1995	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1996	sd_initpkt_for_buf,		/* Index: 16 */
1997	sd_initpkt_for_buf,		/* Index: 17 */
1998	sd_initpkt_for_buf,		/* Index: 18 */
1999
2000	/* Chain for USCSI commands (non-checksum targets) */
2001	sd_initpkt_for_uscsi,		/* Index: 19 */
2002	sd_initpkt_for_uscsi,		/* Index: 20 */
2003
2004	/* Chain for USCSI commands (checksum targets) */
2005	sd_initpkt_for_uscsi,		/* Index: 21 */
2006	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */
2008
2009	/* Chain for "direct" USCSI commands (all targets) */
2010	sd_initpkt_for_uscsi,		/* Index: 24 */
2011
2012	/* Chain for "direct priority" USCSI commands (all targets) */
2013	sd_initpkt_for_uscsi,		/* Index: 25 */
2014
2015};
2016
2017
2018/*
 * Array to map a layering chain index to the appropriate destroypkt routine.
2020 * The redundant entries are present so that the index used for accessing
2021 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2022 * with this table as well.
2023 */
2024typedef void (*sd_destroypkt_t)(struct buf *);
2025
2026static sd_destroypkt_t	sd_destroypkt_map[] = {
2027
2028	/* Chain for buf IO for disk drive targets (PM enabled) */
2029	sd_destroypkt_for_buf,		/* Index: 0 */
2030	sd_destroypkt_for_buf,		/* Index: 1 */
2031	sd_destroypkt_for_buf,		/* Index: 2 */
2032
2033	/* Chain for buf IO for disk drive targets (PM disabled) */
2034	sd_destroypkt_for_buf,		/* Index: 3 */
2035	sd_destroypkt_for_buf,		/* Index: 4 */
2036
2037	/* Chain for buf IO for removable-media targets (PM enabled) */
2038	sd_destroypkt_for_buf,		/* Index: 5 */
2039	sd_destroypkt_for_buf,		/* Index: 6 */
2040	sd_destroypkt_for_buf,		/* Index: 7 */
2041	sd_destroypkt_for_buf,		/* Index: 8 */
2042
2043	/* Chain for buf IO for removable-media targets (PM disabled) */
2044	sd_destroypkt_for_buf,		/* Index: 9 */
2045	sd_destroypkt_for_buf,		/* Index: 10 */
2046	sd_destroypkt_for_buf,		/* Index: 11 */
2047
2048	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2049	sd_destroypkt_for_buf,		/* Index: 12 */
2050	sd_destroypkt_for_buf,		/* Index: 13 */
2051	sd_destroypkt_for_buf,		/* Index: 14 */
2052	sd_destroypkt_for_buf,		/* Index: 15 */
2053
2054	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2055	sd_destroypkt_for_buf,		/* Index: 16 */
2056	sd_destroypkt_for_buf,		/* Index: 17 */
2057	sd_destroypkt_for_buf,		/* Index: 18 */
2058
2059	/* Chain for USCSI commands (non-checksum targets) */
2060	sd_destroypkt_for_uscsi,	/* Index: 19 */
2061	sd_destroypkt_for_uscsi,	/* Index: 20 */
2062
2063	/* Chain for USCSI commands (checksum targets) */
2064	sd_destroypkt_for_uscsi,	/* Index: 21 */
2065	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */
2067
2068	/* Chain for "direct" USCSI commands (all targets) */
2069	sd_destroypkt_for_uscsi,	/* Index: 24 */
2070
2071	/* Chain for "direct priority" USCSI commands (all targets) */
2072	sd_destroypkt_for_uscsi,	/* Index: 25 */
2073
2074};
2075
2076
2077
2078/*
2079 * Array to map a layering chain index to the appropriate chain "type".
2080 * The chain type indicates a specific property/usage of the chain.
2081 * The redundant entries are present so that the index used for accessing
2082 * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2083 * with this table as well.
2084 */
2085
2086#define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2087#define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2088#define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2089#define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2090#define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2091						/* (for error recovery) */
2092
2093static int sd_chain_type_map[] = {
2094
2095	/* Chain for buf IO for disk drive targets (PM enabled) */
2096	SD_CHAIN_BUFIO,			/* Index: 0 */
2097	SD_CHAIN_BUFIO,			/* Index: 1 */
2098	SD_CHAIN_BUFIO,			/* Index: 2 */
2099
2100	/* Chain for buf IO for disk drive targets (PM disabled) */
2101	SD_CHAIN_BUFIO,			/* Index: 3 */
2102	SD_CHAIN_BUFIO,			/* Index: 4 */
2103
2104	/* Chain for buf IO for removable-media targets (PM enabled) */
2105	SD_CHAIN_BUFIO,			/* Index: 5 */
2106	SD_CHAIN_BUFIO,			/* Index: 6 */
2107	SD_CHAIN_BUFIO,			/* Index: 7 */
2108	SD_CHAIN_BUFIO,			/* Index: 8 */
2109
2110	/* Chain for buf IO for removable-media targets (PM disabled) */
2111	SD_CHAIN_BUFIO,			/* Index: 9 */
2112	SD_CHAIN_BUFIO,			/* Index: 10 */
2113	SD_CHAIN_BUFIO,			/* Index: 11 */
2114
2115	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2116	SD_CHAIN_BUFIO,			/* Index: 12 */
2117	SD_CHAIN_BUFIO,			/* Index: 13 */
2118	SD_CHAIN_BUFIO,			/* Index: 14 */
2119	SD_CHAIN_BUFIO,			/* Index: 15 */
2120
2121	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2122	SD_CHAIN_BUFIO,			/* Index: 16 */
2123	SD_CHAIN_BUFIO,			/* Index: 17 */
2124	SD_CHAIN_BUFIO,			/* Index: 18 */
2125
2126	/* Chain for USCSI commands (non-checksum targets) */
2127	SD_CHAIN_USCSI,			/* Index: 19 */
2128	SD_CHAIN_USCSI,			/* Index: 20 */
2129
2130	/* Chain for USCSI commands (checksum targets) */
2131	SD_CHAIN_USCSI,			/* Index: 21 */
2132	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */
2134
2135	/* Chain for "direct" USCSI commands (all targets) */
2136	SD_CHAIN_DIRECT,		/* Index: 24 */
2137
2138	/* Chain for "direct priority" USCSI commands (all targets) */
2139	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2140};
2141
2142
2143/* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2144#define	SD_IS_BUFIO(xp)			\
2145	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2146
2147/* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2148#define	SD_IS_DIRECT_PRIORITY(xp)	\
2149	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2150
2151
2152
2153/*
2154 * Struct, array, and macros to map a specific chain to the appropriate
2155 * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2156 *
2157 * The sd_chain_index_map[] array is used at attach time to set the various
2158 * un_xxx_chain type members of the sd_lun softstate to the specific layering
2159 * chain to be used with the instance. This allows different instances to use
 * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
 * and xb_chain_iodone index values in the sd_xbuf are initialized to these
 * values at sd_xbuf init time, this allows (1) layering chains to be changed
 * dynamically, without the use of locking; and (2) a layer to update the
2164 * xb_chain_io[start|done] member in a given xbuf with its current index value,
2165 * to allow for deferred processing of an IO within the same chain from a
2166 * different execution context.
2167 */
2168
2169struct sd_chain_index {
2170	int	sci_iostart_index;
2171	int	sci_iodone_index;
2172};
2173
2174static struct sd_chain_index	sd_chain_index_map[] = {
2175	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2176	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2177	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2178	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2179	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2180	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2181	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2182	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2183	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2184	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2185};
2186
2187
2188/*
2189 * The following are indexes into the sd_chain_index_map[] array.
2190 */
2191
2192/* un->un_buf_chain_type must be set to one of these */
2193#define	SD_CHAIN_INFO_DISK		0
2194#define	SD_CHAIN_INFO_DISK_NO_PM	1
2195#define	SD_CHAIN_INFO_RMMEDIA		2
2196#define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2197#define	SD_CHAIN_INFO_CHKSUM		4
2198#define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2199
2200/* un->un_uscsi_chain_type must be set to one of these */
2201#define	SD_CHAIN_INFO_USCSI_CMD		6
2202/* USCSI with PM disabled is the same as DIRECT */
2203#define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2204#define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2205
2206/* un->un_direct_chain_type must be set to one of these */
2207#define	SD_CHAIN_INFO_DIRECT_CMD	8
2208
2209/* un->un_priority_chain_type must be set to one of these */
2210#define	SD_CHAIN_INFO_PRIORITY_CMD	9
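
/*
 * Illustrative sketch (not actual attach code): at attach time the chain
 * type members of the softstate are set from the indexes above, e.g. for
 * a plain disk with PM enabled:
 *
 *	un->un_buf_chain_type      = SD_CHAIN_INFO_DISK;
 *	un->un_uscsi_chain_type    = SD_CHAIN_INFO_USCSI_CMD;
 *	un->un_direct_chain_type   = SD_CHAIN_INFO_DIRECT_CMD;
 *	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
 *
 * Each value selects a sd_chain_index_map[] entry, which in turn supplies
 * the xb_chain_iostart and xb_chain_iodone indexes used at sd_xbuf init
 * time.
 */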
2211
2212/* size for devid inquiries */
2213#define	MAX_INQUIRY_SIZE		0xF0
2214
2215/*
2216 * Macros used by functions to pass a given buf(9S) struct along to the
2217 * next function in the layering chain for further processing.
2218 *
2219 * In the following macros, passing more than three arguments to the called
2220 * routines causes the optimizer for the SPARC compiler to stop doing tail
2221 * call elimination which results in significant performance degradation.
2222 */
2223#define	SD_BEGIN_IOSTART(index, un, bp)	\
2224	((*(sd_iostart_chain[index]))(index, un, bp))
2225
2226#define	SD_BEGIN_IODONE(index, un, bp)	\
2227	((*(sd_iodone_chain[index]))(index, un, bp))
2228
2229#define	SD_NEXT_IOSTART(index, un, bp)				\
2230	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2231
2232#define	SD_NEXT_IODONE(index, un, bp)				\
2233	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
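
/*
 * Illustrative sketch (not actual driver code): a typical iostart-side
 * layer performs its processing and then hands the buf to the next layer
 * in the chain:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		... per-layer processing of bp ...
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 * Exactly three arguments are passed so that tail call elimination is
 * preserved, as noted above. ("sd_example_iostart" is hypothetical.)
 */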
2234
2235/*
2236 *    Function: _init
2237 *
2238 * Description: This is the driver _init(9E) entry point.
2239 *
2240 * Return Code: Returns the value from mod_install(9F) or
2241 *		ddi_soft_state_init(9F) as appropriate.
2242 *
2243 *     Context: Called when driver module loaded.
2244 */
2245
2246int
2247_init(void)
2248{
2249	int	err;
2250
2251	/* establish driver name from module name */
2252	sd_label = mod_modname(&modlinkage);
2253
2254	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2255		SD_MAXUNIT);
2256
2257	if (err != 0) {
2258		return (err);
2259	}
2260
2261	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2262	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2263	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2264
2265	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2266	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2267	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2268
2269	/*
	 * it's ok to init here even for fibre devices
2271	 */
2272	sd_scsi_probe_cache_init();
2273
2274	sd_scsi_target_lun_init();
2275
2276	/*
2277	 * Creating taskq before mod_install ensures that all callers (threads)
	 * that enter the module after a successful mod_install encounter
2279	 * a valid taskq.
2280	 */
2281	sd_taskq_create();
2282
2283	err = mod_install(&modlinkage);
2284	if (err != 0) {
2285		/* delete taskq if install fails */
2286		sd_taskq_delete();
2287
2288		mutex_destroy(&sd_detach_mutex);
2289		mutex_destroy(&sd_log_mutex);
2290		mutex_destroy(&sd_label_mutex);
2291
2292		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2293		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2294		cv_destroy(&sd_tr.srq_inprocess_cv);
2295
2296		sd_scsi_probe_cache_fini();
2297
2298		sd_scsi_target_lun_fini();
2299
2300		ddi_soft_state_fini(&sd_state);
2301		return (err);
2302	}
2303
2304	return (err);
2305}
2306
2307
2308/*
2309 *    Function: _fini
2310 *
2311 * Description: This is the driver _fini(9E) entry point.
2312 *
2313 * Return Code: Returns the value from mod_remove(9F)
2314 *
2315 *     Context: Called when driver module is unloaded.
2316 */
2317
2318int
2319_fini(void)
2320{
2321	int err;
2322
2323	if ((err = mod_remove(&modlinkage)) != 0) {
2324		return (err);
2325	}
2326
2327	sd_taskq_delete();
2328
2329	mutex_destroy(&sd_detach_mutex);
2330	mutex_destroy(&sd_log_mutex);
2331	mutex_destroy(&sd_label_mutex);
2332	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2333
2334	sd_scsi_probe_cache_fini();
2335
2336	sd_scsi_target_lun_fini();
2337
2338	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2339	cv_destroy(&sd_tr.srq_inprocess_cv);
2340
2341	ddi_soft_state_fini(&sd_state);
2342
2343	return (err);
2344}
2345
2346
2347/*
2348 *    Function: _info
2349 *
2350 * Description: This is the driver _info(9E) entry point.
2351 *
2352 *   Arguments: modinfop - pointer to the driver modinfo structure
2353 *
2354 * Return Code: Returns the value from mod_info(9F).
2355 *
2356 *     Context: Kernel thread context
2357 */
2358
2359int
2360_info(struct modinfo *modinfop)
2361{
2362	return (mod_info(&modlinkage, modinfop));
2363}
2364
2365
2366/*
2367 * The following routines implement the driver message logging facility.
 * They provide component- and level-based debug output filtering.
2369 * Output may also be restricted to messages for a single instance by
2370 * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2371 * to NULL, then messages for all instances are printed.
2372 *
2373 * These routines have been cloned from each other due to the language
2374 * constraints of macros and variable argument list processing.
2375 */
2376
2377
2378/*
2379 *    Function: sd_log_err
2380 *
2381 * Description: This routine is called by the SD_ERROR macro for debug
2382 *		logging of error conditions.
2383 *
2384 *   Arguments: comp - driver component being logged
 *		un   - pointer to driver soft state (unit) structure
2386 *		fmt  - error string and format to be logged
2387 */
2388
2389static void
2390sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2391{
2392	va_list		ap;
2393	dev_info_t	*dev;
2394
2395	ASSERT(un != NULL);
2396	dev = SD_DEVINFO(un);
2397	ASSERT(dev != NULL);
2398
2399	/*
2400	 * Filter messages based on the global component and level masks.
2401	 * Also print if un matches the value of sd_debug_un, or if
2402	 * sd_debug_un is set to NULL.
2403	 */
2404	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2405	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2406		mutex_enter(&sd_log_mutex);
2407		va_start(ap, fmt);
2408		(void) vsprintf(sd_log_buf, fmt, ap);
2409		va_end(ap);
2410		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2411		mutex_exit(&sd_log_mutex);
2412	}
2413#ifdef SD_FAULT_INJECTION
2414	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2415	if (un->sd_injection_mask & comp) {
2416		mutex_enter(&sd_log_mutex);
2417		va_start(ap, fmt);
2418		(void) vsprintf(sd_log_buf, fmt, ap);
2419		va_end(ap);
2420		sd_injection_log(sd_log_buf, un);
2421		mutex_exit(&sd_log_mutex);
2422	}
2423#endif
2424}
2425
2426
2427/*
2428 *    Function: sd_log_info
2429 *
2430 * Description: This routine is called by the SD_INFO macro for debug
2431 *		logging of general purpose informational conditions.
2432 *
2433 *   Arguments: comp - driver component being logged
 *		un   - pointer to driver soft state (unit) structure
2435 *		fmt  - info string and format to be logged
2436 */
2437
2438static void
2439sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2440{
2441	va_list		ap;
2442	dev_info_t	*dev;
2443
2444	ASSERT(un != NULL);
2445	dev = SD_DEVINFO(un);
2446	ASSERT(dev != NULL);
2447
2448	/*
2449	 * Filter messages based on the global component and level masks.
2450	 * Also print if un matches the value of sd_debug_un, or if
2451	 * sd_debug_un is set to NULL.
2452	 */
2453	if ((sd_component_mask & component) &&
2454	    (sd_level_mask & SD_LOGMASK_INFO) &&
2455	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2456		mutex_enter(&sd_log_mutex);
2457		va_start(ap, fmt);
2458		(void) vsprintf(sd_log_buf, fmt, ap);
2459		va_end(ap);
2460		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2461		mutex_exit(&sd_log_mutex);
2462	}
2463#ifdef SD_FAULT_INJECTION
2464	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2465	if (un->sd_injection_mask & component) {
2466		mutex_enter(&sd_log_mutex);
2467		va_start(ap, fmt);
2468		(void) vsprintf(sd_log_buf, fmt, ap);
2469		va_end(ap);
2470		sd_injection_log(sd_log_buf, un);
2471		mutex_exit(&sd_log_mutex);
2472	}
2473#endif
2474}
2475
2476
2477/*
2478 *    Function: sd_log_trace
2479 *
2480 * Description: This routine is called by the SD_TRACE macro for debug
2481 *		logging of trace conditions (i.e. function entry/exit).
2482 *
2483 *   Arguments: comp - driver component being logged
 *		un   - pointer to driver soft state (unit) structure
2485 *		fmt  - trace string and format to be logged
2486 */
2487
2488static void
2489sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2490{
2491	va_list		ap;
2492	dev_info_t	*dev;
2493
2494	ASSERT(un != NULL);
2495	dev = SD_DEVINFO(un);
2496	ASSERT(dev != NULL);
2497
2498	/*
2499	 * Filter messages based on the global component and level masks.
2500	 * Also print if un matches the value of sd_debug_un, or if
2501	 * sd_debug_un is set to NULL.
2502	 */
2503	if ((sd_component_mask & component) &&
2504	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2505	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2506		mutex_enter(&sd_log_mutex);
2507		va_start(ap, fmt);
2508		(void) vsprintf(sd_log_buf, fmt, ap);
2509		va_end(ap);
2510		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2511		mutex_exit(&sd_log_mutex);
2512	}
2513#ifdef SD_FAULT_INJECTION
2514	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2515	if (un->sd_injection_mask & component) {
2516		mutex_enter(&sd_log_mutex);
2517		va_start(ap, fmt);
2518		(void) vsprintf(sd_log_buf, fmt, ap);
2519		va_end(ap);
2520		sd_injection_log(sd_log_buf, un);
2521		mutex_exit(&sd_log_mutex);
2522	}
2523#endif
2524}
2525
2526
2527/*
2528 *    Function: sdprobe
2529 *
2530 * Description: This is the driver probe(9e) entry point function.
2531 *
2532 *   Arguments: devi - opaque device info handle
2533 *
2534 * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2535 *              DDI_PROBE_FAILURE: If the probe failed.
2536 *              DDI_PROBE_PARTIAL: If the instance is not present now,
2537 *				   but may be present in the future.
2538 */
2539
2540static int
2541sdprobe(dev_info_t *devi)
2542{
2543	struct scsi_device	*devp;
2544	int			rval;
2545	int			instance;
2546
2547	/*
2548	 * if it wasn't for pln, sdprobe could actually be nulldev
2549	 * in the "__fibre" case.
2550	 */
2551	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2552		return (DDI_PROBE_DONTCARE);
2553	}
2554
2555	devp = ddi_get_driver_private(devi);
2556
2557	if (devp == NULL) {
		/* Oops... nexus driver is misconfigured... */
2559		return (DDI_PROBE_FAILURE);
2560	}
2561
2562	instance = ddi_get_instance(devi);
2563
2564	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2565		return (DDI_PROBE_PARTIAL);
2566	}
2567
2568	/*
2569	 * Call the SCSA utility probe routine to see if we actually
2570	 * have a target at this SCSI nexus.
2571	 */
2572	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2573	case SCSIPROBE_EXISTS:
2574		switch (devp->sd_inq->inq_dtype) {
2575		case DTYPE_DIRECT:
2576			rval = DDI_PROBE_SUCCESS;
2577			break;
2578		case DTYPE_RODIRECT:
2579			/* CDs etc. Can be removable media */
2580			rval = DDI_PROBE_SUCCESS;
2581			break;
2582		case DTYPE_OPTICAL:
2583			/*
2584			 * Rewritable optical driver HP115AA
2585			 * Can also be removable media
2586			 */
2587
2588			/*
2589			 * Do not attempt to bind to  DTYPE_OPTICAL if
2590			 * pre solaris 9 sparc sd behavior is required
2591			 *
2592			 * If first time through and sd_dtype_optical_bind
2593			 * has not been set in /etc/system check properties
2594			 */
2595
2596			if (sd_dtype_optical_bind  < 0) {
2597			    sd_dtype_optical_bind = ddi_prop_get_int
2598				(DDI_DEV_T_ANY,	devi,	0,
2599				"optical-device-bind",	1);
2600			}
2601
2602			if (sd_dtype_optical_bind == 0) {
2603				rval = DDI_PROBE_FAILURE;
2604			} else {
2605				rval = DDI_PROBE_SUCCESS;
2606			}
2607			break;
2608
2609		case DTYPE_NOTPRESENT:
2610		default:
2611			rval = DDI_PROBE_FAILURE;
2612			break;
2613		}
2614		break;
2615	default:
2616		rval = DDI_PROBE_PARTIAL;
2617		break;
2618	}
2619
2620	/*
2621	 * This routine checks for resource allocation prior to freeing,
2622	 * so it will take care of the "smart probing" case where a
2623	 * scsi_probe() may or may not have been issued and will *not*
2624	 * free previously-freed resources.
2625	 */
2626	scsi_unprobe(devp);
2627	return (rval);
2628}
2629
2630
2631/*
2632 *    Function: sdinfo
2633 *
2634 * Description: This is the driver getinfo(9e) entry point function.
2635 * 		Given the device number, return the devinfo pointer from
2636 *		the scsi_device structure or the instance number
2637 *		associated with the dev_t.
2638 *
2639 *   Arguments: dip     - pointer to device info structure
2640 *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2641 *			  DDI_INFO_DEVT2INSTANCE)
2642 *		arg     - driver dev_t
2643 *		resultp - user buffer for request response
2644 *
2645 * Return Code: DDI_SUCCESS
2646 *              DDI_FAILURE
2647 */
2648/* ARGSUSED */
2649static int
2650sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2651{
2652	struct sd_lun	*un;
2653	dev_t		dev;
2654	int		instance;
2655	int		error;
2656
2657	switch (infocmd) {
2658	case DDI_INFO_DEVT2DEVINFO:
2659		dev = (dev_t)arg;
2660		instance = SDUNIT(dev);
2661		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2662			return (DDI_FAILURE);
2663		}
2664		*result = (void *) SD_DEVINFO(un);
2665		error = DDI_SUCCESS;
2666		break;
2667	case DDI_INFO_DEVT2INSTANCE:
2668		dev = (dev_t)arg;
2669		instance = SDUNIT(dev);
2670		*result = (void *)(uintptr_t)instance;
2671		error = DDI_SUCCESS;
2672		break;
2673	default:
2674		error = DDI_FAILURE;
2675	}
2676	return (error);
2677}
2678
2679/*
2680 *    Function: sd_prop_op
2681 *
2682 * Description: This is the driver prop_op(9e) entry point function.
2683 *		Return the number of blocks for the partition in question
2684 *		or forward the request to the property facilities.
2685 *
2686 *   Arguments: dev       - device number
2687 *		dip       - pointer to device info structure
2688 *		prop_op   - property operator
2689 *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2690 *		name      - pointer to property name
2691 *		valuep    - pointer or address of the user buffer
2692 *		lengthp   - property length
2693 *
2694 * Return Code: DDI_PROP_SUCCESS
2695 *              DDI_PROP_NOT_FOUND
2696 *              DDI_PROP_UNDEFINED
2697 *              DDI_PROP_NO_MEMORY
2698 *              DDI_PROP_BUF_TOO_SMALL
2699 */
2700
2701static int
2702sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2703	char *name, caddr_t valuep, int *lengthp)
2704{
2705	int		instance = ddi_get_instance(dip);
2706	struct sd_lun	*un;
2707	uint64_t	nblocks64;
2708
2709	/*
2710	 * Our dynamic properties are all device specific and size oriented.
2711	 * Requests issued under conditions where size is valid are passed
2712	 * to ddi_prop_op_nblocks with the size information, otherwise the
2713	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2714	 */
2715	un = ddi_get_soft_state(sd_state, instance);
2716	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2717	    (un->un_f_geometry_is_valid == FALSE)) {
2718		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2719		    name, valuep, lengthp));
2720	} else {
2721		/* get nblocks value */
2722		ASSERT(!mutex_owned(SD_MUTEX(un)));
2723		mutex_enter(SD_MUTEX(un));
2724		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2725		mutex_exit(SD_MUTEX(un));
2726
2727		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2728		    name, valuep, lengthp, nblocks64));
2729	}
2730}
2731
2732/*
2733 * The following functions are for smart probing:
2734 * sd_scsi_probe_cache_init()
2735 * sd_scsi_probe_cache_fini()
2736 * sd_scsi_clear_probe_cache()
2737 * sd_scsi_probe_with_cache()
2738 */
2739
2740/*
2741 *    Function: sd_scsi_probe_cache_init
2742 *
2743 * Description: Initializes the probe response cache mutex and head pointer.
2744 *
2745 *     Context: Kernel thread context
2746 */
2747
2748static void
2749sd_scsi_probe_cache_init(void)
2750{
2751	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2752	sd_scsi_probe_cache_head = NULL;
2753}
2754
2755
2756/*
2757 *    Function: sd_scsi_probe_cache_fini
2758 *
2759 * Description: Frees all resources associated with the probe response cache.
2760 *
2761 *     Context: Kernel thread context
2762 */
2763
2764static void
2765sd_scsi_probe_cache_fini(void)
2766{
2767	struct sd_scsi_probe_cache *cp;
2768	struct sd_scsi_probe_cache *ncp;
2769
2770	/* Clean up our smart probing linked list */
2771	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2772		ncp = cp->next;
2773		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2774	}
2775	sd_scsi_probe_cache_head = NULL;
2776	mutex_destroy(&sd_scsi_probe_cache_mutex);
2777}
2778
2779
2780/*
2781 *    Function: sd_scsi_clear_probe_cache
2782 *
2783 * Description: This routine clears the probe response cache. This is
2784 *		done when open() returns ENXIO so that when deferred
2785 *		attach is attempted (possibly after a device has been
2786 *		turned on) we will retry the probe. Since we don't know
2787 *		which target we failed to open, we just clear the
2788 *		entire cache.
2789 *
2790 *     Context: Kernel thread context
2791 */
2792
2793static void
2794sd_scsi_clear_probe_cache(void)
2795{
2796	struct sd_scsi_probe_cache	*cp;
2797	int				i;
2798
2799	mutex_enter(&sd_scsi_probe_cache_mutex);
2800	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2801		/*
2802		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2803		 * force probing to be performed the next time
2804		 * sd_scsi_probe_with_cache is called.
2805		 */
2806		for (i = 0; i < NTARGETS_WIDE; i++) {
2807			cp->cache[i] = SCSIPROBE_EXISTS;
2808		}
2809	}
2810	mutex_exit(&sd_scsi_probe_cache_mutex);
2811}
2812
2813
2814/*
2815 *    Function: sd_scsi_probe_with_cache
2816 *
2817 * Description: This routine implements support for a scsi device probe
2818 *		with cache. The driver maintains a cache of the target
2819 *		responses to scsi probes. If we get no response from a
2820 *		target during a probe inquiry, we remember that, and we
2821 *		avoid additional calls to scsi_probe on non-zero LUNs
2822 *		on the same target until the cache is cleared. By doing
2823 *		so we avoid the 1/4 sec selection timeout for nonzero
2824 *		LUNs. lun0 of a target is always probed.
2825 *
2826 *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2827 *              waitfunc - indicates what the allocator routines should
2828 *			   do when resources are not available. This value
2829 *			   is passed on to scsi_probe() when that routine
2830 *			   is called.
2831 *
2832 * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2833 *		otherwise the value returned by scsi_probe(9F).
2834 *
2835 *     Context: Kernel thread context
2836 */
2837
2838static int
2839sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2840{
2841	struct sd_scsi_probe_cache	*cp;
2842	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2843	int		lun, tgt;
2844
2845	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2846	    SCSI_ADDR_PROP_LUN, 0);
2847	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2848	    SCSI_ADDR_PROP_TARGET, -1);
2849
2850	/* Make sure caching enabled and target in range */
2851	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2852		/* do it the old way (no cache) */
2853		return (scsi_probe(devp, waitfn));
2854	}
2855
2856	mutex_enter(&sd_scsi_probe_cache_mutex);
2857
2858	/* Find the cache for this scsi bus instance */
2859	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2860		if (cp->pdip == pdip) {
2861			break;
2862		}
2863	}
2864
2865	/* If we can't find a cache for this pdip, create one */
2866	if (cp == NULL) {
2867		int i;
2868
2869		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2870		    KM_SLEEP);
2871		cp->pdip = pdip;
2872		cp->next = sd_scsi_probe_cache_head;
2873		sd_scsi_probe_cache_head = cp;
2874		for (i = 0; i < NTARGETS_WIDE; i++) {
2875			cp->cache[i] = SCSIPROBE_EXISTS;
2876		}
2877	}
2878
2879	mutex_exit(&sd_scsi_probe_cache_mutex);
2880
2881	/* Recompute the cache for this target if LUN zero */
2882	if (lun == 0) {
2883		cp->cache[tgt] = SCSIPROBE_EXISTS;
2884	}
2885
2886	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2887	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2888		return (SCSIPROBE_NORESP);
2889	}
2890
2891	/* Do the actual probe; save & return the result */
2892	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2893}
2894
2895
2896/*
2897 *    Function: sd_scsi_target_lun_init
2898 *
2899 * Description: Initializes the attached lun chain mutex and head pointer.
2900 *
2901 *     Context: Kernel thread context
2902 */
2903
2904static void
2905sd_scsi_target_lun_init(void)
2906{
2907	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
2908	sd_scsi_target_lun_head = NULL;
2909}
2910
2911
2912/*
2913 *    Function: sd_scsi_target_lun_fini
2914 *
2915 * Description: Frees all resources associated with the attached lun
2916 *              chain
2917 *
2918 *     Context: Kernel thread context
2919 */
2920
2921static void
2922sd_scsi_target_lun_fini(void)
2923{
2924	struct sd_scsi_hba_tgt_lun	*cp;
2925	struct sd_scsi_hba_tgt_lun	*ncp;
2926
2927	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
2928		ncp = cp->next;
2929		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
2930	}
2931	sd_scsi_target_lun_head = NULL;
2932	mutex_destroy(&sd_scsi_target_lun_mutex);
2933}
2934
2935
2936/*
2937 *    Function: sd_scsi_get_target_lun_count
2938 *
2939 * Description: This routine will check in the attached lun chain to see
2940 * 		how many luns are attached on the required SCSI controller
 * 		and target. Currently, some capabilities such as tagged
 *		queueing are supported per target by the HBA, so all luns in
 *		a target have the same capabilities. Based on this assumption,
2944 * 		sd should only set these capabilities once per target. This
2945 *		function is called when sd needs to decide how many luns
2946 *		already attached on a target.
2947 *
2948 *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
2949 *			  controller device.
2950 *              target	- The target ID on the controller's SCSI bus.
2951 *
2952 * Return Code: The number of luns attached on the required target and
2953 *		controller.
2954 *		-1 if target ID is not in parallel SCSI scope or the given
2955 * 		dip is not in the chain.
2956 *
2957 *     Context: Kernel thread context
2958 */
2959
2960static int
2961sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
2962{
2963	struct sd_scsi_hba_tgt_lun	*cp;
2964
2965	if ((target < 0) || (target >= NTARGETS_WIDE)) {
2966		return (-1);
2967	}
2968
2969	mutex_enter(&sd_scsi_target_lun_mutex);
2970
2971	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
2972		if (cp->pdip == dip) {
2973			break;
2974		}
2975	}
2976
2977	mutex_exit(&sd_scsi_target_lun_mutex);
2978
2979	if (cp == NULL) {
2980		return (-1);
2981	}
2982
2983	return (cp->nlun[target]);
2984}
2985
2986
2987/*
2988 *    Function: sd_scsi_update_lun_on_target
2989 *
2990 * Description: This routine is used to update the attached lun chain when a
2991 *		lun is attached or detached on a target.
2992 *
2993 *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
2994 *                        controller device.
2995 *              target  - The target ID on the controller's SCSI bus.
2996 *		flag	- Indicate the lun is attached or detached.
2997 *
2998 *     Context: Kernel thread context
2999 */
3000
3001static void
3002sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
3003{
3004	struct sd_scsi_hba_tgt_lun	*cp;
3005
3006	mutex_enter(&sd_scsi_target_lun_mutex);
3007
3008	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3009		if (cp->pdip == dip) {
3010			break;
3011		}
3012	}
3013
3014	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
3015		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
3016		    KM_SLEEP);
3017		cp->pdip = dip;
3018		cp->next = sd_scsi_target_lun_head;
3019		sd_scsi_target_lun_head = cp;
3020	}
3021
3022	mutex_exit(&sd_scsi_target_lun_mutex);
3023
3024	if (cp != NULL) {
3025		if (flag == SD_SCSI_LUN_ATTACH) {
			cp->nlun[target]++;
		} else {
			cp->nlun[target]--;
3029		}
3030	}
3031}
3032
3033
3034/*
3035 *    Function: sd_spin_up_unit
3036 *
3037 * Description: Issues the following commands to spin-up the device:
3038 *		START STOP UNIT, and INQUIRY.
3039 *
3040 *   Arguments: un - driver soft state (unit) structure
3041 *
3042 * Return Code: 0 - success
3043 *		EIO - failure
3044 *		EACCES - reservation conflict
3045 *
3046 *     Context: Kernel thread context
3047 */
3048
3049static int
3050sd_spin_up_unit(struct sd_lun *un)
3051{
3052	size_t	resid		= 0;
3053	int	has_conflict	= FALSE;
3054	uchar_t *bufaddr;
3055
3056	ASSERT(un != NULL);
3057
3058	/*
3059	 * Send a throwaway START UNIT command.
3060	 *
3061	 * If we fail on this, we don't care presently what precisely
3062	 * is wrong.  EMC's arrays will also fail this with a check
3063	 * condition (0x2/0x4/0x3) if the device is "inactive," but
3064	 * we don't want to fail the attach because it may become
3065	 * "active" later.
3066	 */
3067	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
3068	    == EACCES)
3069		has_conflict = TRUE;
3070
3071	/*
3072	 * Send another INQUIRY command to the target. This is necessary for
3073	 * non-removable media direct access devices because their INQUIRY data
3074	 * may not be fully qualified until they are spun up (perhaps via the
3075	 * START command above).  Note: This seems to be needed for some
3076	 * legacy devices only.) The INQUIRY command should succeed even if a
3077	 * Reservation Conflict is present.
3078	 */
3079	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
3080	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
3081		kmem_free(bufaddr, SUN_INQSIZE);
3082		return (EIO);
3083	}
3084
3085	/*
3086	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
3087	 * Note that this routine does not return a failure here even if the
3088	 * INQUIRY command did not return any data.  This is a legacy behavior.
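	 * (SUN_INQSIZE - resid) is the number of inquiry bytes actually
	 * returned by the device, since resid is the residual count for
	 * the transfer.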
	 */
	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
	}

	kmem_free(bufaddr, SUN_INQSIZE);

	/* If we hit a reservation conflict above, tell the caller. */
	if (has_conflict == TRUE) {
		return (EACCES);
	}

	return (0);
}

#ifdef _LP64
/*
 *    Function: sd_enable_descr_sense
 *
 * Description: This routine attempts to select descriptor sense format
 *		using the Control mode page.  Devices that support 64 bit
 *		LBAs (for >2TB luns) should also implement descriptor
 *		sense data, so we will call this function whenever we see
 *		a lun larger than 2TB.  If for some reason the device
 *		supports 64 bit LBAs but doesn't support descriptor sense,
 *		presumably the mode select will fail.  Everything will
 *		continue to work normally except that we will not get
 *		complete sense data for commands that fail with an LBA
 *		larger than 32 bits.
 *
 *   Arguments: un - driver soft state (unit) structure
 *
 *     Context: Kernel thread context only
 */

static void
sd_enable_descr_sense(struct sd_lun *un)
{
	uchar_t			*header;
	struct mode_control_scsi3 *ctrl_bufp;
	size_t			buflen;
	size_t			bd_len;

	/*
	 * Read MODE SENSE page 0xA, Control Mode Page
	 */
	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
	    sizeof (struct mode_control_scsi3);
	header = kmem_zalloc(buflen, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
		goto eds_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	bd_len = ((struct mode_header *)header)->bdesc_length;

	ctrl_bufp = (struct mode_control_scsi3 *)
	    (header + MODE_HEADER_LENGTH + bd_len);

	/*
	 * Clear PS bit for MODE SELECT
	 */
	ctrl_bufp->mode_page.ps = 0;

	/*
	 * Set D_SENSE to enable descriptor sense format.
	 */
	ctrl_bufp->d_sense = 1;

	/*
	 * Use MODE SELECT to commit the change to the D_SENSE bit
	 */
	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode select ctrl page failed\n");
		goto eds_exit;
	}

eds_exit:
	kmem_free(header, buflen);
}

/*
 *    Function: sd_reenable_dsense_task
 *
 * Description: Re-enable descriptor sense after device or bus reset
 *
 *     Context: Executes in a taskq() thread context
 */
static void
sd_reenable_dsense_task(void *arg)
{
	struct	sd_lun	*un = arg;

	ASSERT(un != NULL);
	sd_enable_descr_sense(un);
}
#endif /* _LP64 */

/*
 *    Function: sd_set_mmc_caps
 *
 * Description: This routine determines if the device is MMC compliant and if
 *		the device supports CDDA via a mode sense of the CD/DVD
 *		capabilities mode page. It also checks if the device is a
 *		DVD-RAM writable device.
 *
 *   Arguments: un - driver soft state (unit) structure
 *
 *     Context: Kernel thread context only
 */

static void
sd_set_mmc_caps(struct sd_lun *un)
{
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct uscsi_cmd		com;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;

	ASSERT(un != NULL);

	/*
	 * The flags which will be set in this function are: MMC compliant,
	 * DVD-RAM writable device, and CDDA support. Initialize them to
	 * FALSE; if a capability is detected, the flag will be set to TRUE.
	 */
	un->un_f_mmc_cap = FALSE;
	un->un_f_dvdram_writable_device = FALSE;
	un->un_f_cfg_cdda = FALSE;

	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);

	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	/*
	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds, the device is assumed to be MMC.
	 */
	un->un_f_mmc_cap = TRUE;

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) |
	    sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor
		 * length so we cannot determine if the device supports
		 * CDDA. However, we still indicate the device is MMC
		 * according to the successful response to the page
		 * 0x2A mode sense request.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_set_mmc_caps: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* See if read CDDA is supported */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
	    bd_len);
	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;

	/* See if writing DVD RAM is supported. */
	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
	if (un->un_f_dvdram_writable_device == TRUE) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD will not have
	 * these capabilities.
	 */
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  An RRD
	 * device is identified by the features RANDOM_WRITABLE and
	 * HARDWARE_DEFECT_MANAGEMENT.
	 */
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE);
	if (rtn != 0) {
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT);
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
			un->un_f_dvdram_writable_device = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}

/*
 *    Function: sd_check_for_writable_cd
 *
 * Description: This routine determines if the media in the device is
 *		writable or not. It uses the GET CONFIGURATION command (0x46)
 *		to determine if the media is writable.
 *
 *   Arguments: un - driver soft state (unit) structure
 *
 *     Context: Never called at interrupt context.
 */

static void
sd_check_for_writable_cd(struct sd_lun *un)
{
	struct uscsi_cmd		com;
	uchar_t				*out_data;
	uchar_t				*rqbuf;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Initialize writable media to FALSE; only if the configuration
	 * info tells us otherwise will we set it to TRUE.
	 */
	un->un_f_mmc_writable_media = FALSE;
	mutex_exit(SD_MUTEX(un));

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
	    out_data, SD_PROFILE_HEADER_LEN);

	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for writable DVD.
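		 * Bytes 6-7 of the GET CONFIGURATION header hold the
		 * current profile; profile 0x0012 identifies DVD-RAM.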
		 */
		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
			un->un_f_mmc_writable_media = TRUE;
			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
			kmem_free(rqbuf, SENSE_LENGTH);
			return;
		}
	}

	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);

	/*
	 * Determine if this is an RRD type device.
	 */
	mutex_exit(SD_MUTEX(un));
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
	mutex_enter(SD_MUTEX(un));
	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor length so
		 * we cannot check the mode page.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_check_for_writable_cd: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD device will not have
	 * these capabilities.
	 */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  An RRD device is identified
	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
	 */
	mutex_exit(SD_MUTEX(un));
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE);
	if (rtn != 0) {
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		mutex_enter(SD_MUTEX(un));
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT);
	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features as current.
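		 * (Byte 10 of each returned feature area is the feature
		 * descriptor's flags byte; bit 0x1 indicates the feature
		 * is current.)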
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_rw[10] & 0x1) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
		    (out_data_hd[10] & 0x1)) {
			un->un_f_mmc_writable_media = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}

/*
 *    Function: sd_read_unit_properties
 *
 * Description: The following implements a property lookup mechanism.
 *		Properties for particular disks (keyed on vendor, model
 *		and rev numbers) are sought in the sd.conf file via
 *		sd_process_sdconf_file(), and if not found there, are
 *		looked for in a list hardcoded in this driver via
 *		sd_process_sdconf_table().  Once located, the properties
 *		are used to update the driver unit structure.
 *
 *   Arguments: un - driver soft state (unit) structure
 */

static void
sd_read_unit_properties(struct sd_lun *un)
{
	/*
	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
	 * the "sd-config-list" property (from the sd.conf file) or if
	 * there was not a match for the inquiry vid/pid. If this event
	 * occurs the static driver configuration table is searched for
	 * a match.
	 */
	ASSERT(un != NULL);
	if (sd_process_sdconf_file(un) == SD_FAILURE) {
		sd_process_sdconf_table(un);
	}

	/* check for LSI device */
	sd_is_lsi(un);
}


/*
 *    Function: sd_process_sdconf_file
 *
 * Description: Use ddi_getlongprop to obtain the properties from the
 *		driver's config file (i.e., sd.conf) and update the driver
 *		soft state structure accordingly.
 *
 *   Arguments: un - driver soft state (unit) structure
 *
 * Return Code: SD_SUCCESS - The properties were successfully set according
 *			     to the driver configuration file.
 *		SD_FAILURE - The driver config list was not obtained or
 *			     there was no vid/pid match. This indicates that
 *			     the static config table should be used.
 *
 * The config file has a property, "sd-config-list", which consists of
 * one or more duplets as follows:
 *
 *  sd-config-list=
 *	<duplet>,
 *	[<duplet>,]
 *	[<duplet>];
 *
 * The structure of each duplet is as follows:
 *
 *  <duplet>:= <vid+pid>,<data-property-name_list>
 *
 * The first entry of the duplet is the device ID string (the concatenated
 * vid & pid; not to be confused with a device_id).  This is defined in
 * the same way as in the sd_disk_table.
 *
 * The second part of the duplet is a string that identifies a
 * data-property-name-list. The data-property-name-list is defined as
 * follows:
 *
 *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
 *
 * The syntax of <data-property-name> depends on the <version> field.
 *
 * If version = SD_CONF_VERSION_1, we have the following syntax:
 *
 * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
 *
 * where the prop0 value will be used to set prop0 if bit0 is set in the
 * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
 *
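 * For example, a hypothetical sd.conf fragment (the vid/pid, list name,
 * and values below are illustrative only, not a shipped entry) might be:
 *
 *	sd-config-list= "SEAGATE ST32550W", "example-data";
 *	example-data= 1,0x1,32;
 *
 * This declares version 1 data with only bit0 set in the flags, so prop0
 * (32) is applied to whichever tunable bit0 selects.
 *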
 */

static int
sd_process_sdconf_file(struct sd_lun *un)
{
	char	*config_list = NULL;
	int	config_list_len;
	int	len;
	int	dupletlen = 0;
	char	*vidptr;
	int	vidlen;
	char	*dnlist_ptr;
	char	*dataname_ptr;
	int	dnlist_len;
	int	dataname_len;
	int	*data_list;
	int	data_list_len;
	int	rval = SD_FAILURE;
	int	i;

	ASSERT(un != NULL);

	/* Obtain the configuration list associated with the .conf file */
	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
	    sd_config_list, (caddr_t)&config_list, &config_list_len)
	    != DDI_PROP_SUCCESS) {
		return (SD_FAILURE);
	}

	/*
	 * Compare vids in each duplet to the inquiry vid - if a match is
	 * made, get the data value and update the soft state structure
	 * accordingly.
	 *
	 * Note: This algorithm is complex and difficult to maintain. It should
	 * be replaced with a more robust implementation.
	 */
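	/*
	 * The property value is a sequence of NUL-separated strings: each
	 * vid/pid entry is followed by its data-property-name list, e.g.
	 * (hypothetical layout) "SEAGATE ST32550W\0example-data\0".
	 * dupletlen tracks how far vidptr must advance to reach the next
	 * vid entry.
	 */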
	for (len = config_list_len, vidptr = config_list; len > 0;
	    vidptr += dupletlen, len -= dupletlen) {
		/*
		 * Note: The assumption here is that each vid entry is on
		 * a line separate from its associated duplet data.
		 */
		vidlen = dupletlen = (int)strlen(vidptr);
		if ((vidlen == 0) ||
		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
			dupletlen++;
			continue;
		}

		/*
		 * dnlist contains 1 or more blank-separated
		 * data-property-name entries
		 */
		dnlist_ptr = vidptr + vidlen + 1;
		dnlist_len = (int)strlen(dnlist_ptr);
		dupletlen += dnlist_len + 2;

		/*
		 * Set a pointer for the first data-property-name
		 * entry in the list
		 */
		dataname_ptr = dnlist_ptr;
		dataname_len = 0;

		/*
		 * Loop through all data-property-name entries in the
		 * data-property-name-list setting the properties for each.
		 */
		while (dataname_len < dnlist_len) {
			int version;

			/*
			 * Determine the length of the current
			 * data-property-name entry by indexing until a
			 * blank or NUL is encountered. When a space is
			 * encountered, reset it to a NUL for compliance
			 * with ddi_getlongprop().
			 */
			for (i = 0; ((dataname_ptr[i] != ' ') &&
			    (dataname_ptr[i] != '\0')); i++) {
				;
			}

			dataname_len += i;
			/* If not NUL terminated, make it so */
			if (dataname_ptr[i] == ' ') {
				dataname_ptr[i] = '\0';
			}
			dataname_len++;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_process_sdconf_file: disk:%s, data:%s\n",
			    vidptr, dataname_ptr);

			/* Get the data list */
			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
			    != DDI_PROP_SUCCESS) {
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_process_sdconf_file: data property (%s)"
				    " has no value\n", dataname_ptr);
				dataname_ptr = dnlist_ptr + dataname_len;
				continue;
			}

			version = data_list[0];

			if (version == SD_CONF_VERSION_1) {
				sd_tunables values;

				/* Set the properties */
				if (sd_chk_vers1_data(un, data_list[1],
				    &data_list[2], data_list_len, dataname_ptr)
				    == SD_SUCCESS) {
					sd_get_tunables_from_conf(un,
					    data_list[1], &data_list[2],
					    &values);
					sd_set_vers1_properties(un,
					    data_list[1], &values);
					rval = SD_SUCCESS;
				} else {
					rval = SD_FAILURE;
				}
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "data property %s version 0x%x is invalid.",
				    dataname_ptr, version);
				rval = SD_FAILURE;
			}
			kmem_free(data_list, data_list_len);
			dataname_ptr = dnlist_ptr + dataname_len;
		}
	}

	/* free up the memory allocated by ddi_getlongprop */
	if (config_list) {
		kmem_free(config_list, config_list_len);
	}

	return (rval);
}

/*
 *    Function: sd_get_tunables_from_conf()
 *
 * Description: This function reads the data list from the sd.conf file and
 *		pulls the values that can have numeric values as arguments,
 *		placing each value in the appropriate sd_tunables member.
 *		Since the order of the data list members varies across
 *		platforms, this function reads them from the data list in a
 *		platform-specific order and places them into the correct
 *		sd_tunables member, which is consistent across all platforms.
 */
static void
sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
    sd_tunables *values)
{
	int i;
	int mask;

	bzero(values, sizeof (sd_tunables));

	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {

		mask = 1 << i;
		if (mask > flags) {
			break;
		}
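
		/*
		 * Property slots are positional: the value for bit i is
		 * always read from data_list[i], whether or not lower
		 * bits are set in flags.
		 */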

		switch (mask & flags) {
		case 0:	/* This mask bit not set in flags */
			continue;
		case SD_CONF_BSET_THROTTLE:
			values->sdt_throttle = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: throttle = %d\n",
			    values->sdt_throttle);
			break;
		case SD_CONF_BSET_CTYPE:
			values->sdt_ctype = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: ctype = %d\n",
			    values->sdt_ctype);
			break;
		case SD_CONF_BSET_NRR_COUNT:
			values->sdt_not_rdy_retries = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
			    values->sdt_not_rdy_retries);
			break;
		case SD_CONF_BSET_BSY_RETRY_COUNT:
			values->sdt_busy_retries = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: busy_retries = %d\n",
			    values->sdt_busy_retries);
			break;
		case SD_CONF_BSET_RST_RETRIES:
			values->sdt_reset_retries = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: reset_retries = %d\n",
			    values->sdt_reset_retries);
			break;
		case SD_CONF_BSET_RSV_REL_TIME:
			values->sdt_reserv_rel_time = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
			    values->sdt_reserv_rel_time);
			break;
		case SD_CONF_BSET_MIN_THROTTLE:
			values->sdt_min_throttle = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: min_throttle = %d\n",
			    values->sdt_min_throttle);
			break;
		case SD_CONF_BSET_DISKSORT_DISABLED:
			values->sdt_disk_sort_dis = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
			    values->sdt_disk_sort_dis);
			break;
		case SD_CONF_BSET_LUN_RESET_ENABLED:
			values->sdt_lun_reset_enable = data_list[i];
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
			    "\n", values->sdt_lun_reset_enable);
			break;
		}
	}
}

/*
 *    Function: sd_process_sdconf_table
 *
 * Description: Search the static configuration table for a match on the
 *		inquiry vid/pid and update the driver soft state structure
 *		according to the table property values for the device.
 *
 *		The form of a configuration table entry is:
 *		  <vid+pid>,<flags>,<property-data>
 *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
 *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
 *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
 *
 *   Arguments: un - driver soft state (unit) structure
 */

static void
sd_process_sdconf_table(struct sd_lun *un)
{
	char	*id = NULL;
	int	table_index;
	int	idlen;

	ASSERT(un != NULL);
	for (table_index = 0; table_index < sd_disk_table_size;
	    table_index++) {
		id = sd_disk_table[table_index].device_id;
		idlen = strlen(id);
		if (idlen == 0) {
			continue;
		}

		/*
		 * The static configuration table currently does not
		 * implement version 10 properties. Additionally,
		 * multiple data-property-name entries are not
		 * implemented in the static configuration table.
		 */
		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_process_sdconf_table: disk %s\n", id);
			sd_set_vers1_properties(un,
			    sd_disk_table[table_index].flags,
			    sd_disk_table[table_index].properties);
			break;
		}
	}
}


/*
 *    Function: sd_sdconf_id_match
 *
 * Description: This local function implements a case insensitive vid/pid
 *		comparison as well as the boundary cases of wild card and
 *		multiple blanks.
 *
 *		Note: An implicit assumption made here is that the scsi
 *		inquiry structure will always keep the vid, pid and
 *		revision strings in consecutive sequence, so they can be
 *		read as a single string. If this assumption is not the
 *		case, a separate string, to be used for the check, needs
 *		to be built with these strings concatenated.
 *
 *   Arguments: un - driver soft state (unit) structure
 *		id - table or config file vid/pid
 *		idlen  - length of the vid/pid (bytes)
 *
 * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
 *		SD_FAILURE - Indicates no match with the inquiry vid/pid
 */

static int
sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
{
	struct scsi_inquiry	*sd_inq;
	int 			rval = SD_SUCCESS;

	ASSERT(un != NULL);
	sd_inq = un->un_sd->sd_inq;
	ASSERT(id != NULL);

	/*
	 * We use the inq_vid as a pointer to a buffer containing the
	 * vid and pid and use the entire vid/pid length of the table
	 * entry for the comparison. This works because the inq_pid
	 * data member follows inq_vid in the scsi_inquiry structure.
	 */
	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
		/*
		 * The user id string is compared to the inquiry vid/pid
		 * using a case insensitive comparison and ignoring
		 * multiple spaces.
		 */
		rval = sd_blank_cmp(un, id, idlen);
		if (rval != SD_SUCCESS) {
			/*
			 * User id strings that start and end with a "*"
			 * are a special case. These do not have a
			 * specific vendor, and the product string can
			 * appear anywhere in the 16 byte PID portion of
			 * the inquiry data. This is a simple strstr()
			 * type search for the user id in the inquiry data.
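			 * For example (hypothetical id), "*ST32550*"
			 * matches any inquiry whose pid contains the
			 * substring "ST32550".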
			 */
			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
				char	*pidptr = &id[1];
				int	i;
				int	j;
				int	pidstrlen = idlen - 2;
				j = sizeof (SD_INQUIRY(un)->inq_pid) -
				    pidstrlen;

				if (j < 0) {
					return (SD_FAILURE);
				}
				for (i = 0; i < j; i++) {
					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
					    pidptr, pidstrlen) == 0) {
						rval = SD_SUCCESS;
						break;
					}
				}
			}
		}
	}
	return (rval);
}


/*
 *    Function: sd_blank_cmp
 *
 * Description: If the id string starts and ends with a space, treat
 *		multiple consecutive spaces as equivalent to a single
 *		space. For example, this causes a sd_disk_table entry
 *		of " NEC CDROM " to match a device's id string of
 *		"NEC       CDROM".
 *
 *		Note: The success exit condition for this routine is when
 *		the pointer into the table entry reaches '\0' and the cnt
 *		of the inquiry length is zero. This will happen if the inquiry
 *		string returned by the device is padded with spaces to be
 *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
 *		SCSI spec states that the inquiry string is to be padded with
 *		spaces.
 *
 *   Arguments: un - driver soft state (unit) structure
 *		id - table or config file vid/pid
 *		idlen  - length of the vid/pid (bytes)
 *
 * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
 *		SD_FAILURE - Indicates no match with the inquiry vid/pid
 */

static int
sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
{
	char		*p1;
	char		*p2;
	int		cnt;
	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
	    sizeof (SD_INQUIRY(un)->inq_pid);

	ASSERT(un != NULL);
	p2 = un->un_sd->sd_inq->inq_vid;
	ASSERT(id != NULL);
	p1 = id;

	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
		/*
		 * Note: string p1 is terminated by a NUL but string p2
		 * isn't.  The end of p2 is determined by cnt.
		 */
		for (;;) {
			/* skip over any extra blanks in both strings */
			while ((*p1 != '\0') && (*p1 == ' ')) {
				p1++;
			}
			while ((cnt != 0) && (*p2 == ' ')) {
				p2++;
				cnt--;
			}

			/* compare the two strings */
			if ((cnt == 0) ||
			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
				break;
			}
			while ((cnt > 0) &&
			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
				p1++;
				p2++;
				cnt--;
			}
		}
	}

	/* return SD_SUCCESS if both strings match */
	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
}


/*
 *    Function: sd_chk_vers1_data
 *
 * Description: Verify the version 1 device properties provided by the
 *		user via the configuration file
 *
 *   Arguments: un	     - driver soft state (unit) structure
 *		flags	     - integer mask indicating properties to be set
 *		prop_list    - integer list of property values
 *		list_len     - length of user provided data
 *
 * Return Code: SD_SUCCESS - Indicates the user provided data is valid
 *		SD_FAILURE - Indicates the user provided data is invalid
 */

static int
sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
    int list_len, char *dataname_ptr)
{
	int i;
	int mask = 1;
	int index = 0;

	ASSERT(un != NULL);

	/* Check for a NULL property name and list */
	if (dataname_ptr == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_chk_vers1_data: NULL data property name.");
		return (SD_FAILURE);
	}
	if (prop_list == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_chk_vers1_data: %s NULL data property list.",
		    dataname_ptr);
		return (SD_FAILURE);
	}

	/* Display a warning if undefined bits are set in the flags */
	if (flags & ~SD_CONF_BIT_MASK) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
		    "Properties not set.",
		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
		return (SD_FAILURE);
	}

	/*
	 * Verify the length of the list by counting the bits set in the
	 * flags and validating that the property list holds at least that
	 * many entries, plus the version and flag words.
	 */
	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
		mask = 1 << i;
		if (flags & mask) {
			index++;
		}
	}
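
	/*
	 * For example, a flags word of 0x3 (bits 0 and 1 set) yields an
	 * index of 2, so the list must hold at least index + 2 = 4 ints:
	 * the version, the flags word, and two property values.
	 */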
	if ((list_len / sizeof (int)) < (index + 2)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_chk_vers1_data: "
		    "Data property list %s size is incorrect. "
		    "Properties not set.", dataname_ptr);
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
		return (SD_FAILURE);
	}
	return (SD_SUCCESS);
}


/*
 *    Function: sd_set_vers1_properties
 *
 * Description: Set version 1 device properties based on a property list
 *		retrieved from the driver configuration file or static
 *		configuration table. Version 1 properties have the format:
 *
 * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
 *
 *		where the prop0 value will be used to set prop0 if bit0
 *		is set in the flags
 *
 *   Arguments: un	     - driver soft state (unit) structure
 *		flags	     - integer mask indicating properties to be set
 *		prop_list    - integer list of property values
 */

static void
sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
{
	ASSERT(un != NULL);

	/*
	 * Set the flag to indicate cache is to be disabled. An attempt
	 * to disable the cache via sd_cache_control() will be made
	 * later during attach once the basic initialization is complete.
	 */
	if (flags & SD_CONF_BSET_NOCACHE) {
		un->un_f_opt_disable_cache = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: caching disabled flag set\n");
	}

	/* CD-specific configuration parameters */
	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
		un->un_f_cfg_playmsf_bcd = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: playmsf_bcd set\n");
	}
	if (flags & SD_CONF_BSET_READSUB_BCD) {
		un->un_f_cfg_readsub_bcd = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: readsub_bcd set\n");
	}
	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
		un->un_f_cfg_read_toc_trk_bcd = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
	}
	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
		un->un_f_cfg_read_toc_addr_bcd = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
	}
	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
		un->un_f_cfg_no_read_header = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: no_read_header set\n");
	}
	if (flags & SD_CONF_BSET_READ_CD_XD4) {
		un->un_f_cfg_read_cd_xd4 = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: read_cd_xd4 set\n");
	}

	/* Support for devices which do not have valid/unique serial numbers */
	if (flags & SD_CONF_BSET_FAB_DEVID) {
		un->un_f_opt_fab_devid = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: fab_devid bit set\n");
	}

	/* Support for user throttle configuration */
	if (flags & SD_CONF_BSET_THROTTLE) {
		ASSERT(prop_list != NULL);
		un->un_saved_throttle = un->un_throttle =
		    prop_list->sdt_throttle;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: throttle set to %d\n",
		    prop_list->sdt_throttle);
	}

	/* Set the per disk retry count according to the conf file or table. */
	if (flags & SD_CONF_BSET_NRR_COUNT) {
		ASSERT(prop_list != NULL);
		if (prop_list->sdt_not_rdy_retries) {
			un->un_notready_retry_count =
			    prop_list->sdt_not_rdy_retries;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_vers1_properties: not ready retry count"
			    " set to %d\n", un->un_notready_retry_count);
		}
	}

	/* The controller type is reported for generic disk driver ioctls */
	if (flags & SD_CONF_BSET_CTYPE) {
		ASSERT(prop_list != NULL);
		switch (prop_list->sdt_ctype) {
		case CTYPE_CDROM:
			un->un_ctype = prop_list->sdt_ctype;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_vers1_properties: ctype set to "
			    "CTYPE_CDROM\n");
			break;
		case CTYPE_CCS:
			un->un_ctype = prop_list->sdt_ctype;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_vers1_properties: ctype set to "
			    "CTYPE_CCS\n");
			break;
		case CTYPE_ROD:		/* RW optical */
			un->un_ctype = prop_list->sdt_ctype;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_vers1_properties: ctype set to "
			    "CTYPE_ROD\n");
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sd_set_vers1_properties: Could not set "
			    "invalid ctype value (%d)",
			    prop_list->sdt_ctype);
		}
	}

	/* Purple failover timeout */
	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
		ASSERT(prop_list != NULL);
		un->un_busy_retry_count =
		    prop_list->sdt_busy_retries;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: "
		    "busy retry count set to %d\n",
		    un->un_busy_retry_count);
	}

	/* Purple reset retry count */
	if (flags & SD_CONF_BSET_RST_RETRIES) {
		ASSERT(prop_list != NULL);
		un->un_reset_retry_count =
		    prop_list->sdt_reset_retries;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: "
		    "reset retry count set to %d\n",
		    un->un_reset_retry_count);
	}

	/* Purple reservation release timeout */
	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
		ASSERT(prop_list != NULL);
		un->un_reserve_release_time =
		    prop_list->sdt_reserv_rel_time;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: "
		    "reservation release timeout set to %d\n",
		    un->un_reserve_release_time);
	}

	/*
	 * Set the flag telling the driver to verify that no commands are
	 * pending for a device before issuing a Test Unit Ready. This is a
	 * workaround for a firmware bug in some Seagate eliteI drives.
	 */
	if (flags & SD_CONF_BSET_TUR_CHECK) {
		un->un_f_cfg_tur_check = TRUE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: tur queue check set\n");
	}

	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
		un->un_min_throttle = prop_list->sdt_min_throttle;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: min throttle set to %d\n",
		    un->un_min_throttle);
	}

	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
		un->un_f_disksort_disabled =
		    (prop_list->sdt_disk_sort_dis != 0) ?
		    TRUE : FALSE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: disksort disabled "
		    "flag set to %d\n",
		    prop_list->sdt_disk_sort_dis);
	}

	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
		un->un_f_lun_reset_enabled =
		    (prop_list->sdt_lun_reset_enable != 0) ?
		    TRUE : FALSE;
		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: lun reset enabled "
		    "flag set to %d\n",
		    prop_list->sdt_lun_reset_enable);
	}

	/*
	 * Validate the throttle values.
	 * If any of the numbers are invalid, set everything to defaults.
	 */
	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
	    (un->un_min_throttle > un->un_throttle)) {
		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
		un->un_min_throttle = sd_min_throttle;
	}
}

/*
 *   Function: sd_is_lsi()
 *
 *   Description: Check for LSI devices, stepping through the static
 *	device table to match the vid/pid.
 *
 *   Args: un - ptr to sd_lun
 *
 *   Notes:  When creating a new LSI property, the new property must be
 *		added to this function.
 */
static void
sd_is_lsi(struct sd_lun *un)
{
	char	*id = NULL;
	int	table_index;
	int	idlen;
	void	*prop;

	ASSERT(un != NULL);
	for (table_index = 0; table_index < sd_disk_table_size;
	    table_index++) {
		id = sd_disk_table[table_index].device_id;
		idlen = strlen(id);
		if (idlen == 0) {
			continue;
		}

		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
			prop = sd_disk_table[table_index].properties;
			if (prop == &lsi_properties ||
			    prop == &lsi_oem_properties ||
			    prop == &lsi_properties_scsi ||
			    prop == &symbios_properties) {
				un->un_f_cfg_is_lsi = TRUE;
			}
			break;
		}
	}
}


/*
 * The following routines support reading and interpretation of disk labels,
 * including Solaris BE (8-slice) vtocs, Solaris LE (16-slice) vtocs, and
 * fdisk tables.
 */

/*
 *    Function: sd_validate_geometry
 *
 * Description: Read the label from the disk (if present). Update the unit's
 *		geometry and vtoc information from the data in the label.
 *		Verify that the label is valid.
 *
 *   Arguments: un - driver soft state (unit) structure
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
 *			to use the USCSI "direct" chain and bypass the normal
 *			command waitq.
 *
 * Return Code: 0 - Successful completion
 *		EINVAL  - Invalid value in un->un_tgt_blocksize or
 *			  un->un_blockcount; or label on disk is corrupted
 *			  or unreadable.
 *		EACCES  - Reservation conflict at the device.
 *		ENOMEM  - Resource allocation error
 *		ENOTSUP - geometry not applicable
 *
 *     Context: Kernel thread only (can sleep).
 */

static int
sd_validate_geometry(struct sd_lun *un, int path_flag)
{
	static	char		labelstring[128];
	static	char		buf[256];
	char	*label		= NULL;
	int	label_error = 0;
	int	gvalid		= un->un_f_geometry_is_valid;
	int	lbasize;
	uint_t	capacity;
	int	count;
#if defined(__i386) || defined(__amd64)
	int forced_under_1t = 0;
#endif

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * If the required values are not valid, then try getting them
	 * once via read capacity. If that fails, then fail this call.
	 * This is necessary with the new mpxio failover behavior in
	 * the T300, where we can get an attach for the inactive path
	 * before the active path. The inactive path fails commands with
	 * sense data of 02,04,88, which happens to the read capacity
	 * issued before mpxio has sufficient knowledge to know if it
	 * should force a failover or not (which it won't do at attach
	 * anyhow). If the read capacity at attach time fails,
	 * un_tgt_blocksize and un_blockcount won't be valid.
	 */
	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
	    (un->un_f_blockcount_is_valid != TRUE)) {
		uint64_t	cap;
		uint32_t	lbasz;
		int		rval;

		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
		    &lbasz, SD_PATH_DIRECT);
		mutex_enter(SD_MUTEX(un));
		if (rval == 0) {
			/*
			 * The following relies on
			 * sd_send_scsi_READ_CAPACITY never
			 * returning 0 for capacity and/or lbasize.
			 */
			sd_update_block_info(un, lbasz, cap);
		}

		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
		    (un->un_f_blockcount_is_valid != TRUE)) {
			return (EINVAL);
		}
	}

	/*
	 * Copy the lbasize and capacity so that if they're reset while we're
	 * not holding the SD_MUTEX, we will continue to use valid values
	 * after the SD_MUTEX is reacquired. (4119659)
	 */
	lbasize  = un->un_tgt_blocksize;
	capacity = un->un_blockcount;

#if defined(_SUNOS_VTOC_16)
	/*
	 * Set up the "whole disk" fdisk partition; this should always
	 * exist, regardless of whether the disk contains an fdisk table
	 * or vtoc.
	 */
	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
#endif

	/*
	 * Refresh the logical and physical geometry caches.
	 * (Data from MODE SENSE format/rigid disk geometry pages,
	 * and scsi_ifgetcap("geometry").)
	 */
	sd_resync_geom_caches(un, capacity, lbasize, path_flag);

	label_error = sd_use_efi(un, path_flag);
	if (label_error == 0) {
		/* found a valid EFI label */
		SD_TRACE(SD_LOG_IO_PARTITION, un,
		    "sd_validate_geometry: found EFI label\n");
		un->un_solaris_offset = 0;
		un->un_solaris_size = capacity;
		return (ENOTSUP);
	}
	if (un->un_blockcount > DK_MAX_BLOCKS) {
		if (label_error == ESRCH) {
			/*
			 * they've configured a LUN over 1TB, but used
			 * format.dat to restrict format's view of the
			 * capacity to be under 1TB
			 */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
"is >1TB and has a VTOC label: use format(1M) to either decrease the");
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
"size to be < 1TB or relabel the disk with an EFI label");
#if defined(__i386) || defined(__amd64)
			forced_under_1t = 1;
#endif
		} else {
			/* unlabeled disk over 1TB */
#if defined(__i386) || defined(__amd64)
			/*
			 * Refer to comments on off-by-1 at the head of the
			 * file.  A 1TB disk was treated as (1T - 512)B in
			 * the past; thus, it might have a valid Solaris
			 * partition. We will return ENOTSUP later only if
			 * this disk has no valid Solaris partition.
			 */
			if ((un->un_tgt_blocksize != un->un_sys_blocksize) ||
			    (un->un_blockcount - 1 > DK_MAX_BLOCKS) ||
			    un->un_f_has_removable_media ||
			    un->un_f_is_hotpluggable)
#endif
				return (ENOTSUP);
		}
	}
	label_error = 0;

	/*
	 * At this point it is either labeled with a VTOC or it is
	 * under 1TB (<= 1TB actually for off-by-1)
	 */
	if (un->un_f_vtoc_label_supported) {
		struct	dk_label *dkl;
		offset_t dkl1;
		offset_t label_addr, real_addr;
		int	rval;
		size_t	buffer_size;

		/*
		 * Note: This will set up un->un_solaris_size and
		 * un->un_solaris_offset.
		 */
		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
		case SD_CMD_RESERVATION_CONFLICT:
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (EACCES);
		case SD_CMD_FAILURE:
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (ENOMEM);
		}

		if (un->un_solaris_size <= DK_LABEL_LOC) {

#if defined(__i386) || defined(__amd64)
			/*
			 * Refer to comments on off-by-1 at the head of the
			 * file.  This is for a 1TB disk only. Since there
			 * are no Solaris partitions, return ENOTSUP as we
			 * do for a >1TB disk.
			 */
			if (un->un_blockcount > DK_MAX_BLOCKS)
				return (ENOTSUP);
#endif
			/*
			 * Found fdisk table but no Solaris partition entry,
			 * so don't call sd_uselabel() and don't create
			 * a default label.
			 */
			label_error = 0;
			un->un_f_geometry_is_valid = TRUE;
			goto no_solaris_partition;
		}
		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);

#if defined(__i386) || defined(__amd64)
		/*
		 * Refer to comments on off-by-1 at the head of the file.
		 * Now, this 1TB disk has a valid Solaris partition. It
		 * must have been created by a previous sd driver; we have
		 * to treat it as (1T-512)B.
		 */
		if ((un->un_blockcount > DK_MAX_BLOCKS) &&
		    (forced_under_1t != 1)) {
			un->un_f_capacity_adjusted = 1;
			un->un_blockcount = DK_MAX_BLOCKS;
			un->un_map[P0_RAW_DISK].dkl_nblk  = DK_MAX_BLOCKS;

			/*
			 * Refer to sd_read_fdisk: when there is no fdisk
			 * partition table, un_solaris_size is set to the
			 * disk's capacity. In this case, we need to
			 * adjust it.
			 */
			if (un->un_solaris_size > DK_MAX_BLOCKS)
				un->un_solaris_size = DK_MAX_BLOCKS;
			sd_resync_geom_caches(un, DK_MAX_BLOCKS,
			    lbasize, path_flag);
		}
#endif

		/*
		 * When sys_blocksize != tgt_blocksize, we need to re-adjust
		 * blkno and save the index to the beginning of dk_label.
		 */
		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
		buffer_size = SD_REQBYTES2TGTBYTES(un,
		    sizeof (struct dk_label));

		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
		    "label_addr: 0x%x allocation size: 0x%x\n",
		    label_addr, buffer_size);
		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
		if (dkl == NULL) {
			return (ENOMEM);
		}

		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
		    path_flag);
		mutex_enter(SD_MUTEX(un));

		switch (rval) {
		case 0:
			/*
			 * sd_uselabel() will establish that the geometry
			 * is valid.
			 * For sys_blocksize != tgt_blocksize, we need to
			 * index into the beginning of dk_label.
			 */
			dkl1 = (daddr_t)dkl +
			    SD_TGTBYTEOFFSET(un, label_addr, real_addr);
			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
			    path_flag) != SD_LABEL_IS_VALID) {
				label_error = EINVAL;
			}
			break;
		case EACCES:
			label_error = EACCES;
			break;
		default:
			label_error = EINVAL;
			break;
		}

		kmem_free(dkl, buffer_size);

#if defined(_SUNOS_VTOC_8)
		label = (char *)un->un_asciilabel;
#elif defined(_SUNOS_VTOC_16)
		label = (char *)un->un_vtoc.v_asciilabel;
#else
#error "No VTOC format defined."
#endif
	}

	/*
	 * If a valid label was not found, AND if no reservation conflict
	 * was detected, then go ahead and create a default label (4069506).
	 */
	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
		if (un->un_f_geometry_is_valid == FALSE) {
			sd_build_default_label(un);
		}
		label_error = 0;
	}

no_solaris_partition:
	if ((!un->un_f_has_removable_media ||
	    (un->un_f_has_removable_media &&
	    un->un_mediastate == DKIO_EJECTED)) &&
	    (un->un_state == SD_STATE_NORMAL && !gvalid)) {
		/*
		 * Print out a message indicating who and what we are.
		 * We do this only when we happen to really validate the
		 * geometry. We may call sd_validate_geometry() at other
		 * times, e.g., ioctls like Get VTOC, in which case we
		 * don't want to print the label.
		 * If the geometry is valid, print the label string,
		 * else print vendor and product info, if available.
		 */
		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
		} else {
			mutex_enter(&sd_label_mutex);
			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
			    labelstring);
			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
			    &labelstring[64]);
			(void) sprintf(buf, "?Vendor '%s', product '%s'",
			    labelstring, &labelstring[64]);
			if (un->un_f_blockcount_is_valid == TRUE) {
				(void) sprintf(&buf[strlen(buf)],
				    ", %llu %u byte blocks\n",
				    (longlong_t)un->un_blockcount,
				    un->un_tgt_blocksize);
			} else {
				(void) sprintf(&buf[strlen(buf)],
				    ", (unknown capacity)\n");
			}
			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
			mutex_exit(&sd_label_mutex);
		}
	}

#if defined(_SUNOS_VTOC_16)
	/*
	 * If we have valid geometry, set up the remaining fdisk partitions.
	 * Note that dkl_cylno is not used for the fdisk map entries, so
	 * we set it to an entirely bogus value.
	 */
	for (count = 0; count < FD_NUMPART; count++) {
		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
		un->un_map[FDISK_P1 + count].dkl_nblk =
		    un->un_fmap[count].fmap_nblk;

		un->un_offset[FDISK_P1 + count] =
		    un->un_fmap[count].fmap_start;
	}
#endif

	for (count = 0; count < NDKMAP; count++) {
#if defined(_SUNOS_VTOC_8)
		struct dk_map *lp  = &un->un_map[count];
		un->un_offset[count] =
		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
#elif defined(_SUNOS_VTOC_16)
		struct dkl_partition *vp = &un->un_vtoc.v_part[count];

		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
#else
#error "No VTOC format defined."
#endif
	}

	/*
	 * For a VTOC labeled disk, create and set the partition stats
	 * at attach time, and update the stats according to dynamic
	 * partition changes during run time.
	 */
	if (label_error == 0 && un->un_f_pkstats_enabled) {
		sd_set_pstats(un);
		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
		    "un:0x%p pstats created and set, or updated\n", un);
	}

	return (label_error);
}


#if defined(_SUNOS_VTOC_16)
/*
 * Macro: MAX_BLKS
 *
 *	This macro is used for table entries where we need to have the largest
 *	possible sector value for that head & SPT (sectors per track)
 *	combination.  Other entries for some smaller disk sizes are set by
 *	convention to match those used by X86 BIOS usage.
 */
#define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt

/*
 *    Function: sd_convert_geometry
 *
 * Description: Convert physical geometry into a dk_geom structure. In
 *		other words, make sure we don't wrap 16-bit values.
 *		e.g. converting from geom_cache to dk_geom
 *
 *     Context: Kernel thread only
 */
static void
sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
{
	int i;
	static const struct chs_values {
		uint_t max_cap;		/* Max Capacity for this HS. */
		uint_t nhead;		/* Heads to use. */
		uint_t nsect;		/* SPT to use. */
	} CHS_values[] = {
		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
	};

	/* Unlabeled SCSI floppy device */
	if (capacity <= 0x1000) {
		un_g->dkg_nhead = 2;
		un_g->dkg_ncyl = 80;
		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
		return;
	}

	/*
	 * For all devices we calculate cylinders using the heads and
	 * sectors we assign based on the capacity of the device.  The
	 * table is designed to be compatible with the way other
	 * operating systems lay out fdisk tables for X86, and to ensure
	 * that the cylinder count never exceeds 65535, to prevent
	 * problems with X86 ioctls that report geometry.
	 * Since other OSes that are not limited to 16-bit cylinder
	 * counts stop at 63 SPT, we make do by using SPT values that
	 * are multiples of 63.
	 *
	 * Note that capacities greater than or equal to 1TB will simply
	 * get the largest geometry from the table. This should be okay
	 * since disks this large shouldn't be using CHS values anyway.
	 */
	for (i = 0; CHS_values[i].max_cap < capacity &&
	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
		;

	un_g->dkg_nhead = CHS_values[i].nhead;
	un_g->dkg_nsect = CHS_values[i].nsect;
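
	/*
	 * The cylinder count is then derived from these values as
	 * capacity / (nhead * nsect); for example (illustrative numbers
	 * only), a disk of 0x2000000 blocks selects the 255-head/63-SPT
	 * entry, for roughly 0x2000000 / (255 * 63) ~= 2088 cylinders.
	 */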
4751}
4752#endif
4753
4754
4755/*
4756 *    Function: sd_resync_geom_caches
4757 *
4758 * Description: (Re)initialize both geometry caches: the virtual geometry
4759 *		information is extracted from the HBA (the "geometry"
4760 *		capability), and the physical geometry cache data is
4761 *		generated by issuing MODE SENSE commands.
4762 *
4763 *   Arguments: un - driver soft state (unit) structure
4764 *		capacity - disk capacity in #blocks
4765 *		lbasize - disk block size in bytes
4766 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4767 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4768 *			to use the USCSI "direct" chain and bypass the normal
4769 *			command waitq.
4770 *
4771 *     Context: Kernel thread only (can sleep).
4772 */
4773
4774static void
4775sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4776	int path_flag)
4777{
4778	struct 	geom_cache 	pgeom;
4779	struct 	geom_cache	*pgeom_p = &pgeom;
4780	int 	spc;
4781	unsigned short nhead;
4782	unsigned short nsect;
4783
4784	ASSERT(un != NULL);
4785	ASSERT(mutex_owned(SD_MUTEX(un)));
4786
4787	/*
4788	 * Ask the controller for its logical geometry.
4789	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4790	 * then the lgeom cache will be invalid.
4791	 */
4792	sd_get_virtual_geometry(un, capacity, lbasize);
4793
4794	/*
4795	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4796	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4797	 */
4798	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4799		/*
4800		 * Note: Perhaps this needs to be more adaptive? The rationale
4801		 * is that, if there's no HBA geometry from the HBA driver, any
4802		 * guess is good, since this is the physical geometry. If MODE
4803		 * SENSE fails, this gives a max cylinder size for non-LBA access.
4804		 */
4805		nhead = 255;
4806		nsect = 63;
4807	} else {
4808		nhead = un->un_lgeom.g_nhead;
4809		nsect = un->un_lgeom.g_nsect;
4810	}
4811
4812	if (ISCD(un)) {
4813		pgeom_p->g_nhead = 1;
4814		pgeom_p->g_nsect = nsect * nhead;
4815	} else {
4816		pgeom_p->g_nhead = nhead;
4817		pgeom_p->g_nsect = nsect;
4818	}
4819
4820	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4821	pgeom_p->g_capacity = capacity;
4822	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4823	pgeom_p->g_acyl = 0;
4824
4825	/*
4826	 * Retrieve fresh geometry data from the hardware, stash it
4827	 * here temporarily before we rebuild the incore label.
4828	 *
4829	 * We want to use the MODE SENSE commands to derive the
4830	 * physical geometry of the device, but if either command
4831	 * fails, the logical geometry is used as the fallback for
4832	 * disk label geometry.
4833	 */
4834	mutex_exit(SD_MUTEX(un));
4835	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4836	mutex_enter(SD_MUTEX(un));
4837
4838	/*
4839	 * Now update the real copy while holding the mutex. This
4840	 * way the global copy is never in an inconsistent state.
4841	 */
4842	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4843
4844	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4845	    "(cached from lgeom)\n");
4846	SD_INFO(SD_LOG_COMMON, un,
4847	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4848	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4849	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4850	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4851	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4852	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4853	    un->un_pgeom.g_rpm);
4854}
4855
4856
4857/*
4858 *    Function: sd_read_fdisk
4859 *
4860 * Description: Utility routine to read the fdisk table.
4861 *
4862 *   Arguments: un - driver soft state (unit) structure
4863 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4864 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4865 *			to use the USCSI "direct" chain and bypass the normal
4866 *			command waitq.
4867 *
4868 * Return Code: SD_CMD_SUCCESS
4869 *		SD_CMD_FAILURE
4870 *
4871 *     Context: Kernel thread only (can sleep).
4872 */
4873/* ARGSUSED */
4874static int
4875sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
4876{
4877#if defined(_NO_FDISK_PRESENT)
4878
4879	un->un_solaris_offset = 0;
4880	un->un_solaris_size = capacity;
4881	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4882	return (SD_CMD_SUCCESS);
4883
4884#elif defined(_FIRMWARE_NEEDS_FDISK)
4885
4886	struct ipart	*fdp;
4887	struct mboot	*mbp;
4888	struct ipart	fdisk[FD_NUMPART];
4889	int		i;
4890	char		sigbuf[2];
4891	caddr_t		bufp;
4892	int		uidx;
4893	int		rval;
4894	int		lba = 0;
4895	uint_t		solaris_offset;	/* offset to solaris part. */
4896	daddr_t		solaris_size;	/* size of solaris partition */
4897	uint32_t	blocksize;
4898
4899	ASSERT(un != NULL);
4900	ASSERT(mutex_owned(SD_MUTEX(un)));
4901	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);
4902
4903	blocksize = un->un_tgt_blocksize;
4904
4905	/*
4906	 * Start off assuming no fdisk table
4907	 */
4908	solaris_offset = 0;
4909	solaris_size   = capacity;
4910
4911	mutex_exit(SD_MUTEX(un));
4912	bufp = kmem_zalloc(blocksize, KM_SLEEP);
4913	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
4914	mutex_enter(SD_MUTEX(un));
4915
4916	if (rval != 0) {
4917		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4918		    "sd_read_fdisk: fdisk read err\n");
4919		kmem_free(bufp, blocksize);
4920		return (SD_CMD_FAILURE);
4921	}
4922
4923	mbp = (struct mboot *)bufp;
4924
4925	/*
4926	 * The fdisk table does not begin on a 4-byte boundary within the
4927	 * master boot record, so we copy it to an aligned structure to avoid
4928	 * alignment exceptions on some processors.
4929	 */
4930	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
4931
4932	/*
4933	 * Check for lba support before verifying sig; sig might not be
4934	 * there, say on a blank disk, but the max_chs mark may still
4935	 * be present.
4936	 *
4937	 * Note: LBA support and BEFs are an x86-only concept but this
4938	 * code should work OK on SPARC as well.
4939	 */
4940
4941	/*
4942	 * First, check for lba-access-ok on the root node (or prom root node);
4943	 * if present there, we don't need to search the fdisk table.
4944	 */
4945	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
4946	    "lba-access-ok", 0) != 0) {
4947		/* All drives do LBA; don't search fdisk table */
4948		lba = 1;
4949	} else {
4950		/* Okay, look for mark in fdisk table */
4951		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4952			/* accumulate "lba" value from all partitions */
4953			lba = (lba || sd_has_max_chs_vals(fdp));
4954		}
4955	}
4956
4957	if (lba != 0) {
4958		dev_t dev = sd_make_device(SD_DEVINFO(un));
4959
4960		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
4961		    "lba-access-ok", 0) == 0) {
4962			/* not found; create it */
4963			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
4964			    "lba-access-ok", (caddr_t)NULL, 0) !=
4965			    DDI_PROP_SUCCESS) {
4966				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4967				    "sd_read_fdisk: Can't create lba property "
4968				    "for instance %d\n",
4969				    ddi_get_instance(SD_DEVINFO(un)));
4970			}
4971		}
4972	}
4973
4974	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));
4975
4976	/*
4977	 * Endian-independent signature check
4978	 */
4979	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
4980	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
4981		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4982		    "sd_read_fdisk: no fdisk\n");
4983		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4984		rval = SD_CMD_SUCCESS;
4985		goto done;
4986	}
4987
4988#ifdef SDDEBUG
4989	if (sd_level_mask & SD_LOGMASK_INFO) {
4990		fdp = fdisk;
4991		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
4992		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
4993		    "numsect         sysid       bootid\n");
4994		for (i = 0; i < FD_NUMPART; i++, fdp++) {
4995			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4996			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
4997			    i, fdp->relsect, fdp->numsect,
4998			    fdp->systid, fdp->bootid);
4999		}
5000	}
5001#endif
5002
5003	/*
5004	 * Try to find the unix partition
5005	 */
5006	uidx = -1;
5007	solaris_offset = 0;
5008	solaris_size   = 0;
5009
5010	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
5011		int	relsect;
5012		int	numsect;
5013
5014		if (fdp->numsect == 0) {
5015			un->un_fmap[i].fmap_start = 0;
5016			un->un_fmap[i].fmap_nblk  = 0;
5017			continue;
5018		}
5019
5020		/*
5021		 * Data in the fdisk table is little-endian.
5022		 */
5023		relsect = LE_32(fdp->relsect);
5024		numsect = LE_32(fdp->numsect);
5025
5026		un->un_fmap[i].fmap_start = relsect;
5027		un->un_fmap[i].fmap_nblk  = numsect;
5028
5029		if (fdp->systid != SUNIXOS &&
5030		    fdp->systid != SUNIXOS2 &&
5031		    fdp->systid != EFI_PMBR) {
5032			continue;
5033		}
5034
5035		/*
5036		 * use the last active solaris partition id found
5037		 * (there should only be 1 active partition id)
5038		 *
5039		 * if there is no active solaris partition id,
5040		 * then use the first inactive solaris partition id
5041		 */
5042		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
5043			uidx = i;
5044			solaris_offset = relsect;
5045			solaris_size   = numsect;
5046		}
5047	}
5048
5049	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
5050	    un->un_solaris_offset, un->un_solaris_size);
5051
5052	rval = SD_CMD_SUCCESS;
5053
5054done:
5055
5056	/*
5057	 * Clear the VTOC info, only if the Solaris partition entry
5058	 * has moved, changed size, been deleted, or if the size of
5059	 * the partition is too small to even fit the label sector.
5060	 */
5061	if ((un->un_solaris_offset != solaris_offset) ||
5062	    (un->un_solaris_size != solaris_size) ||
5063	    solaris_size <= DK_LABEL_LOC) {
5064		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
5065		    solaris_offset, solaris_size);
5066		bzero(&un->un_g, sizeof (struct dk_geom));
5067		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5068		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
5069		un->un_f_geometry_is_valid = FALSE;
5070	}
5071	un->un_solaris_offset = solaris_offset;
5072	un->un_solaris_size = solaris_size;
5073	kmem_free(bufp, blocksize);
5074	return (rval);
5075
5076#else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
5077#error "fdisk table presence undetermined for this platform."
5078#endif	/* #if defined(_NO_FDISK_PRESENT) */
5079}
5080
5081
5082/*
5083 *    Function: sd_get_physical_geometry
5084 *
5085 * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
5086 *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
5087 *		target, and use this information to initialize the physical
5088 *		geometry cache specified by pgeom_p.
5089 *
5090 *		MODE SENSE is an optional command, so failure in this case
5091 *		does not necessarily denote an error. We want to use the
5092 *		MODE SENSE commands to derive the physical geometry of the
5093 *		device, but if either command fails, the logical geometry is
5094 *		used as the fallback for disk label geometry.
5095 *
5096 *		This requires that un->un_blockcount and un->un_tgt_blocksize
5097 *		have already been initialized for the current target and
5098 *		that the current values be passed as args so that we don't
5099 *		end up ever trying to use -1 as a valid value. This could
5100 *		happen if either value is reset while we're not holding
5101 *		the mutex.
5102 *
5103 *   Arguments: un - driver soft state (unit) structure
5104 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5105 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5106 *			to use the USCSI "direct" chain and bypass the normal
5107 *			command waitq.
5108 *
5109 *     Context: Kernel thread only (can sleep).
5110 */
5111
5112static void
5113sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
5114	int capacity, int lbasize, int path_flag)
5115{
5116	struct	mode_format	*page3p;
5117	struct	mode_geometry	*page4p;
5118	struct	mode_header	*headerp;
5119	int	sector_size;
5120	int	nsect;
5121	int	nhead;
5122	int	ncyl;
5123	int	intrlv;
5124	int	spc;
5125	int	modesense_capacity;
5126	int	rpm;
5127	int	bd_len;
5128	int	mode_header_length;
5129	uchar_t	*p3bufp;
5130	uchar_t	*p4bufp;
5131	int	cdbsize;
5132
5133	ASSERT(un != NULL);
5134	ASSERT(!(mutex_owned(SD_MUTEX(un))));
5135
5136	if (un->un_f_blockcount_is_valid != TRUE) {
5137		return;
5138	}
5139
5140	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
5141		return;
5142	}
5143
5144	if (lbasize == 0) {
5145		if (ISCD(un)) {
5146			lbasize = 2048;
5147		} else {
5148			lbasize = un->un_sys_blocksize;
5149		}
5150	}
5151	pgeom_p->g_secsize = (unsigned short)lbasize;
5152
5153	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
5154
5155	/*
5156	 * Retrieve MODE SENSE page 3 - Format Device Page
5157	 */
5158	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
5159	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
5160	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
5161	    != 0) {
5162		SD_ERROR(SD_LOG_COMMON, un,
5163		    "sd_get_physical_geometry: mode sense page 3 failed\n");
5164		goto page3_exit;
5165	}
5166
5167	/*
5168	 * Determine size of Block Descriptors in order to locate the mode
5169	 * page data.  ATAPI devices return 0, SCSI devices should return
5170	 * MODE_BLK_DESC_LENGTH.
5171	 */
5172	headerp = (struct mode_header *)p3bufp;
5173	if (un->un_f_cfg_is_atapi == TRUE) {
5174		struct mode_header_grp2 *mhp =
5175		    (struct mode_header_grp2 *)headerp;
5176		mode_header_length = MODE_HEADER_LENGTH_GRP2;
5177		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5178	} else {
5179		mode_header_length = MODE_HEADER_LENGTH;
5180		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5181	}
5182
5183	if (bd_len > MODE_BLK_DESC_LENGTH) {
5184		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5185		    "received unexpected bd_len of %d, page3\n", bd_len);
5186		goto page3_exit;
5187	}
5188
5189	page3p = (struct mode_format *)
5190	    ((caddr_t)headerp + mode_header_length + bd_len);
5191
5192	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
5193		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5194		    "mode sense pg3 code mismatch %d\n",
5195		    page3p->mode_page.code);
5196		goto page3_exit;
5197	}
5198
5199	/*
5200	 * Use this physical geometry data only if BOTH MODE SENSE commands
5201	 * complete successfully; otherwise, revert to the logical geometry.
5202	 * So, we need to save everything in temporary variables.
5203	 */
5204	sector_size = BE_16(page3p->data_bytes_sect);
5205
5206	/*
5207	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
5208	 */
5209	if (sector_size == 0) {
5210		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
5211	} else {
5212		sector_size &= ~(un->un_sys_blocksize - 1);
5213	}
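	/*
	 * The masking above rounds the reported sector size down to a
	 * multiple of the (power-of-two) system block size; e.g. a
	 * reported 520 becomes 512.
	 */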
5214
5215	nsect  = BE_16(page3p->sect_track);
5216	intrlv = BE_16(page3p->interleave);
5217
5218	SD_INFO(SD_LOG_COMMON, un,
5219	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
5220	SD_INFO(SD_LOG_COMMON, un,
5221	    "   mode page: %d; nsect: %d; sector size: %d;\n",
5222	    page3p->mode_page.code, nsect, sector_size);
5223	SD_INFO(SD_LOG_COMMON, un,
5224	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
5225	    BE_16(page3p->track_skew),
5226	    BE_16(page3p->cylinder_skew));
5227
5228
5229	/*
5230	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
5231	 */
5232	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
5233	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
5234	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
5235	    != 0) {
5236		SD_ERROR(SD_LOG_COMMON, un,
5237		    "sd_get_physical_geometry: mode sense page 4 failed\n");
5238		goto page4_exit;
5239	}
5240
5241	/*
5242	 * Determine size of Block Descriptors in order to locate the mode
5243	 * page data.  ATAPI devices return 0, SCSI devices should return
5244	 * MODE_BLK_DESC_LENGTH.
5245	 */
5246	headerp = (struct mode_header *)p4bufp;
5247	if (un->un_f_cfg_is_atapi == TRUE) {
5248		struct mode_header_grp2 *mhp =
5249		    (struct mode_header_grp2 *)headerp;
5250		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5251	} else {
5252		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5253	}
5254
5255	if (bd_len > MODE_BLK_DESC_LENGTH) {
5256		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5257		    "received unexpected bd_len of %d, page4\n", bd_len);
5258		goto page4_exit;
5259	}
5260
5261	page4p = (struct mode_geometry *)
5262	    ((caddr_t)headerp + mode_header_length + bd_len);
5263
5264	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
5265		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
5266		    "mode sense pg4 code mismatch %d\n",
5267		    page4p->mode_page.code);
5268		goto page4_exit;
5269	}
5270
5271	/*
5272	 * Stash the data now, after we know that both commands completed.
5273	 */
5274
5275	mutex_enter(SD_MUTEX(un));
5276
5277	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
5278	spc   = nhead * nsect;
5279	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
5280	rpm   = BE_16(page4p->rpm);
5281
5282	modesense_capacity = spc * ncyl;
5283
5284	SD_INFO(SD_LOG_COMMON, un,
5285	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
5286	SD_INFO(SD_LOG_COMMON, un,
5287	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
5288	SD_INFO(SD_LOG_COMMON, un,
5289	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
5290	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
5291	    (void *)pgeom_p, capacity);
5292
5293	/*
5294	 * Compensate if the drive's geometry is not rectangular, i.e.,
5295	 * the product of C * H * S returned by MODE SENSE >= that returned
5296	 * by read capacity. This is an idiosyncrasy of the original x86
5297	 * disk subsystem.
5298	 */
5299	if (modesense_capacity >= capacity) {
5300		SD_INFO(SD_LOG_COMMON, un,
5301		    "sd_get_physical_geometry: adjusting acyl; "
5302		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5303		    (modesense_capacity - capacity + spc - 1) / spc);
5304		if (sector_size != 0) {
5305			/* 1243403: NEC D38x7 drives don't support sec size */
5306			pgeom_p->g_secsize = (unsigned short)sector_size;
5307		}
5308		pgeom_p->g_nsect    = (unsigned short)nsect;
5309		pgeom_p->g_nhead    = (unsigned short)nhead;
5310		pgeom_p->g_capacity = capacity;
5311		pgeom_p->g_acyl	    =
5312		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5313		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
5314	}
5315
5316	pgeom_p->g_rpm    = (unsigned short)rpm;
5317	pgeom_p->g_intrlv = (unsigned short)intrlv;
5318
5319	SD_INFO(SD_LOG_COMMON, un,
5320	    "sd_get_physical_geometry: mode sense geometry:\n");
5321	SD_INFO(SD_LOG_COMMON, un,
5322	    "   nsect: %d; sector size: %d; interlv: %d\n",
5323	    nsect, sector_size, intrlv);
5324	SD_INFO(SD_LOG_COMMON, un,
5325	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5326	    nhead, ncyl, rpm, modesense_capacity);
5327	SD_INFO(SD_LOG_COMMON, un,
5328	    "sd_get_physical_geometry: (cached)\n");
5329	SD_INFO(SD_LOG_COMMON, un,
5330	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5331	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
5332	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
5333	SD_INFO(SD_LOG_COMMON, un,
5334	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5335	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
5336	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);
5337
5338	mutex_exit(SD_MUTEX(un));
5339
5340page4_exit:
5341	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5342page3_exit:
5343	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5344}
5345
5346
5347/*
5348 *    Function: sd_get_virtual_geometry
5349 *
5350 * Description: Ask the controller to tell us about the target device.
5351 *
5352 *   Arguments: un - pointer to softstate
5353 *		capacity - disk capacity in #blocks
5354 *		lbasize - disk block size in bytes
5355 *
5356 *     Context: Kernel thread only
5357 */
5358
5359static void
5360sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5361{
5362	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5363	uint_t	geombuf;
5364	int	spc;
5365
5366	ASSERT(un != NULL);
5367	ASSERT(mutex_owned(SD_MUTEX(un)));
5368
5369	mutex_exit(SD_MUTEX(un));
5370
5371	/* Set sector size, and total number of sectors */
5372	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5373	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5374
5375	/* Let the HBA tell us its geometry */
5376	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5377
5378	mutex_enter(SD_MUTEX(un));
5379
5380	/* A value of -1 indicates an undefined "geometry" property */
5381	if (geombuf == (-1)) {
5382		return;
5383	}
5384
5385	/* Initialize the logical geometry cache. */
5386	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5387	lgeom_p->g_nsect   = geombuf & 0xffff;
5388	lgeom_p->g_secsize = un->un_sys_blocksize;
5389
5390	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5391
5392	/*
5393	 * Note: The driver originally converted the capacity value from
5394	 * target blocks to system blocks. However, the capacity value passed
5395	 * to this routine is already in terms of system blocks (this scaling
5396	 * is done when the READ CAPACITY command is issued and processed).
5397	 * This 'error' may have gone undetected because the usage of g_ncyl
5398	 * (which is based upon g_capacity) is very limited within the driver.
5399	 */
5400	lgeom_p->g_capacity = capacity;
5401
5402	/*
5403	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
5404	 * hba may return zero values if the device has been removed.
5405	 */
5406	if (spc == 0) {
5407		lgeom_p->g_ncyl = 0;
5408	} else {
5409		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5410	}
5411	lgeom_p->g_acyl = 0;
5412
5413	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5414	SD_INFO(SD_LOG_COMMON, un,
5415	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5416	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5417	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5418	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5419	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5420	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5421}
5422
5423
5424/*
5425 *    Function: sd_update_block_info
5426 *
5427 * Description: Save the new target sector size and capacity in the
5428 *		soft state, marking each as valid if nonzero.
5429 *
5430 *   Arguments: un: unit struct.
5431 *		lbasize: new target sector size
5432 *		capacity: new target capacity, i.e. block count
5433 *
5434 *     Context: Kernel thread context
5435 */
5436
5437static void
5438sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5439{
5440	if (lbasize != 0) {
5441		un->un_tgt_blocksize = lbasize;
5442		un->un_f_tgt_blocksize_is_valid	= TRUE;
5443	}
5444
5445	if (capacity != 0) {
5446		un->un_blockcount		= capacity;
5447		un->un_f_blockcount_is_valid	= TRUE;
5448	}
5449}
5450
5451
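/*
 *    Function: sd_swap_efi_gpt
 *
 * Description: Convert an EFI GPT header from its on-disk little-endian
 *		byte order to host byte order in place.  The LE_*() macros
 *		are no-ops on little-endian hosts and byte swaps on
 *		big-endian hosts.
 */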
5452static void
5453sd_swap_efi_gpt(efi_gpt_t *e)
5454{
5455	_NOTE(ASSUMING_PROTECTED(*e))
5456	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5457	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5458	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5459	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5460	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5461	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5462	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5463	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5464	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5465	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5466	e->efi_gpt_NumberOfPartitionEntries =
5467	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5468	e->efi_gpt_SizeOfPartitionEntry =
5469	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5470	e->efi_gpt_PartitionEntryArrayCRC32 =
5471	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5472}
5473
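/*
 *    Function: sd_swap_efi_gpe
 *
 * Description: Convert nparts GPT partition entries from on-disk
 *		little-endian byte order to host byte order in place.
 */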
5474static void
5475sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5476{
5477	int i;
5478
5479	_NOTE(ASSUMING_PROTECTED(*p))
5480	for (i = 0; i < nparts; i++) {
5481		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5482		    p[i].efi_gpe_PartitionTypeGUID);
5483		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5484		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5485		/* PartitionAttrs */
5486	}
5487}
5488
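/*
 *    Function: sd_validate_efi
 *
 * Description: Sanity-check a (host byte order) GPT header: signature,
 *		minimum header size, and partition entry size.
 *
 * Return Code: 0 if the header looks valid; EINVAL otherwise.
 */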
5489static int
5490sd_validate_efi(efi_gpt_t *labp)
5491{
5492	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5493		return (EINVAL);
5494	/* at least 96 bytes in this version of the spec. */
5495	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5496	    labp->efi_gpt_HeaderSize)
5497		return (EINVAL);
5498	/* this should be 128 bytes */
5499	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5500		return (EINVAL);
5501	return (0);
5502}
5503
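/*
 *    Function: sd_use_efi
 *
 * Description: Look for an EFI (GPT) label, validating the primary label
 *		and falling back to the backup label if necessary, then
 *		fill in the partition map from the partition entry array.
 *
 *   Arguments: un - driver soft state (unit) structure
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
 *			to use the USCSI "direct" chain and bypass the normal
 *			command waitq.
 *
 * Return Code: 0 - EFI label found and partition info set up
 *		ESRCH - a VTOC label was found instead
 *		EINVAL - the target block size does not match the system
 *			block size, or the label failed validation
 *		other errno values from failed READ / READ CAPACITY commands
 *
 *     Context: Kernel thread only (can sleep).
 */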
5504static int
5505sd_use_efi(struct sd_lun *un, int path_flag)
5506{
5507	int		i;
5508	int		rval = 0;
5509	efi_gpe_t	*partitions;
5510	uchar_t		*buf;
5511	uint_t		lbasize;
5512	uint64_t	cap = 0;
5513	uint_t		nparts;
5514	diskaddr_t	gpe_lba;
5515	struct uuid	uuid_type_reserved = EFI_RESERVED;
5516
5517	ASSERT(mutex_owned(SD_MUTEX(un)));
5518	lbasize = un->un_tgt_blocksize;
5519	un->un_reserved = -1;
5520
5521	mutex_exit(SD_MUTEX(un));
5522
5523	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5524
5525	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5526		rval = EINVAL;
5527		goto done_err;
5528	}
5529
5530	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5531	if (rval) {
5532		goto done_err;
5533	}
5534	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5535		/* not ours */
5536		rval = ESRCH;
5537		goto done_err;
5538	}
5539
5540	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5541	if (rval) {
5542		goto done_err;
5543	}
5544	sd_swap_efi_gpt((efi_gpt_t *)buf);
5545
5546	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5547		/*
5548		 * Couldn't read the primary, try the backup.  Our
5549		 * capacity at this point could be based on CHS, so
5550		 * check what the device reports.
5551		 */
5552		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5553		    path_flag);
5554		if (rval) {
5555			goto done_err;
5556		}
5557
5558		/*
5559		 * The MMC standard allows READ CAPACITY to be
5560		 * inaccurate by a bounded amount (in the interest of
5561		 * response latency).  As a result, failed READs are
5562		 * commonplace (due to the reading of metadata and not
5563		 * data). Depending on the per-Vendor/drive Sense data,
5564		 * the failed READ can cause many (unnecessary) retries.
5565		 */
5566
5567		/*
5568		 * Refer to comments related to off-by-1 at the
5569		 * header of this file. Search the next to last
5570		 * block for backup EFI label.
5571		 */
5572		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5573		    cap - 2, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5574		    path_flag)) != 0) {
5575			goto done_err;
5576		}
5577
5578		sd_swap_efi_gpt((efi_gpt_t *)buf);
5579		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5580			if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5581			    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5582			    path_flag)) != 0) {
5583				goto done_err;
5584			}
5585			sd_swap_efi_gpt((efi_gpt_t *)buf);
5586			if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5587				goto done_err;
5588		}
5589		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5590		    "primary label corrupt; using backup\n");
5591	}
5592
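	/*
	 * If the primary label validated, cap was never read above, so
	 * fetch the capacity now for the whole-disk mapping below.
	 */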
5593	if (cap == 0)
5594		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5595		    path_flag);
5596
5597	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5598	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5599
5600	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5601	    path_flag);
5602	if (rval) {
5603		goto done_err;
5604	}
5605	partitions = (efi_gpe_t *)buf;
5606
5607	if (nparts > MAXPART) {
5608		nparts = MAXPART;
5609	}
5610	sd_swap_efi_gpe(nparts, partitions);
5611
5612	mutex_enter(SD_MUTEX(un));
5613
5614	/* Fill in partition table. */
5615	for (i = 0; i < nparts; i++) {
5616		if (partitions->efi_gpe_StartingLBA != 0 ||
5617		    partitions->efi_gpe_EndingLBA != 0) {
5618			un->un_map[i].dkl_cylno =
5619			    partitions->efi_gpe_StartingLBA;
5620			un->un_map[i].dkl_nblk =
5621			    partitions->efi_gpe_EndingLBA -
5622			    partitions->efi_gpe_StartingLBA + 1;
5623			un->un_offset[i] =
5624			    partitions->efi_gpe_StartingLBA;
5625		}
5626		if (un->un_reserved == -1) {
5627			if (bcmp(&partitions->efi_gpe_PartitionTypeGUID,
5628			    &uuid_type_reserved, sizeof (struct uuid)) == 0) {
5629				un->un_reserved = i;
5630			}
5631		}
5632		if (i == WD_NODE) {
5633			/*
5634			 * minor number 7 corresponds to the whole disk
5635			 */
5636			un->un_map[i].dkl_cylno = 0;
5637			un->un_map[i].dkl_nblk = un->un_blockcount;
5638			un->un_offset[i] = 0;
5639		}
5640		partitions++;
5641	}
5642	un->un_solaris_offset = 0;
5643	un->un_solaris_size = cap;
5644	un->un_f_geometry_is_valid = TRUE;
5645
5646	/* clear the vtoc label */
5647	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5648
5649	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5650
5651	/*
5652	 * For an EFI labeled disk, create and set the partition stats
5653	 * at attach time, and update the stats to reflect dynamic
5654	 * partition changes at run time.
5655	 */
5656	if (un->un_f_pkstats_enabled) {
5657		sd_set_pstats(un);
5658		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_use_efi: "
5659		    "un:0x%p pstats created and set, or updated\n", un);
5660	}
5661	return (0);
5662
5663done_err:
5664	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5665	mutex_enter(SD_MUTEX(un));
5666	/*
5667	 * if we didn't find something that could look like a VTOC
5668	 * and the disk is over 1TB, we know there isn't a valid label.
5669	 * Otherwise let sd_uselabel decide what to do.  We only
5670	 * want to invalidate this if we're certain the label isn't
5671	 * valid because sd_prop_op will now fail, which in turn
5672	 * causes things like opens and stats on the partition to fail.
5673	 */
5674	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5675		un->un_f_geometry_is_valid = FALSE;
5676	}
5677	return (rval);
5678}
5679
5680
5681/*
5682 *    Function: sd_uselabel
5683 *
5684 * Description: Validate the disk label and update the relevant data (geometry,
5685 *		partition, vtoc, and capacity data) in the sd_lun struct.
5686 *		Marks the geometry of the unit as being valid.
5687 *
5688 *   Arguments: un: unit struct.
5689 *		dk_label: disk label
5690 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5691 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5692 *			to use the USCSI "direct" chain and bypass the normal
5693 *			command waitq.
5694 *
5695 * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5696 *		partition, vtoc, and capacity data are good.
5697 *
5698 *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5699 *		label; or computed capacity does not jibe with capacity
5700 *		reported from the READ CAPACITY command.
5701 *
5702 *     Context: Kernel thread only (can sleep).
5703 */
5704
5705static int
5706sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
5707{
5708	short	*sp;
5709	short	sum;
5710	short	count;
5711	int	label_error = SD_LABEL_IS_VALID;
5712	int	i;
5713	int	capacity;
5714	int	part_end;
5715	int	track_capacity;
5716	int	err;
5717#if defined(_SUNOS_VTOC_16)
5718	struct	dkl_partition	*vpartp;
5719#endif
5720	ASSERT(un != NULL);
5721	ASSERT(mutex_owned(SD_MUTEX(un)));
5722
5723	/* Validate the magic number of the label. */
5724	if (labp->dkl_magic != DKL_MAGIC) {
5725#if defined(__sparc)
5726		if ((un->un_state == SD_STATE_NORMAL) &&
5727		    un->un_f_vtoc_errlog_supported) {
5728			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5729			    "Corrupt label; wrong magic number\n");
5730		}
5731#endif
5732		return (SD_LABEL_IS_INVALID);
5733	}
5734
5735	/* Validate the checksum of the label. */
5736	sp  = (short *)labp;
5737	sum = 0;
5738	count = sizeof (struct dk_label) / sizeof (short);
5739	while (count--)	 {
5740		sum ^= *sp++;
5741	}
5742
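	/*
	 * A valid label is written with dkl_cksum set to the XOR of all
	 * the other shorts in the structure, so XOR-ing every short of
	 * the label, checksum included, must yield zero.
	 */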
5743	if (sum != 0) {
5744#if	defined(_SUNOS_VTOC_16)
5745		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
5746#elif defined(_SUNOS_VTOC_8)
5747		if ((un->un_state == SD_STATE_NORMAL) &&
5748		    un->un_f_vtoc_errlog_supported) {
5749#endif
5750			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5751			    "Corrupt label - label checksum failed\n");
5752		}
5753		return (SD_LABEL_IS_INVALID);
5754	}
5755
5756
5757	/*
5758	 * Fill in geometry structure with data from label.
5759	 */
5760	bzero(&un->un_g, sizeof (struct dk_geom));
5761	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
5762	un->un_g.dkg_acyl   = labp->dkl_acyl;
5763	un->un_g.dkg_bcyl   = 0;
5764	un->un_g.dkg_nhead  = labp->dkl_nhead;
5765	un->un_g.dkg_nsect  = labp->dkl_nsect;
5766	un->un_g.dkg_intrlv = labp->dkl_intrlv;
5767
5768#if defined(_SUNOS_VTOC_8)
5769	un->un_g.dkg_gap1   = labp->dkl_gap1;
5770	un->un_g.dkg_gap2   = labp->dkl_gap2;
5771	un->un_g.dkg_bhead  = labp->dkl_bhead;
5772#endif
5773#if defined(_SUNOS_VTOC_16)
5774	un->un_dkg_skew = labp->dkl_skew;
5775#endif
5776
5777#if defined(__i386) || defined(__amd64)
5778	un->un_g.dkg_apc = labp->dkl_apc;
5779#endif
5780
5781	/*
5782	 * Currently we rely on the values in the label being accurate. If
5783	 * dkl_rpm or dkl_pcyl are zero in the label, use a default value.
5784	 *
5785	 * Note: In the future a MODE SENSE may be used to retrieve this data,
5786	 * although this command is optional in SCSI-2.
5787	 */
5788	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
5789	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
5790	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);
5791
5792	/*
5793	 * The Read and Write reinstruct values may not be valid
5794	 * for older disks.
5795	 */
5796	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
5797	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;
5798
5799	/* Fill in partition table. */
5800#if defined(_SUNOS_VTOC_8)
5801	for (i = 0; i < NDKMAP; i++) {
5802		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
5803		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
5804	}
5805#endif
5806#if  defined(_SUNOS_VTOC_16)
5807	vpartp		= labp->dkl_vtoc.v_part;
5808	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;
5809
5810	/* Prevent divide by zero */
5811	if (track_capacity == 0) {
5812		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5813		    "Corrupt label - zero nhead or nsect value\n");
5814
5815		return (SD_LABEL_IS_INVALID);
5816	}
5817
5818	for (i = 0; i < NDKMAP; i++, vpartp++) {
5819		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
5820		un->un_map[i].dkl_nblk  = vpartp->p_size;
5821	}
5822#endif
5823
5824	/* Fill in VTOC Structure. */
5825	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
5826#if defined(_SUNOS_VTOC_8)
5827	/*
5828	 * The 8-slice vtoc does not include the ascii label; save it into
5829	 * the device's soft state structure here.
5830	 */
5831	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
5832#endif
5833
5834	/* Now look for a valid capacity. */
5835	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
5836	capacity	= (un->un_g.dkg_ncyl  * track_capacity);
5837
5838	if (un->un_g.dkg_acyl) {
5839#if defined(__i386) || defined(__amd64)
5840		/* we may have > 1 alts cylinder */
5841		capacity += (track_capacity * un->un_g.dkg_acyl);
5842#else
5843		capacity += track_capacity;
5844#endif
5845	}
5846
5847	/*
5848	 * Force check here to ensure the computed capacity is valid.
5849	 * If capacity is zero, it indicates an invalid label and
5850	 * we should abort updating the relevant data then.
5851	 */
5852	if (capacity == 0) {
5853		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5854		    "Corrupt label - no valid capacity could be retrieved\n");
5855
5856		return (SD_LABEL_IS_INVALID);
5857	}
5858
5859	/* Mark the geometry as valid. */
5860	un->un_f_geometry_is_valid = TRUE;
5861
5862	/*
5863	 * At this point, un->un_blockcount should contain valid data from
5864	 * the READ CAPACITY command.
5865	 */
5866	if (un->un_f_blockcount_is_valid != TRUE) {
5867		/*
5868		 * We have a situation where the target didn't give us a good
5869		 * READ CAPACITY value, yet there appears to be a valid label.
5870		 * In this case, we'll fake the capacity.
5871		 */
5872		un->un_blockcount = capacity;
5873		un->un_f_blockcount_is_valid = TRUE;
5874		goto done;
5875	}
5876
5877
5878	if ((capacity <= un->un_blockcount) ||
5879	    (un->un_state != SD_STATE_NORMAL)) {
5880#if defined(_SUNOS_VTOC_8)
5881		/*
5882		 * We can't let this happen on drives that are subdivided
5883		 * into logical disks (i.e., that have an fdisk table).
5884		 * The un_blockcount field should always hold the full media
5885		 * size in sectors, period.  This code would overwrite
5886		 * un_blockcount with the size of the Solaris fdisk partition.
5887		 */
5888		SD_ERROR(SD_LOG_COMMON, un,
5889		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
5890		    capacity, un->un_blockcount);
5891		un->un_blockcount = capacity;
5892		un->un_f_blockcount_is_valid = TRUE;
5893#endif	/* defined(_SUNOS_VTOC_8) */
5894		goto done;
5895	}
5896
5897	if (ISCD(un)) {
5898		/* For CDROMs, we trust that the data in the label is OK. */
5899#if defined(_SUNOS_VTOC_8)
5900		for (i = 0; i < NDKMAP; i++) {
5901			part_end = labp->dkl_nhead * labp->dkl_nsect *
5902			    labp->dkl_map[i].dkl_cylno +
5903			    labp->dkl_map[i].dkl_nblk  - 1;
5904
5905			if ((labp->dkl_map[i].dkl_nblk) &&
5906			    (part_end > un->un_blockcount)) {
5907				un->un_f_geometry_is_valid = FALSE;
5908				break;
5909			}
5910		}
5911#endif
5912#if defined(_SUNOS_VTOC_16)
5913		vpartp = &(labp->dkl_vtoc.v_part[0]);
5914		for (i = 0; i < NDKMAP; i++, vpartp++) {
5915			part_end = vpartp->p_start + vpartp->p_size;
5916			if ((vpartp->p_size > 0) &&
5917			    (part_end > un->un_blockcount)) {
5918				un->un_f_geometry_is_valid = FALSE;
5919				break;
5920			}
5921		}
5922#endif
5923	} else {
5924		uint64_t t_capacity;
5925		uint32_t t_lbasize;
5926
5927		mutex_exit(SD_MUTEX(un));
5928		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
5929		    path_flag);
5930		ASSERT(t_capacity <= DK_MAX_BLOCKS);
5931		mutex_enter(SD_MUTEX(un));
5932
5933		if (err == 0) {
5934			sd_update_block_info(un, t_lbasize, t_capacity);
5935		}
5936
5937		if (capacity > un->un_blockcount) {
5938			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5939			    "Corrupt label - bad geometry\n");
5940			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
5941			    "Label says %u blocks; Drive says %llu blocks\n",
5942			    capacity, (unsigned long long)un->un_blockcount);
5943			un->un_f_geometry_is_valid = FALSE;
5944			label_error = SD_LABEL_IS_INVALID;
5945		}
5946	}
5947
5948done:
5949
5950	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
5951	SD_INFO(SD_LOG_COMMON, un,
5952	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
5953	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
5954	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5955	SD_INFO(SD_LOG_COMMON, un,
5956	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
5957	    un->un_tgt_blocksize, un->un_blockcount,
5958	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
5959	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
5960	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);
5961
5962	ASSERT(mutex_owned(SD_MUTEX(un)));
5963
5964	return (label_error);
5965}
5966
5967
5968/*
5969 *    Function: sd_build_default_label
5970 *
5971 * Description: Generate a default label for those devices that do not have
5972 *		one, e.g., new media, removable cartridges, etc..
5973 *
5974 *     Context: Kernel thread only
5975 */
5976
5977static void
5978sd_build_default_label(struct sd_lun *un)
5979{
5980#if defined(_SUNOS_VTOC_16)
5981	uint_t	phys_spc;
5982	uint_t	disksize;
5983	struct	dk_geom un_g;
5984	uint64_t capacity;
5985#endif
5986
5987	ASSERT(un != NULL);
5988	ASSERT(mutex_owned(SD_MUTEX(un)));
5989
5990#if defined(_SUNOS_VTOC_8)
5991	/*
5992	 * Note: This is a legacy check for non-removable devices on VTOC_8
5993	 * only. This may be a valid check for VTOC_16 as well.
5994	 * Once we understand why there is this difference between SPARC and
5995	 * x86 platforms, we could remove this legacy check.
5996	 */
5997	ASSERT(un->un_f_default_vtoc_supported);
5998#endif
5999
6000	bzero(&un->un_g, sizeof (struct dk_geom));
6001	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
6002	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
6003
6004#if defined(_SUNOS_VTOC_8)
6005
6006	/*
6007	 * It's REMOVABLE media, therefore there is no label (on sparc, anyway).
6008	 * But it is still necessary to set up various geometry information,
6009	 * and we are doing this here.
6010	 */
6011
6012	/*
6013	 * For the rpm, we use the minimum for the disk.  For the head, cyl,
6014	 * and number of sector per track, if the capacity <= 1GB, head = 64,
6015	 * sect = 32.  else head = 255, sect 63 Note: the capacity should be
6016	 * equal to C*H*S values.  This will cause some truncation of size due
6017	 * to round off errors. For CD-ROMs, this truncation can have adverse
6018	 * side effects, so returning ncyl and nhead as 1. The nsect will
6019	 * overflow for most of CD-ROMs as nsect is of type ushort. (4190569)
6020	 */
6021	if (ISCD(un)) {
6022		/*
6023		 * Preserve the old behavior for non-writable
6024		 * media. Since dkg_nsect is a ushort, it
6025		 * will lose bits, as CD-ROMs have more than
6026		 * 65536 sectors. So if we recalculated the
6027		 * capacity, it would become much smaller.
6028		 * The dkg_* information is not used for
6029		 * CD-ROMs, so that is OK. But for writable
6030		 * CDs we need this information to be valid
6031		 * (for newfs, say). So we make nsect and
6032		 * nhead > 1; that way nsect can still stay
6033		 * within the ushort limit without losing
6034		 * any bits.
6035		 */
6036		if (un->un_f_mmc_writable_media == TRUE) {
6037			un->un_g.dkg_nhead = 64;
6038			un->un_g.dkg_nsect = 32;
6039			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
6040			un->un_blockcount = un->un_g.dkg_ncyl *
6041			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6042		} else {
6043			un->un_g.dkg_ncyl  = 1;
6044			un->un_g.dkg_nhead = 1;
6045			un->un_g.dkg_nsect = un->un_blockcount;
6046		}
6047	} else {
6048		if (un->un_blockcount <= 0x1000) {
6049			/* unlabeled SCSI floppy device */
6050			un->un_g.dkg_nhead = 2;
6051			un->un_g.dkg_ncyl = 80;
6052			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
6053		} else if (un->un_blockcount <= 0x200000) {
6054			un->un_g.dkg_nhead = 64;
6055			un->un_g.dkg_nsect = 32;
6056			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
6057		} else {
6058			un->un_g.dkg_nhead = 255;
6059			un->un_g.dkg_nsect = 63;
6060			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
6061		}
6062		un->un_blockcount =
6063		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6064	}
6065
6066	un->un_g.dkg_acyl	= 0;
6067	un->un_g.dkg_bcyl	= 0;
6068	un->un_g.dkg_rpm	= 200;
6069	un->un_asciilabel[0]	= '\0';
6070	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;
6071
6072	un->un_map[0].dkl_cylno = 0;
6073	un->un_map[0].dkl_nblk  = un->un_blockcount;
6074	un->un_map[2].dkl_cylno = 0;
6075	un->un_map[2].dkl_nblk  = un->un_blockcount;
6076
6077#elif defined(_SUNOS_VTOC_16)
6078
6079	if (un->un_solaris_size == 0) {
6080		/*
6081		 * Got an fdisk table but no Solaris entry, so
6082		 * don't create a default label.
6083		 */
6084		un->un_f_geometry_is_valid = TRUE;
6085		return;
6086	}
6087
6088	/*
6089	 * For CDs we continue to use the physical geometry to calculate
6090	 * the number of cylinders. All other devices must convert the
6091	 * physical geometry (geom_cache) to values that will fit
6092	 * in a dk_geom structure.
6093	 */
6094	if (ISCD(un)) {
6095		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
6096	} else {
6097		/* Convert physical geometry to disk geometry */
6098		bzero(&un_g, sizeof (struct dk_geom));
6099
6100		/*
6101		 * Refer to comments related to off-by-1 at the
6102		 * header of this file.
6103		 * Before calculating the geometry, the capacity
6104		 * should be decreased by 1. If un_f_capacity_adjusted
6105		 * is TRUE, we are treating a 1TB disk as
6106		 * (1TB - 512B), i.e. the capacity has already
6107		 * been decreased by 1.
6108		 */
6109		if (!un->un_f_capacity_adjusted &&
6110		    !un->un_f_has_removable_media &&
6111		    !un->un_f_is_hotpluggable &&
6112		    un->un_tgt_blocksize == un->un_sys_blocksize)
6113			capacity = un->un_blockcount - 1;
6114		else
6115			capacity = un->un_blockcount;
6116
6117		sd_convert_geometry(capacity, &un_g);
6118		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
6119		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6120	}
6121
6122	ASSERT(phys_spc != 0);
6123	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
6124	un->un_g.dkg_acyl = DK_ACYL;
6125	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
6126	disksize = un->un_g.dkg_ncyl * phys_spc;
6127
6128	if (ISCD(un)) {
6129		/*
6130		 * CDs don't use the "heads * sectors * cyls"-type of
6131		 * geometry, but instead use the entire capacity of the media.
6132		 */
6133		disksize = un->un_solaris_size;
6134		un->un_g.dkg_nhead = 1;
6135		un->un_g.dkg_nsect = 1;
6136		un->un_g.dkg_rpm =
6137		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;
6138
6139		un->un_vtoc.v_part[0].p_start = 0;
6140		un->un_vtoc.v_part[0].p_size  = disksize;
6141		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
6142		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;
6143
6144		un->un_map[0].dkl_cylno = 0;
6145		un->un_map[0].dkl_nblk  = disksize;
6146		un->un_offset[0] = 0;
6147
6148	} else {
6149		/*
6150		 * Hard disks and removable media cartridges
6151		 */
6152		un->un_g.dkg_rpm =
6153		    (un->un_pgeom.g_rpm == 0) ? 3600 : un->un_pgeom.g_rpm;
6154		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;
6155
6156		/* Add boot slice */
6157		un->un_vtoc.v_part[8].p_start = 0;
6158		un->un_vtoc.v_part[8].p_size  = phys_spc;
6159		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
6160		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;
6161
6162		un->un_map[8].dkl_cylno = 0;
6163		un->un_map[8].dkl_nblk  = phys_spc;
6164		un->un_offset[8] = 0;
6165	}
6166
6167	un->un_g.dkg_apc = 0;
6168	un->un_vtoc.v_nparts = V_NUMPAR;
6169	un->un_vtoc.v_version = V_VERSION;
6170
6171	/* Add backup slice */
6172	un->un_vtoc.v_part[2].p_start = 0;
6173	un->un_vtoc.v_part[2].p_size  = disksize;
6174	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
6175	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;
6176
6177	un->un_map[2].dkl_cylno = 0;
6178	un->un_map[2].dkl_nblk  = disksize;
6179	un->un_offset[2] = 0;
6180
6181	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
6182	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
6183	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
6184
6185#else
6186#error "No VTOC format defined."
6187#endif
6188
6189	un->un_g.dkg_read_reinstruct  = 0;
6190	un->un_g.dkg_write_reinstruct = 0;
6191
6192	un->un_g.dkg_intrlv = 1;
6193
6194	un->un_vtoc.v_sanity  = VTOC_SANE;
6195
6196	un->un_f_geometry_is_valid = TRUE;
6197
6198	SD_INFO(SD_LOG_COMMON, un,
6199	    "sd_build_default_label: Default label created: "
6200	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
6201	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
6202	    un->un_g.dkg_nsect, un->un_blockcount);
6203}
6204
6205
6206#if defined(_FIRMWARE_NEEDS_FDISK)
6207/*
6208 * Max CHS values, as they are encoded into bytes, for 1022/254/63
6209 */
6210#define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
6211#define	LBA_MAX_CYL	(1022 & 0xFF)
6212#define	LBA_MAX_HEAD	(254)
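/*
 * These follow the fdisk CHS packing: the sector byte holds the sector
 * number in its low 6 bits and cylinder bits 8-9 in its top 2 bits, and
 * the cylinder byte holds the low 8 cylinder bits.  For 1022/254/63:
 *	LBA_MAX_SECT = 63 | ((1022 & 0x300) >> 2) = 63 | 0xC0 = 0xFF
 *	LBA_MAX_CYL  = 1022 & 0xFF = 0xFE
 *	LBA_MAX_HEAD = 254 = 0xFE
 */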
6213
6214
6215/*
6216 *    Function: sd_has_max_chs_vals
6217 *
6218 * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
6219 *
6220 *   Arguments: fdp - ptr to CHS info
6221 *
6222 * Return Code: True or false
6223 *
6224 *     Context: Any.
6225 */
6226
6227static int
6228sd_has_max_chs_vals(struct ipart *fdp)
6229{
6230	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
6231	    (fdp->beghead == LBA_MAX_HEAD)	&&
6232	    (fdp->begsect == LBA_MAX_SECT)	&&
6233	    (fdp->endcyl  == LBA_MAX_CYL)	&&
6234	    (fdp->endhead == LBA_MAX_HEAD)	&&
6235	    (fdp->endsect == LBA_MAX_SECT));
6236}
6237#endif
6238
6239
6240/*
6241 *    Function: sd_inq_fill
6242 *
6243 * Description: Print a piece of inquiry data, cleaned up for non-printable
6244 *		characters and stopping at the first space character after
6245 *		the beginning of the passed string.
6246 *
6247 *   Arguments: p - source string
6248 *		l - maximum length to copy
6249 *		s - destination string
6250 *
6251 *     Context: Any.
6252 */
6253
6254static void
6255sd_inq_fill(char *p, int l, char *s)
6256{
6257	unsigned i = 0;
6258	char c;
6259
6260	while (i++ < l) {
6261		if ((c = *p++) < ' ' || c >= 0x7F) {
6262			c = '*';
6263		} else if (i != 1 && c == ' ') {
6264			break;
6265		}
6266		*s++ = c;
6267	}
6268	*s++ = 0;
6269}
6270
6271
6272/*
6273 *    Function: sd_register_devid
6274 *
6275 * Description: This routine will obtain the device id information from the
6276 *		target, obtain the serial number, and register the device
6277 *		id with the ddi framework.
6278 *
6279 *   Arguments: devi - the system's dev_info_t for the device.
6280 *		un - driver soft state (unit) structure
6281 *		reservation_flag - indicates if a reservation conflict
6282 *		occurred during attach
6283 *
6284 *     Context: Kernel Thread
6285 */
6286static void
6287sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
6288{
6289	int		rval		= 0;
6290	uchar_t		*inq80		= NULL;
6291	size_t		inq80_len	= MAX_INQUIRY_SIZE;
6292	size_t		inq80_resid	= 0;
6293	uchar_t		*inq83		= NULL;
6294	size_t		inq83_len	= MAX_INQUIRY_SIZE;
6295	size_t		inq83_resid	= 0;
6296
6297	ASSERT(un != NULL);
6298	ASSERT(mutex_owned(SD_MUTEX(un)));
6299	ASSERT((SD_DEVINFO(un)) == devi);
6300
6301	/*
6302	 * This is the case of antiquated Sun disk drives that have the
6303	 * FAB_DEVID property set in the disk_table.  These drives
6304	 * manage their devids by storing them in the last 2 available sectors
6305	 * on the drive and have them fabricated by the ddi layer by calling
6306	 * ddi_devid_init and passing the DEVID_FAB flag.
6307	 */
6308	if (un->un_f_opt_fab_devid == TRUE) {
6309		/*
6310		 * Depending on EINVAL isn't reliable, since a reserved disk
6311		 * may result in invalid geometry, so check to make sure a
6312		 * reservation conflict did not occur during attach.
6313		 */
6314		if ((sd_get_devid(un) == EINVAL) &&
6315		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
6316			/*
6317			 * The devid is invalid AND there is no reservation
6318			 * conflict.  Fabricate a new devid.
6319			 */
6320			(void) sd_create_devid(un);
6321		}
6322
6323		/* Register the devid if it exists */
6324		if (un->un_devid != NULL) {
6325			(void) ddi_devid_register(SD_DEVINFO(un),
6326			    un->un_devid);
6327			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6328			    "sd_register_devid: Devid Fabricated\n");
6329		}
6330		return;
6331	}
6332
6333	/*
6334	 * We check the availability of the World Wide Name (0x83) and Unit
6335	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
6336	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
6337	 * 0x83 is available, that is the best choice.  Our next choice is
6338	 * 0x80.  If neither is available, we munge the devid from the device
6339	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
6340	 * to fabricate a devid for non-Sun qualified disks.
6341	 */
6342	if (sd_check_vpd_page_support(un) == 0) {
6343		/* collect page 80 data if available */
6344		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
6345
6346			mutex_exit(SD_MUTEX(un));
6347			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
6348			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
6349			    0x01, 0x80, &inq80_resid);
6350
6351			if (rval != 0) {
6352				kmem_free(inq80, inq80_len);
6353				inq80 = NULL;
6354				inq80_len = 0;
6355			}
6356			mutex_enter(SD_MUTEX(un));
6357		}
6358
6359		/* collect page 83 data if available */
6360		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
6361			mutex_exit(SD_MUTEX(un));
6362			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
6363			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
6364			    0x01, 0x83, &inq83_resid);
6365
6366			if (rval != 0) {
6367				kmem_free(inq83, inq83_len);
6368				inq83 = NULL;
6369				inq83_len = 0;
6370			}
6371			mutex_enter(SD_MUTEX(un));
6372		}
6373	}
6374
6375	/* encode best devid possible based on data available */
6376	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
6377	    (char *)ddi_driver_name(SD_DEVINFO(un)),
6378	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
6379	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
6380	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
6381
6382		/* devid successfully encoded, register devid */
6383		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
6384
6385	} else {
6386		/*
6387		 * Unable to encode a devid based on data available.
6388		 * This is not a Sun qualified disk.  Older Sun disk
6389		 * drives that have the SD_FAB_DEVID property
6390		 * set in the disk_table and non-Sun qualified
6391		 * disks are treated in the same manner.  These
6392		 * drives manage their devids by storing them in
6393		 * the last 2 available sectors on the drive and
6394		 * have them fabricated by the ddi layer by
6395		 * calling ddi_devid_init and passing the
6396		 * DEVID_FAB flag.
6397		 * Create a fabricated devid only if no
6398		 * fabricated devid already exists.
6399		 */
6400		if (sd_get_devid(un) == EINVAL) {
6401			(void) sd_create_devid(un);
6402		}
6403		un->un_f_opt_fab_devid = TRUE;
6404
6405		/* Register the devid if it exists */
6406		if (un->un_devid != NULL) {
6407			(void) ddi_devid_register(SD_DEVINFO(un),
6408			    un->un_devid);
6409			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6410			    "sd_register_devid: devid fabricated using "
6411			    "ddi framework\n");
6412		}
6413	}
6414
6415	/* clean up resources */
6416	if (inq80 != NULL) {
6417		kmem_free(inq80, inq80_len);
6418	}
6419	if (inq83 != NULL) {
6420		kmem_free(inq83, inq83_len);
6421	}
6422}
6423
6424static daddr_t
6425sd_get_devid_block(struct sd_lun *un)
6426{
6427	daddr_t			spc, blk, head, cyl;
6428
6429	if ((un->un_f_geometry_is_valid == FALSE) ||
6430	    (un->un_solaris_size < DK_LABEL_LOC))
6431		return (-1);
6432
6433	if (un->un_vtoc.v_sanity != VTOC_SANE) {
6434		/* EFI labeled */
6435		if (un->un_reserved != -1) {
6436			blk = un->un_map[un->un_reserved].dkl_cylno;
6437		} else {
6438			return (-1);
6439		}
6440	} else {
6441		/* SMI labeled */
6442		/* this geometry doesn't allow us to write a devid */
6443		if (un->un_g.dkg_acyl < 2) {
6444			return (-1);
6445		}
6446
6447		/*
6448		 * Subtracting 2 guarantees that the next-to-last
6449		 * cylinder is used.
6450		 */
6451		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6452		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6453		head = un->un_g.dkg_nhead - 1;
6454		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6455		    (head * un->un_g.dkg_nsect) + 1;
6456	}
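	/*
	 * For example, with a hypothetical SMI geometry of ncyl = 1000,
	 * acyl = 2, nhead = 16, nsect = 63 and apc = 0, the computation
	 * above gives cyl = 1000, spc = 1008, head = 15, so
	 * blk = 1000 * 1008 + 15 * 63 + 1 = 1008946: the second sector
	 * of the last track of the next-to-last cylinder.
	 */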
6457	return (blk);
6458}
6459
6460/*
6461 *    Function: sd_get_devid
6462 *
6463 * Description: This routine will return 0 if a valid device id has been
6464 *		obtained from the target and stored in the soft state. If a
6465 *		valid device id has not been previously read and stored, a
6466 *		read attempt will be made.
6467 *
6468 *   Arguments: un - driver soft state (unit) structure
6469 *
6470 * Return Code: 0 if we successfully get the device id, else an errno value
6471 *
6472 *     Context: Kernel Thread
6473 */
6474
6475static int
6476sd_get_devid(struct sd_lun *un)
6477{
6478	struct dk_devid		*dkdevid;
6479	ddi_devid_t		tmpid;
6480	uint_t			*ip;
6481	size_t			sz;
6482	daddr_t			blk;
6483	int			status;
6484	int			chksum;
6485	int			i;
6486	size_t			buffer_size;
6487
6488	ASSERT(un != NULL);
6489	ASSERT(mutex_owned(SD_MUTEX(un)));
6490
6491	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6492	    un);
6493
6494	if (un->un_devid != NULL) {
6495		return (0);
6496	}
6497
6498	blk = sd_get_devid_block(un);
6499	if (blk < 0)
6500		return (EINVAL);
6501
6502	/*
6503	 * Read and verify device id, stored in the reserved cylinders at the
6504	 * end of the disk. Backup label is on the odd sectors of the last
6505	 * track of the last cylinder. The device id will be on a track of
6506	 * the next-to-last cylinder.
6507	 */
6508	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6509	mutex_exit(SD_MUTEX(un));
6510	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6511	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6512	    SD_PATH_DIRECT);
6513	if (status != 0) {
6514		goto error;
6515	}
6516
6517	/* Validate the revision */
6518	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6519	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6520		status = EINVAL;
6521		goto error;
6522	}
6523
6524	/* Calculate the checksum */
6525	chksum = 0;
6526	ip = (uint_t *)dkdevid;
6527	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6528	    i++) {
6529		chksum ^= ip[i];
6530	}
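	/*
	 * Note: the devid sector is treated as an array of 32-bit words;
	 * the loop above XORs together every word except the last one,
	 * which holds the stored checksum that DKD_GETCHKSUM extracts
	 * below. An intact sector therefore yields a matching value.
	 */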
6531
6532	/* Compare the checksums */
6533	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6534		status = EINVAL;
6535		goto error;
6536	}
6537
6538	/* Validate the device id */
6539	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6540		status = EINVAL;
6541		goto error;
6542	}
6543
6544	/*
6545	 * Store the device id in the driver soft state
6546	 */
6547	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6548	tmpid = kmem_alloc(sz, KM_SLEEP);
6549
6550	mutex_enter(SD_MUTEX(un));
6551
6552	un->un_devid = tmpid;
6553	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6554
6555	kmem_free(dkdevid, buffer_size);
6556
6557	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6558
6559	return (status);
6560error:
6561	mutex_enter(SD_MUTEX(un));
6562	kmem_free(dkdevid, buffer_size);
6563	return (status);
6564}
6565
6566
6567/*
6568 *    Function: sd_create_devid
6569 *
6570 * Description: This routine will fabricate the device id and write it
6571 *		to the disk.
6572 *
6573 *   Arguments: un - driver soft state (unit) structure
6574 *
6575 * Return Code: the fabricated device id, or NULL on failure
6576 *
6577 *     Context: Kernel Thread
6578 */
6579
6580static ddi_devid_t
6581sd_create_devid(struct sd_lun *un)
6582{
6583	ASSERT(un != NULL);
6584
6585	/* Fabricate the devid */
6586	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6587	    == DDI_FAILURE) {
6588		return (NULL);
6589	}
6590
6591	/* Write the devid to disk */
6592	if (sd_write_deviceid(un) != 0) {
6593		ddi_devid_free(un->un_devid);
6594		un->un_devid = NULL;
6595	}
6596
6597	return (un->un_devid);
6598}
6599
6600
6601/*
6602 *    Function: sd_write_deviceid
6603 *
6604 * Description: This routine will write the device id to the disk
6605 *		reserved sector.
6606 *
6607 *   Arguments: un - driver soft state (unit) structure
6608 *
6609 * Return Code: -1 if the devid block cannot be determined
6610 *		otherwise the value returned by sd_send_scsi_WRITE
6611 *
6612 *     Context: Kernel Thread
6613 */
6614
6615static int
6616sd_write_deviceid(struct sd_lun *un)
6617{
6618	struct dk_devid		*dkdevid;
6619	daddr_t			blk;
6620	uint_t			*ip, chksum;
6621	int			status;
6622	int			i;
6623
6624	ASSERT(mutex_owned(SD_MUTEX(un)));
6625
6626	blk = sd_get_devid_block(un);
6627	if (blk < 0)
6628		return (-1);
6629	mutex_exit(SD_MUTEX(un));
6630
6631	/* Allocate the buffer */
6632	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6633
6634	/* Fill in the revision */
6635	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6636	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6637
6638	/* Copy in the device id */
6639	mutex_enter(SD_MUTEX(un));
6640	bcopy(un->un_devid, &dkdevid->dkd_devid,
6641	    ddi_devid_sizeof(un->un_devid));
6642	mutex_exit(SD_MUTEX(un));
6643
6644	/* Calculate the checksum */
6645	chksum = 0;
6646	ip = (uint_t *)dkdevid;
6647	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6648	    i++) {
6649		chksum ^= ip[i];
6650	}
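	/*
	 * Same XOR scheme as in sd_get_devid(): every 32-bit word of the
	 * sector except the last is folded into chksum, and DKD_FORMCHKSUM
	 * below stores the result in that last word.
	 */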
6651
6652	/* Fill-in checksum */
6653	DKD_FORMCHKSUM(chksum, dkdevid);
6654
6655	/* Write the reserved sector */
6656	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6657	    SD_PATH_DIRECT);
6658
6659	kmem_free(dkdevid, un->un_sys_blocksize);
6660
6661	mutex_enter(SD_MUTEX(un));
6662	return (status);
6663}
6664
6665
6666/*
6667 *    Function: sd_check_vpd_page_support
6668 *
6669 * Description: This routine sends an inquiry command with the EVPD bit set and
6670 *		a page code of 0x00 to the device. It is used to determine which
6671 *		vital product data (VPD) pages are available to find the
6672 *		devid. We are looking for pages 0x83 or 0x80. A return of
6673 *		-1 means the device does not support this command.
6674 *
6675 *   Arguments: un  - driver soft state (unit) structure
6676 *
6677 * Return Code: 0 - success
6678 *		-1 - the device does not support VPD pages
6679 *
6680 *     Context: This routine can sleep.
6681 */
6682
6683static int
6684sd_check_vpd_page_support(struct sd_lun *un)
6685{
6686	uchar_t	*page_list	= NULL;
6687	uchar_t	page_length	= 0xff;	/* Use max possible length */
6688	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6689	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6690	int    	rval		= 0;
6691	int	counter;
6692
6693	ASSERT(un != NULL);
6694	ASSERT(mutex_owned(SD_MUTEX(un)));
6695
6696	mutex_exit(SD_MUTEX(un));
6697
6698	/*
6699	 * We'll set the page length to the maximum to save figuring it out
6700	 * with an additional call.
6701	 */
6702	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6703
6704	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6705	    page_code, NULL);
6706
6707	mutex_enter(SD_MUTEX(un));
6708
6709	/*
6710	 * Now we must validate that the device accepted the command, as some
6711	 * drives do not support it.  If the drive does support it, we will
6712	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6713	 * not, we return -1.
6714	 */
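	/*
	 * For reference, the parsing below relies on the standard (SPC)
	 * layout of the "Supported VPD Pages" page:
	 *
	 *	byte 0:		peripheral qualifier / device type
	 *	byte 1:		page code (0x00)
	 *	byte 2:		reserved
	 *	byte 3:		page length (n)
	 *	bytes 4..3+n:	supported page codes, in ascending order
	 */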
6715	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6716		/* Loop to find one of the 2 pages we need */
6717		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6718
6719		/*
6720		 * Pages are returned in ascending order, and 0x83 is what we
6721		 * are hoping for.
6722		 */
6723		while ((page_list[counter] <= 0x83) &&
6724		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6725		    VPD_HEAD_OFFSET))) {
6726			/*
6727			 * page_list[VPD_PAGE_LENGTH] counts the page codes
6728			 * after the header, hence the loop bound offset above.
6729			 */
6730
6731			switch (page_list[counter]) {
6732			case 0x00:
6733				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6734				break;
6735			case 0x80:
6736				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6737				break;
6738			case 0x81:
6739				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6740				break;
6741			case 0x82:
6742				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6743				break;
6744			case 0x83:
6745				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6746				break;
6747			}
6748			counter++;
6749		}
6750
6751	} else {
6752		rval = -1;
6753
6754		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6755		    "sd_check_vpd_page_support: This drive does not implement "
6756		    "VPD pages.\n");
6757	}
6758
6759	kmem_free(page_list, page_length);
6760
6761	return (rval);
6762}
6763
6764
6765/*
6766 *    Function: sd_setup_pm
6767 *
6768 * Description: Initialize Power Management on the device
6769 *
6770 *     Context: Kernel Thread
6771 */
6772
6773static void
6774sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6775{
6776	uint_t	log_page_size;
6777	uchar_t	*log_page_data;
6778	int	rval;
6779
6780	/*
6781	 * Since we are called from attach, holding a mutex for
6782	 * un is unnecessary. Because some of the routines called
6783	 * from here require SD_MUTEX to not be held, assert this
6784	 * right up front.
6785	 */
6786	ASSERT(!mutex_owned(SD_MUTEX(un)));
6787	/*
6788	 * Since the sd device does not have the 'reg' property,
6789	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6790	 * The following code is to tell cpr that this device
6791	 * DOES need to be suspended and resumed.
6792	 */
6793	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6794	    "pm-hardware-state", "needs-suspend-resume");
6795
6796	/*
6797	 * This complies with the new power management framework
6798	 * for certain desktop machines. Create the pm_components
6799	 * property as a string array property.
6800	 */
6801	if (un->un_f_pm_supported) {
6802		/*
6803		 * Not all devices have a motor, so try it first.
6804		 * Some devices may return ILLEGAL REQUEST, and some
6805		 * may hang.
6806		 * The following START_STOP_UNIT is used to check whether
6807		 * the target device has a motor.
6808		 */
6809		un->un_f_start_stop_supported = TRUE;
6810		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6811		    SD_PATH_DIRECT) != 0) {
6812			un->un_f_start_stop_supported = FALSE;
6813		}
6814
6815		/*
6816		 * Create the pm properties anyway; otherwise the parent
6817		 * can't go to sleep.
6818		 */
6819		(void) sd_create_pm_components(devi, un);
6820		un->un_f_pm_is_enabled = TRUE;
6821		return;
6822	}
6823
6824	if (!un->un_f_log_sense_supported) {
6825		un->un_power_level = SD_SPINDLE_ON;
6826		un->un_f_pm_is_enabled = FALSE;
6827		return;
6828	}
6829
6830	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6831
6832#ifdef	SDDEBUG
6833	if (sd_force_pm_supported) {
6834		/* Force a successful result */
6835		rval = 1;
6836	}
6837#endif
6838
6839	/*
6840	 * If the start-stop cycle counter log page is not supported
6841	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6842	 * then we should not create the pm_components property.
6843	 */
6844	if (rval == -1) {
6845		/*
6846		 * Error.
6847		 * Reading log sense failed; most likely this is
6848		 * an older drive that does not support log sense.
6849		 * If this fails, auto-pm is not supported.
6850		 */
6851		un->un_power_level = SD_SPINDLE_ON;
6852		un->un_f_pm_is_enabled = FALSE;
6853
6854	} else if (rval == 0) {
6855		/*
6856		 * Page not found.
6857		 * The start/stop cycle counter is implemented as page
6858		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
6859		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6860		 */
6861		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6862			/*
6863			 * Page found, use this one.
6864			 */
6865			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6866			un->un_f_pm_is_enabled = TRUE;
6867		} else {
6868			/*
6869			 * Error or page not found.
6870			 * Auto-pm is not supported for this device.
6871			 */
6872			un->un_power_level = SD_SPINDLE_ON;
6873			un->un_f_pm_is_enabled = FALSE;
6874		}
6875	} else {
6876		/*
6877		 * Page found, use it.
6878		 */
6879		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6880		un->un_f_pm_is_enabled = TRUE;
6881	}
6882
6883
6884	if (un->un_f_pm_is_enabled == TRUE) {
6885		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6886		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6887
6888		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6889		    log_page_size, un->un_start_stop_cycle_page,
6890		    0x01, 0, SD_PATH_DIRECT);
6891#ifdef	SDDEBUG
6892		if (sd_force_pm_supported) {
6893			/* Force a successful result */
6894			rval = 0;
6895		}
6896#endif
6897
6898		/*
6899		 * If the log sense of the start/stop cycle counter page
6900		 * succeeds, then power management is supported and we can
6901		 * enable auto-pm.
6902		 */
6903		if (rval == 0)  {
6904			(void) sd_create_pm_components(devi, un);
6905		} else {
6906			un->un_power_level = SD_SPINDLE_ON;
6907			un->un_f_pm_is_enabled = FALSE;
6908		}
6909
6910		kmem_free(log_page_data, log_page_size);
6911	}
6912}
6913
6914
6915/*
6916 *    Function: sd_create_pm_components
6917 *
6918 * Description: Initialize PM property.
6919 *
6920 *     Context: Kernel thread context
6921 */
6922
6923static void
6924sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6925{
6926	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
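	/*
	 * The pm-components(9P) property above declares a single power
	 * component (the spindle motor) with two levels: 0 = off and
	 * 1 = on. These are the same component and level values passed
	 * to pm_raise_power() and received by sdpower() in this file.
	 */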
6927
6928	ASSERT(!mutex_owned(SD_MUTEX(un)));
6929
6930	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6931	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6932		/*
6933		 * When components are initially created they are idle;
6934		 * power up any non-removables.
6935		 * Note: the return value of pm_raise_power can't be used
6936		 * for determining if PM should be enabled for this device.
6937		 * Even if you check the return values and remove this
6938		 * property created above, the PM framework will not honor the
6939		 * change after the first call to pm_raise_power. Hence,
6940		 * removal of that property does not help if pm_raise_power
6941		 * fails. In the case of removable media, the start/stop
6942		 * will fail if the media is not present.
6943		 */
6944		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6945		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6946			mutex_enter(SD_MUTEX(un));
6947			un->un_power_level = SD_SPINDLE_ON;
6948			mutex_enter(&un->un_pm_mutex);
6949			/* Set to on and not busy. */
6950			un->un_pm_count = 0;
6951		} else {
6952			mutex_enter(SD_MUTEX(un));
6953			un->un_power_level = SD_SPINDLE_OFF;
6954			mutex_enter(&un->un_pm_mutex);
6955			/* Set to off. */
6956			un->un_pm_count = -1;
6957		}
6958		mutex_exit(&un->un_pm_mutex);
6959		mutex_exit(SD_MUTEX(un));
6960	} else {
6961		un->un_power_level = SD_SPINDLE_ON;
6962		un->un_f_pm_is_enabled = FALSE;
6963	}
6964}
6965
6966
6967/*
6968 *    Function: sd_ddi_suspend
6969 *
6970 * Description: Performs system power-down operations. This includes
6971 *		setting the drive state to indicate it is suspended so
6972 *		that no new commands will be accepted. Also, wait for
6973 *		all commands that are in transport or queued to a timer
6974 *		for retry to complete. All timeout threads are cancelled.
6975 *
6976 * Return Code: DDI_FAILURE or DDI_SUCCESS
6977 *
6978 *     Context: Kernel thread context
6979 */
6980
6981static int
6982sd_ddi_suspend(dev_info_t *devi)
6983{
6984	struct	sd_lun	*un;
6985	clock_t		wait_cmds_complete;
6986
6987	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6988	if (un == NULL) {
6989		return (DDI_FAILURE);
6990	}
6991
6992	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6993
6994	mutex_enter(SD_MUTEX(un));
6995
6996	/* Return success if the device is already suspended. */
6997	if (un->un_state == SD_STATE_SUSPENDED) {
6998		mutex_exit(SD_MUTEX(un));
6999		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
7000		    "device already suspended, exiting\n");
7001		return (DDI_SUCCESS);
7002	}
7003
7004	/* Return failure if the device is being used by HA */
7005	if (un->un_resvd_status &
7006	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
7007		mutex_exit(SD_MUTEX(un));
7008		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
7009		    "device in use by HA, exiting\n");
7010		return (DDI_FAILURE);
7011	}
7012
7013	/*
7014	 * Return failure if the device is in a resource wait
7015	 * or power changing state.
7016	 */
7017	if ((un->un_state == SD_STATE_RWAIT) ||
7018	    (un->un_state == SD_STATE_PM_CHANGING)) {
7019		mutex_exit(SD_MUTEX(un));
7020		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
7021		    "device in resource wait state, exiting\n");
7022		return (DDI_FAILURE);
7023	}
7024
7025
7026	un->un_save_state = un->un_last_state;
7027	New_state(un, SD_STATE_SUSPENDED);
7028
7029	/*
7030	 * Wait for all commands that are in transport or queued to a timer
7031	 * for retry to complete.
7032	 *
7033	 * While waiting, no new commands will be accepted or sent because of
7034	 * the new state we set above.
7035	 *
7036	 * Wait till the current operation has completed. If we are in the
7037	 * resource wait state (with an intr outstanding) then we need to wait
7038	 * till the intr completes and starts the next cmd. We want to wait for
7039	 * sd_wait_cmds_complete seconds before failing the DDI_SUSPEND.
7040	 */
7041	wait_cmds_complete = ddi_get_lbolt() +
7042	    (sd_wait_cmds_complete * drv_usectohz(1000000));
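	/*
	 * wait_cmds_complete is an absolute deadline in lbolt ticks:
	 * "now" plus sd_wait_cmds_complete seconds worth of ticks.
	 * cv_timedwait(9F) returns -1 once that deadline passes without
	 * a signal, which the loop below treats as failure.
	 */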
7043
7044	while (un->un_ncmds_in_transport != 0) {
7045		/*
7046		 * Fail if commands do not finish in the specified time.
7047		 */
7048		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
7049		    wait_cmds_complete) == -1) {
7050			/*
7051			 * Undo the state changes made above. Everything
7052			 * must go back to its original value.
7053			 */
7054			Restore_state(un);
7055			un->un_last_state = un->un_save_state;
7056			/* Wake up any threads that might be waiting. */
7057			cv_broadcast(&un->un_suspend_cv);
7058			mutex_exit(SD_MUTEX(un));
7059			SD_ERROR(SD_LOG_IO_PM, un,
7060			    "sd_ddi_suspend: failed due to outstanding cmds\n");
7061			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
7062			return (DDI_FAILURE);
7063		}
7064	}
7065
7066	/*
7067	 * Cancel SCSI watch thread and timeouts, if any are active
7068	 */
7069
7070	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
7071		opaque_t temp_token = un->un_swr_token;
7072		mutex_exit(SD_MUTEX(un));
7073		scsi_watch_suspend(temp_token);
7074		mutex_enter(SD_MUTEX(un));
7075	}
7076
7077	if (un->un_reset_throttle_timeid != NULL) {
7078		timeout_id_t temp_id = un->un_reset_throttle_timeid;
7079		un->un_reset_throttle_timeid = NULL;
7080		mutex_exit(SD_MUTEX(un));
7081		(void) untimeout(temp_id);
7082		mutex_enter(SD_MUTEX(un));
7083	}
7084
7085	if (un->un_dcvb_timeid != NULL) {
7086		timeout_id_t temp_id = un->un_dcvb_timeid;
7087		un->un_dcvb_timeid = NULL;
7088		mutex_exit(SD_MUTEX(un));
7089		(void) untimeout(temp_id);
7090		mutex_enter(SD_MUTEX(un));
7091	}
7092
7093	mutex_enter(&un->un_pm_mutex);
7094	if (un->un_pm_timeid != NULL) {
7095		timeout_id_t temp_id = un->un_pm_timeid;
7096		un->un_pm_timeid = NULL;
7097		mutex_exit(&un->un_pm_mutex);
7098		mutex_exit(SD_MUTEX(un));
7099		(void) untimeout(temp_id);
7100		mutex_enter(SD_MUTEX(un));
7101	} else {
7102		mutex_exit(&un->un_pm_mutex);
7103	}
7104
7105	if (un->un_retry_timeid != NULL) {
7106		timeout_id_t temp_id = un->un_retry_timeid;
7107		un->un_retry_timeid = NULL;
7108		mutex_exit(SD_MUTEX(un));
7109		(void) untimeout(temp_id);
7110		mutex_enter(SD_MUTEX(un));
7111	}
7112
7113	if (un->un_direct_priority_timeid != NULL) {
7114		timeout_id_t temp_id = un->un_direct_priority_timeid;
7115		un->un_direct_priority_timeid = NULL;
7116		mutex_exit(SD_MUTEX(un));
7117		(void) untimeout(temp_id);
7118		mutex_enter(SD_MUTEX(un));
7119	}
7120
7121	if (un->un_f_is_fibre == TRUE) {
7122		/*
7123		 * Remove callbacks for insert and remove events
7124		 */
7125		if (un->un_insert_event != NULL) {
7126			mutex_exit(SD_MUTEX(un));
7127			(void) ddi_remove_event_handler(un->un_insert_cb_id);
7128			mutex_enter(SD_MUTEX(un));
7129			un->un_insert_event = NULL;
7130		}
7131
7132		if (un->un_remove_event != NULL) {
7133			mutex_exit(SD_MUTEX(un));
7134			(void) ddi_remove_event_handler(un->un_remove_cb_id);
7135			mutex_enter(SD_MUTEX(un));
7136			un->un_remove_event = NULL;
7137		}
7138	}
7139
7140	mutex_exit(SD_MUTEX(un));
7141
7142	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
7143
7144	return (DDI_SUCCESS);
7145}
7146
7147
7148/*
7149 *    Function: sd_ddi_pm_suspend
7150 *
7151 * Description: Set the drive state to low power.
7152 *		Someone else is required to actually change the drive
7153 *		power level.
7154 *
7155 *   Arguments: un - driver soft state (unit) structure
7156 *
7157 * Return Code: DDI_FAILURE or DDI_SUCCESS
7158 *
7159 *     Context: Kernel thread context
7160 */
7161
7162static int
7163sd_ddi_pm_suspend(struct sd_lun *un)
7164{
7165	ASSERT(un != NULL);
7166	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
7167
7168	ASSERT(!mutex_owned(SD_MUTEX(un)));
7169	mutex_enter(SD_MUTEX(un));
7170
7171	/*
7172	 * Exit if power management is not enabled for this device, or if
7173	 * the device is being used by HA.
7174	 */
7175	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
7176	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
7177		mutex_exit(SD_MUTEX(un));
7178		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
7179		return (DDI_SUCCESS);
7180	}
7181
7182	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
7183	    un->un_ncmds_in_driver);
7184
7185	/*
7186	 * See if the device is not busy, i.e.:
7187	 *    - we have no commands in the driver for this device
7188	 *    - we are not waiting for resources
7189	 */
7190	if ((un->un_ncmds_in_driver == 0) &&
7191	    (un->un_state != SD_STATE_RWAIT)) {
7192		/*
7193		 * The device is not busy, so it is OK to go to low power state.
7194		 * Indicate low power, but rely on someone else to actually
7195		 * change it.
7196		 */
7197		mutex_enter(&un->un_pm_mutex);
7198		un->un_pm_count = -1;
7199		mutex_exit(&un->un_pm_mutex);
7200		un->un_power_level = SD_SPINDLE_OFF;
7201	}
7202
7203	mutex_exit(SD_MUTEX(un));
7204
7205	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
7206
7207	return (DDI_SUCCESS);
7208}
7209
7210
7211/*
7212 *    Function: sd_ddi_resume
7213 *
7214 * Description: Performs system power-up operations.
7215 *
7216 * Return Code: DDI_SUCCESS
7217 *		DDI_FAILURE
7218 *
7219 *     Context: Kernel thread context
7220 */
7221
7222static int
7223sd_ddi_resume(dev_info_t *devi)
7224{
7225	struct	sd_lun	*un;
7226
7227	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
7228	if (un == NULL) {
7229		return (DDI_FAILURE);
7230	}
7231
7232	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
7233
7234	mutex_enter(SD_MUTEX(un));
7235	Restore_state(un);
7236
7237	/*
7238	 * Restore the state which was saved to give
7239	 * un_last_state the right value.
7240	 */
7241	un->un_last_state = un->un_save_state;
7242	/*
7243	 * Note: throttle comes back at full.
7244	 * Also note: this MUST be done before calling pm_raise_power,
7245	 * otherwise the system can get hung in biowait. The scenario where
7246	 * this will happen is under cpr suspend. Writing of the system
7247	 * state goes through sddump, which writes 0 to un_throttle. If
7248	 * writing the system state then fails (for example, if the partition
7249	 * is too small), then cpr attempts a resume. If the throttle isn't
7250	 * restored from the saved value until after calling pm_raise_power,
7251	 * then cmds sent in sdpower are not transported and sd_send_scsi_cmd
7252	 * hangs in biowait.
7253	 */
7254	un->un_throttle = un->un_saved_throttle;
7255
7256	/*
7257	 * The chance of failure is very rare, as the only command done in the
7258	 * power entry point is the START command when transitioning from 0->1
7259	 * or unknown->1. Put it in the SPINDLE ON state irrespective of the
7260	 * state at which suspend was done. Ignore the return value, as the
7261	 * resume should not be failed. In the case of removable media the
7262	 * media need not be inserted, and hence there is a chance that raise
7263	 * power will fail with media not present.
7264	 */
7265	if (un->un_f_attach_spinup) {
7266		mutex_exit(SD_MUTEX(un));
7267		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
7268		mutex_enter(SD_MUTEX(un));
7269	}
7270
7271	/*
7272	 * Don't broadcast to the suspend cv and therefore possibly
7273	 * start I/O until after power has been restored.
7274	 */
7275	cv_broadcast(&un->un_suspend_cv);
7276	cv_broadcast(&un->un_state_cv);
7277
7278	/* restart thread */
7279	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
7280		scsi_watch_resume(un->un_swr_token);
7281	}
7282
7283#if (defined(__fibre))
7284	if (un->un_f_is_fibre == TRUE) {
7285		/*
7286		 * Add callbacks for insert and remove events
7287		 */
7288		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
7289			sd_init_event_callbacks(un);
7290		}
7291	}
7292#endif
7293
7294	/*
7295	 * Transport any pending commands to the target.
7296	 *
7297	 * If this is a low-activity device, commands in the queue will have to
7298	 * wait until new commands come in, which may take a while. Also, we
7299	 * specifically don't check un_ncmds_in_transport because we know that
7300	 * there really are no commands in progress after the unit was
7301	 * suspended and we could have reached the throttle level, been
7302	 * suspended, and have no new commands coming in for awhile. Highly
7303	 * unlikely, but so is the low-activity disk scenario.
7304	 */
7305	ddi_xbuf_dispatch(un->un_xbuf_attr);
7306
7307	sd_start_cmds(un, NULL);
7308	mutex_exit(SD_MUTEX(un));
7309
7310	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
7311
7312	return (DDI_SUCCESS);
7313}
7314
7315
7316/*
7317 *    Function: sd_ddi_pm_resume
7318 *
7319 * Description: Set the drive state to powered on.
7320 *		Someone else is required to actually change the drive
7321 *		power level.
7322 *
7323 *   Arguments: un - driver soft state (unit) structure
7324 *
7325 * Return Code: DDI_SUCCESS
7326 *
7327 *     Context: Kernel thread context
7328 */
7329
7330static int
7331sd_ddi_pm_resume(struct sd_lun *un)
7332{
7333	ASSERT(un != NULL);
7334
7335	ASSERT(!mutex_owned(SD_MUTEX(un)));
7336	mutex_enter(SD_MUTEX(un));
7337	un->un_power_level = SD_SPINDLE_ON;
7338
7339	ASSERT(!mutex_owned(&un->un_pm_mutex));
7340	mutex_enter(&un->un_pm_mutex);
7341	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
7342		un->un_pm_count++;
7343		ASSERT(un->un_pm_count == 0);
7344		/*
7345		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
7346		 * un_suspend_cv is for a system resume, not a power management
7347		 * device resume. (4297749)
7348		 *	 cv_broadcast(&un->un_suspend_cv);
7349		 */
7350	}
7351	mutex_exit(&un->un_pm_mutex);
7352	mutex_exit(SD_MUTEX(un));
7353
7354	return (DDI_SUCCESS);
7355}
7356
7357
7358/*
7359 *    Function: sd_pm_idletimeout_handler
7360 *
7361 * Description: A timer routine that's active only while a device is busy.
7362 *		The purpose is to extend slightly the pm framework's busy
7363 *		view of the device to prevent busy/idle thrashing for
7364 *		back-to-back commands. Do this by comparing the current time
7365 *		to the time at which the last command completed and when the
7366 *		difference is greater than sd_pm_idletime, call
7367 *		pm_idle_component. In addition to indicating idle to the pm
7368 *		framework, update the chain type to again use the internal pm
7369 *		layers of the driver.
7370 *
7371 *   Arguments: arg - driver soft state (unit) structure
7372 *
7373 *     Context: Executes in a timeout(9F) thread context
7374 */
7375
7376static void
7377sd_pm_idletimeout_handler(void *arg)
7378{
7379	struct sd_lun *un = arg;
7380
7381	time_t	now;
7382
7383	mutex_enter(&sd_detach_mutex);
7384	if (un->un_detach_count != 0) {
7385		/* Abort if the instance is detaching */
7386		mutex_exit(&sd_detach_mutex);
7387		return;
7388	}
7389	mutex_exit(&sd_detach_mutex);
7390
7391	now = ddi_get_time();
7392	/*
7393	 * Grab both mutexes, in the proper order, since we're accessing
7394	 * both PM and softstate variables.
7395	 */
7396	mutex_enter(SD_MUTEX(un));
7397	mutex_enter(&un->un_pm_mutex);
7398	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7399	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7400		/*
7401		 * Update the chain types.
7402		 * This takes effect on the next new command received.
7403		 */
7404		if (un->un_f_non_devbsize_supported) {
7405			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7406		} else {
7407			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7408		}
7409		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7410
7411		SD_TRACE(SD_LOG_IO_PM, un,
7412		    "sd_pm_idletimeout_handler: idling device\n");
7413		(void) pm_idle_component(SD_DEVINFO(un), 0);
7414		un->un_pm_idle_timeid = NULL;
7415	} else {
7416		un->un_pm_idle_timeid =
7417		    timeout(sd_pm_idletimeout_handler, un,
7418		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
7419	}
7420	mutex_exit(&un->un_pm_mutex);
7421	mutex_exit(SD_MUTEX(un));
7422}
7423
7424
7425/*
7426 *    Function: sd_pm_timeout_handler
7427 *
7428 * Description: Callback to tell framework we are idle.
7429 *
7430 *     Context: timeout(9f) thread context.
7431 */
7432
7433static void
7434sd_pm_timeout_handler(void *arg)
7435{
7436	struct sd_lun *un = arg;
7437
7438	(void) pm_idle_component(SD_DEVINFO(un), 0);
7439	mutex_enter(&un->un_pm_mutex);
7440	un->un_pm_timeid = NULL;
7441	mutex_exit(&un->un_pm_mutex);
7442}
7443
7444
7445/*
7446 *    Function: sdpower
7447 *
7448 * Description: PM entry point.
7449 *
7450 * Return Code: DDI_SUCCESS
7451 *		DDI_FAILURE
7452 *
7453 *     Context: Kernel thread context
7454 */
7455
7456static int
7457sdpower(dev_info_t *devi, int component, int level)
7458{
7459	struct sd_lun	*un;
7460	int		instance;
7461	int		rval = DDI_SUCCESS;
7462	uint_t		i, log_page_size, maxcycles, ncycles;
7463	uchar_t		*log_page_data;
7464	int		log_sense_page;
7465	int		medium_present;
7466	time_t		intvlp;
7467	dev_t		dev;
7468	struct pm_trans_data	sd_pm_tran_data;
7469	uchar_t		save_state;
7470	int		sval;
7471	uchar_t		state_before_pm;
7472	int		got_semaphore_here;
7473
7474	instance = ddi_get_instance(devi);
7475
7476	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7477	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7478	    component != 0) {
7479		return (DDI_FAILURE);
7480	}
7481
7482	dev = sd_make_device(SD_DEVINFO(un));
7483
7484	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7485
7486	/*
7487	 * Must synchronize power down with close.
7488	 * Attempt to decrement/acquire the open/close semaphore,
7489	 * but do NOT wait on it. If it's not greater than zero,
7490	 * i.e. it can't be decremented without waiting, then
7491	 * someone else, either open or close, already has it
7492	 * and the try returns 0. Use that knowledge here to determine
7493	 * if it's OK to change the device power level.
7494	 * Also, only increment it on exit if it was decremented, i.e. gotten,
7495	 * here.
7496	 */
7497	got_semaphore_here = sema_tryp(&un->un_semoclose);
7498
7499	mutex_enter(SD_MUTEX(un));
7500
7501	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7502	    un->un_ncmds_in_driver);
7503
7504	/*
7505	 * If un_ncmds_in_driver is non-zero, commands are already being
7506	 * processed in the driver; if the semaphore was not acquired here,
7507	 * an open or close is in progress. Either way a request to go to
7508	 * low power can't be honored at the same time, so we need to
7509	 * return failure.
7510	 */
7511	if ((level == SD_SPINDLE_OFF) &&
7512	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7513		mutex_exit(SD_MUTEX(un));
7514
7515		if (got_semaphore_here != 0) {
7516			sema_v(&un->un_semoclose);
7517		}
7518		SD_TRACE(SD_LOG_IO_PM, un,
7519		    "sdpower: exit, device has queued cmds.\n");
7520		return (DDI_FAILURE);
7521	}
7522
7523	/*
7524	 * If the state is OFFLINE, the disk is completely dead. Putting the
7525	 * disk in the on or off state requires sending it commands, which
7526	 * would fail anyway, so just return failure here.
7527	 *
7528	 * Power changes to a device that's OFFLINE or SUSPENDED
7529	 * are not allowed.
7530	 */
7531	if ((un->un_state == SD_STATE_OFFLINE) ||
7532	    (un->un_state == SD_STATE_SUSPENDED)) {
7533		mutex_exit(SD_MUTEX(un));
7534
7535		if (got_semaphore_here != 0) {
7536			sema_v(&un->un_semoclose);
7537		}
7538		SD_TRACE(SD_LOG_IO_PM, un,
7539		    "sdpower: exit, device is off-line.\n");
7540		return (DDI_FAILURE);
7541	}
7542
7543	/*
7544	 * Change the device's state to indicate its power level
7545	 * is being changed. Do this to prevent a power off in the
7546	 * middle of commands, which is especially bad on devices
7547	 * that are really powered off instead of just spun down.
7548	 */
7549	state_before_pm = un->un_state;
7550	un->un_state = SD_STATE_PM_CHANGING;
7551
7552	mutex_exit(SD_MUTEX(un));
7553
7554	/*
7555	 * If the "pm-capable" property is set to TRUE by the HBA driver,
7556	 * bypass the following checking; otherwise, check the log
7557	 * sense information for this device.
7558	 */
7559	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
7560		/*
7561		 * Get the log sense information to understand whether
7562		 * the power cycle counts have gone beyond the threshold.
7563		 */
7564		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7565		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7566
7567		mutex_enter(SD_MUTEX(un));
7568		log_sense_page = un->un_start_stop_cycle_page;
7569		mutex_exit(SD_MUTEX(un));
7570
7571		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7572		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7573#ifdef	SDDEBUG
7574		if (sd_force_pm_supported) {
7575			/* Force a successful result */
7576			rval = 0;
7577		}
7578#endif
7579		if (rval != 0) {
7580			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7581			    "Log Sense Failed\n");
7582			kmem_free(log_page_data, log_page_size);
7583			/* Cannot support power management on those drives */
7584
7585			if (got_semaphore_here != 0) {
7586				sema_v(&un->un_semoclose);
7587			}
7588			/*
7589			 * On exit put the state back to its original value
7590			 * and broadcast to anyone waiting for the power
7591			 * change completion.
7592			 */
7593			mutex_enter(SD_MUTEX(un));
7594			un->un_state = state_before_pm;
7595			cv_broadcast(&un->un_suspend_cv);
7596			mutex_exit(SD_MUTEX(un));
7597			SD_TRACE(SD_LOG_IO_PM, un,
7598			    "sdpower: exit, Log Sense Failed.\n");
7599			return (DDI_FAILURE);
7600		}
7601
7602		/*
7603		 * From the page data - Convert the essential information to
7604		 * pm_trans_data
7605		 */
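		/*
		 * The offsets used below follow the SCSI start/stop cycle
		 * counter log page, with parameter values stored big-endian:
		 * the lifetime maximum cycle count occupies bytes 0x1c-0x1f,
		 * the accumulated cycle count bytes 0x24-0x27, and the date
		 * bytes copied into svc_date start at offset 8.
		 */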
7606		maxcycles =
7607		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7608		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7609
7610		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7611
7612		ncycles =
7613		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7614		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7615
7616		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7617
7618		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7619			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7620			    log_page_data[8+i];
7621		}
7622
7623		kmem_free(log_page_data, log_page_size);
7624
7625		/*
7626		 * Call the pm_trans_check routine to get the OK from
7627		 * the global policy.
7628		 */
7629
7630		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7631		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7632
7633		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7634#ifdef	SDDEBUG
7635		if (sd_force_pm_supported) {
7636			/* Force a successful result */
7637			rval = 1;
7638		}
7639#endif
7640		switch (rval) {
7641		case 0:
7642			/*
7643			 * Not OK to power cycle, or an error in the parameters
7644			 * passed. pm_trans_check has returned the advised time
7645			 * (intvlp) to wait before a power cycle should next be
7646			 * considered. Based on that we are supposed to pretend
7647			 * we are busy so that the pm framework will never call
7648			 * our power entry point. To do that, install a timeout
7649			 * handler and wait for the recommended time to elapse
7650			 * so that power management can be effective again.
7651			 *
7652			 * To effect this behavior, call pm_busy_component to
7653			 * indicate to the framework this device is busy.
7654			 * By not adjusting un_pm_count, the rest of PM in
7655			 * the driver will function normally, independently
7656			 * of this; but because the framework is told the device
7657			 * is busy it won't attempt powering down until it gets
7658			 * a matching idle. The timeout handler sends this.
7659			 * Note: sd_pm_entry can't be called here to do this
7660			 * because sdpower may have been called as a result
7661			 * of a call to pm_raise_power from within sd_pm_entry.
7662			 *
7663			 * If a timeout handler is already active then
7664			 * don't install another.
7665			 */
7666			mutex_enter(&un->un_pm_mutex);
7667			if (un->un_pm_timeid == NULL) {
7668				un->un_pm_timeid =
7669				    timeout(sd_pm_timeout_handler,
7670				    un, intvlp * drv_usectohz(1000000));
7671				mutex_exit(&un->un_pm_mutex);
7672				(void) pm_busy_component(SD_DEVINFO(un), 0);
7673			} else {
7674				mutex_exit(&un->un_pm_mutex);
7675			}
7676			if (got_semaphore_here != 0) {
7677				sema_v(&un->un_semoclose);
7678			}
7679			/*
7680			 * On exit put the state back to its original value
7681			 * and broadcast to anyone waiting for the power
7682			 * change completion.
7683			 */
7684			mutex_enter(SD_MUTEX(un));
7685			un->un_state = state_before_pm;
7686			cv_broadcast(&un->un_suspend_cv);
7687			mutex_exit(SD_MUTEX(un));
7688
7689			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7690			    "trans check Failed, not ok to power cycle.\n");
7691			return (DDI_FAILURE);
7692
7693		case -1:
7694			if (got_semaphore_here != 0) {
7695				sema_v(&un->un_semoclose);
7696			}
7697			/*
7698			 * On exit put the state back to its original value
7699			 * and broadcast to anyone waiting for the power
7700			 * change completion.
7701			 */
7702			mutex_enter(SD_MUTEX(un));
7703			un->un_state = state_before_pm;
7704			cv_broadcast(&un->un_suspend_cv);
7705			mutex_exit(SD_MUTEX(un));
7706			SD_TRACE(SD_LOG_IO_PM, un,
7707			    "sdpower: exit, trans check command Failed.\n");
7708			return (DDI_FAILURE);
7709		}
7710	}
7711
7712	if (level == SD_SPINDLE_OFF) {
7713		/*
7714		 * Save the last state... if the STOP fails, we need it
7715		 * for restoring.
7716		 */
7717		mutex_enter(SD_MUTEX(un));
7718		save_state = un->un_last_state;
7719		/*
7720		 * There must not be any commands getting processed
7721		 * in the driver when we get here. Power to the
7722		 * device is potentially going off.
7723		 */
7724		ASSERT(un->un_ncmds_in_driver == 0);
7725		mutex_exit(SD_MUTEX(un));
7726
7727		/*
7728		 * For now, suspend the device completely before the spindle
7729		 * is turned off.
7730		 */
7731		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7732			if (got_semaphore_here != 0) {
7733				sema_v(&un->un_semoclose);
7734			}
7735			/*
7736			 * On exit put the state back to its original value
7737			 * and broadcast to anyone waiting for the power
7738			 * change completion.
7739			 */
7740			mutex_enter(SD_MUTEX(un));
7741			un->un_state = state_before_pm;
7742			cv_broadcast(&un->un_suspend_cv);
7743			mutex_exit(SD_MUTEX(un));
7744			SD_TRACE(SD_LOG_IO_PM, un,
7745			    "sdpower: exit, PM suspend Failed.\n");
7746			return (DDI_FAILURE);
7747		}
7748	}
7749
7750	/*
7751	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7752	 * close, or strategy. Dump no longer uses this routine; it uses its
7753	 * own code so it can be done in polled mode.
7754	 */
7755
7756	medium_present = TRUE;
7757
7758	/*
7759	 * When powering up, issue a TUR in case the device is at unit
7760	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7761	 * a deadlock on un_pm_busy_cv will occur.
7762	 */
7763	if (level == SD_SPINDLE_ON) {
7764		(void) sd_send_scsi_TEST_UNIT_READY(un,
7765		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7766	}
7767
7768	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7769	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7770
7771	sval = sd_send_scsi_START_STOP_UNIT(un,
7772	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7773	    SD_PATH_DIRECT);
7774	/* Command failed, check for media present. */
7775	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7776		medium_present = FALSE;
7777	}
7778
7779	/*
7780	 * The conditions of interest here are:
7781	 *   if a spindle off with media present fails,
7782	 *	then restore the state and return an error.
7783	 *   else if a spindle on fails,
7784	 *	then return an error (there's no state to restore).
7785	 * In all other cases we set up for the new state
7786	 * and return success.
7787	 */
7788	switch (level) {
7789	case SD_SPINDLE_OFF:
7790		if ((medium_present == TRUE) && (sval != 0)) {
7791			/* The stop command from above failed */
7792			rval = DDI_FAILURE;
7793			/*
7794			 * The stop command failed, and we have media
7795			 * present. Put the level back by calling
7796			 * sd_ddi_pm_resume() and set the state back to
7797			 * its previous value.
7798			 */
7799			(void) sd_ddi_pm_resume(un);
7800			mutex_enter(SD_MUTEX(un));
7801			un->un_last_state = save_state;
7802			mutex_exit(SD_MUTEX(un));
7803			break;
7804		}
7805		/*
7806		 * The stop command from above succeeded.
7807		 */
7808		if (un->un_f_monitor_media_state) {
7809			/*
7810			 * Terminate watch thread in case of removable media
7811			 * devices going into low power state. This is as per
7812			 * the requirements of the pm framework; otherwise
7813			 * commands will be generated for the device (through
7814			 * the watch thread), even when in low power state.
7815			 */
7816			mutex_enter(SD_MUTEX(un));
7817			un->un_f_watcht_stopped = FALSE;
7818			if (un->un_swr_token != NULL) {
7819				opaque_t temp_token = un->un_swr_token;
7820				un->un_f_watcht_stopped = TRUE;
7821				un->un_swr_token = NULL;
7822				mutex_exit(SD_MUTEX(un));
7823				(void) scsi_watch_request_terminate(temp_token,
7824				    SCSI_WATCH_TERMINATE_WAIT);
7825			} else {
7826				mutex_exit(SD_MUTEX(un));
7827			}
7828		}
7829		break;
7830
7831	default:	/* The level requested is spindle on... */
7832		/*
7833		 * Legacy behavior: return success on a failed spinup
7834		 * if there is no media in the drive.
7835		 * Do this by looking at medium_present here.
7836		 */
7837		if ((sval != 0) && medium_present) {
7838			/* The start command from above failed */
7839			rval = DDI_FAILURE;
7840			break;
7841		}
7842		/*
7843		 * The start command from above succeeded.
7844		 * Resume the devices now that we have
7845		 * started the disks.
7846		 */
7847		(void) sd_ddi_pm_resume(un);
7848
7849		/*
7850		 * Resume the watch thread since it was suspended
7851		 * when the device went into low power mode.
7852		 */
7853		if (un->un_f_monitor_media_state) {
7854			mutex_enter(SD_MUTEX(un));
7855			if (un->un_f_watcht_stopped == TRUE) {
7856				opaque_t temp_token;
7857
7858				un->un_f_watcht_stopped = FALSE;
7859				mutex_exit(SD_MUTEX(un));
7860				temp_token = scsi_watch_request_submit(
7861				    SD_SCSI_DEVP(un),
7862				    sd_check_media_time,
7863				    SENSE_LENGTH, sd_media_watch_cb,
7864				    (caddr_t)dev);
7865				mutex_enter(SD_MUTEX(un));
7866				un->un_swr_token = temp_token;
7867			}
7868			mutex_exit(SD_MUTEX(un));
7869		}
7870	}
7871	if (got_semaphore_here != 0) {
7872		sema_v(&un->un_semoclose);
7873	}
7874	/*
7875	 * On exit put the state back to its original value
7876	 * and broadcast to anyone waiting for the power
7877	 * change completion.
7878	 */
7879	mutex_enter(SD_MUTEX(un));
7880	un->un_state = state_before_pm;
7881	cv_broadcast(&un->un_suspend_cv);
7882	mutex_exit(SD_MUTEX(un));
7883
7884	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7885
7886	return (rval);
7887}
7888
7889
7890
7891/*
7892 *    Function: sdattach
7893 *
7894 * Description: Driver's attach(9E) entry point function.
7895 *
7896 *   Arguments: devi - opaque device info handle
7897 *		cmd  - attach  type
7898 *		cmd  - attach type
7899 * Return Code: DDI_SUCCESS
7900 *		DDI_FAILURE
7901 *
7902 *     Context: Kernel thread context
7903 */
7904
7905static int
7906sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7907{
7908	switch (cmd) {
7909	case DDI_ATTACH:
7910		return (sd_unit_attach(devi));
7911	case DDI_RESUME:
7912		return (sd_ddi_resume(devi));
7913	default:
7914		break;
7915	}
7916	return (DDI_FAILURE);
7917}
7918
7919
7920/*
7921 *    Function: sddetach
7922 *
7923 * Description: Driver's detach(9E) entry point function.
7924 *
7925 *   Arguments: devi - opaque device info handle
7926 *		cmd  - detach type
7927 *
7928 * Return Code: DDI_SUCCESS
7929 *		DDI_FAILURE
7930 *
7931 *     Context: Kernel thread context
7932 */
7933
7934static int
7935sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7936{
7937	switch (cmd) {
7938	case DDI_DETACH:
7939		return (sd_unit_detach(devi));
7940	case DDI_SUSPEND:
7941		return (sd_ddi_suspend(devi));
7942	default:
7943		break;
7944	}
7945	return (DDI_FAILURE);
7946}
7947
7948
7949/*
7950 *     Function: sd_sync_with_callback
7951 *
7952 *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7953 *		 state while the callback routine is active.
7954 *
7955 *    Arguments: un: softstate structure for the instance
7956 *
7957 *	Context: Kernel thread context
7958 */
7959
7960static void
7961sd_sync_with_callback(struct sd_lun *un)
7962{
7963	ASSERT(un != NULL);
7964
7965	mutex_enter(SD_MUTEX(un));
7966
7967	ASSERT(un->un_in_callback >= 0);
7968
7969	while (un->un_in_callback > 0) {
7970		mutex_exit(SD_MUTEX(un));
7971		delay(2);
7972		mutex_enter(SD_MUTEX(un));
7973	}
7974
7975	mutex_exit(SD_MUTEX(un));
7976}
7977
7978/*
7979 *    Function: sd_unit_attach
7980 *
7981 * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7982 *		the soft state structure for the device and performs
7983 *		all necessary structure and device initializations.
7984 *
7985 *   Arguments: devi: the system's dev_info_t for the device.
7986 *
7987 * Return Code: DDI_SUCCESS if attach is successful.
7988 *		DDI_FAILURE if any part of the attach fails.
7989 *
7990 *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7991 *		Kernel thread context only.  Can sleep.
7992 */
7993
7994static int
7995sd_unit_attach(dev_info_t *devi)
7996{
7997	struct	scsi_device	*devp;
7998	struct	sd_lun		*un;
7999	char			*variantp;
8000	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
8001	int	instance;
8002	int	rval;
8003	int	wc_enabled;
8004	int	tgt;
8005	uint64_t	capacity;
8006	uint_t		lbasize;
8007	dev_info_t	*pdip = ddi_get_parent(devi);
8008
8009	/*
8010	 * Retrieve the target driver's private data area. This was set
8011	 * up by the HBA.
8012	 */
8013	devp = ddi_get_driver_private(devi);
8014
8015	/*
8016	 * Retrieve the target ID of the device.
8017	 */
8018	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8019	    SCSI_ADDR_PROP_TARGET, -1);
8020
8021	/*
8022	 * Since we have no idea what state things were left in by the last
8023	 * user of the device, set up some 'default' settings, i.e. turn 'em
8024	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
8025	 * Do this before the scsi_probe, which sends an inquiry.
8026	 * This is a fix for bug (4430280).
8027	 * Of special importance is wide-xfer. The drive could have been left
8028	 * in wide transfer mode by the last driver to communicate with it,
8029	 * this includes us. If that's the case, and if the following is not
8030	 * setup properly or we don't re-negotiate with the drive prior to
8031	 * transferring data to/from the drive, it causes bus parity errors,
8032	 * data overruns, and unexpected interrupts. This first occurred when
8033	 * the fix for bug (4378686) was made.
8034	 */
8035	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
8036	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
8037	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
8038
8039	/*
8040	 * Currently, scsi_ifsetcap sets the tagged-qing capability for all
8041	 * LUNs on a target. Setting it per lun instance actually sets the
8042	 * capability of this target, which affects those luns already
8043	 * attached on the same target. So during attach, we can disable this
8044	 * capability only when no other lun has been attached on this
8045	 * target. By doing this, we assume a target has the same tagged-qing
8046	 * capability for every lun. The condition can be removed when the HBA
8047	 * is changed to support per-lun based tagged-qing capability.
8048	 */
8049	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
8050		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
8051	}
8052
8053	/*
8054	 * Use scsi_probe() to issue an INQUIRY command to the device.
8055	 * This call will allocate and fill in the scsi_inquiry structure
8056	 * and point the sd_inq member of the scsi_device structure to it.
8057	 * If the attach succeeds, then this memory will not be de-allocated
8058	 * (via scsi_unprobe()) until the instance is detached.
8059	 */
8060	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
8061		goto probe_failed;
8062	}
8063
8064	/*
8065	 * Check the device type as specified in the inquiry data and
8066	 * claim it if it is of a type that we support.
8067	 */
8068	switch (devp->sd_inq->inq_dtype) {
8069	case DTYPE_DIRECT:
8070		break;
8071	case DTYPE_RODIRECT:
8072		break;
8073	case DTYPE_OPTICAL:
8074		break;
8075	case DTYPE_NOTPRESENT:
8076	default:
8077		/* Unsupported device type; fail the attach. */
8078		goto probe_failed;
8079	}
8080
8081	/*
8082	 * Allocate the soft state structure for this unit.
8083	 *
8084	 * We rely upon this memory being set to all zeroes by
8085	 * ddi_soft_state_zalloc().  We assume that any member of the
8086	 * soft state structure that is not explicitly initialized by
8087	 * this routine will have a value of zero.
8088	 */
8089	instance = ddi_get_instance(devp->sd_dev);
8090	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
8091		goto probe_failed;
8092	}
8093
8094	/*
8095	 * Retrieve a pointer to the newly-allocated soft state.
8096	 *
8097	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
8098	 * was successful, unless something has gone horribly wrong and the
8099	 * ddi's soft state internals are corrupt (in which case it is
8100	 * probably better to halt here than just fail the attach....)
8101	 */
8102	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
8103		panic("sd_unit_attach: NULL soft state on instance:0x%x",
8104		    instance);
8105		/*NOTREACHED*/
8106	}
8107
8108	/*
8109	 * Link the back ptr of the driver soft state to the scsi_device
8110	 * struct for this lun.
8111	 * Save a pointer to the softstate in the driver-private area of
8112	 * the scsi_device struct.
8113	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
8114	 * we first set un->un_sd below.
8115	 */
8116	un->un_sd = devp;
8117	devp->sd_private = (opaque_t)un;
8118
8119	/*
8120	 * The following must be after devp is stored in the soft state struct.
8121	 */
8122#ifdef SDDEBUG
8123	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8124	    "%s_unit_attach: un:0x%p instance:%d\n",
8125	    ddi_driver_name(devi), un, instance);
8126#endif
8127
8128	/*
8129	 * Set up the device type and node type (for the minor nodes).
8130	 * By default we assume that the device can at least support the
8131	 * Common Command Set. Call it a CD-ROM if it reports itself
8132	 * as a RODIRECT device.
8133	 */
8134	switch (devp->sd_inq->inq_dtype) {
8135	case DTYPE_RODIRECT:
8136		un->un_node_type = DDI_NT_CD_CHAN;
8137		un->un_ctype	 = CTYPE_CDROM;
8138		break;
8139	case DTYPE_OPTICAL:
8140		un->un_node_type = DDI_NT_BLOCK_CHAN;
8141		un->un_ctype	 = CTYPE_ROD;
8142		break;
8143	default:
8144		un->un_node_type = DDI_NT_BLOCK_CHAN;
8145		un->un_ctype	 = CTYPE_CCS;
8146		break;
8147	}
8148
8149	/*
8150	 * Try to read the interconnect type from the HBA.
8151	 *
8152	 * Note: This driver is currently compiled as two binaries, a parallel
8153	 * scsi version (sd) and a fibre channel version (ssd). All functional
8154	 * differences are determined at compile time. In the future a single
8155	 * binary will be provided and the interconnect type will be used to
8156	 * differentiate between fibre and parallel scsi behaviors. At that time
8157	 * it will be necessary for all fibre channel HBAs to support this
8158	 * property.
8159	 *
8160	 * Set un_f_is_fibre to TRUE (default fibre).
8161	 */
8162	un->un_f_is_fibre = TRUE;
8163	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
8164	case INTERCONNECT_SSA:
8165		un->un_interconnect_type = SD_INTERCONNECT_SSA;
8166		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8167		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
8168		break;
8169	case INTERCONNECT_PARALLEL:
8170		un->un_f_is_fibre = FALSE;
8171		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
8172		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8173		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
8174		break;
8175	case INTERCONNECT_SATA:
8176		un->un_f_is_fibre = FALSE;
8177		un->un_interconnect_type = SD_INTERCONNECT_SATA;
8178		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8179		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
8180		break;
8181	case INTERCONNECT_FIBRE:
8182		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
8183		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8184		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
8185		break;
8186	case INTERCONNECT_FABRIC:
8187		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
8188		un->un_node_type = DDI_NT_BLOCK_FABRIC;
8189		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8190		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
8191		break;
8192	default:
8193#ifdef SD_DEFAULT_INTERCONNECT_TYPE
8194		/*
8195		 * The HBA does not support the "interconnect-type" property
8196		 * (or did not provide a recognized type).
8197		 *
8198		 * Note: This will be obsoleted when a single fibre channel
8199		 * and parallel scsi driver is delivered. In the meantime the
8200		 * interconnect type will be set to the platform default. If that
8201		 * type is not parallel SCSI, it means that we should be
8202		 * assuming "ssd" semantics. However, here this also means that
8203		 * the FC HBA is not supporting the "interconnect-type" property
8204		 * like we expect it to, so log this occurrence.
8205		 */
8206		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
8207		if (!SD_IS_PARALLEL_SCSI(un)) {
8208			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8209			    "sd_unit_attach: un:0x%p Assuming "
8210			    "INTERCONNECT_FIBRE\n", un);
8211		} else {
8212			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8213			    "sd_unit_attach: un:0x%p Assuming "
8214			    "INTERCONNECT_PARALLEL\n", un);
8215			un->un_f_is_fibre = FALSE;
8216		}
8217#else
8218		/*
8219		 * Note: This source will be implemented when a single fibre
8220		 * channel and parallel scsi driver is delivered. The default
8221		 * will be to assume that if a device does not support the
8222		 * "interconnect-type" property it is a parallel SCSI HBA and
8223		 * we will set the interconnect type for parallel scsi.
8224		 */
8225		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
8226		un->un_f_is_fibre = FALSE;
8227#endif
8228		break;
8229	}
8230
8231	if (un->un_f_is_fibre == TRUE) {
8232		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
8233		    SCSI_VERSION_3) {
8234			switch (un->un_interconnect_type) {
8235			case SD_INTERCONNECT_FIBRE:
8236			case SD_INTERCONNECT_SSA:
8237				un->un_node_type = DDI_NT_BLOCK_WWN;
8238				break;
8239			default:
8240				break;
8241			}
8242		}
8243	}
8244
8245	/*
8246	 * Initialize the Request Sense command for the target
8247	 */
8248	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
8249		goto alloc_rqs_failed;
8250	}
8251
8252	/*
8253	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for Sparc
8254	 * with separate binaries for sd and ssd.
8255	 *
8256	 * x86 has one binary, and un_retry_count is set based on the
8257	 * connection type. The hardcoded values will go away when Sparc
8258	 * uses one binary for sd and ssd. These hardcoded values need to
8259	 * match SD_RETRY_COUNT in sddef.h.
8260	 * The value used is based on interconnect type:
8261	 * fibre = 3, parallel = 5.
8262	 */
8263#if defined(__i386) || defined(__amd64)
8264	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
8265#else
8266	un->un_retry_count = SD_RETRY_COUNT;
8267#endif
8268
8269	/*
8270	 * Set the per disk retry count to the default number of retries
8271	 * for disks and CDROMs. This value can be overridden by the
8272	 * disk property list or an entry in sd.conf.
8273	 */
8274	un->un_notready_retry_count =
8275	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un) :
8276	    DISK_NOT_READY_RETRY_COUNT(un);
8277
8278	/*
8279	 * Set the busy retry count to the default value of un_retry_count.
8280	 * This can be overridden by entries in sd.conf or the device
8281	 * config table.
8282	 */
8283	un->un_busy_retry_count = un->un_retry_count;
8284
8285	/*
8286	 * Init the reset threshold for retries.  This number determines
8287	 * how many retries must be performed before a reset can be issued
8288	 * (for certain error conditions). This can be overridden by entries
8289	 * in sd.conf or the device config table.
8290	 */
8291	un->un_reset_retry_count = (un->un_retry_count / 2);
8292
8293	/*
8294	 * Set the victim_retry_count to twice the default un_retry_count.
8295	 */
8296	un->un_victim_retry_count = (2 * un->un_retry_count);
8297
8298	/*
8299	 * Set the reservation release timeout to the default value of
8300	 * 5 seconds. This can be overridden by entries in ssd.conf or the
8301	 * device config table.
8302	 */
8303	un->un_reserve_release_time = 5;
8304
8305	/*
8306	 * Set up the default maximum transfer size. Note that this may
8307	 * get updated later in the attach, when setting up default wide
8308	 * operations for disks.
8309	 */
8310#if defined(__i386) || defined(__amd64)
8311	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
8312#else
8313	un->un_max_xfer_size = (uint_t)maxphys;
8314#endif
8315
8316	/*
8317	 * Get "allow bus device reset" property (defaults to "enabled" if
8318	 * the property was not defined). This is to disable bus resets for
8319	 * certain kinds of error recovery. Note: In the future when a run-time
8320	 * fibre check is available the soft state flag should default to
8321	 * enabled.
8322	 */
8323	if (un->un_f_is_fibre == TRUE) {
8324		un->un_f_allow_bus_device_reset = TRUE;
8325	} else {
8326		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8327		    "allow-bus-device-reset", 1) != 0) {
8328			un->un_f_allow_bus_device_reset = TRUE;
8329			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8330			    "sd_unit_attach: un:0x%p Bus device reset "
8331			    "enabled\n", un);
8332		} else {
8333			un->un_f_allow_bus_device_reset = FALSE;
8334			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8335			    "sd_unit_attach: un:0x%p Bus device reset "
8336			    "disabled\n", un);
8337		}
8338	}
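
	/*
	 * Illustrative sd.conf fragment (hypothetical value) that would
	 * exercise the property consulted above and disable bus device
	 * resets for non-fibre targets:
	 *
	 *	allow-bus-device-reset=0;
	 */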
8339
8340	/*
8341	 * Check if this is an ATAPI device. ATAPI devices use Group 1
8342	 * Read/Write commands and Group 2 Mode Sense/Select commands.
8343	 *
8344	 * Note: The "obsolete" way of doing this is to check for the "atapi"
8345	 * property. The new "variant" property with a value of "atapi" has been
8346	 * introduced so that future 'variants' of standard SCSI behavior (like
8347	 * atapi) could be specified by the underlying HBA drivers by supplying
8348	 * a new value for the "variant" property, instead of having to define a
8349	 * new property.
8350	 */
8351	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
8352		un->un_f_cfg_is_atapi = TRUE;
8353		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8354		    "sd_unit_attach: un:0x%p Atapi device\n", un);
8355	}
8356	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
8357	    &variantp) == DDI_PROP_SUCCESS) {
8358		if (strcmp(variantp, "atapi") == 0) {
8359			un->un_f_cfg_is_atapi = TRUE;
8360			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8361			    "sd_unit_attach: un:0x%p Atapi device\n", un);
8362		}
8363		ddi_prop_free(variantp);
8364	}
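
	/*
	 * Sketch (assumption: "cdip" is a placeholder for the child devinfo
	 * node): an HBA nexus driver could advertise the "variant" property
	 * checked above roughly as follows.
	 *
	 *	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	 *	    "variant", "atapi");
	 */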
8365
8366	un->un_cmd_timeout	= SD_IO_TIME;
8367
8368	/* Info on current states, statuses, etc. (Updated frequently) */
8369	un->un_state		= SD_STATE_NORMAL;
8370	un->un_last_state	= SD_STATE_NORMAL;
8371
8372	/* Control & status info for command throttling */
8373	un->un_throttle		= sd_max_throttle;
8374	un->un_saved_throttle	= sd_max_throttle;
8375	un->un_min_throttle	= sd_min_throttle;
8376
8377	if (un->un_f_is_fibre == TRUE) {
8378		un->un_f_use_adaptive_throttle = TRUE;
8379	} else {
8380		un->un_f_use_adaptive_throttle = FALSE;
8381	}
8382
8383	/* Removable media support. */
8384	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
8385	un->un_mediastate		= DKIO_NONE;
8386	un->un_specified_mediastate	= DKIO_NONE;
8387
8388	/* CVs for suspend/resume (PM or DR) */
8389	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
8390	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
8391
8392	/* Power management support. */
8393	un->un_power_level = SD_SPINDLE_UNINIT;
8394
8395	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8396	un->un_f_wcc_inprog = 0;
8397
8398	/*
8399	 * The open/close semaphore is used to serialize threads executing
8400	 * in the driver's open & close entry point routines for a given
8401	 * instance.
8402	 */
8403	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
8404
8405	/*
	 * The conf file entry and soft state variable form a forceful
	 * override, meaning a non-zero value must be entered to change
	 * the default.
8408	 */
8409	un->un_f_disksort_disabled = FALSE;
8410
8411	/*
8412	 * Retrieve the properties from the static driver table or the driver
8413	 * configuration file (.conf) for this unit and update the soft state
8414	 * for the device as needed for the indicated properties.
8415	 * Note: the property configuration needs to occur here as some of the
	 * following routines may have dependencies on soft state flags set
8417	 * as part of the driver property configuration.
8418	 */
8419	sd_read_unit_properties(un);
8420	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8421	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8422
8423	/*
	 * A device is treated as hotpluggable only if it has the
	 * "hotpluggable" property; otherwise it is regarded as
	 * non-hotpluggable.
8427	 */
8428	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8429	    -1) != -1) {
8430		un->un_f_is_hotpluggable = TRUE;
8431	}
8432
8433	/*
	 * Set the unit's attributes (flags) according to the "hotpluggable"
	 * property and the RMB bit in the INQUIRY data.
8436	 */
8437	sd_set_unit_attributes(un, devi);
8438
8439	/*
8440	 * By default, we mark the capacity, lbasize, and geometry
8441	 * as invalid. Only if we successfully read a valid capacity
8442	 * will we update the un_blockcount and un_tgt_blocksize with the
8443	 * valid values (the geometry will be validated later).
8444	 */
8445	un->un_f_blockcount_is_valid	= FALSE;
8446	un->un_f_tgt_blocksize_is_valid	= FALSE;
8447	un->un_f_geometry_is_valid	= FALSE;
8448
8449	/*
8450	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8451	 * otherwise.
8452	 */
8453	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8454	un->un_blockcount = 0;
8455
8456	/*
8457	 * Set up the per-instance info needed to determine the correct
8458	 * CDBs and other info for issuing commands to the target.
8459	 */
8460	sd_init_cdb_limits(un);
8461
8462	/*
8463	 * Set up the IO chains to use, based upon the target type.
8464	 */
8465	if (un->un_f_non_devbsize_supported) {
8466		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8467	} else {
8468		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8469	}
8470	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8471	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8472	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8473
8474	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8475	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8476	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8477	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
8478
8479
8480	if (ISCD(un)) {
8481		un->un_additional_codes = sd_additional_codes;
8482	} else {
8483		un->un_additional_codes = NULL;
8484	}
8485
8486	/*
8487	 * Create the kstats here so they can be available for attach-time
8488	 * routines that send commands to the unit (either polled or via
8489	 * sd_send_scsi_cmd).
8490	 *
8491	 * Note: This is a critical sequence that needs to be maintained:
8492	 *	1) Instantiate the kstats here, before any routines using the
8493	 *	   iopath (i.e. sd_send_scsi_cmd).
8494	 *	2) Instantiate and initialize the partition stats
8495	 *	   (sd_set_pstats) in sd_use_efi() and sd_validate_geometry(),
8496	 *	   see detailed comments there.
8497	 *	3) Initialize the error stats (sd_set_errstats), following
8498	 *	   sd_validate_geometry(),sd_register_devid(),
8499	 *	   and sd_cache_control().
8500	 */
8501
8502	un->un_stats = kstat_create(sd_label, instance,
8503	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8504	if (un->un_stats != NULL) {
8505		un->un_stats->ks_lock = SD_MUTEX(un);
8506		kstat_install(un->un_stats);
8507	}
8508	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8509	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8510
8511	sd_create_errstats(un, instance);
8512	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8513	    "sd_unit_attach: un:0x%p errstats created\n", un);
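
	/*
	 * Example (illustrative): the error kstats created above are what
	 * iostat(1M) reports for this unit when invoked as:
	 *
	 *	# iostat -E
	 */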
8514
8515	/*
8516	 * The following if/else code was relocated here from below as part
	 * of the fix for bug (4430280). However, with the default setup added
8518	 * on entry to this routine, it's no longer absolutely necessary for
8519	 * this to be before the call to sd_spin_up_unit.
8520	 */
8521	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
8522		/*
8523		 * If SCSI-2 tagged queueing is supported by the target
8524		 * and by the host adapter then we will enable it.
8525		 */
8526		un->un_tagflags = 0;
8527		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8528		    (devp->sd_inq->inq_cmdque) &&
8529		    (un->un_f_arq_enabled == TRUE)) {
8530			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8531			    1, 1) == 1) {
8532				un->un_tagflags = FLAG_STAG;
8533				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8534				    "sd_unit_attach: un:0x%p tag queueing "
8535				    "enabled\n", un);
8536			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8537			    "untagged-qing", 0) == 1) {
8538				un->un_f_opt_queueing = TRUE;
8539				un->un_saved_throttle = un->un_throttle =
8540				    min(un->un_throttle, 3);
8541			} else {
8542				un->un_f_opt_queueing = FALSE;
8543				un->un_saved_throttle = un->un_throttle = 1;
8544			}
8545		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8546		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8547			/* The Host Adapter supports internal queueing. */
8548			un->un_f_opt_queueing = TRUE;
8549			un->un_saved_throttle = un->un_throttle =
8550			    min(un->un_throttle, 3);
8551		} else {
8552			un->un_f_opt_queueing = FALSE;
8553			un->un_saved_throttle = un->un_throttle = 1;
8554			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8555			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8556		}
8557
8558		/*
8559		 * Enable large transfers for SATA/SAS drives
8560		 */
8561		if (SD_IS_SERIAL(un)) {
8562			un->un_max_xfer_size =
8563			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8564			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8565			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8566			    "sd_unit_attach: un:0x%p max transfer "
8567			    "size=0x%x\n", un, un->un_max_xfer_size);
8568
8569		}
8570
8571		/* Setup or tear down default wide operations for disks */
8572
8573		/*
8574		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8575		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8576		 * system and be set to different values. In the future this
8577		 * code may need to be updated when the ssd module is
8578		 * obsoleted and removed from the system. (4299588)
8579		 */
8580		if (SD_IS_PARALLEL_SCSI(un) &&
8581		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8582		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8583			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8584			    1, 1) == 1) {
8585				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8586				    "sd_unit_attach: un:0x%p Wide Transfer "
8587				    "enabled\n", un);
8588			}
8589
8590			/*
8591			 * If tagged queuing has also been enabled, then
8592			 * enable large xfers
8593			 */
8594			if (un->un_saved_throttle == sd_max_throttle) {
8595				un->un_max_xfer_size =
8596				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8597				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8598				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8599				    "sd_unit_attach: un:0x%p max transfer "
8600				    "size=0x%x\n", un, un->un_max_xfer_size);
8601			}
8602		} else {
8603			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8604			    0, 1) == 1) {
8605				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8606				    "sd_unit_attach: un:0x%p "
8607				    "Wide Transfer disabled\n", un);
8608			}
8609		}
8610	} else {
8611		un->un_tagflags = FLAG_STAG;
8612		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8613		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8614	}
8615
8616	/*
8617	 * If this target supports LUN reset, try to enable it.
8618	 */
8619	if (un->un_f_lun_reset_enabled) {
8620		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8621			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8622			    "un:0x%p lun_reset capability set\n", un);
8623		} else {
8624			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8625			    "un:0x%p lun-reset capability not set\n", un);
8626		}
8627	}
8628
8629	/*
8630	 * At this point in the attach, we have enough info in the
8631	 * soft state to be able to issue commands to the target.
8632	 *
8633	 * All command paths used below MUST issue their commands as
8634	 * SD_PATH_DIRECT. This is important as intermediate layers
8635	 * are not all initialized yet (such as PM).
8636	 */
8637
8638	/*
8639	 * Send a TEST UNIT READY command to the device. This should clear
8640	 * any outstanding UNIT ATTENTION that may be present.
8641	 *
	 * Note: Don't check for success; just track whether there is a
	 * reservation. This is a throwaway command to clear any unit
	 * attentions.
8644	 *
8645	 * Note: This MUST be the first command issued to the target during
8646	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8647	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8648	 * with attempts at spinning up a device with no media.
8649	 */
8650	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8651		reservation_flag = SD_TARGET_IS_RESERVED;
8652	}
8653
8654	/*
8655	 * If the device is NOT a removable media device, attempt to spin
8656	 * it up (using the START_STOP_UNIT command) and read its capacity
8657	 * (using the READ CAPACITY command).  Note, however, that either
8658	 * of these could fail and in some cases we would continue with
8659	 * the attach despite the failure (see below).
8660	 */
8661	if (un->un_f_descr_format_supported) {
8662		switch (sd_spin_up_unit(un)) {
8663		case 0:
8664			/*
8665			 * Spin-up was successful; now try to read the
8666			 * capacity.  If successful then save the results
8667			 * and mark the capacity & lbasize as valid.
8668			 */
8669			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8670			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8671
8672			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8673			    &lbasize, SD_PATH_DIRECT)) {
8674			case 0: {
8675				if (capacity > DK_MAX_BLOCKS) {
8676#ifdef _LP64
8677					if (capacity + 1 >
8678					    SD_GROUP1_MAX_ADDRESS) {
8679						/*
8680						 * Enable descriptor format
8681						 * sense data so that we can
8682						 * get 64 bit sense data
8683						 * fields.
8684						 */
8685						sd_enable_descr_sense(un);
8686					}
8687#else
8688					/* 32-bit kernels can't handle this */
8689					scsi_log(SD_DEVINFO(un),
8690					    sd_label, CE_WARN,
8691					    "disk has %llu blocks, which "
8692					    "is too large for a 32-bit "
8693					    "kernel", capacity);
8694
8695#if defined(__i386) || defined(__amd64)
8696					/*
8697					 * Refer to comments related to off-by-1
8698					 * at the header of this file.
					 * A 1TB disk was treated as
					 * (1TB - 512)B in the past, so it
					 * might have a valid VTOC and
					 * Solaris partitions; we have to
					 * allow it to continue to work.
8704					 */
					if (capacity - 1 > DK_MAX_BLOCKS)
8706#endif
8707					goto spinup_failed;
8708#endif
8709				}
8710
8711				/*
				 * It is not necessary to check here whether
				 * the device capacity exceeds what the
				 * maximum HBA CDB can address, because
				 * sd_send_scsi_READ_CAPACITY retrieves the
				 * capacity via a USCSI command that is
				 * itself constrained by the maximum HBA CDB.
				 * sd_send_scsi_READ_CAPACITY returns EINVAL
				 * when a CDB longer than the HBA supports
				 * would be required; that case is handled
				 * in "case EINVAL" below.
8722				 */
8723
8724				/*
8725				 * The following relies on
8726				 * sd_send_scsi_READ_CAPACITY never
8727				 * returning 0 for capacity and/or lbasize.
8728				 */
8729				sd_update_block_info(un, lbasize, capacity);
8730
8731				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8732				    "sd_unit_attach: un:0x%p capacity = %ld "
8733				    "blocks; lbasize= %ld.\n", un,
8734				    un->un_blockcount, un->un_tgt_blocksize);
8735
8736				break;
8737			}
8738			case EINVAL:
8739				/*
8740				 * In the case where the max-cdb-length property
8741				 * is smaller than the required CDB length for
8742				 * a SCSI device, a target driver can fail to
8743				 * attach to that device.
8744				 */
8745				scsi_log(SD_DEVINFO(un),
8746				    sd_label, CE_WARN,
8747				    "disk capacity is too large "
8748				    "for current cdb length");
8749				goto spinup_failed;
8750			case EACCES:
8751				/*
8752				 * Should never get here if the spin-up
8753				 * succeeded, but code it in anyway.
8754				 * From here, just continue with the attach...
8755				 */
8756				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8757				    "sd_unit_attach: un:0x%p "
8758				    "sd_send_scsi_READ_CAPACITY "
8759				    "returned reservation conflict\n", un);
8760				reservation_flag = SD_TARGET_IS_RESERVED;
8761				break;
8762			default:
8763				/*
8764				 * Likewise, should never get here if the
8765				 * spin-up succeeded. Just continue with
8766				 * the attach...
8767				 */
8768				break;
8769			}
8770			break;
8771		case EACCES:
8772			/*
8773			 * Device is reserved by another host.  In this case
8774			 * we could not spin it up or read the capacity, but
8775			 * we continue with the attach anyway.
8776			 */
8777			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8778			    "sd_unit_attach: un:0x%p spin-up reservation "
8779			    "conflict.\n", un);
8780			reservation_flag = SD_TARGET_IS_RESERVED;
8781			break;
8782		default:
8783			/* Fail the attach if the spin-up failed. */
8784			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8785			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8786			goto spinup_failed;
8787		}
8788	}
8789
8790	/*
8791	 * Check to see if this is a MMC drive
8792	 */
8793	if (ISCD(un)) {
8794		sd_set_mmc_caps(un);
8795	}
8796
8797	/*
8798	 * Create the minor nodes for the device.
	 * Note: If we want to support fdisk on both SPARC and Intel, this
	 * will have to separate out the notion that VTOC8 is always SPARC
	 * and VTOC16 is always Intel (though these can be the defaults).
	 * The VTOC type will have to be determined at run-time, and the
	 * fdisk partitioning will have to have been read & set up before we
	 * create the minor nodes. (Are there other inits, such as kstats,
	 * that also ought to be done before creating the minor nodes?)
	 * (Doesn't setting up the minor nodes kind of imply that we're
	 * ready to handle an open from userland?)
8808	 */
8809	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8810		goto create_minor_nodes_failed;
8811	}
8812	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8813	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8814
8815	/*
8816	 * Add a zero-length attribute to tell the world we support
8817	 * kernel ioctls (for layered drivers)
8818	 */
8819	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8820	    DDI_KERNEL_IOCTL, NULL, 0);
8821
8822	/*
8823	 * Add a boolean property to tell the world we support
8824	 * the B_FAILFAST flag (for layered drivers)
8825	 */
8826	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8827	    "ddi-failfast-supported", NULL, 0);
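
	/*
	 * Sketch (assumption: "dip" is a placeholder for this unit's
	 * devinfo node): a layered consumer could test for the property
	 * created above roughly as follows.
	 *
	 *	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	 *	    "ddi-failfast-supported") == 1) {
	 *		... B_FAILFAST buf(9S) requests are honored ...
	 *	}
	 */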
8828
8829	/*
8830	 * Initialize power management
8831	 */
8832	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8833	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8834	sd_setup_pm(un, devi);
8835	if (un->un_f_pm_is_enabled == FALSE) {
8836		/*
8837		 * For performance, point to a jump table that does
8838		 * not include pm.
8839		 * The direct and priority chains don't change with PM.
8840		 *
8841		 * Note: this is currently done based on individual device
8842		 * capabilities. When an interface for determining system
8843		 * power enabled state becomes available, or when additional
8844		 * layers are added to the command chain, these values will
8845		 * have to be re-evaluated for correctness.
8846		 */
8847		if (un->un_f_non_devbsize_supported) {
8848			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8849		} else {
8850			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8851		}
8852		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8853	}
8854
8855	/*
8856	 * This property is set to 0 by HA software to avoid retries
8857	 * on a reserved disk. (The preferred property name is
8858	 * "retry-on-reservation-conflict") (1189689)
8859	 *
	 * Note: The use of a global here can have unintended consequences. A
	 * per-instance variable is preferable for matching the capabilities
	 * of different underlying HBAs. (4402600)
8863	 */
8864	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8865	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8866	    sd_retry_on_reservation_conflict);
8867	if (sd_retry_on_reservation_conflict != 0) {
8868		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8869		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8870		    sd_retry_on_reservation_conflict);
8871	}
8872
8873	/* Set up options for QFULL handling. */
8874	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8875	    "qfull-retries", -1)) != -1) {
8876		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8877		    rval, 1);
8878	}
8879	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8880	    "qfull-retry-interval", -1)) != -1) {
8881		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8882		    rval, 1);
8883	}
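
	/*
	 * Illustrative sd.conf fragment (hypothetical values) for the QFULL
	 * properties read above:
	 *
	 *	qfull-retries=10;
	 *	qfull-retry-interval=100;
	 */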
8884
8885	/*
8886	 * This just prints a message that announces the existence of the
8887	 * device. The message is always printed in the system logfile, but
8888	 * only appears on the console if the system is booted with the
8889	 * -v (verbose) argument.
8890	 */
8891	ddi_report_dev(devi);
8892
8893	/*
8894	 * The framework calls driver attach routines single-threaded
	 * for a given instance.  However, we still acquire SD_MUTEX here
	 * because it is required for calling the sd_validate_geometry()
	 * and sd_register_devid() functions.
8898	 */
8899	mutex_enter(SD_MUTEX(un));
8900	un->un_f_geometry_is_valid = FALSE;
8901	un->un_mediastate = DKIO_NONE;
8902	un->un_reserved = -1;
8903
8904	/*
8905	 * Read and validate the device's geometry (ie, disk label)
8906	 * A new unformatted drive will not have a valid geometry, but
8907	 * the driver needs to successfully attach to this device so
8908	 * the drive can be formatted via ioctls.
8909	 */
	if ((sd_validate_geometry(un, SD_PATH_DIRECT) == ENOTSUP) &&
	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8913		/*
8914		 * We found a small disk with an EFI label on it;
8915		 * we need to fix up the minor nodes accordingly.
8916		 */
8917		ddi_remove_minor_node(devi, "h");
8918		ddi_remove_minor_node(devi, "h,raw");
8919		(void) ddi_create_minor_node(devi, "wd",
8920		    S_IFBLK,
8921		    (instance << SDUNIT_SHIFT) | WD_NODE,
8922		    un->un_node_type, NULL);
8923		(void) ddi_create_minor_node(devi, "wd,raw",
8924		    S_IFCHR,
8925		    (instance << SDUNIT_SHIFT) | WD_NODE,
8926		    un->un_node_type, NULL);
8927	}
8928#if defined(__i386) || defined(__amd64)
8929	else if (un->un_f_capacity_adjusted == 1) {
8930		/*
8931		 * Refer to comments related to off-by-1 at the
8932		 * header of this file.
8933		 * Adjust minor node for 1TB disk.
8934		 */
8935		ddi_remove_minor_node(devi, "wd");
8936		ddi_remove_minor_node(devi, "wd,raw");
8937		(void) ddi_create_minor_node(devi, "h",
8938		    S_IFBLK,
8939		    (instance << SDUNIT_SHIFT) | WD_NODE,
8940		    un->un_node_type, NULL);
8941		(void) ddi_create_minor_node(devi, "h,raw",
8942		    S_IFCHR,
8943		    (instance << SDUNIT_SHIFT) | WD_NODE,
8944		    un->un_node_type, NULL);
8945	}
8946#endif
8947	/*
8948	 * Read and initialize the devid for the unit.
8949	 */
8950	ASSERT(un->un_errstats != NULL);
8951	if (un->un_f_devid_supported) {
8952		sd_register_devid(un, devi, reservation_flag);
8953	}
8954	mutex_exit(SD_MUTEX(un));
8955
8956#if (defined(__fibre))
8957	/*
	 * Register callbacks for fibre only.  You can't do this solely
	 * on the basis of the devid_type because this is HBA-specific.
8960	 * We need to query our hba capabilities to find out whether to
8961	 * register or not.
8962	 */
	if (un->un_f_is_fibre) {
		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
			sd_init_event_callbacks(un);
			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
			    "sd_unit_attach: un:0x%p event callbacks "
			    "inserted", un);
		}
	}
8970#endif
8971
8972	if (un->un_f_opt_disable_cache == TRUE) {
8973		/*
8974		 * Disable both read cache and write cache.  This is
8975		 * the historic behavior of the keywords in the config file.
8976		 */
8977		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8978		    0) {
8979			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8980			    "sd_unit_attach: un:0x%p Could not disable "
8981			    "caching", un);
8982			goto devid_failed;
8983		}
8984	}
8985
8986	/*
8987	 * Check the value of the WCE bit now and
8988	 * set un_f_write_cache_enabled accordingly.
8989	 */
8990	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8991	mutex_enter(SD_MUTEX(un));
8992	un->un_f_write_cache_enabled = (wc_enabled != 0);
8993	mutex_exit(SD_MUTEX(un));
8994
8995	/*
8996	 * Set the pstat and error stat values here, so data obtained during the
8997	 * previous attach-time routines is available.
8998	 *
8999	 * Note: This is a critical sequence that needs to be maintained:
9000	 *	1) Instantiate the kstats before any routines using the iopath
9001	 *	   (i.e. sd_send_scsi_cmd).
9002	 *	2) Instantiate and initialize the partition stats
9003	 *	   (sd_set_pstats) in sd_use_efi() and sd_validate_geometry(),
9004	 *	   see detailed comments there.
9005	 *	3) Initialize the error stats (sd_set_errstats), following
9006	 *	   sd_validate_geometry(),sd_register_devid(),
9007	 *	   and sd_cache_control().
9008	 */
9009	sd_set_errstats(un);
9010	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
9011	    "sd_unit_attach: un:0x%p errstats set\n", un);
9012
9013	/*
9014	 * Find out what type of reservation this disk supports.
9015	 */
9016	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
9017	case 0:
9018		/*
9019		 * SCSI-3 reservations are supported.
9020		 */
9021		un->un_reservation_type = SD_SCSI3_RESERVATION;
9022		SD_INFO(SD_LOG_ATTACH_DETACH, un,
9023		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
9024		break;
9025	case ENOTSUP:
9026		/*
9027		 * The PERSISTENT RESERVE IN command would not be recognized by
9028		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
9029		 */
9030		SD_INFO(SD_LOG_ATTACH_DETACH, un,
9031		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
9032		un->un_reservation_type = SD_SCSI2_RESERVATION;
9033		break;
9034	default:
9035		/*
9036		 * default to SCSI-3 reservations
9037		 */
9038		SD_INFO(SD_LOG_ATTACH_DETACH, un,
9039		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
9040		un->un_reservation_type = SD_SCSI3_RESERVATION;
9041		break;
9042	}
9043
9044	/*
	 * After successfully attaching an instance, we record how many luns
	 * have been attached on the corresponding target and controller for
	 * parallel SCSI. This information is used when sd tries to set the
	 * tagged queuing capability in the HBA.
9049	 */
9050	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
9051		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
9052	}
9053
9054	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
9055	    "sd_unit_attach: un:0x%p exit success\n", un);
9056
9057	return (DDI_SUCCESS);
9058
9059	/*
9060	 * An error occurred during the attach; clean up & return failure.
9061	 */
9062
9063devid_failed:
9064
9065setup_pm_failed:
9066	ddi_remove_minor_node(devi, NULL);
9067
9068create_minor_nodes_failed:
9069	/*
9070	 * Cleanup from the scsi_ifsetcap() calls (437868)
9071	 */
9072	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9073	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9074
9075	/*
9076	 * Refer to the comments of setting tagged-qing in the beginning of
9077	 * sd_unit_attach. We can only disable tagged queuing when there is
9078	 * no lun attached on the target.
9079	 */
9080	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
9081		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9082	}
9083
9084	if (un->un_f_is_fibre == FALSE) {
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9086	}
9087
9088spinup_failed:
9089
9090	mutex_enter(SD_MUTEX(un));
9091
	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd restart */
9093	if (un->un_direct_priority_timeid != NULL) {
9094		timeout_id_t temp_id = un->un_direct_priority_timeid;
9095		un->un_direct_priority_timeid = NULL;
9096		mutex_exit(SD_MUTEX(un));
9097		(void) untimeout(temp_id);
9098		mutex_enter(SD_MUTEX(un));
9099	}
9100
9101	/* Cancel any pending start/stop timeouts */
9102	if (un->un_startstop_timeid != NULL) {
9103		timeout_id_t temp_id = un->un_startstop_timeid;
9104		un->un_startstop_timeid = NULL;
9105		mutex_exit(SD_MUTEX(un));
9106		(void) untimeout(temp_id);
9107		mutex_enter(SD_MUTEX(un));
9108	}
9109
9110	/* Cancel any pending reset-throttle timeouts */
9111	if (un->un_reset_throttle_timeid != NULL) {
9112		timeout_id_t temp_id = un->un_reset_throttle_timeid;
9113		un->un_reset_throttle_timeid = NULL;
9114		mutex_exit(SD_MUTEX(un));
9115		(void) untimeout(temp_id);
9116		mutex_enter(SD_MUTEX(un));
9117	}
9118
9119	/* Cancel any pending retry timeouts */
9120	if (un->un_retry_timeid != NULL) {
9121		timeout_id_t temp_id = un->un_retry_timeid;
9122		un->un_retry_timeid = NULL;
9123		mutex_exit(SD_MUTEX(un));
9124		(void) untimeout(temp_id);
9125		mutex_enter(SD_MUTEX(un));
9126	}
9127
9128	/* Cancel any pending delayed cv broadcast timeouts */
9129	if (un->un_dcvb_timeid != NULL) {
9130		timeout_id_t temp_id = un->un_dcvb_timeid;
9131		un->un_dcvb_timeid = NULL;
9132		mutex_exit(SD_MUTEX(un));
9133		(void) untimeout(temp_id);
9134		mutex_enter(SD_MUTEX(un));
9135	}
9136
9137	mutex_exit(SD_MUTEX(un));
9138
9139	/* There should not be any in-progress I/O so ASSERT this check */
9140	ASSERT(un->un_ncmds_in_transport == 0);
9141	ASSERT(un->un_ncmds_in_driver == 0);
9142
9143	/* Do not free the softstate if the callback routine is active */
9144	sd_sync_with_callback(un);
9145
9146	/*
9147	 * Partition stats apparently are not used with removables. These would
9148	 * not have been created during attach, so no need to clean them up...
9149	 */
9150	if (un->un_stats != NULL) {
9151		kstat_delete(un->un_stats);
9152		un->un_stats = NULL;
9153	}
9154	if (un->un_errstats != NULL) {
9155		kstat_delete(un->un_errstats);
9156		un->un_errstats = NULL;
9157	}
9158
9159	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9160	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9161
9162	ddi_prop_remove_all(devi);
9163	sema_destroy(&un->un_semoclose);
9164	cv_destroy(&un->un_state_cv);
9165
9166getrbuf_failed:
9167
9168	sd_free_rqs(un);
9169
9170alloc_rqs_failed:
9171
9172	devp->sd_private = NULL;
9173	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
9174
9175get_softstate_failed:
9176	/*
9177	 * Note: the man pages are unclear as to whether or not doing a
9178	 * ddi_soft_state_free(sd_state, instance) is the right way to
9179	 * clean up after the ddi_soft_state_zalloc() if the subsequent
9180	 * ddi_get_soft_state() fails.  The implication seems to be
9181	 * that the get_soft_state cannot fail if the zalloc succeeds.
9182	 */
9183	ddi_soft_state_free(sd_state, instance);
9184
9185probe_failed:
9186	scsi_unprobe(devp);
9187#ifdef SDDEBUG
9188	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
9189	    (sd_level_mask & SD_LOGMASK_TRACE)) {
9190		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
9191		    (void *)un);
9192	}
9193#endif
9194	return (DDI_FAILURE);
9195}
9196
9197
9198/*
9199 *    Function: sd_unit_detach
9200 *
9201 * Description: Performs DDI_DETACH processing for sddetach().
9202 *
9203 * Return Code: DDI_SUCCESS
9204 *		DDI_FAILURE
9205 *
9206 *     Context: Kernel thread context
9207 */
9208
9209static int
9210sd_unit_detach(dev_info_t *devi)
9211{
9212	struct scsi_device	*devp;
9213	struct sd_lun		*un;
9214	int			i;
9215	int			tgt;
9216	dev_t			dev;
9217	dev_info_t		*pdip = ddi_get_parent(devi);
9218	int			instance = ddi_get_instance(devi);
9219
9220	mutex_enter(&sd_detach_mutex);
9221
9222	/*
9223	 * Fail the detach for any of the following:
9224	 *  - Unable to get the sd_lun struct for the instance
9225	 *  - A layered driver has an outstanding open on the instance
9226	 *  - Another thread is already detaching this instance
9227	 *  - Another thread is currently performing an open
9228	 */
9229	devp = ddi_get_driver_private(devi);
9230	if ((devp == NULL) ||
9231	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
9232	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
9233	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
9234		mutex_exit(&sd_detach_mutex);
9235		return (DDI_FAILURE);
9236	}
9237
9238	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
9239
9240	/*
9241	 * Mark this instance as currently in a detach, to inhibit any
9242	 * opens from a layered driver.
9243	 */
9244	un->un_detach_count++;
9245	mutex_exit(&sd_detach_mutex);
9246
9247	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
9248	    SCSI_ADDR_PROP_TARGET, -1);
9249
9250	dev = sd_make_device(SD_DEVINFO(un));
9251
9252#ifndef lint
9253	_NOTE(COMPETING_THREADS_NOW);
9254#endif
9255
9256	mutex_enter(SD_MUTEX(un));
9257
9258	/*
9259	 * Fail the detach if there are any outstanding layered
9260	 * opens on this device.
9261	 */
9262	for (i = 0; i < NDKMAP; i++) {
9263		if (un->un_ocmap.lyropen[i] != 0) {
9264			goto err_notclosed;
9265		}
9266	}
9267
9268	/*
9269	 * Verify there are NO outstanding commands issued to this device.
	 * i.e., un_ncmds_in_transport == 0.
9271	 * It's possible to have outstanding commands through the physio
9272	 * code path, even though everything's closed.
9273	 */
9274	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
9275	    (un->un_direct_priority_timeid != NULL) ||
9276	    (un->un_state == SD_STATE_RWAIT)) {
9277		mutex_exit(SD_MUTEX(un));
9278		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9279		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
9280		goto err_stillbusy;
9281	}
9282
9283	/*
9284	 * If we have the device reserved, release the reservation.
9285	 */
9286	if ((un->un_resvd_status & SD_RESERVE) &&
9287	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
9288		mutex_exit(SD_MUTEX(un));
9289		/*
9290		 * Note: sd_reserve_release sends a command to the device
9291		 * via the sd_ioctlcmd() path, and can sleep.
9292		 */
9293		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
9294			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9295			    "sd_dr_detach: Cannot release reservation \n");
9296		}
9297	} else {
9298		mutex_exit(SD_MUTEX(un));
9299	}
9300
9301	/*
	 * Untimeout any reserve recovery, throttle reset, restart unit
	 * and delayed broadcast timeout threads. Protect the timeout
	 * pointers from getting nulled by their callback functions.
9305	 */
9306	mutex_enter(SD_MUTEX(un));
9307	if (un->un_resvd_timeid != NULL) {
9308		timeout_id_t temp_id = un->un_resvd_timeid;
9309		un->un_resvd_timeid = NULL;
9310		mutex_exit(SD_MUTEX(un));
9311		(void) untimeout(temp_id);
9312		mutex_enter(SD_MUTEX(un));
9313	}
9314
9315	if (un->un_reset_throttle_timeid != NULL) {
9316		timeout_id_t temp_id = un->un_reset_throttle_timeid;
9317		un->un_reset_throttle_timeid = NULL;
9318		mutex_exit(SD_MUTEX(un));
9319		(void) untimeout(temp_id);
9320		mutex_enter(SD_MUTEX(un));
9321	}
9322
9323	if (un->un_startstop_timeid != NULL) {
9324		timeout_id_t temp_id = un->un_startstop_timeid;
9325		un->un_startstop_timeid = NULL;
9326		mutex_exit(SD_MUTEX(un));
9327		(void) untimeout(temp_id);
9328		mutex_enter(SD_MUTEX(un));
9329	}
9330
9331	if (un->un_dcvb_timeid != NULL) {
9332		timeout_id_t temp_id = un->un_dcvb_timeid;
9333		un->un_dcvb_timeid = NULL;
9334		mutex_exit(SD_MUTEX(un));
9335		(void) untimeout(temp_id);
9336	} else {
9337		mutex_exit(SD_MUTEX(un));
9338	}
9339
9340	/* Remove any pending reservation reclaim requests for this device */
9341	sd_rmv_resv_reclaim_req(dev);
9342
9343	mutex_enter(SD_MUTEX(un));
9344
9345	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
9346	if (un->un_direct_priority_timeid != NULL) {
9347		timeout_id_t temp_id = un->un_direct_priority_timeid;
9348		un->un_direct_priority_timeid = NULL;
9349		mutex_exit(SD_MUTEX(un));
9350		(void) untimeout(temp_id);
9351		mutex_enter(SD_MUTEX(un));
9352	}
9353
9354	/* Cancel any active multi-host disk watch thread requests */
9355	if (un->un_mhd_token != NULL) {
9356		mutex_exit(SD_MUTEX(un));
		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
9358		if (scsi_watch_request_terminate(un->un_mhd_token,
9359		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9360			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9361			    "sd_dr_detach: Cannot cancel mhd watch request\n");
9362			/*
9363			 * Note: We are returning here after having removed
9364			 * some driver timeouts above. This is consistent with
9365			 * the legacy implementation but perhaps the watch
9366			 * terminate call should be made with the wait flag set.
9367			 */
9368			goto err_stillbusy;
9369		}
9370		mutex_enter(SD_MUTEX(un));
9371		un->un_mhd_token = NULL;
9372	}
9373
9374	if (un->un_swr_token != NULL) {
9375		mutex_exit(SD_MUTEX(un));
9376		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
9377		if (scsi_watch_request_terminate(un->un_swr_token,
9378		    SCSI_WATCH_TERMINATE_NOWAIT)) {
9379			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9380			    "sd_dr_detach: Cannot cancel swr watch request\n");
9381			/*
9382			 * Note: We are returning here after having removed
9383			 * some driver timeouts above. This is consistent with
9384			 * the legacy implementation but perhaps the watch
9385			 * terminate call should be made with the wait flag set.
9386			 */
9387			goto err_stillbusy;
9388		}
9389		mutex_enter(SD_MUTEX(un));
9390		un->un_swr_token = NULL;
9391	}
9392
9393	mutex_exit(SD_MUTEX(un));
9394
9395	/*
	 * Clear any scsi_reset_notifies. We clear the reset notifies
	 * even if we have not registered one.
9398	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
9399	 */
9400	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
9401	    sd_mhd_reset_notify_cb, (caddr_t)un);
9402
9403	/*
	 * Protect the timeout pointers from getting nulled by their
	 * callback functions during the cancellation process; otherwise
	 * untimeout can be invoked with a NULL value.
9407	 */
9408	_NOTE(NO_COMPETING_THREADS_NOW);
9409
9410	mutex_enter(&un->un_pm_mutex);
9411	if (un->un_pm_idle_timeid != NULL) {
9412		timeout_id_t temp_id = un->un_pm_idle_timeid;
9413		un->un_pm_idle_timeid = NULL;
9414		mutex_exit(&un->un_pm_mutex);
9415
9416		/*
9417		 * Timeout is active; cancel it.
9418		 * Note that it'll never be active on a device
		 * that does not support PM; therefore we don't
9420		 * have to check before calling pm_idle_component.
9421		 */
9422		(void) untimeout(temp_id);
9423		(void) pm_idle_component(SD_DEVINFO(un), 0);
9424		mutex_enter(&un->un_pm_mutex);
9425	}
9426
9427	/*
9428	 * Check whether there is already a timeout scheduled for power
	 * management. If so, then don't lower the power here; that's
	 * the timeout handler's job.
9431	 */
9432	if (un->un_pm_timeid != NULL) {
9433		timeout_id_t temp_id = un->un_pm_timeid;
9434		un->un_pm_timeid = NULL;
9435		mutex_exit(&un->un_pm_mutex);
9436		/*
9437		 * Timeout is active; cancel it.
9438		 * Note that it'll never be active on a device
		 * that does not support PM; therefore we don't
9440		 * have to check before calling pm_idle_component.
9441		 */
9442		(void) untimeout(temp_id);
9443		(void) pm_idle_component(SD_DEVINFO(un), 0);
9444
9445	} else {
9446		mutex_exit(&un->un_pm_mutex);
9447		if ((un->un_f_pm_is_enabled == TRUE) &&
9448		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
9449		    DDI_SUCCESS)) {
9450			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9451		    "sd_dr_detach: Lower power request failed, ignoring.\n");
9452			/*
9453			 * Fix for bug: 4297749, item # 13
9454			 * The above test now includes a check to see if PM is
			 * supported by this device before calling
			 * pm_lower_power().
9457			 * Note, the following is not dead code. The call to
9458			 * pm_lower_power above will generate a call back into
9459			 * our sdpower routine which might result in a timeout
9460			 * handler getting activated. Therefore the following
9461			 * code is valid and necessary.
9462			 */
9463			mutex_enter(&un->un_pm_mutex);
9464			if (un->un_pm_timeid != NULL) {
9465				timeout_id_t temp_id = un->un_pm_timeid;
9466				un->un_pm_timeid = NULL;
9467				mutex_exit(&un->un_pm_mutex);
9468				(void) untimeout(temp_id);
9469				(void) pm_idle_component(SD_DEVINFO(un), 0);
9470			} else {
9471				mutex_exit(&un->un_pm_mutex);
9472			}
9473		}
9474	}
9475
9476	/*
9477	 * Cleanup from the scsi_ifsetcap() calls (437868)
9478	 * Relocated here from above to be after the call to
9479	 * pm_lower_power, which was getting errors.
9480	 */
9481	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9482	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9483
9484	/*
	 * Currently, tagged queuing is supported per-target by the HBA.
	 * Setting this for a lun instance actually sets the capability of
	 * the whole target in the HBA, which affects those luns already
	 * attached on the same target. So during detach, we can only
	 * disable this capability when this is the only lun left on the
	 * target. By doing this, we assume a target has the same tagged
	 * queuing capability for every lun. The condition can be removed
	 * when the HBA is changed to support a per-lun tagged queuing
	 * capability.
9493	 */
9494	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
9495		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9496	}
9497
9498	if (un->un_f_is_fibre == FALSE) {
9499		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9500	}
9501
9502	/*
9503	 * Remove any event callbacks, fibre only
9504	 */
9505	if (un->un_f_is_fibre == TRUE) {
		if ((un->un_insert_event != NULL) &&
		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
		    DDI_SUCCESS)) {
9509			/*
9510			 * Note: We are returning here after having done
9511			 * substantial cleanup above. This is consistent
9512			 * with the legacy implementation but this may not
9513			 * be the right thing to do.
9514			 */
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel insert event\n");
9517			goto err_remove_event;
9518		}
9519		un->un_insert_event = NULL;
9520
		if ((un->un_remove_event != NULL) &&
		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
		    DDI_SUCCESS)) {
9524			/*
9525			 * Note: We are returning here after having done
9526			 * substantial cleanup above. This is consistent
9527			 * with the legacy implementation but this may not
9528			 * be the right thing to do.
9529			 */
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel remove event\n");
9532			goto err_remove_event;
9533		}
9534		un->un_remove_event = NULL;
9535	}
9536
9537	/* Do not free the softstate if the callback routine is active */
9538	sd_sync_with_callback(un);
9539
9540	/*
9541	 * Hold the detach mutex here, to make sure that no other threads ever
9542	 * can access a (partially) freed soft state structure.
9543	 */
9544	mutex_enter(&sd_detach_mutex);
9545
9546	/*
9547	 * Clean up the soft state struct.
9548	 * Cleanup is done in reverse order of allocs/inits.
9549	 * At this point there should be no competing threads anymore.
9550	 */
9551
9552	/* Unregister and free device id. */
9553	ddi_devid_unregister(devi);
9554	if (un->un_devid) {
9555		ddi_devid_free(un->un_devid);
9556		un->un_devid = NULL;
9557	}
9558
9559	/*
9560	 * Destroy wmap cache if it exists.
9561	 */
9562	if (un->un_wm_cache != NULL) {
9563		kmem_cache_destroy(un->un_wm_cache);
9564		un->un_wm_cache = NULL;
9565	}
9566
9567	/* Remove minor nodes */
9568	ddi_remove_minor_node(devi, NULL);
9569
9570	/*
9571	 * kstat cleanup is done in detach for all device types (4363169).
9572	 * We do not want to fail detach if the device kstats are not deleted
	 * since there is confusion about the devo_refcnt for the device.
9574	 * We just delete the kstats and let detach complete successfully.
9575	 */
9576	if (un->un_stats != NULL) {
9577		kstat_delete(un->un_stats);
9578		un->un_stats = NULL;
9579	}
9580	if (un->un_errstats != NULL) {
9581		kstat_delete(un->un_errstats);
9582		un->un_errstats = NULL;
9583	}
9584
9585	/* Remove partition stats */
9586	if (un->un_f_pkstats_enabled) {
9587		for (i = 0; i < NSDMAP; i++) {
9588			if (un->un_pstats[i] != NULL) {
9589				kstat_delete(un->un_pstats[i]);
9590				un->un_pstats[i] = NULL;
9591			}
9592		}
9593	}
9594
9595	/* Remove xbuf registration */
9596	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9597	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9598
9599	/* Remove driver properties */
9600	ddi_prop_remove_all(devi);
9601
9602	mutex_destroy(&un->un_pm_mutex);
9603	cv_destroy(&un->un_pm_busy_cv);
9604
9605	cv_destroy(&un->un_wcc_cv);
9606
9607	/* Open/close semaphore */
9608	sema_destroy(&un->un_semoclose);
9609
9610	/* Removable media condvar. */
9611	cv_destroy(&un->un_state_cv);
9612
9613	/* Suspend/resume condvar. */
9614	cv_destroy(&un->un_suspend_cv);
9615	cv_destroy(&un->un_disk_busy_cv);
9616
9617	sd_free_rqs(un);
9618
9619	/* Free up soft state */
9620	devp->sd_private = NULL;
9621	bzero(un, sizeof (struct sd_lun));
9622	ddi_soft_state_free(sd_state, instance);
9623
9624	mutex_exit(&sd_detach_mutex);
9625
9626	/* This frees up the INQUIRY data associated with the device. */
9627	scsi_unprobe(devp);
9628
9629	/*
	 * After successfully detaching an instance, we update how many luns
	 * have been attached on the corresponding target and controller for
	 * parallel SCSI. This information is used when sd tries to set the
	 * tagged queuing capability in the HBA.
9634	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
9635	 * check if the device is parallel SCSI. However, we don't need to
9636	 * check here because we've already checked during attach. No device
9637	 * that is not parallel SCSI is in the chain.
9638	 */
9639	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
9640		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
9641	}
9642
9643	return (DDI_SUCCESS);
9644
9645err_notclosed:
9646	mutex_exit(SD_MUTEX(un));
9647
9648err_stillbusy:
9649	_NOTE(NO_COMPETING_THREADS_NOW);
9650
9651err_remove_event:
9652	mutex_enter(&sd_detach_mutex);
9653	un->un_detach_count--;
9654	mutex_exit(&sd_detach_mutex);
9655
9656	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9657	return (DDI_FAILURE);
9658}
9659
9660
9661/*
9662 * Driver minor node structure and data table
9663 */
9664struct driver_minor_data {
9665	char	*name;
9666	minor_t	minor;
9667	int	type;
9668};
9669
9670static struct driver_minor_data sd_minor_data[] = {
9671	{"a", 0, S_IFBLK},
9672	{"b", 1, S_IFBLK},
9673	{"c", 2, S_IFBLK},
9674	{"d", 3, S_IFBLK},
9675	{"e", 4, S_IFBLK},
9676	{"f", 5, S_IFBLK},
9677	{"g", 6, S_IFBLK},
9678	{"h", 7, S_IFBLK},
9679#if defined(_SUNOS_VTOC_16)
9680	{"i", 8, S_IFBLK},
9681	{"j", 9, S_IFBLK},
9682	{"k", 10, S_IFBLK},
9683	{"l", 11, S_IFBLK},
9684	{"m", 12, S_IFBLK},
9685	{"n", 13, S_IFBLK},
9686	{"o", 14, S_IFBLK},
9687	{"p", 15, S_IFBLK},
9688#endif			/* defined(_SUNOS_VTOC_16) */
9689#if defined(_FIRMWARE_NEEDS_FDISK)
9690	{"q", 16, S_IFBLK},
9691	{"r", 17, S_IFBLK},
9692	{"s", 18, S_IFBLK},
9693	{"t", 19, S_IFBLK},
9694	{"u", 20, S_IFBLK},
9695#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9696	{"a,raw", 0, S_IFCHR},
9697	{"b,raw", 1, S_IFCHR},
9698	{"c,raw", 2, S_IFCHR},
9699	{"d,raw", 3, S_IFCHR},
9700	{"e,raw", 4, S_IFCHR},
9701	{"f,raw", 5, S_IFCHR},
9702	{"g,raw", 6, S_IFCHR},
9703	{"h,raw", 7, S_IFCHR},
9704#if defined(_SUNOS_VTOC_16)
9705	{"i,raw", 8, S_IFCHR},
9706	{"j,raw", 9, S_IFCHR},
9707	{"k,raw", 10, S_IFCHR},
9708	{"l,raw", 11, S_IFCHR},
9709	{"m,raw", 12, S_IFCHR},
9710	{"n,raw", 13, S_IFCHR},
9711	{"o,raw", 14, S_IFCHR},
9712	{"p,raw", 15, S_IFCHR},
9713#endif			/* defined(_SUNOS_VTOC_16) */
9714#if defined(_FIRMWARE_NEEDS_FDISK)
9715	{"q,raw", 16, S_IFCHR},
9716	{"r,raw", 17, S_IFCHR},
9717	{"s,raw", 18, S_IFCHR},
9718	{"t,raw", 19, S_IFCHR},
9719	{"u,raw", 20, S_IFCHR},
9720#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9721	{0}
9722};
9723
9724static struct driver_minor_data sd_minor_data_efi[] = {
9725	{"a", 0, S_IFBLK},
9726	{"b", 1, S_IFBLK},
9727	{"c", 2, S_IFBLK},
9728	{"d", 3, S_IFBLK},
9729	{"e", 4, S_IFBLK},
9730	{"f", 5, S_IFBLK},
9731	{"g", 6, S_IFBLK},
9732	{"wd", 7, S_IFBLK},
9733#if defined(_FIRMWARE_NEEDS_FDISK)
9734	{"q", 16, S_IFBLK},
9735	{"r", 17, S_IFBLK},
9736	{"s", 18, S_IFBLK},
9737	{"t", 19, S_IFBLK},
9738	{"u", 20, S_IFBLK},
9739#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9740	{"a,raw", 0, S_IFCHR},
9741	{"b,raw", 1, S_IFCHR},
9742	{"c,raw", 2, S_IFCHR},
9743	{"d,raw", 3, S_IFCHR},
9744	{"e,raw", 4, S_IFCHR},
9745	{"f,raw", 5, S_IFCHR},
9746	{"g,raw", 6, S_IFCHR},
9747	{"wd,raw", 7, S_IFCHR},
9748#if defined(_FIRMWARE_NEEDS_FDISK)
9749	{"q,raw", 16, S_IFCHR},
9750	{"r,raw", 17, S_IFCHR},
9751	{"s,raw", 18, S_IFCHR},
9752	{"t,raw", 19, S_IFCHR},
9753	{"u,raw", 20, S_IFCHR},
9754#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9755	{0}
9756};
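
/*
 * Example (illustrative): for instance 3, the "a" entry above yields minor
 * number (3 << SDUNIT_SHIFT) | 0 and a block node such as
 * /devices/.../sd@0,0:a, which devfsadm(1M) typically links to
 * /dev/dsk/c0t0d0s0; the ",raw" entries back the corresponding /dev/rdsk
 * character nodes.
 */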
9757
9758
9759/*
9760 *    Function: sd_create_minor_nodes
9761 *
9762 * Description: Create the minor device nodes for the instance.
9763 *
9764 *   Arguments: un - driver soft state (unit) structure
9765 *		devi - pointer to device info structure
9766 *
9767 * Return Code: DDI_SUCCESS
9768 *		DDI_FAILURE
9769 *
9770 *     Context: Kernel thread context
9771 */
9772
9773static int
9774sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9775{
9776	struct driver_minor_data	*dmdp;
9777	struct scsi_device		*devp;
9778	int				instance;
9779	char				name[48];
9780
9781	ASSERT(un != NULL);
9782	devp = ddi_get_driver_private(devi);
9783	instance = ddi_get_instance(devp->sd_dev);
9784
9785	/*
9786	 * Create all the minor nodes for this target.
9787	 */
9788	if (un->un_blockcount > DK_MAX_BLOCKS)
9789		dmdp = sd_minor_data_efi;
9790	else
9791		dmdp = sd_minor_data;
9792	while (dmdp->name != NULL) {
9793
9794		(void) sprintf(name, "%s", dmdp->name);
9795
9796		if (ddi_create_minor_node(devi, name, dmdp->type,
9797		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9798		    un->un_node_type, NULL) == DDI_FAILURE) {
9799			/*
9800			 * Clean up any nodes that may have been created, in
9801			 * case this fails in the middle of the loop.
9802			 */
9803			ddi_remove_minor_node(devi, NULL);
9804			return (DDI_FAILURE);
9805		}
9806		dmdp++;
9807	}
9808
9809	return (DDI_SUCCESS);
9810}
9811
9812
9813/*
9814 *    Function: sd_create_errstats
9815 *
9816 * Description: This routine instantiates the device error stats.
9817 *
9818 *		Note: During attach the stats are instantiated first so they are
9819 *		available for attach-time routines that utilize the driver
9820 *		iopath to send commands to the device. The stats are initialized
9821 *		separately so data obtained during some attach-time routines is
9822 *		available. (4362483)
9823 *
9824 *   Arguments: un - driver soft state (unit) structure
9825 *		instance - driver instance
9826 *
9827 *     Context: Kernel thread context
9828 */
9829
9830static void
9831sd_create_errstats(struct sd_lun *un, int instance)
9832{
9833	struct	sd_errstats	*stp;
9834	char	kstatmodule_err[KSTAT_STRLEN];
9835	char	kstatname[KSTAT_STRLEN];
9836	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9837
9838	ASSERT(un != NULL);
9839
9840	if (un->un_errstats != NULL) {
9841		return;
9842	}
9843
9844	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9845	    "%serr", sd_label);
9846	(void) snprintf(kstatname, sizeof (kstatname),
9847	    "%s%d,err", sd_label, instance);
9848
9849	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9850	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9851
9852	if (un->un_errstats == NULL) {
9853		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9854		    "sd_create_errstats: Failed kstat_create\n");
9855		return;
9856	}
9857
9858	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9859	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9860	    KSTAT_DATA_UINT32);
9861	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9862	    KSTAT_DATA_UINT32);
9863	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9864	    KSTAT_DATA_UINT32);
9865	kstat_named_init(&stp->sd_vid,		"Vendor",
9866	    KSTAT_DATA_CHAR);
9867	kstat_named_init(&stp->sd_pid,		"Product",
9868	    KSTAT_DATA_CHAR);
9869	kstat_named_init(&stp->sd_revision,	"Revision",
9870	    KSTAT_DATA_CHAR);
9871	kstat_named_init(&stp->sd_serial,	"Serial No",
9872	    KSTAT_DATA_CHAR);
9873	kstat_named_init(&stp->sd_capacity,	"Size",
9874	    KSTAT_DATA_ULONGLONG);
9875	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9876	    KSTAT_DATA_UINT32);
9877	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9878	    KSTAT_DATA_UINT32);
9879	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9880	    KSTAT_DATA_UINT32);
9881	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9882	    KSTAT_DATA_UINT32);
9883	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9884	    KSTAT_DATA_UINT32);
9885	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9886	    KSTAT_DATA_UINT32);
9887
9888	un->un_errstats->ks_private = un;
9889	un->un_errstats->ks_update  = nulldev;
9890
9891	kstat_install(un->un_errstats);
9892}
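
/*
 * Example (illustrative, assuming sd_label is "sd"): for instance 0 the
 * kstat created above lives in module "sderr" under the name "sd0,err"
 * and can be read from userland as:
 *
 *	# kstat -m sderr -n sd0,err
 */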
9893
9894
9895/*
9896 *    Function: sd_set_errstats
9897 *
9898 * Description: This routine sets the value of the vendor id, product id,
9899 *		revision, serial number, and capacity device error stats.
9900 *
9901 *		Note: During attach the stats are instantiated first so they are
9902 *		available for attach-time routines that utilize the driver
9903 *		iopath to send commands to the device. The stats are initialized
9904 *		separately so data obtained during some attach-time routines is
9905 *		available. (4362483)
9906 *
9907 *   Arguments: un - driver soft state (unit) structure
9908 *
9909 *     Context: Kernel thread context
9910 */
9911
9912static void
9913sd_set_errstats(struct sd_lun *un)
9914{
9915	struct	sd_errstats	*stp;
9916
9917	ASSERT(un != NULL);
9918	ASSERT(un->un_errstats != NULL);
9919	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9920	ASSERT(stp != NULL);
9921	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9922	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9923	(void) strncpy(stp->sd_revision.value.c,
9924	    un->un_sd->sd_inq->inq_revision, 4);
9925
9926	/*
	 * All the errstats are persistent across detach/attach, so reset
	 * them here in case a disk drive was hot-replaced, except when the
	 * drive is an unchanged Sun-qualified drive.
9931	 */
9932	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
9933	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9934	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
9935		stp->sd_softerrs.value.ui32 = 0;
9936		stp->sd_harderrs.value.ui32 = 0;
9937		stp->sd_transerrs.value.ui32 = 0;
9938		stp->sd_rq_media_err.value.ui32 = 0;
9939		stp->sd_rq_ntrdy_err.value.ui32 = 0;
9940		stp->sd_rq_nodev_err.value.ui32 = 0;
9941		stp->sd_rq_recov_err.value.ui32 = 0;
9942		stp->sd_rq_illrq_err.value.ui32 = 0;
9943		stp->sd_rq_pfa_err.value.ui32 = 0;
9944	}
9945
9946	/*
9947	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9948	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9949	 * (4376302))
9950	 */
9951	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9952		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9953		    sizeof (SD_INQUIRY(un)->inq_serial));
9954	}
9955
9956	if (un->un_f_blockcount_is_valid != TRUE) {
9957		/*
9958		 * Set capacity error stat to 0 for no media. This ensures
9959		 * a valid capacity is displayed in response to 'iostat -E'
9960		 * when no media is present in the device.
9961		 */
9962		stp->sd_capacity.value.ui64 = 0;
9963	} else {
9964		/*
9965		 * Multiply un_blockcount by un->un_sys_blocksize to get
9966		 * capacity.
9967		 *
9968		 * Note: for non-512 blocksize devices "un_blockcount" has been
9969		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9970		 * (un_tgt_blocksize / un->un_sys_blocksize).
9971		 */
9972		stp->sd_capacity.value.ui64 = (uint64_t)
9973		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9974	}
9975}
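
/*
 * Worked example (illustrative): a unit reporting un_blockcount = 2097152
 * with the default 512-byte un_sys_blocksize yields sd_capacity =
 * 2097152 * 512 = 1073741824 bytes (1 GiB).
 */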
9976
9977
9978/*
9979 *    Function: sd_set_pstats
9980 *
9981 * Description: This routine instantiates and initializes the partition
9982 *              stats for each partition with more than zero blocks.
9983 *		(4363169)
9984 *
9985 *   Arguments: un - driver soft state (unit) structure
9986 *
9987 *     Context: Kernel thread context
9988 */
9989
9990static void
9991sd_set_pstats(struct sd_lun *un)
9992{
9993	char	kstatname[KSTAT_STRLEN];
9994	int	instance;
9995	int	i;
9996
9997	ASSERT(un != NULL);
9998
9999	instance = ddi_get_instance(SD_DEVINFO(un));
10000
10001	/* Note:x86: is this a VTOC8/VTOC16 difference? */
10002	for (i = 0; i < NSDMAP; i++) {
10003		if ((un->un_pstats[i] == NULL) &&
10004		    (un->un_map[i].dkl_nblk != 0)) {
10005			(void) snprintf(kstatname, sizeof (kstatname),
10006			    "%s%d,%s", sd_label, instance,
10007			    sd_minor_data[i].name);
10008			un->un_pstats[i] = kstat_create(sd_label,
10009			    instance, kstatname, "partition", KSTAT_TYPE_IO,
10010			    1, KSTAT_FLAG_PERSISTENT);
10011			if (un->un_pstats[i] != NULL) {
10012				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
10013				kstat_install(un->un_pstats[i]);
10014			}
10015		}
10016	}
10017}
10018
10019
10020#if (defined(__fibre))
10021/*
10022 *    Function: sd_init_event_callbacks
10023 *
10024 * Description: This routine initializes the insertion and removal event
10025 *		callbacks. (fibre only)
10026 *
10027 *   Arguments: un - driver soft state (unit) structure
10028 *
10029 *     Context: Kernel thread context
10030 */
10031
10032static void
10033sd_init_event_callbacks(struct sd_lun *un)
10034{
10035	ASSERT(un != NULL);
10036
10037	if ((un->un_insert_event == NULL) &&
10038	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
10039	    &un->un_insert_event) == DDI_SUCCESS)) {
10040		/*
10041		 * Add the callback for an insertion event
10042		 */
10043		(void) ddi_add_event_handler(SD_DEVINFO(un),
10044		    un->un_insert_event, sd_event_callback, (void *)un,
10045		    &(un->un_insert_cb_id));
10046	}
10047
10048	if ((un->un_remove_event == NULL) &&
10049	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
10050	    &un->un_remove_event) == DDI_SUCCESS)) {
10051		/*
10052		 * Add the callback for a removal event
10053		 */
10054		(void) ddi_add_event_handler(SD_DEVINFO(un),
10055		    un->un_remove_event, sd_event_callback, (void *)un,
10056		    &(un->un_remove_cb_id));
10057	}
10058}
10059
10060
10061/*
10062 *    Function: sd_event_callback
10063 *
10064 * Description: This routine handles insert/remove events (photon). The
10065 *		state is changed to OFFLINE which can be used to supress
10066 *		error msgs. (fibre only)
10067 *
10068 *   Arguments: un - driver soft state (unit) structure
10069 *
10070 *     Context: Callout thread context
10071 */
10072/* ARGSUSED */
10073static void
10074sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
10075    void *bus_impldata)
10076{
10077	struct sd_lun *un = (struct sd_lun *)arg;
10078
10079	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
10080	if (event == un->un_insert_event) {
10081		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
10082		mutex_enter(SD_MUTEX(un));
10083		if (un->un_state == SD_STATE_OFFLINE) {
10084			if (un->un_last_state != SD_STATE_SUSPENDED) {
10085				un->un_state = un->un_last_state;
10086			} else {
10087				/*
10088				 * We have gone through SUSPEND/RESUME while
10089				 * we were offline. Restore the last state
10090				 */
10091				un->un_state = un->un_save_state;
10092			}
10093		}
10094		mutex_exit(SD_MUTEX(un));
10095
10096	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
10097	} else if (event == un->un_remove_event) {
10098		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
10099		mutex_enter(SD_MUTEX(un));
10100		/*
10101		 * We need to handle an event callback that occurs during
10102		 * the suspend operation, since we don't prevent it.
10103		 */
10104		if (un->un_state != SD_STATE_OFFLINE) {
10105			if (un->un_state != SD_STATE_SUSPENDED) {
10106				New_state(un, SD_STATE_OFFLINE);
10107			} else {
10108				un->un_last_state = SD_STATE_OFFLINE;
10109			}
10110		}
10111		mutex_exit(SD_MUTEX(un));
10112	} else {
10113		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
10114		    "!Unknown event\n");
10115	}
10116
10117}
10118#endif
10119
10120/*
10121 *    Function: sd_cache_control()
10122 *
10123 * Description: This routine is the driver entry point for setting
10124 *		read and write caching by modifying the WCE (write cache
10125 *		enable) and RCD (read cache disable) bits of mode
10126 *		page 8 (MODEPAGE_CACHING).
10127 *
10128 *   Arguments: un - driver soft state (unit) structure
10129 *		rcd_flag - flag for controlling the read cache
10130 *		wce_flag - flag for controlling the write cache
10131 *
10132 * Return Code: EIO
10133 *		code returned by sd_send_scsi_MODE_SENSE and
10134 *		sd_send_scsi_MODE_SELECT
10135 *
10136 *     Context: Kernel Thread
10137 */
10138
10139static int
10140sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
10141{
10142	struct mode_caching	*mode_caching_page;
10143	uchar_t			*header;
10144	size_t			buflen;
10145	int			hdrlen;
10146	int			bd_len;
10147	int			rval = 0;
10148	struct mode_header_grp2	*mhp;
10149
10150	ASSERT(un != NULL);
10151
10152	/*
	 * Do a test unit ready; otherwise, a mode sense may not work if this
10154	 * is the first command sent to the device after boot.
10155	 */
10156	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10157
10158	if (un->un_f_cfg_is_atapi == TRUE) {
10159		hdrlen = MODE_HEADER_LENGTH_GRP2;
10160	} else {
10161		hdrlen = MODE_HEADER_LENGTH;
10162	}
10163
10164	/*
10165	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
	 * we get all of the mode sense data; otherwise, the mode select
	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
10169	 */
10170	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
	    sizeof (struct mode_cache_scsi3);
10172
10173	header = kmem_zalloc(buflen, KM_SLEEP);
10174
10175	/* Get the information from the device. */
10176	if (un->un_f_cfg_is_atapi == TRUE) {
10177		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
10178		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10179	} else {
10180		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
10181		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10182	}
10183	if (rval != 0) {
10184		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
10185		    "sd_cache_control: Mode Sense Failed\n");
10186		kmem_free(header, buflen);
10187		return (rval);
10188	}
10189
10190	/*
10191	 * Determine size of Block Descriptors in order to locate
10192	 * the mode page data. ATAPI devices return 0, SCSI devices
10193	 * should return MODE_BLK_DESC_LENGTH.
10194	 */
10195	if (un->un_f_cfg_is_atapi == TRUE) {
10196		mhp	= (struct mode_header_grp2 *)header;
10197		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
10198	} else {
10199		bd_len  = ((struct mode_header *)header)->bdesc_length;
10200	}
10201
10202	if (bd_len > MODE_BLK_DESC_LENGTH) {
10203		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10204		    "sd_cache_control: Mode Sense returned invalid "
10205		    "block descriptor length\n");
10206		kmem_free(header, buflen);
10207		return (EIO);
10208	}
10209
10210	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
10211	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
10212		SD_ERROR(SD_LOG_COMMON, un, "sd_cache_control: Mode Sense"
10213		    " caching page code mismatch %d\n",
10214		    mode_caching_page->mode_page.code);
10215		kmem_free(header, buflen);
10216		return (EIO);
10217	}
10218
10219	/* Check the relevant bits on successful mode sense. */
10220	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
10221	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
10222	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
10223	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
10224
10225		size_t sbuflen;
10226		uchar_t save_pg;
10227
10228		/*
10229		 * Construct select buffer length based on the
10230		 * length of the sense data returned.
10231		 */
		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
		    sizeof (struct mode_page) +
		    (int)mode_caching_page->mode_page.length;
10235
10236		/*
10237		 * Set the caching bits as requested.
10238		 */
10239		if (rcd_flag == SD_CACHE_ENABLE)
10240			mode_caching_page->rcd = 0;
10241		else if (rcd_flag == SD_CACHE_DISABLE)
10242			mode_caching_page->rcd = 1;
10243
10244		if (wce_flag == SD_CACHE_ENABLE)
10245			mode_caching_page->wce = 1;
10246		else if (wce_flag == SD_CACHE_DISABLE)
10247			mode_caching_page->wce = 0;
10248
10249		/*
10250		 * Save the page if the mode sense says the
10251		 * drive supports it.
10252		 */
10253		save_pg = mode_caching_page->mode_page.ps ?
		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
10255
10256		/* Clear reserved bits before mode select. */
10257		mode_caching_page->mode_page.ps = 0;
10258
10259		/*
10260		 * Clear out mode header for mode select.
10261		 * The rest of the retrieved page will be reused.
10262		 */
10263		bzero(header, hdrlen);
10264
10265		if (un->un_f_cfg_is_atapi == TRUE) {
10266			mhp = (struct mode_header_grp2 *)header;
10267			mhp->bdesc_length_hi = bd_len >> 8;
10268			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
10269		} else {
10270			((struct mode_header *)header)->bdesc_length = bd_len;
10271		}
10272
10273		/* Issue mode select to change the cache settings */
10274		if (un->un_f_cfg_is_atapi == TRUE) {
10275			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
10276			    sbuflen, save_pg, SD_PATH_DIRECT);
10277		} else {
10278			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
10279			    sbuflen, save_pg, SD_PATH_DIRECT);
10280		}
10281	}
10282
10283	kmem_free(header, buflen);
10284	return (rval);
10285}
10286
10287
10288/*
10289 *    Function: sd_get_write_cache_enabled()
10290 *
10291 * Description: This routine is the driver entry point for determining if
10292 *		write caching is enabled.  It examines the WCE (write cache
10293 *		enable) bits of mode page 8 (MODEPAGE_CACHING).
10294 *
10295 *   Arguments: un - driver soft state (unit) structure
10296 *   		is_enabled - pointer to int where write cache enabled state
10297 *   			is returned (non-zero -> write cache enabled)
10298 *
10299 *
10300 * Return Code: EIO
10301 *		code returned by sd_send_scsi_MODE_SENSE
10302 *
10303 *     Context: Kernel Thread
10304 *
10305 * NOTE: If ioctl is added to disable write cache, this sequence should
10306 * be followed so that no locking is required for accesses to
10307 * un->un_f_write_cache_enabled:
10308 * 	do mode select to clear wce
10309 * 	do synchronize cache to flush cache
10310 * 	set un->un_f_write_cache_enabled = FALSE
10311 *
10312 * Conversely, an ioctl to enable the write cache should be done
10313 * in this order:
10314 * 	set un->un_f_write_cache_enabled = TRUE
10315 * 	do mode select to set wce
10316 */
10317
10318static int
10319sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
10320{
10321	struct mode_caching	*mode_caching_page;
10322	uchar_t			*header;
10323	size_t			buflen;
10324	int			hdrlen;
10325	int			bd_len;
10326	int			rval = 0;
10327
10328	ASSERT(un != NULL);
10329	ASSERT(is_enabled != NULL);
10330
10331	/* in case of error, flag as enabled */
10332	*is_enabled = TRUE;
10333
10334	/*
	 * Do a test unit ready; otherwise, a mode sense may not work if this
10336	 * is the first command sent to the device after boot.
10337	 */
10338	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10339
10340	if (un->un_f_cfg_is_atapi == TRUE) {
10341		hdrlen = MODE_HEADER_LENGTH_GRP2;
10342	} else {
10343		hdrlen = MODE_HEADER_LENGTH;
10344	}
10345
10346	/*
10347	 * Allocate memory for the retrieved mode page and its headers.  Set
10348	 * a pointer to the page itself.
10349	 */
10350	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
10351	header = kmem_zalloc(buflen, KM_SLEEP);
10352
10353	/* Get the information from the device. */
10354	if (un->un_f_cfg_is_atapi == TRUE) {
10355		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
10356		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10357	} else {
10358		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
10359		    MODEPAGE_CACHING, SD_PATH_DIRECT);
10360	}
10361	if (rval != 0) {
10362		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
10363		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
10364		kmem_free(header, buflen);
10365		return (rval);
10366	}
10367
10368	/*
10369	 * Determine size of Block Descriptors in order to locate
10370	 * the mode page data. ATAPI devices return 0, SCSI devices
10371	 * should return MODE_BLK_DESC_LENGTH.
10372	 */
10373	if (un->un_f_cfg_is_atapi == TRUE) {
10374		struct mode_header_grp2	*mhp;
10375		mhp	= (struct mode_header_grp2 *)header;
10376		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
10377	} else {
10378		bd_len  = ((struct mode_header *)header)->bdesc_length;
10379	}
10380
10381	if (bd_len > MODE_BLK_DESC_LENGTH) {
10382		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10383		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
10384		    "block descriptor length\n");
10385		kmem_free(header, buflen);
10386		return (EIO);
10387	}
10388
10389	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
10390	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_write_cache_enabled:"
		    " Mode Sense caching page code mismatch %d\n",
10393		    mode_caching_page->mode_page.code);
10394		kmem_free(header, buflen);
10395		return (EIO);
10396	}
10397	*is_enabled = mode_caching_page->wce;
10398
10399	kmem_free(header, buflen);
10400	return (0);
10401}
10402
10403
10404/*
10405 *    Function: sd_make_device
10406 *
10407 * Description: Utility routine to return the Solaris device number from
10408 *		the data in the device's dev_info structure.
10409 *
10410 * Return Code: The Solaris device number
10411 *
10412 *     Context: Any
10413 */
10414
10415static dev_t
10416sd_make_device(dev_info_t *devi)
10417{
10418	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
10419	    ddi_get_instance(devi) << SDUNIT_SHIFT));
10420}
10421
10422
10423/*
10424 *    Function: sd_pm_entry
10425 *
10426 * Description: Called at the start of a new command to manage power
10427 *		and busy status of a device. This includes determining whether
10428 *		the current power state of the device is sufficient for
10429 *		performing the command or whether it must be changed.
10430 *		The PM framework is notified appropriately.
10431 *		Only with a return status of DDI_SUCCESS will the
10432 *		component be busy to the framework.
10433 *
10434 *		All callers of sd_pm_entry must check the return status
10435 *		and only call sd_pm_exit it it was DDI_SUCCESS. A status
10436 *		of DDI_FAILURE indicates the device failed to power up.
10437 *		In this case un_pm_count has been adjusted so the result
10438 *		on exit is still powered down, ie. count is less than 0.
10439 *		Calling sd_pm_exit with this count value hits an ASSERT.
10440 *
10441 * Return Code: DDI_SUCCESS or DDI_FAILURE
10442 *
10443 *     Context: Kernel thread context.
10444 */
10445
10446static int
10447sd_pm_entry(struct sd_lun *un)
10448{
10449	int return_status = DDI_SUCCESS;
10450
10451	ASSERT(!mutex_owned(SD_MUTEX(un)));
10452	ASSERT(!mutex_owned(&un->un_pm_mutex));
10453
10454	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
10455
10456	if (un->un_f_pm_is_enabled == FALSE) {
10457		SD_TRACE(SD_LOG_IO_PM, un,
10458		    "sd_pm_entry: exiting, PM not enabled\n");
10459		return (return_status);
10460	}
10461
10462	/*
10463	 * Just increment a counter if PM is enabled. On the transition from
10464	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
10465	 * the count with each IO and mark the device as idle when the count
10466	 * hits 0.
10467	 *
10468	 * If the count is less than 0 the device is powered down. If a powered
10469	 * down device is successfully powered up then the count must be
10470	 * incremented to reflect the power up. Note that it'll get incremented
10471	 * a second time to become busy.
10472	 *
10473	 * Because the following has the potential to change the device state
10474	 * and must release the un_pm_mutex to do so, only one thread can be
10475	 * allowed through at a time.
10476	 */
10477
10478	mutex_enter(&un->un_pm_mutex);
10479	while (un->un_pm_busy == TRUE) {
10480		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
10481	}
10482	un->un_pm_busy = TRUE;
10483
10484	if (un->un_pm_count < 1) {
10485
10486		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
10487
10488		/*
10489		 * Indicate we are now busy so the framework won't attempt to
10490		 * power down the device. This call will only fail if either
10491		 * we passed a bad component number or the device has no
10492		 * components. Neither of these should ever happen.
10493		 */
10494		mutex_exit(&un->un_pm_mutex);
10495		return_status = pm_busy_component(SD_DEVINFO(un), 0);
10496		ASSERT(return_status == DDI_SUCCESS);
10497
10498		mutex_enter(&un->un_pm_mutex);
10499
10500		if (un->un_pm_count < 0) {
10501			mutex_exit(&un->un_pm_mutex);
10502
10503			SD_TRACE(SD_LOG_IO_PM, un,
10504			    "sd_pm_entry: power up component\n");
10505
10506			/*
10507			 * pm_raise_power will cause sdpower to be called
10508			 * which brings the device power level to the
10509			 * desired state, ON in this case. If successful,
10510			 * un_pm_count and un_power_level will be updated
10511			 * appropriately.
10512			 */
10513			return_status = pm_raise_power(SD_DEVINFO(un), 0,
10514			    SD_SPINDLE_ON);
10515
10516			mutex_enter(&un->un_pm_mutex);
10517
10518			if (return_status != DDI_SUCCESS) {
10519				/*
10520				 * Power up failed.
10521				 * Idle the device and adjust the count
10522				 * so the result on exit is that we're
				 * still powered down, i.e., the count is less
				 * than 0.
10524				 */
10525				SD_TRACE(SD_LOG_IO_PM, un,
10526				    "sd_pm_entry: power up failed,"
10527				    " idle the component\n");
10528
10529				(void) pm_idle_component(SD_DEVINFO(un), 0);
10530				un->un_pm_count--;
10531			} else {
10532				/*
10533				 * Device is powered up, verify the
10534				 * count is non-negative.
10535				 * This is debug only.
10536				 */
10537				ASSERT(un->un_pm_count == 0);
10538			}
10539		}
10540
10541		if (return_status == DDI_SUCCESS) {
10542			/*
10543			 * For performance, now that the device has been tagged
10544			 * as busy, and it's known to be powered up, update the
10545			 * chain types to use jump tables that do not include
10546			 * pm. This significantly lowers the overhead and
10547			 * therefore improves performance.
10548			 */
10549
10550			mutex_exit(&un->un_pm_mutex);
10551			mutex_enter(SD_MUTEX(un));
10552			SD_TRACE(SD_LOG_IO_PM, un,
10553			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10554			    un->un_uscsi_chain_type);
10555
10556			if (un->un_f_non_devbsize_supported) {
10557				un->un_buf_chain_type =
10558				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10559			} else {
10560				un->un_buf_chain_type =
10561				    SD_CHAIN_INFO_DISK_NO_PM;
10562			}
10563			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10564
10565			SD_TRACE(SD_LOG_IO_PM, un,
10566			    "             changed  uscsi_chain_type to   %d\n",
10567			    un->un_uscsi_chain_type);
10568			mutex_exit(SD_MUTEX(un));
10569			mutex_enter(&un->un_pm_mutex);
10570
10571			if (un->un_pm_idle_timeid == NULL) {
10572				/* 300 ms. */
10573				un->un_pm_idle_timeid =
10574				    timeout(sd_pm_idletimeout_handler, un,
10575				    (drv_usectohz((clock_t)300000)));
10576				/*
10577				 * Include an extra call to busy which keeps the
				 * device busy with respect to the PM layer
10579				 * until the timer fires, at which time it'll
10580				 * get the extra idle call.
10581				 */
10582				(void) pm_busy_component(SD_DEVINFO(un), 0);
10583			}
10584		}
10585	}
10586	un->un_pm_busy = FALSE;
10587	/* Next... */
10588	cv_signal(&un->un_pm_busy_cv);
10589
10590	un->un_pm_count++;
10591
10592	SD_TRACE(SD_LOG_IO_PM, un,
10593	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10594
10595	mutex_exit(&un->un_pm_mutex);
10596
10597	return (return_status);
10598}
10599
10600
10601/*
10602 *    Function: sd_pm_exit
10603 *
10604 * Description: Called at the completion of a command to manage busy
10605 *		status for the device. If the device becomes idle the
10606 *		PM framework is notified.
10607 *
10608 *     Context: Kernel thread context
10609 */
10610
10611static void
10612sd_pm_exit(struct sd_lun *un)
10613{
10614	ASSERT(!mutex_owned(SD_MUTEX(un)));
10615	ASSERT(!mutex_owned(&un->un_pm_mutex));
10616
10617	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10618
10619	/*
10620	 * After attach the following flag is only read, so don't
10621	 * take the penalty of acquiring a mutex for it.
10622	 */
10623	if (un->un_f_pm_is_enabled == TRUE) {
10624
10625		mutex_enter(&un->un_pm_mutex);
10626		un->un_pm_count--;
10627
10628		SD_TRACE(SD_LOG_IO_PM, un,
10629		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10630
10631		ASSERT(un->un_pm_count >= 0);
10632		if (un->un_pm_count == 0) {
10633			mutex_exit(&un->un_pm_mutex);
10634
10635			SD_TRACE(SD_LOG_IO_PM, un,
10636			    "sd_pm_exit: idle component\n");
10637
10638			(void) pm_idle_component(SD_DEVINFO(un), 0);
10639
10640		} else {
10641			mutex_exit(&un->un_pm_mutex);
10642		}
10643	}
10644
10645	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10646}
10647
10648
10649/*
10650 *    Function: sdopen
10651 *
10652 * Description: Driver's open(9e) entry point function.
10653 *
 *   Arguments: dev_p   - pointer to device number
10655 *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10656 *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10657 *		cred_p  - user credential pointer
10658 *
10659 * Return Code: EINVAL
10660 *		ENXIO
10661 *		EIO
10662 *		EROFS
10663 *		EBUSY
10664 *
10665 *     Context: Kernel thread context
10666 */
10667/* ARGSUSED */
10668static int
10669sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10670{
10671	struct sd_lun	*un;
10672	int		nodelay;
10673	int		part;
10674	uint64_t	partmask;
10675	int		instance;
10676	dev_t		dev;
10677	int		rval = EIO;
10678
10679	/* Validate the open type */
10680	if (otyp >= OTYPCNT) {
10681		return (EINVAL);
10682	}
10683
10684	dev = *dev_p;
10685	instance = SDUNIT(dev);
10686	mutex_enter(&sd_detach_mutex);
10687
10688	/*
10689	 * Fail the open if there is no softstate for the instance, or
10690	 * if another thread somewhere is trying to detach the instance.
10691	 */
10692	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10693	    (un->un_detach_count != 0)) {
10694		mutex_exit(&sd_detach_mutex);
10695		/*
10696		 * The probe cache only needs to be cleared when open (9e) fails
10697		 * with ENXIO (4238046).
10698		 */
10699		/*
10700		 * un-conditionally clearing probe cache is ok with
10701		 * separate sd/ssd binaries
10702		 * x86 platform can be an issue with both parallel
10703		 * and fibre in 1 binary
10704		 */
10705		sd_scsi_clear_probe_cache();
10706		return (ENXIO);
10707	}
10708
10709	/*
10710	 * The un_layer_count is to prevent another thread in specfs from
10711	 * trying to detach the instance, which can happen when we are
10712	 * called from a higher-layer driver instead of thru specfs.
10713	 * This will not be needed when DDI provides a layered driver
10714	 * interface that allows specfs to know that an instance is in
10715	 * use by a layered driver & should not be detached.
10716	 *
10717	 * Note: the semantics for layered driver opens are exactly one
10718	 * close for every open.
10719	 */
10720	if (otyp == OTYP_LYR) {
10721		un->un_layer_count++;
10722	}
10723
10724	/*
10725	 * Keep a count of the current # of opens in progress. This is because
10726	 * some layered drivers try to call us as a regular open. This can
	 * cause problems that we cannot prevent; however, by keeping this count
10728	 * we can at least keep our open and detach routines from racing against
10729	 * each other under such conditions.
10730	 */
10731	un->un_opens_in_progress++;
10732	mutex_exit(&sd_detach_mutex);
10733
10734	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10735	part	 = SDPART(dev);
10736	partmask = 1 << part;
10737
10738	/*
10739	 * We use a semaphore here in order to serialize
10740	 * open and close requests on the device.
10741	 */
10742	sema_p(&un->un_semoclose);
10743
10744	mutex_enter(SD_MUTEX(un));
10745
10746	/*
	 * All device accesses go thru sdstrategy(), where we check
	 * the suspend status, but there could be a scsi_poll command,
	 * which bypasses sdstrategy(), so we also need to check the pm
	 * status.
10751	 */
10752
10753	if (!nodelay) {
10754		while ((un->un_state == SD_STATE_SUSPENDED) ||
10755		    (un->un_state == SD_STATE_PM_CHANGING)) {
10756			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10757		}
10758
10759		mutex_exit(SD_MUTEX(un));
10760		if (sd_pm_entry(un) != DDI_SUCCESS) {
10761			rval = EIO;
10762			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10763			    "sdopen: sd_pm_entry failed\n");
10764			goto open_failed_with_pm;
10765		}
10766		mutex_enter(SD_MUTEX(un));
10767	}
10768
10769	/* check for previous exclusive open */
10770	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10771	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10772	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10773	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10774
10775	if (un->un_exclopen & (partmask)) {
10776		goto excl_open_fail;
10777	}
10778
10779	if (flag & FEXCL) {
10780		int i;
10781		if (un->un_ocmap.lyropen[part]) {
10782			goto excl_open_fail;
10783		}
10784		for (i = 0; i < (OTYPCNT - 1); i++) {
10785			if (un->un_ocmap.regopen[i] & (partmask)) {
10786				goto excl_open_fail;
10787			}
10788		}
10789	}
10790
10791	/*
10792	 * Check the write permission if this is a removable media device,
10793	 * NDELAY has not been set, and writable permission is requested.
10794	 *
10795	 * Note: If NDELAY was set and this is write-protected media the WRITE
10796	 * attempt will fail with EIO as part of the I/O processing. This is a
10797	 * more permissive implementation that allows the open to succeed and
10798	 * WRITE attempts to fail when appropriate.
10799	 */
10800	if (un->un_f_chk_wp_open) {
10801		if ((flag & FWRITE) && (!nodelay)) {
10802			mutex_exit(SD_MUTEX(un));
10803			/*
			 * Defer the check for write permission on a writable
			 * DVD drive until sdstrategy; do not fail the open
			 * even if FWRITE is set, as the device can be writable
			 * depending upon the media, and the media can change
			 * after the call to open().
10809			 */
10810			if (un->un_f_dvdram_writable_device == FALSE) {
				if (ISCD(un) || sr_check_wp(dev)) {
					rval = EROFS;
					mutex_enter(SD_MUTEX(un));
					SD_ERROR(SD_LOG_OPEN_CLOSE, un,
					    "sdopen: write to cd or write "
					    "protected media\n");
					goto open_fail;
				}
10818			}
10819			mutex_enter(SD_MUTEX(un));
10820		}
10821	}
10822
10823	/*
10824	 * If opening in NDELAY/NONBLOCK mode, just return.
10825	 * Check if disk is ready and has a valid geometry later.
10826	 */
10827	if (!nodelay) {
10828		mutex_exit(SD_MUTEX(un));
10829		rval = sd_ready_and_valid(un);
10830		mutex_enter(SD_MUTEX(un));
10831		/*
10832		 * Fail if device is not ready or if the number of disk
		 * blocks is zero or negative for non-CD devices.
10834		 */
10835		if ((rval != SD_READY_VALID) ||
10836		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10837			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10838			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10839			    "device not ready or invalid disk block value\n");
10840			goto open_fail;
10841		}
10842#if defined(__i386) || defined(__amd64)
10843	} else {
10844		uchar_t *cp;
10845		/*
10846		 * x86 requires special nodelay handling, so that p0 is
10847		 * always defined and accessible.
10848		 * Invalidate geometry only if device is not already open.
10849		 */
10850		cp = &un->un_ocmap.chkd[0];
10851		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10852			if (*cp != (uchar_t)0) {
				break;
10854			}
10855			cp++;
10856		}
10857		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10858			un->un_f_geometry_is_valid = FALSE;
10859		}
10860
10861#endif
10862	}
10863
10864	if (otyp == OTYP_LYR) {
10865		un->un_ocmap.lyropen[part]++;
10866	} else {
10867		un->un_ocmap.regopen[otyp] |= partmask;
10868	}
10869
10870	/* Set up open and exclusive open flags */
10871	if (flag & FEXCL) {
10872		un->un_exclopen |= (partmask);
10873	}
10874
10875	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10876	    "open of part %d type %d\n", part, otyp);
10877
10878	mutex_exit(SD_MUTEX(un));
10879	if (!nodelay) {
10880		sd_pm_exit(un);
10881	}
10882
10883	sema_v(&un->un_semoclose);
10884
10885	mutex_enter(&sd_detach_mutex);
10886	un->un_opens_in_progress--;
10887	mutex_exit(&sd_detach_mutex);
10888
10889	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10890	return (DDI_SUCCESS);
10891
10892excl_open_fail:
10893	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10894	rval = EBUSY;
10895
10896open_fail:
10897	mutex_exit(SD_MUTEX(un));
10898
10899	/*
10900	 * On a failed open we must exit the pm management.
10901	 */
10902	if (!nodelay) {
10903		sd_pm_exit(un);
10904	}
10905open_failed_with_pm:
10906	sema_v(&un->un_semoclose);
10907
10908	mutex_enter(&sd_detach_mutex);
10909	un->un_opens_in_progress--;
10910	if (otyp == OTYP_LYR) {
10911		un->un_layer_count--;
10912	}
10913	mutex_exit(&sd_detach_mutex);
10914
10915	return (rval);
10916}
10917
10918
10919/*
10920 *    Function: sdclose
10921 *
10922 * Description: Driver's close(9e) entry point function.
10923 *
10924 *   Arguments: dev    - device number
10925 *		flag   - file status flag, informational only
10926 *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10927 *		cred_p - user credential pointer
10928 *
10929 * Return Code: ENXIO
10930 *
10931 *     Context: Kernel thread context
10932 */
10933/* ARGSUSED */
10934static int
10935sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10936{
10937	struct sd_lun	*un;
10938	uchar_t		*cp;
10939	int		part;
10940	int		nodelay;
10941	int		rval = 0;
10942
10943	/* Validate the open type */
10944	if (otyp >= OTYPCNT) {
10945		return (ENXIO);
10946	}
10947
10948	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10949		return (ENXIO);
10950	}
10951
10952	part = SDPART(dev);
10953	nodelay = flag & (FNDELAY | FNONBLOCK);
10954
10955	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10956	    "sdclose: close of part %d type %d\n", part, otyp);
10957
10958	/*
10959	 * We use a semaphore here in order to serialize
10960	 * open and close requests on the device.
10961	 */
10962	sema_p(&un->un_semoclose);
10963
10964	mutex_enter(SD_MUTEX(un));
10965
10966	/* Don't proceed if power is being changed. */
10967	while (un->un_state == SD_STATE_PM_CHANGING) {
10968		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10969	}
10970
10971	if (un->un_exclopen & (1 << part)) {
10972		un->un_exclopen &= ~(1 << part);
10973	}
10974
10975	/* Update the open partition map */
10976	if (otyp == OTYP_LYR) {
10977		un->un_ocmap.lyropen[part] -= 1;
10978	} else {
10979		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10980	}
10981
10982	cp = &un->un_ocmap.chkd[0];
10983	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
		if (*cp != (uchar_t)0) {
10985			break;
10986		}
10987		cp++;
10988	}
10989
10990	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10991		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10992
10993		/*
		 * We avoid persistence upon the last close, and set
10995		 * the throttle back to the maximum.
10996		 */
10997		un->un_throttle = un->un_saved_throttle;
10998
10999		if (un->un_state == SD_STATE_OFFLINE) {
11000			if (un->un_f_is_fibre == FALSE) {
11001				scsi_log(SD_DEVINFO(un), sd_label,
				    CE_WARN, "offline\n");
11003			}
11004			un->un_f_geometry_is_valid = FALSE;
11005
11006		} else {
11007			/*
11008			 * Flush any outstanding writes in NVRAM cache.
11009			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
			 * command; it may not work for non-Pluto devices.
11011			 * SYNCHRONIZE CACHE is not required for removables,
11012			 * except DVD-RAM drives.
11013			 *
11014			 * Also note: because SYNCHRONIZE CACHE is currently
11015			 * the only command issued here that requires the
11016			 * drive be powered up, only do the power up before
11017			 * sending the Sync Cache command. If additional
11018			 * commands are added which require a powered up
11019			 * drive, the following sequence may have to change.
11020			 *
11021			 * And finally, note that parallel SCSI on SPARC
11022			 * only issues a Sync Cache to DVD-RAM, a newly
11023			 * supported device.
11024			 */
11025#if defined(__i386) || defined(__amd64)
11026			if (un->un_f_sync_cache_supported ||
11027			    un->un_f_dvdram_writable_device == TRUE) {
11028#else
11029			if (un->un_f_dvdram_writable_device == TRUE) {
11030#endif
11031				mutex_exit(SD_MUTEX(un));
11032				if (sd_pm_entry(un) == DDI_SUCCESS) {
11033					rval =
11034					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
11035					    NULL);
11036					/* ignore error if not supported */
11037					if (rval == ENOTSUP) {
11038						rval = 0;
11039					} else if (rval != 0) {
11040						rval = EIO;
11041					}
11042					sd_pm_exit(un);
11043				} else {
11044					rval = EIO;
11045				}
11046				mutex_enter(SD_MUTEX(un));
11047			}
11048
11049			/*
			 * For devices which support DOOR_LOCK, send an ALLOW
			 * MEDIA REMOVAL command, but don't get upset if it
			 * fails. We need to raise the power of the drive before
			 * we can call sd_send_scsi_DOORLOCK().
11054			 */
11055			if (un->un_f_doorlock_supported) {
11056				mutex_exit(SD_MUTEX(un));
11057				if (sd_pm_entry(un) == DDI_SUCCESS) {
11058					rval = sd_send_scsi_DOORLOCK(un,
11059					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
11060
11061					sd_pm_exit(un);
11062					if (ISCD(un) && (rval != 0) &&
11063					    (nodelay != 0)) {
11064						rval = ENXIO;
11065					}
11066				} else {
11067					rval = EIO;
11068				}
11069				mutex_enter(SD_MUTEX(un));
11070			}
11071
11072			/*
11073			 * If a device has removable media, invalidate all
11074			 * parameters related to media, such as geometry,
11075			 * blocksize, and blockcount.
11076			 */
11077			if (un->un_f_has_removable_media) {
11078				sr_ejected(un);
11079			}
11080
11081			/*
11082			 * Destroy the cache (if it exists) which was
11083			 * allocated for the write maps since this is
11084			 * the last close for this media.
11085			 */
11086			if (un->un_wm_cache) {
11087				/*
				 * Check if there are pending commands;
				 * if there are, give a warning and
				 * do not destroy the cache.
11091				 */
11092				if (un->un_ncmds_in_driver > 0) {
11093					scsi_log(SD_DEVINFO(un),
11094					    sd_label, CE_WARN,
11095					    "Unable to clean up memory "
11096					    "because of pending I/O\n");
11097				} else {
11098					kmem_cache_destroy(
11099					    un->un_wm_cache);
11100					un->un_wm_cache = NULL;
11101				}
11102			}
11103		}
11104	}
11105
11106	mutex_exit(SD_MUTEX(un));
11107	sema_v(&un->un_semoclose);
11108
11109	if (otyp == OTYP_LYR) {
11110		mutex_enter(&sd_detach_mutex);
11111		/*
11112		 * The detach routine may run when the layer count
11113		 * drops to zero.
11114		 */
11115		un->un_layer_count--;
11116		mutex_exit(&sd_detach_mutex);
11117	}
11118
11119	return (rval);
11120}
11121
11122
11123/*
11124 *    Function: sd_ready_and_valid
11125 *
11126 * Description: Test if device is ready and has a valid geometry.
11127 *
11128 *   Arguments: dev - device number
11129 *		un  - driver soft state (unit) structure
11130 *
11131 * Return Code: SD_READY_VALID		ready and valid label
11132 *		SD_READY_NOT_VALID	ready, geom ops never applicable
11133 *		SD_NOT_READY_VALID	not ready, no label
11134 *		SD_RESERVED_BY_OTHERS	reservation conflict
11135 *
11136 *     Context: Never called at interrupt context.
11137 */
11138
11139static int
11140sd_ready_and_valid(struct sd_lun *un)
11141{
11142	struct sd_errstats	*stp;
11143	uint64_t		capacity;
11144	uint_t			lbasize;
11145	int			rval = SD_READY_VALID;
11146	char			name_str[48];
11147
11148	ASSERT(un != NULL);
11149	ASSERT(!mutex_owned(SD_MUTEX(un)));
11150
11151	mutex_enter(SD_MUTEX(un));
11152	/*
11153	 * If a device has removable media, we must check if media is
11154	 * ready when checking if this device is ready and valid.
11155	 */
11156	if (un->un_f_has_removable_media) {
11157		mutex_exit(SD_MUTEX(un));
11158		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
11159			rval = SD_NOT_READY_VALID;
11160			mutex_enter(SD_MUTEX(un));
11161			goto done;
11162		}
11163
11164		mutex_enter(SD_MUTEX(un));
11165		if ((un->un_f_geometry_is_valid == FALSE) ||
11166		    (un->un_f_blockcount_is_valid == FALSE) ||
11167		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
11168
			/* Capacity has to be read on every open. */
11170			mutex_exit(SD_MUTEX(un));
11171			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
11172			    &lbasize, SD_PATH_DIRECT) != 0) {
11173				mutex_enter(SD_MUTEX(un));
11174				un->un_f_geometry_is_valid = FALSE;
11175				rval = SD_NOT_READY_VALID;
11176				goto done;
11177			} else {
11178				mutex_enter(SD_MUTEX(un));
11179				sd_update_block_info(un, lbasize, capacity);
11180			}
11181		}
11182
11183		/*
11184		 * Check if the media in the device is writable or not.
11185		 */
11186		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
11187			sd_check_for_writable_cd(un);
11188		}
11189
11190	} else {
11191		/*
		 * Do a test unit ready to clear any unit attention from non-CD
11193		 * devices.
11194		 */
11195		mutex_exit(SD_MUTEX(un));
11196		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
11197		mutex_enter(SD_MUTEX(un));
11198	}
11199
11200
11201	/*
	 * If this is a non-512 block size device, allocate space for
	 * the wmap cache. This is done here since this routine is
	 * called every time the media is changed, and the block size
	 * is a function of the media rather than the device.
11206	 */
11207	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
11208		if (!(un->un_wm_cache)) {
11209			(void) snprintf(name_str, sizeof (name_str),
11210			    "%s%d_cache",
11211			    ddi_driver_name(SD_DEVINFO(un)),
11212			    ddi_get_instance(SD_DEVINFO(un)));
11213			un->un_wm_cache = kmem_cache_create(
11214			    name_str, sizeof (struct sd_w_map),
11215			    8, sd_wm_cache_constructor,
11216			    sd_wm_cache_destructor, NULL,
11217			    (void *)un, NULL, 0);
11218			if (!(un->un_wm_cache)) {
				rval = ENOMEM;
				goto done;
11221			}
11222		}
11223	}
11224
11225	if (un->un_state == SD_STATE_NORMAL) {
11226		/*
11227		 * If the target is not yet ready here (defined by a TUR
11228		 * failure), invalidate the geometry and print an 'offline'
11229		 * message. This is a legacy message, as the state of the
11230		 * target is not actually changed to SD_STATE_OFFLINE.
11231		 *
11232		 * If the TUR fails for EACCES (Reservation Conflict),
11233		 * SD_RESERVED_BY_OTHERS will be returned to indicate
11234		 * reservation conflict. If the TUR fails for other
11235		 * reasons, SD_NOT_READY_VALID will be returned.
11236		 */
11237		int err;
11238
11239		mutex_exit(SD_MUTEX(un));
11240		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
11241		mutex_enter(SD_MUTEX(un));
11242
11243		if (err != 0) {
11244			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
11245			    "offline or reservation conflict\n");
11246			un->un_f_geometry_is_valid = FALSE;
11247			if (err == EACCES) {
11248				rval = SD_RESERVED_BY_OTHERS;
11249			} else {
11250				rval = SD_NOT_READY_VALID;
11251			}
11252			goto done;
11253		}
11254	}
11255
11256	if (un->un_f_format_in_progress == FALSE) {
11257		/*
11258		 * Note: sd_validate_geometry may return TRUE, but that does
11259		 * not necessarily mean un_f_geometry_is_valid == TRUE!
11260		 */
11261		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
11262		if (rval == ENOTSUP) {
			if (un->un_f_geometry_is_valid == TRUE) {
				rval = 0;
			} else {
11266				rval = SD_READY_NOT_VALID;
11267				goto done;
11268			}
11269		}
11270		if (rval != 0) {
11271			/*
11272			 * We don't check the validity of geometry for
11273			 * CDROMs. Also we assume we have a good label
11274			 * even if sd_validate_geometry returned ENOMEM.
11275			 */
11276			if (!ISCD(un) && rval != ENOMEM) {
11277				rval = SD_NOT_READY_VALID;
11278				goto done;
11279			}
11280		}
11281	}
11282
11283	/*
	 * If this device supports the DOOR_LOCK command, try to send
	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
	 * if it fails. For a CD, however, it is an error.
11287	 */
11288	if (un->un_f_doorlock_supported) {
11289		mutex_exit(SD_MUTEX(un));
11290		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
11291		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
11292			rval = SD_NOT_READY_VALID;
11293			mutex_enter(SD_MUTEX(un));
11294			goto done;
11295		}
11296		mutex_enter(SD_MUTEX(un));
11297	}
11298
11299	/* The state has changed, inform the media watch routines */
11300	un->un_mediastate = DKIO_INSERTED;
11301	cv_broadcast(&un->un_state_cv);
11302	rval = SD_READY_VALID;
11303
11304done:
11305
11306	/*
11307	 * Initialize the capacity kstat value, if no media previously
11308	 * (capacity kstat is 0) and a media has been inserted
11309	 * (un_blockcount > 0).
11310	 */
11311	if (un->un_errstats != NULL) {
11312		stp = (struct sd_errstats *)un->un_errstats->ks_data;
11313		if ((stp->sd_capacity.value.ui64 == 0) &&
11314		    (un->un_f_blockcount_is_valid == TRUE)) {
11315			stp->sd_capacity.value.ui64 =
11316			    (uint64_t)((uint64_t)un->un_blockcount *
11317			    un->un_sys_blocksize);
11318		}
11319	}
11320
11321	mutex_exit(SD_MUTEX(un));
11322	return (rval);
11323}
11324
11325
11326/*
11327 *    Function: sdmin
11328 *
11329 * Description: Routine to limit the size of a data transfer. Used in
11330 *		conjunction with physio(9F).
11331 *
11332 *   Arguments: bp - pointer to the indicated buf(9S) struct.
11333 *
11334 *     Context: Kernel thread context.
11335 */
11336
11337static void
11338sdmin(struct buf *bp)
11339{
11340	struct sd_lun	*un;
11341	int		instance;
11342
11343	instance = SDUNIT(bp->b_edev);
11344
11345	un = ddi_get_soft_state(sd_state, instance);
11346	ASSERT(un != NULL);
11347
11348	if (bp->b_bcount > un->un_max_xfer_size) {
11349		bp->b_bcount = un->un_max_xfer_size;
11350	}
11351}
11352
11353
11354/*
11355 *    Function: sdread
11356 *
11357 * Description: Driver's read(9e) entry point function.
11358 *
11359 *   Arguments: dev   - device number
11360 *		uio   - structure pointer describing where data is to be stored
11361 *			in user's space
11362 *		cred_p  - user credential pointer
11363 *
11364 * Return Code: ENXIO
11365 *		EIO
11366 *		EINVAL
11367 *		value returned by physio
11368 *
11369 *     Context: Kernel thread context.
11370 */
11371/* ARGSUSED */
11372static int
11373sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
11374{
11375	struct sd_lun	*un = NULL;
11376	int		secmask;
11377	int		err;
11378
11379	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11380		return (ENXIO);
11381	}
11382
11383	ASSERT(!mutex_owned(SD_MUTEX(un)));
11384
11385	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11386		mutex_enter(SD_MUTEX(un));
11387		/*
		 * Because the call to sd_ready_and_valid will issue I/O, we
		 * must wait here if either the device is suspended or
		 * its power level is changing.
11391		 */
11392		while ((un->un_state == SD_STATE_SUSPENDED) ||
11393		    (un->un_state == SD_STATE_PM_CHANGING)) {
11394			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11395		}
11396		un->un_ncmds_in_driver++;
11397		mutex_exit(SD_MUTEX(un));
11398		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11399			mutex_enter(SD_MUTEX(un));
11400			un->un_ncmds_in_driver--;
11401			ASSERT(un->un_ncmds_in_driver >= 0);
11402			mutex_exit(SD_MUTEX(un));
11403			return (EIO);
11404		}
11405		mutex_enter(SD_MUTEX(un));
11406		un->un_ncmds_in_driver--;
11407		ASSERT(un->un_ncmds_in_driver >= 0);
11408		mutex_exit(SD_MUTEX(un));
11409	}
11410
11411	/*
11412	 * Read requests are restricted to multiples of the system block size.
11413	 */
11414	secmask = un->un_sys_blocksize - 1;
11415
11416	if (uio->uio_loffset & ((offset_t)(secmask))) {
11417		SD_ERROR(SD_LOG_READ_WRITE, un,
11418		    "sdread: file offset not modulo %d\n",
11419		    un->un_sys_blocksize);
11420		err = EINVAL;
11421	} else if (uio->uio_iov->iov_len & (secmask)) {
11422		SD_ERROR(SD_LOG_READ_WRITE, un,
11423		    "sdread: transfer length not modulo %d\n",
11424		    un->un_sys_blocksize);
11425		err = EINVAL;
11426	} else {
11427		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
11428	}
11429	return (err);
11430}
11431
11432
11433/*
11434 *    Function: sdwrite
11435 *
11436 * Description: Driver's write(9e) entry point function.
11437 *
11438 *   Arguments: dev   - device number
11439 *		uio   - structure pointer describing where data is stored in
11440 *			user's space
11441 *		cred_p  - user credential pointer
11442 *
11443 * Return Code: ENXIO
11444 *		EIO
11445 *		EINVAL
11446 *		value returned by physio
11447 *
11448 *     Context: Kernel thread context.
11449 */
11450/* ARGSUSED */
11451static int
11452sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
11453{
11454	struct sd_lun	*un = NULL;
11455	int		secmask;
11456	int		err;
11457
11458	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11459		return (ENXIO);
11460	}
11461
11462	ASSERT(!mutex_owned(SD_MUTEX(un)));
11463
11464	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11465		mutex_enter(SD_MUTEX(un));
11466		/*
		 * Because the call to sd_ready_and_valid will issue I/O, we
		 * must wait here if either the device is suspended or
		 * its power level is changing.
11470		 */
11471		while ((un->un_state == SD_STATE_SUSPENDED) ||
11472		    (un->un_state == SD_STATE_PM_CHANGING)) {
11473			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11474		}
11475		un->un_ncmds_in_driver++;
11476		mutex_exit(SD_MUTEX(un));
11477		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11478			mutex_enter(SD_MUTEX(un));
11479			un->un_ncmds_in_driver--;
11480			ASSERT(un->un_ncmds_in_driver >= 0);
11481			mutex_exit(SD_MUTEX(un));
11482			return (EIO);
11483		}
11484		mutex_enter(SD_MUTEX(un));
11485		un->un_ncmds_in_driver--;
11486		ASSERT(un->un_ncmds_in_driver >= 0);
11487		mutex_exit(SD_MUTEX(un));
11488	}
11489
11490	/*
11491	 * Write requests are restricted to multiples of the system block size.
11492	 */
11493	secmask = un->un_sys_blocksize - 1;
11494
11495	if (uio->uio_loffset & ((offset_t)(secmask))) {
11496		SD_ERROR(SD_LOG_READ_WRITE, un,
11497		    "sdwrite: file offset not modulo %d\n",
11498		    un->un_sys_blocksize);
11499		err = EINVAL;
11500	} else if (uio->uio_iov->iov_len & (secmask)) {
11501		SD_ERROR(SD_LOG_READ_WRITE, un,
11502		    "sdwrite: transfer length not modulo %d\n",
11503		    un->un_sys_blocksize);
11504		err = EINVAL;
11505	} else {
11506		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
11507	}
11508	return (err);
11509}
11510
11511
11512/*
11513 *    Function: sdaread
11514 *
11515 * Description: Driver's aread(9e) entry point function.
11516 *
11517 *   Arguments: dev   - device number
11518 *		aio   - structure pointer describing where data is to be stored
11519 *		cred_p  - user credential pointer
11520 *
11521 * Return Code: ENXIO
11522 *		EIO
11523 *		EINVAL
11524 *		value returned by aphysio
11525 *
11526 *     Context: Kernel thread context.
11527 */
11528/* ARGSUSED */
11529static int
11530sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11531{
11532	struct sd_lun	*un = NULL;
11533	struct uio	*uio = aio->aio_uio;
11534	int		secmask;
11535	int		err;
11536
11537	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11538		return (ENXIO);
11539	}
11540
11541	ASSERT(!mutex_owned(SD_MUTEX(un)));
11542
11543	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11544		mutex_enter(SD_MUTEX(un));
11545		/*
		 * Because the call to sd_ready_and_valid will issue I/O, we
		 * must wait here if either the device is suspended or
		 * its power level is changing.
11549		 */
11550		while ((un->un_state == SD_STATE_SUSPENDED) ||
11551		    (un->un_state == SD_STATE_PM_CHANGING)) {
11552			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11553		}
11554		un->un_ncmds_in_driver++;
11555		mutex_exit(SD_MUTEX(un));
11556		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11557			mutex_enter(SD_MUTEX(un));
11558			un->un_ncmds_in_driver--;
11559			ASSERT(un->un_ncmds_in_driver >= 0);
11560			mutex_exit(SD_MUTEX(un));
11561			return (EIO);
11562		}
11563		mutex_enter(SD_MUTEX(un));
11564		un->un_ncmds_in_driver--;
11565		ASSERT(un->un_ncmds_in_driver >= 0);
11566		mutex_exit(SD_MUTEX(un));
11567	}
11568
11569	/*
11570	 * Read requests are restricted to multiples of the system block size.
11571	 */
11572	secmask = un->un_sys_blocksize - 1;
11573
11574	if (uio->uio_loffset & ((offset_t)(secmask))) {
11575		SD_ERROR(SD_LOG_READ_WRITE, un,
11576		    "sdaread: file offset not modulo %d\n",
11577		    un->un_sys_blocksize);
11578		err = EINVAL;
11579	} else if (uio->uio_iov->iov_len & (secmask)) {
11580		SD_ERROR(SD_LOG_READ_WRITE, un,
11581		    "sdaread: transfer length not modulo %d\n",
11582		    un->un_sys_blocksize);
11583		err = EINVAL;
11584	} else {
11585		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11586	}
11587	return (err);
11588}
11589
11590
11591/*
11592 *    Function: sdawrite
11593 *
11594 * Description: Driver's awrite(9e) entry point function.
11595 *
11596 *   Arguments: dev   - device number
11597 *		aio   - structure pointer describing where data is stored
11598 *		cred_p  - user credential pointer
11599 *
11600 * Return Code: ENXIO
11601 *		EIO
11602 *		EINVAL
11603 *		value returned by aphysio
11604 *
11605 *     Context: Kernel thread context.
11606 */
11607/* ARGSUSED */
11608static int
11609sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11610{
11611	struct sd_lun	*un = NULL;
11612	struct uio	*uio = aio->aio_uio;
11613	int		secmask;
11614	int		err;
11615
11616	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11617		return (ENXIO);
11618	}
11619
11620	ASSERT(!mutex_owned(SD_MUTEX(un)));
11621
11622	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11623		mutex_enter(SD_MUTEX(un));
11624		/*
		 * Because the call to sd_ready_and_valid will issue I/O, we
		 * must wait here if either the device is suspended or
		 * its power level is changing.
11628		 */
11629		while ((un->un_state == SD_STATE_SUSPENDED) ||
11630		    (un->un_state == SD_STATE_PM_CHANGING)) {
11631			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11632		}
11633		un->un_ncmds_in_driver++;
11634		mutex_exit(SD_MUTEX(un));
11635		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11636			mutex_enter(SD_MUTEX(un));
11637			un->un_ncmds_in_driver--;
11638			ASSERT(un->un_ncmds_in_driver >= 0);
11639			mutex_exit(SD_MUTEX(un));
11640			return (EIO);
11641		}
11642		mutex_enter(SD_MUTEX(un));
11643		un->un_ncmds_in_driver--;
11644		ASSERT(un->un_ncmds_in_driver >= 0);
11645		mutex_exit(SD_MUTEX(un));
11646	}
11647
11648	/*
11649	 * Write requests are restricted to multiples of the system block size.
11650	 */
11651	secmask = un->un_sys_blocksize - 1;
11652
11653	if (uio->uio_loffset & ((offset_t)(secmask))) {
11654		SD_ERROR(SD_LOG_READ_WRITE, un,
11655		    "sdawrite: file offset not modulo %d\n",
11656		    un->un_sys_blocksize);
11657		err = EINVAL;
11658	} else if (uio->uio_iov->iov_len & (secmask)) {
11659		SD_ERROR(SD_LOG_READ_WRITE, un,
11660		    "sdawrite: transfer length not modulo %d\n",
11661		    un->un_sys_blocksize);
11662		err = EINVAL;
11663	} else {
11664		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11665	}
11666	return (err);
11667}
11668
11669
11670
11671
11672
11673/*
11674 * Driver IO processing follows the following sequence:
11675 *
11676 *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11677 *         |                |                     ^
11678 *         v                v                     |
11679 * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11680 *         |                |                     |                   |
11681 *         v                |                     |                   |
11682 * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11683 *         |                |                     ^                   ^
11684 *         v                v                     |                   |
11685 * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11686 *         |                |                     |                   |
11687 *     +---+                |                     +------------+      +-------+
11688 *     |                    |                                  |              |
11689 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11690 *     |                    v                                  |              |
11691 *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11692 *     |                    |                                  ^              |
11693 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11694 *     |                    v                                  |              |
11695 *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11696 *     |                    |                                  ^              |
11697 *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11698 *     |                    v                                  |              |
11699 *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11700 *     |                    |                                  ^              |
11701 *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11702 *     |                    v                                  |              |
11703 *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11704 *     |                    |                                  ^              |
11705 *     |                    |                                  |              |
11706 *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11707 *                          |                           ^
11708 *                          v                           |
11709 *                   sd_core_iostart()                  |
11710 *                          |                           |
11711 *                          |                           +------>(*destroypkt)()
11712 *                          +-> sd_start_cmds() <-+     |           |
11713 *                          |                     |     |           v
11714 *                          |                     |     |  scsi_destroy_pkt(9F)
11715 *                          |                     |     |
11716 *                          +->(*initpkt)()       +- sdintr()
11717 *                          |  |                        |  |
11718 *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11719 *                          |  +-> scsi_setup_cdb(9F)   |
11720 *                          |                           |
11721 *                          +--> scsi_transport(9F)     |
11722 *                                     |                |
11723 *                                     +----> SCSA ---->+
11724 *
11725 *
 * This code is based upon the following presumptions:
11727 *
11728 *   - iostart and iodone functions operate on buf(9S) structures. These
11729 *     functions perform the necessary operations on the buf(9S) and pass
11730 *     them along to the next function in the chain by using the macros
11731 *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11732 *     (for iodone side functions).
11733 *
11734 *   - The iostart side functions may sleep. The iodone side functions
11735 *     are called under interrupt context and may NOT sleep. Therefore
11736 *     iodone side functions also may not call iostart side functions.
11737 *     (NOTE: iostart side functions should NOT sleep for memory, as
11738 *     this could result in deadlock.)
11739 *
11740 *   - An iostart side function may call its corresponding iodone side
11741 *     function directly (if necessary).
11742 *
11743 *   - In the event of an error, an iostart side function can return a buf(9S)
11744 *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11745 *     b_error in the usual way of course).
11746 *
11747 *   - The taskq mechanism may be used by the iodone side functions to dispatch
11748 *     requests to the iostart side functions.  The iostart side functions in
11749 *     this case would be called under the context of a taskq thread, so it's
11750 *     OK for them to block/sleep/spin.
11751 *
11752 *   - iostart side functions may allocate "shadow" buf(9S) structs and
11753 *     pass them along to the next function in the chain.  The corresponding
11754 *     iodone side functions must coalesce the "shadow" bufs and return
11755 *     the "original" buf to the next higher layer.
11756 *
11757 *   - The b_private field of the buf(9S) struct holds a pointer to
11758 *     an sd_xbuf struct, which contains information needed to
11759 *     construct the scsi_pkt for the command.
11760 *
11761 *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11762 *     layer must acquire & release the SD_MUTEX(un) as needed.
11763 */
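
/*
 * Illustrative sketch (not driver code; the array and macro expansions shown
 * here are hypothetical simplifications): the chain mechanism described
 * above amounts to indexing into per-chain arrays of function pointers,
 * with iostart calls walking "down" the array (index + 1) and iodone
 * calls walking back "up" (index - 1):
 *
 *	typedef void (*chain_func_t)(int index, struct sd_lun *un,
 *	    struct buf *bp);
 *
 *	static chain_func_t example_iostart_chain[] = {
 *		sd_mapblockaddr_iostart,	// index 0
 *		sd_pm_iostart,			// index 1
 *		sd_core_iostart			// index 2 (terminal)
 *	};
 *
 *	// Conceptually, SD_NEXT_IOSTART(index, un, bp) behaves like:
 *	//	(*example_iostart_chain[index + 1])(index + 1, un, bp);
 *	// while SD_NEXT_IODONE() walks the corresponding iodone array
 *	// with index - 1, back toward the layer that originated the buf.
 */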
11764
11765
11766/*
11767 * Create taskq for all targets in the system. This is created at
11768 * _init(9E) and destroyed at _fini(9E).
11769 *
11770 * Note: here we set the minalloc to a reasonably high number to ensure that
11771 * we will have an adequate supply of task entries available at interrupt time.
11772 * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11773 * sd_taskq_create().  Since we do not want to sleep for allocations at
11774 * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11775 * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11776 * requests at any one instant in time.
11777 */
11778#define	SD_TASKQ_NUMTHREADS	8
11779#define	SD_TASKQ_MINALLOC	256
11780#define	SD_TASKQ_MAXALLOC	256
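
/*
 * Illustrative sketch (hypothetical caller; "some_task_func" and "arg" are
 * not driver symbols): because maxalloc == minalloc and the queue is
 * prepopulated, a KM_NOSLEEP dispatch that finds every preallocated entry
 * in use fails immediately instead of sleeping, and the caller must be
 * prepared for that:
 *
 *	if (taskq_dispatch(sd_tq, some_task_func, arg, KM_NOSLEEP) == 0) {
 *		// All SD_TASKQ_MAXALLOC entries are in use; fail the
 *		// request rather than block at interrupt time.
 *		bioerror(bp, EIO);
 *		biodone(bp);
 *	}
 */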
11781
11782static taskq_t	*sd_tq = NULL;
11783_NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11784
11785static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11786static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11787
11788/*
11789 * The following task queue is being created for the write part of
11790 * read-modify-write of non-512 block size devices.
11791 * Limit the number of threads to 1 for now. This number was chosen
11792 * because the queue currently applies only to DVD-RAM and MO drives,
11793 * for which performance is not the main criterion at this stage.
11794 * Note: whether a single taskq could serve both purposes remains to be explored.
11795 */
11796#define	SD_WMR_TASKQ_NUMTHREADS	1
11797static taskq_t	*sd_wmr_tq = NULL;
11798_NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11799
11800/*
11801 *    Function: sd_taskq_create
11802 *
11803 * Description: Create taskq thread(s) and preallocate task entries
11804 *
11805 * Return Code: Returns a pointer to the allocated taskq_t.
11806 *
11807 *     Context: Can sleep. Requires blockable context.
11808 *
11809 *       Notes: - The taskq() facility currently is NOT part of the DDI.
11810 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11811 *		- taskq_create() will block for memory, also it will panic
11812 *		  if it cannot create the requested number of threads.
11813 *		- Currently taskq_create() creates threads that cannot be
11814 *		  swapped.
11815 *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11816 *		  supply of taskq entries at interrupt time (ie, so that we
11817 *		  do not have to sleep for memory)
11818 */
11819
11820static void
11821sd_taskq_create(void)
11822{
11823	char	taskq_name[TASKQ_NAMELEN];
11824
11825	ASSERT(sd_tq == NULL);
11826	ASSERT(sd_wmr_tq == NULL);
11827
11828	(void) snprintf(taskq_name, sizeof (taskq_name),
11829	    "%s_drv_taskq", sd_label);
11830	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11831	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11832	    TASKQ_PREPOPULATE));
11833
11834	(void) snprintf(taskq_name, sizeof (taskq_name),
11835	    "%s_rmw_taskq", sd_label);
11836	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11837	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11838	    TASKQ_PREPOPULATE));
11839}
11840
11841
11842/*
11843 *    Function: sd_taskq_delete
11844 *
11845 * Description: Complementary cleanup routine for sd_taskq_create().
11846 *
11847 *     Context: Kernel thread context.
11848 */
11849
11850static void
11851sd_taskq_delete(void)
11852{
11853	ASSERT(sd_tq != NULL);
11854	ASSERT(sd_wmr_tq != NULL);
11855	taskq_destroy(sd_tq);
11856	taskq_destroy(sd_wmr_tq);
11857	sd_tq = NULL;
11858	sd_wmr_tq = NULL;
11859}
11860
11861
11862/*
11863 *    Function: sdstrategy
11864 *
11865 * Description: Driver's strategy (9E) entry point function.
11866 *
11867 *   Arguments: bp - pointer to buf(9S)
11868 *
11869 * Return Code: Always returns zero
11870 *
11871 *     Context: Kernel thread context.
11872 */
11873
11874static int
11875sdstrategy(struct buf *bp)
11876{
11877	struct sd_lun *un;
11878
11879	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11880	if (un == NULL) {
11881		bioerror(bp, EIO);
11882		bp->b_resid = bp->b_bcount;
11883		biodone(bp);
11884		return (0);
11885	}
11886	/* As was done in the past, fail new commands if the state is dumping. */
11887	if (un->un_state == SD_STATE_DUMPING) {
11888		bioerror(bp, ENXIO);
11889		bp->b_resid = bp->b_bcount;
11890		biodone(bp);
11891		return (0);
11892	}
11893
11894	ASSERT(!mutex_owned(SD_MUTEX(un)));
11895
11896	/*
11897	 * Commands may sneak in while the mutex was released during
11898	 * DDI_SUSPEND, so block any new commands here. However, old
11899	 * commands that are still in the driver at this point should
11900	 * still be allowed to drain.
11901	 */
11902	mutex_enter(SD_MUTEX(un));
11903	/*
11904	 * Must wait here if either the device is suspended or
11905	 * if its power level is changing.
11906	 */
11907	while ((un->un_state == SD_STATE_SUSPENDED) ||
11908	    (un->un_state == SD_STATE_PM_CHANGING)) {
11909		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11910	}
11911
11912	un->un_ncmds_in_driver++;
11913
11914	/*
11915	 * atapi: Since we are currently running the CD in PIO mode, we need
11916	 * to call bp_mapin here to avoid bp_mapin being called in interrupt
11917	 * context from the HBA's init_pkt routine.
11918	 */
11919	if (un->un_f_cfg_is_atapi == TRUE) {
11920		mutex_exit(SD_MUTEX(un));
11921		bp_mapin(bp);
11922		mutex_enter(SD_MUTEX(un));
11923	}
11924	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11925	    un->un_ncmds_in_driver);
11926
11927	mutex_exit(SD_MUTEX(un));
11928
11929	/*
11930	 * This will (eventually) allocate the sd_xbuf area and
11931	 * call sd_xbuf_strategy().  We just want to return the
11932	 * result of ddi_xbuf_qstrategy so that we have an
11933	 * optimized tail call, which saves us a stack frame.
11934	 */
11935	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11936}
11937
11938
11939/*
11940 *    Function: sd_xbuf_strategy
11941 *
11942 * Description: Function for initiating IO operations via the
11943 *		ddi_xbuf_qstrategy() mechanism.
11944 *
11945 *     Context: Kernel thread context.
11946 */
11947
11948static void
11949sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11950{
11951	struct sd_lun *un = arg;
11952
11953	ASSERT(bp != NULL);
11954	ASSERT(xp != NULL);
11955	ASSERT(un != NULL);
11956	ASSERT(!mutex_owned(SD_MUTEX(un)));
11957
11958	/*
11959	 * Initialize the fields in the xbuf and save a pointer to the
11960	 * xbuf in bp->b_private.
11961	 */
11962	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11963
11964	/* Send the buf down the iostart chain */
11965	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11966}
11967
11968
11969/*
11970 *    Function: sd_xbuf_init
11971 *
11972 * Description: Prepare the given sd_xbuf struct for use.
11973 *
11974 *   Arguments: un - ptr to softstate
11975 *		bp - ptr to associated buf(9S)
11976 *		xp - ptr to associated sd_xbuf
11977 *		chain_type - IO chain type to use:
11978 *			SD_CHAIN_NULL
11979 *			SD_CHAIN_BUFIO
11980 *			SD_CHAIN_USCSI
11981 *			SD_CHAIN_DIRECT
11982 *			SD_CHAIN_DIRECT_PRIORITY
11983 *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11984 *			initialization; may be NULL if none.
11985 *
11986 *     Context: Kernel thread context
11987 */
11988
11989static void
11990sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11991	uchar_t chain_type, void *pktinfop)
11992{
11993	int index;
11994
11995	ASSERT(un != NULL);
11996	ASSERT(bp != NULL);
11997	ASSERT(xp != NULL);
11998
11999	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
12000	    bp, chain_type);
12001
12002	xp->xb_un	= un;
12003	xp->xb_pktp	= NULL;
12004	xp->xb_pktinfo	= pktinfop;
12005	xp->xb_private	= bp->b_private;
12006	xp->xb_blkno	= (daddr_t)bp->b_blkno;
12007
12008	/*
12009	 * Set up the iostart and iodone chain indexes in the xbuf, based
12010	 * upon the specified chain type to use.
12011	 */
12012	switch (chain_type) {
12013	case SD_CHAIN_NULL:
12014		/*
12015		 * Fall through and just use the values for the buf type, even
12016		 * though for the NULL chain these values will never be used.
12017		 */
12018		/* FALLTHRU */
12019	case SD_CHAIN_BUFIO:
12020		index = un->un_buf_chain_type;
12021		break;
12022	case SD_CHAIN_USCSI:
12023		index = un->un_uscsi_chain_type;
12024		break;
12025	case SD_CHAIN_DIRECT:
12026		index = un->un_direct_chain_type;
12027		break;
12028	case SD_CHAIN_DIRECT_PRIORITY:
12029		index = un->un_priority_chain_type;
12030		break;
12031	default:
12032		/* We're really broken if we ever get here... */
12033		panic("sd_xbuf_init: illegal chain type!");
12034		/*NOTREACHED*/
12035	}
12036
12037	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
12038	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
12039
12040	/*
12041	 * It might be a bit easier to simply bzero the entire xbuf above,
12042	 * but it turns out that since we init a fair number of members anyway,
12043	 * we save a fair number of cycles by doing explicit assignments of zero.
12044	 */
12045	xp->xb_pkt_flags	= 0;
12046	xp->xb_dma_resid	= 0;
12047	xp->xb_retry_count	= 0;
12048	xp->xb_victim_retry_count = 0;
12049	xp->xb_ua_retry_count	= 0;
12050	xp->xb_sense_bp		= NULL;
12051	xp->xb_sense_status	= 0;
12052	xp->xb_sense_state	= 0;
12053	xp->xb_sense_resid	= 0;
12054
12055	bp->b_private	= xp;
12056	bp->b_flags	&= ~(B_DONE | B_ERROR);
12057	bp->b_resid	= 0;
12058	bp->av_forw	= NULL;
12059	bp->av_back	= NULL;
12060	bioerror(bp, 0);
12061
12062	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
12063}
12064
12065
12066/*
12067 *    Function: sd_uscsi_strategy
12068 *
12069 * Description: Wrapper for calling into the USCSI chain via physio(9F)
12070 *
12071 *   Arguments: bp - buf struct ptr
12072 *
12073 * Return Code: Always returns 0
12074 *
12075 *     Context: Kernel thread context
12076 */
12077
12078static int
12079sd_uscsi_strategy(struct buf *bp)
12080{
12081	struct sd_lun		*un;
12082	struct sd_uscsi_info	*uip;
12083	struct sd_xbuf		*xp;
12084	uchar_t			chain_type;
12085
12086	ASSERT(bp != NULL);
12087
12088	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
12089	if (un == NULL) {
12090		bioerror(bp, EIO);
12091		bp->b_resid = bp->b_bcount;
12092		biodone(bp);
12093		return (0);
12094	}
12095
12096	ASSERT(!mutex_owned(SD_MUTEX(un)));
12097
12098	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
12099
12100	mutex_enter(SD_MUTEX(un));
12101	/*
12102	 * atapi: Since we are currently running the CD in PIO mode, we need
12103	 * to call bp_mapin here to avoid bp_mapin being called in interrupt
12104	 * context from the HBA's init_pkt routine.
12105	 */
12106	if (un->un_f_cfg_is_atapi == TRUE) {
12107		mutex_exit(SD_MUTEX(un));
12108		bp_mapin(bp);
12109		mutex_enter(SD_MUTEX(un));
12110	}
12111	un->un_ncmds_in_driver++;
12112	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
12113	    un->un_ncmds_in_driver);
12114	mutex_exit(SD_MUTEX(un));
12115
12116	/*
12117	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
12118	 */
12119	ASSERT(bp->b_private != NULL);
12120	uip = (struct sd_uscsi_info *)bp->b_private;
12121
12122	switch (uip->ui_flags) {
12123	case SD_PATH_DIRECT:
12124		chain_type = SD_CHAIN_DIRECT;
12125		break;
12126	case SD_PATH_DIRECT_PRIORITY:
12127		chain_type = SD_CHAIN_DIRECT_PRIORITY;
12128		break;
12129	default:
12130		chain_type = SD_CHAIN_USCSI;
12131		break;
12132	}
12133
12134	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
12135	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
12136
12137	/* Use the index obtained within xbuf_init */
12138	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
12139
12140	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
12141
12142	return (0);
12143}
12144
12145
12146/*
12147 * These routines perform raw i/o operations.
12148 */
12149/*ARGSUSED*/
12150static void
12151sduscsimin(struct buf *bp)
12152{
12153	/*
12154	 * Do not break up the transfer: the CDB count would then
12155	 * be incorrect and data underruns would result (incomplete
12156	 * reads/writes, which would be retried and then fail; see
12157	 * sdintr()).
12158	 */
12159}
12160
12161
12162
12163/*
12164 *    Function: sd_send_scsi_cmd
12165 *
12166 * Description: Runs a USCSI command for a user (when called through sdioctl),
12167 *		or for the driver
12168 *
12169 *   Arguments: dev - the dev_t for the device
12170 *		incmd - ptr to a valid uscsi_cmd struct
12171 *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
12172 *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
12173 *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
12174 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
12175 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
12176 *			to use the USCSI "direct" chain and bypass the normal
12177 *			command waitq.
12178 *
12179 * Return Code: 0 -  successful completion of the given command
12180 *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
12181 *		ENXIO  - soft state not found for specified dev
12182 *		EINVAL
12183 *		EFAULT - copyin/copyout error
12184 *		return code of biowait(9F) or physio(9F):
12185 *			EIO - IO error, caller may check incmd->uscsi_status
12186 *			ENXIO
12187 *			EACCES - reservation conflict
12188 *
12189 *     Context: Waits for command to complete. Can sleep.
12190 */
12191
12192static int
12193sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
12194	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
12195	int path_flag)
12196{
12197	struct sd_uscsi_info	*uip;
12198	struct uscsi_cmd	*uscmd;
12199	struct sd_lun	*un;
12200	struct buf	*bp;
12201	int	rval;
12202	int	flags;
12203
12204	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
12205	if (un == NULL) {
12206		return (ENXIO);
12207	}
12208
12209	ASSERT(!mutex_owned(SD_MUTEX(un)));
12210
12211#ifdef SDDEBUG
12212	switch (dataspace) {
12213	case UIO_USERSPACE:
12214		SD_TRACE(SD_LOG_IO, un,
12215		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
12216		break;
12217	case UIO_SYSSPACE:
12218		SD_TRACE(SD_LOG_IO, un,
12219		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
12220		break;
12221	default:
12222		SD_TRACE(SD_LOG_IO, un,
12223		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
12224		break;
12225	}
12226#endif
12227
12228	/*
12229	 * Perform resets directly; no need to generate a command to do it.
12230	 */
12231	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
12232		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
12233		    RESET_ALL : RESET_TARGET;
12234		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
12235		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
12236			/* Reset attempt was unsuccessful */
12237			SD_TRACE(SD_LOG_IO, un,
12238			    "sd_send_scsi_cmd: reset: failure\n");
12239			return (EIO);
12240		}
12241		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
12242		return (0);
12243	}
12244
12245	/* Perfunctory sanity check... */
12246	if (incmd->uscsi_cdblen <= 0) {
12247		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12248		    "invalid uscsi_cdblen, returning EINVAL\n");
12249		return (EINVAL);
12250	} else if (incmd->uscsi_cdblen > un->un_max_hba_cdb) {
12251		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12252		    "unsupported uscsi_cdblen, returning EINVAL\n");
12253		return (EINVAL);
12254	}
12255
12256	/*
12257	 * In order to not worry about where the uscsi structure came from
12258	 * (or where the cdb it points to came from) we're going to make
12259	 * kmem_alloc'd copies of them here. This will also allow reference
12260	 * to the data they contain long after this process has gone to
12261	 * sleep and its kernel stack has been unmapped, etc.
12262	 *
12263	 * First get some memory for the uscsi_cmd struct and copy the
12264	 * contents of the given uscsi_cmd struct into it.
12265	 */
12266	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
12267	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
12268
12269	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
12270	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
12271
12272	/*
12273	 * Now get some space for the CDB, and copy the given CDB into
12274	 * it. Use ddi_copyin() in case the data is in user space.
12275	 */
12276	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
12277	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
12278	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
12279	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
12280		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
12281		kmem_free(uscmd, sizeof (struct uscsi_cmd));
12282		return (EFAULT);
12283	}
12284
12285	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
12286	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
12287
12288	bp = getrbuf(KM_SLEEP);
12289
12290	/*
12291	 * Allocate an sd_uscsi_info struct and fill it with the info
12292	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
12293	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
12294	 * since we allocate the buf here in this function, we do not
12295	 * need to preserve the prior contents of b_private.
12296	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
12297	 */
12298	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
12299	uip->ui_flags = path_flag;
12300	uip->ui_cmdp  = uscmd;
12301	bp->b_private = uip;
12302
12303	/*
12304	 * Initialize Request Sense buffering, if requested.
12305	 */
12306	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12307	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12308		/*
12309		 * Here uscmd->uscsi_rqbuf currently points to the caller's
12310		 * buffer, but we replace this with a kernel buffer that
12311		 * we allocate to use with the sense data. The sense data
12312		 * (if present) gets copied into this new buffer before the
12313		 * command is completed.  Then we copy the sense data from
12314		 * our allocated buf into the caller's buffer below. Note
12315		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
12316		 * below to perform the copy back to the caller's buf.
12317		 */
12318		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
12319		if (rqbufspace == UIO_USERSPACE) {
12320			uscmd->uscsi_rqlen   = SENSE_LENGTH;
12321			uscmd->uscsi_rqresid = SENSE_LENGTH;
12322		} else {
12323			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
12324			uscmd->uscsi_rqlen   = rlen;
12325			uscmd->uscsi_rqresid = rlen;
12326		}
12327	} else {
12328		uscmd->uscsi_rqbuf = NULL;
12329		uscmd->uscsi_rqlen   = 0;
12330		uscmd->uscsi_rqresid = 0;
12331	}
12332
12333	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
12334	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
12335
12336	if (un->un_f_is_fibre == FALSE) {
12337		/*
12338		 * Force asynchronous mode, if necessary.  Doing this here
12339		 * has the unfortunate effect of running other queued
12340		 * commands async also, but since the main purpose of this
12341		 * capability is downloading new drive firmware, we can
12342		 * probably live with it.
12343		 */
12344		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
12345			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12346				== 1) {
12347				if (scsi_ifsetcap(SD_ADDRESS(un),
12348					    "synchronous", 0, 1) == 1) {
12349					SD_TRACE(SD_LOG_IO, un,
12350					"sd_send_scsi_cmd: forced async ok\n");
12351				} else {
12352					SD_TRACE(SD_LOG_IO, un,
12353					    "sd_send_scsi_cmd: "
12354					    "forced async failed\n");
12355					rval = EINVAL;
12356					goto done;
12357				}
12358			}
12359		}
12360
12361		/*
12362		 * Re-enable synchronous mode, if requested
12363		 */
12364		if (uscmd->uscsi_flags & USCSI_SYNC) {
12365			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
12366				== 0) {
12367				int i = scsi_ifsetcap(SD_ADDRESS(un),
12368						"synchronous", 1, 1);
12369				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12370					"re-enabled sync %s\n",
12371					(i == 1) ? "ok" : "failed");
12372			}
12373		}
12374	}
12375
12376	/*
12377	 * Commands sent with priority are intended for error recovery
12378	 * situations, and do not have retries performed.
12379	 */
12380	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12381		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12382	}
12383
12384	/*
12385	 * If we're going to do actual I/O, let physio do all the right things
12386	 */
12387	if (uscmd->uscsi_buflen != 0) {
12388		struct iovec	aiov;
12389		struct uio	auio;
12390		struct uio	*uio = &auio;
12391
12392		bzero(&auio, sizeof (struct uio));
12393		bzero(&aiov, sizeof (struct iovec));
12394		aiov.iov_base = uscmd->uscsi_bufaddr;
12395		aiov.iov_len  = uscmd->uscsi_buflen;
12396		uio->uio_iov  = &aiov;
12397
12398		uio->uio_iovcnt  = 1;
12399		uio->uio_resid   = uscmd->uscsi_buflen;
12400		uio->uio_segflg  = dataspace;
12401
12402		/*
12403		 * physio() will block here until the command completes....
12404		 */
12405		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
12406
12407		rval = physio(sd_uscsi_strategy, bp, dev,
12408		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
12409		    sduscsimin, uio);
12410
12411		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12412		    "returned from physio with 0x%x\n", rval);
12413
12414	} else {
12415		/*
12416		 * We have to mimic what physio would do here! Argh!
12417		 */
12418		bp->b_flags  = B_BUSY |
12419		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
12420		bp->b_edev   = dev;
12421		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
12422		bp->b_bcount = 0;
12423		bp->b_blkno  = 0;
12424
12425		SD_TRACE(SD_LOG_IO, un,
12426		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
12427
12428		(void) sd_uscsi_strategy(bp);
12429
12430		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
12431
12432		rval = biowait(bp);
12433
12434		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12435		    "returned from  biowait with 0x%x\n", rval);
12436	}
12437
12438done:
12439
12440#ifdef SDDEBUG
12441	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12442	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12443	    uscmd->uscsi_status, uscmd->uscsi_resid);
12444	if (uscmd->uscsi_bufaddr != NULL) {
12445		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12446		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12447		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12448		if (dataspace == UIO_SYSSPACE) {
12449			SD_DUMP_MEMORY(un, SD_LOG_IO,
12450			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12451			    uscmd->uscsi_buflen, SD_LOG_HEX);
12452		}
12453	}
12454#endif
12455
12456	/*
12457	 * Get the status and residual to return to the caller.
12458	 */
12459	incmd->uscsi_status = uscmd->uscsi_status;
12460	incmd->uscsi_resid  = uscmd->uscsi_resid;
12461
12462	/*
12463	 * If the caller wants sense data, copy back whatever sense data
12464	 * we may have gotten, and update the relevant rqsense info.
12465	 */
12466	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
12467	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
12468
12469		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
12470		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
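		/*
		 * Worked example (illustrative numbers): if 20 bytes of
		 * sense were requested internally (uscsi_rqlen = 20) and
		 * 6 remain untransferred (uscsi_rqresid = 6), then 14
		 * bytes arrived.  A caller that supplied an 8-byte buffer
		 * (incmd->uscsi_rqlen = 8) gets rqlen = min(8, 14) = 8,
		 * and incmd->uscsi_rqresid = 8 - 8 = 0 below.
		 */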
12471
12472		/* Update the Request Sense status and resid */
12473		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
12474		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
12475
12476		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12477		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
12478		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
12479
12480		/* Copy out the sense data for user processes */
12481		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
12482			int flags =
12483			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
12484			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
12485			    rqlen, flags) != 0) {
12486				rval = EFAULT;
12487			}
12488			/*
12489			 * Note: Can't touch incmd->uscsi_rqbuf so use
12490			 * uscmd->uscsi_rqbuf instead. They're the same.
12491			 */
12492			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
12493			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
12494			    incmd->uscsi_rqbuf, rqlen);
12495			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
12496			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
12497		}
12498	}
12499
12500	/*
12501	 * Free allocated resources and return; mapout the buf in case it was
12502	 * mapped in by a lower layer.
12503	 */
12504	bp_mapout(bp);
12505	freerbuf(bp);
12506	kmem_free(uip, sizeof (struct sd_uscsi_info));
12507	if (uscmd->uscsi_rqbuf != NULL) {
12508		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
12509	}
12510	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
12511	kmem_free(uscmd, sizeof (struct uscsi_cmd));
12512
12513	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
12514
12515	return (rval);
12516}
12517
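/*
 * Illustrative sketch (hypothetical in-kernel caller, not part of the
 * driver; "dev" and "status" are assumed locals and the timeout value is
 * arbitrary): a user of this routine fills in a uscsi_cmd whose CDB and
 * buffers live in kernel memory, then passes UIO_SYSSPACE for each
 * address-space argument:
 *
 *	union scsi_cdb		cdb;
 *	struct uscsi_cmd	ucmd;
 *	int			status;
 *
 *	bzero(&cdb, sizeof (cdb));
 *	bzero(&ucmd, sizeof (ucmd));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb	   = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen  = CDB_GROUP0;
 *	ucmd.uscsi_timeout = 60;
 *	ucmd.uscsi_flags   = USCSI_SILENT;
 *	status = sd_send_scsi_cmd(dev, &ucmd, UIO_SYSSPACE, UIO_SYSSPACE,
 *	    UIO_SYSSPACE, SD_PATH_DIRECT);
 */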
12518
12519/*
12520 *    Function: sd_buf_iodone
12521 *
12522 * Description: Frees the sd_xbuf & returns the buf to its originator.
12523 *
12524 *     Context: May be called from interrupt context.
12525 */
12526/* ARGSUSED */
12527static void
12528sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12529{
12530	struct sd_xbuf *xp;
12531
12532	ASSERT(un != NULL);
12533	ASSERT(bp != NULL);
12534	ASSERT(!mutex_owned(SD_MUTEX(un)));
12535
12536	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12537
12538	xp = SD_GET_XBUF(bp);
12539	ASSERT(xp != NULL);
12540
12541	mutex_enter(SD_MUTEX(un));
12542
12543	/*
12544	 * Record the time at which the command completed.
12545	 * This is used to determine whether the device has been
12546	 * idle long enough to be considered idle by the PM framework.
12547	 * This lowers the PM overhead and therefore improves
12548	 * performance per I/O operation.
12549	 */
12550	un->un_pm_idle_time = ddi_get_time();
12551
12552	un->un_ncmds_in_driver--;
12553	ASSERT(un->un_ncmds_in_driver >= 0);
12554	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12555	    un->un_ncmds_in_driver);
12556
12557	mutex_exit(SD_MUTEX(un));
12558
12559	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
12560	biodone(bp);				/* bp is gone after this */
12561
12562	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12563}
12564
12565
12566/*
12567 *    Function: sd_uscsi_iodone
12568 *
12569 * Description: Frees the sd_xbuf & returns the buf to its originator.
12570 *
12571 *     Context: May be called from interrupt context.
12572 */
12573/* ARGSUSED */
12574static void
12575sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12576{
12577	struct sd_xbuf *xp;
12578
12579	ASSERT(un != NULL);
12580	ASSERT(bp != NULL);
12581
12582	xp = SD_GET_XBUF(bp);
12583	ASSERT(xp != NULL);
12584	ASSERT(!mutex_owned(SD_MUTEX(un)));
12585
12586	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12587
12588	bp->b_private = xp->xb_private;
12589
12590	mutex_enter(SD_MUTEX(un));
12591
12592	/*
12593	 * Record the time at which the command completed.
12594	 * This is used to determine whether the device has been
12595	 * idle long enough to be considered idle by the PM framework.
12596	 * This lowers the PM overhead and therefore improves
12597	 * performance per I/O operation.
12598	 */
12599	un->un_pm_idle_time = ddi_get_time();
12600
12601	un->un_ncmds_in_driver--;
12602	ASSERT(un->un_ncmds_in_driver >= 0);
12603	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12604	    un->un_ncmds_in_driver);
12605
12606	mutex_exit(SD_MUTEX(un));
12607
12608	kmem_free(xp, sizeof (struct sd_xbuf));
12609	biodone(bp);
12610
12611	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12612}
12613
12614
12615/*
12616 *    Function: sd_mapblockaddr_iostart
12617 *
12618 * Description: Verify the request lies within the partition limits for
12619 *		the indicated minor device.  Issue an "overrun" buf if the
12620 *		request would exceed the partition range.  Convert the
12621 *		partition-relative block address to an absolute one.
12622 *
12623 *     Context: Can sleep
12624 *
12625 *      Issues: This follows what the old code did, in terms of accessing
12626 *		some of the partition info in the unit struct without holding
12627 *		the mutex.  This is a general issue: if the partition info
12628 *		can be altered while IO is in progress... as soon as we send
12629 *		a buf, its partitioning can be invalid before it gets to the
12630 *		device.  Probably the right fix is to move partitioning out
12631 *		of the driver entirely.
12632 */
12633
12634static void
12635sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12636{
12637	daddr_t	nblocks;	/* #blocks in the given partition */
12638	daddr_t	blocknum;	/* Block number specified by the buf */
12639	size_t	requested_nblocks;
12640	size_t	available_nblocks;
12641	int	partition;
12642	diskaddr_t	partition_offset;
12643	struct sd_xbuf *xp;
12644
12645
12646	ASSERT(un != NULL);
12647	ASSERT(bp != NULL);
12648	ASSERT(!mutex_owned(SD_MUTEX(un)));
12649
12650	SD_TRACE(SD_LOG_IO_PARTITION, un,
12651	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12652
12653	xp = SD_GET_XBUF(bp);
12654	ASSERT(xp != NULL);
12655
12656	/*
12657	 * If the geometry is not indicated as valid, attempt to access
12658	 * the unit & verify the geometry/label. This can be the case for
12659	 * removable-media devices, or if the device was opened in
12660	 * NDELAY/NONBLOCK mode.
12661	 */
12662	if ((un->un_f_geometry_is_valid != TRUE) &&
12663	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12664		/*
12665		 * For removable devices it is possible to start an I/O
12666		 * without a media by opening the device in nodelay mode.
12667		 * Also for writable CDs there can be many scenarios where
12668		 * there is no geometry yet but volume manager is trying to
12669		 * issue a read() just because it can see TOC on the CD. So
12670		 * do not print a message for removables.
12671		 */
12672		if (!un->un_f_has_removable_media) {
12673			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12674			    "i/o to invalid geometry\n");
12675		}
12676		bioerror(bp, EIO);
12677		bp->b_resid = bp->b_bcount;
12678		SD_BEGIN_IODONE(index, un, bp);
12679		return;
12680	}
12681
12682	partition = SDPART(bp->b_edev);
12683
12685	nblocks = un->un_map[partition].dkl_nblk;    /* #blocks in partition */
12686
12687	/* Use of a local variable potentially improves performance slightly */
12688	partition_offset = un->un_offset[partition];
12689
12690	/*
12691	 * blocknum is the starting block number of the request. At this
12692	 * point it is still relative to the start of the minor device.
12693	 */
12694	blocknum = xp->xb_blkno;
12695
12696	/*
12697	 * Legacy: If the starting block number is one past the last block
12698	 * in the partition, do not set B_ERROR in the buf.
12699	 */
12700	if (blocknum == nblocks)  {
12701		goto error_exit;
12702	}
12703
12704	/*
12705	 * Confirm that the first block of the request lies within the
12706	 * partition limits. Also the requested number of bytes must be
12707	 * a multiple of the system block size.
12708	 */
12709	if ((blocknum < 0) || (blocknum >= nblocks) ||
12710	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12711		bp->b_flags |= B_ERROR;
12712		goto error_exit;
12713	}
12714
12715	/*
12716	 * If the requested # blocks exceeds the available # blocks, that
12717	 * is an overrun of the partition.
12718	 */
12719	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12720	available_nblocks = (size_t)(nblocks - blocknum);
12721	ASSERT(nblocks >= blocknum);
12722
12723	if (requested_nblocks > available_nblocks) {
12724		/*
12725		 * Allocate an "overrun" buf to allow the request to proceed
12726		 * for the amount of space available in the partition. The
12727		 * amount not transferred will be added into the b_resid
12728		 * when the operation is complete. The overrun buf
12729		 * replaces the original buf here, and the original buf
12730		 * is saved inside the overrun buf, for later use.
12731		 */
12732		size_t resid = SD_SYSBLOCKS2BYTES(un,
12733		    (offset_t)(requested_nblocks - available_nblocks));
12734		size_t count = bp->b_bcount - resid;
12735		/*
12736		 * Note: count is an unsigned entity, thus it can NEVER
12737		 * be less than 0, so ASSERT that the original values are
12738		 * correct.
12739		 */
12740		ASSERT(bp->b_bcount >= resid);
12741
12742		bp = sd_bioclone_alloc(bp, count, blocknum,
12743			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12744		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12745		ASSERT(xp != NULL);
12746	}
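
	/*
	 * Worked example (illustrative, assuming a 512-byte system block
	 * size): a 4096-byte request (8 blocks) starting at block 1020 of
	 * a 1024-block partition gives available_nblocks = 4, so
	 * resid = SD_SYSBLOCKS2BYTES(un, 8 - 4) = 2048 and
	 * count = 4096 - 2048 = 2048.  The overrun buf carries the first
	 * 2048 bytes; the remaining 2048 are folded back into b_resid
	 * when the request completes.
	 */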
12747
12748	/* At this point there should be no residual for this buf. */
12749	ASSERT(bp->b_resid == 0);
12750
12751	/* Convert the block number to an absolute address. */
12752	xp->xb_blkno += partition_offset;
12753
12754	SD_NEXT_IOSTART(index, un, bp);
12755
12756	SD_TRACE(SD_LOG_IO_PARTITION, un,
12757	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12758
12759	return;
12760
12761error_exit:
12762	bp->b_resid = bp->b_bcount;
12763	SD_BEGIN_IODONE(index, un, bp);
12764	SD_TRACE(SD_LOG_IO_PARTITION, un,
12765	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12766}
12767
12768
12769/*
12770 *    Function: sd_mapblockaddr_iodone
12771 *
12772 * Description: Completion-side processing for partition management.
12773 *
12774 *     Context: May be called under interrupt context
12775 */
12776
12777static void
12778sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12779{
12780	/* int	partition; */	/* Not used, see below. */
12781	ASSERT(un != NULL);
12782	ASSERT(bp != NULL);
12783	ASSERT(!mutex_owned(SD_MUTEX(un)));
12784
12785	SD_TRACE(SD_LOG_IO_PARTITION, un,
12786	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12787
12788	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12789		/*
12790		 * We have an "overrun" buf to deal with...
12791		 */
12792		struct sd_xbuf	*xp;
12793		struct buf	*obp;	/* ptr to the original buf */
12794
12795		xp = SD_GET_XBUF(bp);
12796		ASSERT(xp != NULL);
12797
12798		/* Retrieve the pointer to the original buf */
12799		obp = (struct buf *)xp->xb_private;
12800		ASSERT(obp != NULL);
12801
12802		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12803		bioerror(obp, bp->b_error);
12804
12805		sd_bioclone_free(bp);
12806
12807		/*
12808		 * Get back the original buf.
12809		 * Note that since the restoration of xb_blkno below
12810		 * was removed, the sd_xbuf is not needed.
12811		 */
12812		bp = obp;
12813		/*
12814		 * xp = SD_GET_XBUF(bp);
12815		 * ASSERT(xp != NULL);
12816		 */
12817	}
12818
12819	/*
12820	 * Convert xp->xb_blkno back to a minor-device relative value.
12821	 * Note: this has been commented out, as it is not needed in the
12822	 * current implementation of the driver (ie, since this function
12823	 * is at the top of the layering chains, so the info will be
12824	 * discarded) and it is in the "hot" IO path.
12825	 *
12826	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12827	 * xp->xb_blkno -= un->un_offset[partition];
12828	 */
12829
12830	SD_NEXT_IODONE(index, un, bp);
12831
12832	SD_TRACE(SD_LOG_IO_PARTITION, un,
12833	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12834}
12835
12836
12837/*
12838 *    Function: sd_mapblocksize_iostart
12839 *
12840 * Description: Convert between system block size (un->un_sys_blocksize)
12841 *		and target block size (un->un_tgt_blocksize).
12842 *
12843 *     Context: Can sleep to allocate resources.
12844 *
12845 * Assumptions: A higher layer has already performed any partition validation,
12846 *		and converted the xp->xb_blkno to an absolute value relative
12847 *		to the start of the device.
12848 *
12849 *		It is also assumed that the higher layer has implemented
12850 *		an "overrun" mechanism for the case where the request would
12851 *		read/write beyond the end of a partition.  In this case we
12852 *		assume (and ASSERT) that bp->b_resid == 0.
12853 *
12854 *		Note: The implementation for this routine assumes the target
12855 *		block size remains constant between allocation and transport.
12856 */
12857
12858static void
12859sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12860{
12861	struct sd_mapblocksize_info	*bsp;
12862	struct sd_xbuf			*xp;
12863	offset_t first_byte;
12864	daddr_t	start_block, end_block;
12865	daddr_t	request_bytes;
12866	ushort_t is_aligned = FALSE;
12867
12868	ASSERT(un != NULL);
12869	ASSERT(bp != NULL);
12870	ASSERT(!mutex_owned(SD_MUTEX(un)));
12871	ASSERT(bp->b_resid == 0);
12872
12873	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12874	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12875
12876	/*
12877	 * For a non-writable CD, a write request is an error
12878	 */
12879	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12880	    (un->un_f_mmc_writable_media == FALSE)) {
12881		bioerror(bp, EIO);
12882		bp->b_resid = bp->b_bcount;
12883		SD_BEGIN_IODONE(index, un, bp);
12884		return;
12885	}
12886
12887	/*
12888	 * We do not need a shadow buf if the device is using
12889	 * un->un_sys_blocksize as its block size or if bcount == 0.
12890	 * In this case there is no layer-private data block allocated.
12891	 */
12892	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12893	    (bp->b_bcount == 0)) {
12894		goto done;
12895	}
12896
12897#if defined(__i386) || defined(__amd64)
12898	/* We do not support non-block-aligned transfers for ROD devices */
12899	ASSERT(!ISROD(un));
12900#endif
12901
12902	xp = SD_GET_XBUF(bp);
12903	ASSERT(xp != NULL);
12904
12905	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12906	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12907	    un->un_tgt_blocksize, un->un_sys_blocksize);
12908	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12909	    "request start block:0x%x\n", xp->xb_blkno);
12910	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12911	    "request len:0x%x\n", bp->b_bcount);
12912
12913	/*
12914	 * Allocate the layer-private data area for the mapblocksize layer.
12915	 * Layers are allowed to use the xb_private member of the sd_xbuf
12916	 * struct to store the pointer to their layer-private data block, but
12917	 * each layer also has the responsibility of restoring the prior
12918	 * contents of xb_private before returning the buf/xbuf to the
12919	 * higher layer that sent it.
12920	 *
12921	 * Here we save the prior contents of xp->xb_private into the
12922	 * bsp->mbs_oprivate field of our layer-private data area. This value
12923	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12924	 * the layer-private area and returning the buf/xbuf to the layer
12925	 * that sent it.
12926	 *
12927	 * Note that here we use kmem_zalloc for the allocation as there are
12928	 * parts of the mapblocksize code that expect certain fields to be
12929	 * zero unless explicitly set to a required value.
12930	 */
12931	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12932	bsp->mbs_oprivate = xp->xb_private;
12933	xp->xb_private = bsp;
12934
12935	/*
12936	 * This treats the data on the disk (target) as an array of bytes.
12937	 * first_byte is the byte offset, from the beginning of the device,
12938	 * to the location of the request. This is converted from a
12939	 * un->un_sys_blocksize block address to a byte offset, and then back
12940	 * to a block address based upon a un->un_tgt_blocksize block size.
12941	 *
12942	 * xp->xb_blkno should be absolute upon entry into this function,
12943	 * but it is based upon partitions that use the "system"
12944	 * block size. It must be adjusted to reflect the block size of
12945	 * the target.
12946	 *
12947	 * Note that end_block is actually the block that follows the last
12948	 * block of the request, but that's what is needed for the computation.
12949	 */
12950	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12951	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12952	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12953	    un->un_tgt_blocksize;
12954
12955	/* request_bytes is rounded up to a multiple of the target block size */
12956	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
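
	/*
	 * Illustrative sketch (standalone program, not driver code; the
	 * block sizes and request below are assumed values): the same
	 * conversion arithmetic with worked numbers.
	 *
	 *	#include <stdio.h>
	 *
	 *	int
	 *	main(void)
	 *	{
	 *		long long sys_bs = 512, tgt_bs = 2048;
	 *		long long xb_blkno = 3, bcount = 1024;
	 *
	 *		long long first_byte  = xb_blkno * sys_bs;	// 1536
	 *		long long start_block = first_byte / tgt_bs;	// 0
	 *		long long end_block   =
	 *		    (first_byte + bcount + tgt_bs - 1) / tgt_bs; // 2
	 *		long long request_bytes =
	 *		    (end_block - start_block) * tgt_bs;	// 4096
	 *		long long copy_offset =
	 *		    first_byte - (start_block * tgt_bs);	// 1536
	 *		int aligned = (first_byte % tgt_bs == 0) &&
	 *		    (bcount % tgt_bs == 0);			// false
	 *
	 *		printf("start=%lld end=%lld bytes=%lld "
	 *		    "offset=%lld aligned=%d\n", start_block,
	 *		    end_block, request_bytes, copy_offset, aligned);
	 *		return (0);
	 *	}
	 */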
12957
12958	/*
12959	 * See if the starting address of the request and the request
12960	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12961	 * then we do not need to allocate a shadow buf to handle the request.
12962	 */
12963	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12964	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12965		is_aligned = TRUE;
12966	}
12967
12968	if ((bp->b_flags & B_READ) == 0) {
12969		/*
12970		 * Lock the range for a write operation. An aligned request is
12971		 * considered a simple write; otherwise the request must be a
12972		 * read-modify-write.
12973		 */
12974		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12975		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12976	}
12977
12978	/*
12979	 * Alloc a shadow buf if the request is not aligned. Also, this is
12980	 * where the READ command is generated for a read-modify-write. (The
12981	 * write phase is deferred until after the read completes.)
12982	 */
12983	if (is_aligned == FALSE) {
12984
12985		struct sd_mapblocksize_info	*shadow_bsp;
12986		struct sd_xbuf	*shadow_xp;
12987		struct buf	*shadow_bp;
12988
12989		/*
12990		 * Allocate the shadow buf and its associated xbuf. Note that
12991		 * after this call the xb_blkno value in both the original
12992		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12993		 * same: absolute (relative to the start of the device), and
12994		 * adjusted for the target block size. The b_blkno in the
12995		 * shadow buf will also be set to this value. We should never
12996		 * change b_blkno in the original bp however.
12997		 *
12998		 * Note also that the shadow buf will always need to be a
12999		 * READ command, regardless of whether the incoming command
13000		 * is a READ or a WRITE.
13001		 */
13002		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
13003		    xp->xb_blkno,
13004		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
13005
13006		shadow_xp = SD_GET_XBUF(shadow_bp);
13007
13008		/*
13009		 * Allocate the layer-private data for the shadow buf.
13010		 * (No need to preserve xb_private in the shadow xbuf.)
13011		 */
13012		shadow_xp->xb_private = shadow_bsp =
13013		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
13014
13015		/*
13016		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
13017		 * to figure out where the start of the user data is (based upon
13018		 * the system block size) in the data returned by the READ
13019		 * command (which will be based upon the target blocksize). Note
13020		 * that this is only really used if the request is unaligned.
13021		 */
13022		bsp->mbs_copy_offset = (ssize_t)(first_byte -
13023		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
13024		ASSERT((bsp->mbs_copy_offset >= 0) &&
13025		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
13026
13027		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
13028
13029		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
13030
13031		/* Transfer the wmap (if any) to the shadow buf */
13032		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
13033		bsp->mbs_wmp = NULL;
13034
13035		/*
13036		 * The shadow buf goes on from here in place of the
13037		 * original buf.
13038		 */
13039		shadow_bsp->mbs_orig_bp = bp;
13040		bp = shadow_bp;
13041	}
13042
13043	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13044	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
13045	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13046	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
13047	    request_bytes);
13048	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13049	    "sd_mapblocksize_iostart: shadow buf:0x%p\n", bp);
13050
13051done:
13052	SD_NEXT_IOSTART(index, un, bp);
13053
13054	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
13055	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
13056}
13057
13058
13059/*
13060 *    Function: sd_mapblocksize_iodone
13061 *
13062 * Description: Completion side processing for block-size mapping.
13063 *
13064 *     Context: May be called under interrupt context
13065 */
13066
13067static void
13068sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
13069{
13070	struct sd_mapblocksize_info	*bsp;
13071	struct sd_xbuf	*xp;
13072	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
13073	struct buf	*orig_bp;	/* ptr to the original buf */
13074	offset_t	shadow_end;
13075	offset_t	request_end;
13076	offset_t	shadow_start;
13077	ssize_t		copy_offset;
13078	size_t		copy_length;
13079	size_t		shortfall;
13080	uint_t		is_write;	/* TRUE if this bp is a WRITE */
13081	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
13082
13083	ASSERT(un != NULL);
13084	ASSERT(bp != NULL);
13085
13086	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
13087	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
13088
13089	/*
13090	 * There is no shadow buf or layer-private data if the target is
13091	 * using un->un_sys_blocksize as its block size or if bcount == 0.
13092	 */
13093	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
13094	    (bp->b_bcount == 0)) {
13095		goto exit;
13096	}
13097
13098	xp = SD_GET_XBUF(bp);
13099	ASSERT(xp != NULL);
13100
13101	/* Retrieve the pointer to the layer-private data area from the xbuf. */
13102	bsp = xp->xb_private;
13103
13104	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
13105	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
13106
13107	if (is_write) {
13108		/*
13109		 * For a WRITE request we must free up the block range that
13110		 * we have locked up.  This holds regardless of whether this is
13111		 * an aligned write request or a read-modify-write request.
13112		 */
13113		sd_range_unlock(un, bsp->mbs_wmp);
13114		bsp->mbs_wmp = NULL;
13115	}
13116
13117	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
13118		/*
13119		 * An aligned read or write command will have no shadow buf;
13120		 * there is not much else to do with it.
13121		 */
13122		goto done;
13123	}
13124
13125	orig_bp = bsp->mbs_orig_bp;
13126	ASSERT(orig_bp != NULL);
13127	orig_xp = SD_GET_XBUF(orig_bp);
13128	ASSERT(orig_xp != NULL);
13129	ASSERT(!mutex_owned(SD_MUTEX(un)));
13130
13131	if (!is_write && has_wmap) {
13132		/*
13133		 * A READ with a wmap means this is the READ phase of a
13134		 * read-modify-write. If an error occurred on the READ then
13135		 * we do not proceed with the WRITE phase or copy any data.
13136		 * Just release the write maps and return with an error.
13137		 */
13138		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
13139			orig_bp->b_resid = orig_bp->b_bcount;
13140			bioerror(orig_bp, bp->b_error);
13141			sd_range_unlock(un, bsp->mbs_wmp);
13142			goto freebuf_done;
13143		}
13144	}
13145
13146	/*
13147	 * Here is where we set up to copy the data from the shadow buf
13148	 * into the space associated with the original buf.
13149	 *
13150	 * To deal with the conversion between block sizes, these
13151	 * computations treat the data as an array of bytes, with the
13152	 * first byte (byte 0) corresponding to the first byte in the
13153	 * first block on the disk.
13154	 */
13155
13156	/*
13157	 * shadow_start and shadow_end indicate the location and extent of
13158	 * the data returned with the shadow IO request.
13159	 */
13160	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
13161	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
13162
13163	/*
13164	 * copy_offset gives the offset (in bytes) from the start of the first
13165	 * block of the READ request to the beginning of the data.  We retrieve
13166	 * this value from mbs_copy_offset in the layer-private data, where it
13167	 * was saved by sd_mapblocksize_iostart(). copy_length gives the amount of
13168	 * data to be copied (in bytes).
13169	 */
13170	copy_offset  = bsp->mbs_copy_offset;
13171	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
13172	copy_length  = orig_bp->b_bcount;
13173	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
13174
13175	/*
13176	 * Set up the resid and error fields of orig_bp as appropriate.
13177	 */
13178	if (shadow_end >= request_end) {
13179		/* We got all the requested data; set resid to zero */
13180		orig_bp->b_resid = 0;
13181	} else {
13182		/*
13183		 * We failed to get enough data to fully satisfy the original
13184		 * request. Just copy back whatever data we got and set
13185		 * up the residual and error code as required.
13186		 *
13187		 * 'shortfall' is the amount by which the data received with the
13188		 * shadow buf has "fallen short" of the requested amount.
13189		 */
13190		shortfall = (size_t)(request_end - shadow_end);
13191
13192		if (shortfall > orig_bp->b_bcount) {
13193			/*
13194			 * We did not get enough data to even partially
13195			 * fulfill the original request.  The residual is
13196			 * equal to the amount requested.
13197			 */
13198			orig_bp->b_resid = orig_bp->b_bcount;
13199		} else {
13200			/*
13201			 * We did not get all the data that we requested
13202			 * from the device, but we will try to return what
13203			 * portion we did get.
13204			 */
13205			orig_bp->b_resid = shortfall;
13206		}
13207		ASSERT(copy_length >= orig_bp->b_resid);
13208		copy_length  -= orig_bp->b_resid;
13209	}
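
	/*
	 * Worked example (illustrative numbers): with shadow_start = 4096,
	 * copy_offset = 1536 and an original b_bcount of 1024,
	 * request_end = 6656.  If the shadow transfer returned only 2048
	 * of its bytes (shadow_end = 6144), shortfall = 512, so
	 * orig_bp->b_resid = 512 and copy_length drops from 1024 to 512.
	 */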
13210
13211	/* Propagate the error code from the shadow buf to the original buf */
13212	bioerror(orig_bp, bp->b_error);
13213
13214	if (is_write) {
13215		goto freebuf_done;	/* No data copying for a WRITE */
13216	}
13217
13218	if (has_wmap) {
13219		/*
13220		 * This is a READ command from the READ phase of a
13221		 * read-modify-write request. We have to copy the data given
13222		 * by the user OVER the data returned by the READ command,
13223		 * then convert the command from a READ to a WRITE and send
13224		 * it back to the target.
13225		 */
13226		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
13227		    copy_length);
13228
13229		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
13230
13231		/*
13232		 * Dispatch the WRITE command to the taskq thread, which
13233		 * will in turn send the command to the target. When the
13234		 * WRITE command completes, we (sd_mapblocksize_iodone())
13235		 * will get called again as part of the iodone chain
13236		 * processing for it. Note that we will still be dealing
13237		 * with the shadow buf at that point.
13238		 */
13239		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
13240		    KM_NOSLEEP) != 0) {
13241			/*
13242			 * Dispatch was successful so we are done. Return
13243			 * without going any higher up the iodone chain. Do
13244			 * not free up any layer-private data until after the
13245			 * WRITE completes.
13246			 */
13247			return;
13248		}
13249
13250		/*
13251		 * Dispatch of the WRITE command failed; set up the error
13252		 * condition and send this IO back up the iodone chain.
13253		 */
13254		bioerror(orig_bp, EIO);
13255		orig_bp->b_resid = orig_bp->b_bcount;
13256
13257	} else {
13258		/*
13259		 * This is a regular READ request (ie, not a RMW). Copy the
13260		 * data from the shadow buf into the original buf. The
13261		 * copy_offset compensates for any "misalignment" between the
13262		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
13263		 * original buf (with its un->un_sys_blocksize blocks).
13264		 */
13265		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
13266		    copy_length);
13267	}
13268
13269freebuf_done:
13270
13271	/*
13272	 * At this point we still have both the shadow buf AND the original
13273	 * buf to deal with, as well as the layer-private data area in each.
13274	 * Local variables are as follows:
13275	 *
13276	 * bp -- points to shadow buf
13277	 * xp -- points to xbuf of shadow buf
13278	 * bsp -- points to layer-private data area of shadow buf
13279	 * orig_bp -- points to original buf
13280	 *
13281	 * First free the shadow buf and its associated xbuf, then free the
13282	 * layer-private data area from the shadow buf. There is no need to
13283	 * restore xb_private in the shadow xbuf.
13284	 */
13285	sd_shadow_buf_free(bp);
13286	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
13287
13288	/*
13289	 * Now update the local variables to point to the original buf, xbuf,
13290	 * and layer-private area.
13291	 */
13292	bp = orig_bp;
13293	xp = SD_GET_XBUF(bp);
13294	ASSERT(xp != NULL);
13295	ASSERT(xp == orig_xp);
13296	bsp = xp->xb_private;
13297	ASSERT(bsp != NULL);
13298
13299done:
13300	/*
13301	 * Restore xb_private to whatever it was set to by the next higher
13302	 * layer in the chain, then free the layer-private data area.
13303	 */
13304	xp->xb_private = bsp->mbs_oprivate;
13305	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
13306
13307exit:
13308	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
13309	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
13310
13311	SD_NEXT_IODONE(index, un, bp);
13312}
13313
13314
13315/*
13316 *    Function: sd_checksum_iostart
13317 *
13318 * Description: A stub function for a layer that's currently not used.
13319 *		For now just a placeholder.
13320 *
13321 *     Context: Kernel thread context
13322 */
13323
13324static void
13325sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13326{
13327	ASSERT(un != NULL);
13328	ASSERT(bp != NULL);
13329	ASSERT(!mutex_owned(SD_MUTEX(un)));
13330	SD_NEXT_IOSTART(index, un, bp);
13331}
13332
13333
13334/*
13335 *    Function: sd_checksum_iodone
13336 *
13337 * Description: A stub function for a layer that's currently not used.
13338 *		For now just a placeholder.
13339 *
13340 *     Context: May be called under interrupt context
13341 */
13342
13343static void
13344sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13345{
13346	ASSERT(un != NULL);
13347	ASSERT(bp != NULL);
13348	ASSERT(!mutex_owned(SD_MUTEX(un)));
13349	SD_NEXT_IODONE(index, un, bp);
13350}
13351
13352
13353/*
13354 *    Function: sd_checksum_uscsi_iostart
13355 *
13356 * Description: A stub function for a layer that's currently not used.
13357 *		For now just a placeholder.
13358 *
13359 *     Context: Kernel thread context
13360 */
13361
13362static void
13363sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13364{
13365	ASSERT(un != NULL);
13366	ASSERT(bp != NULL);
13367	ASSERT(!mutex_owned(SD_MUTEX(un)));
13368	SD_NEXT_IOSTART(index, un, bp);
13369}
13370
13371
13372/*
13373 *    Function: sd_checksum_uscsi_iodone
13374 *
13375 * Description: A stub function for a layer that's currently not used.
13376 *		For now just a placeholder.
13377 *
13378 *     Context: May be called under interrupt context
13379 */
13380
13381static void
13382sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13383{
13384	ASSERT(un != NULL);
13385	ASSERT(bp != NULL);
13386	ASSERT(!mutex_owned(SD_MUTEX(un)));
13387	SD_NEXT_IODONE(index, un, bp);
13388}
13389
13390
13391/*
13392 *    Function: sd_pm_iostart
13393 *
13394 * Description: iostart-side routine for power management.
13395 *
13396 *     Context: Kernel thread context
13397 */
13398
13399static void
13400sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13401{
13402	ASSERT(un != NULL);
13403	ASSERT(bp != NULL);
13404	ASSERT(!mutex_owned(SD_MUTEX(un)));
13405	ASSERT(!mutex_owned(&un->un_pm_mutex));
13406
13407	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13408
13409	if (sd_pm_entry(un) != DDI_SUCCESS) {
13410		/*
13411		 * Set up to return the failed buf back up the 'iodone'
13412		 * side of the calling chain.
13413		 */
13414		bioerror(bp, EIO);
13415		bp->b_resid = bp->b_bcount;
13416
13417		SD_BEGIN_IODONE(index, un, bp);
13418
13419		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13420		return;
13421	}
13422
13423	SD_NEXT_IOSTART(index, un, bp);
13424
13425	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13426}
13427
13428
13429/*
13430 *    Function: sd_pm_iodone
13431 *
13432 * Description: iodone-side routine for power management.
13433 *
13434 *     Context: may be called from interrupt context
13435 */
13436
13437static void
13438sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13439{
13440	ASSERT(un != NULL);
13441	ASSERT(bp != NULL);
13442	ASSERT(!mutex_owned(&un->un_pm_mutex));
13443
13444	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13445
13446	/*
13447	 * After attach the following flag is only read, so don't
13448	 * take the penalty of acquiring a mutex for it.
13449	 */
13450	if (un->un_f_pm_is_enabled == TRUE) {
13451		sd_pm_exit(un);
13452	}
13453
13454	SD_NEXT_IODONE(index, un, bp);
13455
13456	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13457}
13458
13459
13460/*
13461 *    Function: sd_core_iostart
13462 *
13463 * Description: Primary driver function for enqueuing buf(9S) structs from
13464 *		the system and initiating IO to the target device
13465 *
13466 *     Context: Kernel thread context. Can sleep.
13467 *
13468 * Assumptions:  - The given xp->xb_blkno is absolute
13469 *		   (ie, relative to the start of the device).
13470 *		 - The IO is to be done using the native blocksize of
13471 *		   the device, as specified in un->un_tgt_blocksize.
13472 */
13473/* ARGSUSED */
13474static void
13475sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13476{
13477	struct sd_xbuf *xp;
13478
13479	ASSERT(un != NULL);
13480	ASSERT(bp != NULL);
13481	ASSERT(!mutex_owned(SD_MUTEX(un)));
13482	ASSERT(bp->b_resid == 0);
13483
13484	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13485
13486	xp = SD_GET_XBUF(bp);
13487	ASSERT(xp != NULL);
13488
13489	mutex_enter(SD_MUTEX(un));
13490
13491	/*
13492	 * If we are currently in the failfast state, fail any new IO
13493	 * that has B_FAILFAST set, then return.
13494	 */
13495	if ((bp->b_flags & B_FAILFAST) &&
13496	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13497		mutex_exit(SD_MUTEX(un));
13498		bioerror(bp, EIO);
13499		bp->b_resid = bp->b_bcount;
13500		SD_BEGIN_IODONE(index, un, bp);
13501		return;
13502	}
13503
13504	if (SD_IS_DIRECT_PRIORITY(xp)) {
13505		/*
13506		 * Priority command -- transport it immediately.
13507		 *
13508		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13509		 * because all direct priority commands should be associated
13510		 * with error recovery actions which we don't want to retry.
13511		 */
13512		sd_start_cmds(un, bp);
13513	} else {
13514		/*
13515		 * Normal command -- add it to the wait queue, then start
13516		 * transporting commands from the wait queue.
13517		 */
13518		sd_add_buf_to_waitq(un, bp);
13519		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13520		sd_start_cmds(un, NULL);
13521	}
13522
13523	mutex_exit(SD_MUTEX(un));
13524
13525	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13526}
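
/*
 * Editorial note (hedged): B_FAILFAST is set by upper layers (e.g.
 * volume managers) that prefer a quick EIO over a long retry sequence.
 * Once repeated failures put the unit into SD_FAILFAST_ACTIVE,
 * sd_core_iostart() fails such bufs immediately instead of queueing
 * them, which is what makes fast path failover practical.
 */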
13527
13528
13529/*
13530 *    Function: sd_init_cdb_limits
13531 *
13532 * Description: This is to handle scsi_pkt initialization differences
13533 *		between the driver platforms.
13534 *
13535 *		Legacy behaviors:
13536 *
13537 *		If the block number or the sector count exceeds the
13538 *		capabilities of a Group 0 command, shift over to a
13539 *		Group 1 command. We don't blindly use Group 1
13540 *		commands because a) some drives (CDC Wren IVs) get a
13541 *		bit confused, and b) there is probably a fair amount
13542 *		of speed difference for a target to receive and decode
13543 *		a 10 byte command instead of a 6 byte command.
13544 *
13545 *		The xfer time difference of 6 vs 10 byte CDBs is
13546 *		still significant, so this code is still worthwhile.
13547 *		10 byte CDBs are very inefficient with the fas HBA driver
13548 *		and older disks. Each CDB byte took 1 usec with some
13549 *		popular disks.
13550 *
13551 *     Context: Must be called at attach time
13552 */
13553
13554static void
13555sd_init_cdb_limits(struct sd_lun *un)
13556{
13557	int hba_cdb_limit;
13558
13559	/*
13560	 * Use CDB_GROUP1 commands for most devices except for
13561	 * parallel SCSI fixed drives in which case we get better
13562	 * performance using CDB_GROUP0 commands (where applicable).
13563	 */
13564	un->un_mincdb = SD_CDB_GROUP1;
13565#if !defined(__fibre)
13566	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13567	    !un->un_f_has_removable_media) {
13568		un->un_mincdb = SD_CDB_GROUP0;
13569	}
13570#endif
13571
13572	/*
13573	 * Try to read the max-cdb-length supported by HBA.
13574	 */
13575	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13576	if (0 >= un->un_max_hba_cdb) {
13577		un->un_max_hba_cdb = CDB_GROUP4;
13578		hba_cdb_limit = SD_CDB_GROUP4;
13579	} else if (0 < un->un_max_hba_cdb &&
13580	    un->un_max_hba_cdb < CDB_GROUP1) {
13581		hba_cdb_limit = SD_CDB_GROUP0;
13582	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13583	    un->un_max_hba_cdb < CDB_GROUP5) {
13584		hba_cdb_limit = SD_CDB_GROUP1;
13585	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13586	    un->un_max_hba_cdb < CDB_GROUP4) {
13587		hba_cdb_limit = SD_CDB_GROUP5;
13588	} else {
13589		hba_cdb_limit = SD_CDB_GROUP4;
13590	}
13591
13592	/*
13593	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13594	 * commands for fixed disks unless we are building for a 32 bit
13595	 * kernel.
13596	 */
13597#ifdef _LP64
13598	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13599	    min(hba_cdb_limit, SD_CDB_GROUP4);
13600#else
13601	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13602	    min(hba_cdb_limit, SD_CDB_GROUP1);
13603#endif
13604
13605	/*
13606	 * x86 systems require the PKT_DMA_PARTIAL flag
13607	 */
13608#if defined(__x86)
13609	un->un_pkt_flags = PKT_DMA_PARTIAL;
13610#else
13611	un->un_pkt_flags = 0;
13612#endif
13613
13614	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13615	    ? sizeof (struct scsi_arq_status) : 1);
13616	un->un_cmd_timeout = (ushort_t)sd_io_time;
13617	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13618}
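
/*
 * Worked example (editorial; assumes the usual CDB_GROUP* byte counts:
 * GROUP0=6, GROUP1=10, GROUP5=12, GROUP4=16): an HBA whose
 * "max-cdb-length" capability reports 12 falls into the
 * [CDB_GROUP5, CDB_GROUP4) range above, so hba_cdb_limit becomes
 * SD_CDB_GROUP5 and 12-byte CDBs are the largest built for that HBA.
 * An HBA that does not support the capability (scsi_ifgetcap() returns
 * -1) defaults to the full 16-byte CDB_GROUP4 limit.
 */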
13619
13620
13621/*
13622 *    Function: sd_initpkt_for_buf
13623 *
13624 * Description: Allocate and initialize for transport a scsi_pkt struct,
13625 *		based upon the info specified in the given buf struct.
13626 *
13627 *		Assumes the xb_blkno in the request is absolute (ie,
13628 *		relative to the start of the device, NOT the partition!).
13629 *		Also assumes that the request is using the native block
13630 *		size of the device (as returned by the READ CAPACITY
13631 *		command).
13632 *
13633 * Return Code: SD_PKT_ALLOC_SUCCESS
13634 *		SD_PKT_ALLOC_FAILURE
13635 *		SD_PKT_ALLOC_FAILURE_NO_DMA
13636 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13637 *
13638 *     Context: Kernel thread and may be called from software interrupt context
13639 *		as part of a sdrunout callback. This function may not block or
13640 *		call routines that block
13641 */
13642
13643static int
13644sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13645{
13646	struct sd_xbuf	*xp;
13647	struct scsi_pkt *pktp = NULL;
13648	struct sd_lun	*un;
13649	size_t		blockcount;
13650	daddr_t		startblock;
13651	int		rval;
13652	int		cmd_flags;
13653
13654	ASSERT(bp != NULL);
13655	ASSERT(pktpp != NULL);
13656	xp = SD_GET_XBUF(bp);
13657	ASSERT(xp != NULL);
13658	un = SD_GET_UN(bp);
13659	ASSERT(un != NULL);
13660	ASSERT(mutex_owned(SD_MUTEX(un)));
13661	ASSERT(bp->b_resid == 0);
13662
13663	SD_TRACE(SD_LOG_IO_CORE, un,
13664	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13665
13666#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13667	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13668		/*
13669		 * Already have a scsi_pkt -- just need DMA resources.
13670		 * We must recompute the CDB in case the mapping returns
13671		 * a nonzero pkt_resid.
13672		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13673		 * that is being retried, the unmap/remap of the DMA resources
13674		 * will result in the entire transfer starting over again
13675		 * from the very first block.
13676		 */
13677		ASSERT(xp->xb_pktp != NULL);
13678		pktp = xp->xb_pktp;
13679	} else {
13680		pktp = NULL;
13681	}
13682#endif /* __i386 || __amd64 */
13683
13684	startblock = xp->xb_blkno;	/* Absolute block num. */
13685	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13686
13687#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13688
13689	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13690
13691#else
13692
13693	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13694
13695#endif
13696
13697	/*
13698	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13699	 * call scsi_init_pkt, and build the CDB.
13700	 */
13701	rval = sd_setup_rw_pkt(un, &pktp, bp,
13702	    cmd_flags, sdrunout, (caddr_t)un,
13703	    startblock, blockcount);
13704
13705	if (rval == 0) {
13706		/*
13707		 * Success.
13708		 *
13709		 * If partial DMA is being used and required for this transfer,
13710		 * set it up here.
13711		 */
13712		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13713		    (pktp->pkt_resid != 0)) {
13714
13715			/*
13716			 * Save the pkt_resid for the next xfer of this
13717			 * partial DMA transfer.
13718			 */
13719			xp->xb_dma_resid = pktp->pkt_resid;
13720
13721			/* rezero resid */
13722			pktp->pkt_resid = 0;
13723
13724		} else {
13725			xp->xb_dma_resid = 0;
13726		}
13727
13728		pktp->pkt_flags = un->un_tagflags;
13729		pktp->pkt_time  = un->un_cmd_timeout;
13730		pktp->pkt_comp  = sdintr;
13731
13732		pktp->pkt_private = bp;
13733		*pktpp = pktp;
13734
13735		SD_TRACE(SD_LOG_IO_CORE, un,
13736		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13737
13738#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13739		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13740#endif
13741
13742		return (SD_PKT_ALLOC_SUCCESS);
13743
13744	}
13745
13746	/*
13747	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13748	 * from sd_setup_rw_pkt.
13749	 */
13750	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13751
13752	if (rval == SD_PKT_ALLOC_FAILURE) {
13753		*pktpp = NULL;
13754		/*
13755		 * Set the driver state to RWAIT to indicate the driver
13756		 * is waiting on resource allocations. The driver will not
13757		 * suspend, pm_suspend, or detach while the state is RWAIT.
13758		 */
13759		New_state(un, SD_STATE_RWAIT);
13760
13761		SD_ERROR(SD_LOG_IO_CORE, un,
13762		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13763
13764		if ((bp->b_flags & B_ERROR) != 0) {
13765			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13766		}
13767		return (SD_PKT_ALLOC_FAILURE);
13768	} else {
13769		/*
13770		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13771		 *
13772		 * This should never happen.  Maybe someone messed with the
13773		 * kernel's minphys?
13774		 */
13775		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13776		    "Request rejected: too large for CDB: "
13777		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13778		SD_ERROR(SD_LOG_IO_CORE, un,
13779		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13780		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13781
13782	}
13783}
13784
13785
13786/*
13787 *    Function: sd_destroypkt_for_buf
13788 *
13789 * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13790 *
13791 *     Context: Kernel thread or interrupt context
13792 */
13793
13794static void
13795sd_destroypkt_for_buf(struct buf *bp)
13796{
13797	ASSERT(bp != NULL);
13798	ASSERT(SD_GET_UN(bp) != NULL);
13799
13800	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13801	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13802
13803	ASSERT(SD_GET_PKTP(bp) != NULL);
13804	scsi_destroy_pkt(SD_GET_PKTP(bp));
13805
13806	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13807	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13808}
13809
13810/*
13811 *    Function: sd_setup_rw_pkt
13812 *
13813 * Description: Determines appropriate CDB group for the requested LBA
13814 *		and transfer length, calls scsi_init_pkt, and builds
13815 *		the CDB.  Do not use for partial DMA transfers except
13816 *		for the initial transfer since the CDB size must
13817 *		remain constant.
13818 *
13819 *     Context: Kernel thread and may be called from software interrupt
13820 *		context as part of a sdrunout callback. This function may not
13821 *		block or call routines that block
13822 */
13823
13824
13825int
13826sd_setup_rw_pkt(struct sd_lun *un,
13827    struct scsi_pkt **pktpp, struct buf *bp, int flags,
13828    int (*callback)(caddr_t), caddr_t callback_arg,
13829    diskaddr_t lba, uint32_t blockcount)
13830{
13831	struct scsi_pkt *return_pktp;
13832	union scsi_cdb *cdbp;
13833	struct sd_cdbinfo *cp = NULL;
13834	int i;
13835
13836	/*
13837	 * See which size CDB to use, based upon the request.
13838	 */
13839	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13840
13841		/*
13842		 * Check lba and block count against sd_cdbtab limits.
13843		 * In the partial DMA case, we have to use the same size
13844		 * CDB for all the transfers.  Check lba + blockcount
13845		 * against the max LBA so we know that segment of the
13846		 * transfer can use the CDB we select.
13847		 */
13848		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13849		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13850
13851			/*
13852			 * The command will fit into the CDB type
13853			 * specified by sd_cdbtab[i].
13854			 */
13855			cp = sd_cdbtab + i;
13856
13857			/*
13858			 * Call scsi_init_pkt so we can fill in the
13859			 * CDB.
13860			 */
13861			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13862			    bp, cp->sc_grpcode, un->un_status_len, 0,
13863			    flags, callback, callback_arg);
13864
13865			if (return_pktp != NULL) {
13866
13867				/*
13868				 * Return new value of pkt
13869				 */
13870				*pktpp = return_pktp;
13871
13872				/*
13873				 * To be safe, zero the CDB, ensuring there is
13874				 * no leftover data from a previous command.
13875				 */
13876				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13877
13878				/*
13879				 * Handle partial DMA mapping
13880				 */
13881				if (return_pktp->pkt_resid != 0) {
13882
13883					/*
13884					 * Not going to xfer as many blocks as
13885					 * originally expected
13886					 */
13887					blockcount -=
13888					    SD_BYTES2TGTBLOCKS(un,
13889						return_pktp->pkt_resid);
13890				}
13891
13892				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13893
13894				/*
13895				 * Set command byte based on the CDB
13896				 * type we matched.
13897				 */
13898				cdbp->scc_cmd = cp->sc_grpmask |
13899				    ((bp->b_flags & B_READ) ?
13900					SCMD_READ : SCMD_WRITE);
13901
13902				SD_FILL_SCSI1_LUN(un, return_pktp);
13903
13904				/*
13905				 * Fill in LBA and length
13906				 */
13907				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13908				    (cp->sc_grpcode == CDB_GROUP4) ||
13909				    (cp->sc_grpcode == CDB_GROUP0) ||
13910				    (cp->sc_grpcode == CDB_GROUP5));
13911
13912				if (cp->sc_grpcode == CDB_GROUP1) {
13913					FORMG1ADDR(cdbp, lba);
13914					FORMG1COUNT(cdbp, blockcount);
13915					return (0);
13916				} else if (cp->sc_grpcode == CDB_GROUP4) {
13917					FORMG4LONGADDR(cdbp, lba);
13918					FORMG4COUNT(cdbp, blockcount);
13919					return (0);
13920				} else if (cp->sc_grpcode == CDB_GROUP0) {
13921					FORMG0ADDR(cdbp, lba);
13922					FORMG0COUNT(cdbp, blockcount);
13923					return (0);
13924				} else if (cp->sc_grpcode == CDB_GROUP5) {
13925					FORMG5ADDR(cdbp, lba);
13926					FORMG5COUNT(cdbp, blockcount);
13927					return (0);
13928				}
13929
13930				/*
13931				 * It should be impossible to not match one
13932				 * of the CDB types above, so we should never
13933				 * reach this point.  Set the CDB command byte
13934				 * to test-unit-ready to avoid writing
13935				 * to somewhere we don't intend.
13936				 */
13937				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13938				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13939			} else {
13940				/*
13941				 * Couldn't get scsi_pkt
13942				 */
13943				return (SD_PKT_ALLOC_FAILURE);
13944			}
13945		}
13946	}
13947
13948	/*
13949	 * None of the available CDB types were suitable.  This really
13950	 * should never happen:  on a 64 bit system we support
13951	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13952	 * and on a 32 bit system we will refuse to bind to a device
13953	 * larger than 2TB so addresses will never be larger than 32 bits.
13954	 */
13955	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13956}
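
/*
 * Illustrative sketch (editorial; not driver code): the FORMG*ADDR and
 * FORMG*COUNT macros above pack the LBA and transfer length into
 * big-endian fields of the CDB.  A hand-rolled Group 1 (READ(10))
 * encoder would look roughly like this; the real macros operate on the
 * union scsi_cdb bitfields instead.  The CDB is assumed to have been
 * zeroed first, as the driver does with bzero() above.
 */
#if 0	/* example only -- never compiled */
static void
example_build_read10(uint8_t cdb[10], uint32_t lba, uint16_t count)
{
	cdb[0] = 0x28;			/* READ(10) opcode */
	cdb[2] = (lba >> 24) & 0xff;	/* 32-bit LBA, big-endian */
	cdb[3] = (lba >> 16) & 0xff;
	cdb[4] = (lba >> 8) & 0xff;
	cdb[5] = lba & 0xff;
	cdb[7] = (count >> 8) & 0xff;	/* 16-bit transfer length */
	cdb[8] = count & 0xff;
}
#endif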
13957
13958#if defined(__i386) || defined(__amd64)
13959/*
13960 *    Function: sd_setup_next_rw_pkt
13961 *
13962 * Description: Setup packet for partial DMA transfers, except for the
13963 * 		initial transfer.  sd_setup_rw_pkt should be used for
13964 *		the initial transfer.
13965 *
13966 *     Context: Kernel thread and may be called from interrupt context.
13967 */
13968
13969int
13970sd_setup_next_rw_pkt(struct sd_lun *un,
13971    struct scsi_pkt *pktp, struct buf *bp,
13972    diskaddr_t lba, uint32_t blockcount)
13973{
13974	uchar_t com;
13975	union scsi_cdb *cdbp;
13976	uchar_t cdb_group_id;
13977
13978	ASSERT(pktp != NULL);
13979	ASSERT(pktp->pkt_cdbp != NULL);
13980
13981	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13982	com = cdbp->scc_cmd;
13983	cdb_group_id = CDB_GROUPID(com);
13984
13985	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13986	    (cdb_group_id == CDB_GROUPID_1) ||
13987	    (cdb_group_id == CDB_GROUPID_4) ||
13988	    (cdb_group_id == CDB_GROUPID_5));
13989
13990	/*
13991	 * Move pkt to the next portion of the xfer.
13992	 * func is NULL_FUNC so we do not have to release
13993	 * the disk mutex here.
13994	 */
13995	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13996	    NULL_FUNC, NULL) == pktp) {
13997		/* Success.  Handle partial DMA */
13998		if (pktp->pkt_resid != 0) {
13999			blockcount -=
14000			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
14001		}
14002
14003		cdbp->scc_cmd = com;
14004		SD_FILL_SCSI1_LUN(un, pktp);
14005		if (cdb_group_id == CDB_GROUPID_1) {
14006			FORMG1ADDR(cdbp, lba);
14007			FORMG1COUNT(cdbp, blockcount);
14008			return (0);
14009		} else if (cdb_group_id == CDB_GROUPID_4) {
14010			FORMG4LONGADDR(cdbp, lba);
14011			FORMG4COUNT(cdbp, blockcount);
14012			return (0);
14013		} else if (cdb_group_id == CDB_GROUPID_0) {
14014			FORMG0ADDR(cdbp, lba);
14015			FORMG0COUNT(cdbp, blockcount);
14016			return (0);
14017		} else if (cdb_group_id == CDB_GROUPID_5) {
14018			FORMG5ADDR(cdbp, lba);
14019			FORMG5COUNT(cdbp, blockcount);
14020			return (0);
14021		}
14022
14023		/* Unreachable */
14024		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
14025	}
14026
14027	/*
14028	 * Error setting up next portion of cmd transfer.
14029	 * Something is definitely very wrong and this
14030	 * should not happen.
14031	 */
14032	return (SD_PKT_ALLOC_FAILURE);
14033}
14034#endif /* defined(__i386) || defined(__amd64) */
14035
14036/*
14037 *    Function: sd_initpkt_for_uscsi
14038 *
14039 * Description: Allocate and initialize for transport a scsi_pkt struct,
14040 *		based upon the info specified in the given uscsi_cmd struct.
14041 *
14042 * Return Code: SD_PKT_ALLOC_SUCCESS
14043 *		SD_PKT_ALLOC_FAILURE
14044 *		SD_PKT_ALLOC_FAILURE_NO_DMA
14045 *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
14046 *
14047 *     Context: Kernel thread and may be called from software interrupt context
14048 *		as part of a sdrunout callback. This function may not block or
14049 *		call routines that block
14050 */
14051
14052static int
14053sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
14054{
14055	struct uscsi_cmd *uscmd;
14056	struct sd_xbuf	*xp;
14057	struct scsi_pkt	*pktp;
14058	struct sd_lun	*un;
14059	uint32_t	flags = 0;
14060
14061	ASSERT(bp != NULL);
14062	ASSERT(pktpp != NULL);
14063	xp = SD_GET_XBUF(bp);
14064	ASSERT(xp != NULL);
14065	un = SD_GET_UN(bp);
14066	ASSERT(un != NULL);
14067	ASSERT(mutex_owned(SD_MUTEX(un)));
14068
14069	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
14070	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
14071	ASSERT(uscmd != NULL);
14072
14073	SD_TRACE(SD_LOG_IO_CORE, un,
14074	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
14075
14076	/*
14077	 * Allocate the scsi_pkt for the command.
14078	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
14079	 *	 during scsi_init_pkt time and will continue to use the
14080	 *	 same path as long as the same scsi_pkt is used without
14081	 *	 intervening scsi_dmafree(). Since a uscsi command does
14082	 *	 not call scsi_dmafree() before retrying a failed command,
14083	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
14084	 *	 NOT set, so that scsi_vhci can use another available path
14085	 *	 on retry. Besides, uscsi commands do not allow DMA breakup,
14086	 *	 so there is no need to set the PKT_DMA_PARTIAL flag.
14087	 */
14088	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
14089	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
14090	    sizeof (struct scsi_arq_status), 0,
14091	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
14092	    sdrunout, (caddr_t)un);
14093
14094	if (pktp == NULL) {
14095		*pktpp = NULL;
14096		/*
14097		 * Set the driver state to RWAIT to indicate the driver
14098		 * is waiting on resource allocations. The driver will not
14099		 * suspend, pm_suspend, or detach while the state is RWAIT.
14100		 */
14101		New_state(un, SD_STATE_RWAIT);
14102
14103		SD_ERROR(SD_LOG_IO_CORE, un,
14104		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
14105
14106		if ((bp->b_flags & B_ERROR) != 0) {
14107			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
14108		}
14109		return (SD_PKT_ALLOC_FAILURE);
14110	}
14111
14112	/*
14113	 * We do not do DMA breakup for USCSI commands, so return failure
14114	 * here if all the needed DMA resources were not allocated.
14115	 */
14116	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
14117	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
14118		scsi_destroy_pkt(pktp);
14119		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
14120		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
14121		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
14122	}
14123
14124	/* Init the cdb from the given uscsi struct */
14125	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
14126	    uscmd->uscsi_cdb[0], 0, 0, 0);
14127
14128	SD_FILL_SCSI1_LUN(un, pktp);
14129
14130	/*
14131	 * Set up the optional USCSI flags. See the uscsi(7I) man page
14132	 * for listing of the supported flags.
14133	 */
14134
14135	if (uscmd->uscsi_flags & USCSI_SILENT) {
14136		flags |= FLAG_SILENT;
14137	}
14138
14139	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
14140		flags |= FLAG_DIAGNOSE;
14141	}
14142
14143	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
14144		flags |= FLAG_ISOLATE;
14145	}
14146
14147	if (un->un_f_is_fibre == FALSE) {
14148		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
14149			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
14150		}
14151	}
14152
14153	/*
14154	 * Set the pkt flags here so we save time later.
14155	 * Note: These flags are NOT in the uscsi man page!!!
14156	 */
14157	if (uscmd->uscsi_flags & USCSI_HEAD) {
14158		flags |= FLAG_HEAD;
14159	}
14160
14161	if (uscmd->uscsi_flags & USCSI_NOINTR) {
14162		flags |= FLAG_NOINTR;
14163	}
14164
14165	/*
14166	 * For tagged queueing, things get a bit complicated.
14167	 * Check first for head of queue and last for ordered queue.
14168	 * If neither head nor ordered, use the default driver tag flags.
14169	 */
14170	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
14171		if (uscmd->uscsi_flags & USCSI_HTAG) {
14172			flags |= FLAG_HTAG;
14173		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
14174			flags |= FLAG_OTAG;
14175		} else {
14176			flags |= un->un_tagflags & FLAG_TAGMASK;
14177		}
14178	}
14179
14180	if (uscmd->uscsi_flags & USCSI_NODISCON) {
14181		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
14182	}
14183
14184	pktp->pkt_flags = flags;
14185
14186	/* Copy the caller's CDB into the pkt... */
14187	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
14188
14189	if (uscmd->uscsi_timeout == 0) {
14190		pktp->pkt_time = un->un_uscsi_timeout;
14191	} else {
14192		pktp->pkt_time = uscmd->uscsi_timeout;
14193	}
14194
14195	/* need it later to identify USCSI request in sdintr */
14196	xp->xb_pkt_flags |= SD_XB_USCSICMD;
14197
14198	xp->xb_sense_resid = uscmd->uscsi_rqresid;
14199
14200	pktp->pkt_private = bp;
14201	pktp->pkt_comp = sdintr;
14202	*pktpp = pktp;
14203
14204	SD_TRACE(SD_LOG_IO_CORE, un,
14205	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
14206
14207	return (SD_PKT_ALLOC_SUCCESS);
14208}
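
/*
 * Illustrative sketch (editorial; field names are from uscsi(7I), but
 * this exact program is an assumption, not part of the driver):
 * requests reach this code via the USCSICMD ioctl, and the uscsi_flags
 * set by the caller (USCSI_SILENT, USCSI_RQENABLE, ...) are what get
 * mapped to FLAG_* pkt flags above.  A minimal user-level TEST UNIT
 * READY issuer might look like this:
 */
#if 0	/* example only -- never compiled */
#include <sys/scsi/impl/uscsi.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
example_tur(const char *devpath)
{
	struct uscsi_cmd ucmd;
	char cdb[6] = { 0 };	/* TEST UNIT READY: all-zero 6-byte CDB */
	char rqbuf[255];
	int fd = open(devpath, O_RDONLY);

	if (fd < 0)
		return (-1);
	(void) memset(&ucmd, 0, sizeof (ucmd));
	ucmd.uscsi_cdb = cdb;
	ucmd.uscsi_cdblen = sizeof (cdb);
	ucmd.uscsi_rqbuf = rqbuf;	/* sense data via USCSI_RQENABLE */
	ucmd.uscsi_rqlen = sizeof (rqbuf);
	ucmd.uscsi_flags = USCSI_SILENT | USCSI_RQENABLE;
	ucmd.uscsi_timeout = 30;	/* 0 would use the driver default */
	if (ioctl(fd, USCSICMD, &ucmd) < 0) {
		(void) close(fd);
		return (-1);
	}
	(void) close(fd);
	return (ucmd.uscsi_status);	/* SCSI status byte */
}
#endif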
14209
14210
14211/*
14212 *    Function: sd_destroypkt_for_uscsi
14213 *
14214 * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
14215 *		IOs. Also saves relevant info into the associated uscsi_cmd
14216 *		struct.
14217 *
14218 *     Context: May be called under interrupt context
14219 */
14220
14221static void
14222sd_destroypkt_for_uscsi(struct buf *bp)
14223{
14224	struct uscsi_cmd *uscmd;
14225	struct sd_xbuf	*xp;
14226	struct scsi_pkt	*pktp;
14227	struct sd_lun	*un;
14228
14229	ASSERT(bp != NULL);
14230	xp = SD_GET_XBUF(bp);
14231	ASSERT(xp != NULL);
14232	un = SD_GET_UN(bp);
14233	ASSERT(un != NULL);
14234	ASSERT(!mutex_owned(SD_MUTEX(un)));
14235	pktp = SD_GET_PKTP(bp);
14236	ASSERT(pktp != NULL);
14237
14238	SD_TRACE(SD_LOG_IO_CORE, un,
14239	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
14240
14241	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
14242	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
14243	ASSERT(uscmd != NULL);
14244
14245	/* Save the status and the residual into the uscsi_cmd struct */
14246	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
14247	uscmd->uscsi_resid  = bp->b_resid;
14248
14249	/*
14250	 * If enabled, copy any saved sense data into the area specified
14251	 * by the uscsi command.
14252	 */
14253	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
14254	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
14255		/*
14256		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
14257		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
14258		 */
14259		uscmd->uscsi_rqstatus = xp->xb_sense_status;
14260		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
14261		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
14262	}
14263
14264	/* We are done with the scsi_pkt; free it now */
14265	ASSERT(SD_GET_PKTP(bp) != NULL);
14266	scsi_destroy_pkt(SD_GET_PKTP(bp));
14267
14268	SD_TRACE(SD_LOG_IO_CORE, un,
14269	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
14270}
14271
14272
14273/*
14274 *    Function: sd_bioclone_alloc
14275 *
14276 * Description: Allocate a buf(9S) and init it as per the given buf
14277 *		and the various arguments.  The associated sd_xbuf
14278 *		struct is (nearly) duplicated.  The struct buf *bp
14279 *		argument is saved in new_xp->xb_private.
14280 *
14281 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14282 *		datalen - size of data area for the shadow bp
14283 *		blkno - starting LBA
14284 *		func - function pointer for b_iodone in the shadow buf. (May
14285 *			be NULL if none.)
14286 *
14287 * Return Code: Pointer to the allocated buf(9S) struct
14288 *
14289 *     Context: Can sleep.
14290 */
14291
14292static struct buf *
14293sd_bioclone_alloc(struct buf *bp, size_t datalen,
14294	daddr_t blkno, int (*func)(struct buf *))
14295{
14296	struct	sd_lun	*un;
14297	struct	sd_xbuf	*xp;
14298	struct	sd_xbuf	*new_xp;
14299	struct	buf	*new_bp;
14300
14301	ASSERT(bp != NULL);
14302	xp = SD_GET_XBUF(bp);
14303	ASSERT(xp != NULL);
14304	un = SD_GET_UN(bp);
14305	ASSERT(un != NULL);
14306	ASSERT(!mutex_owned(SD_MUTEX(un)));
14307
14308	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
14309	    NULL, KM_SLEEP);
14310
14311	new_bp->b_lblkno	= blkno;
14312
14313	/*
14314	 * Allocate an xbuf for the shadow bp and copy the contents of the
14315	 * original xbuf into it.
14316	 */
14317	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14318	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14319
14320	/*
14321	 * The given bp is automatically saved in the xb_private member
14322	 * of the new xbuf.  Callers are allowed to depend on this.
14323	 */
14324	new_xp->xb_private = bp;
14325
14326	new_bp->b_private  = new_xp;
14327
14328	return (new_bp);
14329}
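
/*
 * Editorial note (hedged): a typical use of sd_bioclone_alloc() is to
 * carve a transportable sub-range out of a larger request -- e.g. the
 * portion of an I/O that actually fits inside a partition.  Because
 * the original bp is stashed in xb_private, the shadow's iodone path
 * can always find its way back to complete the caller's buf.
 */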
14330
14331/*
14332 *    Function: sd_shadow_buf_alloc
14333 *
14334 * Description: Allocate a buf(9S) and init it as per the given buf
14335 *		and the various arguments.  The associated sd_xbuf
14336 *		struct is (nearly) duplicated.  The struct buf *bp
14337 *		argument is saved in new_xp->xb_private.
14338 *
14339 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14340 *		datalen - size of data area for the shadow bp
14341 *		bflags - B_READ or B_WRITE (pseudo flag)
14342 *		blkno - starting LBA
14343 *		func - function pointer for b_iodone in the shadow buf. (May
14344 *			be NULL if none.)
14345 *
14346 * Return Code: Pointer to the allocated buf(9S) struct
14347 *
14348 *     Context: Can sleep.
14349 */
14350
14351static struct buf *
14352sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
14353	daddr_t blkno, int (*func)(struct buf *))
14354{
14355	struct	sd_lun	*un;
14356	struct	sd_xbuf	*xp;
14357	struct	sd_xbuf	*new_xp;
14358	struct	buf	*new_bp;
14359
14360	ASSERT(bp != NULL);
14361	xp = SD_GET_XBUF(bp);
14362	ASSERT(xp != NULL);
14363	un = SD_GET_UN(bp);
14364	ASSERT(un != NULL);
14365	ASSERT(!mutex_owned(SD_MUTEX(un)));
14366
14367	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
14368		bp_mapin(bp);
14369	}
14370
14371	bflags &= (B_READ | B_WRITE);
14372#if defined(__i386) || defined(__amd64)
14373	new_bp = getrbuf(KM_SLEEP);
14374	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
14375	new_bp->b_bcount = datalen;
14376	new_bp->b_flags = bflags |
14377	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
14378#else
14379	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
14380	    datalen, bflags, SLEEP_FUNC, NULL);
14381#endif
14382	new_bp->av_forw	= NULL;
14383	new_bp->av_back	= NULL;
14384	new_bp->b_dev	= bp->b_dev;
14385	new_bp->b_blkno	= blkno;
14386	new_bp->b_iodone = func;
14387	new_bp->b_edev	= bp->b_edev;
14388	new_bp->b_resid	= 0;
14389
14390	/* We need to preserve the B_FAILFAST flag */
14391	if (bp->b_flags & B_FAILFAST) {
14392		new_bp->b_flags |= B_FAILFAST;
14393	}
14394
14395	/*
14396	 * Allocate an xbuf for the shadow bp and copy the contents of the
14397	 * original xbuf into it.
14398	 */
14399	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14400	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14401
14402	/* Needed later to copy data between the shadow buf & original buf! */
14403	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
14404
14405	/*
14406	 * The given bp is automatically saved in the xb_private member
14407	 * of the new xbuf.  Callers are allowed to depend on this.
14408	 */
14409	new_xp->xb_private = bp;
14410
14411	new_bp->b_private  = new_xp;
14412
14413	return (new_bp);
14414}
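
/*
 * Editorial note (hedged; the read-modify-write description is an
 * inference about the sd_mapblocksize layer, not taken verbatim from
 * it): for a 512-byte write to a device with 2K native blocks, that
 * layer allocates a shadow buf covering the whole 2K target block,
 * reads the block into the shadow data area, copies the caller's 512
 * bytes over the right offset, and writes the shadow back -- with the
 * original bp recovered from xb_private when the shadow completes.
 */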
14415
14416/*
14417 *    Function: sd_bioclone_free
14418 *
14419 * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14420 *		in the larger-than-partition operation.
14421 *
14422 *     Context: May be called under interrupt context
14423 */
14424
14425static void
14426sd_bioclone_free(struct buf *bp)
14427{
14428	struct sd_xbuf	*xp;
14429
14430	ASSERT(bp != NULL);
14431	xp = SD_GET_XBUF(bp);
14432	ASSERT(xp != NULL);
14433
14434	/*
14435	 * Call bp_mapout() before freeing the buf, in case a lower
14436	 * layer or HBA had done a bp_mapin(). We must do this here
14437	 * as we are the "originator" of the shadow buf.
14438	 */
14439	bp_mapout(bp);
14440
14441	/*
14442	 * Null out b_iodone before freeing the bp, to ensure that the driver
14443	 * never gets confused by a stale value in this field. (Just a little
14444	 * extra defensiveness here.)
14445	 */
14446	bp->b_iodone = NULL;
14447
14448	freerbuf(bp);
14449
14450	kmem_free(xp, sizeof (struct sd_xbuf));
14451}
14452
14453/*
14454 *    Function: sd_shadow_buf_free
14455 *
14456 * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14457 *
14458 *     Context: May be called under interrupt context
14459 */
14460
14461static void
14462sd_shadow_buf_free(struct buf *bp)
14463{
14464	struct sd_xbuf	*xp;
14465
14466	ASSERT(bp != NULL);
14467	xp = SD_GET_XBUF(bp);
14468	ASSERT(xp != NULL);
14469
14470#if defined(__sparc)
14471	 * Call bp_mapout() before freeing the buf, in case a lower
14472	 * layer or HBA had done a bp_mapin(). We must do this here
14473	 * layer or HBA  had done a bp_mapin().  we must do this here
14474	 * as we are the "originator" of the shadow buf.
14475	 */
14476	bp_mapout(bp);
14477#endif
14478
14479	/*
14480	 * Null out b_iodone before freeing the bp, to ensure that the driver
14481	 * never gets confused by a stale value in this field. (Just a little
14482	 * extra defensiveness here.)
14483	 */
14484	bp->b_iodone = NULL;
14485
14486#if defined(__i386) || defined(__amd64)
14487	kmem_free(bp->b_un.b_addr, bp->b_bcount);
14488	freerbuf(bp);
14489#else
14490	scsi_free_consistent_buf(bp);
14491#endif
14492
14493	kmem_free(xp, sizeof (struct sd_xbuf));
14494}
14495
14496
14497/*
14498 *    Function: sd_print_transport_rejected_message
14499 *
14500 * Description: This implements the ludicrously complex rules for printing
14501 *		a "transport rejected" message.  This is to address the
14502 *		specific problem of having a flood of this error message
14503 *		produced when a failover occurs.
14504 *
14505 *     Context: Any.
14506 */
14507
14508static void
14509sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14510	int code)
14511{
14512	ASSERT(un != NULL);
14513	ASSERT(mutex_owned(SD_MUTEX(un)));
14514	ASSERT(xp != NULL);
14515
14516	/*
14517	 * Print the "transport rejected" message under the following
14518	 * conditions:
14519	 *
14520	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14521	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14522	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14523	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14524	 *   scsi_transport(9F) (which indicates that the target might have
14525	 *   gone off-line).  This uses the un->un_tran_fatal_count
14526	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14527	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
14528	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
14529	 *
14530	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14531	 * the preceding cases in order for the message to be printed.
14532	 */
14533	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
14534		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14535		    (code != TRAN_FATAL_ERROR) ||
14536		    (un->un_tran_fatal_count == 1)) {
14537			switch (code) {
14538			case TRAN_BADPKT:
14539				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14540				    "transport rejected bad packet\n");
14541				break;
14542			case TRAN_FATAL_ERROR:
14543				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14544				    "transport rejected fatal error\n");
14545				break;
14546			default:
14547				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14548				    "transport rejected (%d)\n", code);
14549				break;
14550			}
14551		}
14552	}
14553}
14554
14555
14556/*
14557 *    Function: sd_add_buf_to_waitq
14558 *
14559 * Description: Add the given buf(9S) struct to the wait queue for the
14560 *		instance.  If sorting is enabled, then the buf is added
14561 *		to the queue via an elevator sort algorithm (a la
14562 *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14563 *		If sorting is not enabled, then the buf is just added
14564 *		to the end of the wait queue.
14565 *
14566 * Return Code: void
14567 *
14568 *     Context: Does not sleep/block, therefore technically can be called
14569 *		from any context.  However if sorting is enabled then the
14570 *		execution time is indeterminate, and may take a long time if
14571 *		the wait queue grows large.
14572 */
14573
14574static void
14575sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14576{
14577	struct buf *ap;
14578
14579	ASSERT(bp != NULL);
14580	ASSERT(un != NULL);
14581	ASSERT(mutex_owned(SD_MUTEX(un)));
14582
14583	/* If the queue is empty, add the buf as the only entry & return. */
14584	if (un->un_waitq_headp == NULL) {
14585		ASSERT(un->un_waitq_tailp == NULL);
14586		un->un_waitq_headp = un->un_waitq_tailp = bp;
14587		bp->av_forw = NULL;
14588		return;
14589	}
14590
14591	ASSERT(un->un_waitq_tailp != NULL);
14592
14593	/*
14594	 * If sorting is disabled, just add the buf to the tail end of
14595	 * the wait queue and return.
14596	 */
14597	if (un->un_f_disksort_disabled) {
14598		un->un_waitq_tailp->av_forw = bp;
14599		un->un_waitq_tailp = bp;
14600		bp->av_forw = NULL;
14601		return;
14602	}
14603
14604	/*
14605	 * Sort thru the list of requests currently on the wait queue
14606	 * and add the new buf request at the appropriate position.
14607	 *
14608	 * The un->un_waitq_headp is an activity chain pointer on which
14609	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14610	 * first queue holds those requests which are positioned after
14611	 * the current SD_GET_BLKNO() (in the first request); the second holds
14612	 * requests which came in after their SD_GET_BLKNO() number was passed.
14613	 * Thus we implement a one way scan, retracting after reaching
14614	 * the end of the drive to the first request on the second
14615	 * queue, at which time it becomes the first queue.
14616	 * A one-way scan is natural because of the way UNIX read-ahead
14617	 * blocks are allocated.
14618	 *
14619	 * If we are to be serviced after the first request (i.e. our
14620	 * blkno lies before it), add ourselves to the second request list.
14621	 */
14622	ap = un->un_waitq_headp;
14623	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14624		while (ap->av_forw != NULL) {
14625			/*
14626			 * Look for an "inversion" in the (normally
14627			 * ascending) block numbers. This indicates
14628			 * the start of the second request list.
14629			 */
14630			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14631				/*
14632				 * Search the second request list for the
14633				 * first request at a larger block number.
14634				 * We go before that; however if there is
14635				 * no such request, we go at the end.
14636				 */
14637				do {
14638					if (SD_GET_BLKNO(bp) <
14639					    SD_GET_BLKNO(ap->av_forw)) {
14640						goto insert;
14641					}
14642					ap = ap->av_forw;
14643				} while (ap->av_forw != NULL);
14644				goto insert;		/* after last */
14645			}
14646			ap = ap->av_forw;
14647		}
14648
14649		/*
14650		 * No inversions... we will go after the last, and
14651		 * be the first request in the second request list.
14652		 */
14653		goto insert;
14654	}
14655
14656	/*
14657	 * Request is at/after the current request...
14658	 * sort in the first request list.
14659	 */
14660	while (ap->av_forw != NULL) {
14661		/*
14662		 * We want to go after the current request (1) if
14663		 * there is an inversion after it (i.e. it is the end
14664		 * of the first request list), or (2) if the next
14665		 * request is a larger block no. than our request.
14666		 */
14667		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14668		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14669			goto insert;
14670		}
14671		ap = ap->av_forw;
14672	}
14673
14674	/*
14675	 * Neither a second list nor a larger request, therefore
14676	 * we go at the end of the first list (which is the same
14677	 * as the end of the whole shebang).
14678	 */
14679insert:
14680	bp->av_forw = ap->av_forw;
14681	ap->av_forw = bp;
14682
14683	/*
14684	 * If we inserted onto the tail end of the waitq, make sure the
14685	 * tail pointer is updated.
14686	 */
14687	if (ap == un->un_waitq_tailp) {
14688		un->un_waitq_tailp = bp;
14689	}
14690}
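
/*
 * Miniature sketch of the one-way elevator insert above (editorial;
 * simplified to a singly-linked list with no tail pointer or kernel
 * types, but following the same two-run ordering):
 */
#if 0	/* example only -- never compiled */
struct req {
	long		blkno;
	struct req	*next;
};

static void
example_oneway_insert(struct req **headp, struct req *r)
{
	struct req *ap = *headp;

	if (ap == NULL) {			/* empty queue */
		r->next = NULL;
		*headp = r;
		return;
	}
	if (r->blkno < ap->blkno) {
		/* Belongs in the second (wrapped-around) ascending run. */
		while (ap->next != NULL && ap->next->blkno >= ap->blkno)
			ap = ap->next;		/* find the inversion */
		while (ap->next != NULL && ap->next->blkno <= r->blkno)
			ap = ap->next;		/* sort into second run */
	} else {
		/* Sort into the first run, stopping at the inversion. */
		while (ap->next != NULL && ap->next->blkno >= ap->blkno &&
		    ap->next->blkno <= r->blkno)
			ap = ap->next;
	}
	r->next = ap->next;
	ap->next = r;
}
#endif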
14691
14692
14693/*
14694 *    Function: sd_start_cmds
14695 *
14696 * Description: Remove and transport cmds from the driver queues.
14697 *
14698 *   Arguments: un - pointer to the unit (soft state) struct for the target.
14699 *
14700 *		immed_bp - ptr to a buf to be transported immediately. Only
14701 *		the immed_bp is transported; bufs on the waitq are not
14702 *		processed and the un_retry_bp is not checked.  If immed_bp is
14703 *		NULL, then normal queue processing is performed.
14704 *
14705 *     Context: May be called from kernel thread context, interrupt context,
14706 *		or runout callback context. This function may not block or
14707 *		call routines that block.
14708 */
14709
14710static void
14711sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14712{
14713	struct	sd_xbuf	*xp;
14714	struct	buf	*bp;
14715	void	(*statp)(kstat_io_t *);
14716#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14717	void	(*saved_statp)(kstat_io_t *);
14718#endif
14719	int	rval;
14720
14721	ASSERT(un != NULL);
14722	ASSERT(mutex_owned(SD_MUTEX(un)));
14723	ASSERT(un->un_ncmds_in_transport >= 0);
14724	ASSERT(un->un_throttle >= 0);
14725
14726	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14727
14728	do {
14729#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14730		saved_statp = NULL;
14731#endif
14732
14733		/*
14734		 * If we are syncing or dumping, fail the command to
14735		 * avoid recursively calling back into scsi_transport().
14736		 * The dump I/O itself uses a separate code path so this
14737		 * only prevents non-dump I/O from being sent while dumping.
14738		 * File system sync takes place before dumping begins.
14739		 * During panic, filesystem I/O is allowed provided
14740		 * un_in_callback is <= 1.  This is to prevent recursion
14741		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14742		 * sd_start_cmds and so on.  See panic.c for more information
14743		 * about the states the system can be in during panic.
14744		 */
14745		if ((un->un_state == SD_STATE_DUMPING) ||
14746		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14747			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14748			    "sd_start_cmds: panicking\n");
14749			goto exit;
14750		}
14751
14752		if ((bp = immed_bp) != NULL) {
14753			/*
14754			 * We have a bp that must be transported immediately.
14755			 * It's OK to transport the immed_bp here without doing
14756			 * the throttle limit check because the immed_bp is
14757			 * always used in a retry/recovery case. This means
14758			 * that we know we are not at the throttle limit by
14759			 * virtue of the fact that to get here we must have
14760			 * already gotten a command back via sdintr(). This also
14761			 * relies on (1) the command on un_retry_bp preventing
14762			 * further commands from the waitq from being issued;
14763			 * and (2) the code in sd_retry_command checking the
14764			 * throttle limit before issuing a delayed or immediate
14765			 * retry. This holds even if the throttle limit is
14766			 * currently ratcheted down from its maximum value.
14767			 */
14768			statp = kstat_runq_enter;
14769			if (bp == un->un_retry_bp) {
14770				ASSERT((un->un_retry_statp == NULL) ||
14771				    (un->un_retry_statp == kstat_waitq_enter) ||
14772				    (un->un_retry_statp ==
14773				    kstat_runq_back_to_waitq));
14774				/*
14775				 * If the waitq kstat was incremented when
14776				 * sd_set_retry_bp() queued this bp for a retry,
14777				 * then we must set up statp so that the waitq
14778				 * count will get decremented correctly below.
14779				 * Also we must clear un->un_retry_statp to
14780				 * ensure that we do not act on a stale value
14781				 * in this field.
14782				 */
14783				if ((un->un_retry_statp == kstat_waitq_enter) ||
14784				    (un->un_retry_statp ==
14785				    kstat_runq_back_to_waitq)) {
14786					statp = kstat_waitq_to_runq;
14787				}
14788#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14789				saved_statp = un->un_retry_statp;
14790#endif
14791				un->un_retry_statp = NULL;
14792
14793				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14794				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14795				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14796				    un, un->un_retry_bp, un->un_throttle,
14797				    un->un_ncmds_in_transport);
14798			} else {
14799				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14800				    "processing priority bp:0x%p\n", bp);
14801			}
14802
14803		} else if ((bp = un->un_waitq_headp) != NULL) {
14804			/*
14805			 * A command on the waitq is ready to go, but do not
14806			 * send it if:
14807			 *
14808			 * (1) the throttle limit has been reached, or
14809			 * (2) a retry is pending, or
14810			 * (3) a START_STOP_UNIT callback is pending, or
14811			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14812			 *	command is pending.
14813			 *
14814			 * For all of these conditions, IO processing will
14815			 * restart after the condition is cleared.
14816			 */
14817			if (un->un_ncmds_in_transport >= un->un_throttle) {
14818				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14819				    "sd_start_cmds: exiting, "
14820				    "throttle limit reached!\n");
14821				goto exit;
14822			}
14823			if (un->un_retry_bp != NULL) {
14824				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14825				    "sd_start_cmds: exiting, retry pending!\n");
14826				goto exit;
14827			}
14828			if (un->un_startstop_timeid != NULL) {
14829				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14830				    "sd_start_cmds: exiting, "
14831				    "START_STOP pending!\n");
14832				goto exit;
14833			}
14834			if (un->un_direct_priority_timeid != NULL) {
14835				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14836				    "sd_start_cmds: exiting, "
14837				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14838				goto exit;
14839			}
14840
14841			/* Dequeue the command */
14842			un->un_waitq_headp = bp->av_forw;
14843			if (un->un_waitq_headp == NULL) {
14844				un->un_waitq_tailp = NULL;
14845			}
14846			bp->av_forw = NULL;
14847			statp = kstat_waitq_to_runq;
14848			SD_TRACE(SD_LOG_IO_CORE, un,
14849			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14850
14851		} else {
14852			/* No work to do so bail out now */
14853			SD_TRACE(SD_LOG_IO_CORE, un,
14854			    "sd_start_cmds: no more work, exiting!\n");
14855			goto exit;
14856		}
14857
14858		/*
14859		 * Reset the state to normal. This is the mechanism by which
14860		 * the state transitions from either SD_STATE_RWAIT or
14861		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14862		 * If state is SD_STATE_PM_CHANGING then this command is
14863		 * part of the device power control and the state must
14864		 * not be put back to normal. Doing so would
14865		 * allow new commands to proceed when they shouldn't;
14866		 * the device may be going off.
14867		 */
14868		if ((un->un_state != SD_STATE_SUSPENDED) &&
14869		    (un->un_state != SD_STATE_PM_CHANGING)) {
14870			New_state(un, SD_STATE_NORMAL);
14871		}
14872
14873		xp = SD_GET_XBUF(bp);
14874		ASSERT(xp != NULL);
14875
14876#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14877		/*
14878		 * Allocate the scsi_pkt if we need one, or attach DMA
14879		 * resources if we have a scsi_pkt that needs them. The
14880		 * latter should only occur for commands that are being
14881		 * retried.
14882		 */
14883		if ((xp->xb_pktp == NULL) ||
14884		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14885#else
14886		if (xp->xb_pktp == NULL) {
14887#endif
14888			/*
14889			 * There is no scsi_pkt allocated for this buf. Call
14890			 * the initpkt function to allocate & init one.
14891			 *
14892			 * The scsi_init_pkt runout callback functionality is
14893			 * implemented as follows:
14894			 *
14895			 * 1) The initpkt function always calls
14896			 *    scsi_init_pkt(9F) with sdrunout specified as the
14897			 *    callback routine.
14898			 * 2) A successful packet allocation is initialized and
14899			 *    the I/O is transported.
14900			 * 3) The I/O associated with an allocation resource
14901			 *    failure is left on its queue to be retried via
14902			 *    runout or the next I/O.
14903			 * 4) The I/O associated with a DMA error is removed
14904			 *    from the queue and failed with EIO. Processing of
14905			 *    the transport queues is also halted to be
14906			 *    restarted via runout or the next I/O.
14907			 * 5) The I/O associated with a CDB size or packet
14908			 *    size error is removed from the queue and failed
14909			 *    with EIO. Processing of the transport queues is
14910			 *    continued.
14911			 *
14912			 * Note: there is no interface for canceling a runout
14913			 * callback. To prevent the driver from detaching or
14914			 * suspending while a runout is pending the driver
14915			 * state is set to SD_STATE_RWAIT
14916			 *
14917			 * Note: using the scsi_init_pkt callback facility can
14918			 * result in an I/O request persisting at the head of
14919			 * the list which cannot be satisfied even after
14920			 * multiple retries. In the future the driver may
14921			 * implement some kind of maximum runout count before
14922			 * failing an I/O.
14923			 *
14924			 * Note: the use of funcp below may seem superfluous,
14925			 * but it helps warlock figure out the correct
14926			 * initpkt function calls (see [s]sd.wlcmd).
14927			 */
14928			struct scsi_pkt	*pktp;
14929			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14930
14931			ASSERT(bp != un->un_rqs_bp);
14932
14933			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14934			switch ((*funcp)(bp, &pktp)) {
14935			case  SD_PKT_ALLOC_SUCCESS:
14936				xp->xb_pktp = pktp;
14937				SD_TRACE(SD_LOG_IO_CORE, un,
14938				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14939				    pktp);
14940				goto got_pkt;
14941
14942			case SD_PKT_ALLOC_FAILURE:
14943				/*
14944				 * Temporary (hopefully) resource depletion.
14945				 * Since retries and RQS commands always have a
14946				 * scsi_pkt allocated, these cases should never
14947				 * get here. So the only cases this needs to
14948				 * handle is a bp from the waitq (which we put
14949				 * handle are a bp from the waitq (which we put
14950				 * sent as an immed_bp (which we just fail).
14951				 */
14952				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14953				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14954
14955#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14956
14957				if (bp == immed_bp) {
14958					/*
14959					 * If SD_XB_DMA_FREED is clear, then
14960					 * this is a failure to allocate a
14961					 * scsi_pkt, and we must fail the
14962					 * command.
14963					 */
14964					if ((xp->xb_pkt_flags &
14965					    SD_XB_DMA_FREED) == 0) {
14966						break;
14967					}
14968
14969					/*
14970					 * If this immediate command is NOT our
14971					 * un_retry_bp, then we must fail it.
14972					 */
14973					if (bp != un->un_retry_bp) {
14974						break;
14975					}
14976
14977					/*
14978					 * We get here if this cmd is our
14979					 * un_retry_bp that was DMAFREED, but
14980					 * scsi_init_pkt() failed to reallocate
14981					 * DMA resources when we attempted to
14982					 * retry it. This can happen when an
14983					 * mpxio failover is in progress, but
14984					 * we don't want to just fail the
14985					 * command in this case.
14986					 *
14987					 * Use timeout(9F) to restart it after
14988					 * a 100ms delay.  We don't want to
14989					 * let sdrunout() restart it, because
14990					 * sdrunout() is just supposed to start
14991					 * commands that are sitting on the
14992					 * wait queue.  The un_retry_bp stays
14993					 * set until the command completes, but
14994					 * sdrunout can be called many times
14995					 * before that happens.  Since sdrunout
14996					 * cannot tell if the un_retry_bp is
14997					 * already in the transport, it could
14998					 * end up calling scsi_transport() for
14999					 * the un_retry_bp multiple times.
15000					 *
15001					 * Also: don't schedule the callback
15002					 * if some other callback is already
15003					 * pending.
15004					 */
15005					if (un->un_retry_statp == NULL) {
15006						/*
15007						 * restore the kstat pointer to
15008						 * keep kstat counts coherent
15009						 * when we do retry the command.
15010						 */
15011						un->un_retry_statp =
15012						    saved_statp;
15013					}
15014
15015					if ((un->un_startstop_timeid == NULL) &&
15016					    (un->un_retry_timeid == NULL) &&
15017					    (un->un_direct_priority_timeid ==
15018					    NULL)) {
15019
15020						un->un_retry_timeid =
15021						    timeout(
15022						    sd_start_retry_command,
15023						    un, SD_RESTART_TIMEOUT);
15024					}
15025					goto exit;
15026				}
15027
15028#else
15029				if (bp == immed_bp) {
15030					break;	/* Just fail the command */
15031				}
15032#endif
15033
15034				/* Add the buf back to the head of the waitq */
15035				bp->av_forw = un->un_waitq_headp;
15036				un->un_waitq_headp = bp;
15037				if (un->un_waitq_tailp == NULL) {
15038					un->un_waitq_tailp = bp;
15039				}
15040				goto exit;
15041
15042			case SD_PKT_ALLOC_FAILURE_NO_DMA:
15043				/*
15044				 * HBA DMA resource failure. Fail the command
15045				 * and continue processing of the queues.
15046				 */
15047				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15048				    "sd_start_cmds: "
15049				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
15050				break;
15051
15052			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
15053				/*
15054				 * Note:x86: Partial DMA mapping not supported
15055				 * for USCSI commands, and all the needed DMA
15056				 * resources were not allocated.
15057				 */
15058				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15059				    "sd_start_cmds: "
15060				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
15061				break;
15062
15063			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
15064				/*
15065				 * Note:x86: Request cannot fit into CDB based
15066				 * on lba and len.
15067				 */
15068				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15069				    "sd_start_cmds: "
15070				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
15071				break;
15072
15073			default:
15074				/* Should NEVER get here! */
15075				panic("scsi_initpkt error");
15076				/*NOTREACHED*/
15077			}
15078
15079			/*
15080			 * Fatal error in allocating a scsi_pkt for this buf.
15081			 * Update kstats & return the buf with an error code.
15082			 * We must use sd_return_failed_command_no_restart() to
15083			 * avoid a recursive call back into sd_start_cmds().
15084			 * However this also means that we must keep processing
15085			 * the waitq here in order to avoid stalling.
15086			 */
15087			if (statp == kstat_waitq_to_runq) {
15088				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
15089			}
15090			sd_return_failed_command_no_restart(un, bp, EIO);
15091			if (bp == immed_bp) {
15092				/* immed_bp is gone by now, so clear this */
15093				immed_bp = NULL;
15094			}
15095			continue;
15096		}
15097got_pkt:
15098		if (bp == immed_bp) {
15099			/* goto the head of the class.... */
15100			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15101		}
15102
15103		un->un_ncmds_in_transport++;
15104		SD_UPDATE_KSTATS(un, statp, bp);
15105
15106		/*
15107		 * Call scsi_transport() to send the command to the target.
15108		 * According to SCSA architecture, we must drop the mutex here
15109		 * before calling scsi_transport() in order to avoid deadlock.
15110		 * Note that the scsi_pkt's completion routine can be executed
15111		 * (from interrupt context) even before the call to
15112		 * scsi_transport() returns.
15113		 */
15114		SD_TRACE(SD_LOG_IO_CORE, un,
15115		    "sd_start_cmds: calling scsi_transport()\n");
15116		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
15117
15118		mutex_exit(SD_MUTEX(un));
15119		rval = scsi_transport(xp->xb_pktp);
15120		mutex_enter(SD_MUTEX(un));
15121
15122		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15123		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
15124
15125		switch (rval) {
15126		case TRAN_ACCEPT:
15127			/* Clear this with every pkt accepted by the HBA */
15128			un->un_tran_fatal_count = 0;
15129			break;	/* Success; try the next cmd (if any) */
15130
15131		case TRAN_BUSY:
15132			un->un_ncmds_in_transport--;
15133			ASSERT(un->un_ncmds_in_transport >= 0);
15134
15135			/*
15136			 * Don't retry request sense; the sense data
15137			 * is lost when another request is sent.
15138			 * Free up the rqs buf and retry
15139			 * the original failed cmd.  Update kstat.
15140			 */
15141			if (bp == un->un_rqs_bp) {
15142				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15143				bp = sd_mark_rqs_idle(un, xp);
15144				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15145					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
15146					kstat_waitq_enter);
15147				goto exit;
15148			}
15149
15150#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
15151			/*
15152			 * Free the DMA resources for the scsi_pkt. This will
15153			 * allow mpxio to select another path the next time
15154			 * we call scsi_transport() with this scsi_pkt.
15155			 * See sdintr() for the rationale behind this.
15156			 */
15157			if ((un->un_f_is_fibre == TRUE) &&
15158			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
15159			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
15160				scsi_dmafree(xp->xb_pktp);
15161				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
15162			}
15163#endif
15164
15165			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
15166				/*
15167				 * Commands that are SD_PATH_DIRECT_PRIORITY
15168				 * are for error recovery situations. These do
15169				 * not use the normal command waitq, so if they
15170				 * get a TRAN_BUSY we cannot put them back onto
15171				 * the waitq for later retry. One possible
15172				 * problem is that there could already be some
15173				 * other command on un_retry_bp that is waiting
15174				 * for this one to complete, so we would be
15175				 * deadlocked if we put this command back onto
15176				 * the waitq for later retry (since un_retry_bp
15177				 * must complete before the driver gets back to
15178				 * commands on the waitq).
15179				 *
15180				 * To avoid deadlock we must schedule a callback
15181				 * that will restart this command after a set
15182				 * interval.  This should keep retrying for as
15183				 * long as the underlying transport keeps
15184				 * returning TRAN_BUSY (just like for other
15185				 * commands).  Use the same timeout interval as
15186				 * for the ordinary TRAN_BUSY retry.
15187				 */
15188				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15189				    "sd_start_cmds: scsi_transport() returned "
15190				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
15191
15192				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15193				un->un_direct_priority_timeid =
15194				    timeout(sd_start_direct_priority_command,
15195				    bp, SD_BSY_TIMEOUT / 500);
15196
15197				goto exit;
15198			}
15199
15200			/*
15201			 * For TRAN_BUSY, we want to reduce the throttle value,
15202			 * unless we are retrying a command.
15203			 */
15204			if (bp != un->un_retry_bp) {
15205				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
15206			}
15207
15208			/*
15209			 * Set up the bp to be tried again 10 ms later.
15210			 * Note:x86: Is there a timeout value in the sd_lun
15211			 * for this condition?
15212			 */
15213			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
15214				kstat_runq_back_to_waitq);
15215			goto exit;
15216
15217		case TRAN_FATAL_ERROR:
15218			un->un_tran_fatal_count++;
15219			/* FALLTHRU */
15220
15221		case TRAN_BADPKT:
15222		default:
15223			un->un_ncmds_in_transport--;
15224			ASSERT(un->un_ncmds_in_transport >= 0);
15225
15226			/*
15227			 * If this is our REQUEST SENSE command with a
15228			 * transport error, we must get back the pointers
15229			 * to the original buf, and mark the REQUEST
15230			 * SENSE command as "available".
15231			 */
15232			if (bp == un->un_rqs_bp) {
15233				bp = sd_mark_rqs_idle(un, xp);
15234				xp = SD_GET_XBUF(bp);
15235			} else {
15236				/*
15237				 * Legacy behavior: do not update transport
15238				 * error count for request sense commands.
15239				 */
15240				SD_UPDATE_ERRSTATS(un, sd_transerrs);
15241			}
15242
15243			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15244			sd_print_transport_rejected_message(un, xp, rval);
15245
15246			/*
15247			 * We must use sd_return_failed_command_no_restart() to
15248			 * avoid a recursive call back into sd_start_cmds().
15249			 * However this also means that we must keep processing
15250			 * the waitq here in order to avoid stalling.
15251			 */
15252			sd_return_failed_command_no_restart(un, bp, EIO);
15253
15254			/*
15255			 * Notify any threads waiting in sd_ddi_suspend() that
15256			 * a command completion has occurred.
15257			 */
15258			if (un->un_state == SD_STATE_SUSPENDED) {
15259				cv_broadcast(&un->un_disk_busy_cv);
15260			}
15261
15262			if (bp == immed_bp) {
15263				/* immed_bp is gone by now, so clear this */
15264				immed_bp = NULL;
15265			}
15266			break;
15267		}
15268
15269	} while (immed_bp == NULL);
15270
15271exit:
15272	ASSERT(mutex_owned(SD_MUTEX(un)));
15273	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
15274}
15275
15276
15277/*
15278 *    Function: sd_return_command
15279 *
15280 * Description: Returns a command to its originator (with or without an
15281 *		error).  Also starts commands waiting to be transported
15282 *		to the target.
15283 *
15284 *     Context: May be called from interrupt, kernel, or timeout context
15285 */
15286
15287static void
15288sd_return_command(struct sd_lun *un, struct buf *bp)
15289{
15290	struct sd_xbuf *xp;
15291#if defined(__i386) || defined(__amd64)
15292	struct scsi_pkt *pktp;
15293#endif
15294
15295	ASSERT(bp != NULL);
15296	ASSERT(un != NULL);
15297	ASSERT(mutex_owned(SD_MUTEX(un)));
15298	ASSERT(bp != un->un_rqs_bp);
15299	xp = SD_GET_XBUF(bp);
15300	ASSERT(xp != NULL);
15301
15302#if defined(__i386) || defined(__amd64)
15303	pktp = SD_GET_PKTP(bp);
15304#endif
15305
15306	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
15307
15308#if defined(__i386) || defined(__amd64)
15309	/*
15310	 * Note:x86: check for the "sdrestart failed" case.
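	 * If a partial DMA transfer (xb_dma_resid != 0) completed its
	 * current window cleanly (no error, pkt_resid == 0) for a
	 * non-USCSI command, set up and send the next portion of the
	 * transfer instead of completing the buf.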
15311	 */
15312	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
15313		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
15314		(xp->xb_pktp->pkt_resid == 0)) {
15315
15316		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
15317			/*
15318			 * Successfully set up next portion of cmd
15319			 * transfer, try sending it
15320			 */
15321			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15322			    NULL, NULL, 0, (clock_t)0, NULL);
15323			sd_start_cmds(un, NULL);
15324			return;	/* Note:x86: need a return here? */
15325		}
15326	}
15327#endif
15328
15329	/*
15330	 * If this is the failfast bp, clear it from un_failfast_bp. This
15331	 * can happen if upon being re-tried the failfast bp either
15332	 * succeeded or encountered another error (possibly even a different
15333	 * error than the one that precipitated the failfast state, but in
15334	 * that case it would have had to exhaust retries as well). Regardless,
15335	 * that case it would have had to exhaust retries as well). Regardless,
15336	 * this should not occur while the instance is in the active
15337	 */
15338	if (bp == un->un_failfast_bp) {
15339		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15340		un->un_failfast_bp = NULL;
15341	}
15342
15343	/*
15344	 * Clear the failfast state upon successful completion of ANY cmd.
15345	 */
15346	if (bp->b_error == 0) {
15347		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15348	}
15349
15350	/*
15351	 * This is used if the command was retried one or more times. Show that
15352	 * we are done with it, and allow processing of the waitq to resume.
15353	 */
15354	if (bp == un->un_retry_bp) {
15355		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15356		    "sd_return_command: un:0x%p: "
15357		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15358		un->un_retry_bp = NULL;
15359		un->un_retry_statp = NULL;
15360	}
15361
15362	SD_UPDATE_RDWR_STATS(un, bp);
15363	SD_UPDATE_PARTITION_STATS(un, bp);
15364
15365	switch (un->un_state) {
15366	case SD_STATE_SUSPENDED:
15367		/*
15368		 * Notify any threads waiting in sd_ddi_suspend() that
15369		 * a command completion has occurred.
15370		 */
15371		cv_broadcast(&un->un_disk_busy_cv);
15372		break;
15373	default:
15374		sd_start_cmds(un, NULL);
15375		break;
15376	}
15377
15378	/* Return this command up the iodone chain to its originator. */
15379	mutex_exit(SD_MUTEX(un));
15380
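	/* Destroy the scsi_pkt via the destroypkt routine for this chain. */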
15381	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15382	xp->xb_pktp = NULL;
15383
15384	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15385
15386	ASSERT(!mutex_owned(SD_MUTEX(un)));
15387	mutex_enter(SD_MUTEX(un));
15388
15389	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
15390}
15391
15392
15393/*
15394 *    Function: sd_return_failed_command
15395 *
15396 * Description: Command completion when an error occurred.
15397 *
15398 *     Context: May be called from interrupt context
15399 */
15400
15401static void
15402sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15403{
15404	ASSERT(bp != NULL);
15405	ASSERT(un != NULL);
15406	ASSERT(mutex_owned(SD_MUTEX(un)));
15407
15408	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15409	    "sd_return_failed_command: entry\n");
15410
15411	/*
15412	 * b_resid could already be nonzero due to a partial data
15413	 * transfer, so do not change it here.
15414	 */
15415	SD_BIOERROR(bp, errcode);
15416
15417	sd_return_command(un, bp);
15418	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15419	    "sd_return_failed_command: exit\n");
15420}
15421
15422
15423/*
15424 *    Function: sd_return_failed_command_no_restart
15425 *
15426 * Description: Same as sd_return_failed_command, but ensures that no
15427 *		call back into sd_start_cmds will be issued.
15428 *
15429 *     Context: May be called from interrupt context
15430 */
15431
15432static void
15433sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
15434	int errcode)
15435{
15436	struct sd_xbuf *xp;
15437
15438	ASSERT(bp != NULL);
15439	ASSERT(un != NULL);
15440	ASSERT(mutex_owned(SD_MUTEX(un)));
15441	xp = SD_GET_XBUF(bp);
15442	ASSERT(xp != NULL);
15443	ASSERT(errcode != 0);
15444
15445	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15446	    "sd_return_failed_command_no_restart: entry\n");
15447
15448	/*
15449	 * b_resid could already be nonzero due to a partial data
15450	 * transfer, so do not change it here.
15451	 */
15452	SD_BIOERROR(bp, errcode);
15453
15454	/*
15455	 * If this is the failfast bp, clear it. This can happen if the
15456	 * failfast bp encountered a fatal error when we attempted to
15457	 * re-try it (such as a scsi_transport(9F) failure).  However
15458	 * we should NOT be in an active failfast state if the failfast
15459	 * bp is not NULL.
15460	 */
15461	if (bp == un->un_failfast_bp) {
15462		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15463		un->un_failfast_bp = NULL;
15464	}
15465
15466	if (bp == un->un_retry_bp) {
15467		/*
15468		 * This command was retried one or more times. Show that we are
15469		 * done with it, and allow processing of the waitq to resume.
15470		 */
15471		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15472		    "sd_return_failed_command_no_restart: "
15473		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15474		un->un_retry_bp = NULL;
15475		un->un_retry_statp = NULL;
15476	}
15477
15478	SD_UPDATE_RDWR_STATS(un, bp);
15479	SD_UPDATE_PARTITION_STATS(un, bp);
15480
15481	mutex_exit(SD_MUTEX(un));
15482
15483	if (xp->xb_pktp != NULL) {
15484		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15485		xp->xb_pktp = NULL;
15486	}
15487
15488	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15489
15490	mutex_enter(SD_MUTEX(un));
15491
15492	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15493	    "sd_return_failed_command_no_restart: exit\n");
15494}
15495
15496
15497/*
15498 *    Function: sd_retry_command
15499 *
15500 * Description: queue up a command for retry, or (optionally) fail it
15501 *		if retry counts are exhausted.
15502 *
15503 *   Arguments: un - Pointer to the sd_lun struct for the target.
15504 *
15505 *		bp - Pointer to the buf for the command to be retried.
15506 *
15507 *		retry_check_flag - Flag to see which (if any) of the retry
15508 *		   counts should be decremented/checked. If the indicated
15509 *		   retry count is exhausted, then the command will not be
15510 *		   retried; it will be failed instead. This should use a
15511 *		   value equal to one of the following:
15512 *
15513 *			SD_RETRIES_NOCHECK
15514 *			SD_RETRIES_STANDARD
15515 *			SD_RETRIES_VICTIM
15516 *
15517 *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15518 *		   if the check should be made to see if FLAG_ISOLATE is set
15519 *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15520 *		   not retried, it is simply failed.
15521 *
15522 *		user_funcp - Ptr to function to call before dispatching the
15523 *		   command. May be NULL if no action needs to be performed.
15524 *		   (Primarily intended for printing messages.)
15525 *
15526 *		user_arg - Optional argument to be passed along to
15527 *		   the user_funcp call.
15528 *
15529 *		failure_code - errno return code to set in the bp if the
15530 *		   command is going to be failed.
15531 *
15532 *		retry_delay - Retry delay interval in (clock_t) units. May
15533 *		   be zero, which indicates that the command should be
15534 *		   retried immediately (ie, without an intervening delay).
15535 *
15536 *		statp - Ptr to kstat function to be updated if the command
15537 *		   is queued for a delayed retry. May be NULL if no kstat
15538 *		   update is desired.
15539 *
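 *		   For example (mirroring a call made elsewhere in this
 *		   file), to queue bp for a standard-policy retry after a
 *		   short delay, failing it with EIO once retries are
 *		   exhausted:
 *
 *			sd_retry_command(un, bp, SD_RETRIES_STANDARD,
 *			    NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
 *			    kstat_waitq_enter);
 *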
15540 *     Context: May be called from interrupt context.
15541 */
15542
15543static void
15544sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15545	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15546	code), void *user_arg, int failure_code,  clock_t retry_delay,
15547	void (*statp)(kstat_io_t *))
15548{
15549	struct sd_xbuf	*xp;
15550	struct scsi_pkt	*pktp;
15551
15552	ASSERT(un != NULL);
15553	ASSERT(mutex_owned(SD_MUTEX(un)));
15554	ASSERT(bp != NULL);
15555	xp = SD_GET_XBUF(bp);
15556	ASSERT(xp != NULL);
15557	pktp = SD_GET_PKTP(bp);
15558	ASSERT(pktp != NULL);
15559
15560	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15561	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15562
15563	/*
15564	 * If we are syncing or dumping, fail the command to avoid
15565	 * recursively calling back into scsi_transport().
15566	 */
15567	if (ddi_in_panic()) {
15568		goto fail_command_no_log;
15569	}
15570
15571	/*
15572	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15573	 * log an error and fail the command.
15574	 */
15575	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15576		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15577		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15578		sd_dump_memory(un, SD_LOG_IO, "CDB",
15579		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15580		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15581		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15582		goto fail_command;
15583	}
15584
15585	/*
15586	 * If we are suspended, then put the command onto the head of the
15587	 * wait queue since we don't want to start more commands.
15588	 */
15589	switch (un->un_state) {
15590	case SD_STATE_SUSPENDED:
15591	case SD_STATE_DUMPING:
15592		bp->av_forw = un->un_waitq_headp;
15593		un->un_waitq_headp = bp;
15594		if (un->un_waitq_tailp == NULL) {
15595			un->un_waitq_tailp = bp;
15596		}
15597		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15598		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15599		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15600		return;
15601	default:
15602		break;
15603	}
15604
15605	/*
15606	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15607	 * is set; if it is then we do not want to retry the command.
15608	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15609	 */
15610	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15611		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15612			goto fail_command;
15613		}
15614	}
15615
15616
15617	/*
15618	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15619	 * command timeout or a selection timeout has occurred. This means
15620	 * that we were unable to establish any kind of communication with
15621	 * the target, and subsequent retries and/or commands are likely
15622	 * to encounter similar results and take a long time to complete.
15623	 *
15624	 * If this is a failfast error condition, we need to update the
15625	 * failfast state, even if this bp does not have B_FAILFAST set.
15626	 */
15627	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15628		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15629			ASSERT(un->un_failfast_bp == NULL);
15630			/*
15631			 * If we are already in the active failfast state, and
15632			 * another failfast error condition has been detected,
15633			 * then fail this command if it has B_FAILFAST set.
15634			 * If B_FAILFAST is clear, then maintain the legacy
15635			 * behavior of retrying heroically, even though this will
15636			 * take a lot more time to fail the command.
15637			 */
15638			if (bp->b_flags & B_FAILFAST) {
15639				goto fail_command;
15640			}
15641		} else {
15642			/*
15643			 * We're not in the active failfast state, but we
15644			 * have a failfast error condition, so we must begin
15645			 * transition to the next state. We do this regardless
15646			 * of whether or not this bp has B_FAILFAST set.
15647			 */
15648			if (un->un_failfast_bp == NULL) {
15649				/*
15650				 * This is the first bp to meet a failfast
15651				 * condition so save it on un_failfast_bp &
15652				 * do normal retry processing. Do not enter
15653				 * active failfast state yet. This marks
15654				 * entry into the "failfast pending" state.
15655				 */
15656				un->un_failfast_bp = bp;
15657
15658			} else if (un->un_failfast_bp == bp) {
15659				/*
15660				 * This is the second time *this* bp has
15661				 * encountered a failfast error condition,
15662				 * so enter active failfast state & flush
15663				 * queues as appropriate.
15664				 */
15665				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15666				un->un_failfast_bp = NULL;
15667				sd_failfast_flushq(un);
15668
15669				/*
15670				 * Fail this bp now if B_FAILFAST set;
15671				 * otherwise continue with retries. (It would
15672				 * be pretty ironic if this bp succeeded on a
15673				 * subsequent retry after we just flushed all
15674				 * the queues).
15675				 */
15676				if (bp->b_flags & B_FAILFAST) {
15677					goto fail_command;
15678				}
15679
15680#if !defined(lint) && !defined(__lint)
15681			} else {
15682				/*
15683				 * If neither of the preceding conditionals
15684				 * was true, it means that there is some
15685				 * *other* bp that has met an initial failfast
15686				 * condition and is currently either being
15687				 * retried or is waiting to be retried. In
15688				 * that case we should perform normal retry
15689				 * processing on *this* bp, since there is a
15690				 * chance that the current failfast condition
15691				 * is transient and recoverable. If that does
15692				 * not turn out to be the case, then retries
15693				 * will be cleared when the wait queue is
15694				 * flushed anyway.
15695				 */
15696#endif
15697			}
15698		}
15699	} else {
15700		/*
15701		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15702		 * likely were able to at least establish some level of
15703		 * communication with the target and subsequent commands
15704		 * and/or retries are likely to get through to the target.
15705		 * In this case we want to be aggressive about clearing
15706		 * the failfast state. Note that this does not affect
15707		 * the "failfast pending" condition.
15708		 */
15709		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15710	}
15711
15712
15713	/*
15714	 * Check the specified retry count to see if we can still do
15715	 * any retries with this pkt before we should fail it.
15716	 */
15717	switch (retry_check_flag & SD_RETRIES_MASK) {
15718	case SD_RETRIES_VICTIM:
15719		/*
15720		 * Check the victim retry count. If exhausted, then fall
15721		 * thru & check against the standard retry count.
15722		 */
15723		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15724			/* Increment count & proceed with the retry */
15725			xp->xb_victim_retry_count++;
15726			break;
15727		}
15728		/* Victim retries exhausted, fall back to std. retries... */
15729		/* FALLTHRU */
15730
15731	case SD_RETRIES_STANDARD:
15732		if (xp->xb_retry_count >= un->un_retry_count) {
15733			/* Retries exhausted, fail the command */
15734			SD_TRACE(SD_LOG_IO_CORE, un,
15735			    "sd_retry_command: retries exhausted!\n");
15736			/*
15737			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15738			 * commands with nonzero pkt_resid.
15739			 */
15740			if ((pktp->pkt_reason == CMD_CMPLT) &&
15741			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15742			    (pktp->pkt_resid != 0)) {
15743				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15744				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15745					SD_UPDATE_B_RESID(bp, pktp);
15746				}
15747			}
15748			goto fail_command;
15749		}
15750		xp->xb_retry_count++;
15751		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15752		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15753		break;
15754
15755	case SD_RETRIES_UA:
15756		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15757			/* Retries exhausted, fail the command */
15758			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15759			    "Unit Attention retries exhausted. "
15760			    "Check the target.\n");
15761			goto fail_command;
15762		}
15763		xp->xb_ua_retry_count++;
15764		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15765		    "sd_retry_command: retry count:%d\n",
15766			xp->xb_ua_retry_count);
15767		break;
15768
15769	case SD_RETRIES_BUSY:
15770		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15771			/* Retries exhausted, fail the command */
15772			SD_TRACE(SD_LOG_IO_CORE, un,
15773			    "sd_retry_command: retries exhausted!\n");
15774			goto fail_command;
15775		}
15776		xp->xb_retry_count++;
15777		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15778		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15779		break;
15780
15781	case SD_RETRIES_NOCHECK:
15782	default:
15783		/* No retry count to check. Just proceed with the retry */
15784		break;
15785	}
15786
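	/*
	 * Ask the transport to dispatch this pkt at the head of its queue,
	 * ahead of any newly queued commands, since this is a retry.
	 */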
15787	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15788
15789	/*
15790	 * If we were given a zero timeout, we must attempt to retry the
15791	 * command immediately (ie, without a delay).
15792	 */
15793	if (retry_delay == 0) {
15794		/*
15795		 * Check some limiting conditions to see if we can actually
15796		 * do the immediate retry.  If we cannot, then we must
15797		 * fall back to queueing up a delayed retry.
15798		 */
15799		if (un->un_ncmds_in_transport >= un->un_throttle) {
15800			/*
15801			 * We are at the throttle limit for the target,
15802			 * fall back to delayed retry.
15803			 */
15804			retry_delay = SD_BSY_TIMEOUT;
15805			statp = kstat_waitq_enter;
15806			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15807			    "sd_retry_command: immed. retry hit "
15808			    "throttle!\n");
15809		} else {
15810			/*
15811			 * We're clear to proceed with the immediate retry.
15812			 * First call the user-provided function (if any)
15813			 */
15814			if (user_funcp != NULL) {
15815				(*user_funcp)(un, bp, user_arg,
15816				    SD_IMMEDIATE_RETRY_ISSUED);
15817#ifdef __lock_lint
15818				sd_print_incomplete_msg(un, bp, user_arg,
15819				    SD_IMMEDIATE_RETRY_ISSUED);
15820				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15821				    SD_IMMEDIATE_RETRY_ISSUED);
15822				sd_print_sense_failed_msg(un, bp, user_arg,
15823				    SD_IMMEDIATE_RETRY_ISSUED);
15824#endif
15825			}
15826
15827			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15828			    "sd_retry_command: issuing immediate retry\n");
15829
15830			/*
15831			 * Call sd_start_cmds() to transport the command to
15832			 * the target.
15833			 */
15834			sd_start_cmds(un, bp);
15835
15836			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15837			    "sd_retry_command exit\n");
15838			return;
15839		}
15840	}
15841
15842	/*
15843	 * Set up to retry the command after a delay.
15844	 * First call the user-provided function (if any)
15845	 */
15846	if (user_funcp != NULL) {
15847		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15848	}
15849
15850	sd_set_retry_bp(un, bp, retry_delay, statp);
15851
15852	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15853	return;
15854
15855fail_command:
15856
15857	if (user_funcp != NULL) {
15858		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15859	}
15860
15861fail_command_no_log:
15862
15863	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15864	    "sd_retry_command: returning failed command\n");
15865
15866	sd_return_failed_command(un, bp, failure_code);
15867
15868	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15869}
15870
15871
15872/*
15873 *    Function: sd_set_retry_bp
15874 *
15875 * Description: Set up the given bp for retry.
15876 *
15877 *   Arguments: un - ptr to associated softstate
15878 *		bp - ptr to buf(9S) for the command
15879 *		retry_delay - time interval before issuing retry (may be 0)
15880 *		statp - optional pointer to kstat function
15881 *
15882 *     Context: May be called under interrupt context
15883 */
15884
15885static void
15886sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15887	void (*statp)(kstat_io_t *))
15888{
15889	ASSERT(un != NULL);
15890	ASSERT(mutex_owned(SD_MUTEX(un)));
15891	ASSERT(bp != NULL);
15892
15893	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15894	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15895
15896	/*
15897	 * Indicate that the command is being retried. This will not allow any
15898	 * other commands on the wait queue to be transported to the target
15899	 * until this command has been completed (success or failure). The
15900	 * "retry command" is not transported to the target until the given
15901	 * time delay expires, unless the user specified a 0 retry_delay.
15902	 *
15903	 * Note: the timeout(9F) callback routine is what actually calls
15904	 * sd_start_cmds() to transport the command, with the exception of a
15905	 * zero retry_delay. The only current implementor of a zero retry delay
15906	 * zero retry_delay. The only current user of a zero retry delay
15907	 * is the case where a START_STOP_UNIT is sent to spin up a device.
15908	if (un->un_retry_bp == NULL) {
15909		ASSERT(un->un_retry_statp == NULL);
15910		un->un_retry_bp = bp;
15911
15912		/*
15913		 * If the user has not specified a delay the command should
15914		 * be queued and no timeout should be scheduled.
15915		 */
15916		if (retry_delay == 0) {
15917			/*
15918			 * Save the kstat pointer that will be used in the
15919			 * call to SD_UPDATE_KSTATS() below, so that
15920			 * sd_start_cmds() can correctly decrement the waitq
15921			 * count when it is time to transport this command.
15922			 */
15923			un->un_retry_statp = statp;
15924			goto done;
15925		}
15926	}
15927
15928	if (un->un_retry_bp == bp) {
15929		/*
15930		 * Save the kstat pointer that will be used in the call to
15931		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15932		 * correctly decrement the waitq count when it is time to
15933		 * transport this command.
15934		 */
15935		un->un_retry_statp = statp;
15936
15937		/*
15938		 * Schedule a timeout if:
15939		 *   1) The user has specified a delay.
15940		 *   2) There is not a START_STOP_UNIT callback pending.
15941		 *
15942		 * If no delay has been specified, then it is up to the caller
15943		 * to ensure that IO processing continues without stalling.
15944		 * Effectively, this means that the caller will issue the
15945		 * required call to sd_start_cmds(). The START_STOP_UNIT
15946		 * callback does this after the START STOP UNIT command has
15947		 * completed. In either of these cases we should not schedule
15948		 * a timeout callback here.  Also don't schedule the timeout if
15949		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15950		 */
15951		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15952		    (un->un_direct_priority_timeid == NULL)) {
15953			un->un_retry_timeid =
15954			    timeout(sd_start_retry_command, un, retry_delay);
15955			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15956			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15957			    " bp:0x%p un_retry_timeid:0x%p\n",
15958			    un, bp, un->un_retry_timeid);
15959		}
15960	} else {
15961		/*
15962		 * We only get in here if there is already another command
15963		 * waiting to be retried.  In this case, we just put the
15964		 * given command onto the wait queue, so it can be transported
15965		 * after the current retry command has completed.
15966		 *
15967		 * Also we have to make sure that if the command at the head
15968		 * of the wait queue is the un_failfast_bp, we do not put any
15969		 * other commands that are to be retried ahead of it.
15970		 */
15971		if ((un->un_failfast_bp != NULL) &&
15972		    (un->un_failfast_bp == un->un_waitq_headp)) {
15973			/*
15974			 * Enqueue this command AFTER the first command on
15975			 * the wait queue (which is also un_failfast_bp).
15976			 */
15977			bp->av_forw = un->un_waitq_headp->av_forw;
15978			un->un_waitq_headp->av_forw = bp;
15979			if (un->un_waitq_headp == un->un_waitq_tailp) {
15980				un->un_waitq_tailp = bp;
15981			}
15982		} else {
15983			/* Enqueue this command at the head of the waitq. */
15984			bp->av_forw = un->un_waitq_headp;
15985			un->un_waitq_headp = bp;
15986			if (un->un_waitq_tailp == NULL) {
15987				un->un_waitq_tailp = bp;
15988			}
15989		}
15990
15991		if (statp == NULL) {
15992			statp = kstat_waitq_enter;
15993		}
15994		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15995		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15996	}
15997
15998done:
15999	if (statp != NULL) {
16000		SD_UPDATE_KSTATS(un, statp, bp);
16001	}
16002
16003	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16004	    "sd_set_retry_bp: exit un:0x%p\n", un);
16005}
16006
16007
16008/*
16009 *    Function: sd_start_retry_command
16010 *
16011 * Description: Start the command that has been waiting on the target's
16012 *		retry queue.  Called from timeout(9F) context after the
16013 *		retry delay interval has expired.
16014 *
16015 *   Arguments: arg - pointer to associated softstate for the device.
16016 *
16017 *     Context: timeout(9F) thread context.  May not sleep.
16018 */
16019
16020static void
16021sd_start_retry_command(void *arg)
16022{
16023	struct sd_lun *un = arg;
16024
16025	ASSERT(un != NULL);
16026	ASSERT(!mutex_owned(SD_MUTEX(un)));
16027
16028	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16029	    "sd_start_retry_command: entry\n");
16030
16031	mutex_enter(SD_MUTEX(un));
16032
16033	un->un_retry_timeid = NULL;
16034
16035	if (un->un_retry_bp != NULL) {
16036		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16037		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
16038		    un, un->un_retry_bp);
16039		sd_start_cmds(un, un->un_retry_bp);
16040	}
16041
16042	mutex_exit(SD_MUTEX(un));
16043
16044	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16045	    "sd_start_retry_command: exit\n");
16046}
16047
16048
16049/*
16050 *    Function: sd_start_direct_priority_command
16051 *
16052 * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
16053 *		received TRAN_BUSY when we called scsi_transport() to send it
16054 *		to the underlying HBA. This function is called from timeout(9F)
16055 *		context after the delay interval has expired.
16056 *
16057 *   Arguments: arg - pointer to associated buf(9S) to be restarted.
16058 *
16059 *     Context: timeout(9F) thread context.  May not sleep.
16060 */
16061
16062static void
16063sd_start_direct_priority_command(void *arg)
16064{
16065	struct buf	*priority_bp = arg;
16066	struct sd_lun	*un;
16067
16068	ASSERT(priority_bp != NULL);
16069	un = SD_GET_UN(priority_bp);
16070	ASSERT(un != NULL);
16071	ASSERT(!mutex_owned(SD_MUTEX(un)));
16072
16073	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16074	    "sd_start_direct_priority_command: entry\n");
16075
16076	mutex_enter(SD_MUTEX(un));
16077	un->un_direct_priority_timeid = NULL;
16078	sd_start_cmds(un, priority_bp);
16079	mutex_exit(SD_MUTEX(un));
16080
16081	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16082	    "sd_start_direct_priority_command: exit\n");
16083}
16084
16085
16086/*
16087 *    Function: sd_send_request_sense_command
16088 *
16089 * Description: Sends a REQUEST SENSE command to the target
16090 *
16091 *     Context: May be called from interrupt context.
16092 */
16093
16094static void
16095sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
16096	struct scsi_pkt *pktp)
16097{
16098	ASSERT(bp != NULL);
16099	ASSERT(un != NULL);
16100	ASSERT(mutex_owned(SD_MUTEX(un)));
16101
16102	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
16103	    "entry: buf:0x%p\n", bp);
16104
16105	/*
16106	 * If we are syncing or dumping, then fail the command to avoid a
16107	 * recursive call back into scsi_transport(). Also fail the command
16108	 * if we are suspended (legacy behavior).
16109	 */
16110	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
16111	    (un->un_state == SD_STATE_DUMPING)) {
16112		sd_return_failed_command(un, bp, EIO);
16113		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16114		    "sd_send_request_sense_command: syncing/dumping, exit\n");
16115		return;
16116	}
16117
16118	/*
16119	 * Retry the failed command and don't issue the request sense if:
16120	 *    1) the sense buf is busy
16121	 *    2) we have 1 or more outstanding commands on the target
16122	 *    (the sense data will be cleared or invalidated anyway)
16123	 *
16124	 * Note: There could be an issue with not checking a retry limit here;
16125	 * the problem is determining which retry limit to check.
16126	 */
16127	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
16128		/* Don't retry if the command is flagged as non-retryable */
16129		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16130			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
16131			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
16132			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16133			    "sd_send_request_sense_command: "
16134			    "at full throttle, retrying exit\n");
16135		} else {
16136			sd_return_failed_command(un, bp, EIO);
16137			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16138			    "sd_send_request_sense_command: "
16139			    "at full throttle, non-retryable exit\n");
16140		}
16141		return;
16142	}
16143
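	/*
	 * Associate the failed buf with the instance's REQUEST SENSE
	 * resources and dispatch the REQUEST SENSE command.
	 */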
16144	sd_mark_rqs_busy(un, bp);
16145	sd_start_cmds(un, un->un_rqs_bp);
16146
16147	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16148	    "sd_send_request_sense_command: exit\n");
16149}
16150
16151
16152/*
16153 *    Function: sd_mark_rqs_busy
16154 *
16155 * Description: Indicate that the request sense bp for this instance is
16156 *		in use.
16157 *
16158 *     Context: May be called under interrupt context
16159 */
16160
16161static void
16162sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
16163{
16164	struct sd_xbuf	*sense_xp;
16165
16166	ASSERT(un != NULL);
16167	ASSERT(bp != NULL);
16168	ASSERT(mutex_owned(SD_MUTEX(un)));
16169	ASSERT(un->un_sense_isbusy == 0);
16170
16171	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
16172	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
16173
16174	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
16175	ASSERT(sense_xp != NULL);
16176
16177	SD_INFO(SD_LOG_IO, un,
16178	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
16179
16180	ASSERT(sense_xp->xb_pktp != NULL);
16181	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
16182	    == (FLAG_SENSING | FLAG_HEAD));
16183
16184	un->un_sense_isbusy = 1;
16185	un->un_rqs_bp->b_resid = 0;
16186	sense_xp->xb_pktp->pkt_resid  = 0;
16187	sense_xp->xb_pktp->pkt_reason = 0;
16188
16189	/* So we can get back the bp at interrupt time! */
16190	sense_xp->xb_sense_bp = bp;
16191
16192	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
16193
16194	/*
16195	 * Mark this buf as awaiting sense data. (This is already set in
16196	 * the pkt_flags for the RQS packet.)
16197	 */
16198	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
16199
16200	sense_xp->xb_retry_count	= 0;
16201	sense_xp->xb_victim_retry_count = 0;
16202	sense_xp->xb_ua_retry_count	= 0;
16203	sense_xp->xb_dma_resid  = 0;
16204
16205	/* Clean up the fields for auto-request sense */
16206	sense_xp->xb_sense_status = 0;
16207	sense_xp->xb_sense_state  = 0;
16208	sense_xp->xb_sense_resid  = 0;
16209	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
16210
16211	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
16212}
16213
16214
16215/*
16216 *    Function: sd_mark_rqs_idle
16217 *
16218 * Description: SD_MUTEX must be held continuously through this routine
16219 *		to prevent reuse of the rqs struct before the caller can
16220 *		complete its processing.
16221 *
16222 * Return Code: Pointer to the RQS buf
16223 *
16224 *     Context: May be called under interrupt context
16225 */
16226
16227static struct buf *
16228sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
16229{
16230	struct buf *bp;
16231	ASSERT(un != NULL);
16232	ASSERT(sense_xp != NULL);
16233	ASSERT(mutex_owned(SD_MUTEX(un)));
16234	ASSERT(un->un_sense_isbusy != 0);
16235
16236	un->un_sense_isbusy = 0;
16237	bp = sense_xp->xb_sense_bp;
16238	sense_xp->xb_sense_bp = NULL;
16239
16240	/* This pkt is no longer interested in getting sense data */
16241	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
16242
16243	return (bp);
16244}
16245
16246
16247
16248/*
16249 *    Function: sd_alloc_rqs
16250 *
16251 * Description: Set up the unit to receive auto request sense data
16252 *
16253 * Return Code: DDI_SUCCESS or DDI_FAILURE
16254 *
16255 *     Context: Called under attach(9E) context
16256 */
16257
16258static int
16259sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
16260{
16261	struct sd_xbuf *xp;
16262
16263	ASSERT(un != NULL);
16264	ASSERT(!mutex_owned(SD_MUTEX(un)));
16265	ASSERT(un->un_rqs_bp == NULL);
16266	ASSERT(un->un_rqs_pktp == NULL);
16267
16268	/*
16269	 * First allocate the required buf and scsi_pkt structs, then set up
16270	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
16271	 */
16272	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
16273	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
16274	if (un->un_rqs_bp == NULL) {
16275		return (DDI_FAILURE);
16276	}
16277
16278	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
16279	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
16280
16281	if (un->un_rqs_pktp == NULL) {
16282		sd_free_rqs(un);
16283		return (DDI_FAILURE);
16284	}
16285
16286	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
16287	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
16288	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
16289
16290	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
16291
16292	/* Set up the other needed members in the ARQ scsi_pkt. */
16293	un->un_rqs_pktp->pkt_comp   = sdintr;
16294	un->un_rqs_pktp->pkt_time   = sd_io_time;
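	/*
	 * FLAG_SENSING identifies this pkt as the instance's REQUEST SENSE
	 * command; FLAG_HEAD dispatches it ahead of other queued commands,
	 * before the sense data can be invalidated.
	 */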
16295	un->un_rqs_pktp->pkt_flags |=
16296	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
16297
16298	/*
16299	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
16300	 * provide any initpkt, destroypkt routines as we take care of
16301	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
16302	 */
16303	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
16304	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
16305	xp->xb_pktp = un->un_rqs_pktp;
16306	SD_INFO(SD_LOG_ATTACH_DETACH, un,
16307	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
16308	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
16309
16310	/*
16311	 * Save the pointer to the request sense private bp so it can
16312	 * be retrieved in sdintr.
16313	 */
16314	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
16315	ASSERT(un->un_rqs_bp->b_private == xp);
16316
16317	/*
16318	 * See if the HBA supports auto-request sense for the specified
16319	 * target/lun. If it does, then try to enable it (if not already
16320	 * enabled).
16321	 *
16322	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
16323	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
16324	 * return success.  However, in both of these cases ARQ is always
16325	 * enabled and scsi_ifgetcap will always return true. The best approach
16326	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
16327	 *
16328	 * The 3rd case is an HBA (adp) that always returns enabled on
16329	 * scsi_ifgetcap even when it is not enabled; the best approach
16330	 * is to issue a scsi_ifsetcap then a scsi_ifgetcap.
16331	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
16332	 */
16333
16334	if (un->un_f_is_fibre == TRUE) {
16335		un->un_f_arq_enabled = TRUE;
16336	} else {
16337#if defined(__i386) || defined(__amd64)
16338		/*
16339		 * Circumvent the Adaptec bug, remove this code when
16340		 * the bug is fixed
16341		 */
16342		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
16343#endif
16344		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
16345		case 0:
16346			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16347				"sd_alloc_rqs: HBA supports ARQ\n");
16348			/*
16349			 * ARQ is supported by this HBA but currently is not
16350			 * enabled. Attempt to enable it and if successful then
16351			 * mark this instance as ARQ enabled.
16352			 */
16353			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
16354				== 1) {
16355				/* Successfully enabled ARQ in the HBA */
16356				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16357					"sd_alloc_rqs: ARQ enabled\n");
16358				un->un_f_arq_enabled = TRUE;
16359			} else {
16360				/* Could not enable ARQ in the HBA */
16361				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16362				    "sd_alloc_rqs: failed ARQ enable\n");
16363				un->un_f_arq_enabled = FALSE;
16364			}
16365			break;
16366		case 1:
16367			/*
16368			 * ARQ is supported by this HBA and is already enabled.
16369			 * Just mark ARQ as enabled for this instance.
16370			 */
16371			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16372				"sd_alloc_rqs: ARQ already enabled\n");
16373			un->un_f_arq_enabled = TRUE;
16374			break;
16375		default:
16376			/*
16377			 * ARQ is not supported by this HBA; disable it for this
16378			 * instance.
16379			 */
16380			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16381				"sd_alloc_rqs: HBA does not support ARQ\n");
16382			un->un_f_arq_enabled = FALSE;
16383			break;
16384		}
16385	}
16386
16387	return (DDI_SUCCESS);
16388}
16389
16390
16391/*
16392 *    Function: sd_free_rqs
16393 *
16394 * Description: Cleanup for the pre-instance RQS command.
16395 *
16396 *     Context: Kernel thread context
16397 */
16398
16399static void
16400sd_free_rqs(struct sd_lun *un)
16401{
16402	ASSERT(un != NULL);
16403
16404	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16405
16406	/*
16407	 * If consistent memory is bound to a scsi_pkt, the pkt
16408	 * has to be destroyed *before* freeing the consistent memory.
16409	 * Don't change the sequence of these operations.
16410	 * scsi_destroy_pkt() might access memory, which isn't allowed,
16411	 * after it was freed in scsi_free_consistent_buf().
16412	 */
16413	if (un->un_rqs_pktp != NULL) {
16414		scsi_destroy_pkt(un->un_rqs_pktp);
16415		un->un_rqs_pktp = NULL;
16416	}
16417
16418	if (un->un_rqs_bp != NULL) {
16419		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
16420		scsi_free_consistent_buf(un->un_rqs_bp);
16421		un->un_rqs_bp = NULL;
16422	}
16423	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16424}
16425
16426
16427
16428/*
16429 *    Function: sd_reduce_throttle
16430 *
16431 * Description: Reduces the maximum # of outstanding commands on a
16432 *		target to the current number of outstanding commands.
16433 *		Queues a timeout(9F) callback to restore the limit
16434 *		after a specified interval has elapsed.
16435 *		Typically used when we get a TRAN_BUSY return code
16436 *		back from scsi_transport().
16437 *
16438 *   Arguments: un - ptr to the sd_lun softstate struct
16439 *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16440 *
16441 *     Context: May be called from interrupt context
16442 */
16443
16444static void
16445sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16446{
16447	ASSERT(un != NULL);
16448	ASSERT(mutex_owned(SD_MUTEX(un)));
16449	ASSERT(un->un_ncmds_in_transport >= 0);
16450
16451	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16452	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16453	    un, un->un_throttle, un->un_ncmds_in_transport);
16454
16455	if (un->un_throttle > 1) {
16456		if (un->un_f_use_adaptive_throttle == TRUE) {
16457			switch (throttle_type) {
16458			case SD_THROTTLE_TRAN_BUSY:
16459				if (un->un_busy_throttle == 0) {
16460					un->un_busy_throttle = un->un_throttle;
16461				}
16462				break;
16463			case SD_THROTTLE_QFULL:
16464				un->un_busy_throttle = 0;
16465				break;
16466			default:
16467				ASSERT(FALSE);
16468			}
16469
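			/*
			 * Clamp the throttle to the number of commands
			 * currently in transport; the timeout scheduled
			 * below restores the limit later.
			 */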
16470			if (un->un_ncmds_in_transport > 0) {
16471				un->un_throttle = un->un_ncmds_in_transport;
16472			}
16473
16474		} else {
16475			if (un->un_ncmds_in_transport == 0) {
16476				un->un_throttle = 1;
16477			} else {
16478				un->un_throttle = un->un_ncmds_in_transport;
16479			}
16480		}
16481	}
16482
16483	/* Reschedule the timeout if none is currently active */
16484	if (un->un_reset_throttle_timeid == NULL) {
16485		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16486		    un, SD_THROTTLE_RESET_INTERVAL);
16487		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16488		    "sd_reduce_throttle: timeout scheduled!\n");
16489	}
16490
16491	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16492	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16493}
16494
16495
16496
16497/*
16498 *    Function: sd_restore_throttle
16499 *
16500 * Description: Callback function for timeout(9F).  Resets the current
16501 *		value of un->un_throttle to its default.
16502 *
16503 *   Arguments: arg - pointer to associated softstate for the device.
16504 *
16505 *     Context: May be called from interrupt context
16506 */
16507
16508static void
16509sd_restore_throttle(void *arg)
16510{
16511	struct sd_lun	*un = arg;
16512
16513	ASSERT(un != NULL);
16514	ASSERT(!mutex_owned(SD_MUTEX(un)));
16515
16516	mutex_enter(SD_MUTEX(un));
16517
16518	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16519	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16520
16521	un->un_reset_throttle_timeid = NULL;
16522
16523	if (un->un_f_use_adaptive_throttle == TRUE) {
16524		/*
16525		 * If un_busy_throttle is nonzero, then it contains the
16526		 * value that un_throttle was when we got a TRAN_BUSY back
16527		 * from scsi_transport(). We want to revert back to this
16528		 * value.
16529		 *
16530		 * In the QFULL case, the throttle limit will incrementally
16531		 * increase until it reaches max throttle.
16532		 */
16533		if (un->un_busy_throttle > 0) {
16534			un->un_throttle = un->un_busy_throttle;
16535			un->un_busy_throttle = 0;
16536		} else {
16537			/*
16538			 * Increase the throttle by 10% to open the gate
16539			 * slowly; schedule another restore if the saved
16540			 * throttle has not yet been reached.
16541			 */
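			/*
			 * For example, with un_throttle == 20 the increment
			 * is max(20 / 10, 1) == 2, so the limit ramps
			 * 20 -> 22 -> 24 -> ... until it reaches
			 * un_saved_throttle.
			 */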
16542			short throttle;
16543			if (sd_qfull_throttle_enable) {
16544				throttle = un->un_throttle +
16545				    max((un->un_throttle / 10), 1);
16546				un->un_throttle =
16547				    (throttle < un->un_saved_throttle) ?
16548				    throttle : un->un_saved_throttle;
16549				if (un->un_throttle < un->un_saved_throttle) {
16550					un->un_reset_throttle_timeid =
16551					    timeout(sd_restore_throttle, un,
16552					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16553				}
16554			}
16555		}
16556
16557		/*
16558		 * If un_throttle has fallen below the low-water mark, we
16559		 * restore the maximum value here (and allow it to ratchet
16560		 * down again if necessary).
16561		 */
16562		if (un->un_throttle < un->un_min_throttle) {
16563			un->un_throttle = un->un_saved_throttle;
16564		}
16565	} else {
16566		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16567		    "restoring limit from 0x%x to 0x%x\n",
16568		    un->un_throttle, un->un_saved_throttle);
16569		un->un_throttle = un->un_saved_throttle;
16570	}
16571
16572	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16573	    "sd_restore_throttle: calling sd_start_cmds!\n");
16574
16575	sd_start_cmds(un, NULL);
16576
16577	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16578	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16579	    un, un->un_throttle);
16580
16581	mutex_exit(SD_MUTEX(un));
16582
16583	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16584}
16585
16586/*
16587 *    Function: sdrunout
16588 *
16589 * Description: Callback routine for scsi_init_pkt when a resource allocation
16590 *		fails.
16591 *
16592 *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16593 *		soft state instance.
16594 *
16595 * Return Code: The scsi_init_pkt routine allows for the callback function to
16596 *		return a 0 indicating the callback should be rescheduled or a 1
16597 *		indicating not to reschedule. This routine always returns 1
16598 *		because the driver always provides a callback function to
16599 *		scsi_init_pkt. This results in a callback always being scheduled
16600 *		(via the scsi_init_pkt callback implementation) if a resource
16601 *		failure occurs.
16602 *
16603 *     Context: This callback function may not block or call routines that block
16604 *
16605 *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16606 *		request persisting at the head of the list which cannot be
16607 *		satisfied even after multiple retries. In the future the driver
16608 *		may implement some type of maximum runout count before failing
16609 *		an I/O.
16610 */
16611
16612static int
16613sdrunout(caddr_t arg)
16614{
16615	struct sd_lun	*un = (struct sd_lun *)arg;
16616
16617	ASSERT(un != NULL);
16618	ASSERT(!mutex_owned(SD_MUTEX(un)));
16619
16620	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16621
16622	mutex_enter(SD_MUTEX(un));
16623	sd_start_cmds(un, NULL);
16624	mutex_exit(SD_MUTEX(un));
16625	/*
16626	 * This callback routine always returns 1 (i.e. do not reschedule)
16627	 * because we always specify sdrunout as the callback handler for
16628	 * scsi_init_pkt inside the call to sd_start_cmds.
16629	 */
16630	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16631	return (1);
16632}
16633
16634
16635/*
16636 *    Function: sdintr
16637 *
16638 * Description: Completion callback routine for scsi_pkt(9S) structs
16639 *		sent to the HBA driver via scsi_transport(9F).
16640 *
16641 *     Context: Interrupt context
16642 */
16643
16644static void
16645sdintr(struct scsi_pkt *pktp)
16646{
16647	struct buf	*bp;
16648	struct sd_xbuf	*xp;
16649	struct sd_lun	*un;
16650
16651	ASSERT(pktp != NULL);
16652	bp = (struct buf *)pktp->pkt_private;
16653	ASSERT(bp != NULL);
16654	xp = SD_GET_XBUF(bp);
16655	ASSERT(xp != NULL);
16656	ASSERT(xp->xb_pktp != NULL);
16657	un = SD_GET_UN(bp);
16658	ASSERT(un != NULL);
16659	ASSERT(!mutex_owned(SD_MUTEX(un)));
16660
16661#ifdef SD_FAULT_INJECTION
16662
16663	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16664	/* SD FaultInjection */
16665	sd_faultinjection(pktp);
16666
16667#endif /* SD_FAULT_INJECTION */
16668
16669	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16670	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16671
16672	mutex_enter(SD_MUTEX(un));
16673
16674	/* Reduce the count of commands currently in transport */
16675	un->un_ncmds_in_transport--;
16676	ASSERT(un->un_ncmds_in_transport >= 0);
16677
16678	/* Increment counter to indicate that the callback routine is active */
16679	un->un_in_callback++;
16680
16681	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16682
16683#ifdef	SDDEBUG
16684	if (bp == un->un_retry_bp) {
16685		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16686		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16687		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16688	}
16689#endif
16690
16691	/*
16692	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16693	 */
16694	if (pktp->pkt_reason == CMD_DEV_GONE) {
16695		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16696		    "Device is gone\n");
16697		sd_return_failed_command(un, bp, EIO);
16698		goto exit;
16699	}
16700
16701	/*
16702	 * First see if the pkt has auto-request sense data with it....
16703	 * Look at the packet state first so we don't take a performance
16704	 * hit looking at the arq enabled flag unless absolutely necessary.
16705	 */
16706	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16707	    (un->un_f_arq_enabled == TRUE)) {
16708		/*
16709		 * The HBA did an auto request sense for this command so check
16710		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16711		 * driver command that should not be retried.
16712		 */
16713		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16714			/*
16715			 * Save the relevant sense info into the xp for the
16716			 * original cmd.
16717			 */
16718			struct scsi_arq_status *asp;
16719			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16720			xp->xb_sense_status =
16721			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16722			xp->xb_sense_state  = asp->sts_rqpkt_state;
16723			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16724			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16725			    min(sizeof (struct scsi_extended_sense),
16726			    SENSE_LENGTH));
16727
16728			/* fail the command */
16729			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16730			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16731			sd_return_failed_command(un, bp, EIO);
16732			goto exit;
16733		}
16734
16735#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16736		/*
16737		 * We want to either retry or fail this command, so free
16738		 * the DMA resources here.  If we retry the command then
16739		 * the DMA resources will be reallocated in sd_start_cmds().
16740		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16741		 * causes the *entire* transfer to start over again from the
16742		 * beginning of the request, even for PARTIAL chunks that
16743		 * have already transferred successfully.
16744		 */
16745		if ((un->un_f_is_fibre == TRUE) &&
16746		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16747		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16748			scsi_dmafree(pktp);
16749			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16750		}
16751#endif
16752
16753		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16754		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16755
16756		sd_handle_auto_request_sense(un, bp, xp, pktp);
16757		goto exit;
16758	}
16759
16760	/* Next see if this is the REQUEST SENSE pkt for the instance */
16761	if (pktp->pkt_flags & FLAG_SENSING)  {
16762		/* This pktp is from the unit's REQUEST_SENSE command */
16763		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16764		    "sdintr: sd_handle_request_sense\n");
16765		sd_handle_request_sense(un, bp, xp, pktp);
16766		goto exit;
16767	}
16768
16769	/*
16770	 * Check to see if the command successfully completed as requested;
16771	 * this is the most common case (and also the hot performance path).
16772	 *
16773	 * Requirements for successful completion are:
16774	 * pkt_reason is CMD_CMPLT and packet status is status good.
16775	 * In addition:
16776	 * - A residual of zero indicates successful completion no matter what
16777	 *   the command is.
16778	 * - If the residual is not zero and the command is not a read or
16779	 *   write, then it's still defined as successful completion. In other
16780	 *   words, if the command is a read or write the residual must be
16781	 *   zero for successful completion (except as noted next).
16782	 * - If the residual is not zero and the command is a read or
16783	 *   write, and it's a USCSICMD, then it's still defined as
16784	 *   successful completion.
16785	 */
16786	if ((pktp->pkt_reason == CMD_CMPLT) &&
16787	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16788
16789		/*
16790		 * Since this command is returned with a good status, we
16791		 * can reset the count for Sonoma failover.
16792		 */
16793		un->un_sonoma_failure_count = 0;
16794
16795		/*
16796		 * Return all USCSI commands on good status
16797		 */
16798		if (pktp->pkt_resid == 0) {
16799			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16800			    "sdintr: returning command for resid == 0\n");
16801		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16802		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16803			SD_UPDATE_B_RESID(bp, pktp);
16804			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16805			    "sdintr: returning command for resid != 0\n");
16806		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16807			SD_UPDATE_B_RESID(bp, pktp);
16808			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16809			    "sdintr: returning uscsi command\n");
16810		} else {
16811			goto not_successful;
16812		}
16813		sd_return_command(un, bp);
16814
16815		/*
16816		 * Decrement counter to indicate that the callback routine
16817		 * is done.
16818		 */
16819		un->un_in_callback--;
16820		ASSERT(un->un_in_callback >= 0);
16821		mutex_exit(SD_MUTEX(un));
16822
16823		return;
16824	}
16825
16826not_successful:
16827
16828#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16829	/*
16830	 * The following is based upon knowledge of the underlying transport
16831	 * and its use of DMA resources.  This code should be removed when
16832	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16833	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16834	 * and sd_start_cmds().
16835	 *
16836	 * Free any DMA resources associated with this command if there
16837	 * is a chance it could be retried or enqueued for later retry.
16838	 * If we keep the DMA binding then mpxio cannot reissue the
16839	 * command on another path whenever a path failure occurs.
16840	 *
16841	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16842	 * causes the *entire* transfer to start over again from the
16843	 * beginning of the request, even for PARTIAL chunks that
16844	 * have already transferred successfully.
16845	 *
16846	 * This is only done for non-uscsi commands (and also skipped for the
16847	 * driver's internal RQS command). Also just do this for Fibre Channel
16848	 * devices as these are the only ones that support mpxio.
16849	 */
16850	if ((un->un_f_is_fibre == TRUE) &&
16851	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16852	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16853		scsi_dmafree(pktp);
16854		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16855	}
16856#endif
16857
16858	/*
16859	 * The command did not successfully complete as requested so check
16860	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16861	 * driver command that should not be retried so just return. If
16862	 * FLAG_DIAGNOSE is not set the error will be processed below.
16863	 */
16864	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16865		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16866		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16867		/*
16868		 * Issue a request sense if a check condition caused the error
16869		 * (we handle the auto request sense case above), otherwise
16870		 * just fail the command.
16871		 */
16872		if ((pktp->pkt_reason == CMD_CMPLT) &&
16873		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16874			sd_send_request_sense_command(un, bp, pktp);
16875		} else {
16876			sd_return_failed_command(un, bp, EIO);
16877		}
16878		goto exit;
16879	}
16880
16881	/*
16882	 * The command did not successfully complete as requested so process
16883	 * the error, retry, and/or attempt recovery.
16884	 */
16885	switch (pktp->pkt_reason) {
16886	case CMD_CMPLT:
16887		switch (SD_GET_PKT_STATUS(pktp)) {
16888		case STATUS_GOOD:
16889			/*
16890			 * The command completed successfully with a non-zero
16891			 * residual
16892			 */
16893			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16894			    "sdintr: STATUS_GOOD\n");
16895			sd_pkt_status_good(un, bp, xp, pktp);
16896			break;
16897
16898		case STATUS_CHECK:
16899		case STATUS_TERMINATED:
16900			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16901			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16902			sd_pkt_status_check_condition(un, bp, xp, pktp);
16903			break;
16904
16905		case STATUS_BUSY:
16906			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16907			    "sdintr: STATUS_BUSY\n");
16908			sd_pkt_status_busy(un, bp, xp, pktp);
16909			break;
16910
16911		case STATUS_RESERVATION_CONFLICT:
16912			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16913			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16914			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16915			break;
16916
16917		case STATUS_QFULL:
16918			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16919			    "sdintr: STATUS_QFULL\n");
16920			sd_pkt_status_qfull(un, bp, xp, pktp);
16921			break;
16922
16923		case STATUS_MET:
16924		case STATUS_INTERMEDIATE:
16925		case STATUS_SCSI2:
16926		case STATUS_INTERMEDIATE_MET:
16927		case STATUS_ACA_ACTIVE:
16928			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16929			    "Unexpected SCSI status received: 0x%x\n",
16930			    SD_GET_PKT_STATUS(pktp));
16931			sd_return_failed_command(un, bp, EIO);
16932			break;
16933
16934		default:
16935			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16936			    "Invalid SCSI status received: 0x%x\n",
16937			    SD_GET_PKT_STATUS(pktp));
16938			sd_return_failed_command(un, bp, EIO);
16939			break;
16940
16941		}
16942		break;
16943
16944	case CMD_INCOMPLETE:
16945		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16946		    "sdintr: CMD_INCOMPLETE\n");
16947		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16948		break;
16949	case CMD_TRAN_ERR:
16950		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16951		    "sdintr: CMD_TRAN_ERR\n");
16952		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16953		break;
16954	case CMD_RESET:
16955		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16956		    "sdintr: CMD_RESET\n");
16957		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16958		break;
16959	case CMD_ABORTED:
16960		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16961		    "sdintr: CMD_ABORTED\n");
16962		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16963		break;
16964	case CMD_TIMEOUT:
16965		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16966		    "sdintr: CMD_TIMEOUT\n");
16967		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16968		break;
16969	case CMD_UNX_BUS_FREE:
16970		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16971		    "sdintr: CMD_UNX_BUS_FREE\n");
16972		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16973		break;
16974	case CMD_TAG_REJECT:
16975		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16976		    "sdintr: CMD_TAG_REJECT\n");
16977		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16978		break;
16979	default:
16980		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16981		    "sdintr: default\n");
16982		sd_pkt_reason_default(un, bp, xp, pktp);
16983		break;
16984	}
16985
16986exit:
16987	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16988
16989	/* Decrement counter to indicate that the callback routine is done. */
16990	un->un_in_callback--;
16991	ASSERT(un->un_in_callback >= 0);
16992
16993	/*
16994	 * At this point, the pkt has been dispatched, ie, it is either
16995	 * being re-tried or has been returned to its caller and should
16996	 * not be referenced.
16997	 */
16998
16999	mutex_exit(SD_MUTEX(un));
17000}
17001
17002
17003/*
17004 *    Function: sd_print_incomplete_msg
17005 *
17006 * Description: Prints the error message for a CMD_INCOMPLETE error.
17007 *
17008 *   Arguments: un - ptr to associated softstate for the device.
17009 *		bp - ptr to the buf(9S) for the command.
17010 *		arg - message string ptr
17011 *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
17012 *			or SD_NO_RETRY_ISSUED.
17013 *
17014 *     Context: May be called under interrupt context
17015 */
17016
17017static void
17018sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17019{
17020	struct scsi_pkt	*pktp;
17021	char	*msgp;
17022	char	*cmdp = arg;
17023
17024	ASSERT(un != NULL);
17025	ASSERT(mutex_owned(SD_MUTEX(un)));
17026	ASSERT(bp != NULL);
17027	ASSERT(arg != NULL);
17028	pktp = SD_GET_PKTP(bp);
17029	ASSERT(pktp != NULL);
17030
17031	switch (code) {
17032	case SD_DELAYED_RETRY_ISSUED:
17033	case SD_IMMEDIATE_RETRY_ISSUED:
17034		msgp = "retrying";
17035		break;
17036	case SD_NO_RETRY_ISSUED:
17037	default:
17038		msgp = "giving up";
17039		break;
17040	}
17041
17042	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17043		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17044		    "incomplete %s - %s\n", cmdp, msgp);
17045	}
17046}
17047
17048
17049
17050/*
17051 *    Function: sd_pkt_status_good
17052 *
17053 * Description: Processing for a STATUS_GOOD code in pkt_status.
17054 *
17055 *     Context: May be called under interrupt context
17056 */
17057
17058static void
17059sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
17060	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17061{
17062	char	*cmdp;
17063
17064	ASSERT(un != NULL);
17065	ASSERT(mutex_owned(SD_MUTEX(un)));
17066	ASSERT(bp != NULL);
17067	ASSERT(xp != NULL);
17068	ASSERT(pktp != NULL);
17069	ASSERT(pktp->pkt_reason == CMD_CMPLT);
17070	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
17071	ASSERT(pktp->pkt_resid != 0);
17072
17073	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
17074
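	/* STATUS_GOOD with a non-zero residual is counted as a hard error. */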
17075	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17076	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
17077	case SCMD_READ:
17078		cmdp = "read";
17079		break;
17080	case SCMD_WRITE:
17081		cmdp = "write";
17082		break;
17083	default:
17084		SD_UPDATE_B_RESID(bp, pktp);
17085		sd_return_command(un, bp);
17086		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
17087		return;
17088	}
17089
17090	/*
17091	 * See if we can retry the read/write, preferably immediately.
17092	 * If retries are exhausted, sd_retry_command() will update
17093	 * the b_resid count.
17094	 */
17095	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
17096	    cmdp, EIO, (clock_t)0, NULL);
17097
17098	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
17099}
17100
17101
17102
17103
17104
17105/*
17106 *    Function: sd_handle_request_sense
17107 *
17108 * Description: Processing for non-auto Request Sense command.
17109 *
17110 *   Arguments: un - ptr to associated softstate
17111 *		sense_bp - ptr to buf(9S) for the RQS command
17112 *		sense_xp - ptr to the sd_xbuf for the RQS command
17113 *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
17114 *
17115 *     Context: May be called under interrupt context
17116 */
17117
17118static void
17119sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
17120	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
17121{
17122	struct buf	*cmd_bp;	/* buf for the original command */
17123	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
17124	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
17125
17126	ASSERT(un != NULL);
17127	ASSERT(mutex_owned(SD_MUTEX(un)));
17128	ASSERT(sense_bp != NULL);
17129	ASSERT(sense_xp != NULL);
17130	ASSERT(sense_pktp != NULL);
17131
17132	/*
17133	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
17134	 * RQS command and not the original command.
17135	 */
17136	ASSERT(sense_pktp == un->un_rqs_pktp);
17137	ASSERT(sense_bp   == un->un_rqs_bp);
17138	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
17139	    (FLAG_SENSING | FLAG_HEAD));
17140	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
17141	    FLAG_SENSING) == FLAG_SENSING);
17142
17143	/* These are the bp, xp, and pktp for the original command */
17144	cmd_bp = sense_xp->xb_sense_bp;
17145	cmd_xp = SD_GET_XBUF(cmd_bp);
17146	cmd_pktp = SD_GET_PKTP(cmd_bp);
17147
17148	if (sense_pktp->pkt_reason != CMD_CMPLT) {
17149		/*
17150		 * The REQUEST SENSE command failed.  Release the REQUEST
17151		 * SENSE command for re-use, get back the bp for the original
17152		 * command, and attempt to re-try the original command if
17153		 * FLAG_DIAGNOSE is not set in the original packet.
17154		 */
17155		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17156		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
17157			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
17158			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
17159			    NULL, NULL, EIO, (clock_t)0, NULL);
17160			return;
17161		}
17162	}
17163
17164	/*
17165	 * Save the relevant sense info into the xp for the original cmd.
17166	 *
17167	 * Note: if the request sense failed, the state info will be zero,
17168	 * as set in sd_mark_rqs_busy().
17169	 */
17170	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
17171	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
17172	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
17173	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
17174
17175	/*
17176	 *  Free up the RQS command....
17177	 *  NOTE:
17178	 *	Must do this BEFORE calling sd_validate_sense_data!
17179	 *	sd_validate_sense_data may return the original command in
17180	 *	which case the pkt will be freed and the flags can no
17181	 *	longer be touched.
17182	 *	SD_MUTEX is held through this process until the command
17183	 *	is dispatched based upon the sense data, so there are
17184	 *	no race conditions.
17185	 */
17186	(void) sd_mark_rqs_idle(un, sense_xp);
17187
17188	/*
17189	 * For a retryable command, see if we have valid sense data; if so,
17190	 * turn it over to sd_decode_sense() to figure out the right course
17191	 * of action. Just fail a non-retryable command.
17192	 */
17193	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
17194		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
17195		    SD_SENSE_DATA_IS_VALID) {
17196			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
17197		}
17198	} else {
17199		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
17200		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17201		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
17202		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
17203		sd_return_failed_command(un, cmd_bp, EIO);
17204	}
17205}
17206
17207
17208
17209
17210/*
17211 *    Function: sd_handle_auto_request_sense
17212 *
17213 * Description: Processing for auto-request sense information.
17214 *
17215 *   Arguments: un - ptr to associated softstate
17216 *		bp - ptr to buf(9S) for the command
17217 *		xp - ptr to the sd_xbuf for the command
17218 *		pktp - ptr to the scsi_pkt(9S) for the command
17219 *
17220 *     Context: May be called under interrupt context
17221 */
17222
17223static void
17224sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
17225	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17226{
17227	struct scsi_arq_status *asp;
17228
17229	ASSERT(un != NULL);
17230	ASSERT(mutex_owned(SD_MUTEX(un)));
17231	ASSERT(bp != NULL);
17232	ASSERT(xp != NULL);
17233	ASSERT(pktp != NULL);
17234	ASSERT(pktp != un->un_rqs_pktp);
17235	ASSERT(bp   != un->un_rqs_bp);
17236
17237	/*
17238	 * For auto-request sense, we get a scsi_arq_status back from
17239	 * the HBA, with the sense data in the sts_sensedata member.
17240	 * The pkt_scbp of the packet points to this scsi_arq_status.
17241	 */
17242	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
17243
17244	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
17245		/*
17246		 * The auto REQUEST SENSE failed; see if we can re-try
17247		 * the original command.
17248		 */
17249		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17250		    "auto request sense failed (reason=%s)\n",
17251		    scsi_rname(asp->sts_rqpkt_reason));
17252
17253		sd_reset_target(un, pktp);
17254
17255		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17256		    NULL, NULL, EIO, (clock_t)0, NULL);
17257		return;
17258	}
17259
17260	/* Save the relevant sense info into the xp for the original cmd. */
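	/* sts_rqpkt_status is a struct scsi_status; capture it as a raw byte. */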
17261	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
17262	xp->xb_sense_state  = asp->sts_rqpkt_state;
17263	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
17264	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
17265	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
17266
17267	/*
17268	 * See if we have valid sense data; if so, turn it over to
17269	 * sd_decode_sense() to figure out the right course of action.
17270	 */
17271	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
17272		sd_decode_sense(un, bp, xp, pktp);
17273	}
17274}
17275
17276
17277/*
17278 *    Function: sd_print_sense_failed_msg
17279 *
17280 * Description: Print log message when RQS has failed.
17281 *
17282 *   Arguments: un - ptr to associated softstate
17283 *		bp - ptr to buf(9S) for the command
17284 *		arg - generic message string ptr
17285 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17286 *			or SD_NO_RETRY_ISSUED
17287 *
17288 *     Context: May be called from interrupt context
17289 */
17290
17291static void
17292sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
17293	int code)
17294{
17295	char	*msgp = arg;
17296
17297	ASSERT(un != NULL);
17298	ASSERT(mutex_owned(SD_MUTEX(un)));
17299	ASSERT(bp != NULL);
17300
17301	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
17302		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
17303	}
17304}
17305
17306
17307/*
17308 *    Function: sd_validate_sense_data
17309 *
17310 * Description: Check the given sense data for validity.
17311 *		If the sense data is not valid, the command will
17312 *		be either failed or retried!
17313 *
17314 * Return Code: SD_SENSE_DATA_IS_INVALID
17315 *		SD_SENSE_DATA_IS_VALID
17316 *
17317 *     Context: May be called from interrupt context
17318 */
17319
17320static int
17321sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
17322{
17323	struct scsi_extended_sense *esp;
17324	struct	scsi_pkt *pktp;
17325	size_t	actual_len;
17326	char	*msgp = NULL;
17327
17328	ASSERT(un != NULL);
17329	ASSERT(mutex_owned(SD_MUTEX(un)));
17330	ASSERT(bp != NULL);
17331	ASSERT(bp != un->un_rqs_bp);
17332	ASSERT(xp != NULL);
17333
17334	pktp = SD_GET_PKTP(bp);
17335	ASSERT(pktp != NULL);
17336
17337	/*
17338	 * Check the status of the RQS command (auto or manual).
17339	 */
17340	switch (xp->xb_sense_status & STATUS_MASK) {
17341	case STATUS_GOOD:
17342		break;
17343
17344	case STATUS_RESERVATION_CONFLICT:
17345		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
17346		return (SD_SENSE_DATA_IS_INVALID);
17347
17348	case STATUS_BUSY:
17349		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17350		    "Busy Status on REQUEST SENSE\n");
17351		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
17352		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17353		return (SD_SENSE_DATA_IS_INVALID);
17354
17355	case STATUS_QFULL:
17356		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17357		    "QFULL Status on REQUEST SENSE\n");
17358		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
17359		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
17360		return (SD_SENSE_DATA_IS_INVALID);
17361
17362	case STATUS_CHECK:
17363	case STATUS_TERMINATED:
17364		msgp = "Check Condition on REQUEST SENSE\n";
17365		goto sense_failed;
17366
17367	default:
17368		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
17369		goto sense_failed;
17370	}
17371
17372	/*
17373	 * See if we got the minimum required amount of sense data.
17374	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
17375	 * or less.
17376	 */
17377	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
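	/* xb_sense_resid is the count of sense bytes that were NOT transferred. */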
17378	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
17379	    (actual_len == 0)) {
17380		msgp = "Request Sense couldn't get sense data\n";
17381		goto sense_failed;
17382	}
17383
17384	if (actual_len < SUN_MIN_SENSE_LENGTH) {
17385		msgp = "Not enough sense information\n";
17386		goto sense_failed;
17387	}
17388
17389	/*
17390	 * We require the extended sense data
17391	 */
17392	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
17393	if (esp->es_class != CLASS_EXTENDED_SENSE) {
17394		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17395			static char tmp[8];
17396			static char buf[148];
17397			char *p = (char *)(xp->xb_sense_data);
17398			int i;
17399
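			/* tmp[] and buf[] are static; sd_sense_mutex serializes access. */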
17400			mutex_enter(&sd_sense_mutex);
17401			(void) strcpy(buf, "undecodable sense information:");
17402			for (i = 0; i < actual_len; i++) {
17403				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
17404				(void) strcpy(&buf[strlen(buf)], tmp);
17405			}
17406			i = strlen(buf);
17407			(void) strcpy(&buf[i], "-(assumed fatal)\n");
17408			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
17409			mutex_exit(&sd_sense_mutex);
17410		}
17411		/* Note: Legacy behavior, fail the command with no retry */
17412		sd_return_failed_command(un, bp, EIO);
17413		return (SD_SENSE_DATA_IS_INVALID);
17414	}
17415
17416	/*
17417	 * Check that es_code is valid (es_class concatenated with es_code
17418	 * makes up the "response code" field).  es_class will always be 7, so
17419	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
17420	 * sense data format.
17421	 */
17422	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
17423	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
17424	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
17425	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
17426	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
17427		goto sense_failed;
17428	}
17429
17430	return (SD_SENSE_DATA_IS_VALID);
17431
17432sense_failed:
17433	/*
17434	 * If the request sense failed (for whatever reason), attempt
17435	 * to retry the original command.
17436	 */
17437#if defined(__i386) || defined(__amd64)
17438	/*
17439	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
17440	 * sddef.h for the SPARC platform, while x86 uses one binary
17441	 * for both SCSI and FC.
17442	 * The value used here needs to be adjusted whenever
17443	 * SD_RETRY_DELAY changes in sddef.h.
17444	 */
17445	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17446	    sd_print_sense_failed_msg, msgp, EIO,
17447	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
17448#else
17449	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17450	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
17451#endif
17452
17453	return (SD_SENSE_DATA_IS_INVALID);
17454}
17455
17456
17457
17458/*
17459 *    Function: sd_decode_sense
17460 *
17461 * Description: Take recovery action(s) when SCSI Sense Data is received.
17462 *
17463 *     Context: Interrupt context.
17464 */
17465
17466static void
17467sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17468	struct scsi_pkt *pktp)
17469{
17470	uint8_t sense_key;
17471
17472	ASSERT(un != NULL);
17473	ASSERT(mutex_owned(SD_MUTEX(un)));
17474	ASSERT(bp != NULL);
17475	ASSERT(bp != un->un_rqs_bp);
17476	ASSERT(xp != NULL);
17477	ASSERT(pktp != NULL);
17478
17479	sense_key = scsi_sense_key(xp->xb_sense_data);
17480
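	/* Dispatch to the recovery handler for this sense key. */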
17481	switch (sense_key) {
17482	case KEY_NO_SENSE:
17483		sd_sense_key_no_sense(un, bp, xp, pktp);
17484		break;
17485	case KEY_RECOVERABLE_ERROR:
17486		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17487		    bp, xp, pktp);
17488		break;
17489	case KEY_NOT_READY:
17490		sd_sense_key_not_ready(un, xp->xb_sense_data,
17491		    bp, xp, pktp);
17492		break;
17493	case KEY_MEDIUM_ERROR:
17494	case KEY_HARDWARE_ERROR:
17495		sd_sense_key_medium_or_hardware_error(un,
17496		    xp->xb_sense_data, bp, xp, pktp);
17497		break;
17498	case KEY_ILLEGAL_REQUEST:
17499		sd_sense_key_illegal_request(un, bp, xp, pktp);
17500		break;
17501	case KEY_UNIT_ATTENTION:
17502		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17503		    bp, xp, pktp);
17504		break;
17505	case KEY_WRITE_PROTECT:
17506	case KEY_VOLUME_OVERFLOW:
17507	case KEY_MISCOMPARE:
17508		sd_sense_key_fail_command(un, bp, xp, pktp);
17509		break;
17510	case KEY_BLANK_CHECK:
17511		sd_sense_key_blank_check(un, bp, xp, pktp);
17512		break;
17513	case KEY_ABORTED_COMMAND:
17514		sd_sense_key_aborted_command(un, bp, xp, pktp);
17515		break;
17516	case KEY_VENDOR_UNIQUE:
17517	case KEY_COPY_ABORTED:
17518	case KEY_EQUAL:
17519	case KEY_RESERVED:
17520	default:
17521		sd_sense_key_default(un, xp->xb_sense_data,
17522		    bp, xp, pktp);
17523		break;
17524	}
17525}
17526
17527
17528/*
17529 *    Function: sd_dump_memory
17530 *
17531 * Description: Debug logging routine to print the contents of a user provided
17532 *		buffer. The output of the buffer is broken up into 256 byte
17533 *		segments due to a size constraint of the scsi_log()
17534 *		implementation.
17535 *
17536 *   Arguments: un - ptr to softstate
17537 *		comp - component mask
17538 *		title - "title" string to preceed data when printed
17539 *		title - "title" string to precede data when printed
17540 *		len - size of data block to be printed
17541 *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17542 *
17543 *     Context: May be called from interrupt context
17544 */
17545
17546#define	SD_DUMP_MEMORY_BUF_SIZE	256
17547
17548static char *sd_dump_format_string[] = {
17549		" 0x%02x",
17550		" %c"
17551};
17552
17553static void
17554sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17555    int len, int fmt)
17556{
17557	int	i, j;
17558	int	avail_count;
17559	int	start_offset;
17560	int	end_offset;
17561	size_t	entry_len;
17562	char	*bufp;
17563	char	*local_buf;
17564	char	*format_string;
17565
17566	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17567
17568	/*
17569	 * In the debug version of the driver, this function is called from a
17570	 * number of places which are NOPs in the release driver.
17571	 * The debug driver therefore has additional methods of filtering
17572	 * debug output.
17573	 */
17574#ifdef SDDEBUG
17575	/*
17576	 * In the debug version of the driver we can reduce the amount of debug
17577	 * messages by setting sd_error_level to something other than
17578	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17579	 * sd_component_mask.
17580	 */
17581	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17582	    (sd_error_level != SCSI_ERR_ALL)) {
17583		return;
17584	}
17585	if (((sd_component_mask & comp) == 0) ||
17586	    (sd_error_level != SCSI_ERR_ALL)) {
17587		return;
17588	}
17589#else
17590	if (sd_error_level != SCSI_ERR_ALL) {
17591		return;
17592	}
17593#endif
17594
17595	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17596	bufp = local_buf;
17597	/*
17598	 * Available length is the length of local_buf[], minus the
17599	 * length of the title string, minus one for the ":", minus
17600	 * one for the newline, minus one for the NULL terminator.
17601	 * This gives the #bytes available for holding the printed
17602	 * values from the given data buffer.
17603	 */
17604	if (fmt == SD_LOG_HEX) {
17605		format_string = sd_dump_format_string[0];
17606	} else /* SD_LOG_CHAR */ {
17607		format_string = sd_dump_format_string[1];
17608	}
17609	/*
17610	 * Available count is the number of elements from the given
17611	 * data buffer that we can fit into the available length.
17612	 * This is based upon the size of the format string used.
17613	 * Make one entry and find it's size.
17614	 * Make one entry and find its size.
17615	(void) sprintf(bufp, format_string, data[0]);
17616	entry_len = strlen(bufp);
17617	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
17618
17619	j = 0;
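	/* Emit the data in avail_count-sized chunks, one scsi_log() call each. */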
17620	while (j < len) {
17621		bufp = local_buf;
17622		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17623		start_offset = j;
17624
17625		end_offset = start_offset + avail_count;
17626
17627		(void) sprintf(bufp, "%s:", title);
17628		bufp += strlen(bufp);
17629		for (i = start_offset; ((i < end_offset) && (j < len));
17630		    i++, j++) {
17631			(void) sprintf(bufp, format_string, data[i]);
17632			bufp += entry_len;
17633		}
17634		(void) sprintf(bufp, "\n");
17635
17636		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17637	}
17638	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17639}
17640
17641/*
17642 *    Function: sd_print_sense_msg
17643 *
17644 * Description: Log a message based upon the given sense data.
17645 *
17646 *   Arguments: un - ptr to associated softstate
17647 *		bp - ptr to buf(9S) for the command
17648 *		arg - ptr to associated sd_sense_info struct
17649 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17650 *			or SD_NO_RETRY_ISSUED
17651 *
17652 *     Context: May be called from interrupt context
17653 */
17654
17655static void
17656sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17657{
17658	struct sd_xbuf	*xp;
17659	struct scsi_pkt	*pktp;
17660	uint8_t *sensep;
17661	daddr_t request_blkno;
17662	diskaddr_t err_blkno;
17663	int severity;
17664	int pfa_flag;
17665	extern struct scsi_key_strings scsi_cmds[];
17666
17667	ASSERT(un != NULL);
17668	ASSERT(mutex_owned(SD_MUTEX(un)));
17669	ASSERT(bp != NULL);
17670	xp = SD_GET_XBUF(bp);
17671	ASSERT(xp != NULL);
17672	pktp = SD_GET_PKTP(bp);
17673	ASSERT(pktp != NULL);
17674	ASSERT(arg != NULL);
17675
17676	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17677	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17678
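	/* A command being retried is reported at retryable severity. */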
17679	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17680	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17681		severity = SCSI_ERR_RETRYABLE;
17682	}
17683
17684	/* Use absolute block number for the request block number */
17685	request_blkno = xp->xb_blkno;
17686
17687	/*
17688	 * Now try to get the error block number from the sense data
17689	 */
17690	sensep = xp->xb_sense_data;
17691
17692	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
17693		(uint64_t *)&err_blkno)) {
17694		/*
17695		 * We retrieved the error block number from the information
17696		 * portion of the sense data.
17697		 *
17698		 * For USCSI commands we are better off using the error
17699		 * block no. as the requested block no. (This is the best
17700		 * we can estimate.)
17701		 */
17702		if ((SD_IS_BUFIO(xp) == FALSE) &&
17703		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17704			request_blkno = err_blkno;
17705		}
17706	} else {
17707		/*
17708		 * Without the es_valid bit set (for fixed format) or an
17709		 * information descriptor (for descriptor format) we cannot
17710		 * be certain of the error blkno, so just use the
17711		 * request_blkno.
17712		 */
17713		err_blkno = (diskaddr_t)request_blkno;
17714	}
17715
17716	/*
17717	 * The following will log the buffer contents for the release driver
17718	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17719	 * level is set to verbose.
17720	 */
17721	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17722	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17723	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17724	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17725
17726	if (pfa_flag == FALSE) {
17727		/* This is normally only set for USCSI */
17728		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17729			return;
17730		}
17731
17732		if ((SD_IS_BUFIO(xp) == TRUE) &&
17733		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17734		    (severity < sd_error_level))) {
17735			return;
17736		}
17737	}
17738
17739	/*
17740	 * Check for Sonoma Failover and keep a count of how many failed I/O's
17741	 * Check for Sonoma Failover and keep a count of how many I/Os failed.
17742	if ((SD_IS_LSI(un)) &&
17743	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
17744	    (scsi_sense_asc(sensep) == 0x94) &&
17745	    (scsi_sense_ascq(sensep) == 0x01)) {
17746		un->un_sonoma_failure_count++;
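		/* Log only the first such failure; suppress the repeats. */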
17747		if (un->un_sonoma_failure_count > 1) {
17748			return;
17749		}
17750	}
17751
17752	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17753	    request_blkno, err_blkno, scsi_cmds,
17754	    (struct scsi_extended_sense *)sensep,
17755	    un->un_additional_codes, NULL);
17756}
17757
17758/*
17759 *    Function: sd_sense_key_no_sense
17760 *
17761 * Description: Recovery action when sense data was not received.
17762 *
17763 *     Context: May be called from interrupt context
17764 */
17765
17766static void
17767sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17768	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17769{
17770	struct sd_sense_info	si;
17771
17772	ASSERT(un != NULL);
17773	ASSERT(mutex_owned(SD_MUTEX(un)));
17774	ASSERT(bp != NULL);
17775	ASSERT(xp != NULL);
17776	ASSERT(pktp != NULL);
17777
17778	si.ssi_severity = SCSI_ERR_FATAL;
17779	si.ssi_pfa_flag = FALSE;
17780
17781	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17782
17783	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17784	    &si, EIO, (clock_t)0, NULL);
17785}
17786
17787
17788/*
17789 *    Function: sd_sense_key_recoverable_error
17790 *
17791 * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17792 *
17793 *     Context: May be called from interrupt context
17794 */
17795
17796static void
17797sd_sense_key_recoverable_error(struct sd_lun *un,
17798	uint8_t *sense_datap,
17799	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17800{
17801	struct sd_sense_info	si;
17802	uint8_t asc = scsi_sense_asc(sense_datap);
17803
17804	ASSERT(un != NULL);
17805	ASSERT(mutex_owned(SD_MUTEX(un)));
17806	ASSERT(bp != NULL);
17807	ASSERT(xp != NULL);
17808	ASSERT(pktp != NULL);
17809
17810	/*
17811	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17812	 */
17813	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17814		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17815		si.ssi_severity = SCSI_ERR_INFO;
17816		si.ssi_pfa_flag = TRUE;
17817	} else {
17818		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17819		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17820		si.ssi_severity = SCSI_ERR_RECOVERED;
17821		si.ssi_pfa_flag = FALSE;
17822	}
17823
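	/*
	 * If the entire transfer completed, report the recovered error and
	 * return the command; otherwise retry to pick up the residual.
	 */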
17824	if (pktp->pkt_resid == 0) {
17825		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17826		sd_return_command(un, bp);
17827		return;
17828	}
17829
17830	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17831	    &si, EIO, (clock_t)0, NULL);
17832}
17833
17834
17835
17836
17837/*
17838 *    Function: sd_sense_key_not_ready
17839 *
17840 * Description: Recovery actions for a SCSI "Not Ready" sense key.
17841 *
17842 *     Context: May be called from interrupt context
17843 */
17844
17845static void
17846sd_sense_key_not_ready(struct sd_lun *un,
17847	uint8_t *sense_datap,
17848	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17849{
17850	struct sd_sense_info	si;
17851	uint8_t asc = scsi_sense_asc(sense_datap);
17852	uint8_t ascq = scsi_sense_ascq(sense_datap);
17853
17854	ASSERT(un != NULL);
17855	ASSERT(mutex_owned(SD_MUTEX(un)));
17856	ASSERT(bp != NULL);
17857	ASSERT(xp != NULL);
17858	ASSERT(pktp != NULL);
17859
17860	si.ssi_severity = SCSI_ERR_FATAL;
17861	si.ssi_pfa_flag = FALSE;
17862
17863	/*
17864	 * Update error stats after first NOT READY error. Disks may have
17865	 * been powered down and may need to be restarted.  For CDROMs,
17866	 * report NOT READY errors only if media is present.
17867	 */
17868	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17869	    (xp->xb_retry_count > 0)) {
17870		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17871		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17872	}
17873
17874	/*
17875	 * Just fail if the "not ready" retry limit has been reached.
17876	 */
17877	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17878		/* Special check for error message printing for removables. */
17879		if (un->un_f_has_removable_media && (asc == 0x04) &&
17880		    (ascq >= 0x04)) {
17881			si.ssi_severity = SCSI_ERR_ALL;
17882		}
17883		goto fail_command;
17884	}
17885
17886	/*
17887	 * Check the ASC and ASCQ in the sense data as needed, to determine
17888	 * what to do.
17889	 */
17890	switch (asc) {
17891	case 0x04:	/* LOGICAL UNIT NOT READY */
17892		/*
17893		 * Disk drives that don't spin up result in a very long delay
17894		 * in format(1M) without any warning messages. We will log a
17895		 * message here if the error level is set to verbose.
17896		 */
17897		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17898			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17899			    "logical unit not ready, resetting disk\n");
17900		}
17901
17902		/*
17903		 * There are different requirements for CDROMs and disks for
17904		 * the number of retries.  If a CD-ROM is giving this, it is
17905		 * probably reading TOC and is in the process of getting
17906		 * ready, so we should keep on trying for a long time to make
17907		 * sure that all types of media are taken into account (for
17908		 * some media the drive takes a long time to read TOC).  For
17909		 * disks we do not want to retry this too many times as this
17910		 * can cause a long hang in format when the drive refuses to
17911		 * spin up (a very common failure).
17912		 */
17913		switch (ascq) {
17914		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17915			/*
17916			 * Disk drives frequently refuse to spin up which
17917			 * results in a very long hang in format without
17918			 * warning messages.
17919			 *
17920			 * Note: This code preserves the legacy behavior of
17921			 * comparing xb_retry_count against zero for fibre
17922			 * channel targets instead of comparing against the
17923			 * un_reset_retry_count value.  The reason for this
17924			 * discrepancy has been so utterly lost beneath the
17925			 * Sands of Time that even Indiana Jones could not
17926			 * find it.
17927			 */
17928			if (un->un_f_is_fibre == TRUE) {
17929				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17930				    (xp->xb_retry_count > 0)) &&
17931				    (un->un_startstop_timeid == NULL)) {
17932					scsi_log(SD_DEVINFO(un), sd_label,
17933					    CE_WARN, "logical unit not ready, "
17934					    "resetting disk\n");
17935					sd_reset_target(un, pktp);
17936				}
17937			} else {
17938				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17939				    (xp->xb_retry_count >
17940				    un->un_reset_retry_count)) &&
17941				    (un->un_startstop_timeid == NULL)) {
17942					scsi_log(SD_DEVINFO(un), sd_label,
17943					    CE_WARN, "logical unit not ready, "
17944					    "resetting disk\n");
17945					sd_reset_target(un, pktp);
17946				}
17947			}
17948			break;
17949
17950		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17951			/*
17952			 * If the target is in the process of becoming
17953			 * ready, just proceed with the retry. This can
17954			 * happen with CD-ROMs that take a long time to
17955			 * read TOC after a power cycle or reset.
17956			 */
17957			goto do_retry;
17958
17959		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17960			break;
17961
17962		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17963			/*
17964			 * Retries cannot help here so just fail right away.
17965			 */
17966			goto fail_command;
17967
17968		case 0x88:
17969			/*
17970			 * Vendor-unique code for T3/T4: it indicates a
17971			 * path problem in a multipathed config, but as far as
17972			 * the target driver is concerned it equates to a fatal
17973			 * error, so we should just fail the command right away
17974			 * (without printing anything to the console). If this
17975			 * is not a T3/T4, fall thru to the default recovery
17976			 * action.
17977			 * T3/T4 is FC only, so there is no need to check is_fibre.
17978			 */
17979			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17980				sd_return_failed_command(un, bp, EIO);
17981				return;
17982			}
17983			/* FALLTHRU */
17984
17985		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17986		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17987		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17988		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17989		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17990		default:    /* Possible future codes in SCSI spec? */
17991			/*
17992			 * For removable-media devices, do not retry if
17993			 * ASCQ > 2 as these result mostly from USCSI commands
17994			 * on MMC devices issued to check status of an
17995			 * operation initiated in immediate mode.  Also for
17996			 * ASCQ >= 4 do not print console messages as these
17997			 * mainly represent a user-initiated operation
17998			 * instead of a system failure.
17999			 */
18000			if (un->un_f_has_removable_media) {
18001				si.ssi_severity = SCSI_ERR_ALL;
18002				goto fail_command;
18003			}
18004			break;
18005		}
18006
18007		/*
18008		 * As part of our recovery attempt for the NOT READY
18009		 * condition, we issue a START STOP UNIT command. However
18010		 * we want to wait for a short delay before attempting this
18011		 * as there may still be more commands coming back from the
18012		 * target with the check condition. To do this we use
18013		 * timeout(9F) to call sd_start_stop_unit_callback() after
18014		 * the delay interval expires. (sd_start_stop_unit_callback()
18015		 * dispatches sd_start_stop_unit_task(), which will issue
18016		 * the actual START STOP UNIT command.) The delay interval
18017		 * is one-half of the delay that we will use to retry the
18018		 * command that generated the NOT READY condition.
18019		 *
18020		 * Note that we could just dispatch sd_start_stop_unit_task()
18021		 * from here and allow it to sleep for the delay interval,
18022		 * but then we would be tying up the taskq thread
18023		 * unnecessarily for the duration of the delay.
18024		 *
18025		 * Do not issue the START STOP UNIT if the current command
18026		 * is already a START STOP UNIT.
18027		 */
18028		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
18029			break;
18030		}
18031
18032		/*
18033		 * Do not schedule the timeout if one is already pending.
18034		 */
18035		if (un->un_startstop_timeid != NULL) {
18036			SD_INFO(SD_LOG_ERROR, un,
18037			    "sd_sense_key_not_ready: restart already issued to"
18038			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
18039			    ddi_get_instance(SD_DEVINFO(un)));
18040			break;
18041		}
18042
18043		/*
18044		 * Schedule the START STOP UNIT command, then queue the command
18045		 * for a retry.
18046		 *
18047		 * Note: A timeout is not scheduled for this retry because we
18048		 * want the retry to be serial with the START_STOP_UNIT. The
18049		 * retry will be started when the START_STOP_UNIT is completed
18050		 * in sd_start_stop_unit_task.
18051		 */
18052		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
18053		    un, SD_BSY_TIMEOUT / 2);
18054		xp->xb_retry_count++;
18055		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
18056		return;
18057
18058	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
18059		if (sd_error_level < SCSI_ERR_RETRYABLE) {
18060			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18061			    "unit does not respond to selection\n");
18062		}
18063		break;
18064
18065	case 0x3A:	/* MEDIUM NOT PRESENT */
18066		if (sd_error_level >= SCSI_ERR_FATAL) {
18067			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18068			    "Caddy not inserted in drive\n");
18069		}
18070
18071		sr_ejected(un);
18072		un->un_mediastate = DKIO_EJECTED;
18073		/* The state has changed, inform the media watch routines */
18074		cv_broadcast(&un->un_state_cv);
18075		/* Just fail if no media is present in the drive. */
18076		goto fail_command;
18077
18078	default:
18079		if (sd_error_level < SCSI_ERR_RETRYABLE) {
18080			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
18081			    "Unit not Ready. Additional sense code 0x%x\n",
18082			    asc);
18083		}
18084		break;
18085	}
18086
18087do_retry:
18088
18089	/*
18090	 * Retry the command, as some targets may report NOT READY for
18091	 * several seconds after being reset.
18092	 */
18093	xp->xb_retry_count++;
18094	si.ssi_severity = SCSI_ERR_RETRYABLE;
18095	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
18096	    &si, EIO, SD_BSY_TIMEOUT, NULL);
18097
18098	return;
18099
18100fail_command:
18101	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18102	sd_return_failed_command(un, bp, EIO);
18103}
18104
18105
18106
18107/*
18108 *    Function: sd_sense_key_medium_or_hardware_error
18109 *
18110 * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
18111 *		sense key.
18112 *
18113 *     Context: May be called from interrupt context
18114 */
18115
18116static void
18117sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
18118	uint8_t *sense_datap,
18119	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18120{
18121	struct sd_sense_info	si;
18122	uint8_t sense_key = scsi_sense_key(sense_datap);
18123	uint8_t asc = scsi_sense_asc(sense_datap);
18124
18125	ASSERT(un != NULL);
18126	ASSERT(mutex_owned(SD_MUTEX(un)));
18127	ASSERT(bp != NULL);
18128	ASSERT(xp != NULL);
18129	ASSERT(pktp != NULL);
18130
18131	si.ssi_severity = SCSI_ERR_FATAL;
18132	si.ssi_pfa_flag = FALSE;
18133
18134	if (sense_key == KEY_MEDIUM_ERROR) {
18135		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
18136	}
18137
18138	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18139
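	/*
	 * Attempt a reset once, when the retry count first reaches the
	 * reset threshold.  SD_MUTEX is dropped across the scsi_reset()
	 * calls below.
	 */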
18140	if ((un->un_reset_retry_count != 0) &&
18141	    (xp->xb_retry_count == un->un_reset_retry_count)) {
18142		mutex_exit(SD_MUTEX(un));
18143		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
18144		if (un->un_f_allow_bus_device_reset == TRUE) {
18145
18146			boolean_t try_resetting_target = B_TRUE;
18147
18148			/*
18149			 * We need to be able to handle specific ASCs when we
18150			 * are handling a KEY_HARDWARE_ERROR. In particular,
18151			 * taking the default action of resetting the target may
18152			 * not be the appropriate way to attempt recovery.
18153			 * Resetting a target because of a single LUN failure
18154			 * victimizes all LUNs on that target.
18155			 *
18156			 * This is true for the LSI arrays: if an LSI array
18157			 * controller returns an ASC of 0x84 (LUN Dead), we
18158			 * should trust it.
18159			 */
18160
18161			if (sense_key == KEY_HARDWARE_ERROR) {
18162				switch (asc) {
18163				case 0x84:
18164					if (SD_IS_LSI(un)) {
18165						try_resetting_target = B_FALSE;
18166					}
18167					break;
18168				default:
18169					break;
18170				}
18171			}
18172
18173			if (try_resetting_target == B_TRUE) {
18174				int reset_retval = 0;
18175				if (un->un_f_lun_reset_enabled == TRUE) {
18176					SD_TRACE(SD_LOG_IO_CORE, un,
18177					    "sd_sense_key_medium_or_hardware_"
18178					    "error: issuing RESET_LUN\n");
18179					reset_retval =
18180					    scsi_reset(SD_ADDRESS(un),
18181					    RESET_LUN);
18182				}
18183				if (reset_retval == 0) {
18184					SD_TRACE(SD_LOG_IO_CORE, un,
18185					    "sd_sense_key_medium_or_hardware_"
18186					    "error: issuing RESET_TARGET\n");
18187					(void) scsi_reset(SD_ADDRESS(un),
18188					    RESET_TARGET);
18189				}
18190			}
18191		}
18192		mutex_enter(SD_MUTEX(un));
18193	}
18194
18195	/*
18196	 * This really ought to be a fatal error, but we will retry anyway
18197	 * as some drives report this as a spurious error.
18198	 */
18199	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18200	    &si, EIO, (clock_t)0, NULL);
18201}
18202
18203
18204
18205/*
18206 *    Function: sd_sense_key_illegal_request
18207 *
18208 * Description: Recovery actions for a SCSI "Illegal Request" sense key.
18209 *
18210 *     Context: May be called from interrupt context
18211 */
18212
18213static void
18214sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
18215	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18216{
18217	struct sd_sense_info	si;
18218
18219	ASSERT(un != NULL);
18220	ASSERT(mutex_owned(SD_MUTEX(un)));
18221	ASSERT(bp != NULL);
18222	ASSERT(xp != NULL);
18223	ASSERT(pktp != NULL);
18224
18225	SD_UPDATE_ERRSTATS(un, sd_softerrs);
18226	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
18227
18228	si.ssi_severity = SCSI_ERR_INFO;
18229	si.ssi_pfa_flag = FALSE;
18230
18231	/* Pointless to retry if the target thinks it's an illegal request */
18232	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18233	sd_return_failed_command(un, bp, EIO);
18234}
18235
18236
18237
18238
18239/*
18240 *    Function: sd_sense_key_unit_attention
18241 *
18242 * Description: Recovery actions for a SCSI "Unit Attention" sense key.
18243 *
18244 *     Context: May be called from interrupt context
18245 */
18246
18247static void
18248sd_sense_key_unit_attention(struct sd_lun *un,
18249	uint8_t *sense_datap,
18250	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18251{
18252	/*
18253	 * For UNIT ATTENTION we allow retries for one minute. Devices
18254	 * like Sonoma can continue to return UNIT ATTENTION for close to
18255	 * a minute under certain conditions.
18256	 */
18257	int	retry_check_flag = SD_RETRIES_UA;
18258	boolean_t	kstat_updated = B_FALSE;
18259	struct	sd_sense_info		si;
18260	uint8_t asc = scsi_sense_asc(sense_datap);
18261
18262	ASSERT(un != NULL);
18263	ASSERT(mutex_owned(SD_MUTEX(un)));
18264	ASSERT(bp != NULL);
18265	ASSERT(xp != NULL);
18266	ASSERT(pktp != NULL);
18267
18268	si.ssi_severity = SCSI_ERR_INFO;
18269	si.ssi_pfa_flag = FALSE;
18270
18271
18272	switch (asc) {
18273	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
18274		if (sd_report_pfa != 0) {
18275			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
18276			si.ssi_pfa_flag = TRUE;
18277			retry_check_flag = SD_RETRIES_STANDARD;
18278			goto do_retry;
18279		}
18280
18281		break;
18282
18283	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
18284		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
18285			un->un_resvd_status |=
18286			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
18287		}
18288#ifdef _LP64
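		/*
		 * A reset may have disabled descriptor sense on a device
		 * large enough to need it (capacity beyond Group 1 CDB
		 * addressing), so re-enable it from taskq context.
		 */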
18289		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
18290			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
18291			    un, KM_NOSLEEP) == 0) {
18292				/*
18293				 * If we can't dispatch the task we'll just
18294				 * live without descriptor sense.  We can
18295				 * try again on the next "unit attention".
18296				 */
18297				SD_ERROR(SD_LOG_ERROR, un,
18298				    "sd_sense_key_unit_attention: "
18299				    "Could not dispatch "
18300				    "sd_reenable_dsense_task\n");
18301			}
18302		}
18303#endif /* _LP64 */
18304		/* FALLTHRU */
18305
18306	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
18307		if (!un->un_f_has_removable_media) {
18308			break;
18309		}
18310
18311		/*
18312		 * When we get a unit attention from a removable-media device,
18313		 * it may be in a state that will take a long time to recover
18314		 * (e.g., from a reset).  Since we are executing in interrupt
18315		 * context here, we cannot wait around for the device to come
18316		 * back. So hand this command off to sd_media_change_task()
18317		 * for deferred processing under taskq thread context. (Note
18318		 * that the command still may be failed if a problem is
18319		 * encountered at a later time.)
18320		 */
18321		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
18322		    KM_NOSLEEP) == 0) {
18323			/*
18324			 * Cannot dispatch the request so fail the command.
18325			 */
18326			SD_UPDATE_ERRSTATS(un, sd_harderrs);
18327			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18328			si.ssi_severity = SCSI_ERR_FATAL;
18329			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18330			sd_return_failed_command(un, bp, EIO);
18331		}
18332
18333		/*
18334		 * If we failed to dispatch sd_media_change_task(), the kstat
18335		 * was already updated above. If the dispatch succeeded, the
18336		 * kstat will be updated later if an error is encountered.
18337		 * Either way, set the kstat_updated flag here.
18338		 */
18339		kstat_updated = B_TRUE;
18340
18341		/*
18342		 * Either the command has been successfully dispatched to a
18343		 * task Q for retrying, or the dispatch failed. In either case
18344		 * do NOT retry again by calling sd_retry_command. That would set
18345		 * up two retries of the same command; when one completed and
18346		 * freed its resources, the other would access freed memory,
18347		 * a bad thing.
18348		 */
18349		return;
18350
18351	default:
18352		break;
18353	}
18354
18355	/*
18356	 * Update kstat if we haven't done that.
18357	 */
18358	if (!kstat_updated) {
18359		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18360		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18361	}
18362
18363do_retry:
18364	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
18365	    EIO, SD_UA_RETRY_DELAY, NULL);
18366}
18367
18368
18369
18370/*
18371 *    Function: sd_sense_key_fail_command
18372 *
18373 * Description: Used to fail a command when we don't like the sense key that
18374 *		was returned.
18375 *
18376 *     Context: May be called from interrupt context
18377 */
18378
18379static void
18380sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18381	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18382{
18383	struct sd_sense_info	si;
18384
18385	ASSERT(un != NULL);
18386	ASSERT(mutex_owned(SD_MUTEX(un)));
18387	ASSERT(bp != NULL);
18388	ASSERT(xp != NULL);
18389	ASSERT(pktp != NULL);
18390
18391	si.ssi_severity = SCSI_ERR_FATAL;
18392	si.ssi_pfa_flag = FALSE;
18393
18394	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18395	sd_return_failed_command(un, bp, EIO);
18396}
18397
18398
18399
18400/*
18401 *    Function: sd_sense_key_blank_check
18402 *
18403 * Description: Recovery actions for a SCSI "Blank Check" sense key.
18404 *		Has no monetary connotation.
18405 *
18406 *     Context: May be called from interrupt context
18407 */
18408
18409static void
18410sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18411	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18412{
18413	struct sd_sense_info	si;
18414
18415	ASSERT(un != NULL);
18416	ASSERT(mutex_owned(SD_MUTEX(un)));
18417	ASSERT(bp != NULL);
18418	ASSERT(xp != NULL);
18419	ASSERT(pktp != NULL);
18420
18421	/*
18422	 * Blank check is not fatal for removable devices, therefore
18423	 * it does not require a console message.
18424	 */
18425	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18426	    SCSI_ERR_FATAL;
18427	si.ssi_pfa_flag = FALSE;
18428
18429	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18430	sd_return_failed_command(un, bp, EIO);
18431}
18432
18433
18434
18435
18436/*
18437 *    Function: sd_sense_key_aborted_command
18438 *
18439 * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18440 *
18441 *     Context: May be called from interrupt context
18442 */
18443
18444static void
18445sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18446	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18447{
18448	struct sd_sense_info	si;
18449
18450	ASSERT(un != NULL);
18451	ASSERT(mutex_owned(SD_MUTEX(un)));
18452	ASSERT(bp != NULL);
18453	ASSERT(xp != NULL);
18454	ASSERT(pktp != NULL);
18455
18456	si.ssi_severity = SCSI_ERR_FATAL;
18457	si.ssi_pfa_flag = FALSE;
18458
18459	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18460
18461	/*
18462	 * This really ought to be a fatal error, but we will retry anyway
18463	 * as some drives report this as a spurious error.
18464	 */
18465	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18466	    &si, EIO, (clock_t)0, NULL);
18467}
18468
18469
18470
18471/*
18472 *    Function: sd_sense_key_default
18473 *
18474 * Description: Default recovery action for several SCSI sense keys (basically
18475 *		attempts a retry).
18476 *
18477 *     Context: May be called from interrupt context
18478 */
18479
18480static void
18481sd_sense_key_default(struct sd_lun *un,
18482	uint8_t *sense_datap,
18483	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18484{
18485	struct sd_sense_info	si;
18486	uint8_t sense_key = scsi_sense_key(sense_datap);
18487
18488	ASSERT(un != NULL);
18489	ASSERT(mutex_owned(SD_MUTEX(un)));
18490	ASSERT(bp != NULL);
18491	ASSERT(xp != NULL);
18492	ASSERT(pktp != NULL);
18493
18494	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18495
18496	/*
18497	 * Undecoded sense key.  Attempt retries and hope that they will fix
18498	 * the problem.  Otherwise, we're dead.
18499	 */
18500	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18501		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18502		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18503	}
18504
18505	si.ssi_severity = SCSI_ERR_FATAL;
18506	si.ssi_pfa_flag = FALSE;
18507
18508	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18509	    &si, EIO, (clock_t)0, NULL);
18510}
18511
18512
18513
18514/*
18515 *    Function: sd_print_retry_msg
18516 *
18517 * Description: Print a message indicating the retry action being taken.
18518 *
18519 *   Arguments: un - ptr to associated softstate
18520 *		bp - ptr to buf(9S) for the command
18521 *		arg - not used.
18522 *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18523 *			or SD_NO_RETRY_ISSUED
18524 *
18525 *     Context: May be called from interrupt context
18526 */
18527/* ARGSUSED */
18528static void
18529sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18530{
18531	struct sd_xbuf	*xp;
18532	struct scsi_pkt *pktp;
18533	char *reasonp;
18534	char *msgp;
18535
18536	ASSERT(un != NULL);
18537	ASSERT(mutex_owned(SD_MUTEX(un)));
18538	ASSERT(bp != NULL);
18539	pktp = SD_GET_PKTP(bp);
18540	ASSERT(pktp != NULL);
18541	xp = SD_GET_XBUF(bp);
18542	ASSERT(xp != NULL);
18543
18544	ASSERT(!mutex_owned(&un->un_pm_mutex));
18545	mutex_enter(&un->un_pm_mutex);
18546	if ((un->un_state == SD_STATE_SUSPENDED) ||
18547	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18548	    (pktp->pkt_flags & FLAG_SILENT)) {
18549		mutex_exit(&un->un_pm_mutex);
18550		goto update_pkt_reason;
18551	}
18552	mutex_exit(&un->un_pm_mutex);
18553
18554	/*
18555	 * Suppress messages if they are all the same pkt_reason; with
18556	 * TQ, many (up to 256) are returned with the same pkt_reason.
18557	 * If we are in panic, then suppress the retry messages.
18558	 */
18559	switch (flag) {
18560	case SD_NO_RETRY_ISSUED:
18561		msgp = "giving up";
18562		break;
18563	case SD_IMMEDIATE_RETRY_ISSUED:
18564	case SD_DELAYED_RETRY_ISSUED:
18565		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18566		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18567		    (sd_error_level != SCSI_ERR_ALL))) {
18568			return;
18569		}
18570		msgp = "retrying command";
18571		break;
18572	default:
18573		goto update_pkt_reason;
18574	}
18575
18576	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18577	    scsi_rname(pktp->pkt_reason));
18578
18579	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18580	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18581
18582update_pkt_reason:
18583	/*
18584	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18585	 * This is to prevent multiple console messages for the same failure
18586	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18587	 * when the command is retried successfully because there still may be
18588	 * more commands coming back with the same value of pktp->pkt_reason.
18589	 */
18590	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18591		un->un_last_pkt_reason = pktp->pkt_reason;
18592	}
18593}
18594
18595
18596/*
18597 *    Function: sd_print_cmd_incomplete_msg
18598 *
18599 * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18600 *
18601 *   Arguments: un - ptr to associated softstate
18602 *		bp - ptr to buf(9S) for the command
18603 *		arg - passed to sd_print_retry_msg()
18604 *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18605 *			or SD_NO_RETRY_ISSUED
18606 *
18607 *     Context: May be called from interrupt context
18608 */
18609
18610static void
18611sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18612	int code)
18613{
18614	dev_info_t	*dip;
18615
18616	ASSERT(un != NULL);
18617	ASSERT(mutex_owned(SD_MUTEX(un)));
18618	ASSERT(bp != NULL);
18619
18620	switch (code) {
18621	case SD_NO_RETRY_ISSUED:
	case SD_NO_RETRY_ISSUED:
		/* The command was failed.  Perhaps someone turned off this target? */
18623		if (un->un_state != SD_STATE_OFFLINE) {
18624			/*
18625			 * Suppress message if we are detaching and
18626			 * device has been disconnected
18627			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18628			 * private interface and not part of the DDI
18629			 */
18630			dip = un->un_sd->sd_dev;
18631			if (!(DEVI_IS_DETACHING(dip) &&
18632			    DEVI_IS_DEVICE_REMOVED(dip))) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "disk not responding to selection\n");
18635			}
18636			New_state(un, SD_STATE_OFFLINE);
18637		}
18638		break;
18639
18640	case SD_DELAYED_RETRY_ISSUED:
18641	case SD_IMMEDIATE_RETRY_ISSUED:
18642	default:
18643		/* Command was successfully queued for retry */
18644		sd_print_retry_msg(un, bp, arg, code);
18645		break;
18646	}
18647}
18648
18649
18650/*
18651 *    Function: sd_pkt_reason_cmd_incomplete
18652 *
18653 * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18654 *
18655 *     Context: May be called from interrupt context
18656 */
18657
18658static void
18659sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18660	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18661{
18662	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18663
18664	ASSERT(un != NULL);
18665	ASSERT(mutex_owned(SD_MUTEX(un)));
18666	ASSERT(bp != NULL);
18667	ASSERT(xp != NULL);
18668	ASSERT(pktp != NULL);
18669
18670	/* Do not do a reset if selection did not complete */
18671	/* Note: Should this not just check the bit? */
18672	if (pktp->pkt_state != STATE_GOT_BUS) {
18673		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18674		sd_reset_target(un, pktp);
18675	}
18676
18677	/*
18678	 * If the target was not successfully selected, then set
18679	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18680	 * with the target, and further retries and/or commands are
18681	 * likely to take a long time.
18682	 */
18683	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18684		flag |= SD_RETRIES_FAILFAST;
18685	}
18686
18687	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18688
18689	sd_retry_command(un, bp, flag,
18690	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18691}
18692
18693
18694
18695/*
18696 *    Function: sd_pkt_reason_cmd_tran_err
18697 *
18698 * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18699 *
18700 *     Context: May be called from interrupt context
18701 */
18702
18703static void
18704sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18705	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18706{
18707	ASSERT(un != NULL);
18708	ASSERT(mutex_owned(SD_MUTEX(un)));
18709	ASSERT(bp != NULL);
18710	ASSERT(xp != NULL);
18711	ASSERT(pktp != NULL);
18712
18713	/*
18714	 * Do not reset if we got a parity error, or if
18715	 * selection did not complete.
18716	 */
18717	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18718	/* Note: Should this not just check the bit for pkt_state? */
18719	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18720	    (pktp->pkt_state != STATE_GOT_BUS)) {
18721		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18722		sd_reset_target(un, pktp);
18723	}
18724
18725	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18726
18727	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18728	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18729}
18730
18731
18732
18733/*
18734 *    Function: sd_pkt_reason_cmd_reset
18735 *
18736 * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18737 *
18738 *     Context: May be called from interrupt context
18739 */
18740
18741static void
18742sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18743	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18744{
18745	ASSERT(un != NULL);
18746	ASSERT(mutex_owned(SD_MUTEX(un)));
18747	ASSERT(bp != NULL);
18748	ASSERT(xp != NULL);
18749	ASSERT(pktp != NULL);
18750
18751	/* The target may still be running the command, so try to reset. */
18752	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18753	sd_reset_target(un, pktp);
18754
18755	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18756
18757	/*
18758	 * If pkt_reason is CMD_RESET chances are that this pkt got
18759	 * reset because another target on this bus caused it. The target
18760	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18761	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18762	 */
18763
18764	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18765	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18766}
18767
18768
18769
18770
18771/*
18772 *    Function: sd_pkt_reason_cmd_aborted
18773 *
18774 * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18775 *
18776 *     Context: May be called from interrupt context
18777 */
18778
18779static void
18780sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18781	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18782{
18783	ASSERT(un != NULL);
18784	ASSERT(mutex_owned(SD_MUTEX(un)));
18785	ASSERT(bp != NULL);
18786	ASSERT(xp != NULL);
18787	ASSERT(pktp != NULL);
18788
18789	/* The target may still be running the command, so try to reset. */
18790	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18791	sd_reset_target(un, pktp);
18792
18793	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18794
18795	/*
18796	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18797	 * aborted because another target on this bus caused it. The target
18798	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18799	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18800	 */
18801
18802	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18803	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18804}
18805
18806
18807
18808/*
18809 *    Function: sd_pkt_reason_cmd_timeout
18810 *
18811 * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18812 *
18813 *     Context: May be called from interrupt context
18814 */
18815
18816static void
18817sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18818	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18819{
18820	ASSERT(un != NULL);
18821	ASSERT(mutex_owned(SD_MUTEX(un)));
18822	ASSERT(bp != NULL);
18823	ASSERT(xp != NULL);
18824	ASSERT(pktp != NULL);
18825
18826
18827	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18828	sd_reset_target(un, pktp);
18829
18830	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18831
18832	/*
18833	 * A command timeout indicates that we could not establish
18834	 * communication with the target, so set SD_RETRIES_FAILFAST
18835	 * as further retries/commands are likely to take a long time.
18836	 */
18837	sd_retry_command(un, bp,
18838	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18839	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18840}
18841
18842
18843
18844/*
18845 *    Function: sd_pkt_reason_cmd_unx_bus_free
18846 *
18847 * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18848 *
18849 *     Context: May be called from interrupt context
18850 */
18851
18852static void
18853sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18854	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18855{
18856	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18857
18858	ASSERT(un != NULL);
18859	ASSERT(mutex_owned(SD_MUTEX(un)));
18860	ASSERT(bp != NULL);
18861	ASSERT(xp != NULL);
18862	ASSERT(pktp != NULL);
18863
18864	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18865	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18866
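	/*
	 * Suppress the retry message when the unexpected bus free was
	 * accompanied by a parity error; otherwise let
	 * sd_print_retry_msg() report the transport failure.
	 */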
18867	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18868	    sd_print_retry_msg : NULL;
18869
18870	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18871	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18872}
18873
18874
18875/*
18876 *    Function: sd_pkt_reason_cmd_tag_reject
18877 *
18878 * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18879 *
18880 *     Context: May be called from interrupt context
18881 */
18882
18883static void
18884sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18885	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18886{
18887	ASSERT(un != NULL);
18888	ASSERT(mutex_owned(SD_MUTEX(un)));
18889	ASSERT(bp != NULL);
18890	ASSERT(xp != NULL);
18891	ASSERT(pktp != NULL);
18892
18893	SD_UPDATE_ERRSTATS(un, sd_harderrs);
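	/*
	 * The target rejected the tag message, so disable tagged queueing:
	 * clear the packet and unit tag flags, clamp the throttle (3 if
	 * the device otherwise supports command queueing, 1 if not), and
	 * turn off the HBA's "tagged-qing" capability.
	 */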
18894	pktp->pkt_flags = 0;
18895	un->un_tagflags = 0;
18896	if (un->un_f_opt_queueing == TRUE) {
18897		un->un_throttle = min(un->un_throttle, 3);
18898	} else {
18899		un->un_throttle = 1;
18900	}
18901	mutex_exit(SD_MUTEX(un));
18902	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18903	mutex_enter(SD_MUTEX(un));
18904
18905	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18906
18907	/* Legacy behavior not to check retry counts here. */
18908	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18909	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18910}
18911
18912
18913/*
18914 *    Function: sd_pkt_reason_default
18915 *
18916 * Description: Default recovery actions for SCSA pkt_reason values that
18917 *		do not have more explicit recovery actions.
18918 *
18919 *     Context: May be called from interrupt context
18920 */
18921
18922static void
18923sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18924	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18925{
18926	ASSERT(un != NULL);
18927	ASSERT(mutex_owned(SD_MUTEX(un)));
18928	ASSERT(bp != NULL);
18929	ASSERT(xp != NULL);
18930	ASSERT(pktp != NULL);
18931
18932	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18933	sd_reset_target(un, pktp);
18934
18935	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18936
18937	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18938	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18939}
18940
18941
18942
18943/*
18944 *    Function: sd_pkt_status_check_condition
18945 *
18946 * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18947 *
18948 *     Context: May be called from interrupt context
18949 */
18950
18951static void
18952sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18953	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18954{
18955	ASSERT(un != NULL);
18956	ASSERT(mutex_owned(SD_MUTEX(un)));
18957	ASSERT(bp != NULL);
18958	ASSERT(xp != NULL);
18959	ASSERT(pktp != NULL);
18960
18961	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18962	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18963
18964	/*
18965	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18966	 * command will be retried after the request sense). Otherwise, retry
18967	 * the command. Note: we are issuing the request sense even though the
18968	 * retry limit may have been reached for the failed command.
18969	 */
18970	if (un->un_f_arq_enabled == FALSE) {
18971		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18972		    "no ARQ, sending request sense command\n");
18973		sd_send_request_sense_command(un, bp, pktp);
18974	} else {
18975		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18976		    "ARQ,retrying request sense command\n");
18977#if defined(__i386) || defined(__amd64)
18978		/*
		 * The retry delay value used here must be adjusted whenever
		 * SD_RETRY_DELAY changes in sddef.h.
		 */
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
		    NULL);
18985#else
18986		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18987		    EIO, SD_RETRY_DELAY, NULL);
18988#endif
18989	}
18990
18991	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18992}
18993
18994
18995/*
18996 *    Function: sd_pkt_status_busy
18997 *
18998 * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18999 *
19000 *     Context: May be called from interrupt context
19001 */
19002
19003static void
19004sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19005	struct scsi_pkt *pktp)
19006{
19007	ASSERT(un != NULL);
19008	ASSERT(mutex_owned(SD_MUTEX(un)));
19009	ASSERT(bp != NULL);
19010	ASSERT(xp != NULL);
19011	ASSERT(pktp != NULL);
19012
19013	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19014	    "sd_pkt_status_busy: entry\n");
19015
19016	/* If retries are exhausted, just fail the command. */
19017	if (xp->xb_retry_count >= un->un_busy_retry_count) {
19018		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
19019		    "device busy too long\n");
19020		sd_return_failed_command(un, bp, EIO);
19021		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19022		    "sd_pkt_status_busy: exit\n");
19023		return;
19024	}
19025	xp->xb_retry_count++;
19026
19027	/*
19028	 * Try to reset the target. However, we do not want to perform
19029	 * more than one reset if the device continues to fail. The reset
19030	 * will be performed when the retry count reaches the reset
19031	 * threshold.  This threshold should be set such that at least
19032	 * one retry is issued before the reset is performed.
19033	 */
19034	if (xp->xb_retry_count ==
19035	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
19036		int rval = 0;
19037		mutex_exit(SD_MUTEX(un));
19038		if (un->un_f_allow_bus_device_reset == TRUE) {
19039			/*
19040			 * First try to reset the LUN; if we cannot then
19041			 * try to reset the target.
19042			 */
19043			if (un->un_f_lun_reset_enabled == TRUE) {
19044				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19045				    "sd_pkt_status_busy: RESET_LUN\n");
19046				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
19047			}
19048			if (rval == 0) {
19049				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19050				    "sd_pkt_status_busy: RESET_TARGET\n");
19051				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
19052			}
19053		}
19054		if (rval == 0) {
19055			/*
19056			 * If the RESET_LUN and/or RESET_TARGET failed,
19057			 * try RESET_ALL
19058			 */
19059			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19060			    "sd_pkt_status_busy: RESET_ALL\n");
19061			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
19062		}
19063		mutex_enter(SD_MUTEX(un));
19064		if (rval == 0) {
19065			/*
19066			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
19067			 * At this point we give up & fail the command.
19068			 */
19069			sd_return_failed_command(un, bp, EIO);
19070			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19071			    "sd_pkt_status_busy: exit (failed cmd)\n");
19072			return;
19073		}
19074	}
19075
19076	/*
19077	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
19078	 * we have already checked the retry counts above.
19079	 */
19080	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
19081	    EIO, SD_BSY_TIMEOUT, NULL);
19082
19083	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19084	    "sd_pkt_status_busy: exit\n");
19085}
19086
19087
19088/*
19089 *    Function: sd_pkt_status_reservation_conflict
19090 *
19091 * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
19092 *		command status.
19093 *
19094 *     Context: May be called from interrupt context
19095 */
19096
19097static void
19098sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
19099	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19100{
19101	ASSERT(un != NULL);
19102	ASSERT(mutex_owned(SD_MUTEX(un)));
19103	ASSERT(bp != NULL);
19104	ASSERT(xp != NULL);
19105	ASSERT(pktp != NULL);
19106
19107	/*
	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then the
	 * reservation conflict could be due to various reasons, such as
	 * incorrect keys or not being registered or reserved, so we
	 * return EACCES to the caller.
19111	 */
19112	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
19113		int cmd = SD_GET_PKT_OPCODE(pktp);
19114		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
19115		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
19116			sd_return_failed_command(un, bp, EACCES);
19117			return;
19118		}
19119	}
19120
19121	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
19122
19123	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
19124		if (sd_failfast_enable != 0) {
19125			/* By definition, we must panic here.... */
19126			sd_panic_for_res_conflict(un);
19127			/*NOTREACHED*/
19128		}
19129		SD_ERROR(SD_LOG_IO, un,
19130		    "sd_handle_resv_conflict: Disk Reserved\n");
19131		sd_return_failed_command(un, bp, EACCES);
19132		return;
19133	}
19134
19135	/*
19136	 * 1147670: retry only if sd_retry_on_reservation_conflict
19137	 * property is set (default is 1). Retries will not succeed
19138	 * on a disk reserved by another initiator. HA systems
19139	 * may reset this via sd.conf to avoid these retries.
19140	 *
19141	 * Note: The legacy return code for this failure is EIO, however EACCES
19142	 * seems more appropriate for a reservation conflict.
19143	 */
19144	if (sd_retry_on_reservation_conflict == 0) {
19145		SD_ERROR(SD_LOG_IO, un,
19146		    "sd_handle_resv_conflict: Device Reserved\n");
19147		sd_return_failed_command(un, bp, EIO);
19148		return;
19149	}
19150
19151	/*
19152	 * Retry the command if we can.
19153	 *
19154	 * Note: The legacy return code for this failure is EIO, however EACCES
19155	 * seems more appropriate for a reservation conflict.
19156	 */
19157	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
19158	    (clock_t)2, NULL);
19159}
19160
19161
19162
19163/*
19164 *    Function: sd_pkt_status_qfull
19165 *
19166 * Description: Handle a QUEUE FULL condition from the target.  This can
19167 *		occur if the HBA does not handle the queue full condition.
19168 *		(Basically this means third-party HBAs as Sun HBAs will
19169 *		handle the queue full condition.)  Note that if there are
19170 *		some commands already in the transport, then the queue full
19171 *		has occurred because the queue for this nexus is actually
19172 *		full. If there are no commands in the transport, then the
19173 *		queue full is resulting from some other initiator or lun
19174 *		consuming all the resources at the target.
19175 *
19176 *     Context: May be called from interrupt context
19177 */
19178
19179static void
19180sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
19181	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19182{
19183	ASSERT(un != NULL);
19184	ASSERT(mutex_owned(SD_MUTEX(un)));
19185	ASSERT(bp != NULL);
19186	ASSERT(xp != NULL);
19187	ASSERT(pktp != NULL);
19188
19189	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19190	    "sd_pkt_status_qfull: entry\n");
19191
19192	/*
19193	 * Just lower the QFULL throttle and retry the command.  Note that
19194	 * we do not limit the number of retries here.
19195	 */
19196	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
19197	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
19198	    SD_RESTART_TIMEOUT, NULL);
19199
19200	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19201	    "sd_pkt_status_qfull: exit\n");
19202}
19203
19204
19205/*
19206 *    Function: sd_reset_target
19207 *
19208 * Description: Issue a scsi_reset(9F), with either RESET_LUN,
19209 *		RESET_TARGET, or RESET_ALL.
19210 *
19211 *     Context: May be called under interrupt context.
19212 */
19213
19214static void
19215sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
19216{
19217	int rval = 0;
19218
19219	ASSERT(un != NULL);
19220	ASSERT(mutex_owned(SD_MUTEX(un)));
19221	ASSERT(pktp != NULL);
19222
19223	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
19224
19225	/*
19226	 * No need to reset if the transport layer has already done so.
19227	 */
19228	if ((pktp->pkt_statistics &
19229	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
19230		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19231		    "sd_reset_target: no reset\n");
19232		return;
19233	}
19234
19235	mutex_exit(SD_MUTEX(un));
19236
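	/*
	 * Escalate the reset: try RESET_LUN first (when both bus/device
	 * resets and LUN resets are enabled), then RESET_TARGET, and fall
	 * back to RESET_ALL if neither succeeds.
	 */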
19237	if (un->un_f_allow_bus_device_reset == TRUE) {
19238		if (un->un_f_lun_reset_enabled == TRUE) {
19239			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19240			    "sd_reset_target: RESET_LUN\n");
19241			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
19242		}
19243		if (rval == 0) {
19244			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19245			    "sd_reset_target: RESET_TARGET\n");
19246			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
19247		}
19248	}
19249
19250	if (rval == 0) {
19251		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19252		    "sd_reset_target: RESET_ALL\n");
19253		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
19254	}
19255
19256	mutex_enter(SD_MUTEX(un));
19257
19258	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
19259}
19260
19261
19262/*
19263 *    Function: sd_media_change_task
19264 *
19265 * Description: Recovery action for CDROM to become available.
19266 *
19267 *     Context: Executes in a taskq() thread context
19268 */
19269
19270static void
19271sd_media_change_task(void *arg)
19272{
19273	struct	scsi_pkt	*pktp = arg;
19274	struct	sd_lun		*un;
19275	struct	buf		*bp;
19276	struct	sd_xbuf		*xp;
19277	int	err		= 0;
19278	int	retry_count	= 0;
	int	retry_limit	= SD_UNIT_ATTENTION_RETRY / 10;
19280	struct	sd_sense_info	si;
19281
19282	ASSERT(pktp != NULL);
19283	bp = (struct buf *)pktp->pkt_private;
19284	ASSERT(bp != NULL);
19285	xp = SD_GET_XBUF(bp);
19286	ASSERT(xp != NULL);
19287	un = SD_GET_UN(bp);
19288	ASSERT(un != NULL);
19289	ASSERT(!mutex_owned(SD_MUTEX(un)));
19290	ASSERT(un->un_f_monitor_media_state);
19291
19292	si.ssi_severity = SCSI_ERR_INFO;
19293	si.ssi_pfa_flag = FALSE;
19294
19295	/*
19296	 * When a reset is issued on a CDROM, it takes a long time to
	 * recover. The first few attempts to read the capacity and other
	 * things related to handling the unit attention fail (with an ASC
	 * of 0x4 and an ASCQ of 0x1). In that case we want to do enough
	 * retries, while limiting the retries in other cases of genuine
	 * failure, such as no media in the drive.
19302	 */
19303	while (retry_count++ < retry_limit) {
19304		if ((err = sd_handle_mchange(un)) == 0) {
19305			break;
19306		}
19307		if (err == EAGAIN) {
19308			retry_limit = SD_UNIT_ATTENTION_RETRY;
19309		}
19310		/* Sleep for 0.5 sec. & try again */
19311		delay(drv_usectohz(500000));
19312	}
19313
19314	/*
19315	 * Dispatch (retry or fail) the original command here,
19316	 * along with appropriate console messages....
19317	 *
19318	 * Must grab the mutex before calling sd_retry_command,
19319	 * sd_print_sense_msg and sd_return_failed_command.
19320	 */
19321	mutex_enter(SD_MUTEX(un));
19322	if (err != SD_CMD_SUCCESS) {
19323		SD_UPDATE_ERRSTATS(un, sd_harderrs);
19324		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
19325		si.ssi_severity = SCSI_ERR_FATAL;
19326		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
19327		sd_return_failed_command(un, bp, EIO);
19328	} else {
19329		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
19330		    &si, EIO, (clock_t)0, NULL);
19331	}
19332	mutex_exit(SD_MUTEX(un));
19333}
19334
19335
19336
19337/*
19338 *    Function: sd_handle_mchange
19339 *
 * Description: Perform geometry validation & other recovery when the CDROM
 *		has been removed from the drive.
19342 *
19343 * Return Code: 0 for success
19344 *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19345 *		sd_send_scsi_READ_CAPACITY()
19346 *
19347 *     Context: Executes in a taskq() thread context
19348 */
19349
19350static int
19351sd_handle_mchange(struct sd_lun *un)
19352{
19353	uint64_t	capacity;
19354	uint32_t	lbasize;
19355	int		rval;
19356
19357	ASSERT(!mutex_owned(SD_MUTEX(un)));
19358	ASSERT(un->un_f_monitor_media_state);
19359
19360	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
19361	    SD_PATH_DIRECT_PRIORITY)) != 0) {
19362		return (rval);
19363	}
19364
19365	mutex_enter(SD_MUTEX(un));
19366	sd_update_block_info(un, lbasize, capacity);
19367
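	/*
	 * Refresh the capacity error statistic to reflect the new medium:
	 * total bytes = block count * target block size.
	 */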
19368	if (un->un_errstats != NULL) {
19369		struct	sd_errstats *stp =
19370		    (struct sd_errstats *)un->un_errstats->ks_data;
19371		stp->sd_capacity.value.ui64 = (uint64_t)
19372		    ((uint64_t)un->un_blockcount *
19373		    (uint64_t)un->un_tgt_blocksize);
19374	}
19375
19376	/*
19377	 * Note: Maybe let the strategy/partitioning chain worry about getting
19378	 * valid geometry.
19379	 */
19380	un->un_f_geometry_is_valid = FALSE;
19381	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
19382	if (un->un_f_geometry_is_valid == FALSE) {
19383		mutex_exit(SD_MUTEX(un));
19384		return (EIO);
19385	}
19386
19387	mutex_exit(SD_MUTEX(un));
19388
19389	/*
19390	 * Try to lock the door
19391	 */
19392	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
19393	    SD_PATH_DIRECT_PRIORITY));
19394}
19395
19396
19397/*
19398 *    Function: sd_send_scsi_DOORLOCK
19399 *
19400 * Description: Issue the scsi DOOR LOCK command
19401 *
19402 *   Arguments: un    - pointer to driver soft state (unit) structure for
19403 *			this target.
19404 *		flag  - SD_REMOVAL_ALLOW
19405 *			SD_REMOVAL_PREVENT
19406 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19407 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19408 *			to use the USCSI "direct" chain and bypass the normal
19409 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19410 *			command is issued as part of an error recovery action.
19411 *
19412 * Return Code: 0   - Success
19413 *		errno return code from sd_send_scsi_cmd()
19414 *
19415 *     Context: Can sleep.
19416 */
19417
19418static int
19419sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
19420{
19421	union scsi_cdb		cdb;
19422	struct uscsi_cmd	ucmd_buf;
19423	struct scsi_extended_sense	sense_buf;
19424	int			status;
19425
19426	ASSERT(un != NULL);
19427	ASSERT(!mutex_owned(SD_MUTEX(un)));
19428
19429	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
19430
19431	/* already determined doorlock is not supported, fake success */
19432	if (un->un_f_doorlock_supported == FALSE) {
19433		return (0);
19434	}
19435
19436	bzero(&cdb, sizeof (cdb));
19437	bzero(&ucmd_buf, sizeof (ucmd_buf));
19438
19439	cdb.scc_cmd = SCMD_DOORLOCK;
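	/*
	 * Byte 4 of the PREVENT/ALLOW MEDIUM REMOVAL CDB carries the
	 * Prevent field; the SD_REMOVAL_* flag values map directly to it.
	 */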
19440	cdb.cdb_opaque[4] = (uchar_t)flag;
19441
19442	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19443	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19444	ucmd_buf.uscsi_bufaddr	= NULL;
19445	ucmd_buf.uscsi_buflen	= 0;
19446	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19447	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19448	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19449	ucmd_buf.uscsi_timeout	= 15;
19450
19451	SD_TRACE(SD_LOG_IO, un,
19452	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
19453
19454	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19455	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19456
19457	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
19458	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19459	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
19460		/* fake success and skip subsequent doorlock commands */
19461		un->un_f_doorlock_supported = FALSE;
19462		return (0);
19463	}
19464
19465	return (status);
19466}
19467
19468/*
19469 *    Function: sd_send_scsi_READ_CAPACITY
19470 *
19471 * Description: This routine uses the scsi READ CAPACITY command to determine
19472 *		the device capacity in number of blocks and the device native
19473 *		block size. If this function returns a failure, then the
19474 *		values in *capp and *lbap are undefined.  If the capacity
19475 *		returned is 0xffffffff then the lun is too large for a
19476 *		normal READ CAPACITY command and the results of a
19477 *		READ CAPACITY 16 will be used instead.
19478 *
19479 *   Arguments: un   - ptr to soft state struct for the target
19480 *		capp - ptr to unsigned 64-bit variable to receive the
19481 *			capacity value from the command.
19482 *		lbap - ptr to unsigned 32-bit varaible to receive the
19483 *			block size value from the command
19484 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19485 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19486 *			to use the USCSI "direct" chain and bypass the normal
19487 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19488 *			command is issued as part of an error recovery action.
19489 *
19490 * Return Code: 0   - Success
19491 *		EIO - IO error
19492 *		EACCES - Reservation conflict detected
19493 *		EAGAIN - Device is becoming ready
19494 *		errno return code from sd_send_scsi_cmd()
19495 *
19496 *     Context: Can sleep.  Blocks until command completes.
19497 */
19498
19499#define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19500
19501static int
19502sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
19503	int path_flag)
19504{
19505	struct	scsi_extended_sense	sense_buf;
19506	struct	uscsi_cmd	ucmd_buf;
19507	union	scsi_cdb	cdb;
19508	uint32_t		*capacity_buf;
19509	uint64_t		capacity;
19510	uint32_t		lbasize;
19511	int			status;
19512
19513	ASSERT(un != NULL);
19514	ASSERT(!mutex_owned(SD_MUTEX(un)));
19515	ASSERT(capp != NULL);
19516	ASSERT(lbap != NULL);
19517
19518	SD_TRACE(SD_LOG_IO, un,
19519	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19520
19521	/*
19522	 * First send a READ_CAPACITY command to the target.
19523	 * (This command is mandatory under SCSI-2.)
19524	 *
19525	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19526	 * Medium Indicator bit is cleared.  The address field must be
19527	 * zero if the PMI bit is zero.
19528	 */
19529	bzero(&cdb, sizeof (cdb));
19530	bzero(&ucmd_buf, sizeof (ucmd_buf));
19531
19532	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19533
19534	cdb.scc_cmd = SCMD_READ_CAPACITY;
19535
19536	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19537	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19538	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19539	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19540	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19541	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19542	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19543	ucmd_buf.uscsi_timeout	= 60;
19544
19545	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19546	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19547
19548	switch (status) {
19549	case 0:
19550		/* Return failure if we did not get valid capacity data. */
19551		if (ucmd_buf.uscsi_resid != 0) {
19552			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19553			return (EIO);
19554		}
19555
19556		/*
19557		 * Read capacity and block size from the READ CAPACITY 10 data.
19558		 * This data may be adjusted later due to device specific
19559		 * issues.
19560		 *
19561		 * According to the SCSI spec, the READ CAPACITY 10
19562		 * command returns the following:
19563		 *
19564		 *  bytes 0-3: Maximum logical block address available.
19565		 *		(MSB in byte:0 & LSB in byte:3)
19566		 *
19567		 *  bytes 4-7: Block length in bytes
19568		 *		(MSB in byte:4 & LSB in byte:7)
19569		 *
19570		 */
19571		capacity = BE_32(capacity_buf[0]);
19572		lbasize = BE_32(capacity_buf[1]);
19573
19574		/*
19575		 * Done with capacity_buf
19576		 */
19577		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19578
19579		/*
19580		 * if the reported capacity is set to all 0xf's, then
19581		 * this disk is too large and requires SBC-2 commands.
19582		 * Reissue the request using READ CAPACITY 16.
19583		 */
19584		if (capacity == 0xffffffff) {
19585			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19586			    &lbasize, path_flag);
19587			if (status != 0) {
19588				return (status);
19589			}
19590		}
19591		break;	/* Success! */
19592	case EIO:
19593		switch (ucmd_buf.uscsi_status) {
19594		case STATUS_RESERVATION_CONFLICT:
19595			status = EACCES;
19596			break;
19597		case STATUS_CHECK:
19598			/*
19599			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19600			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19601			 */
19602			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19603			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19604			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19605				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19606				return (EAGAIN);
19607			}
19608			break;
19609		default:
19610			break;
19611		}
19612		/* FALLTHRU */
19613	default:
19614		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19615		return (status);
19616	}
19617
19618	/*
19619	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19620	 * (2352 and 0 are common) so for these devices always force the value
19621	 * to 2048 as required by the ATAPI specs.
19622	 */
19623	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19624		lbasize = 2048;
19625	}
19626
19627	/*
19628	 * Get the maximum LBA value from the READ CAPACITY data.
19629	 * Here we assume that the Partial Medium Indicator (PMI) bit
19630	 * was cleared when issuing the command. This means that the LBA
19631	 * returned from the device is the LBA of the last logical block
19632	 * on the logical unit.  The actual logical block count will be
19633	 * this value plus one.
19634	 *
19635	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19636	 * so scale the capacity value to reflect this.
19637	 */
19638	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
19639
19640#if defined(__i386) || defined(__amd64)
19641	/*
19642	 * Refer to comments related to off-by-1 at the
19643	 * header of this file.
19644	 * Treat 1TB disk as (1T - 512)B.
19645	 */
19646	if (un->un_f_capacity_adjusted == 1)
		capacity = DK_MAX_BLOCKS;
19648#endif
19649
19650	/*
19651	 * Copy the values from the READ CAPACITY command into the space
19652	 * provided by the caller.
19653	 */
19654	*capp = capacity;
19655	*lbap = lbasize;
19656
19657	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19658	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19659
19660	/*
	 * Both the lbasize and capacity from the device must be nonzero;
19662	 * otherwise we assume that the values are not valid and return
19663	 * failure to the caller. (4203735)
19664	 */
19665	if ((capacity == 0) || (lbasize == 0)) {
19666		return (EIO);
19667	}
19668
19669	return (0);
19670}
19671
19672/*
19673 *    Function: sd_send_scsi_READ_CAPACITY_16
19674 *
19675 * Description: This routine uses the scsi READ CAPACITY 16 command to
19676 *		determine the device capacity in number of blocks and the
19677 *		device native block size.  If this function returns a failure,
19678 *		then the values in *capp and *lbap are undefined.
19679 *		This routine should always be called by
19680 *		sd_send_scsi_READ_CAPACITY which will appy any device
19681 *		specific adjustments to capacity and lbasize.
19682 *
19683 *   Arguments: un   - ptr to soft state struct for the target
19684 *		capp - ptr to unsigned 64-bit variable to receive the
19685 *			capacity value from the command.
19686 *		lbap - ptr to unsigned 32-bit varaible to receive the
19687 *			block size value from the command
19688 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19689 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19690 *			to use the USCSI "direct" chain and bypass the normal
19691 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19692 *			this command is issued as part of an error recovery
19693 *			action.
19694 *
19695 * Return Code: 0   - Success
19696 *		EIO - IO error
19697 *		EACCES - Reservation conflict detected
19698 *		EAGAIN - Device is becoming ready
19699 *		errno return code from sd_send_scsi_cmd()
19700 *
19701 *     Context: Can sleep.  Blocks until command completes.
19702 */
19703
19704#define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19705
19706static int
19707sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19708	uint32_t *lbap, int path_flag)
19709{
19710	struct	scsi_extended_sense	sense_buf;
19711	struct	uscsi_cmd	ucmd_buf;
19712	union	scsi_cdb	cdb;
19713	uint64_t		*capacity16_buf;
19714	uint64_t		capacity;
19715	uint32_t		lbasize;
19716	int			status;
19717
19718	ASSERT(un != NULL);
19719	ASSERT(!mutex_owned(SD_MUTEX(un)));
19720	ASSERT(capp != NULL);
19721	ASSERT(lbap != NULL);
19722
19723	SD_TRACE(SD_LOG_IO, un,
19724	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19725
19726	/*
19727	 * First send a READ_CAPACITY_16 command to the target.
19728	 *
19729	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19730	 * Medium Indicator bit is cleared.  The address field must be
19731	 * zero if the PMI bit is zero.
19732	 */
19733	bzero(&cdb, sizeof (cdb));
19734	bzero(&ucmd_buf, sizeof (ucmd_buf));
19735
19736	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19737
19738	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19739	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19740	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19741	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19742	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19743	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19744	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19745	ucmd_buf.uscsi_timeout	= 60;
19746
19747	/*
19748	 * Read Capacity (16) is a Service Action In command.  One
19749	 * command byte (0x9E) is overloaded for multiple operations,
	 * with the second CDB byte specifying the desired operation.
19751	 */
19752	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19753	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19754
19755	/*
19756	 * Fill in allocation length field
19757	 */
19758	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19759
19760	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19761	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19762
19763	switch (status) {
19764	case 0:
19765		/* Return failure if we did not get valid capacity data. */
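		/*
		 * (With the 32-byte READ CAPACITY 16 parameter data, a
		 * residual greater than 20 would mean that fewer than the
		 * 12 bytes carrying the returned LBA and block length were
		 * transferred.)
		 */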
19766		if (ucmd_buf.uscsi_resid > 20) {
19767			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19768			return (EIO);
19769		}
19770
19771		/*
		 * Read capacity and block size from the READ CAPACITY 16
		 * data. This data may be adjusted later due to device
		 * specific issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 16
19777		 * command returns the following:
19778		 *
19779		 *  bytes 0-7: Maximum logical block address available.
19780		 *		(MSB in byte:0 & LSB in byte:7)
19781		 *
19782		 *  bytes 8-11: Block length in bytes
19783		 *		(MSB in byte:8 & LSB in byte:11)
19784		 *
19785		 */
19786		capacity = BE_64(capacity16_buf[0]);
19787		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19788
19789		/*
19790		 * Done with capacity16_buf
19791		 */
19792		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19793
19794		/*
19795		 * if the reported capacity is set to all 0xf's, then
19796		 * this disk is too large.  This could only happen with
		 * a device that supports LBAs larger than 64 bits, which
		 * are not defined by any current T10 standards.
19799		 */
19800		if (capacity == 0xffffffffffffffff) {
19801			return (EIO);
19802		}
19803		break;	/* Success! */
19804	case EIO:
19805		switch (ucmd_buf.uscsi_status) {
19806		case STATUS_RESERVATION_CONFLICT:
19807			status = EACCES;
19808			break;
19809		case STATUS_CHECK:
19810			/*
19811			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19812			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19813			 */
19814			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19815			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
19816			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
19817				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19818				return (EAGAIN);
19819			}
19820			break;
19821		default:
19822			break;
19823		}
19824		/* FALLTHRU */
19825	default:
19826		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19827		return (status);
19828	}
19829
19830	*capp = capacity;
19831	*lbap = lbasize;
19832
19833	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19834	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19835
19836	return (0);
19837}
19838
19839
19840/*
19841 *    Function: sd_send_scsi_START_STOP_UNIT
19842 *
19843 * Description: Issue a scsi START STOP UNIT command to the target.
19844 *
19845 *   Arguments: un    - pointer to driver soft state (unit) structure for
19846 *			this target.
19847 *		flag  - SD_TARGET_START
19848 *			SD_TARGET_STOP
19849 *			SD_TARGET_EJECT
19850 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19851 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19852 *			to use the USCSI "direct" chain and bypass the normal
19853 *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19854 *			command is issued as part of an error recovery action.
19855 *
19856 * Return Code: 0   - Success
19857 *		EIO - IO error
19858 *		EACCES - Reservation conflict detected
19859 *		ENXIO  - Not Ready, medium not present
19860 *		errno return code from sd_send_scsi_cmd()
19861 *
19862 *     Context: Can sleep.
19863 */
19864
19865static int
19866sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19867{
19868	struct	scsi_extended_sense	sense_buf;
19869	union scsi_cdb		cdb;
19870	struct uscsi_cmd	ucmd_buf;
19871	int			status;
19872
19873	ASSERT(un != NULL);
19874	ASSERT(!mutex_owned(SD_MUTEX(un)));
19875
19876	SD_TRACE(SD_LOG_IO, un,
19877	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19878
19879	if (un->un_f_check_start_stop &&
19880	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19881	    (un->un_f_start_stop_supported != TRUE)) {
19882		return (0);
19883	}
19884
19885	bzero(&cdb, sizeof (cdb));
19886	bzero(&ucmd_buf, sizeof (ucmd_buf));
19887	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19888
19889	cdb.scc_cmd = SCMD_START_STOP;
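	/*
	 * Byte 4 of the START STOP UNIT CDB carries the LOEJ and START
	 * bits; the SD_TARGET_* flag values encode the desired
	 * combination.
	 */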
19890	cdb.cdb_opaque[4] = (uchar_t)flag;
19891
19892	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19893	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19894	ucmd_buf.uscsi_bufaddr	= NULL;
19895	ucmd_buf.uscsi_buflen	= 0;
19896	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19897	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19898	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19899	ucmd_buf.uscsi_timeout	= 200;
19900
19901	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19902	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19903
19904	switch (status) {
19905	case 0:
19906		break;	/* Success! */
19907	case EIO:
19908		switch (ucmd_buf.uscsi_status) {
19909		case STATUS_RESERVATION_CONFLICT:
19910			status = EACCES;
19911			break;
19912		case STATUS_CHECK:
19913			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19914				switch (scsi_sense_key(
19915						(uint8_t *)&sense_buf)) {
19916				case KEY_ILLEGAL_REQUEST:
19917					status = ENOTSUP;
19918					break;
19919				case KEY_NOT_READY:
					if (scsi_sense_asc(
					    (uint8_t *)&sense_buf) == 0x3A) {
19923						status = ENXIO;
19924					}
19925					break;
19926				default:
19927					break;
19928				}
19929			}
19930			break;
19931		default:
19932			break;
19933		}
19934		break;
19935	default:
19936		break;
19937	}
19938
19939	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19940
19941	return (status);
19942}
19943
19944
19945/*
19946 *    Function: sd_start_stop_unit_callback
19947 *
19948 * Description: timeout(9F) callback to begin recovery process for a
19949 *		device that has spun down.
19950 *
19951 *   Arguments: arg - pointer to associated softstate struct.
19952 *
19953 *     Context: Executes in a timeout(9F) thread context
19954 */
19955
19956static void
19957sd_start_stop_unit_callback(void *arg)
19958{
19959	struct sd_lun	*un = arg;
19960	ASSERT(un != NULL);
19961	ASSERT(!mutex_owned(SD_MUTEX(un)));
19962
19963	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19964
19965	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19966}
19967
19968
19969/*
19970 *    Function: sd_start_stop_unit_task
19971 *
19972 * Description: Recovery procedure when a drive is spun down.
19973 *
19974 *   Arguments: arg - pointer to associated softstate struct.
19975 *
19976 *     Context: Executes in a taskq() thread context
19977 */
19978
19979static void
19980sd_start_stop_unit_task(void *arg)
19981{
19982	struct sd_lun	*un = arg;
19983
19984	ASSERT(un != NULL);
19985	ASSERT(!mutex_owned(SD_MUTEX(un)));
19986
19987	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19988
19989	/*
	 * Some unformatted drives report a not ready error; there is no
	 * need to restart if a format has been initiated.
19992	 */
19993	mutex_enter(SD_MUTEX(un));
19994	if (un->un_f_format_in_progress == TRUE) {
19995		mutex_exit(SD_MUTEX(un));
19996		return;
19997	}
19998	mutex_exit(SD_MUTEX(un));
19999
20000	/*
20001	 * When a START STOP command is issued from here, it is part of a
20002	 * failure recovery operation and must be issued before any other
20003	 * commands, including any pending retries. Thus it must be sent
	 * using SD_PATH_DIRECT_PRIORITY. Whether or not the spin up
	 * succeeds, we will start I/O after the attempt.
20006	 */
20007	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
20008	    SD_PATH_DIRECT_PRIORITY);
20009
20010	/*
20011	 * The above call blocks until the START_STOP_UNIT command completes.
20012	 * Now that it has completed, we must re-try the original IO that
20013	 * received the NOT READY condition in the first place. There are
20014	 * three possible conditions here:
20015	 *
20016	 *  (1) The original IO is on un_retry_bp.
20017	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
20018	 *	is NULL.
20019	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
20020	 *	points to some other, unrelated bp.
20021	 *
20022	 * For each case, we must call sd_start_cmds() with un_retry_bp
20023	 * as the argument. If un_retry_bp is NULL, this will initiate
20024	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
20025	 * then this will process the bp on un_retry_bp. That may or may not
20026	 * be the original IO, but that does not matter: the important thing
20027	 * is to keep the IO processing going at this point.
20028	 *
20029	 * Note: This is a very specific error recovery sequence associated
20030	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
20031	 * serialize the I/O with completion of the spin-up.
20032	 */
20033	mutex_enter(SD_MUTEX(un));
20034	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
20035	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
20036	    un, un->un_retry_bp);
20037	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
20038	sd_start_cmds(un, un->un_retry_bp);
20039	mutex_exit(SD_MUTEX(un));
20040
20041	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
20042}
20043
20044
20045/*
20046 *    Function: sd_send_scsi_INQUIRY
20047 *
20048 * Description: Issue the scsi INQUIRY command.
20049 *
 *   Arguments: un - ptr to soft state struct for the target
 *		bufaddr - ptr to buffer to receive the INQUIRY data
 *		buflen - length of the bufaddr buffer
 *		evpd - EVPD bit for byte 1 of the INQUIRY CDB
 *		page_code - page code for byte 2 of the INQUIRY CDB
 *		residp - ptr to receive the command residual (may be NULL)
20056 *
20057 * Return Code: 0   - Success
20058 *		errno return code from sd_send_scsi_cmd()
20059 *
20060 *     Context: Can sleep. Does not return until command is completed.
20061 */
20062
20063static int
20064sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
20065	uchar_t evpd, uchar_t page_code, size_t *residp)
20066{
20067	union scsi_cdb		cdb;
20068	struct uscsi_cmd	ucmd_buf;
20069	int			status;
20070
20071	ASSERT(un != NULL);
20072	ASSERT(!mutex_owned(SD_MUTEX(un)));
20073	ASSERT(bufaddr != NULL);
20074
20075	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
20076
20077	bzero(&cdb, sizeof (cdb));
20078	bzero(&ucmd_buf, sizeof (ucmd_buf));
20079	bzero(bufaddr, buflen);
20080
20081	cdb.scc_cmd = SCMD_INQUIRY;
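	/*
	 * Byte 1 of the INQUIRY CDB carries the EVPD bit and byte 2 the
	 * page code of the vital product data page being requested.
	 */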
20082	cdb.cdb_opaque[1] = evpd;
20083	cdb.cdb_opaque[2] = page_code;
20084	FORMG0COUNT(&cdb, buflen);
20085
20086	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20087	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20088	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20089	ucmd_buf.uscsi_buflen	= buflen;
20090	ucmd_buf.uscsi_rqbuf	= NULL;
20091	ucmd_buf.uscsi_rqlen	= 0;
20092	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
20093	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
20094
20095	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20096	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
20097
20098	if ((status == 0) && (residp != NULL)) {
20099		*residp = ucmd_buf.uscsi_resid;
20100	}
20101
20102	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
20103
20104	return (status);
20105}
20106
20107
20108/*
20109 *    Function: sd_send_scsi_TEST_UNIT_READY
20110 *
20111 * Description: Issue the scsi TEST UNIT READY command.
20112 *		This routine can be told to set the flag USCSI_DIAGNOSE to
20113 *		prevent retrying failed commands. Use this when the intent
20114 *		is either to check for device readiness, to clear a Unit
20115 *		Attention, or to clear any outstanding sense data.
20116 *		However under specific conditions the expected behavior
20117 *		is for retries to bring a device ready, so use the flag
20118 *		with caution.
20119 *
20120 *   Arguments: un
20121 *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
20122 *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
20123 *			0: dont check for media present, do retries on cmd.
20124 *
20125 * Return Code: 0   - Success
20126 *		EIO - IO error
20127 *		EACCES - Reservation conflict detected
20128 *		ENXIO  - Not Ready, medium not present
20129 *		errno return code from sd_send_scsi_cmd()
20130 *
20131 *     Context: Can sleep. Does not return until command is completed.
20132 */
20133
20134static int
20135sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
20136{
20137	struct	scsi_extended_sense	sense_buf;
20138	union scsi_cdb		cdb;
20139	struct uscsi_cmd	ucmd_buf;
20140	int			status;
20141
20142	ASSERT(un != NULL);
20143	ASSERT(!mutex_owned(SD_MUTEX(un)));
20144
20145	SD_TRACE(SD_LOG_IO, un,
20146	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
20147
20148	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are pending commands, return success; this is a bit
	 * arbitrary, but it is OK for non-removables (i.e. the eliteI disks)
	 * and non-clustering configurations.
20157	 */
20158	if (un->un_f_cfg_tur_check == TRUE) {
20159		mutex_enter(SD_MUTEX(un));
20160		if (un->un_ncmds_in_transport != 0) {
20161			mutex_exit(SD_MUTEX(un));
20162			return (0);
20163		}
20164		mutex_exit(SD_MUTEX(un));
20165	}
20166
20167	bzero(&cdb, sizeof (cdb));
20168	bzero(&ucmd_buf, sizeof (ucmd_buf));
20169	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20170
20171	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
20172
20173	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20174	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20175	ucmd_buf.uscsi_bufaddr	= NULL;
20176	ucmd_buf.uscsi_buflen	= 0;
20177	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20178	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20179	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
20180
20181	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
20182	if ((flag & SD_DONT_RETRY_TUR) != 0) {
20183		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
20184	}
20185	ucmd_buf.uscsi_timeout	= 60;
20186
20187	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20188	    UIO_SYSSPACE, UIO_SYSSPACE,
20189	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
20190
20191	switch (status) {
20192	case 0:
20193		break;	/* Success! */
20194	case EIO:
20195		switch (ucmd_buf.uscsi_status) {
20196		case STATUS_RESERVATION_CONFLICT:
20197			status = EACCES;
20198			break;
20199		case STATUS_CHECK:
20200			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
20201				break;
20202			}
20203			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20204			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20205				KEY_NOT_READY) &&
20206			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
20207				status = ENXIO;
20208			}
20209			break;
20210		default:
20211			break;
20212		}
20213		break;
20214	default:
20215		break;
20216	}
20217
20218	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
20219
20220	return (status);
20221}
20222
20223
20224/*
20225 *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20226 *
20227 * Description: Issue the scsi PERSISTENT RESERVE IN command.
20228 *
20229 *   Arguments: un
20230 *
20231 * Return Code: 0   - Success
20232 *		EACCES
20233 *		ENOTSUP
20234 *		errno return code from sd_send_scsi_cmd()
20235 *
20236 *     Context: Can sleep. Does not return until command is completed.
20237 */
20238
20239static int
20240sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
20241	uint16_t data_len, uchar_t *data_bufp)
20242{
20243	struct scsi_extended_sense	sense_buf;
20244	union scsi_cdb		cdb;
20245	struct uscsi_cmd	ucmd_buf;
20246	int			status;
20247	int			no_caller_buf = FALSE;
20248
20249	ASSERT(un != NULL);
20250	ASSERT(!mutex_owned(SD_MUTEX(un)));
20251	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
20252
20253	SD_TRACE(SD_LOG_IO, un,
20254	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
20255
20256	bzero(&cdb, sizeof (cdb));
20257	bzero(&ucmd_buf, sizeof (ucmd_buf));
20258	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20259	if (data_bufp == NULL) {
20260		/* Allocate a default buf if the caller did not give one */
20261		ASSERT(data_len == 0);
20262		data_len  = MHIOC_RESV_KEY_SIZE;
20263		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
20264		no_caller_buf = TRUE;
20265	}
20266
20267	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
20268	cdb.cdb_opaque[1] = usr_cmd;
20269	FORMG1COUNT(&cdb, data_len);
20270
20271	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20272	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20273	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
20274	ucmd_buf.uscsi_buflen	= data_len;
20275	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20276	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20277	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20278	ucmd_buf.uscsi_timeout	= 60;
20279
20280	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20281	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20282
20283	switch (status) {
20284	case 0:
20285		break;	/* Success! */
20286	case EIO:
20287		switch (ucmd_buf.uscsi_status) {
20288		case STATUS_RESERVATION_CONFLICT:
20289			status = EACCES;
20290			break;
20291		case STATUS_CHECK:
20292			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20293			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20294				KEY_ILLEGAL_REQUEST)) {
20295				status = ENOTSUP;
20296			}
20297			break;
20298		default:
20299			break;
20300		}
20301		break;
20302	default:
20303		break;
20304	}
20305
20306	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
20307
20308	if (no_caller_buf == TRUE) {
20309		kmem_free(data_bufp, data_len);
20310	}
20311
20312	return (status);
20313}
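/*
 * Usage sketch (illustrative only, not part of the original source):
 * reading the registered keys from the target.  The buffer sizing below,
 * room for four keys plus an assumed 8-byte PRIN READ KEYS header, is
 * chosen purely for illustration:
 *
 *	uchar_t	keys[8 + (4 * MHIOC_RESV_KEY_SIZE)];
 *	int	rc;
 *
 *	rc = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
 *	    sizeof (keys), keys);
 *	if (rc == ENOTSUP) {
 *		target rejected the command with ILLEGAL REQUEST,
 *		i.e. it does not implement SCSI-3 persistent reservations
 *	}
 */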
20314
20315
20316/*
20317 *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20318 *
20319 * Description: This routine is the driver entry point for handling the
20320 *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
20321 *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT, and
20322 *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending SCSI-3 PROUT commands.
20323 *
20324 *   Arguments: un  -   Pointer to soft state struct for the target.
20325 *		usr_cmd SCSI-3 reservation facility command (one of
20326 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
20327 *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_REGISTERANDIGNOREKEY)
20328 *		usr_bufp - user provided pointer register, reserve descriptor or
20329 *			preempt and abort structure (mhioc_register_t,
20330 *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
20331 *
20332 * Return Code: 0   - Success
20333 *		EACCES
20334 *		ENOTSUP
20335 *		errno return code from sd_send_scsi_cmd()
20336 *
20337 *     Context: Can sleep. Does not return until command is completed.
20338 */
20339
20340static int
20341sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
20342	uchar_t	*usr_bufp)
20343{
20344	struct scsi_extended_sense	sense_buf;
20345	union scsi_cdb		cdb;
20346	struct uscsi_cmd	ucmd_buf;
20347	int			status;
20348	uchar_t			data_len = sizeof (sd_prout_t);
20349	sd_prout_t		*prp;
20350
20351	ASSERT(un != NULL);
20352	ASSERT(!mutex_owned(SD_MUTEX(un)));
20353	ASSERT(data_len == 24);	/* required by scsi spec */
20354
20355	SD_TRACE(SD_LOG_IO, un,
20356	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20357
20358	if (usr_bufp == NULL) {
20359		return (EINVAL);
20360	}
20361
20362	bzero(&cdb, sizeof (cdb));
20363	bzero(&ucmd_buf, sizeof (ucmd_buf));
20364	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20365	prp = kmem_zalloc(data_len, KM_SLEEP);
20366
20367	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20368	cdb.cdb_opaque[1] = usr_cmd;
20369	FORMG1COUNT(&cdb, data_len);
20370
20371	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20372	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20373	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20374	ucmd_buf.uscsi_buflen	= data_len;
20375	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20376	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20377	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20378	ucmd_buf.uscsi_timeout	= 60;
20379
20380	switch (usr_cmd) {
20381	case SD_SCSI3_REGISTER: {
20382		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
20383
20384		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20385		bcopy(ptr->newkey.key, prp->service_key,
20386		    MHIOC_RESV_KEY_SIZE);
20387		prp->aptpl = ptr->aptpl;
20388		break;
20389	}
20390	case SD_SCSI3_RESERVE:
20391	case SD_SCSI3_RELEASE: {
20392		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20393
20394		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20395		prp->scope_address = BE_32(ptr->scope_specific_addr);
20396		cdb.cdb_opaque[2] = ptr->type;
20397		break;
20398	}
20399	case SD_SCSI3_PREEMPTANDABORT: {
20400		mhioc_preemptandabort_t *ptr =
20401		    (mhioc_preemptandabort_t *)usr_bufp;
20402
20403		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20404		bcopy(ptr->victim_key.key, prp->service_key,
20405		    MHIOC_RESV_KEY_SIZE);
20406		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
20407		cdb.cdb_opaque[2] = ptr->resvdesc.type;
20408		ucmd_buf.uscsi_flags |= USCSI_HEAD;
20409		break;
20410	}
20411	case SD_SCSI3_REGISTERANDIGNOREKEY:
20412	{
20413		mhioc_registerandignorekey_t *ptr;
20414		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
20415		bcopy(ptr->newkey.key,
20416		    prp->service_key, MHIOC_RESV_KEY_SIZE);
20417		prp->aptpl = ptr->aptpl;
20418		break;
20419	}
20420	default:
20421		ASSERT(FALSE);
20422		break;
20423	}
20424
20425	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20426	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20427
20428	switch (status) {
20429	case 0:
20430		break;	/* Success! */
20431	case EIO:
20432		switch (ucmd_buf.uscsi_status) {
20433		case STATUS_RESERVATION_CONFLICT:
20434			status = EACCES;
20435			break;
20436		case STATUS_CHECK:
20437			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20438			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20439				KEY_ILLEGAL_REQUEST)) {
20440				status = ENOTSUP;
20441			}
20442			break;
20443		default:
20444			break;
20445		}
20446		break;
20447	default:
20448		break;
20449	}
20450
20451	kmem_free(prp, data_len);
20452	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20453	return (status);
20454}
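/*
 * Usage sketch (illustrative only, not part of the original source): the
 * register-then-reserve sequence a multi-host client drives through this
 * routine.  The key value and the reservation type are placeholders:
 *
 *	mhioc_register_t	reg;
 *	mhioc_resv_desc_t	resv;
 *
 *	bzero(&reg, sizeof (reg));
 *	bcopy("examplek", reg.newkey.key, MHIOC_RESV_KEY_SIZE);
 *	if (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
 *	    (uchar_t *)&reg) == 0) {
 *		bzero(&resv, sizeof (resv));
 *		bcopy("examplek", resv.key.key, MHIOC_RESV_KEY_SIZE);
 *		resv.type = <reservation type per SPC, e.g. Write
 *		    Exclusive - Registrants Only>;
 *		(void) sd_send_scsi_PERSISTENT_RESERVE_OUT(un,
 *		    SD_SCSI3_RESERVE, (uchar_t *)&resv);
 *	}
 */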
20455
20456
20457/*
20458 *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20459 *
20460 * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20461 *
20462 *   Arguments: un - pointer to the target's soft state struct
20463 *
20464 * Return Code: 0 - success
20465 *		errno-type error code
20466 *
20467 *     Context: kernel thread context only.
20468 */
20469
20470static int
20471sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20472{
20473	struct sd_uscsi_info	*uip;
20474	struct uscsi_cmd	*uscmd;
20475	union scsi_cdb		*cdb;
20476	struct buf		*bp;
20477	int			rval = 0;
20478
20479	SD_TRACE(SD_LOG_IO, un,
20480	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20481
20482	ASSERT(un != NULL);
20483	ASSERT(!mutex_owned(SD_MUTEX(un)));
20484
20485	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20486	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20487
20488	/*
20489	 * Get some memory for the uscsi_cmd struct, then initialize it
20490	 * and the cdb allocated above for the SYNCHRONIZE CACHE cmd.
20491	 */
20492	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20493	uscmd->uscsi_cdblen = CDB_GROUP1;
20494	uscmd->uscsi_cdb = (caddr_t)cdb;
20495	uscmd->uscsi_bufaddr = NULL;
20496	uscmd->uscsi_buflen = 0;
20497	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20498	uscmd->uscsi_rqlen = SENSE_LENGTH;
20499	uscmd->uscsi_rqresid = SENSE_LENGTH;
20500	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20501	uscmd->uscsi_timeout = sd_io_time;
20502
20503	/*
20504	 * Allocate an sd_uscsi_info struct and fill it with the info
20505	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20506	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20507	 * since we allocate the buf here in this function, we do not
20508	 * need to preserve the prior contents of b_private.
20509	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20510	 */
20511	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20512	uip->ui_flags = SD_PATH_DIRECT;
20513	uip->ui_cmdp  = uscmd;
20514
20515	bp = getrbuf(KM_SLEEP);
20516	bp->b_private = uip;
20517
20518	/*
20519	 * Setup buffer to carry uscsi request.
20520	 */
20521	bp->b_flags  = B_BUSY;
20522	bp->b_bcount = 0;
20523	bp->b_blkno  = 0;
20524
20525	if (dkc != NULL) {
20526		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20527		uip->ui_dkc = *dkc;
20528	}
20529
20530	bp->b_edev = SD_GET_DEV(un);
20531	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20532
20533	(void) sd_uscsi_strategy(bp);
20534
20535	/*
20536	 * If this is a synchronous request, wait for completion.
20537	 * If it is asynchronous, just return and let the b_iodone
20538	 * callback clean up.
20539	 * NOTE: On return, un_ncmds_in_driver will be decremented,
20540	 * but it was also incremented in sd_uscsi_strategy(), so
20541	 * we should be ok.
20542	 */
20543	if (dkc == NULL) {
20544		(void) biowait(bp);
20545		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20546	}
20547
20548	return (rval);
20549}
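/*
 * Usage sketch (illustrative only, not part of the original source): the
 * two ways this routine is driven.  A synchronous flush passes a NULL
 * dk_callback and blocks in biowait(); an asynchronous flush supplies a
 * callback that the biodone routine below invokes on completion.
 * my_flush_done is a hypothetical callback named only for illustration:
 *
 *	static void
 *	my_flush_done(void *cookie, int error)
 *	{
 *		note the error, release the cookie
 *	}
 *
 *	struct dk_callback dkc;
 *
 *	dkc.dkc_callback = my_flush_done;
 *	dkc.dkc_cookie = (void *)un;
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);	(async)
 *
 *	err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);		(sync)
 */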
20550
20551
20552static int
20553sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20554{
20555	struct sd_uscsi_info *uip;
20556	struct uscsi_cmd *uscmd;
20557	uint8_t *sense_buf;
20558	struct sd_lun *un;
20559	int status;
20560
20561	uip = (struct sd_uscsi_info *)(bp->b_private);
20562	ASSERT(uip != NULL);
20563
20564	uscmd = uip->ui_cmdp;
20565	ASSERT(uscmd != NULL);
20566
20567	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
20568	ASSERT(sense_buf != NULL);
20569
20570	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20571	ASSERT(un != NULL);
20572
20573	status = geterror(bp);
20574	switch (status) {
20575	case 0:
20576		break;	/* Success! */
20577	case EIO:
20578		switch (uscmd->uscsi_status) {
20579		case STATUS_RESERVATION_CONFLICT:
20580			/* Ignore reservation conflict */
20581			status = 0;
20582			goto done;
20583
20584		case STATUS_CHECK:
20585			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20586			    (scsi_sense_key(sense_buf) ==
20587				KEY_ILLEGAL_REQUEST)) {
20588				/* Ignore Illegal Request error */
20589				mutex_enter(SD_MUTEX(un));
20590				un->un_f_sync_cache_supported = FALSE;
20591				mutex_exit(SD_MUTEX(un));
20592				status = ENOTSUP;
20593				goto done;
20594			}
20595			break;
20596		default:
20597			break;
20598		}
20599		/* FALLTHRU */
20600	default:
20601		/*
20602		 * Don't log an error message if this device
20603		 * has removable media.
20604		 */
20605		if (!un->un_f_has_removable_media) {
20606			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20607			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20608		}
20609		break;
20610	}
20611
20612done:
20613	if (uip->ui_dkc.dkc_callback != NULL) {
20614		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20615	}
20616
20617	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20618	freerbuf(bp);
20619	kmem_free(uip, sizeof (struct sd_uscsi_info));
20620	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20621	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20622	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20623
20624	return (status);
20625}
20626
20627
20628/*
20629 *    Function: sd_send_scsi_GET_CONFIGURATION
20630 *
20631 * Description: Issues the get configuration command to the device.
20632 *		Called from sd_check_for_writable_cd & sd_get_media_info;
20633 *		the caller must ensure that buflen = SD_PROFILE_HEADER_LEN.
20634 *   Arguments: un
20635 *		ucmdbuf
20636 *		rqbuf
20637 *		rqbuflen
20638 *		bufaddr
20639 *		buflen
20640 *
20641 * Return Code: 0   - Success
20642 *		errno return code from sd_send_scsi_cmd()
20643 *
20644 *     Context: Can sleep. Does not return until command is completed.
20645 *
20646 */
20647
20648static int
20649sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20650	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20651{
20652	char	cdb[CDB_GROUP1];
20653	int	status;
20654
20655	ASSERT(un != NULL);
20656	ASSERT(!mutex_owned(SD_MUTEX(un)));
20657	ASSERT(bufaddr != NULL);
20658	ASSERT(ucmdbuf != NULL);
20659	ASSERT(rqbuf != NULL);
20660
20661	SD_TRACE(SD_LOG_IO, un,
20662	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20663
20664	bzero(cdb, sizeof (cdb));
20665	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20666	bzero(rqbuf, rqbuflen);
20667	bzero(bufaddr, buflen);
20668
20669	/*
20670	 * Set up cdb field for the get configuration command.
20671	 */
20672	cdb[0] = SCMD_GET_CONFIGURATION;
20673	cdb[1] = 0x02;  /* Requested Type */
20674	cdb[8] = SD_PROFILE_HEADER_LEN;
20675	ucmdbuf->uscsi_cdb = cdb;
20676	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20677	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20678	ucmdbuf->uscsi_buflen = buflen;
20679	ucmdbuf->uscsi_timeout = sd_io_time;
20680	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20681	ucmdbuf->uscsi_rqlen = rqbuflen;
20682	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20683
20684	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20685	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20686
20687	switch (status) {
20688	case 0:
20689		break;  /* Success! */
20690	case EIO:
20691		switch (ucmdbuf->uscsi_status) {
20692		case STATUS_RESERVATION_CONFLICT:
20693			status = EACCES;
20694			break;
20695		default:
20696			break;
20697		}
20698		break;
20699	default:
20700		break;
20701	}
20702
20703	if (status == 0) {
20704		SD_DUMP_MEMORY(un, SD_LOG_IO,
20705		    "sd_send_scsi_GET_CONFIGURATION: data",
20706		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20707	}
20708
20709	SD_TRACE(SD_LOG_IO, un,
20710	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20711
20712	return (status);
20713}
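/*
 * Usage sketch (illustrative only, not part of the original source): with
 * buflen equal to SD_PROFILE_HEADER_LEN only the feature header comes
 * back; per the MMC feature header layout (an assumption noted here for
 * illustration) its last two bytes hold the current profile:
 *
 *	uchar_t	rqbuf[SENSE_LENGTH];
 *	uchar_t	hdr[SD_PROFILE_HEADER_LEN];
 *	struct uscsi_cmd com;
 *
 *	if (sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
 *	    hdr, SD_PROFILE_HEADER_LEN) == 0) {
 *		current profile number is (hdr[6] << 8) | hdr[7]
 *	}
 */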
20714
20715/*
20716 *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20717 *
20718 * Description: Issues the get configuration command to the device to
20719 *              retrieve a specific feature. Called from
20720 *		sd_check_for_writable_cd & sd_set_mmc_caps.
20721 *   Arguments: un
20722 *              ucmdbuf
20723 *              rqbuf
20724 *              rqbuflen
20725 *              bufaddr
20726 *              buflen
20727 *		feature
20728 *
20729 * Return Code: 0   - Success
20730 *              errno return code from sd_send_scsi_cmd()
20731 *
20732 *     Context: Can sleep. Does not return until command is completed.
20733 *
20734 */
20735static int
20736sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20737	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20738	uchar_t *bufaddr, uint_t buflen, char feature)
20739{
20740	char    cdb[CDB_GROUP1];
20741	int	status;
20742
20743	ASSERT(un != NULL);
20744	ASSERT(!mutex_owned(SD_MUTEX(un)));
20745	ASSERT(bufaddr != NULL);
20746	ASSERT(ucmdbuf != NULL);
20747	ASSERT(rqbuf != NULL);
20748
20749	SD_TRACE(SD_LOG_IO, un,
20750	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20751
20752	bzero(cdb, sizeof (cdb));
20753	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20754	bzero(rqbuf, rqbuflen);
20755	bzero(bufaddr, buflen);
20756
20757	/*
20758	 * Set up cdb field for the get configuration command.
20759	 */
20760	cdb[0] = SCMD_GET_CONFIGURATION;
20761	cdb[1] = 0x02;  /* Requested Type */
20762	cdb[3] = feature;
20763	cdb[8] = buflen;
20764	ucmdbuf->uscsi_cdb = cdb;
20765	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20766	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20767	ucmdbuf->uscsi_buflen = buflen;
20768	ucmdbuf->uscsi_timeout = sd_io_time;
20769	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20770	ucmdbuf->uscsi_rqlen = rqbuflen;
20771	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20772
20773	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20774	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20775
20776	switch (status) {
20777	case 0:
20778		break;  /* Success! */
20779	case EIO:
20780		switch (ucmdbuf->uscsi_status) {
20781		case STATUS_RESERVATION_CONFLICT:
20782			status = EACCES;
20783			break;
20784		default:
20785			break;
20786		}
20787		break;
20788	default:
20789		break;
20790	}
20791
20792	if (status == 0) {
20793		SD_DUMP_MEMORY(un, SD_LOG_IO,
20794		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20795		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20796	}
20797
20798	SD_TRACE(SD_LOG_IO, un,
20799	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20800
20801	return (status);
20802}
20803
20804
20805/*
20806 *    Function: sd_send_scsi_MODE_SENSE
20807 *
20808 * Description: Utility function for issuing a scsi MODE SENSE command.
20809 *		Note: This routine uses a consistent implementation for Group0,
20810 *		Group1, and Group2 commands across all platforms. ATAPI devices
20811 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20812 *
20813 *   Arguments: un - pointer to the softstate struct for the target.
20814 *		cdbsize - size of the CDB to be used (CDB_GROUP0 (6 byte) or
20815 *			  CDB_GROUP[1|2] (10 byte)).
20816 *		bufaddr - buffer for page data retrieved from the target.
20817 *		buflen - size of page to be retrieved.
20818 *		page_code - page code of data to be retrieved from the target.
20819 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20820 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20821 *			to use the USCSI "direct" chain and bypass the normal
20822 *			command waitq.
20823 *
20824 * Return Code: 0   - Success
20825 *		errno return code from sd_send_scsi_cmd()
20826 *
20827 *     Context: Can sleep. Does not return until command is completed.
20828 */
20829
20830static int
20831sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20832	size_t buflen,  uchar_t page_code, int path_flag)
20833{
20834	struct	scsi_extended_sense	sense_buf;
20835	union scsi_cdb		cdb;
20836	struct uscsi_cmd	ucmd_buf;
20837	int			status;
20838	int			headlen;
20839
20840	ASSERT(un != NULL);
20841	ASSERT(!mutex_owned(SD_MUTEX(un)));
20842	ASSERT(bufaddr != NULL);
20843	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20844	    (cdbsize == CDB_GROUP2));
20845
20846	SD_TRACE(SD_LOG_IO, un,
20847	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20848
20849	bzero(&cdb, sizeof (cdb));
20850	bzero(&ucmd_buf, sizeof (ucmd_buf));
20851	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20852	bzero(bufaddr, buflen);
20853
20854	if (cdbsize == CDB_GROUP0) {
20855		cdb.scc_cmd = SCMD_MODE_SENSE;
20856		cdb.cdb_opaque[2] = page_code;
20857		FORMG0COUNT(&cdb, buflen);
20858		headlen = MODE_HEADER_LENGTH;
20859	} else {
20860		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20861		cdb.cdb_opaque[2] = page_code;
20862		FORMG1COUNT(&cdb, buflen);
20863		headlen = MODE_HEADER_LENGTH_GRP2;
20864	}
20865
20866	ASSERT(headlen <= buflen);
20867	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20868
20869	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20870	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20871	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20872	ucmd_buf.uscsi_buflen	= buflen;
20873	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20874	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20875	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20876	ucmd_buf.uscsi_timeout	= 60;
20877
20878	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20879	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20880
20881	switch (status) {
20882	case 0:
20883		/*
20884		 * sr_check_wp() uses page code 0x3f and checks the mode page
20885		 * header to determine whether the target device is
20886		 * write-protected. But some USB devices return 0 bytes for
20887		 * page code 0x3f, so for this case make sure that at least
20888		 * the mode page header is returned.
20889		 */
20890		if (buflen - ucmd_buf.uscsi_resid <  headlen)
20891			status = EIO;
20892		break;	/* Success! */
20893	case EIO:
20894		switch (ucmd_buf.uscsi_status) {
20895		case STATUS_RESERVATION_CONFLICT:
20896			status = EACCES;
20897			break;
20898		default:
20899			break;
20900		}
20901		break;
20902	default:
20903		break;
20904	}
20905
20906	if (status == 0) {
20907		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20908		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20909	}
20910	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20911
20912	return (status);
20913}
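/*
 * Usage sketch (illustrative only, not part of the original source):
 * fetching the caching mode page (MODEPAGE_CACHING, page code 0x08).  The
 * buffer must cover the mode header plus the page; the 20-byte page
 * allowance below is an assumption for illustration:
 *
 *	uchar_t	mpage[MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH + 20];
 *
 *	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, mpage, sizeof (mpage),
 *	    MODEPAGE_CACHING, SD_PATH_DIRECT) == 0) {
 *		parse the page, skipping the mode header and any
 *		block descriptors the target returned
 *	}
 */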
20914
20915
20916/*
20917 *    Function: sd_send_scsi_MODE_SELECT
20918 *
20919 * Description: Utility function for issuing a scsi MODE SELECT command.
20920 *		Note: This routine uses a consistent implementation for Group0,
20921 *		Group1, and Group2 commands across all platforms. ATAPI devices
20922 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20923 *
20924 *   Arguments: un - pointer to the softstate struct for the target.
20925 *		cdbsize - size of the CDB to be used (CDB_GROUP0 (6 byte) or
20926 *			  CDB_GROUP[1|2] (10 byte)).
20927 *		bufaddr - buffer containing the page data to be sent to the target.
20928 *		buflen - size of the page data to be sent.
20929 *		save_page - boolean to determine whether the SP bit should be set.
20930 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20931 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20932 *			to use the USCSI "direct" chain and bypass the normal
20933 *			command waitq.
20934 *
20935 * Return Code: 0   - Success
20936 *		errno return code from sd_send_scsi_cmd()
20937 *
20938 *     Context: Can sleep. Does not return until command is completed.
20939 */
20940
20941static int
20942sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20943	size_t buflen,  uchar_t save_page, int path_flag)
20944{
20945	struct	scsi_extended_sense	sense_buf;
20946	union scsi_cdb		cdb;
20947	struct uscsi_cmd	ucmd_buf;
20948	int			status;
20949
20950	ASSERT(un != NULL);
20951	ASSERT(!mutex_owned(SD_MUTEX(un)));
20952	ASSERT(bufaddr != NULL);
20953	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20954	    (cdbsize == CDB_GROUP2));
20955
20956	SD_TRACE(SD_LOG_IO, un,
20957	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20958
20959	bzero(&cdb, sizeof (cdb));
20960	bzero(&ucmd_buf, sizeof (ucmd_buf));
20961	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20962
20963	/* Set the PF bit for many third party drives */
20964	cdb.cdb_opaque[1] = 0x10;
20965
20966	/* Set the savepage(SP) bit if given */
20967	if (save_page == SD_SAVE_PAGE) {
20968		cdb.cdb_opaque[1] |= 0x01;
20969	}
20970
20971	if (cdbsize == CDB_GROUP0) {
20972		cdb.scc_cmd = SCMD_MODE_SELECT;
20973		FORMG0COUNT(&cdb, buflen);
20974	} else {
20975		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20976		FORMG1COUNT(&cdb, buflen);
20977	}
20978
20979	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20980
20981	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20982	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20983	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20984	ucmd_buf.uscsi_buflen	= buflen;
20985	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20986	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20987	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20988	ucmd_buf.uscsi_timeout	= 60;
20989
20990	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20991	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20992
20993	switch (status) {
20994	case 0:
20995		break;	/* Success! */
20996	case EIO:
20997		switch (ucmd_buf.uscsi_status) {
20998		case STATUS_RESERVATION_CONFLICT:
20999			status = EACCES;
21000			break;
21001		default:
21002			break;
21003		}
21004		break;
21005	default:
21006		break;
21007	}
21008
21009	if (status == 0) {
21010		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
21011		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21012	}
21013	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
21014
21015	return (status);
21016}
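/*
 * Usage sketch (illustrative only, not part of the original source): the
 * usual sense-modify-select cycle.  A page is read with
 * sd_send_scsi_MODE_SENSE(), one or more fields are changed in place, and
 * the same buffer is written back; SD_SAVE_PAGE asks the target to
 * persist the change:
 *
 *	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, buf, buflen,
 *	    page_code, SD_PATH_DIRECT) == 0) {
 *		modify the page data in buf, then:
 *		(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, buf,
 *		    buflen, SD_SAVE_PAGE, SD_PATH_DIRECT);
 *	}
 *
 * Note that the mode data length field in the header generally must be
 * zeroed before the buffer is sent back down; that detail is omitted here.
 */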
21017
21018
21019/*
21020 *    Function: sd_send_scsi_RDWR
21021 *
21022 * Description: Issue a scsi READ or WRITE command with the given parameters.
21023 *
21024 *   Arguments: un:      Pointer to the sd_lun struct for the target.
21025 *		cmd:	 SCMD_READ or SCMD_WRITE
21026 *		bufaddr: Address of caller's buffer supplying (for a write)
21027 *			 or receiving (for a read) the RDWR data.
21028 *		buflen:  Length, in bytes, of caller's buffer for the
21029 *			 RDWR data.
21030 *		start_block: Block number for the start of the RDWR operation.
21031 *			 (Assumes target-native block size.)
21032 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21033 *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21034 *			to use the USCSI "direct" chain and bypass the normal
21035 *			command waitq.
21036 *
21037 * Return Code: 0   - Success
21038 *		errno return code from sd_send_scsi_cmd()
21039 *
21040 *     Context: Can sleep. Does not return until command is completed.
21041 */
21042
21043static int
21044sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
21045	size_t buflen, daddr_t start_block, int path_flag)
21046{
21047	struct	scsi_extended_sense	sense_buf;
21048	union scsi_cdb		cdb;
21049	struct uscsi_cmd	ucmd_buf;
21050	uint32_t		block_count;
21051	int			status;
21052	int			cdbsize;
21053	uchar_t			flag;
21054
21055	ASSERT(un != NULL);
21056	ASSERT(!mutex_owned(SD_MUTEX(un)));
21057	ASSERT(bufaddr != NULL);
21058	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
21059
21060	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
21061
21062	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
21063		return (EINVAL);
21064	}
21065
21066	mutex_enter(SD_MUTEX(un));
21067	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
21068	mutex_exit(SD_MUTEX(un));
21069
21070	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
21071
21072	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
21073	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
21074	    bufaddr, buflen, start_block, block_count);
21075
21076	bzero(&cdb, sizeof (cdb));
21077	bzero(&ucmd_buf, sizeof (ucmd_buf));
21078	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21079
21080	/* Compute CDB size to use */
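	/*
	 * A Group 0 (6-byte) READ/WRITE CDB carries only a 21-bit LBA and
	 * an 8-bit transfer length, so it can address start blocks below
	 * 0x200000 only (hence the 0xFFE00000 mask test below).  A Group 1
	 * (10-byte) CDB carries a 32-bit LBA; anything beyond 32 bits
	 * requires a Group 4 (16-byte) CDB.  ATAPI devices do not implement
	 * the 6-byte forms, so they always get at least a 10-byte CDB.
	 */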
21081	if (start_block > 0xffffffff)
21082		cdbsize = CDB_GROUP4;
21083	else if ((start_block & 0xFFE00000) ||
21084	    (un->un_f_cfg_is_atapi == TRUE))
21085		cdbsize = CDB_GROUP1;
21086	else
21087		cdbsize = CDB_GROUP0;
21088
21089	switch (cdbsize) {
21090	case CDB_GROUP0:	/* 6-byte CDBs */
21091		cdb.scc_cmd = cmd;
21092		FORMG0ADDR(&cdb, start_block);
21093		FORMG0COUNT(&cdb, block_count);
21094		break;
21095	case CDB_GROUP1:	/* 10-byte CDBs */
21096		cdb.scc_cmd = cmd | SCMD_GROUP1;
21097		FORMG1ADDR(&cdb, start_block);
21098		FORMG1COUNT(&cdb, block_count);
21099		break;
21100	case CDB_GROUP4:	/* 16-byte CDBs */
21101		cdb.scc_cmd = cmd | SCMD_GROUP4;
21102		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
21103		FORMG4COUNT(&cdb, block_count);
21104		break;
21105	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
21106	default:
21107		/* All others reserved */
21108		return (EINVAL);
21109	}
21110
21111	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
21112	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21113
21114	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21115	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21116	ucmd_buf.uscsi_bufaddr	= bufaddr;
21117	ucmd_buf.uscsi_buflen	= buflen;
21118	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21119	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21120	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
21121	ucmd_buf.uscsi_timeout	= 60;
21122	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
21123				UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
21124	switch (status) {
21125	case 0:
21126		break;	/* Success! */
21127	case EIO:
21128		switch (ucmd_buf.uscsi_status) {
21129		case STATUS_RESERVATION_CONFLICT:
21130			status = EACCES;
21131			break;
21132		default:
21133			break;
21134		}
21135		break;
21136	default:
21137		break;
21138	}
21139
21140	if (status == 0) {
21141		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
21142		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21143	}
21144
21145	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
21146
21147	return (status);
21148}
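/*
 * Usage sketch (illustrative only, not part of the original source):
 * reading a single target block, e.g. while hunting for a label.  buflen
 * is expressed in bytes and converted to target blocks internally:
 *
 *	uchar_t	*blk = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
 *
 *	if (sd_send_scsi_RDWR(un, SCMD_READ, blk, un->un_tgt_blocksize,
 *	    (daddr_t)0, SD_PATH_DIRECT) == 0) {
 *		inspect the block in blk
 *	}
 *	kmem_free(blk, un->un_tgt_blocksize);
 */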
21149
21150
21151/*
21152 *    Function: sd_send_scsi_LOG_SENSE
21153 *
21154 * Description: Issue a scsi LOG_SENSE command with the given parameters.
21155 *
21156 *   Arguments: un:      Pointer to the sd_lun struct for the target.
21157 *
21158 * Return Code: 0   - Success
21159 *		errno return code from sd_send_scsi_cmd()
21160 *
21161 *     Context: Can sleep. Does not return until command is completed.
21162 */
21163
21164static int
21165sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
21166	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
21167	int path_flag)
21168
21169{
21170	struct	scsi_extended_sense	sense_buf;
21171	union scsi_cdb		cdb;
21172	struct uscsi_cmd	ucmd_buf;
21173	int			status;
21174
21175	ASSERT(un != NULL);
21176	ASSERT(!mutex_owned(SD_MUTEX(un)));
21177
21178	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
21179
21180	bzero(&cdb, sizeof (cdb));
21181	bzero(&ucmd_buf, sizeof (ucmd_buf));
21182	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21183
21184	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
21185	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
21186	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
21187	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
21188	FORMG1COUNT(&cdb, buflen);
21189
21190	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21191	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21192	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21193	ucmd_buf.uscsi_buflen	= buflen;
21194	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21195	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21196	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21197	ucmd_buf.uscsi_timeout	= 60;
21198
21199	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
21200	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
21201
21202	switch (status) {
21203	case 0:
21204		break;
21205	case EIO:
21206		switch (ucmd_buf.uscsi_status) {
21207		case STATUS_RESERVATION_CONFLICT:
21208			status = EACCES;
21209			break;
21210		case STATUS_CHECK:
21211			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
21212			    (scsi_sense_key((uint8_t *)&sense_buf) ==
21213				KEY_ILLEGAL_REQUEST) &&
21214			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
21215				/*
21216				 * ASC 0x24: INVALID FIELD IN CDB
21217				 */
21218				switch (page_code) {
21219				case START_STOP_CYCLE_PAGE:
21220					/*
21221					 * The start stop cycle counter is
21222					 * implemented as page 0x31 in earlier
21223					 * generation disks. In new generation
21224					 * disks the start stop cycle counter is
21225					 * implemented as page 0xE. To properly
21226					 * implemented as page 0xE. To handle
21227					 * this case properly, if an attempt
21228					 * for log page 0xE fails, we will
21229					 * try again using page 0x31.
21230					 * Network storage BU committed to
21231					 * maintain the page 0x31 for this
21232					 * purpose and will not have any other
21233					 * page implemented with page code 0x31
21234					 * until all disks transition to the
21235					 * standard page.
21236					 */
21237					mutex_enter(SD_MUTEX(un));
21238					un->un_start_stop_cycle_page =
21239					    START_STOP_CYCLE_VU_PAGE;
21240					cdb.cdb_opaque[2] =
21241					    (char)(page_control << 6) |
21242					    un->un_start_stop_cycle_page;
21243					mutex_exit(SD_MUTEX(un));
21244					status = sd_send_scsi_cmd(
21245					    SD_GET_DEV(un), &ucmd_buf,
21246					    UIO_SYSSPACE, UIO_SYSSPACE,
21247					    UIO_SYSSPACE, path_flag);
21248
21249					break;
21250				case TEMPERATURE_PAGE:
21251					status = ENOTTY;
21252					break;
21253				default:
21254					break;
21255				}
21256			}
21257			break;
21258		default:
21259			break;
21260		}
21261		break;
21262	default:
21263		break;
21264	}
21265
21266	if (status == 0) {
21267		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
21268		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21269	}
21270
21271	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
21272
21273	return (status);
21274}
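/*
 * Usage sketch (illustrative only, not part of the original source):
 * retrieving the temperature log page, as the DKIOCGTEMPERATURE path
 * does.  A page_control value of 1 requests current cumulative values;
 * the 64-byte buffer is an assumption for illustration:
 *
 *	uchar_t	logbuf[64];
 *
 *	if (sd_send_scsi_LOG_SENSE(un, logbuf, sizeof (logbuf),
 *	    TEMPERATURE_PAGE, 1, 0, SD_PATH_STANDARD) == 0) {
 *		temperature parameters follow the 4-byte log page
 *		header in logbuf
 *	}
 */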
21275
21276
21277/*
21278 *    Function: sdioctl
21279 *
21280 * Description: Driver's ioctl(9e) entry point function.
21281 *
21282 *   Arguments: dev     - device number
21283 *		cmd     - ioctl operation to be performed
21284 *		arg     - user argument, contains data to be set or reference
21285 *			  parameter for get
21286 *		flag    - bit flag, indicating open settings, 32/64 bit type
21287 *		cred_p  - user credential pointer
21288 *		rval_p  - calling process return value (OPT)
21289 *
21290 * Return Code: EINVAL
21291 *		ENOTTY
21292 *		ENXIO
21293 *		EIO
21294 *		EFAULT
21295 *		ENOTSUP
21296 *		EPERM
21297 *
21298 *     Context: Called from the device switch at normal priority.
21299 */
21300
21301static int
21302sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
21303{
21304	struct sd_lun	*un = NULL;
21305	int		geom_validated = FALSE;
21306	int		err = 0;
21307	int		i = 0;
21308	cred_t		*cr;
21309
21310	/*
21311	 * All device accesses go thru sdstrategy where we check on suspend
21312	 * status
21313	 */
21314	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21315		return (ENXIO);
21316	}
21317
21318	ASSERT(!mutex_owned(SD_MUTEX(un)));
21319
21320	/*
21321	 * Moved this wait from sd_uscsi_strategy to here for
21322	 * reasons of deadlock prevention. Internal driver commands,
21323	 * specifically those to change a device's power level, result
21324	 * in a call to sd_uscsi_strategy.
21325	 */
21326	mutex_enter(SD_MUTEX(un));
21327	while ((un->un_state == SD_STATE_SUSPENDED) ||
21328	    (un->un_state == SD_STATE_PM_CHANGING)) {
21329		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
21330	}
21331	/*
21332	 * Incrementing the counter here protects commands from now
21333	 * through to the top of sd_uscsi_strategy. Without the
21334	 * counter increment, a power-down, for example, could slip
21335	 * in after the above state check is made and before
21336	 * execution reaches the top of sd_uscsi_strategy.
21337	 * That would cause problems.
21338	 */
21339	un->un_ncmds_in_driver++;
21340
21341	if ((un->un_f_geometry_is_valid == FALSE) &&
21342	    (flag & (FNDELAY | FNONBLOCK))) {
21343		switch (cmd) {
21344		case CDROMPAUSE:
21345		case CDROMRESUME:
21346		case CDROMPLAYMSF:
21347		case CDROMPLAYTRKIND:
21348		case CDROMREADTOCHDR:
21349		case CDROMREADTOCENTRY:
21350		case CDROMSTOP:
21351		case CDROMSTART:
21352		case CDROMVOLCTRL:
21353		case CDROMSUBCHNL:
21354		case CDROMREADMODE2:
21355		case CDROMREADMODE1:
21356		case CDROMREADOFFSET:
21357		case CDROMSBLKMODE:
21358		case CDROMGBLKMODE:
21359		case CDROMGDRVSPEED:
21360		case CDROMSDRVSPEED:
21361		case CDROMCDDA:
21362		case CDROMCDXA:
21363		case CDROMSUBCODE:
21364			if (!ISCD(un)) {
21365				un->un_ncmds_in_driver--;
21366				ASSERT(un->un_ncmds_in_driver >= 0);
21367				mutex_exit(SD_MUTEX(un));
21368				return (ENOTTY);
21369			}
21370			break;
21371		case FDEJECT:
21372		case DKIOCEJECT:
21373		case CDROMEJECT:
21374			if (!un->un_f_eject_media_supported) {
21375				un->un_ncmds_in_driver--;
21376				ASSERT(un->un_ncmds_in_driver >= 0);
21377				mutex_exit(SD_MUTEX(un));
21378				return (ENOTTY);
21379			}
21380			break;
21381		case DKIOCSVTOC:
21382		case DKIOCSETEFI:
21383		case DKIOCSMBOOT:
21384		case DKIOCFLUSHWRITECACHE:
21385			mutex_exit(SD_MUTEX(un));
21386			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
21387			if (err != 0) {
21388				mutex_enter(SD_MUTEX(un));
21389				un->un_ncmds_in_driver--;
21390				ASSERT(un->un_ncmds_in_driver >= 0);
21391				mutex_exit(SD_MUTEX(un));
21392				return (EIO);
21393			}
21394			mutex_enter(SD_MUTEX(un));
21395			/* FALLTHROUGH */
21396		case DKIOCREMOVABLE:
21397		case DKIOCHOTPLUGGABLE:
21398		case DKIOCINFO:
21399		case DKIOCGMEDIAINFO:
21400		case MHIOCENFAILFAST:
21401		case MHIOCSTATUS:
21402		case MHIOCTKOWN:
21403		case MHIOCRELEASE:
21404		case MHIOCGRP_INKEYS:
21405		case MHIOCGRP_INRESV:
21406		case MHIOCGRP_REGISTER:
21407		case MHIOCGRP_RESERVE:
21408		case MHIOCGRP_PREEMPTANDABORT:
21409		case MHIOCGRP_REGISTERANDIGNOREKEY:
21410		case CDROMCLOSETRAY:
21411		case USCSICMD:
21412			goto skip_ready_valid;
21413		default:
21414			break;
21415		}
21416
21417		mutex_exit(SD_MUTEX(un));
21418		err = sd_ready_and_valid(un);
21419		mutex_enter(SD_MUTEX(un));
21420		if (err == SD_READY_NOT_VALID) {
21421			switch (cmd) {
21422			case DKIOCGAPART:
21423			case DKIOCGGEOM:
21424			case DKIOCSGEOM:
21425			case DKIOCGVTOC:
21426			case DKIOCSVTOC:
21427			case DKIOCSAPART:
21428			case DKIOCG_PHYGEOM:
21429			case DKIOCG_VIRTGEOM:
21430				err = ENOTSUP;
21431				un->un_ncmds_in_driver--;
21432				ASSERT(un->un_ncmds_in_driver >= 0);
21433				mutex_exit(SD_MUTEX(un));
21434				return (err);
21435			}
21436		}
21437		if (err != SD_READY_VALID) {
21438			switch (cmd) {
21439			case DKIOCSTATE:
21440			case CDROMGDRVSPEED:
21441			case CDROMSDRVSPEED:
21442			case FDEJECT:	/* for eject command */
21443			case DKIOCEJECT:
21444			case CDROMEJECT:
21445			case DKIOCGETEFI:
21446			case DKIOCSGEOM:
21447			case DKIOCREMOVABLE:
21448			case DKIOCHOTPLUGGABLE:
21449			case DKIOCSAPART:
21450			case DKIOCSETEFI:
21451				break;
21452			default:
21453				if (un->un_f_has_removable_media) {
21454					err = ENXIO;
21455				} else {
21456				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
21457					if (err == SD_RESERVED_BY_OTHERS) {
21458						err = EACCES;
21459					} else {
21460						err = EIO;
21461					}
21462				}
21463				un->un_ncmds_in_driver--;
21464				ASSERT(un->un_ncmds_in_driver >= 0);
21465				mutex_exit(SD_MUTEX(un));
21466				return (err);
21467			}
21468		}
21469		geom_validated = TRUE;
21470	}
21471	if ((un->un_f_geometry_is_valid == TRUE) &&
21472	    (un->un_solaris_size > 0)) {
21473		/*
21474		 * the "geometry_is_valid" flag could be true if we
21475		 * have an fdisk table but no Solaris partition
21476		 */
21477		if (un->un_vtoc.v_sanity != VTOC_SANE) {
21478			/* it is EFI, so return ENOTSUP for these */
21479			switch (cmd) {
21480			case DKIOCGAPART:
21481			case DKIOCGGEOM:
21482			case DKIOCGVTOC:
21483			case DKIOCSVTOC:
21484			case DKIOCSAPART:
21485				err = ENOTSUP;
21486				un->un_ncmds_in_driver--;
21487				ASSERT(un->un_ncmds_in_driver >= 0);
21488				mutex_exit(SD_MUTEX(un));
21489				return (err);
21490			}
21491		}
21492	}
21493
21494skip_ready_valid:
21495	mutex_exit(SD_MUTEX(un));
21496
21497	switch (cmd) {
21498	case DKIOCINFO:
21499		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21500		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21501		break;
21502
21503	case DKIOCGMEDIAINFO:
21504		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21505		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21506		break;
21507
21508	case DKIOCGGEOM:
21509		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21510		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21511		    geom_validated);
21512		break;
21513
21514	case DKIOCSGEOM:
21515		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21516		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21517		break;
21518
21519	case DKIOCGAPART:
21520		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21521		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21522		    geom_validated);
21523		break;
21524
21525	case DKIOCSAPART:
21526		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21527		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21528		break;
21529
21530	case DKIOCGVTOC:
21531		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21532		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21533		    geom_validated);
21534		break;
21535
21536	case DKIOCGETEFI:
21537		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21538		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21539		break;
21540
21541	case DKIOCPARTITION:
21542		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21543		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21544		break;
21545
21546	case DKIOCSVTOC:
21547		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21548		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21549		break;
21550
21551	case DKIOCSETEFI:
21552		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21553		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21554		break;
21555
21556	case DKIOCGMBOOT:
21557		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21558		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21559		break;
21560
21561	case DKIOCSMBOOT:
21562		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21563		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21564		break;
21565
21566	case DKIOCLOCK:
21567		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21568		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21569		    SD_PATH_STANDARD);
21570		break;
21571
21572	case DKIOCUNLOCK:
21573		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21574		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21575		    SD_PATH_STANDARD);
21576		break;
21577
21578	case DKIOCSTATE: {
21579		enum dkio_state		state;
21580		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21581
21582		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21583			err = EFAULT;
21584		} else {
21585			err = sd_check_media(dev, state);
21586			if (err == 0) {
21587				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21588				    sizeof (int), flag) != 0)
21589					err = EFAULT;
21590			}
21591		}
21592		break;
21593	}
21594
21595	case DKIOCREMOVABLE:
21596		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21597		i = un->un_f_has_removable_media ? 1 : 0;
21598		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21599			err = EFAULT;
21600		} else {
21601			err = 0;
21602		}
21603		break;
21604
21605	case DKIOCHOTPLUGGABLE:
21606		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21607		i = un->un_f_is_hotpluggable ? 1 : 0;
21608		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21609			err = EFAULT;
21610		} else {
21611			err = 0;
21612		}
21613		break;
21614
21615	case DKIOCGTEMPERATURE:
21616		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21617		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21618		break;
21619
21620	case MHIOCENFAILFAST:
21621		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21622		if ((err = drv_priv(cred_p)) == 0) {
21623			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21624		}
21625		break;
21626
21627	case MHIOCTKOWN:
21628		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21629		if ((err = drv_priv(cred_p)) == 0) {
21630			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21631		}
21632		break;
21633
21634	case MHIOCRELEASE:
21635		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21636		if ((err = drv_priv(cred_p)) == 0) {
21637			err = sd_mhdioc_release(dev);
21638		}
21639		break;
21640
21641	case MHIOCSTATUS:
21642		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21643		if ((err = drv_priv(cred_p)) == 0) {
21644			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21645			case 0:
21646				err = 0;
21647				break;
21648			case EACCES:
21649				*rval_p = 1;
21650				err = 0;
21651				break;
21652			default:
21653				err = EIO;
21654				break;
21655			}
21656		}
21657		break;
21658
21659	case MHIOCQRESERVE:
21660		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21661		if ((err = drv_priv(cred_p)) == 0) {
21662			err = sd_reserve_release(dev, SD_RESERVE);
21663		}
21664		break;
21665
21666	case MHIOCREREGISTERDEVID:
21667		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21668		if (drv_priv(cred_p) == EPERM) {
21669			err = EPERM;
21670		} else if (!un->un_f_devid_supported) {
21671			err = ENOTTY;
21672		} else {
21673			err = sd_mhdioc_register_devid(dev);
21674		}
21675		break;
21676
21677	case MHIOCGRP_INKEYS:
21678		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21679		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21680			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21681				err = ENOTSUP;
21682			} else {
21683				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21684				    flag);
21685			}
21686		}
21687		break;
21688
21689	case MHIOCGRP_INRESV:
21690		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21691		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21692			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21693				err = ENOTSUP;
21694			} else {
21695				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21696			}
21697		}
21698		break;
21699
21700	case MHIOCGRP_REGISTER:
21701		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21702		if ((err = drv_priv(cred_p)) != EPERM) {
21703			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21704				err = ENOTSUP;
21705			} else if (arg != NULL) {
21706				mhioc_register_t reg;
21707				if (ddi_copyin((void *)arg, &reg,
21708				    sizeof (mhioc_register_t), flag) != 0) {
21709					err = EFAULT;
21710				} else {
21711					err =
21712					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21713					    un, SD_SCSI3_REGISTER,
21714					    (uchar_t *)&reg);
21715				}
21716			}
21717		}
21718		break;
21719
21720	case MHIOCGRP_RESERVE:
21721		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21722		if ((err = drv_priv(cred_p)) != EPERM) {
21723			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21724				err = ENOTSUP;
21725			} else if (arg != NULL) {
21726				mhioc_resv_desc_t resv_desc;
21727				if (ddi_copyin((void *)arg, &resv_desc,
21728				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21729					err = EFAULT;
21730				} else {
21731					err =
21732					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21733					    un, SD_SCSI3_RESERVE,
21734					    (uchar_t *)&resv_desc);
21735				}
21736			}
21737		}
21738		break;
21739
21740	case MHIOCGRP_PREEMPTANDABORT:
21741		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21742		if ((err = drv_priv(cred_p)) != EPERM) {
21743			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21744				err = ENOTSUP;
21745			} else if (arg != NULL) {
21746				mhioc_preemptandabort_t preempt_abort;
21747				if (ddi_copyin((void *)arg, &preempt_abort,
21748				    sizeof (mhioc_preemptandabort_t),
21749				    flag) != 0) {
21750					err = EFAULT;
21751				} else {
21752					err =
21753					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21754					    un, SD_SCSI3_PREEMPTANDABORT,
21755					    (uchar_t *)&preempt_abort);
21756				}
21757			}
21758		}
21759		break;
21760
21761	case MHIOCGRP_REGISTERANDIGNOREKEY:
21762		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21763		if ((err = drv_priv(cred_p)) != EPERM) {
21764			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21765				err = ENOTSUP;
21766			} else if (arg != NULL) {
21767				mhioc_registerandignorekey_t r_and_i;
21768				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21769				    sizeof (mhioc_registerandignorekey_t),
21770				    flag) != 0) {
21771					err = EFAULT;
21772				} else {
21773					err =
21774					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21775					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21776					    (uchar_t *)&r_and_i);
21777				}
21778			}
21779		}
21780		break;
21781
21782	case USCSICMD:
21783		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21784		cr = ddi_get_cred();
21785		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21786			err = EPERM;
21787		} else {
21788			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21789		}
21790		break;
21791
21792	case CDROMPAUSE:
21793	case CDROMRESUME:
21794		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21795		if (!ISCD(un)) {
21796			err = ENOTTY;
21797		} else {
21798			err = sr_pause_resume(dev, cmd);
21799		}
21800		break;
21801
21802	case CDROMPLAYMSF:
21803		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21804		if (!ISCD(un)) {
21805			err = ENOTTY;
21806		} else {
21807			err = sr_play_msf(dev, (caddr_t)arg, flag);
21808		}
21809		break;
21810
21811	case CDROMPLAYTRKIND:
21812		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21813#if defined(__i386) || defined(__amd64)
21814		/*
21815		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21816		 */
21817		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21818#else
21819		if (!ISCD(un)) {
21820#endif
21821			err = ENOTTY;
21822		} else {
21823			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21824		}
21825		break;
21826
21827	case CDROMREADTOCHDR:
21828		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21829		if (!ISCD(un)) {
21830			err = ENOTTY;
21831		} else {
21832			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21833		}
21834		break;
21835
21836	case CDROMREADTOCENTRY:
21837		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21838		if (!ISCD(un)) {
21839			err = ENOTTY;
21840		} else {
21841			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21842		}
21843		break;
21844
21845	case CDROMSTOP:
21846		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21847		if (!ISCD(un)) {
21848			err = ENOTTY;
21849		} else {
21850			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21851			    SD_PATH_STANDARD);
21852		}
21853		break;
21854
21855	case CDROMSTART:
21856		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21857		if (!ISCD(un)) {
21858			err = ENOTTY;
21859		} else {
21860			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21861			    SD_PATH_STANDARD);
21862		}
21863		break;
21864
21865	case CDROMCLOSETRAY:
21866		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21867		if (!ISCD(un)) {
21868			err = ENOTTY;
21869		} else {
21870			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21871			    SD_PATH_STANDARD);
21872		}
21873		break;
21874
21875	case FDEJECT:	/* for eject command */
21876	case DKIOCEJECT:
21877	case CDROMEJECT:
21878		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21879		if (!un->un_f_eject_media_supported) {
21880			err = ENOTTY;
21881		} else {
21882			err = sr_eject(dev);
21883		}
21884		break;
21885
21886	case CDROMVOLCTRL:
21887		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21888		if (!ISCD(un)) {
21889			err = ENOTTY;
21890		} else {
21891			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21892		}
21893		break;
21894
21895	case CDROMSUBCHNL:
21896		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21897		if (!ISCD(un)) {
21898			err = ENOTTY;
21899		} else {
21900			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21901		}
21902		break;
21903
21904	case CDROMREADMODE2:
21905		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21906		if (!ISCD(un)) {
21907			err = ENOTTY;
21908		} else if (un->un_f_cfg_is_atapi == TRUE) {
21909			/*
21910			 * If the drive supports READ CD, use that instead of
21911			 * switching the LBA size via a MODE SELECT
21912			 * Block Descriptor
21913			 */
21914			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21915		} else {
21916			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21917		}
21918		break;
21919
21920	case CDROMREADMODE1:
21921		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21922		if (!ISCD(un)) {
21923			err = ENOTTY;
21924		} else {
21925			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21926		}
21927		break;
21928
21929	case CDROMREADOFFSET:
21930		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21931		if (!ISCD(un)) {
21932			err = ENOTTY;
21933		} else {
21934			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21935			    flag);
21936		}
21937		break;
21938
21939	case CDROMSBLKMODE:
21940		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21941		/*
21942		 * There is no means of changing the block size on ATAPI
21943		 * drives, so return ENOTTY if the drive is ATAPI.
21944		 */
21945		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21946			err = ENOTTY;
21947		} else if (un->un_f_mmc_cap == TRUE) {
21948
21949			/*
21950			 * MMC devices do not support changing the
21951			 * logical block size.
21952			 *
21953			 * Note: EINVAL is returned instead of ENOTTY to
21954			 * maintain consistency with the original mmc
21955			 * driver update.
21956			 */
21957			err = EINVAL;
21958		} else {
21959			mutex_enter(SD_MUTEX(un));
21960			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21961			    (un->un_ncmds_in_transport > 0)) {
21962				mutex_exit(SD_MUTEX(un));
21963				err = EINVAL;
21964			} else {
21965				mutex_exit(SD_MUTEX(un));
21966				err = sr_change_blkmode(dev, cmd, arg, flag);
21967			}
21968		}
21969		break;
21970
21971	case CDROMGBLKMODE:
21972		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21973		if (!ISCD(un)) {
21974			err = ENOTTY;
21975		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21976		    (un->un_f_blockcount_is_valid != FALSE)) {
21977			/*
21978			 * The drive is ATAPI, so return the target block
21979			 * size; the blocksize cannot be changed on ATAPI
21980			 * drives. This is used primarily to detect
21981			 * if an ATAPI cdrom is present.
21982			 */
21983			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21984			    sizeof (int), flag) != 0) {
21985				err = EFAULT;
21986			} else {
21987				err = 0;
21988			}
21989
21990		} else {
21991			/*
21992			 * Drive supports changing block sizes via a Mode
21993			 * Select.
21994			 */
21995			err = sr_change_blkmode(dev, cmd, arg, flag);
21996		}
21997		break;
21998
21999	case CDROMGDRVSPEED:
22000	case CDROMSDRVSPEED:
22001		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
22002		if (!ISCD(un)) {
22003			err = ENOTTY;
22004		} else if (un->un_f_mmc_cap == TRUE) {
22005			/*
22006			 * Note: In the future the driver implementation
22007			 * for getting and setting cd speed
22008			 * should entail:
22009			 * 1) If non-mmc, try the Toshiba mode page
22010			 *    (sr_change_speed)
22011			 * 2) If mmc but without support for Real Time
22012			 *    Streaming, try the SET CD SPEED (0xBB) command
22013			 *    (sr_atapi_change_speed)
22014			 * 3) If mmc with support for Real Time Streaming,
22015			 *    try the GET PERFORMANCE and SET STREAMING
22016			 *    commands (not yet implemented, 4380808)
22017			 */
22018			/*
22019			 * As per the recent MMC spec, CD-ROM speed is
22020			 * variable and changes with LBA. Since there is no
22021			 * such thing as a single drive speed now, fail this
22022			 * ioctl.
22023			 *
22024			 * Note: EINVAL is returned for consistency with the
22025			 * original implementation, which supported getting
22026			 * the drive speed of mmc devices but not setting it;
22027			 * thus EINVAL would be returned if a set request was
22028			 * made for an mmc device. We no longer support get
22029			 * or set speed for mmc, but need to remain consistent
22030			 * with regard to the error code returned.
22031			 */
22032			err = EINVAL;
22033		} else if (un->un_f_cfg_is_atapi == TRUE) {
22034			err = sr_atapi_change_speed(dev, cmd, arg, flag);
22035		} else {
22036			err = sr_change_speed(dev, cmd, arg, flag);
22037		}
22038		break;
22039
22040	case CDROMCDDA:
22041		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
22042		if (!ISCD(un)) {
22043			err = ENOTTY;
22044		} else {
22045			err = sr_read_cdda(dev, (void *)arg, flag);
22046		}
22047		break;
22048
22049	case CDROMCDXA:
22050		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
22051		if (!ISCD(un)) {
22052			err = ENOTTY;
22053		} else {
22054			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
22055		}
22056		break;
22057
22058	case CDROMSUBCODE:
22059		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
22060		if (!ISCD(un)) {
22061			err = ENOTTY;
22062		} else {
22063			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
22064		}
22065		break;
22066
22067	case DKIOCPARTINFO: {
22068		/*
22069		 * Return parameters describing the selected disk slice.
22070		 * Note: this ioctl is for the intel platform only
22071		 */
22072#if defined(__i386) || defined(__amd64)
22073		int part;
22074
22075		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
22076		part = SDPART(dev);
22077
22078		/* don't check un_solaris_size for pN */
22079		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
22080			err = EIO;
22081		} else {
22082			struct part_info p;
22083
22084			p.p_start = (daddr_t)un->un_offset[part];
22085			p.p_length = (int)un->un_map[part].dkl_nblk;
22086#ifdef _MULTI_DATAMODEL
22087			switch (ddi_model_convert_from(flag & FMODELS)) {
22088			case DDI_MODEL_ILP32:
22089			{
22090				struct part_info32 p32;
22091
22092				p32.p_start = (daddr32_t)p.p_start;
22093				p32.p_length = p.p_length;
22094				if (ddi_copyout(&p32, (void *)arg,
22095				    sizeof (p32), flag))
22096					err = EFAULT;
22097				break;
22098			}
22099
22100			case DDI_MODEL_NONE:
22101			{
22102				if (ddi_copyout(&p, (void *)arg, sizeof (p),
22103				    flag))
22104					err = EFAULT;
22105				break;
22106			}
22107			}
22108#else /* ! _MULTI_DATAMODEL */
22109			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
22110				err = EFAULT;
22111#endif /* _MULTI_DATAMODEL */
22112		}
22113#else
22114		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
22115		err = ENOTTY;
22116#endif
22117		break;
22118	}
22119
22120	case DKIOCG_PHYGEOM: {
22121		/* Return the driver's notion of the media physical geometry */
22122#if defined(__i386) || defined(__amd64)
22123		uint64_t	capacity;
22124		struct dk_geom	disk_geom;
22125		struct dk_geom	*dkgp = &disk_geom;
22126
22127		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
22128		mutex_enter(SD_MUTEX(un));
22129
22130		if (un->un_g.dkg_nhead != 0 &&
22131		    un->un_g.dkg_nsect != 0) {
22132			/*
22133			 * We succeeded in getting a geometry, but
22134			 * right now it is being reported as just the
22135			 * Solaris fdisk partition, just like for
22136			 * DKIOCGGEOM. We need to change that to be
22137			 * correct for the entire disk now.
22138			 */
22139			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
22140			dkgp->dkg_acyl = 0;
22141			dkgp->dkg_ncyl = un->un_blockcount /
22142			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
22143		} else {
22144			bzero(dkgp, sizeof (struct dk_geom));
22145			/*
22146			 * This disk does not have a Solaris VTOC
22147			 * so we must present a physical geometry
22148			 * that will remain consistent regardless
22149			 * of how the disk is used. This will ensure
22150			 * that the geometry does not change regardless
22151			 * of the fdisk partition type (i.e. EFI, FAT32,
22152			 * Solaris, etc).
22153			 */
22154			if (ISCD(un)) {
22155				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
22156				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
22157				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
22158				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
22159			} else {
22160				/*
				 * An invalid un_blockcount can generate an
				 * invalid dk_geom and may result in a
				 * division-by-zero system failure, so make
				 * sure the blockcount is valid before
				 * using it here.
22165				 */
22166				if (un->un_f_blockcount_is_valid == FALSE) {
22167					mutex_exit(SD_MUTEX(un));
22168					err = EIO;
22169
22170					break;
22171				}
22172
22173				/*
22174				 * Refer to comments related to off-by-1 at the
22175				 * header of this file
22176				 */
				if (!un->un_f_capacity_adjusted &&
				    !un->un_f_has_removable_media &&
				    !un->un_f_is_hotpluggable &&
				    (un->un_tgt_blocksize ==
				    un->un_sys_blocksize))
					capacity = un->un_blockcount - 1;
				else
					capacity = un->un_blockcount;
22185
22186				sd_convert_geometry(capacity, dkgp);
22187				dkgp->dkg_acyl = 0;
22188				dkgp->dkg_ncyl = capacity /
22189				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
22190			}
22191		}
22192		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
22193
22194		if (ddi_copyout(dkgp, (void *)arg,
22195		    sizeof (struct dk_geom), flag)) {
22196			mutex_exit(SD_MUTEX(un));
22197			err = EFAULT;
22198		} else {
22199			mutex_exit(SD_MUTEX(un));
22200			err = 0;
22201		}
22202#else
22203		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
22204		err = ENOTTY;
22205#endif
22206		break;
22207	}
22208
22209	case DKIOCG_VIRTGEOM: {
22210		/* Return the driver's notion of the media's logical geometry */
22211#if defined(__i386) || defined(__amd64)
22212		struct dk_geom	disk_geom;
22213		struct dk_geom	*dkgp = &disk_geom;
22214
22215		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
22216		mutex_enter(SD_MUTEX(un));
22217		/*
22218		 * If there is no HBA geometry available, or
22219		 * if the HBA returned us something that doesn't
22220		 * really fit into an Int 13/function 8 geometry
22221		 * result, just fail the ioctl.  See PSARC 1998/313.
22222		 */
22223		if (un->un_lgeom.g_nhead == 0 ||
22224		    un->un_lgeom.g_nsect == 0 ||
22225		    un->un_lgeom.g_ncyl > 1024) {
22226			mutex_exit(SD_MUTEX(un));
22227			err = EINVAL;
22228		} else {
22229			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
22230			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
22231			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
22232			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
22233			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
22234
22235			if (ddi_copyout(dkgp, (void *)arg,
22236			    sizeof (struct dk_geom), flag)) {
22237				mutex_exit(SD_MUTEX(un));
22238				err = EFAULT;
22239			} else {
22240				mutex_exit(SD_MUTEX(un));
22241				err = 0;
22242			}
22243		}
22244#else
22245		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
22246		err = ENOTTY;
22247#endif
22248		break;
22249	}
22250#ifdef SDDEBUG
22251/* RESET/ABORTS testing ioctls */
22252	case DKIOCRESET: {
22253		int	reset_level;
22254
22255		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
22256			err = EFAULT;
22257		} else {
22258			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
			    "reset_level = 0x%x\n", reset_level);
22260			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
22261				err = 0;
22262			} else {
22263				err = EIO;
22264			}
22265		}
22266		break;
22267	}
22268
22269	case DKIOCABORT:
22270		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
22271		if (scsi_abort(SD_ADDRESS(un), NULL)) {
22272			err = 0;
22273		} else {
22274			err = EIO;
22275		}
22276		break;
22277#endif
22278
22279#ifdef SD_FAULT_INJECTION
22280/* SDIOC FaultInjection testing ioctls */
22281	case SDIOCSTART:
22282	case SDIOCSTOP:
22283	case SDIOCINSERTPKT:
22284	case SDIOCINSERTXB:
22285	case SDIOCINSERTUN:
22286	case SDIOCINSERTARQ:
22287	case SDIOCPUSH:
22288	case SDIOCRETRIEVE:
22289	case SDIOCRUN:
		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
		    "SDIOC detected cmd:0x%X:\n", cmd);
22292		/* call error generator */
22293		sd_faultinjection_ioctl(cmd, arg, un);
22294		err = 0;
22295		break;
22296
22297#endif /* SD_FAULT_INJECTION */
22298
22299	case DKIOCFLUSHWRITECACHE:
22300		{
22301			struct dk_callback *dkc = (struct dk_callback *)arg;
22302
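			/*
			 * Note: the callback path below is taken only when
			 * FKIOCTL is set, i.e. for in-kernel callers; a
			 * user-level DKIOCFLUSHWRITECACHE request is always
			 * handled synchronously.
			 */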
22303			mutex_enter(SD_MUTEX(un));
22304			if (!un->un_f_sync_cache_supported ||
22305			    !un->un_f_write_cache_enabled) {
				err = un->un_f_sync_cache_supported ?
				    0 : ENOTSUP;
22308				mutex_exit(SD_MUTEX(un));
22309				if ((flag & FKIOCTL) && dkc != NULL &&
22310				    dkc->dkc_callback != NULL) {
22311					(*dkc->dkc_callback)(dkc->dkc_cookie,
22312					    err);
22313					/*
					 * The error was reported through the
					 * callback; since a callback was
					 * made, the ioctl should return 0.
22317					 */
22318					err = 0;
22319				}
22320				break;
22321			}
22322			mutex_exit(SD_MUTEX(un));
22323
22324			if ((flag & FKIOCTL) && dkc != NULL &&
22325			    dkc->dkc_callback != NULL) {
22326				/* async SYNC CACHE request */
22327				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22328			} else {
22329				/* synchronous SYNC CACHE request */
22330				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22331			}
22332		}
22333		break;
22334
22335	case DKIOCGETWCE: {
22336
22337		int wce;
22338
22339		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
22340			break;
22341		}
22342
22343		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22344			err = EFAULT;
22345		}
22346		break;
22347	}
22348
22349	case DKIOCSETWCE: {
22350
22351		int wce, sync_supported;
22352
22353		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22354			err = EFAULT;
22355			break;
22356		}
22357
22358		/*
22359		 * Synchronize multiple threads trying to enable
22360		 * or disable the cache via the un_f_wcc_cv
22361		 * condition variable.
22362		 */
22363		mutex_enter(SD_MUTEX(un));
22364
22365		/*
22366		 * Don't allow the cache to be enabled if the
22367		 * config file has it disabled.
22368		 */
22369		if (un->un_f_opt_disable_cache && wce) {
22370			mutex_exit(SD_MUTEX(un));
22371			err = EINVAL;
22372			break;
22373		}
22374
22375		/*
22376		 * Wait for write cache change in progress
22377		 * bit to be clear before proceeding.
22378		 */
22379		while (un->un_f_wcc_inprog)
22380			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22381
22382		un->un_f_wcc_inprog = 1;
22383
22384		if (un->un_f_write_cache_enabled && wce == 0) {
22385			/*
22386			 * Disable the write cache.  Don't clear
22387			 * un_f_write_cache_enabled until after
22388			 * the mode select and flush are complete.
22389			 */
22390			sync_supported = un->un_f_sync_cache_supported;
22391			mutex_exit(SD_MUTEX(un));
22392			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22393			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
22394				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22395			}
22396
22397			mutex_enter(SD_MUTEX(un));
22398			if (err == 0) {
22399				un->un_f_write_cache_enabled = 0;
22400			}
22401
22402		} else if (!un->un_f_write_cache_enabled && wce != 0) {
22403			/*
22404			 * Set un_f_write_cache_enabled first, so there is
22405			 * no window where the cache is enabled, but the
22406			 * bit says it isn't.
22407			 */
22408			un->un_f_write_cache_enabled = 1;
22409			mutex_exit(SD_MUTEX(un));
22410
22411			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
22412				SD_CACHE_ENABLE);
22413
22414			mutex_enter(SD_MUTEX(un));
22415
22416			if (err) {
22417				un->un_f_write_cache_enabled = 0;
22418			}
22419		}
22420
22421		un->un_f_wcc_inprog = 0;
22422		cv_broadcast(&un->un_wcc_cv);
22423		mutex_exit(SD_MUTEX(un));
22424		break;
22425	}
22426
22427	default:
22428		err = ENOTTY;
22429		break;
22430	}
22431	mutex_enter(SD_MUTEX(un));
22432	un->un_ncmds_in_driver--;
22433	ASSERT(un->un_ncmds_in_driver >= 0);
22434	mutex_exit(SD_MUTEX(un));
22435
22436	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
22437	return (err);
22438}
22439
22440
22441/*
22442 *    Function: sd_uscsi_ioctl
22443 *
22444 * Description: This routine is the driver entry point for handling USCSI ioctl
22445 *		requests (USCSICMD).
22446 *
22447 *   Arguments: dev	- the device number
22448 *		arg	- user provided scsi command
22449 *		flag	- this argument is a pass through to ddi_copyxxx()
22450 *			  directly from the mode argument of ioctl().
22451 *
22452 * Return Code: code returned by sd_send_scsi_cmd
22453 *		ENXIO
22454 *		EFAULT
22455 *		EAGAIN
22456 */
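/*
 * Illustrative user-level usage (a sketch, not part of the driver; it
 * assumes the usual <sys/scsi/impl/uscsi.h> definitions and an open
 * raw-device file descriptor fd):
 *
 *	struct uscsi_cmd ucmd;
 *	union scsi_cdb cdb;
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	bzero(&cdb, sizeof (cdb));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	if (ioctl(fd, USCSICMD, &ucmd) < 0)
 *		perror("USCSICMD");
 */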
22457
22458static int
22459sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
22460{
22461#ifdef _MULTI_DATAMODEL
22462	/*
22463	 * For use when a 32 bit app makes a call into a
22464	 * 64 bit ioctl
22465	 */
22466	struct uscsi_cmd32	uscsi_cmd_32_for_64;
22467	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
22468	model_t			model;
22469#endif /* _MULTI_DATAMODEL */
22470	struct uscsi_cmd	*scmd = NULL;
22471	struct sd_lun		*un = NULL;
22472	enum uio_seg		uioseg;
22473	char			cdb[CDB_GROUP0];
22474	int			rval = 0;
22475
22476	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22477		return (ENXIO);
22478	}
22479
22480	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
22481
22482	scmd = (struct uscsi_cmd *)
22483	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
22484
22485#ifdef _MULTI_DATAMODEL
22486	switch (model = ddi_model_convert_from(flag & FMODELS)) {
22487	case DDI_MODEL_ILP32:
22488	{
22489		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
22490			rval = EFAULT;
22491			goto done;
22492		}
22493		/*
22494		 * Convert the ILP32 uscsi data from the
22495		 * application to LP64 for internal use.
22496		 */
22497		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
22498		break;
22499	}
22500	case DDI_MODEL_NONE:
22501		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22502			rval = EFAULT;
22503			goto done;
22504		}
22505		break;
22506	}
22507#else /* ! _MULTI_DATAMODEL */
22508	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22509		rval = EFAULT;
22510		goto done;
22511	}
22512#endif /* _MULTI_DATAMODEL */
22513
22514	scmd->uscsi_flags &= ~USCSI_NOINTR;
22515	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
22516	if (un->un_f_format_in_progress == TRUE) {
22517		rval = EAGAIN;
22518		goto done;
22519	}
22520
22521	/*
22522	 * Gotta do the ddi_copyin() here on the uscsi_cdb so that
22523	 * we will have a valid cdb[0] to test.
22524	 */
22525	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
22526	    (cdb[0] == SCMD_FORMAT)) {
22527		SD_TRACE(SD_LOG_IOCTL, un,
22528		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22529		mutex_enter(SD_MUTEX(un));
22530		un->un_f_format_in_progress = TRUE;
22531		mutex_exit(SD_MUTEX(un));
22532		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22533		    SD_PATH_STANDARD);
22534		mutex_enter(SD_MUTEX(un));
22535		un->un_f_format_in_progress = FALSE;
22536		mutex_exit(SD_MUTEX(un));
22537	} else {
22538		SD_TRACE(SD_LOG_IOCTL, un,
22539		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22540		/*
22541		 * It's OK to fall into here even if the ddi_copyin()
22542		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
22543		 * does this same copyin and will return the EFAULT
22544		 * if it fails.
22545		 */
22546		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22547		    SD_PATH_STANDARD);
22548	}
22549#ifdef _MULTI_DATAMODEL
22550	switch (model) {
22551	case DDI_MODEL_ILP32:
22552		/*
22553		 * Convert back to ILP32 before copyout to the
22554		 * application
22555		 */
22556		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
22557		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
22558			if (rval != 0) {
22559				rval = EFAULT;
22560			}
22561		}
22562		break;
22563	case DDI_MODEL_NONE:
22564		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22565			if (rval != 0) {
22566				rval = EFAULT;
22567			}
22568		}
22569		break;
22570	}
#else /* ! _MULTI_DATAMODEL */
22572	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22573		if (rval != 0) {
22574			rval = EFAULT;
22575		}
22576	}
#endif /* _MULTI_DATAMODEL */
22578done:
22579	kmem_free(scmd, sizeof (struct uscsi_cmd));
22580
22581	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
22582
22583	return (rval);
22584}
22585
22586
22587/*
22588 *    Function: sd_dkio_ctrl_info
22589 *
22590 * Description: This routine is the driver entry point for handling controller
22591 *		information ioctl requests (DKIOCINFO).
22592 *
22593 *   Arguments: dev  - the device number
22594 *		arg  - pointer to user provided dk_cinfo structure
22595 *		       specifying the controller type and attributes.
22596 *		flag - this argument is a pass through to ddi_copyxxx()
22597 *		       directly from the mode argument of ioctl().
22598 *
22599 * Return Code: 0
22600 *		EFAULT
22601 *		ENXIO
22602 */
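/*
 * Illustrative user-level usage (a sketch; assumes an open raw-device
 * file descriptor fd and <sys/dkio.h>):
 *
 *	struct dk_cinfo cinfo;
 *
 *	if (ioctl(fd, DKIOCINFO, &cinfo) == 0)
 *		(void) printf("ctlr %s%d unit %d maxxfer %u blocks\n",
 *		    cinfo.dki_cname, cinfo.dki_cnum, cinfo.dki_unit,
 *		    cinfo.dki_maxtransfer);
 */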
22603
22604static int
22605sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22606{
22607	struct sd_lun	*un = NULL;
22608	struct dk_cinfo	*info;
22609	dev_info_t	*pdip;
22610	int		lun, tgt;
22611
22612	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22613		return (ENXIO);
22614	}
22615
22616	info = (struct dk_cinfo *)
22617		kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22618
22619	switch (un->un_ctype) {
22620	case CTYPE_CDROM:
22621		info->dki_ctype = DKC_CDROM;
22622		break;
22623	default:
22624		info->dki_ctype = DKC_SCSI_CCS;
22625		break;
22626	}
22627	pdip = ddi_get_parent(SD_DEVINFO(un));
22628	info->dki_cnum = ddi_get_instance(pdip);
22629	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22630		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22631	} else {
22632		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22633		    DK_DEVLEN - 1);
22634	}
22635
22636	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22637	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22638	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22639	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22640
22641	/* Unit Information */
22642	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22643	info->dki_slave = ((tgt << 3) | lun);
22644	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22645	    DK_DEVLEN - 1);
22646	info->dki_flags = DKI_FMTVOL;
22647	info->dki_partition = SDPART(dev);
22648
22649	/* Max Transfer size of this device in blocks */
22650	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22651	info->dki_addr = 0;
22652	info->dki_space = 0;
22653	info->dki_prio = 0;
22654	info->dki_vec = 0;
22655
22656	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22657		kmem_free(info, sizeof (struct dk_cinfo));
22658		return (EFAULT);
22659	} else {
22660		kmem_free(info, sizeof (struct dk_cinfo));
22661		return (0);
22662	}
22663}
22664
22665
22666/*
22667 *    Function: sd_get_media_info
22668 *
22669 * Description: This routine is the driver entry point for handling ioctl
22670 *		requests for the media type or command set profile used by the
22671 *		drive to operate on the media (DKIOCGMEDIAINFO).
22672 *
22673 *   Arguments: dev	- the device number
22674 *		arg	- pointer to user provided dk_minfo structure
22675 *			  specifying the media type, logical block size and
22676 *			  drive capacity.
22677 *		flag	- this argument is a pass through to ddi_copyxxx()
22678 *			  directly from the mode argument of ioctl().
22679 *
22680 * Return Code: 0
 *		EACCES
22682 *		EFAULT
22683 *		ENXIO
22684 *		EIO
22685 */
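/*
 * Illustrative user-level usage (a sketch; assumes an open raw-device
 * file descriptor fd and <sys/dkio.h>):
 *
 *	struct dk_minfo minfo;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0)
 *		(void) printf("type 0x%x lbsize %u capacity %llu\n",
 *		    minfo.dki_media_type, minfo.dki_lbsize,
 *		    (u_longlong_t)minfo.dki_capacity);
 */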
22686
22687static int
22688sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22689{
22690	struct sd_lun		*un = NULL;
22691	struct uscsi_cmd	com;
22692	struct scsi_inquiry	*sinq;
22693	struct dk_minfo		media_info;
22694	u_longlong_t		media_capacity;
22695	uint64_t		capacity;
22696	uint_t			lbasize;
22697	uchar_t			*out_data;
22698	uchar_t			*rqbuf;
22699	int			rval = 0;
22700	int			rtn;
22701
22702	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22703	    (un->un_state == SD_STATE_OFFLINE)) {
22704		return (ENXIO);
22705	}
22706
22707	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22708
22709	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22710	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22711
22712	/* Issue a TUR to determine if the drive is ready with media present */
22713	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
22714	if (rval == ENXIO) {
22715		goto done;
22716	}
22717
22718	/* Now get configuration data */
22719	if (ISCD(un)) {
22720		media_info.dki_media_type = DK_CDROM;
22721
22722		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22723		if (un->un_f_mmc_cap == TRUE) {
22724			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
22725				SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
22726
22727			if (rtn) {
22728				/*
22729				 * Failed for other than an illegal request
22730				 * or command not supported
22731				 */
22732				if ((com.uscsi_status == STATUS_CHECK) &&
22733				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22734					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22735					    (rqbuf[12] != 0x20)) {
22736						rval = EIO;
22737						goto done;
22738					}
22739				}
22740			} else {
22741				/*
22742				 * The GET CONFIGURATION command succeeded
22743				 * so set the media type according to the
22744				 * returned data
22745				 */
22746				media_info.dki_media_type = out_data[6];
22747				media_info.dki_media_type <<= 8;
22748				media_info.dki_media_type |= out_data[7];
22749			}
22750		}
22751	} else {
22752		/*
22753		 * The profile list is not available, so we attempt to identify
22754		 * the media type based on the inquiry data
22755		 */
22756		sinq = un->un_sd->sd_inq;
22757		if (sinq->inq_qual == 0) {
22758			/* This is a direct access device */
22759			media_info.dki_media_type = DK_FIXED_DISK;
22760
22761			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22762			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22763				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22764					media_info.dki_media_type = DK_ZIP;
22765				} else if (
22766				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22767					media_info.dki_media_type = DK_JAZ;
22768				}
22769			}
22770		} else {
22771			/* Not a CD or direct access so return unknown media */
22772			media_info.dki_media_type = DK_UNKNOWN;
22773		}
22774	}
22775
22776	/* Now read the capacity so we can provide the lbasize and capacity */
22777	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
22778	    SD_PATH_DIRECT)) {
22779	case 0:
22780		break;
22781	case EACCES:
22782		rval = EACCES;
22783		goto done;
22784	default:
22785		rval = EIO;
22786		goto done;
22787	}
22788
22789	media_info.dki_lbsize = lbasize;
22790	media_capacity = capacity;
22791
22792	/*
22793	 * sd_send_scsi_READ_CAPACITY() reports capacity in
22794	 * un->un_sys_blocksize chunks. So we need to convert it into
22795	 * cap.lbasize chunks.
22796	 */
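	/*
	 * Worked example (illustrative): a drive reported as 2097152
	 * blocks of un_sys_blocksize 512 with a 2048-byte lbasize
	 * yields (2097152 * 512) / 2048 = 524288 media blocks.
	 */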
22797	media_capacity *= un->un_sys_blocksize;
22798	media_capacity /= lbasize;
22799	media_info.dki_capacity = media_capacity;
22800
22801	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22802		rval = EFAULT;
		/* Use a goto so cleanup stays centralized if code is added below */
22804		goto done;
22805	}
22806done:
22807	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22808	kmem_free(rqbuf, SENSE_LENGTH);
22809	return (rval);
22810}
22811
22812
22813/*
22814 *    Function: sd_dkio_get_geometry
22815 *
22816 * Description: This routine is the driver entry point for handling user
22817 *		requests to get the device geometry (DKIOCGGEOM).
22818 *
22819 *   Arguments: dev  - the device number
22820 *		arg  - pointer to user provided dk_geom structure specifying
22821 *			the controller's notion of the current geometry.
22822 *		flag - this argument is a pass through to ddi_copyxxx()
22823 *		       directly from the mode argument of ioctl().
22824 *		geom_validated - flag indicating if the device geometry has been
22825 *				 previously validated in the sdioctl routine.
22826 *
22827 * Return Code: 0
22828 *		EFAULT
22829 *		ENXIO
22830 *		EIO
22831 */
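/*
 * Illustrative user-level usage (a sketch; assumes an open raw-device
 * file descriptor fd); the accessible capacity in blocks follows from
 * the returned geometry:
 *
 *	struct dk_geom geom;
 *
 *	if (ioctl(fd, DKIOCGGEOM, &geom) == 0)
 *		(void) printf("%lu blocks\n", (ulong_t)geom.dkg_ncyl *
 *		    geom.dkg_nhead * geom.dkg_nsect);
 */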
22832
22833static int
22834sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22835{
22836	struct sd_lun	*un = NULL;
22837	struct dk_geom	*tmp_geom = NULL;
22838	int		rval = 0;
22839
22840	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22841		return (ENXIO);
22842	}
22843
22844	if (geom_validated == FALSE) {
22845		/*
22846		 * sd_validate_geometry does not spin a disk up
22847		 * if it was spun down. We need to make sure it
22848		 * is ready.
22849		 */
22850		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22851			return (rval);
22852		}
22853		mutex_enter(SD_MUTEX(un));
22854		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22855		mutex_exit(SD_MUTEX(un));
22856	}
22857	if (rval)
22858		return (rval);
22859
22860	/*
	 * It is possible that un_solaris_size is 0 (uninitialized)
	 * after sd_unit_attach; a reservation conflict can leave it
	 * in that state. Thus, the zero check of un_solaris_size
	 * must occur after the sd_validate_geometry() call.
22865	 */
22866#if defined(__i386) || defined(__amd64)
22867	if (un->un_solaris_size == 0) {
22868		return (EIO);
22869	}
22870#endif
22871
22872	/*
22873	 * Make a local copy of the soft state geometry to avoid some potential
22874	 * race conditions associated with holding the mutex and updating the
22875	 * write_reinstruct value
22876	 */
22877	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22878	mutex_enter(SD_MUTEX(un));
22879	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22880	mutex_exit(SD_MUTEX(un));
22881
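	/*
	 * Derive a default write_reinstruct when the label supplies
	 * none: dkg_nsect * dkg_rpm is the number of sectors passing
	 * the head per minute; scaling by sd_rot_delay (assumed to be
	 * in milliseconds) over 60000 ms/min expresses the rotational
	 * delay in sectors.
	 */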
22882	if (tmp_geom->dkg_write_reinstruct == 0) {
22883		tmp_geom->dkg_write_reinstruct =
22884		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22885		    sd_rot_delay) / (int)60000);
22886	}
22887
22888	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22889	    flag);
22890	if (rval != 0) {
22891		rval = EFAULT;
22892	}
22893
22894	kmem_free(tmp_geom, sizeof (struct dk_geom));
22895	return (rval);
22896
22897}
22898
22899
22900/*
22901 *    Function: sd_dkio_set_geometry
22902 *
22903 * Description: This routine is the driver entry point for handling user
22904 *		requests to set the device geometry (DKIOCSGEOM). The actual
22905 *		device geometry is not updated, just the driver "notion" of it.
22906 *
22907 *   Arguments: dev  - the device number
22908 *		arg  - pointer to user provided dk_geom structure used to set
22909 *			the controller's notion of the current geometry.
22910 *		flag - this argument is a pass through to ddi_copyxxx()
22911 *		       directly from the mode argument of ioctl().
22912 *
22913 * Return Code: 0
22914 *		EFAULT
22915 *		ENXIO
22916 *		EIO
22917 */
22918
22919static int
22920sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22921{
22922	struct sd_lun	*un = NULL;
22923	struct dk_geom	*tmp_geom;
22924	struct dk_map	*lp;
22925	int		rval = 0;
22926	int		i;
22927
22928	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22929		return (ENXIO);
22930	}
22931
22932	/*
22933	 * Make sure there is no reservation conflict on the lun.
22934	 */
22935	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22936		return (EACCES);
22937	}
22938
22939#if defined(__i386) || defined(__amd64)
22940	if (un->un_solaris_size == 0) {
22941		return (EIO);
22942	}
22943#endif
22944
22945	/*
22946	 * We need to copy the user specified geometry into local
22947	 * storage and then update the softstate. We don't want to hold
22948	 * the mutex and copyin directly from the user to the soft state
22949	 */
22950	tmp_geom = (struct dk_geom *)
22951	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22952	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22953	if (rval != 0) {
22954		kmem_free(tmp_geom, sizeof (struct dk_geom));
22955		return (EFAULT);
22956	}
22957
22958	mutex_enter(SD_MUTEX(un));
22959	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22960	for (i = 0; i < NDKMAP; i++) {
22961		lp  = &un->un_map[i];
22962		un->un_offset[i] =
22963		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22964#if defined(__i386) || defined(__amd64)
22965		un->un_offset[i] += un->un_solaris_offset;
22966#endif
22967	}
22968	un->un_f_geometry_is_valid = FALSE;
22969	mutex_exit(SD_MUTEX(un));
22970	kmem_free(tmp_geom, sizeof (struct dk_geom));
22971
22972	return (rval);
22973}
22974
22975
22976/*
22977 *    Function: sd_dkio_get_partition
22978 *
22979 * Description: This routine is the driver entry point for handling user
22980 *		requests to get the partition table (DKIOCGAPART).
22981 *
22982 *   Arguments: dev  - the device number
22983 *		arg  - pointer to user provided dk_allmap structure specifying
22984 *			the controller's notion of the current partition table.
22985 *		flag - this argument is a pass through to ddi_copyxxx()
22986 *		       directly from the mode argument of ioctl().
22987 *		geom_validated - flag indicating if the device geometry has been
22988 *				 previously validated in the sdioctl routine.
22989 *
22990 * Return Code: 0
22991 *		EFAULT
22992 *		ENXIO
22993 *		EIO
22994 */
22995
22996static int
22997sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22998{
22999	struct sd_lun	*un = NULL;
23000	int		rval = 0;
23001	int		size;
23002
23003	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23004		return (ENXIO);
23005	}
23006
23007	/*
23008	 * Make sure the geometry is valid before getting the partition
23009	 * information.
23010	 */
23011	mutex_enter(SD_MUTEX(un));
23012	if (geom_validated == FALSE) {
23013		/*
23014		 * sd_validate_geometry does not spin a disk up
23015		 * if it was spun down. We need to make sure it
23016		 * is ready before validating the geometry.
23017		 */
23018		mutex_exit(SD_MUTEX(un));
23019		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
23020			return (rval);
23021		}
23022		mutex_enter(SD_MUTEX(un));
23023
23024		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
23025			mutex_exit(SD_MUTEX(un));
23026			return (rval);
23027		}
23028	}
23029	mutex_exit(SD_MUTEX(un));
23030
23031	/*
	 * It is possible that un_solaris_size is 0 (uninitialized)
	 * after sd_unit_attach; a reservation conflict can leave it
	 * in that state. Thus, the zero check of un_solaris_size
	 * should occur after the sd_validate_geometry() call.
23036	 */
23037#if defined(__i386) || defined(__amd64)
23038	if (un->un_solaris_size == 0) {
23039		return (EIO);
23040	}
23041#endif
23042
23043#ifdef _MULTI_DATAMODEL
23044	switch (ddi_model_convert_from(flag & FMODELS)) {
23045	case DDI_MODEL_ILP32: {
23046		struct dk_map32 dk_map32[NDKMAP];
23047		int		i;
23048
23049		for (i = 0; i < NDKMAP; i++) {
23050			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
23051			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23052		}
23053		size = NDKMAP * sizeof (struct dk_map32);
23054		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
23055		if (rval != 0) {
23056			rval = EFAULT;
23057		}
23058		break;
23059	}
23060	case DDI_MODEL_NONE:
23061		size = NDKMAP * sizeof (struct dk_map);
23062		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
23063		if (rval != 0) {
23064			rval = EFAULT;
23065		}
23066		break;
23067	}
23068#else /* ! _MULTI_DATAMODEL */
23069	size = NDKMAP * sizeof (struct dk_map);
23070	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
23071	if (rval != 0) {
23072		rval = EFAULT;
23073	}
23074#endif /* _MULTI_DATAMODEL */
23075	return (rval);
23076}
23077
23078
23079/*
23080 *    Function: sd_dkio_set_partition
23081 *
23082 * Description: This routine is the driver entry point for handling user
23083 *		requests to set the partition table (DKIOCSAPART). The actual
23084 *		device partition is not updated.
23085 *
23086 *   Arguments: dev  - the device number
23087 *		arg  - pointer to user provided dk_allmap structure used to set
23088 *			the controller's notion of the partition table.
23089 *		flag - this argument is a pass through to ddi_copyxxx()
23090 *		       directly from the mode argument of ioctl().
23091 *
23092 * Return Code: 0
23093 *		EINVAL
23094 *		EFAULT
23095 *		ENXIO
23096 *		EIO
23097 */
23098
23099static int
23100sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
23101{
23102	struct sd_lun	*un = NULL;
23103	struct dk_map	dk_map[NDKMAP];
23104	struct dk_map	*lp;
23105	int		rval = 0;
23106	int		size;
23107	int		i;
23108#if defined(_SUNOS_VTOC_16)
23109	struct dkl_partition	*vp;
23110#endif
23111
23112	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23113		return (ENXIO);
23114	}
23115
23116	/*
	 * Set the map for all logical partitions.  We hold
	 * SD_MUTEX just to make sure an interrupt doesn't
	 * come in while the map is half updated.
23120	 */
23121	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
23122	mutex_enter(SD_MUTEX(un));
23123	if (un->un_blockcount > DK_MAX_BLOCKS) {
23124		mutex_exit(SD_MUTEX(un));
23125		return (ENOTSUP);
23126	}
23127	mutex_exit(SD_MUTEX(un));
23128
23129	/*
23130	 * Make sure there is no reservation conflict on the lun.
23131	 */
23132	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
23133		return (EACCES);
23134	}
23135
23136#if defined(__i386) || defined(__amd64)
23137	if (un->un_solaris_size == 0) {
23138		return (EIO);
23139	}
23140#endif
23141
23142#ifdef _MULTI_DATAMODEL
23143	switch (ddi_model_convert_from(flag & FMODELS)) {
23144	case DDI_MODEL_ILP32: {
23145		struct dk_map32 dk_map32[NDKMAP];
23146
23147		size = NDKMAP * sizeof (struct dk_map32);
23148		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
23149		if (rval != 0) {
23150			return (EFAULT);
23151		}
23152		for (i = 0; i < NDKMAP; i++) {
23153			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
23154			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
23155		}
23156		break;
23157	}
23158	case DDI_MODEL_NONE:
23159		size = NDKMAP * sizeof (struct dk_map);
23160		rval = ddi_copyin((void *)arg, dk_map, size, flag);
23161		if (rval != 0) {
23162			return (EFAULT);
23163		}
23164		break;
23165	}
23166#else /* ! _MULTI_DATAMODEL */
23167	size = NDKMAP * sizeof (struct dk_map);
23168	rval = ddi_copyin((void *)arg, dk_map, size, flag);
23169	if (rval != 0) {
23170		return (EFAULT);
23171	}
23172#endif /* _MULTI_DATAMODEL */
23173
23174	mutex_enter(SD_MUTEX(un));
23175	/* Note: The size used in this bcopy is set based upon the data model */
23176	bcopy(dk_map, un->un_map, size);
23177#if defined(_SUNOS_VTOC_16)
23178	vp = (struct dkl_partition *)&(un->un_vtoc);
23179#endif	/* defined(_SUNOS_VTOC_16) */
23180	for (i = 0; i < NDKMAP; i++) {
23181		lp  = &un->un_map[i];
23182		un->un_offset[i] =
23183		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
23184#if defined(_SUNOS_VTOC_16)
23185		vp->p_start = un->un_offset[i];
23186		vp->p_size = lp->dkl_nblk;
23187		vp++;
23188#endif	/* defined(_SUNOS_VTOC_16) */
23189#if defined(__i386) || defined(__amd64)
23190		un->un_offset[i] += un->un_solaris_offset;
23191#endif
23192	}
23193	mutex_exit(SD_MUTEX(un));
23194	return (rval);
23195}
23196
23197
23198/*
23199 *    Function: sd_dkio_get_vtoc
23200 *
23201 * Description: This routine is the driver entry point for handling user
23202 *		requests to get the current volume table of contents
23203 *		(DKIOCGVTOC).
23204 *
23205 *   Arguments: dev  - the device number
23206 *		arg  - pointer to user provided vtoc structure specifying
23207 *			the current vtoc.
23208 *		flag - this argument is a pass through to ddi_copyxxx()
23209 *		       directly from the mode argument of ioctl().
23210 *		geom_validated - flag indicating if the device geometry has been
23211 *				 previously validated in the sdioctl routine.
23212 *
23213 * Return Code: 0
23214 *		EFAULT
23215 *		ENXIO
23216 *		EIO
23217 */
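/*
 * Illustrative user-level usage (a sketch; assumes an open raw-device
 * file descriptor fd and <sys/vtoc.h>):
 *
 *	struct vtoc vtoc;
 *
 *	if (ioctl(fd, DKIOCGVTOC, &vtoc) == 0 &&
 *	    vtoc.v_sanity == VTOC_SANE)
 *		(void) printf("%u partitions\n", vtoc.v_nparts);
 */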
23218
23219static int
23220sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
23221{
23222	struct sd_lun	*un = NULL;
23223#if defined(_SUNOS_VTOC_8)
23224	struct vtoc	user_vtoc;
23225#endif	/* defined(_SUNOS_VTOC_8) */
23226	int		rval = 0;
23227
23228	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23229		return (ENXIO);
23230	}
23231
23232	mutex_enter(SD_MUTEX(un));
23233	if (geom_validated == FALSE) {
23234		/*
23235		 * sd_validate_geometry does not spin a disk up
23236		 * if it was spun down. We need to make sure it
23237		 * is ready.
23238		 */
23239		mutex_exit(SD_MUTEX(un));
23240		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
23241			return (rval);
23242		}
23243		mutex_enter(SD_MUTEX(un));
23244		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
23245			mutex_exit(SD_MUTEX(un));
23246			return (rval);
23247		}
23248	}
23249
23250#if defined(_SUNOS_VTOC_8)
23251	sd_build_user_vtoc(un, &user_vtoc);
23252	mutex_exit(SD_MUTEX(un));
23253
23254#ifdef _MULTI_DATAMODEL
23255	switch (ddi_model_convert_from(flag & FMODELS)) {
23256	case DDI_MODEL_ILP32: {
23257		struct vtoc32 user_vtoc32;
23258
23259		vtoctovtoc32(user_vtoc, user_vtoc32);
23260		if (ddi_copyout(&user_vtoc32, (void *)arg,
23261		    sizeof (struct vtoc32), flag)) {
23262			return (EFAULT);
23263		}
23264		break;
23265	}
23266
23267	case DDI_MODEL_NONE:
23268		if (ddi_copyout(&user_vtoc, (void *)arg,
23269		    sizeof (struct vtoc), flag)) {
23270			return (EFAULT);
23271		}
23272		break;
23273	}
23274#else /* ! _MULTI_DATAMODEL */
23275	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
23276		return (EFAULT);
23277	}
23278#endif /* _MULTI_DATAMODEL */
23279
23280#elif defined(_SUNOS_VTOC_16)
23281	mutex_exit(SD_MUTEX(un));
23282
23283#ifdef _MULTI_DATAMODEL
23284	/*
 * The un_vtoc structure is a "struct dk_vtoc" which is always
23286	 * 32-bit to maintain compatibility with existing on-disk
23287	 * structures.  Thus, we need to convert the structure when copying
23288	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
23289	 * program.  If the target is a 32-bit program, then no conversion
23290	 * is necessary.
23291	 */
23292	/* LINTED: logical expression always true: op "||" */
23293	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
23294	switch (ddi_model_convert_from(flag & FMODELS)) {
23295	case DDI_MODEL_ILP32:
23296		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
23297		    sizeof (un->un_vtoc), flag)) {
23298			return (EFAULT);
23299		}
23300		break;
23301
23302	case DDI_MODEL_NONE: {
23303		struct vtoc user_vtoc;
23304
23305		vtoc32tovtoc(un->un_vtoc, user_vtoc);
23306		if (ddi_copyout(&user_vtoc, (void *)arg,
23307		    sizeof (struct vtoc), flag)) {
23308			return (EFAULT);
23309		}
23310		break;
23311	}
23312	}
23313#else /* ! _MULTI_DATAMODEL */
23314	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
23315	    flag)) {
23316		return (EFAULT);
23317	}
23318#endif /* _MULTI_DATAMODEL */
23319#else
23320#error "No VTOC format defined."
23321#endif
23322
23323	return (rval);
23324}
23325
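/*
 *    Function: sd_dkio_get_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to get EFI label data (DKIOCGETEFI). It reads
 *		dki_length bytes starting at block dki_lba and copies
 *		them out to the user-supplied dki_data buffer.
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided dk_efi structure specifying
 *		       the location and length of the label data to read.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EINVAL
 *		EFAULT
 *		ENXIO
 *		the code returned by sd_send_scsi_READ()
 */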
23326static int
23327sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
23328{
23329	struct sd_lun	*un = NULL;
23330	dk_efi_t	user_efi;
23331	int		rval = 0;
23332	void		*buffer;
23333
23334	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23335		return (ENXIO);
23336
23337	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23338		return (EFAULT);
23339
23340	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23341
23342	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23343	    (user_efi.dki_length > un->un_max_xfer_size))
23344		return (EINVAL);
23345
23346	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23347	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
23348	    user_efi.dki_lba, SD_PATH_DIRECT);
23349	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
23350	    user_efi.dki_length, flag) != 0)
23351		rval = EFAULT;
23352
23353	kmem_free(buffer, user_efi.dki_length);
23354	return (rval);
23355}
23356
23357#if defined(_SUNOS_VTOC_8)
23358/*
23359 *    Function: sd_build_user_vtoc
23360 *
23361 * Description: This routine populates a pass by reference variable with the
23362 *		current volume table of contents.
23363 *
23364 *   Arguments: un - driver soft state (unit) structure
23365 *		user_vtoc - pointer to vtoc structure to be populated
23366 */
23367
23368static void
23369sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23370{
23371	struct dk_map2		*lpart;
23372	struct dk_map		*lmap;
23373	struct partition	*vpart;
23374	int			nblks;
23375	int			i;
23376
23377	ASSERT(mutex_owned(SD_MUTEX(un)));
23378
23379	/*
23380	 * Return vtoc structure fields in the provided VTOC area, addressed
23381	 * by *vtoc.
23382	 */
23383	bzero(user_vtoc, sizeof (struct vtoc));
23384	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
23385	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
23386	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
23387	user_vtoc->v_sanity	= VTOC_SANE;
23388	user_vtoc->v_version	= un->un_vtoc.v_version;
23389	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
23390	user_vtoc->v_sectorsz = un->un_sys_blocksize;
23391	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
23392	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
23393	    sizeof (un->un_vtoc.v_reserved));
23394	/*
23395	 * Convert partitioning information.
23396	 *
23397	 * Note the conversion from starting cylinder number
23398	 * to starting sector number.
23399	 */
23400	lmap = un->un_map;
23401	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
23402	vpart = user_vtoc->v_part;
23403
23404	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23405
23406	for (i = 0; i < V_NUMPAR; i++) {
23407		vpart->p_tag	= lpart->p_tag;
23408		vpart->p_flag	= lpart->p_flag;
23409		vpart->p_start	= lmap->dkl_cylno * nblks;
23410		vpart->p_size	= lmap->dkl_nblk;
23411		lmap++;
23412		lpart++;
23413		vpart++;
23414
23415		/* (4364927) */
23416		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
23417	}
23418
23419	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
23420}
23421#endif
23422
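/*
 *    Function: sd_dkio_partition
 *
 * Description: This routine is the driver entry point for handling user
 *		requests for EFI partition information (DKIOCPARTITION).
 *		It reads the GPT from the media, locates the requested
 *		partition entry, and returns its type GUID, starting LBA
 *		and size in a partition64 structure.
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided partition64 structure
 *		       specifying the partition number of interest.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EFAULT
 *		ENXIO
 *		ESRCH
 *		the code returned by sd_send_scsi_READ() or
 *		sd_validate_efi()
 */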
23423static int
23424sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
23425{
23426	struct sd_lun		*un = NULL;
23427	struct partition64	p64;
23428	int			rval = 0;
23429	uint_t			nparts;
23430	efi_gpe_t		*partitions;
23431	efi_gpt_t		*buffer;
23432	diskaddr_t		gpe_lba;
23433
23434	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23435		return (ENXIO);
23436	}
23437
23438	if (ddi_copyin((const void *)arg, &p64,
23439	    sizeof (struct partition64), flag)) {
23440		return (EFAULT);
23441	}
23442
23443	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
23444	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
	    1, SD_PATH_DIRECT);
23446	if (rval != 0)
23447		goto done_error;
23448
23449	sd_swap_efi_gpt(buffer);
23450
23451	if ((rval = sd_validate_efi(buffer)) != 0)
23452		goto done_error;
23453
23454	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
23455	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
23456	if (p64.p_partno > nparts) {
23457		/* couldn't find it */
23458		rval = ESRCH;
23459		goto done_error;
23460	}
23461	/*
	 * If the requested partition entry lies beyond the normal
	 * 16K block, adjust the starting LBA accordingly
23464	 */
23465	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
23466	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
	    gpe_lba, SD_PATH_DIRECT);
23468	if (rval) {
23469		goto done_error;
23470	}
23471	partitions = (efi_gpe_t *)buffer;
23472
23473	sd_swap_efi_gpe(nparts, partitions);
23474
23475	partitions += p64.p_partno;
23476	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
23477	    sizeof (struct uuid));
23478	p64.p_start = partitions->efi_gpe_StartingLBA;
	p64.p_size = partitions->efi_gpe_EndingLBA - p64.p_start + 1;
23481
23482	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
23483		rval = EFAULT;
23484
23485done_error:
23486	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
23487	return (rval);
23488}
23489
23490
23491/*
23492 *    Function: sd_dkio_set_vtoc
23493 *
23494 * Description: This routine is the driver entry point for handling user
23495 *		requests to set the current volume table of contents
23496 *		(DKIOCSVTOC).
23497 *
23498 *   Arguments: dev  - the device number
23499 *		arg  - pointer to user provided vtoc structure used to set the
23500 *			current vtoc.
23501 *		flag - this argument is a pass through to ddi_copyxxx()
23502 *		       directly from the mode argument of ioctl().
23503 *
23504 * Return Code: 0
23505 *		EFAULT
23506 *		ENXIO
23507 *		EINVAL
23508 *		ENOTSUP
23509 */
23510
23511static int
23512sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
23513{
23514	struct sd_lun	*un = NULL;
23515	struct vtoc	user_vtoc;
23516	int		rval = 0;
23517
23518	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23519		return (ENXIO);
23520	}
23521
23522#if defined(__i386) || defined(__amd64)
23523	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
23524		return (EINVAL);
23525	}
23526#endif
23527
23528#ifdef _MULTI_DATAMODEL
23529	switch (ddi_model_convert_from(flag & FMODELS)) {
23530	case DDI_MODEL_ILP32: {
23531		struct vtoc32 user_vtoc32;
23532
23533		if (ddi_copyin((const void *)arg, &user_vtoc32,
23534		    sizeof (struct vtoc32), flag)) {
23535			return (EFAULT);
23536		}
23537		vtoc32tovtoc(user_vtoc32, user_vtoc);
23538		break;
23539	}
23540
23541	case DDI_MODEL_NONE:
23542		if (ddi_copyin((const void *)arg, &user_vtoc,
23543		    sizeof (struct vtoc), flag)) {
23544			return (EFAULT);
23545		}
23546		break;
23547	}
23548#else /* ! _MULTI_DATAMODEL */
23549	if (ddi_copyin((const void *)arg, &user_vtoc,
23550	    sizeof (struct vtoc), flag)) {
23551		return (EFAULT);
23552	}
23553#endif /* _MULTI_DATAMODEL */
23554
23555	mutex_enter(SD_MUTEX(un));
23556	if (un->un_blockcount > DK_MAX_BLOCKS) {
23557		mutex_exit(SD_MUTEX(un));
23558		return (ENOTSUP);
23559	}
23560	if (un->un_g.dkg_ncyl == 0) {
23561		mutex_exit(SD_MUTEX(un));
23562		return (EINVAL);
23563	}
23564
23565	mutex_exit(SD_MUTEX(un));
23566	sd_clear_efi(un);
23567	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
23568	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
23569	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
23570	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23571	    un->un_node_type, NULL);
23572	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
23573	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23574	    un->un_node_type, NULL);
23575	mutex_enter(SD_MUTEX(un));
23576
23577	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
23578		if ((rval = sd_write_label(dev)) == 0) {
23579			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
23580			    != 0) {
23581				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
23582				    "sd_dkio_set_vtoc: "
23583				    "Failed validate geometry\n");
23584			}
23585		}
23586	}
23587
23588	/*
	 * Even if sd_build_label_vtoc or sd_write_label failed above,
	 * write the devid anyway; it cannot hurt. Also preserve the
	 * device id by writing to the disk acyl for the case where a
	 * devid has been fabricated.
23593	 */
23594	if (un->un_f_devid_supported &&
23595	    (un->un_f_opt_fab_devid == TRUE)) {
23596		if (un->un_devid == NULL) {
23597			sd_register_devid(un, SD_DEVINFO(un),
23598			    SD_TARGET_IS_UNRESERVED);
23599		} else {
23600			/*
23601			 * The device id for this disk has been
23602			 * fabricated. Fabricated device id's are
23603			 * managed by storing them in the last 2
23604			 * available sectors on the drive. The device
23605			 * id must be preserved by writing it back out
23606			 * to this location.
23607			 */
23608			if (sd_write_deviceid(un) != 0) {
23609				ddi_devid_free(un->un_devid);
23610				un->un_devid = NULL;
23611			}
23612		}
23613	}
23614	mutex_exit(SD_MUTEX(un));
23615	return (rval);
23616}
23617
23618
23619/*
23620 *    Function: sd_build_label_vtoc
23621 *
23622 * Description: This routine updates the driver soft state current volume table
23623 *		of contents based on a user specified vtoc.
23624 *
23625 *   Arguments: un - driver soft state (unit) structure
23626 *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23627 *			    to update the driver soft state.
23628 *
23629 * Return Code: 0
23630 *		EINVAL
23631 */
23632
23633static int
23634sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23635{
23636	struct dk_map		*lmap;
23637	struct partition	*vpart;
23638	int			nblks;
23639#if defined(_SUNOS_VTOC_8)
23640	int			ncyl;
23641	struct dk_map2		*lpart;
23642#endif	/* defined(_SUNOS_VTOC_8) */
23643	int			i;
23644
23645	ASSERT(mutex_owned(SD_MUTEX(un)));
23646
23647	/* Sanity-check the vtoc */
23648	if (user_vtoc->v_sanity != VTOC_SANE ||
23649	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
23650	    user_vtoc->v_nparts != V_NUMPAR) {
23651		return (EINVAL);
23652	}
23653
23654	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23655	if (nblks == 0) {
23656		return (EINVAL);
23657	}
23658
23659#if defined(_SUNOS_VTOC_8)
23660	vpart = user_vtoc->v_part;
23661	for (i = 0; i < V_NUMPAR; i++) {
23662		if ((vpart->p_start % nblks) != 0) {
23663			return (EINVAL);
23664		}
23665		ncyl = vpart->p_start / nblks;
23666		ncyl += vpart->p_size / nblks;
23667		if ((vpart->p_size % nblks) != 0) {
23668			ncyl++;
23669		}
23670		if (ncyl > (int)un->un_g.dkg_ncyl) {
23671			return (EINVAL);
23672		}
23673		vpart++;
23674	}
23675#endif	/* defined(_SUNOS_VTOC_8) */
23676
23677	/* Put appropriate vtoc structure fields into the disk label */
23678#if defined(_SUNOS_VTOC_16)
23679	/*
23680	 * The vtoc is always a 32bit data structure to maintain the
23681	 * on-disk format. Convert "in place" instead of bcopying it.
23682	 */
23683	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));
23684
23685	/*
23686	 * in the 16-slice vtoc, starting sectors are expressed in
23687	 * numbers *relative* to the start of the Solaris fdisk partition.
23688	 */
23689	lmap = un->un_map;
23690	vpart = user_vtoc->v_part;
23691
23692	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
23693		lmap->dkl_cylno = vpart->p_start / nblks;
23694		lmap->dkl_nblk = vpart->p_size;
23695	}
23696
23697#elif defined(_SUNOS_VTOC_8)
23698
23699	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
23700	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
23701	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];
23702
23703	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
23704	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;
23705
23706	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);
23707
23708	un->un_vtoc.v_nparts = user_vtoc->v_nparts;
23709
23710	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
23711	    sizeof (un->un_vtoc.v_reserved));
23712
23713	/*
23714	 * Note the conversion from starting sector number
23715	 * to starting cylinder number.
	 * A remainder in this division was rejected with EINVAL above.
23717	 */
23718	lmap = un->un_map;
23719	lpart = un->un_vtoc.v_part;
23720	vpart = user_vtoc->v_part;
23721
23722	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
23723		lpart->p_tag  = vpart->p_tag;
23724		lpart->p_flag = vpart->p_flag;
23725		lmap->dkl_cylno = vpart->p_start / nblks;
23726		lmap->dkl_nblk = vpart->p_size;
23727
23728		lmap++;
23729		lpart++;
23730		vpart++;
23731
23732		/* (4387723) */
23733#ifdef _LP64
23734		if (user_vtoc->timestamp[i] > TIME32_MAX) {
23735			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
23736		} else {
23737			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23738		}
23739#else
23740		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23741#endif
23742	}
23743
23744	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
23745#else
23746#error "No VTOC format defined."
23747#endif
23748	return (0);
23749}
23750
23751/*
23752 *    Function: sd_clear_efi
23753 *
23754 * Description: This routine clears all EFI labels.
23755 *
23756 *   Arguments: un - driver soft state (unit) structure
23757 *
23758 * Return Code: void
23759 */
23760
23761static void
23762sd_clear_efi(struct sd_lun *un)
23763{
23764	efi_gpt_t	*gpt;
23765	uint_t		lbasize;
23766	uint64_t	cap;
23767	int rval;
23768
23769	ASSERT(!mutex_owned(SD_MUTEX(un)));
23770
23771	mutex_enter(SD_MUTEX(un));
23772	un->un_reserved = -1;
23773	mutex_exit(SD_MUTEX(un));
23774	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
23775
23776	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23777		goto done;
23778	}
23779
23780	sd_swap_efi_gpt(gpt);
23781	rval = sd_validate_efi(gpt);
23782	if (rval == 0) {
23783		/* clear primary */
23784		bzero(gpt, sizeof (efi_gpt_t));
		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
		    SD_PATH_DIRECT))) {
			SD_INFO(SD_LOG_IO_PARTITION, un,
			    "sd_clear_efi: clear primary label failed\n");
23789		}
23790	}
23791	/* the backup */
23792	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23793	    SD_PATH_DIRECT);
23794	if (rval) {
23795		goto done;
23796	}
23797	/*
	 * The MMC standard allows READ CAPACITY to be
	 * inaccurate by a bounded amount (in the interest of
	 * response latency).  As a result, failed READs of the
	 * backup label near the reported end of the media
	 * (metadata, not data) are commonplace. Depending on the
	 * per-Vendor/drive Sense data, the failed READ can cause
	 * many (unnecessary) retries.
23804	 */
23805	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23806	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
	    SD_PATH_DIRECT)) != 0) {
23808		goto done;
23809	}
23810	sd_swap_efi_gpt(gpt);
23811	rval = sd_validate_efi(gpt);
23812	if (rval == 0) {
23813		/* clear backup */
23814		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
		    cap - 1);
23816		bzero(gpt, sizeof (efi_gpt_t));
		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
		    cap - 1, SD_PATH_DIRECT))) {
			SD_INFO(SD_LOG_IO_PARTITION, un,
			    "sd_clear_efi: clear backup label failed\n");
		}
23822	} else {
23823		/*
23824		 * Refer to comments related to off-by-1 at the
23825		 * header of this file
23826		 */
23827		if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23828		    cap - 2, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
		    SD_PATH_DIRECT)) != 0) {
23830			goto done;
23831		}
23832		sd_swap_efi_gpt(gpt);
23833		rval = sd_validate_efi(gpt);
23834		if (rval == 0) {
23835			/* clear legacy backup EFI label */
23836			SD_TRACE(SD_LOG_IOCTL, un,
23837			    "sd_clear_efi clear backup@%lu\n", cap-2);
23838			bzero(gpt, sizeof (efi_gpt_t));
23839			if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23840			    cap-2, SD_PATH_DIRECT))) {
				SD_INFO(SD_LOG_IO_PARTITION, un,
				    "sd_clear_efi: clear legacy backup "
				    "label failed\n");
23844			}
23845		}
23846	}
23847
23848done:
23849	kmem_free(gpt, sizeof (efi_gpt_t));
23850}
23851
23852/*
23853 *    Function: sd_set_vtoc
23854 *
 * Description: This routine writes the disk label to the primary label
 *		location and to each backup label location on the media.
 *
 *   Arguments: un  - driver soft state (unit) structure
 *		dkl - the label data to be written
 *
 * Return Code: 0 on success; otherwise the value returned by
 *		sd_send_scsi_READ() or sd_send_scsi_WRITE()
23861 */
23862
23863static int
23864sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23865{
23866	void			*shadow_buf;
23867	uint_t			label_addr;
23868	int			sec;
23869	int			blk;
23870	int			head;
23871	int			cyl;
23872	int			rval;
23873
23874#if defined(__i386) || defined(__amd64)
23875	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23876#else
23877	/* Write the primary label at block 0 of the solaris partition. */
23878	label_addr = 0;
23879#endif
23880
23881	if (NOT_DEVBSIZE(un)) {
23882		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23883		/*
23884		 * Read the target's first block.
23885		 */
23886		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23887		    un->un_tgt_blocksize, label_addr,
23888		    SD_PATH_STANDARD)) != 0) {
23889			goto exit;
23890		}
23891		/*
23892		 * Copy the contents of the label into the shadow buffer
23893		 * which is of the size of target block size.
23894		 */
23895		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23896	}
23897
23898	/* Write the primary label */
23899	if (NOT_DEVBSIZE(un)) {
23900		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23901		    label_addr, SD_PATH_STANDARD);
23902	} else {
23903		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23904		    label_addr, SD_PATH_STANDARD);
23905	}
	if (rval != 0) {
		goto exit;
	}
23909
23910	/*
23911	 * Calculate where the backup labels go.  They are always on
23912	 * the last alternate cylinder, but some older drives put them
23913	 * on head 2 instead of the last head.	They are always on the
23914	 * first 5 odd sectors of the appropriate track.
23915	 *
23916	 * We have no choice at this point, but to believe that the
23917	 * disk label is valid.	 Use the geometry of the disk
23918	 * as described in the label.
23919	 */
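	/*
	 * Worked example (illustrative): with dkl_ncyl = 1000,
	 * dkl_acyl = 2, dkl_apc = 0, dkl_nhead = 16 and dkl_nsect = 63,
	 * the backup labels land on cylinder 1001, head 15, sectors
	 * 1, 3, 5, 7 and 9, i.e. blk = 1001 * (16 * 63) + 15 * 63 + sec.
	 */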
23920	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23921	head = dkl->dkl_nhead - 1;
23922
23923	/*
23924	 * Write and verify the backup labels. Make sure we don't try to
23925	 * write past the last cylinder.
23926	 */
23927	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23928		blk = (daddr_t)(
23929		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23930		    (head * dkl->dkl_nsect) + sec);
23931#if defined(__i386) || defined(__amd64)
23932		blk += un->un_solaris_offset;
23933#endif
23934		if (NOT_DEVBSIZE(un)) {
23935			uint64_t	tblk;
23936			/*
23937			 * Need to read the block first for read modify write.
23938			 */
23939			tblk = (uint64_t)blk;
23940			blk = (int)((tblk * un->un_sys_blocksize) /
23941			    un->un_tgt_blocksize);
23942			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23943			    un->un_tgt_blocksize, blk,
23944			    SD_PATH_STANDARD)) != 0) {
23945				goto exit;
23946			}
23947			/*
23948			 * Modify the shadow buffer with the label.
23949			 */
23950			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23951			rval = sd_send_scsi_WRITE(un, shadow_buf,
23952			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23953		} else {
23954			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23955			    blk, SD_PATH_STANDARD);
			SD_INFO(SD_LOG_IO_PARTITION, un,
			    "sd_set_vtoc: wrote backup label %d\n", blk);
23958		}
23959		if (rval != 0) {
23960			goto exit;
23961		}
23962	}
23963exit:
23964	if (NOT_DEVBSIZE(un)) {
23965		kmem_free(shadow_buf, un->un_tgt_blocksize);
23966	}
23967	return (rval);
23968}
23969
23970/*
23971 *    Function: sd_clear_vtoc
23972 *
23973 * Description: This routine clears out the VTOC labels.
23974 *
23975 *   Arguments: un - driver soft state (unit) structure
23976 *
23977 * Return: void
23978 */
23979
23980static void
23981sd_clear_vtoc(struct sd_lun *un)
23982{
23983	struct dk_label		*dkl;
23984
23985	mutex_exit(SD_MUTEX(un));
23986	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23987	mutex_enter(SD_MUTEX(un));
23988	/*
23989	 * sd_set_vtoc uses these fields in order to figure out
23990	 * where to overwrite the backup labels
23991	 */
23992	dkl->dkl_apc    = un->un_g.dkg_apc;
23993	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23994	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23995	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23996	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23997	mutex_exit(SD_MUTEX(un));
23998	(void) sd_set_vtoc(un, dkl);
23999	kmem_free(dkl, sizeof (struct dk_label));
24000
24001	mutex_enter(SD_MUTEX(un));
24002}
24003
24004/*
24005 *    Function: sd_write_label
24006 *
24007 * Description: This routine will validate and write the driver soft state vtoc
24008 *		contents to the device.
24009 *
24010 *   Arguments: dev - the device number
24011 *
24012 * Return Code: the code returned by sd_send_scsi_cmd()
24013 *		0
24014 *		EINVAL
24015 *		ENXIO
24016 *		ENOMEM
24017 */
24018
24019static int
24020sd_write_label(dev_t dev)
24021{
24022	struct sd_lun		*un;
24023	struct dk_label		*dkl;
24024	short			sum;
24025	short			*sp;
24026	int			i;
24027	int			rval;
24028
24029	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
24030	    (un->un_state == SD_STATE_OFFLINE)) {
24031		return (ENXIO);
24032	}
24033	ASSERT(mutex_owned(SD_MUTEX(un)));
24034	mutex_exit(SD_MUTEX(un));
24035	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
24036	mutex_enter(SD_MUTEX(un));
24037
24038	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
24039	dkl->dkl_rpm	= un->un_g.dkg_rpm;
24040	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
24041	dkl->dkl_apc	= un->un_g.dkg_apc;
24042	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
24043	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
24044	dkl->dkl_acyl	= un->un_g.dkg_acyl;
24045	dkl->dkl_nhead	= un->un_g.dkg_nhead;
24046	dkl->dkl_nsect	= un->un_g.dkg_nsect;
24047
24048#if defined(_SUNOS_VTOC_8)
24049	dkl->dkl_obs1	= un->un_g.dkg_obs1;
24050	dkl->dkl_obs2	= un->un_g.dkg_obs2;
24051	dkl->dkl_obs3	= un->un_g.dkg_obs3;
24052	for (i = 0; i < NDKMAP; i++) {
24053		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
24054		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
24055	}
24056	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
24057#elif defined(_SUNOS_VTOC_16)
24058	dkl->dkl_skew	= un->un_dkg_skew;
24059#else
24060#error "No VTOC format defined."
24061#endif
24062
24063	dkl->dkl_magic			= DKL_MAGIC;
24064	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
24065	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
24066
24067	/* Construct checksum for the new disk label */
24068	sum = 0;
24069	sp = (short *)dkl;
24070	i = sizeof (struct dk_label) / sizeof (short);
24071	while (i--) {
24072		sum ^= *sp++;
24073	}
24074	dkl->dkl_cksum = sum;
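	/*
	 * Because dkl_cksum was still zero (from kmem_zalloc) while the
	 * XOR loop above ran, storing the result here makes the XOR
	 * across the entire label equal zero. A minimal sketch of the
	 * matching validity check (illustration only, not driver code;
	 * dk_label_cksum_ok is a hypothetical name):
	 *
	 *	int
	 *	dk_label_cksum_ok(struct dk_label *l)
	 *	{
	 *		short	*sp = (short *)l;
	 *		short	sum = 0;
	 *		int	i = sizeof (struct dk_label) / sizeof (short);
	 *
	 *		while (i--)
	 *			sum ^= *sp++;
	 *		return (sum == 0);
	 *	}
	 */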
24075
24076	mutex_exit(SD_MUTEX(un));
24077
24078	rval = sd_set_vtoc(un, dkl);
24079exit:
24080	kmem_free(dkl, sizeof (struct dk_label));
24081	mutex_enter(SD_MUTEX(un));
24082	return (rval);
24083}
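
/*
 *    Function: sd_dkio_set_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to set the device EFI label (DKIOCSETEFI). It
 *		copies in the user provided dk_efi structure, clears any
 *		existing VTOC label and its minor nodes, writes the EFI
 *		data to the device, and revalidates the label.
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to user provided dk_efi structure specifying
 *			the EFI data to be written.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EINVAL
 *		EFAULT
 *		ENXIO
 */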
24084
24085static int
24086sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
24087{
24088	struct sd_lun	*un = NULL;
24089	dk_efi_t	user_efi;
24090	int		rval = 0;
24091	void		*buffer;
24092	int		valid_efi;
24093
24094	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
24095		return (ENXIO);
24096
24097	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
24098		return (EFAULT);
24099
24100	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
24101
24102	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
24103	    (user_efi.dki_length > un->un_max_xfer_size))
24104		return (EINVAL);
24105
24106	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
24107	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
24108		rval = EFAULT;
24109	} else {
24110		/*
24111		 * let's clear the vtoc labels and clear the softstate
24112		 * vtoc.
24113		 */
24114		mutex_enter(SD_MUTEX(un));
24115		if (un->un_vtoc.v_sanity == VTOC_SANE) {
24116			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_dkio_set_efi: CLEAR VTOC\n");
24118			sd_clear_vtoc(un);
24119			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
24120			mutex_exit(SD_MUTEX(un));
24121			ddi_remove_minor_node(SD_DEVINFO(un), "h");
24122			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
24123			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
24124			    S_IFBLK,
24125			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
24126			    un->un_node_type, NULL);
24127			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
24128			    S_IFCHR,
24129			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
24130			    un->un_node_type, NULL);
24131		} else
24132			mutex_exit(SD_MUTEX(un));
24133		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
24134		    user_efi.dki_lba, SD_PATH_DIRECT);
24135		if (rval == 0) {
24136			mutex_enter(SD_MUTEX(un));
24137
			/*
			 * Set un_reserved for a valid efi label.
			 * The clear_efi function in fdisk and efi_write in
			 * libefi both change the efi label on disk in 3 steps:
			 * 1. Change primary gpt and gpe
			 * 2. Change backup gpe
			 * 3. Change backup gpt, which is one block
			 * We only reread the efi label after the 3rd step;
			 * otherwise there will be a "primary label corrupt"
			 * warning.
			 */
24148			if (user_efi.dki_length == un->un_tgt_blocksize) {
24149				un->un_f_geometry_is_valid = FALSE;
24150				valid_efi = sd_use_efi(un, SD_PATH_DIRECT);
24151				if ((valid_efi == 0) &&
24152				    un->un_f_devid_supported &&
24153				    (un->un_f_opt_fab_devid == TRUE)) {
24154					if (un->un_devid == NULL) {
24155						sd_register_devid(un,
24156						    SD_DEVINFO(un),
24157						    SD_TARGET_IS_UNRESERVED);
24158					} else {
24159						/*
24160						 * The device id for this disk
24161						 * has been fabricated. The
24162						 * device id must be preserved
24163						 * by writing it back out to
24164						 * disk.
24165						 */
24166						if (sd_write_deviceid(un)
24167						    != 0) {
24168							ddi_devid_free(
24169							    un->un_devid);
24170							un->un_devid = NULL;
24171						}
24172					}
24173				}
24174			}
24175
24176			mutex_exit(SD_MUTEX(un));
24177		}
24178	}
24179	kmem_free(buffer, user_efi.dki_length);
24180	return (rval);
24181}
24182
24183/*
24184 *    Function: sd_dkio_get_mboot
24185 *
24186 * Description: This routine is the driver entry point for handling user
24187 *		requests to get the current device mboot (DKIOCGMBOOT)
24188 *
24189 *   Arguments: dev  - the device number
24190 *		arg  - pointer to user provided mboot structure specifying
24191 *			the current mboot.
24192 *		flag - this argument is a pass through to ddi_copyxxx()
24193 *		       directly from the mode argument of ioctl().
24194 *
24195 * Return Code: 0
24196 *		EINVAL
24197 *		EFAULT
24198 *		ENXIO
24199 */
24200
24201static int
24202sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
24203{
24204	struct sd_lun	*un;
24205	struct mboot	*mboot;
24206	int		rval;
24207	size_t		buffer_size;
24208
24209	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
24210	    (un->un_state == SD_STATE_OFFLINE)) {
24211		return (ENXIO);
24212	}
24213
24214	if (!un->un_f_mboot_supported || arg == NULL) {
24215		return (EINVAL);
24216	}
24217
24218	/*
24219	 * Read the mboot block, located at absolute block 0 on the target.
24220	 */
24221	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
24222
24223	SD_TRACE(SD_LOG_IO_PARTITION, un,
24224	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
24225
24226	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
24227	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
24228	    SD_PATH_STANDARD)) == 0) {
24229		if (ddi_copyout(mboot, (void *)arg,
24230		    sizeof (struct mboot), flag) != 0) {
24231			rval = EFAULT;
24232		}
24233	}
24234	kmem_free(mboot, buffer_size);
24235	return (rval);
24236}
24237
24238
24239/*
24240 *    Function: sd_dkio_set_mboot
24241 *
24242 * Description: This routine is the driver entry point for handling user
24243 *		requests to validate and set the device master boot
24244 *		(DKIOCSMBOOT).
24245 *
24246 *   Arguments: dev  - the device number
24247 *		arg  - pointer to user provided mboot structure used to set the
24248 *			master boot.
24249 *		flag - this argument is a pass through to ddi_copyxxx()
24250 *		       directly from the mode argument of ioctl().
24251 *
24252 * Return Code: 0
24253 *		EINVAL
24254 *		EFAULT
24255 *		ENXIO
24256 */
24257
24258static int
24259sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
24260{
24261	struct sd_lun	*un = NULL;
24262	struct mboot	*mboot = NULL;
24263	int		rval;
24264	ushort_t	magic;
24265
24266	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24267		return (ENXIO);
24268	}
24269
24270	ASSERT(!mutex_owned(SD_MUTEX(un)));
24271
24272	if (!un->un_f_mboot_supported) {
24273		return (EINVAL);
24274	}
24275
24276	if (arg == NULL) {
24277		return (EINVAL);
24278	}
24279
24280	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
24281
24282	if (ddi_copyin((const void *)arg, mboot,
24283	    sizeof (struct mboot), flag) != 0) {
24284		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24285		return (EFAULT);
24286	}
24287
24288	/* Is this really a master boot record? */
24289	magic = LE_16(mboot->signature);
24290	if (magic != MBB_MAGIC) {
24291		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24292		return (EINVAL);
24293	}
24294
24295	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
24296	    SD_PATH_STANDARD);
24297
24298	mutex_enter(SD_MUTEX(un));
24299#if defined(__i386) || defined(__amd64)
24300	if (rval == 0) {
24301		/*
24302		 * mboot has been written successfully.
24303		 * update the fdisk and vtoc tables in memory
24304		 */
24305		rval = sd_update_fdisk_and_vtoc(un);
24306		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
24307			mutex_exit(SD_MUTEX(un));
24308			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24309			return (rval);
24310		}
24311	}
24312
24313#ifdef __lock_lint
24314	sd_setup_default_geometry(un);
24315#endif
24316
24317#else
24318	if (rval == 0) {
24319		/*
24320		 * mboot has been written successfully.
24321		 * set up the default geometry and VTOC
24322		 */
24323		if (un->un_blockcount <= DK_MAX_BLOCKS)
24324			sd_setup_default_geometry(un);
24325	}
24326#endif
24327	mutex_exit(SD_MUTEX(un));
24328	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
24329	return (rval);
24330}
24331
24332
24333/*
24334 *    Function: sd_setup_default_geometry
24335 *
24336 * Description: This local utility routine sets the default geometry as part of
24337 *		setting the device mboot.
24338 *
24339 *   Arguments: un - driver soft state (unit) structure
24340 *
24341 * Note: This may be redundant with sd_build_default_label.
24342 */
24343
24344static void
24345sd_setup_default_geometry(struct sd_lun *un)
24346{
24347	/* zero out the soft state geometry and partition table. */
24348	bzero(&un->un_g, sizeof (struct dk_geom));
24349	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
24350	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
24351	un->un_asciilabel[0] = '\0';
24352
	/*
	 * For the rpm, we use the minimum for the disk.
	 * For heads, cylinders, and sectors per track:
	 * if the capacity is <= 1GB, use head = 64 and sect = 32;
	 * else use head = 255 and sect = 63.
	 * Note: the capacity should equal the C*H*S product; the
	 * division below rounds down, so some size is truncated.
	 * For CD-ROMs this truncation can have adverse side effects,
	 * so we return ncyl and nhead as 1 (nsect would overflow for
	 * most CD-ROMs, since nsect is a ushort).
	 */
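	/*
	 * Worked example (illustrative numbers only): a disk of 0x800000
	 * (8388608) blocks takes the largest branch below, giving
	 * nhead = 255 and nsect = 63, so ncyl = 8388608 / 16065 = 522;
	 * un_blockcount is then trimmed to 522 * 16065 = 8385930 blocks.
	 */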
24365	if (ISCD(un)) {
24366		un->un_g.dkg_ncyl = 1;
24367		un->un_g.dkg_nhead = 1;
24368		un->un_g.dkg_nsect = un->un_blockcount;
24369	} else {
24370		if (un->un_blockcount <= 0x1000) {
24371			/* Needed for unlabeled SCSI floppies. */
24372			un->un_g.dkg_nhead = 2;
24373			un->un_g.dkg_ncyl = 80;
24374			un->un_g.dkg_pcyl = 80;
24375			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
24376		} else if (un->un_blockcount <= 0x200000) {
24377			un->un_g.dkg_nhead = 64;
24378			un->un_g.dkg_nsect = 32;
24379			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
24380		} else {
24381			un->un_g.dkg_nhead = 255;
24382			un->un_g.dkg_nsect = 63;
24383			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
24384		}
24385		un->un_blockcount = un->un_g.dkg_ncyl *
24386		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
24387	}
24388	un->un_g.dkg_acyl = 0;
24389	un->un_g.dkg_bcyl = 0;
24390	un->un_g.dkg_intrlv = 1;
24391	un->un_g.dkg_rpm = 200;
24392	un->un_g.dkg_read_reinstruct = 0;
24393	un->un_g.dkg_write_reinstruct = 0;
24394	if (un->un_g.dkg_pcyl == 0) {
24395		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
24396	}
24397
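	/*
	 * Default partition map: slice 0 ('a') and slice 2 ('c', which by
	 * Solaris convention represents the whole disk) both span the
	 * entire device.
	 */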
24398	un->un_map['a'-'a'].dkl_cylno = 0;
24399	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
24400	un->un_map['c'-'a'].dkl_cylno = 0;
24401	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
24402	un->un_f_geometry_is_valid = FALSE;
24403}
24404
24405
24406#if defined(__i386) || defined(__amd64)
24407/*
24408 *    Function: sd_update_fdisk_and_vtoc
24409 *
24410 * Description: This local utility routine updates the device fdisk and vtoc
24411 *		as part of setting the device mboot.
24412 *
24413 *   Arguments: un - driver soft state (unit) structure
24414 *
24415 * Return Code: 0 for success or errno-type return code.
24416 *
 *    Note: x86: This looks like a duplicate of sd_validate_geometry(),
 *		but the two did exist separately in the x86 sd.c.
24419 */
24420
24421static int
24422sd_update_fdisk_and_vtoc(struct sd_lun *un)
24423{
24424	static char	labelstring[128];
24425	static char	buf[256];
24426	char		*label = 0;
24427	int		count;
24428	int		label_rc = 0;
24429	int		gvalid = un->un_f_geometry_is_valid;
24430	int		fdisk_rval;
24431	int		lbasize;
24432	int		capacity;
24433
24434	ASSERT(mutex_owned(SD_MUTEX(un)));
24435
24436	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
24437		return (EINVAL);
24438	}
24439
24440	if (un->un_f_blockcount_is_valid == FALSE) {
24441		return (EINVAL);
24442	}
24443
24444#if defined(_SUNOS_VTOC_16)
24445	/*
24446	 * Set up the "whole disk" fdisk partition; this should always
24447	 * exist, regardless of whether the disk contains an fdisk table
24448	 * or vtoc.
24449	 */
24450	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
24451	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
24452#endif	/* defined(_SUNOS_VTOC_16) */
24453
24454	/*
24455	 * copy the lbasize and capacity so that if they're
24456	 * reset while we're not holding the SD_MUTEX(un), we will
24457	 * continue to use valid values after the SD_MUTEX(un) is
24458	 * reacquired.
24459	 */
24460	lbasize  = un->un_tgt_blocksize;
24461	capacity = un->un_blockcount;
24462
	/*
	 * Refresh the logical and physical geometry caches
	 * (data from the mode sense format/rigid disk geometry pages
	 * and scsi_ifgetcap("geometry")).
	 */
24468	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
24469
24470	/*
24471	 * Only DIRECT ACCESS devices will have Sun labels.
	 * CDs supposedly have a Sun label, too.
24473	 */
24474	if (un->un_f_vtoc_label_supported) {
24475		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
24476		    SD_PATH_DIRECT);
24477		if (fdisk_rval == SD_CMD_FAILURE) {
24478			ASSERT(mutex_owned(SD_MUTEX(un)));
24479			return (EIO);
24480		}
24481
24482		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
24483			ASSERT(mutex_owned(SD_MUTEX(un)));
24484			return (EACCES);
24485		}
24486
24487		if (un->un_solaris_size <= DK_LABEL_LOC) {
24488			/*
24489			 * Found fdisk table but no Solaris partition entry,
24490			 * so don't call sd_uselabel() and don't create
24491			 * a default label.
24492			 */
24493			label_rc = 0;
24494			un->un_f_geometry_is_valid = TRUE;
24495			goto no_solaris_partition;
24496		}
24497
24498#if defined(_SUNOS_VTOC_8)
24499		label = (char *)un->un_asciilabel;
24500#elif defined(_SUNOS_VTOC_16)
24501		label = (char *)un->un_vtoc.v_asciilabel;
24502#else
24503#error "No VTOC format defined."
24504#endif
24505	} else if (capacity < 0) {
24506		ASSERT(mutex_owned(SD_MUTEX(un)));
24507		return (EINVAL);
24508	}
24509
24510	/*
	 * For removable media we reach here if we have found a
	 * SOLARIS PARTITION.
	 * If un_f_geometry_is_valid is FALSE it indicates that the
	 * SOLARIS PARTITION has changed from the previous one, so we
	 * set up a default VTOC in this case.
24516	 */
24517	if (un->un_f_geometry_is_valid == FALSE) {
24518		sd_build_default_label(un);
24519		label_rc = 0;
24520	}
24521
24522no_solaris_partition:
24523	if ((!un->un_f_has_removable_media ||
24524	    (un->un_f_has_removable_media &&
24525	    un->un_mediastate == DKIO_EJECTED)) &&
	    (un->un_state == SD_STATE_NORMAL && !gvalid)) {
24527		/*
		 * Print out a message indicating who and what we are.
		 * We do this only when we actually validate the
		 * geometry. We may call sd_validate_geometry() at other
		 * times (e.g. for ioctls like Get VTOC), in which case
		 * we don't want to print the label.
		 * If the geometry is valid, print the label string;
		 * otherwise print vendor and product info, if available.
24535		 */
24536		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
24537			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
24538		} else {
24539			mutex_enter(&sd_label_mutex);
24540			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
24541			    labelstring);
24542			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
24543			    &labelstring[64]);
24544			(void) sprintf(buf, "?Vendor '%s', product '%s'",
24545			    labelstring, &labelstring[64]);
24546			if (un->un_f_blockcount_is_valid == TRUE) {
24547				(void) sprintf(&buf[strlen(buf)],
24548				    ", %" PRIu64 " %u byte blocks\n",
24549				    un->un_blockcount,
24550				    un->un_tgt_blocksize);
24551			} else {
24552				(void) sprintf(&buf[strlen(buf)],
24553				    ", (unknown capacity)\n");
24554			}
24555			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
24556			mutex_exit(&sd_label_mutex);
24557		}
24558	}
24559
24560#if defined(_SUNOS_VTOC_16)
24561	/*
24562	 * If we have valid geometry, set up the remaining fdisk partitions.
24563	 * Note that dkl_cylno is not used for the fdisk map entries, so
24564	 * we set it to an entirely bogus value.
24565	 */
24566	for (count = 0; count < FD_NUMPART; count++) {
24567		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
24568		un->un_map[FDISK_P1 + count].dkl_nblk =
24569		    un->un_fmap[count].fmap_nblk;
24570		un->un_offset[FDISK_P1 + count] =
24571		    un->un_fmap[count].fmap_start;
24572	}
24573#endif
24574
24575	for (count = 0; count < NDKMAP; count++) {
24576#if defined(_SUNOS_VTOC_8)
24577		struct dk_map *lp  = &un->un_map[count];
24578		un->un_offset[count] =
24579		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
24580#elif defined(_SUNOS_VTOC_16)
24581		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
24582		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
24583#else
24584#error "No VTOC format defined."
24585#endif
24586	}
24587
24588	ASSERT(mutex_owned(SD_MUTEX(un)));
24589	return (label_rc);
24590}
24591#endif
24592
24593
24594/*
24595 *    Function: sd_check_media
24596 *
24597 * Description: This utility routine implements the functionality for the
24598 *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24599 *		driver state changes from that specified by the user
24600 *		(inserted or ejected). For example, if the user specifies
24601 *		DKIO_EJECTED and the current media state is inserted this
24602 *		routine will immediately return DKIO_INSERTED. However, if the
24603 *		current media state is not inserted the user thread will be
24604 *		blocked until the drive state changes. If DKIO_NONE is specified
24605 *		the user thread will block until a drive state change occurs.
24606 *
24607 *   Arguments: dev  - the device number
24608 *		state  - user pointer to a dkio_state, updated with the current
24609 *			drive state at return.
24610 *
24611 * Return Code: ENXIO
24612 *		EIO
24613 *		EAGAIN
24614 *		EINTR
24615 */
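
/*
 * A minimal sketch of how a user program drives this ioctl (illustration
 * only; the device path is hypothetical):
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY | O_NDELAY);
 *	enum dkio_state state = DKIO_NONE;
 *
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		if (state == DKIO_INSERTED)
 *			(void) printf("media inserted\n");
 *		else if (state == DKIO_EJECTED)
 *			(void) printf("media ejected\n");
 *	}
 *
 * Each call blocks until the media state differs from the value passed
 * in, then returns the new state through the same variable.
 */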
24616
24617static int
24618sd_check_media(dev_t dev, enum dkio_state state)
24619{
24620	struct sd_lun		*un = NULL;
24621	enum dkio_state		prev_state;
24622	opaque_t		token = NULL;
24623	int			rval = 0;
24624
24625	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24626		return (ENXIO);
24627	}
24628
24629	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24630
24631	mutex_enter(SD_MUTEX(un));
24632
24633	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24634	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24635
24636	prev_state = un->un_mediastate;
24637
24638	/* is there anything to do? */
24639	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24640		/*
24641		 * submit the request to the scsi_watch service;
24642		 * scsi_media_watch_cb() does the real work
24643		 */
24644		mutex_exit(SD_MUTEX(un));
24645
24646		/*
24647		 * This change handles the case where a scsi watch request is
24648		 * added to a device that is powered down. To accomplish this
24649		 * we power up the device before adding the scsi watch request,
24650		 * since the scsi watch sends a TUR directly to the device
24651		 * which the device cannot handle if it is powered down.
24652		 */
24653		if (sd_pm_entry(un) != DDI_SUCCESS) {
24654			mutex_enter(SD_MUTEX(un));
24655			goto done;
24656		}
24657
24658		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
24659		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24660		    (caddr_t)dev);
24661
24662		sd_pm_exit(un);
24663
24664		mutex_enter(SD_MUTEX(un));
24665		if (token == NULL) {
24666			rval = EAGAIN;
24667			goto done;
24668		}
24669
24670		/*
24671		 * This is a special case IOCTL that doesn't return
24672		 * until the media state changes. Routine sdpower
24673		 * knows about and handles this so don't count it
24674		 * as an active cmd in the driver, which would
24675		 * keep the device busy to the pm framework.
24676		 * If the count isn't decremented the device can't
24677		 * be powered down.
24678		 */
24679		un->un_ncmds_in_driver--;
24680		ASSERT(un->un_ncmds_in_driver >= 0);
24681
24682		/*
24683		 * if a prior request had been made, this will be the same
24684		 * token, as scsi_watch was designed that way.
24685		 */
24686		un->un_swr_token = token;
24687		un->un_specified_mediastate = state;
24688
24689		/*
24690		 * now wait for media change
24691		 * we will not be signalled unless mediastate == state but it is
24692		 * still better to test for this condition, since there is a
24693		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
24694		 */
24695		SD_TRACE(SD_LOG_COMMON, un,
24696		    "sd_check_media: waiting for media state change\n");
24697		while (un->un_mediastate == state) {
24698			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24699				SD_TRACE(SD_LOG_COMMON, un,
24700				    "sd_check_media: waiting for media state "
24701				    "was interrupted\n");
24702				un->un_ncmds_in_driver++;
24703				rval = EINTR;
24704				goto done;
24705			}
24706			SD_TRACE(SD_LOG_COMMON, un,
24707			    "sd_check_media: received signal, state=%x\n",
24708			    un->un_mediastate);
24709		}
24710		/*
24711		 * Inc the counter to indicate the device once again
24712		 * has an active outstanding cmd.
24713		 */
24714		un->un_ncmds_in_driver++;
24715	}
24716
24717	/* invalidate geometry */
24718	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24719		sr_ejected(un);
24720	}
24721
24722	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24723		uint64_t	capacity;
24724		uint_t		lbasize;
24725
24726		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24727		mutex_exit(SD_MUTEX(un));
24728		/*
24729		 * Since the following routines use SD_PATH_DIRECT, we must
24730		 * call PM directly before the upcoming disk accesses. This
24731		 * may cause the disk to be power/spin up.
24732		 */
24733
24734		if (sd_pm_entry(un) == DDI_SUCCESS) {
			rval = sd_send_scsi_READ_CAPACITY(un, &capacity,
			    &lbasize, SD_PATH_DIRECT);
24738			if (rval != 0) {
24739				sd_pm_exit(un);
24740				mutex_enter(SD_MUTEX(un));
24741				goto done;
24742			}
24743		} else {
24744			rval = EIO;
24745			mutex_enter(SD_MUTEX(un));
24746			goto done;
24747		}
24748		mutex_enter(SD_MUTEX(un));
24749
24750		sd_update_block_info(un, lbasize, capacity);
24751
24752		un->un_f_geometry_is_valid	= FALSE;
24753		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24754
24755		mutex_exit(SD_MUTEX(un));
24756		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24757		    SD_PATH_DIRECT);
24758		sd_pm_exit(un);
24759
24760		mutex_enter(SD_MUTEX(un));
24761	}
24762done:
24763	un->un_f_watcht_stopped = FALSE;
24764	if (un->un_swr_token) {
24765		/*
24766		 * Use of this local token and the mutex ensures that we avoid
24767		 * some race conditions associated with terminating the
24768		 * scsi watch.
24769		 */
24770		token = un->un_swr_token;
24771		un->un_swr_token = (opaque_t)NULL;
24772		mutex_exit(SD_MUTEX(un));
24773		(void) scsi_watch_request_terminate(token,
24774		    SCSI_WATCH_TERMINATE_WAIT);
24775		mutex_enter(SD_MUTEX(un));
24776	}
24777
24778	/*
24779	 * Update the capacity kstat value, if no media previously
24780	 * (capacity kstat is 0) and a media has been inserted
24781	 * (un_f_blockcount_is_valid == TRUE)
24782	 */
24783	if (un->un_errstats) {
24784		struct sd_errstats	*stp = NULL;
24785
24786		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24787		if ((stp->sd_capacity.value.ui64 == 0) &&
24788		    (un->un_f_blockcount_is_valid == TRUE)) {
24789			stp->sd_capacity.value.ui64 =
24790			    (uint64_t)((uint64_t)un->un_blockcount *
24791			    un->un_sys_blocksize);
24792		}
24793	}
24794	mutex_exit(SD_MUTEX(un));
24795	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24796	return (rval);
24797}
24798
24799
24800/*
24801 *    Function: sd_delayed_cv_broadcast
24802 *
24803 * Description: Delayed cv_broadcast to allow for target to recover from media
24804 *		insertion.
24805 *
24806 *   Arguments: arg - driver soft state (unit) structure
24807 */
24808
24809static void
24810sd_delayed_cv_broadcast(void *arg)
24811{
24812	struct sd_lun *un = arg;
24813
24814	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24815
24816	mutex_enter(SD_MUTEX(un));
24817	un->un_dcvb_timeid = NULL;
24818	cv_broadcast(&un->un_state_cv);
24819	mutex_exit(SD_MUTEX(un));
24820}
24821
24822
24823/*
24824 *    Function: sd_media_watch_cb
24825 *
24826 * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24827 *		routine processes the TUR sense data and updates the driver
24828 *		state if a transition has occurred. The user thread
24829 *		(sd_check_media) is then signalled.
24830 *
24831 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24832 *			among multiple watches that share this callback function
24833 *		resultp - scsi watch facility result packet containing scsi
24834 *			  packet, status byte and sense data
24835 *
24836 * Return Code: 0 for success, -1 for failure
24837 */
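
/*
 * Summary of the status/sense-to-state mapping implemented below
 * (derived from the code, for reference):
 *
 *	pkt_reason CMD_DEV_GONE				-> DKIO_DEV_GONE
 *	UNIT ATTENTION, ASC 0x28 (ready transition)	-> DKIO_INSERTED
 *	NOT READY, ASC 0x3a (medium not present)	-> DKIO_EJECTED
 *	NOT READY, ASC 0x04, ASCQ 0x02/0x07/0x08	-> DKIO_INSERTED
 *	GOOD status, command complete, no sense data	-> DKIO_INSERTED
 *	anything else					-> DKIO_NONE
 */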
24838
24839static int
24840sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24841{
24842	struct sd_lun			*un;
24843	struct scsi_status		*statusp = resultp->statusp;
24844	uint8_t				*sensep = (uint8_t *)resultp->sensep;
24845	enum dkio_state			state = DKIO_NONE;
24846	dev_t				dev = (dev_t)arg;
24847	uchar_t				actual_sense_length;
24848	uint8_t				skey, asc, ascq;
24849
24850	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24851		return (-1);
24852	}
24853	actual_sense_length = resultp->actual_sense_length;
24854
24855	mutex_enter(SD_MUTEX(un));
24856	SD_TRACE(SD_LOG_COMMON, un,
24857	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24858	    *((char *)statusp), (void *)sensep, actual_sense_length);
24859
24860	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24861		un->un_mediastate = DKIO_DEV_GONE;
24862		cv_broadcast(&un->un_state_cv);
24863		mutex_exit(SD_MUTEX(un));
24864
24865		return (0);
24866	}
24867
24868	/*
24869	 * If there was a check condition then sensep points to valid sense data
24870	 * If status was not a check condition but a reservation or busy status
24871	 * then the new state is DKIO_NONE
24872	 */
24873	if (sensep != NULL) {
24874		skey = scsi_sense_key(sensep);
24875		asc = scsi_sense_asc(sensep);
24876		ascq = scsi_sense_ascq(sensep);
24877
24878		SD_INFO(SD_LOG_COMMON, un,
24879		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24880		    skey, asc, ascq);
24881		/* This routine only uses up to 13 bytes of sense data. */
24882		if (actual_sense_length >= 13) {
24883			if (skey == KEY_UNIT_ATTENTION) {
24884				if (asc == 0x28) {
24885					state = DKIO_INSERTED;
24886				}
24887			} else {
24888				/*
				 * A 02/04/02 sense (NOT READY; ASC 0x04,
				 * ASCQ 0x02) means that the host should
				 * send a start command. Explicitly leave
				 * the media state as is (inserted), since
				 * the media is present and the host has
				 * stopped the device for PM reasons. The
				 * next true read/write to this media will
				 * bring the device to the right state for
				 * media access.
24898				 */
24899				if ((skey == KEY_NOT_READY) &&
24900				    (asc == 0x3a)) {
24901					state = DKIO_EJECTED;
24902				}
24903
24904				/*
				 * If the drive is busy with an operation
				 * or a long write, keep the media in an
24907				 * inserted state.
24908				 */
24909
24910				if ((skey == KEY_NOT_READY) &&
24911				    (asc == 0x04) &&
24912				    ((ascq == 0x02) ||
24913				    (ascq == 0x07) ||
24914				    (ascq == 0x08))) {
24915					state = DKIO_INSERTED;
24916				}
24917			}
24918		}
24919	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24920	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24921		state = DKIO_INSERTED;
24922	}
24923
24924	SD_TRACE(SD_LOG_COMMON, un,
24925	    "sd_media_watch_cb: state=%x, specified=%x\n",
24926	    state, un->un_specified_mediastate);
24927
24928	/*
24929	 * now signal the waiting thread if this is *not* the specified state;
24930	 * delay the signal if the state is DKIO_INSERTED to allow the target
24931	 * to recover
24932	 */
24933	if (state != un->un_specified_mediastate) {
24934		un->un_mediastate = state;
24935		if (state == DKIO_INSERTED) {
24936			/*
24937			 * delay the signal to give the drive a chance
24938			 * to do what it apparently needs to do
24939			 */
24940			SD_TRACE(SD_LOG_COMMON, un,
24941			    "sd_media_watch_cb: delayed cv_broadcast\n");
24942			if (un->un_dcvb_timeid == NULL) {
24943				un->un_dcvb_timeid =
24944				    timeout(sd_delayed_cv_broadcast, un,
24945				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24946			}
24947		} else {
24948			SD_TRACE(SD_LOG_COMMON, un,
24949			    "sd_media_watch_cb: immediate cv_broadcast\n");
24950			cv_broadcast(&un->un_state_cv);
24951		}
24952	}
24953	mutex_exit(SD_MUTEX(un));
24954	return (0);
24955}
24956
24957
24958/*
24959 *    Function: sd_dkio_get_temp
24960 *
24961 * Description: This routine is the driver entry point for handling ioctl
24962 *		requests to get the disk temperature.
24963 *
24964 *   Arguments: dev  - the device number
24965 *		arg  - pointer to user provided dk_temperature structure.
24966 *		flag - this argument is a pass through to ddi_copyxxx()
24967 *		       directly from the mode argument of ioctl().
24968 *
24969 * Return Code: 0
24970 *		EFAULT
24971 *		ENXIO
24972 *		EAGAIN
24973 */
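
/*
 * A minimal user-level sketch of this ioctl (illustration only; the
 * ioctl code is assumed to be DKIOCGTEMPERATURE from <sys/dkio.h> and
 * the file descriptor is assumed to be open on the raw device):
 *
 *	struct dk_temperature	dkt;
 *
 *	bzero(&dkt, sizeof (dkt));
 *	dkt.dkt_flags = DKT_BYPASS_PM;
 *	if (ioctl(fd, DKIOCGTEMPERATURE, &dkt) == 0 &&
 *	    dkt.dkt_cur_temp != DKT_INVALID_TEMP)
 *		(void) printf("current temp: %d C\n", dkt.dkt_cur_temp);
 */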
24974
24975static int
24976sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24977{
24978	struct sd_lun		*un = NULL;
24979	struct dk_temperature	*dktemp = NULL;
24980	uchar_t			*temperature_page;
24981	int			rval = 0;
24982	int			path_flag = SD_PATH_STANDARD;
24983
24984	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24985		return (ENXIO);
24986	}
24987
24988	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24989
24990	/* copyin the disk temp argument to get the user flags */
24991	if (ddi_copyin((void *)arg, dktemp,
24992	    sizeof (struct dk_temperature), flag) != 0) {
24993		rval = EFAULT;
24994		goto done;
24995	}
24996
24997	/* Initialize the temperature to invalid. */
24998	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24999	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
25000
25001	/*
25002	 * Note: Investigate removing the "bypass pm" semantic.
25003	 * Can we just bypass PM always?
25004	 */
25005	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
25006		path_flag = SD_PATH_DIRECT;
25007		ASSERT(!mutex_owned(&un->un_pm_mutex));
25008		mutex_enter(&un->un_pm_mutex);
25009		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
25010			/*
25011			 * If DKT_BYPASS_PM is set, and the drive happens to be
			 * in low power mode, we cannot wake it up; we need
			 * to return EAGAIN.
25014			 */
25015			mutex_exit(&un->un_pm_mutex);
25016			rval = EAGAIN;
25017			goto done;
25018		} else {
25019			/*
25020			 * Indicate to PM the device is busy. This is required
25021			 * to avoid a race - i.e. the ioctl is issuing a
25022			 * command and the pm framework brings down the device
25023			 * to low power mode (possible power cut-off on some
25024			 * platforms).
25025			 */
25026			mutex_exit(&un->un_pm_mutex);
25027			if (sd_pm_entry(un) != DDI_SUCCESS) {
25028				rval = EAGAIN;
25029				goto done;
25030			}
25031		}
25032	}
25033
25034	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
25035
25036	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
25037	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
25038		goto done2;
25039	}
25040
25041	/*
25042	 * For the current temperature verify that the parameter length is 0x02
25043	 * and the parameter code is 0x00
25044	 */
25045	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
25046	    (temperature_page[5] == 0x00)) {
25047		if (temperature_page[9] == 0xFF) {
25048			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
25049		} else {
25050			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
25051		}
25052	}
25053
25054	/*
25055	 * For the reference temperature verify that the parameter
25056	 * length is 0x02 and the parameter code is 0x01
25057	 */
25058	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
25059	    (temperature_page[11] == 0x01)) {
25060		if (temperature_page[15] == 0xFF) {
25061			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
25062		} else {
25063			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
25064		}
25065	}
25066
25067	/* Do the copyout regardless of the temperature commands status. */
25068	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
25069	    flag) != 0) {
25070		rval = EFAULT;
25071	}
25072
25073done2:
25074	if (path_flag == SD_PATH_DIRECT) {
25075		sd_pm_exit(un);
25076	}
25077
25078	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
25079done:
25080	if (dktemp != NULL) {
25081		kmem_free(dktemp, sizeof (struct dk_temperature));
25082	}
25083
25084	return (rval);
25085}
25086
25087
25088/*
25089 *    Function: sd_log_page_supported
25090 *
25091 * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
25092 *		supported log pages.
25093 *
 *   Arguments: un - driver soft state (unit) structure
 *		log_page - the log page code to look for
 *
 * Return Code: -1 - on error (log sense is optional and may not be supported).
 *		0  - log page not found.
 *		1  - log page found.
25100 */
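
/*
 * For reference, the LOG SENSE "supported log pages" page (page code
 * 0x00) parsed below has this layout:
 *
 *	byte 0:		page code (0x00)
 *	byte 1:		reserved
 *	bytes 2-3:	page length n (only the low byte is used below)
 *	bytes 4..n+3:	list of supported page codes, one per byte
 */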
25101
25102static int
25103sd_log_page_supported(struct sd_lun *un, int log_page)
25104{
25105	uchar_t *log_page_data;
25106	int	i;
25107	int	match = 0;
25108	int	log_size;
25109
25110	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
25111
25112	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
25113	    SD_PATH_DIRECT) != 0) {
25114		SD_ERROR(SD_LOG_COMMON, un,
25115		    "sd_log_page_supported: failed log page retrieval\n");
25116		kmem_free(log_page_data, 0xFF);
25117		return (-1);
25118	}
25119	log_size = log_page_data[3];
25120
25121	/*
	 * The list of supported log pages starts from the fourth byte. Check
25123	 * until we run out of log pages or a match is found.
25124	 */
25125	for (i = 4; (i < (log_size + 4)) && !match; i++) {
25126		if (log_page_data[i] == log_page) {
25127			match++;
25128		}
25129	}
25130	kmem_free(log_page_data, 0xFF);
25131	return (match);
25132}
25133
25134
25135/*
25136 *    Function: sd_mhdioc_failfast
25137 *
25138 * Description: This routine is the driver entry point for handling ioctl
25139 *		requests to enable/disable the multihost failfast option.
25140 *		(MHIOCENFAILFAST)
25141 *
25142 *   Arguments: dev	- the device number
25143 *		arg	- user specified probing interval.
25144 *		flag	- this argument is a pass through to ddi_copyxxx()
25145 *			  directly from the mode argument of ioctl().
25146 *
25147 * Return Code: 0
25148 *		EFAULT
25149 *		ENXIO
25150 */
25151
25152static int
25153sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
25154{
25155	struct sd_lun	*un = NULL;
25156	int		mh_time;
25157	int		rval = 0;
25158
25159	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25160		return (ENXIO);
25161	}
25162
25163	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
25164		return (EFAULT);
25165
25166	if (mh_time) {
25167		mutex_enter(SD_MUTEX(un));
25168		un->un_resvd_status |= SD_FAILFAST;
25169		mutex_exit(SD_MUTEX(un));
25170		/*
25171		 * If mh_time is INT_MAX, then this ioctl is being used for
25172		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
25173		 */
25174		if (mh_time != INT_MAX) {
25175			rval = sd_check_mhd(dev, mh_time);
25176		}
25177	} else {
25178		(void) sd_check_mhd(dev, 0);
25179		mutex_enter(SD_MUTEX(un));
25180		un->un_resvd_status &= ~SD_FAILFAST;
25181		mutex_exit(SD_MUTEX(un));
25182	}
25183	return (rval);
25184}
25185
25186
25187/*
25188 *    Function: sd_mhdioc_takeown
25189 *
25190 * Description: This routine is the driver entry point for handling ioctl
25191 *		requests to forcefully acquire exclusive access rights to the
25192 *		multihost disk (MHIOCTKOWN).
25193 *
25194 *   Arguments: dev	- the device number
25195 *		arg	- user provided structure specifying the delay
25196 *			  parameters in milliseconds
25197 *		flag	- this argument is a pass through to ddi_copyxxx()
25198 *			  directly from the mode argument of ioctl().
25199 *
25200 * Return Code: 0
25201 *		EFAULT
25202 *		ENXIO
25203 */
25204
25205static int
25206sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
25207{
25208	struct sd_lun		*un = NULL;
25209	struct mhioctkown	*tkown = NULL;
25210	int			rval = 0;
25211
25212	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25213		return (ENXIO);
25214	}
25215
25216	if (arg != NULL) {
25217		tkown = (struct mhioctkown *)
25218		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
25219		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
25220		if (rval != 0) {
25221			rval = EFAULT;
25222			goto error;
25223		}
25224	}
25225
25226	rval = sd_take_ownership(dev, tkown);
25227	mutex_enter(SD_MUTEX(un));
25228	if (rval == 0) {
25229		un->un_resvd_status |= SD_RESERVE;
25230		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
25231			sd_reinstate_resv_delay =
25232			    tkown->reinstate_resv_delay * 1000;
25233		} else {
25234			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
25235		}
25236		/*
25237		 * Give the scsi_watch routine interval set by
25238		 * the MHIOCENFAILFAST ioctl precedence here.
25239		 */
25240		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
25241			mutex_exit(SD_MUTEX(un));
25242			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
25243			SD_TRACE(SD_LOG_IOCTL_MHD, un,
25244			    "sd_mhdioc_takeown : %d\n",
25245			    sd_reinstate_resv_delay);
25246		} else {
25247			mutex_exit(SD_MUTEX(un));
25248		}
25249		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
25250		    sd_mhd_reset_notify_cb, (caddr_t)un);
25251	} else {
25252		un->un_resvd_status &= ~SD_RESERVE;
25253		mutex_exit(SD_MUTEX(un));
25254	}
25255
25256error:
25257	if (tkown != NULL) {
25258		kmem_free(tkown, sizeof (struct mhioctkown));
25259	}
25260	return (rval);
25261}
25262
25263
25264/*
25265 *    Function: sd_mhdioc_release
25266 *
25267 * Description: This routine is the driver entry point for handling ioctl
25268 *		requests to release exclusive access rights to the multihost
25269 *		disk (MHIOCRELEASE).
25270 *
25271 *   Arguments: dev	- the device number
25272 *
25273 * Return Code: 0
25274 *		ENXIO
25275 */
25276
25277static int
25278sd_mhdioc_release(dev_t dev)
25279{
25280	struct sd_lun		*un = NULL;
25281	timeout_id_t		resvd_timeid_save;
25282	int			resvd_status_save;
25283	int			rval = 0;
25284
25285	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25286		return (ENXIO);
25287	}
25288
25289	mutex_enter(SD_MUTEX(un));
25290	resvd_status_save = un->un_resvd_status;
25291	un->un_resvd_status &=
25292	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
25293	if (un->un_resvd_timeid) {
25294		resvd_timeid_save = un->un_resvd_timeid;
25295		un->un_resvd_timeid = NULL;
25296		mutex_exit(SD_MUTEX(un));
25297		(void) untimeout(resvd_timeid_save);
25298	} else {
25299		mutex_exit(SD_MUTEX(un));
25300	}
25301
25302	/*
25303	 * destroy any pending timeout thread that may be attempting to
25304	 * reinstate reservation on this device.
25305	 */
25306	sd_rmv_resv_reclaim_req(dev);
25307
25308	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
25309		mutex_enter(SD_MUTEX(un));
25310		if ((un->un_mhd_token) &&
25311		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
25312			mutex_exit(SD_MUTEX(un));
25313			(void) sd_check_mhd(dev, 0);
25314		} else {
25315			mutex_exit(SD_MUTEX(un));
25316		}
25317		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
25318		    sd_mhd_reset_notify_cb, (caddr_t)un);
25319	} else {
25320		/*
25321		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
25322		 */
25323		mutex_enter(SD_MUTEX(un));
25324		un->un_resvd_status = resvd_status_save;
25325		mutex_exit(SD_MUTEX(un));
25326	}
25327	return (rval);
25328}
25329
25330
25331/*
25332 *    Function: sd_mhdioc_register_devid
25333 *
25334 * Description: This routine is the driver entry point for handling ioctl
25335 *		requests to register the device id (MHIOCREREGISTERDEVID).
25336 *
25337 *		Note: The implementation for this ioctl has been updated to
25338 *		be consistent with the original PSARC case (1999/357)
25339 *		(4375899, 4241671, 4220005)
25340 *
25341 *   Arguments: dev	- the device number
25342 *
25343 * Return Code: 0
25344 *		ENXIO
25345 */
25346
25347static int
25348sd_mhdioc_register_devid(dev_t dev)
25349{
25350	struct sd_lun	*un = NULL;
25351	int		rval = 0;
25352
25353	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25354		return (ENXIO);
25355	}
25356
25357	ASSERT(!mutex_owned(SD_MUTEX(un)));
25358
25359	mutex_enter(SD_MUTEX(un));
25360
25361	/* If a devid already exists, de-register it */
25362	if (un->un_devid != NULL) {
25363		ddi_devid_unregister(SD_DEVINFO(un));
25364		/*
25365		 * After unregister devid, needs to free devid memory
25366		 */
25367		ddi_devid_free(un->un_devid);
25368		un->un_devid = NULL;
25369	}
25370
25371	/* Check for reservation conflict */
25372	mutex_exit(SD_MUTEX(un));
25373	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
25374	mutex_enter(SD_MUTEX(un));
25375
25376	switch (rval) {
25377	case 0:
25378		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
25379		break;
25380	case EACCES:
25381		break;
25382	default:
25383		rval = EIO;
25384	}
25385
25386	mutex_exit(SD_MUTEX(un));
25387	return (rval);
25388}
25389
25390
25391/*
25392 *    Function: sd_mhdioc_inkeys
25393 *
25394 * Description: This routine is the driver entry point for handling ioctl
25395 *		requests to issue the SCSI-3 Persistent In Read Keys command
25396 *		to the device (MHIOCGRP_INKEYS).
25397 *
25398 *   Arguments: dev	- the device number
25399 *		arg	- user provided in_keys structure
25400 *		flag	- this argument is a pass through to ddi_copyxxx()
25401 *			  directly from the mode argument of ioctl().
25402 *
25403 * Return Code: code returned by sd_persistent_reservation_in_read_keys()
25404 *		ENXIO
25405 *		EFAULT
25406 */
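
/*
 * A minimal user-level sketch of this ioctl (illustration only; it
 * assumes the mhioc_inkeys_t and mhioc_key_list_t definitions from
 * <sys/mhd.h>, with an 8-entry key buffer picked arbitrarily):
 *
 *	mhioc_resv_key_t	keys[8];
 *	mhioc_key_list_t	klist;
 *	mhioc_inkeys_t		ink;
 *
 *	klist.listsize = 8;
 *	klist.list = keys;
 *	ink.li = &klist;
 *	if (ioctl(fd, MHIOCGRP_INKEYS, &ink) == 0)
 *		(void) printf("%u keys registered\n", klist.listlen);
 */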
25407
25408static int
25409sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
25410{
25411	struct sd_lun		*un;
25412	mhioc_inkeys_t		inkeys;
25413	int			rval = 0;
25414
25415	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25416		return (ENXIO);
25417	}
25418
25419#ifdef _MULTI_DATAMODEL
25420	switch (ddi_model_convert_from(flag & FMODELS)) {
25421	case DDI_MODEL_ILP32: {
25422		struct mhioc_inkeys32	inkeys32;
25423
25424		if (ddi_copyin(arg, &inkeys32,
25425		    sizeof (struct mhioc_inkeys32), flag) != 0) {
25426			return (EFAULT);
25427		}
25428		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
25429		if ((rval = sd_persistent_reservation_in_read_keys(un,
25430		    &inkeys, flag)) != 0) {
25431			return (rval);
25432		}
25433		inkeys32.generation = inkeys.generation;
25434		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
25435		    flag) != 0) {
25436			return (EFAULT);
25437		}
25438		break;
25439	}
25440	case DDI_MODEL_NONE:
25441		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
25442		    flag) != 0) {
25443			return (EFAULT);
25444		}
25445		if ((rval = sd_persistent_reservation_in_read_keys(un,
25446		    &inkeys, flag)) != 0) {
25447			return (rval);
25448		}
25449		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
25450		    flag) != 0) {
25451			return (EFAULT);
25452		}
25453		break;
25454	}
25455
25456#else /* ! _MULTI_DATAMODEL */
25457
25458	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
25459		return (EFAULT);
25460	}
25461	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
25462	if (rval != 0) {
25463		return (rval);
25464	}
25465	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
25466		return (EFAULT);
25467	}
25468
25469#endif /* _MULTI_DATAMODEL */
25470
25471	return (rval);
25472}
25473
25474
25475/*
25476 *    Function: sd_mhdioc_inresv
25477 *
25478 * Description: This routine is the driver entry point for handling ioctl
25479 *		requests to issue the SCSI-3 Persistent In Read Reservations
 *		command to the device (MHIOCGRP_INRESV).
25481 *
25482 *   Arguments: dev	- the device number
25483 *		arg	- user provided in_resv structure
25484 *		flag	- this argument is a pass through to ddi_copyxxx()
25485 *			  directly from the mode argument of ioctl().
25486 *
25487 * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25488 *		ENXIO
25489 *		EFAULT
25490 */
25491
25492static int
25493sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25494{
25495	struct sd_lun		*un;
25496	mhioc_inresvs_t		inresvs;
25497	int			rval = 0;
25498
25499	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25500		return (ENXIO);
25501	}
25502
25503#ifdef _MULTI_DATAMODEL
25504
25505	switch (ddi_model_convert_from(flag & FMODELS)) {
25506	case DDI_MODEL_ILP32: {
25507		struct mhioc_inresvs32	inresvs32;
25508
25509		if (ddi_copyin(arg, &inresvs32,
25510		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25511			return (EFAULT);
25512		}
25513		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25514		if ((rval = sd_persistent_reservation_in_read_resv(un,
25515		    &inresvs, flag)) != 0) {
25516			return (rval);
25517		}
25518		inresvs32.generation = inresvs.generation;
25519		if (ddi_copyout(&inresvs32, arg,
25520		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25521			return (EFAULT);
25522		}
25523		break;
25524	}
25525	case DDI_MODEL_NONE:
25526		if (ddi_copyin(arg, &inresvs,
25527		    sizeof (mhioc_inresvs_t), flag) != 0) {
25528			return (EFAULT);
25529		}
25530		if ((rval = sd_persistent_reservation_in_read_resv(un,
25531		    &inresvs, flag)) != 0) {
25532			return (rval);
25533		}
25534		if (ddi_copyout(&inresvs, arg,
25535		    sizeof (mhioc_inresvs_t), flag) != 0) {
25536			return (EFAULT);
25537		}
25538		break;
25539	}
25540
25541#else /* ! _MULTI_DATAMODEL */
25542
25543	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25544		return (EFAULT);
25545	}
25546	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25547	if (rval != 0) {
25548		return (rval);
25549	}
25550	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
25551		return (EFAULT);
25552	}
25553
25554#endif /* ! _MULTI_DATAMODEL */
25555
25556	return (rval);
25557}
25558
25559
25560/*
25561 * The following routines support the clustering functionality described below
25562 * and implement lost reservation reclaim functionality.
25563 *
25564 * Clustering
25565 * ----------
25566 * The clustering code uses two different, independent forms of SCSI
25567 * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25568 * Persistent Group Reservations. For any particular disk, it will use either
25569 * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25570 *
25571 * SCSI-2
25572 * The cluster software takes ownership of a multi-hosted disk by issuing the
25573 * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
 * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl: a
 * cluster, just after taking ownership of the disk with the MHIOCTKOWN
 * ioctl, then issues the MHIOCENFAILFAST ioctl. This ioctl "enables
 * failfast" in the driver. The
25577 * meaning of failfast is that if the driver (on this host) ever encounters the
25578 * scsi error return code RESERVATION_CONFLICT from the device, it should
25579 * immediately panic the host. The motivation for this ioctl is that if this
25580 * host does encounter reservation conflict, the underlying cause is that some
25581 * other host of the cluster has decided that this host is no longer in the
25582 * cluster and has seized control of the disks for itself. Since this host is no
25583 * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25584 * does two things:
25585 *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25586 *      error to panic the host
25587 *      (b) it sets up a periodic timer to test whether this host still has
25588 *      "access" (in that no other host has reserved the device):  if the
25589 *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25590 *      purpose of that periodic timer is to handle scenarios where the host is
25591 *      otherwise temporarily quiescent, temporarily doing no real i/o.
25592 * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25593 * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25594 * the device itself.
25595 *
25596 * SCSI-3 PGR
25597 * A direct semantic implementation of the SCSI-3 Persistent Reservation
25598 * facility is supported through the shared multihost disk ioctls
25599 * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25600 * MHIOCGRP_PREEMPTANDABORT)
25601 *
25602 * Reservation Reclaim:
25603 * --------------------
25604 * To support the lost reservation reclaim operations this driver creates a
25605 * single thread to handle reinstating reservations on all devices that have
 * lost reservations. sd_resv_reclaim_requests are logged for all devices
 * that have LOST RESERVATIONS when the scsi watch facility calls back
 * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
 * requests to regain the lost reservations.
25610 */
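
/*
 * For the SCSI-2 path, the sequence described above amounts to the
 * following from the cluster software's point of view (a sketch only;
 * the numeric delays are illustrative):
 *
 *	struct mhioctkown	t;
 *	int			mh_time = 1000;
 *
 *	bzero(&t, sizeof (t));
 *	t.reinstate_resv_delay = 6000;
 *	(void) ioctl(fd, MHIOCTKOWN, &t);
 *	(void) ioctl(fd, MHIOCENFAILFAST, &mh_time);
 *
 * reinstate_resv_delay and mh_time are both in milliseconds; passing
 * INT_MAX as mh_time arms failfast without starting the probe thread
 * (the SCSI-3 PGR usage handled in sd_mhdioc_failfast). Ownership is
 * returned with the MHIOCRELEASE ioctl.
 */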
25611
25612/*
25613 *    Function: sd_check_mhd()
25614 *
25615 * Description: This function sets up and submits a scsi watch request or
25616 *		terminates an existing watch request. This routine is used in
25617 *		support of reservation reclaim.
25618 *
25619 *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25620 *			 among multiple watches that share the callback function
 *		interval - the number of milliseconds specifying the watch
25622 *			   interval for issuing TEST UNIT READY commands. If
25623 *			   set to 0 the watch should be terminated. If the
25624 *			   interval is set to 0 and if the device is required
25625 *			   to hold reservation while disabling failfast, the
25626 *			   watch is restarted with an interval of
25627 *			   reinstate_resv_delay.
25628 *
25629 * Return Code: 0	   - Successful submit/terminate of scsi watch request
25630 *		ENXIO      - Indicates an invalid device was specified
25631 *		EAGAIN     - Unable to submit the scsi watch request
25632 */
25633
25634static int
25635sd_check_mhd(dev_t dev, int interval)
25636{
25637	struct sd_lun	*un;
25638	opaque_t	token;
25639
25640	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25641		return (ENXIO);
25642	}
25643
25644	/* is this a watch termination request? */
25645	if (interval == 0) {
25646		mutex_enter(SD_MUTEX(un));
25647		/* if there is an existing watch task then terminate it */
25648		if (un->un_mhd_token) {
25649			token = un->un_mhd_token;
25650			un->un_mhd_token = NULL;
25651			mutex_exit(SD_MUTEX(un));
25652			(void) scsi_watch_request_terminate(token,
25653			    SCSI_WATCH_TERMINATE_WAIT);
25654			mutex_enter(SD_MUTEX(un));
25655		} else {
25656			mutex_exit(SD_MUTEX(un));
25657			/*
25658			 * Note: If we return here we don't check for the
25659			 * failfast case. This is the original legacy
25660			 * implementation but perhaps we should be checking
25661			 * the failfast case.
25662			 */
25663			return (0);
25664		}
25665		/*
25666		 * If the device is required to hold reservation while
25667		 * disabling failfast, we need to restart the scsi_watch
25668		 * routine with an interval of reinstate_resv_delay.
25669		 */
25670		if (un->un_resvd_status & SD_RESERVE) {
25671			interval = sd_reinstate_resv_delay/1000;
25672		} else {
25673			/* no failfast so bail */
25674			mutex_exit(SD_MUTEX(un));
25675			return (0);
25676		}
25677		mutex_exit(SD_MUTEX(un));
25678	}
25679
25680	/*
25681	 * adjust minimum time interval to 1 second,
25682	 * and convert from msecs to usecs
25683	 */
25684	if (interval > 0 && interval < 1000) {
25685		interval = 1000;
25686	}
25687	interval *= 1000;
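	/*
	 * For example, a request of 800 (msecs) is clamped to 1000 and
	 * becomes 1000000 usecs; a request of 6000 becomes 6000000 usecs.
	 */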
25688
25689	/*
25690	 * submit the request to the scsi_watch service
25691	 */
25692	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25693	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25694	if (token == NULL) {
25695		return (EAGAIN);
25696	}
25697
25698	/*
25699	 * save token for termination later on
25700	 */
25701	mutex_enter(SD_MUTEX(un));
25702	un->un_mhd_token = token;
25703	mutex_exit(SD_MUTEX(un));
25704	return (0);
25705}
25706
25707
25708/*
25709 *    Function: sd_mhd_watch_cb()
25710 *
25711 * Description: This function is the call back function used by the scsi watch
25712 *		facility. The scsi watch facility sends the "Test Unit Ready"
25713 *		and processes the status. If applicable (i.e. a "Unit Attention"
25714 *		status and automatic "Request Sense" not used) the scsi watch
25715 *		facility will send a "Request Sense" and retrieve the sense data
 *		to be passed to this callback function. In either case,
 *		whether the automatic "Request Sense" is used or the
 *		facility submits one, this callback is passed the status
 *		and sense data.
25719 *
25720 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25721 *			among multiple watches that share this callback function
25722 *		resultp - scsi watch facility result packet containing scsi
25723 *			  packet, status byte and sense data
25724 *
25725 * Return Code: 0 - continue the watch task
25726 *		non-zero - terminate the watch task
25727 */
25728
25729static int
25730sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
25731{
25732	struct sd_lun			*un;
25733	struct scsi_status		*statusp;
25734	uint8_t				*sensep;
25735	struct scsi_pkt			*pkt;
25736	uchar_t				actual_sense_length;
	dev_t				dev = (dev_t)arg;
25738
25739	ASSERT(resultp != NULL);
25740	statusp			= resultp->statusp;
25741	sensep			= (uint8_t *)resultp->sensep;
25742	pkt			= resultp->pkt;
25743	actual_sense_length	= resultp->actual_sense_length;
25744
25745	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25746		return (ENXIO);
25747	}
25748
25749	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25750	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25751	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25752
25753	/* Begin processing of the status and/or sense data */
25754	if (pkt->pkt_reason != CMD_CMPLT) {
25755		/* Handle the incomplete packet */
25756		sd_mhd_watch_incomplete(un, pkt);
25757		return (0);
25758	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25759		if (*((unsigned char *)statusp)
25760		    == STATUS_RESERVATION_CONFLICT) {
25761			/*
25762			 * Handle a reservation conflict by panicking if
25763			 * configured for failfast or by logging the conflict
25764			 * and updating the reservation status
25765			 */
25766			mutex_enter(SD_MUTEX(un));
25767			if ((un->un_resvd_status & SD_FAILFAST) &&
25768			    (sd_failfast_enable)) {
25769				sd_panic_for_res_conflict(un);
25770				/*NOTREACHED*/
25771			}
25772			SD_INFO(SD_LOG_IOCTL_MHD, un,
25773			    "sd_mhd_watch_cb: Reservation Conflict\n");
25774			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25775			mutex_exit(SD_MUTEX(un));
25776		}
25777	}
25778
25779	if (sensep != NULL) {
25780		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25781			mutex_enter(SD_MUTEX(un));
25782			if ((scsi_sense_asc(sensep) ==
25783			    SD_SCSI_RESET_SENSE_CODE) &&
25784			    (un->un_resvd_status & SD_RESERVE)) {
25785				/*
25786				 * The additional sense code indicates a power
25787				 * on or bus device reset has occurred; update
25788				 * the reservation status.
25789				 */
25790				un->un_resvd_status |=
25791				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25792				SD_INFO(SD_LOG_IOCTL_MHD, un,
25793				    "sd_mhd_watch_cb: Lost Reservation\n");
25794			}
25795		} else {
25796			return (0);
25797		}
25798	} else {
25799		mutex_enter(SD_MUTEX(un));
25800	}
25801
25802	if ((un->un_resvd_status & SD_RESERVE) &&
25803	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25804		if (un->un_resvd_status & SD_WANT_RESERVE) {
25805			/*
25806			 * A reset occurred in between the last probe and this
25807			 * one so if a timeout is pending cancel it.
25808			 */
25809			if (un->un_resvd_timeid) {
25810				timeout_id_t temp_id = un->un_resvd_timeid;
25811				un->un_resvd_timeid = NULL;
25812				mutex_exit(SD_MUTEX(un));
25813				(void) untimeout(temp_id);
25814				mutex_enter(SD_MUTEX(un));
25815			}
25816			un->un_resvd_status &= ~SD_WANT_RESERVE;
25817		}
25818		if (un->un_resvd_timeid == 0) {
25819			/* Schedule a timeout to handle the lost reservation */
25820			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25821			    (void *)dev,
25822			    drv_usectohz(sd_reinstate_resv_delay));
25823		}
25824	}
25825	mutex_exit(SD_MUTEX(un));
25826	return (0);
25827}
25828
25829
25830/*
25831 *    Function: sd_mhd_watch_incomplete()
25832 *
25833 * Description: This function is used to find out why a scsi pkt sent by the
25834 *		scsi watch facility was not completed. Under some scenarios this
25835 *		routine simply returns without action; otherwise it resets the
25836 *		lun, target, or bus to see if the drive is still online.
25837 *
25838 *   Arguments: un  - driver soft state (unit) structure
25839 *		pkt - incomplete scsi pkt
25840 */
25841
25842static void
25843sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25844{
25845	int	be_chatty;
25846	int	perr;
25847
25848	ASSERT(pkt != NULL);
25849	ASSERT(un != NULL);
25850	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25851	perr		= (pkt->pkt_statistics & STAT_PERR);
25852
25853	mutex_enter(SD_MUTEX(un));
25854	if (un->un_state == SD_STATE_DUMPING) {
25855		mutex_exit(SD_MUTEX(un));
25856		return;
25857	}
25858
25859	switch (pkt->pkt_reason) {
25860	case CMD_UNX_BUS_FREE:
25861		/*
25862		 * If we had a parity error that caused the target to drop BSY*,
25863		 * don't be chatty about it.
25864		 */
25865		if (perr && be_chatty) {
25866			be_chatty = 0;
25867		}
25868		break;
25869	case CMD_TAG_REJECT:
25870		/*
25871		 * The SCSI-2 spec states that a tag reject will be sent by the
25872		 * target if tagged queuing is not supported. A tag reject may
25873		 * also be sent during certain initialization periods or to
25874		 * control internal resources. For the latter case the target
25875		 * may also return Queue Full.
25876		 *
25877		 * If this driver receives a tag reject from a target that is
25878		 * going through an init period or controlling internal
25879		 * resources, tagged queueing will be disabled. This is less
25880		 * than optimal behavior, but the driver is unable to determine
25881		 * the target state and assumes tagged queueing is not supported.
25882		 */
25883		pkt->pkt_flags = 0;
25884		un->un_tagflags = 0;
25885
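		/*
		 * Also drop the throttle: keep a small number of commands
		 * outstanding if the device otherwise supports queueing,
		 * else fall back to one command at a time.
		 */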
25886		if (un->un_f_opt_queueing == TRUE) {
25887			un->un_throttle = min(un->un_throttle, 3);
25888		} else {
25889			un->un_throttle = 1;
25890		}
25891		mutex_exit(SD_MUTEX(un));
25892		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25893		mutex_enter(SD_MUTEX(un));
25894		break;
25895	case CMD_INCOMPLETE:
25896		/*
25897		 * The transport stopped with an abnormal state; fall through and
25898		 * reset the target and/or bus unless selection did not complete
25899		 * (indicated by STATE_GOT_BUS), in which case we don't want to
25900		 * go through a target/bus reset.
25901		 */
25902		if (pkt->pkt_state == STATE_GOT_BUS) {
25903			break;
25904		}
25905		/*FALLTHROUGH*/
25906
25907	case CMD_TIMEOUT:
25908	default:
25909		/*
25910		 * The lun may still be running the command, so a lun reset
25911		 * should be attempted. If the lun reset fails or cannot be
25912		 * issued, then try a target reset. Lastly, try a bus reset.
25913		 */
25914		if ((pkt->pkt_statistics &
25915		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25916			int reset_retval = 0;
25917			mutex_exit(SD_MUTEX(un));
25918			if (un->un_f_allow_bus_device_reset == TRUE) {
25919				if (un->un_f_lun_reset_enabled == TRUE) {
25920					reset_retval =
25921					    scsi_reset(SD_ADDRESS(un),
25922					    RESET_LUN);
25923				}
25924				if (reset_retval == 0) {
25925					reset_retval =
25926					    scsi_reset(SD_ADDRESS(un),
25927					    RESET_TARGET);
25928				}
25929			}
25930			if (reset_retval == 0) {
25931				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25932			}
25933			mutex_enter(SD_MUTEX(un));
25934		}
25935		break;
25936	}
25937
25938	/* A device/bus reset has occurred; update the reservation status. */
25939	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25940	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25941		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25942			un->un_resvd_status |=
25943			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25944			SD_INFO(SD_LOG_IOCTL_MHD, un,
25945			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25946		}
25947	}
25948
25949	/*
25950	 * The disk has been turned off; update the device state.
25951	 *
25952	 * Note: Should we be offlining the disk here?
25953	 */
25954	if (pkt->pkt_state == STATE_GOT_BUS) {
25955		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25956		    "Disk not responding to selection\n");
25957		if (un->un_state != SD_STATE_OFFLINE) {
25958			New_state(un, SD_STATE_OFFLINE);
25959		}
25960	} else if (be_chatty) {
25961		/*
25962		 * suppress messages if they are all the same pkt reason;
25963		 * with TQ, many (up to 256) are returned with the same
25964		 * pkt_reason
25965		 */
25966		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25967			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25968			    "sd_mhd_watch_incomplete: "
25969			    "SCSI transport failed: reason '%s'\n",
25970			    scsi_rname(pkt->pkt_reason));
25971		}
25972	}
25973	un->un_last_pkt_reason = pkt->pkt_reason;
25974	mutex_exit(SD_MUTEX(un));
25975}
25976
25977
25978/*
25979 *    Function: sd_sname()
25980 *
25981 * Description: This is a simple little routine to return a string containing
25982 *		a printable description of the command status byte for use in
25983 *		logging.
25984 *
25985 *   Arguments: status - pointer to a status byte
25986 *   Arguments: status - the command status byte
25987 * Return Code: char * - string containing status description.
25988 */
25989
25990static char *
25991sd_sname(uchar_t status)
25992{
25993	switch (status & STATUS_MASK) {
25994	case STATUS_GOOD:
25995		return ("good status");
25996	case STATUS_CHECK:
25997		return ("check condition");
25998	case STATUS_MET:
25999		return ("condition met");
26000	case STATUS_BUSY:
26001		return ("busy");
26002	case STATUS_INTERMEDIATE:
26003		return ("intermediate");
26004	case STATUS_INTERMEDIATE_MET:
26005		return ("intermediate - condition met");
26006	case STATUS_RESERVATION_CONFLICT:
26007		return ("reservation conflict");
26008	case STATUS_TERMINATED:
26009		return ("command terminated");
26010	case STATUS_QFULL:
26011		return ("queue full");
26012	default:
26013		return ("<unknown status>");
26014	}
26015}
26016
26017
26018/*
26019 *    Function: sd_mhd_resvd_recover()
26020 *
26021 * Description: This function adds a reservation entry to the
26022 *		sd_resv_reclaim_request list and signals the reservation
26023 *		reclaim thread that there is work pending. If the reservation
26024 *		reclaim thread has not been previously created this function
26025 *		will kick it off.
26026 *
26027 *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
26028 *			among multiple watches that share this callback function
26029 *
26030 *     Context: This routine is called by timeout() and is run in interrupt
26031 *		context. It must not sleep or call other functions which may
26032 *		sleep.
26033 */
26034
26035static void
26036sd_mhd_resvd_recover(void *arg)
26037{
26038	dev_t			dev = (dev_t)arg;
26039	struct sd_lun		*un;
26040	struct sd_thr_request	*sd_treq = NULL;
26041	struct sd_thr_request	*sd_cur = NULL;
26042	struct sd_thr_request	*sd_prev = NULL;
26043	int			already_there = 0;
26044
26045	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26046		return;
26047	}
26048
26049	mutex_enter(SD_MUTEX(un));
26050	un->un_resvd_timeid = NULL;
26051	if (un->un_resvd_status & SD_WANT_RESERVE) {
26052		/*
26053		 * There was a reset, so don't issue the reserve; allow the
26054		 * sd_mhd_watch_cb callback function to notice this and
26055		 * reschedule the timeout for reservation.
26056		 */
26057		mutex_exit(SD_MUTEX(un));
26058		return;
26059	}
26060	mutex_exit(SD_MUTEX(un));
26061
26062	/*
26063	 * Add this device to the sd_resv_reclaim_request list and the
26064	 * sd_resv_reclaim_thread should take care of the rest.
26065	 *
26066	 * Note: We can't sleep in this context, so if the memory allocation
26067	 * fails, allow the sd_mhd_watch_cb callback function to notice this and
26068	 * reschedule the timeout for reservation.  (4378460)
26069	 */
26070	sd_treq = (struct sd_thr_request *)
26071	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
26072	if (sd_treq == NULL) {
26073		return;
26074	}
26075
26076	sd_treq->sd_thr_req_next = NULL;
26077	sd_treq->dev = dev;
26078	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
26079	if (sd_tr.srq_thr_req_head == NULL) {
26080		sd_tr.srq_thr_req_head = sd_treq;
26081	} else {
26082		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
26083		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
26084			if (sd_cur->dev == dev) {
26085				/*
26086				 * already in the queue, so don't
26087				 * log another request for the device
26088				 */
26089				already_there = 1;
26090				break;
26091			}
26092			sd_prev = sd_cur;
26093		}
26094		if (!already_there) {
26095			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
26096			    "logging request for %lx\n", dev);
26097			sd_prev->sd_thr_req_next = sd_treq;
26098		} else {
26099			kmem_free(sd_treq, sizeof (struct sd_thr_request));
26100		}
26101	}
26102
26103	/*
26104	 * Create a kernel thread to do the reservation reclaim if one does
26105	 * not already exist, and hand the work off to it; we cannot block
26106	 * this (timeout) thread while the reclaim is performed.
26107	 */
26108	if (sd_tr.srq_resv_reclaim_thread == NULL)
26109		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
26110		    sd_resv_reclaim_thread, NULL,
26111		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
26112
26113	/* Tell the reservation reclaim thread that it has work to do */
26114	cv_signal(&sd_tr.srq_resv_reclaim_cv);
26115	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
26116}
26117
26118/*
26119 *    Function: sd_resv_reclaim_thread()
26120 *
26121 * Description: This function implements the reservation reclaim operations
26122 *
26123 *   Arguments: none; pending requests are taken from the global
26124 *		      sd_resv_reclaim_request list (sd_tr)
26125 */
26126
26127static void
26128sd_resv_reclaim_thread()
26129{
26130	struct sd_lun		*un;
26131	struct sd_thr_request	*sd_mhreq;
26132
26133	/* Wait for work */
26134	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
26135	if (sd_tr.srq_thr_req_head == NULL) {
26136		cv_wait(&sd_tr.srq_resv_reclaim_cv,
26137		    &sd_tr.srq_resv_reclaim_mutex);
26138	}
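	/*
	 * If the wakeup was spurious and no request is queued, the while
	 * loop below simply falls through and this thread cleans up and
	 * exits; it will be recreated on the next reclaim request.
	 */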
26139
26140	/* Loop while we have work */
26141	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
26142		un = ddi_get_soft_state(sd_state,
26143		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
26144		if (un == NULL) {
26145			/*
26146			 * softstate structure is NULL so just
26147			 * dequeue the request and continue
26148			 */
26149			sd_tr.srq_thr_req_head =
26150			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
26151			kmem_free(sd_tr.srq_thr_cur_req,
26152			    sizeof (struct sd_thr_request));
26153			continue;
26154		}
26155
26156		/* dequeue the request */
26157		sd_mhreq = sd_tr.srq_thr_cur_req;
26158		sd_tr.srq_thr_req_head =
26159		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
26160		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
26161
26162		/*
26163		 * Reclaim reservation only if SD_RESERVE is still set. There
26164		 * may have been a call to MHIOCRELEASE before we got here.
26165		 */
26166		mutex_enter(SD_MUTEX(un));
26167		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
26168			/*
26169			 * Note: The SD_LOST_RESERVE flag is cleared before
26170			 * reclaiming the reservation. If this were done after the
26171			 * call to sd_reserve_release, a reservation loss in the
26172			 * window between pkt completion of the reserve cmd and
26173			 * the mutex_enter below might not be recognized.
26174			 */
26175			un->un_resvd_status &= ~SD_LOST_RESERVE;
26176			mutex_exit(SD_MUTEX(un));
26177
26178			if (sd_reserve_release(sd_mhreq->dev,
26179			    SD_RESERVE) == 0) {
26180				mutex_enter(SD_MUTEX(un));
26181				un->un_resvd_status |= SD_RESERVE;
26182				mutex_exit(SD_MUTEX(un));
26183				SD_INFO(SD_LOG_IOCTL_MHD, un,
26184				    "sd_resv_reclaim_thread: "
26185				    "Reservation Recovered\n");
26186			} else {
26187				mutex_enter(SD_MUTEX(un));
26188				un->un_resvd_status |= SD_LOST_RESERVE;
26189				mutex_exit(SD_MUTEX(un));
26190				SD_INFO(SD_LOG_IOCTL_MHD, un,
26191				    "sd_resv_reclaim_thread: Failed "
26192				    "Reservation Recovery\n");
26193			}
26194		} else {
26195			mutex_exit(SD_MUTEX(un));
26196		}
26197		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
26198		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
26199		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
26200		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
26201		/*
26202		 * Wake up the destroy thread if anyone is waiting on
26203		 * us to complete.
26204		 */
26205		cv_signal(&sd_tr.srq_inprocess_cv);
26206		SD_TRACE(SD_LOG_IOCTL_MHD, un,
26207		    "sd_resv_reclaim_thread: cv_signalling current request\n");
26208	}
26209
26210	/*
26211	 * Clean up the sd_tr structure now that this thread is exiting.
26212	 */
26213	ASSERT(sd_tr.srq_thr_req_head == NULL);
26214	ASSERT(sd_tr.srq_thr_cur_req == NULL);
26215	sd_tr.srq_resv_reclaim_thread = NULL;
26216	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
26217	thread_exit();
26218}
26219
26220
26221/*
26222 *    Function: sd_rmv_resv_reclaim_req()
26223 *
26224 * Description: This function removes any pending reservation reclaim requests
26225 *		for the specified device.
26226 *
26227 *   Arguments: dev - the device 'dev_t'
26228 */
26229
26230static void
26231sd_rmv_resv_reclaim_req(dev_t dev)
26232{
26233	struct sd_thr_request *sd_mhreq;
26234	struct sd_thr_request *sd_prev;
26235
26236	/* Remove a reservation reclaim request from the list */
26237	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
26238	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
26239		/*
26240		 * We are attempting to reinstate reservation for
26241		 * this device. We wait for sd_reserve_release()
26242		 * to return before we return.
26243		 */
26244		cv_wait(&sd_tr.srq_inprocess_cv,
26245		    &sd_tr.srq_resv_reclaim_mutex);
26246	} else {
26247		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
26248		if (sd_mhreq && sd_mhreq->dev == dev) {
26249			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
26250			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
26251			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
26252			return;
26253		}
26254		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
26255			if (sd_mhreq && sd_mhreq->dev == dev) {
26256				break;
26257			}
26258			sd_prev = sd_mhreq;
26259		}
26260		if (sd_mhreq != NULL) {
26261			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
26262			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
26263		}
26264	}
26265	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
26266}
26267
26268
26269/*
26270 *    Function: sd_mhd_reset_notify_cb()
26271 *
26272 * Description: This is a call back function for scsi_reset_notify. This
26273 *		function updates the softstate reserved status and logs the
26274 *		reset. The driver scsi watch facility callback function
26275 *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
26276 *		will reclaim the reservation.
26277 *
26278 *   Arguments: arg  - driver soft state (unit) structure
26279 */
26280
26281static void
26282sd_mhd_reset_notify_cb(caddr_t arg)
26283{
26284	struct sd_lun *un = (struct sd_lun *)arg;
26285
26286	mutex_enter(SD_MUTEX(un));
26287	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
26288		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
26289		SD_INFO(SD_LOG_IOCTL_MHD, un,
26290		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
26291	}
26292	mutex_exit(SD_MUTEX(un));
26293}
26294
26295
26296/*
26297 *    Function: sd_take_ownership()
26298 *
26299 * Description: This routine implements an algorithm to achieve a stable
26300 *		reservation on disks which don't implement priority reserve,
26301 *		and makes sure that other hosts lose re-reservation attempts.
26302 *		The algorithm consists of a loop that keeps issuing the RESERVE
26303 *		for some period of time (min_ownership_delay, default 6 seconds).
26304 *		During that loop, it checks whether a bus device reset or bus
26305 *		reset has occurred (both cause an existing reservation to be
26306 *		lost). If the reservation is lost, RESERVE is reissued until a
26307 *		period of min_ownership_delay with no resets has gone by, or
26308 *		until max_ownership_delay has expired. This loop ensures that
26309 *		the host really did manage to reserve the device, in spite of
26310 *		resets. The looping for min_ownership_delay (default six
26311 *		seconds) is important to early generation clustering products,
26312 *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
26313 *		MHIOCENFAILFAST periodic timer of two seconds. By having
26314 *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
26315 *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
26316 *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
26317 *		have already noticed, via the MHIOCENFAILFAST polling, that it
26318 *		no longer "owns" the disk and will have panicked itself.  Thus,
26319 *		the host issuing the MHIOCTKOWN is assured (with timing
26320 *		dependencies) that by the time it actually starts to use the
26321 *		disk for real work, the old owner is no longer accessing it.
26322 *
26323 *		min_ownership_delay is the minimum amount of time for which the
26324 *		disk must be reserved continuously devoid of resets before the
26325 *		MHIOCTKOWN ioctl will return success.
26326 *
26327 *		max_ownership_delay indicates the time by which the take
26328 *		ownership attempt must either succeed or time out with an error.
26329 *
26330 *   Arguments: dev - the device 'dev_t'
26331 *		*p  - struct containing timing info.
26332 *
26333 * Return Code: 0 for success or error code
26334 */
26335
26336static int
26337sd_take_ownership(dev_t dev, struct mhioctkown *p)
26338{
26339	struct sd_lun	*un;
26340	int		rval;
26341	int		err;
26342	int		reservation_count   = 0;
26343	int		min_ownership_delay =  6000000; /* in usec */
26344	int		max_ownership_delay = 30000000; /* in usec */
26345	clock_t		start_time;	/* starting time of this algorithm */
26346	clock_t		end_time;	/* time limit for giving up */
26347	clock_t		ownership_time;	/* time limit for stable ownership */
26348	clock_t		current_time;
26349	clock_t		previous_current_time;
26350
26351	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26352		return (ENXIO);
26353	}
26354
26355	/*
26356	 * Attempt a device reservation. A priority reservation is requested.
26357	 */
26358	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
26359	    != SD_SUCCESS) {
26360		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26361		    "sd_take_ownership: return(1)=%d\n", rval);
26362		return (rval);
26363	}
26364
26365	/* Update the softstate reserved status to indicate the reservation */
26366	mutex_enter(SD_MUTEX(un));
26367	un->un_resvd_status |= SD_RESERVE;
26368	un->un_resvd_status &=
26369	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
26370	mutex_exit(SD_MUTEX(un));
26371
26372	if (p != NULL) {
26373		if (p->min_ownership_delay != 0) {
26374			min_ownership_delay = p->min_ownership_delay * 1000;
26375		}
26376		if (p->max_ownership_delay != 0) {
26377			max_ownership_delay = p->max_ownership_delay * 1000;
26378		}
26379	}
26380	SD_INFO(SD_LOG_IOCTL_MHD, un,
26381	    "sd_take_ownership: min, max delays: %d, %d\n",
26382	    min_ownership_delay, max_ownership_delay);
26383
26384	start_time = ddi_get_lbolt();
26385	current_time	= start_time;
26386	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
26387	end_time	= start_time + drv_usectohz(max_ownership_delay);
26388
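	/*
	 * Note: the time checks below compare signed differences against
	 * zero (e.g. current_time - end_time < 0) rather than comparing
	 * the clock_t values directly, so they stay correct even if the
	 * lbolt value wraps.
	 */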
26389	while (current_time - end_time < 0) {
26390		delay(drv_usectohz(500000));
26391
26392		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
26393			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
26394				mutex_enter(SD_MUTEX(un));
26395				rval = (un->un_resvd_status &
26396				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
26397				mutex_exit(SD_MUTEX(un));
26398				break;
26399			}
26400		}
26401		previous_current_time = current_time;
26402		current_time = ddi_get_lbolt();
26403		mutex_enter(SD_MUTEX(un));
26404		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
26405			ownership_time = ddi_get_lbolt() +
26406			    drv_usectohz(min_ownership_delay);
26407			reservation_count = 0;
26408		} else {
26409			reservation_count++;
26410		}
26411		un->un_resvd_status |= SD_RESERVE;
26412		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
26413		mutex_exit(SD_MUTEX(un));
26414
26415		SD_INFO(SD_LOG_IOCTL_MHD, un,
26416		    "sd_take_ownership: ticks for loop iteration=%ld, "
26417		    "reservation=%s\n", (current_time - previous_current_time),
26418		    reservation_count ? "ok" : "reclaimed");
26419
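		/*
		 * Stable ownership requires both that min_ownership_delay
		 * has elapsed with no intervening reset and that at least
		 * four consecutive probes saw the reservation intact.
		 */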
26420		if (current_time - ownership_time >= 0 &&
26421		    reservation_count >= 4) {
26422			rval = 0; /* Achieved a stable ownership */
26423			break;
26424		}
26425		if (current_time - end_time >= 0) {
26426			rval = EACCES; /* No ownership in max possible time */
26427			break;
26428		}
26429	}
26430	SD_TRACE(SD_LOG_IOCTL_MHD, un,
26431	    "sd_take_ownership: return(2)=%d\n", rval);
26432	return (rval);
26433}
26434
26435
26436/*
26437 *    Function: sd_reserve_release()
26438 *
26439 * Description: This function builds and sends scsi RESERVE, RELEASE, and
26440 *		PRIORITY RESERVE commands based on a user specified command type
26441 *
26442 *   Arguments: dev - the device 'dev_t'
26443 *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
26444 *		      SD_RESERVE, SD_RELEASE
26445 *
26446 * Return Code: 0 or Error Code
26447 */
26448
26449static int
26450sd_reserve_release(dev_t dev, int cmd)
26451{
26452	struct uscsi_cmd	*com = NULL;
26453	struct sd_lun		*un = NULL;
26454	char			cdb[CDB_GROUP0];
26455	int			rval;
26456
26457	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
26458	    (cmd == SD_PRIORITY_RESERVE));
26459
26460	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26461		return (ENXIO);
26462	}
26463
26464	/* instantiate and initialize the command and cdb */
26465	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26466	bzero(cdb, CDB_GROUP0);
26467	com->uscsi_flags   = USCSI_SILENT;
26468	com->uscsi_timeout = un->un_reserve_release_time;
26469	com->uscsi_cdblen  = CDB_GROUP0;
26470	com->uscsi_cdb	   = cdb;
26471	if (cmd == SD_RELEASE) {
26472		cdb[0] = SCMD_RELEASE;
26473	} else {
26474		cdb[0] = SCMD_RESERVE;
26475	}
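	/*
	 * Note: SD_RESERVE and SD_PRIORITY_RESERVE both issue a plain
	 * SCMD_RESERVE; a priority reserve differs only in the reset-based
	 * "break reservation" handling below when the command fails with a
	 * reservation conflict.
	 */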
26476
26477	/* Send the command. */
26478	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26479	    UIO_SYSSPACE, SD_PATH_STANDARD);
26480
26481	/*
26482	 * "break" a reservation that is held by another host, by issuing a
26483	 * reset if priority reserve is desired, and we could not get the
26484	 * device.
26485	 */
26486	if ((cmd == SD_PRIORITY_RESERVE) &&
26487	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26488		/*
26489		 * First try to reset the LUN. If we cannot, then try a target
26490		 * reset, followed by a bus reset if the target reset fails.
26491		 */
26492		int reset_retval = 0;
26493		if (un->un_f_lun_reset_enabled == TRUE) {
26494			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
26495		}
26496		if (reset_retval == 0) {
26497			/* The LUN reset either failed or was not issued */
26498			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26499		}
26500		if ((reset_retval == 0) &&
26501		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
26502			rval = EIO;
26503			kmem_free(com, sizeof (*com));
26504			return (rval);
26505		}
26506
26507		bzero(com, sizeof (struct uscsi_cmd));
26508		com->uscsi_flags   = USCSI_SILENT;
26509		com->uscsi_cdb	   = cdb;
26510		com->uscsi_cdblen  = CDB_GROUP0;
26511		com->uscsi_timeout = 5;
26512
26513		/*
26514		 * Reissue the last reserve command, this time without request
26515		 * sense.  Assume that it is just a regular reserve command.
26516		 */
26517		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
26518		    UIO_SYSSPACE, SD_PATH_STANDARD);
26519	}
26520
26521	/* Return an error if still getting a reservation conflict. */
26522	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26523		rval = EACCES;
26524	}
26525
26526	kmem_free(com, sizeof (*com));
26527	return (rval);
26528}
26529
26530
26531#define	SD_NDUMP_RETRIES	12
26532/*
26533 *	System Crash Dump routine
26534 */
26535
26536static int
26537sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
26538{
26539	int		instance;
26540	int		partition;
26541	int		i;
26542	int		err;
26543	struct sd_lun	*un;
26544	struct dk_map	*lp;
26545	struct scsi_pkt *wr_pktp;
26546	struct buf	*wr_bp;
26547	struct buf	wr_buf;
26548	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
26549	daddr_t		tgt_blkno;	/* rmw - blkno for target */
26550	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
26551	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
26552	size_t		io_start_offset;
26553	int		doing_rmw = FALSE;
26554	int		rval;
26555#if defined(__i386) || defined(__amd64)
26556	ssize_t dma_resid;
26557	daddr_t oblkno;
26558#endif
26559
26560	instance = SDUNIT(dev);
26561	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
26562	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
26563		return (ENXIO);
26564	}
26565
26566	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
26567
26568	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
26569
26570	partition = SDPART(dev);
26571	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
26572
26573	/* Validate the blocks to dump against the partition size. */
26574	lp = &un->un_map[partition];
26575	if ((blkno + nblk) > lp->dkl_nblk) {
26576		SD_TRACE(SD_LOG_DUMP, un,
26577		    "sddump: dump range larger than partition: "
26578		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26579		    blkno, nblk, lp->dkl_nblk);
26580		return (EINVAL);
26581	}
26582
26583	mutex_enter(&un->un_pm_mutex);
26584	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
26585		struct scsi_pkt *start_pktp;
26586
26587		mutex_exit(&un->un_pm_mutex);
26588
26589		/*
26590		 * Use the pm framework to power on the HBA first.
26591		 */
26592		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
26593
26594		/*
26595		 * Dump no longer uses sdpower to power on a device; the logic
26596		 * is in-line here so it can be done in polled mode.
26597		 */
26598
26599		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
26600
26601		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
26602		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
26603
26604		if (start_pktp == NULL) {
26605			/* We were not given a SCSI packet, fail. */
26606			return (EIO);
26607		}
26608		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
26609		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
26610		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
26611		start_pktp->pkt_flags = FLAG_NOINTR;
26612
26613		mutex_enter(SD_MUTEX(un));
26614		SD_FILL_SCSI1_LUN(un, start_pktp);
26615		mutex_exit(SD_MUTEX(un));
26616		/*
26617		 * Scsi_poll returns 0 (success) if the command completes and
26618		 * the status block is STATUS_GOOD.
26619		 */
26620		if (sd_scsi_poll(un, start_pktp) != 0) {
26621			scsi_destroy_pkt(start_pktp);
26622			return (EIO);
26623		}
26624		scsi_destroy_pkt(start_pktp);
26625		(void) sd_ddi_pm_resume(un);
26626	} else {
26627		mutex_exit(&un->un_pm_mutex);
26628	}
26629
26630	mutex_enter(SD_MUTEX(un));
26631	un->un_throttle = 0;
26632
26633	/*
26634	 * The first time through, reset the specific target device.
26635	 * However, when cpr calls sddump we know that sd is in
26636	 * a good state, so no bus reset is required.
26637	 * Clear sense data via Request Sense cmd.
26638	 * In sddump we don't care about allow_bus_device_reset anymore
26639	 */
26640
26641	if ((un->un_state != SD_STATE_SUSPENDED) &&
26642	    (un->un_state != SD_STATE_DUMPING)) {
26643
26644		New_state(un, SD_STATE_DUMPING);
26645
26646		if (un->un_f_is_fibre == FALSE) {
26647			mutex_exit(SD_MUTEX(un));
26648			/*
26649			 * Attempt a bus reset for parallel scsi.
26650			 *
26651			 * Note: A bus reset is required because on some host
26652			 * systems (i.e. E420R) a bus device reset is
26653			 * insufficient to reset the state of the target.
26654			 *
26655			 * Note: Don't issue the reset for fibre-channel,
26656			 * because this tends to hang the bus (loop) for
26657			 * too long while everyone is logging out and in
26658			 * and the deadman timer for dumping will fire
26659			 * before the dump is complete.
26660			 */
26661			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
26662				mutex_enter(SD_MUTEX(un));
26663				Restore_state(un);
26664				mutex_exit(SD_MUTEX(un));
26665				return (EIO);
26666			}
26667
26668			/* Delay to give the device some recovery time. */
26669			drv_usecwait(10000);
26670
26671			if (sd_send_polled_RQS(un) == SD_FAILURE) {
26672				SD_INFO(SD_LOG_DUMP, un,
26673					"sddump: sd_send_polled_RQS failed\n");
26674			}
26675			mutex_enter(SD_MUTEX(un));
26676		}
26677	}
26678
26679	/*
26680	 * Convert the partition-relative block number to a
26681	 * disk physical block number.
26682	 */
26683	blkno += un->un_offset[partition];
26684	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
26685
26686
26687	/*
26688	 * Check if the device has a non-512 block size.
26689	 */
26690	wr_bp = NULL;
26691	if (NOT_DEVBSIZE(un)) {
26692		tgt_byte_offset = blkno * un->un_sys_blocksize;
26693		tgt_byte_count = nblk * un->un_sys_blocksize;
26694		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
26695		    (tgt_byte_count % un->un_tgt_blocksize)) {
26696			doing_rmw = TRUE;
26697			/*
26698			 * Calculate the block number and number of block
26699			 * Calculate the block number and number of blocks
26700			 */
26701			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26702			tgt_nblk =
26703			    ((tgt_byte_offset + tgt_byte_count +
26704				(un->un_tgt_blocksize - 1)) /
26705				un->un_tgt_blocksize) - tgt_blkno;
26706
26707			/*
26708			 * Invoke the routine which is going to do read part
26709			 * of read-modify-write.
26710			 * Note that this routine returns a pointer to
26711			 * a valid bp in wr_bp.
26712			 */
26713			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
26714			    &wr_bp);
26715			if (err) {
26716				mutex_exit(SD_MUTEX(un));
26717				return (err);
26718			}
26719			/*
26720			 * The copy offset is calculated as:
26721			 * (original block # * system block size) -
26722			 * (new block # * target block size)
26723			 */
26724			io_start_offset =
26725			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
26726			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
26727
26728			ASSERT((io_start_offset >= 0) &&
26729			    (io_start_offset < un->un_tgt_blocksize));
26730			/*
26731			 * Do the modify portion of read modify write.
26732			 */
26733			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
26734			    (size_t)nblk * un->un_sys_blocksize);
26735		} else {
26736			doing_rmw = FALSE;
26737			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26738			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
26739		}
26740
26741		/* Convert blkno and nblk to target blocks */
26742		blkno = tgt_blkno;
26743		nblk = tgt_nblk;
26744	} else {
26745		wr_bp = &wr_buf;
26746		bzero(wr_bp, sizeof (struct buf));
26747		wr_bp->b_flags		= B_BUSY;
26748		wr_bp->b_un.b_addr	= addr;
26749		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
26750		wr_bp->b_resid		= 0;
26751	}
26752
26753	mutex_exit(SD_MUTEX(un));
26754
26755	/*
26756	 * Obtain a SCSI packet for the write command.
26757	 * It should be safe to call the allocator here without
26758	 * worrying about being locked for DVMA mapping because
26759	 * the address we're passed is already a DVMA mapping.
26760	 *
26761	 * We are also not going to worry about semaphore ownership
26762	 * in the dump buffer. Dumping is single threaded at present.
26763	 */
26764
26765	wr_pktp = NULL;
26766
26767#if defined(__i386) || defined(__amd64)
26768	dma_resid = wr_bp->b_bcount;
26769	oblkno = blkno;
26770	while (dma_resid != 0) {
26771#endif
26772
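	/*
	 * On x86, the HBA may bind only part of the buffer at a time
	 * (partial DMA); the enclosing while loop re-issues the write for
	 * the remaining residual until the whole buffer has been sent.
	 */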
26773	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26774		wr_bp->b_flags &= ~B_ERROR;
26775
26776#if defined(__i386) || defined(__amd64)
26777		blkno = oblkno +
26778			((wr_bp->b_bcount - dma_resid) /
26779			    un->un_tgt_blocksize);
26780		nblk = dma_resid / un->un_tgt_blocksize;
26781
26782		if (wr_pktp) {
26783			/* Partial DMA transfers after initial transfer */
26784			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26785			    blkno, nblk);
26786		} else {
26787			/* Initial transfer */
26788			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26789			    un->un_pkt_flags, NULL_FUNC, NULL,
26790			    blkno, nblk);
26791		}
26792#else
26793		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26794		    0, NULL_FUNC, NULL, blkno, nblk);
26795#endif
26796
26797		if (rval == 0) {
26798			/* We were given a SCSI packet, continue. */
26799			break;
26800		}
26801
26802		if (i == 0) {
26803			if (wr_bp->b_flags & B_ERROR) {
26804				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26805				    "no resources for dumping; "
26806				    "error code: 0x%x, retrying",
26807				    geterror(wr_bp));
26808			} else {
26809				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26810				    "no resources for dumping; retrying");
26811			}
26812		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26813			if (wr_bp->b_flags & B_ERROR) {
26814				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26815				    "no resources for dumping; error code: "
26816				    "0x%x, retrying\n", geterror(wr_bp));
26817			}
26818		} else {
26819			if (wr_bp->b_flags & B_ERROR) {
26820				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26821				    "no resources for dumping; "
26822				    "error code: 0x%x, retries failed, "
26823				    "giving up.\n", geterror(wr_bp));
26824			} else {
26825				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26826				    "no resources for dumping; "
26827				    "retries failed, giving up.\n");
26828			}
26829			mutex_enter(SD_MUTEX(un));
26830			Restore_state(un);
26831			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26832				mutex_exit(SD_MUTEX(un));
26833				scsi_free_consistent_buf(wr_bp);
26834			} else {
26835				mutex_exit(SD_MUTEX(un));
26836			}
26837			return (EIO);
26838		}
26839		drv_usecwait(10000);
26840	}
26841
26842#if defined(__i386) || defined(__amd64)
26843	/*
26844	 * save the resid from PARTIAL_DMA
26845	 */
26846	dma_resid = wr_pktp->pkt_resid;
26847	if (dma_resid != 0)
26848		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26849	wr_pktp->pkt_resid = 0;
26850#endif
26851
26852	/* SunBug 1222170 */
26853	wr_pktp->pkt_flags = FLAG_NOINTR;
26854
26855	err = EIO;
26856	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26857
26858		/*
26859		 * Scsi_poll returns 0 (success) if the command completes and
26860		 * the status block is STATUS_GOOD.  We should only check
26861		 * errors if this condition is not true.  Even then we should
26862		 * send our own request sense packet only if we have a check
26863		 * condition and auto request sense has not been performed by
26864		 * the hba.
26865		 */
26866		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26867
26868		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26869		    (wr_pktp->pkt_resid == 0)) {
26870			err = SD_SUCCESS;
26871			break;
26872		}
26873
26874		/*
26875		 * Check CMD_DEV_GONE 1st, give up if device is gone.
26876		 */
26877		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26878			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26879			    "Device is gone\n");
26880			break;
26881		}
26882
26883		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26884			SD_INFO(SD_LOG_DUMP, un,
26885			    "sddump: write failed with CHECK, try # %d\n", i);
26886			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26887				(void) sd_send_polled_RQS(un);
26888			}
26889
26890			continue;
26891		}
26892
26893		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26894			int reset_retval = 0;
26895
26896			SD_INFO(SD_LOG_DUMP, un,
26897			    "sddump: write failed with BUSY, try # %d\n", i);
26898
26899			if (un->un_f_lun_reset_enabled == TRUE) {
26900				reset_retval = scsi_reset(SD_ADDRESS(un),
26901				    RESET_LUN);
26902			}
26903			if (reset_retval == 0) {
26904				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26905			}
26906			(void) sd_send_polled_RQS(un);
26907
26908		} else {
26909			SD_INFO(SD_LOG_DUMP, un,
26910			    "sddump: write failed with 0x%x, try # %d\n",
26911			    SD_GET_PKT_STATUS(wr_pktp), i);
26912			mutex_enter(SD_MUTEX(un));
26913			sd_reset_target(un, wr_pktp);
26914			mutex_exit(SD_MUTEX(un));
26915		}
26916
26917		/*
26918		 * If we are not getting anywhere with lun/target resets,
26919		 * let's reset the bus.
26920		 */
26921		if (i == SD_NDUMP_RETRIES/2) {
26922			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26923			(void) sd_send_polled_RQS(un);
26924		}
26925
26926	}
26927#if defined(__i386) || defined(__amd64)
26928	}	/* dma_resid */
26929#endif
26930
26931	scsi_destroy_pkt(wr_pktp);
26932	mutex_enter(SD_MUTEX(un));
26933	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26934		mutex_exit(SD_MUTEX(un));
26935		scsi_free_consistent_buf(wr_bp);
26936	} else {
26937		mutex_exit(SD_MUTEX(un));
26938	}
26939	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26940	return (err);
26941}
26942
26943/*
26944 *    Function: sd_scsi_poll()
26945 *
26946 * Description: This is a wrapper for the scsi_poll call.
26947 *
26948 *   Arguments: sd_lun - The unit structure
26949 *              scsi_pkt - The scsi packet being sent to the device.
26950 *
26951 * Return Code: 0 - Command completed successfully with good status
26952 *             -1 - Command failed.  This could indicate a check condition
26953 *                  or other status value requiring recovery action.
26954 *
26955 */
26956
26957static int
26958sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26959{
26960	int status;
26961
26962	ASSERT(un != NULL);
26963	ASSERT(!mutex_owned(SD_MUTEX(un)));
26964	ASSERT(pktp != NULL);
26965
26966	status = SD_SUCCESS;
26967
26968	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26969		pktp->pkt_flags |= un->un_tagflags;
26970		pktp->pkt_flags &= ~FLAG_NODISCON;
26971	}
26972
26973	status = sd_ddi_scsi_poll(pktp);
26974	/*
26975	 * Scsi_poll returns 0 (success) if the command completes and the
26976	 * status block is STATUS_GOOD.  We should only check errors if this
26977	 * condition is not true.  Even then we should send our own request
26978	 * sense packet only if we have a check condition and auto
26979	 * request sense has not been performed by the hba.
26980	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26981	 */
26982	if ((status != SD_SUCCESS) &&
26983	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26984	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26985	    (pktp->pkt_reason != CMD_DEV_GONE))
26986		(void) sd_send_polled_RQS(un);
26987
26988	return (status);
26989}
26990
26991/*
26992 *    Function: sd_send_polled_RQS()
26993 *
26994 * Description: This sends the request sense command to a device.
26995 *
26996 *   Arguments: sd_lun - The unit structure
26997 *
26998 * Return Code: 0 - Command completed successfully with good status
26999 *             -1 - Command failed.
27000 *
27001 */
27002
27003static int
27004sd_send_polled_RQS(struct sd_lun *un)
27005{
27006	int	ret_val;
27007	struct	scsi_pkt	*rqs_pktp;
27008	struct	buf		*rqs_bp;
27009
27010	ASSERT(un != NULL);
27011	ASSERT(!mutex_owned(SD_MUTEX(un)));
27012
27013	ret_val = SD_SUCCESS;
27014
27015	rqs_pktp = un->un_rqs_pktp;
27016	rqs_bp	 = un->un_rqs_bp;
27017
27018	mutex_enter(SD_MUTEX(un));
27019
27020	if (un->un_sense_isbusy) {
27021		ret_val = SD_FAILURE;
27022		mutex_exit(SD_MUTEX(un));
27023		return (ret_val);
27024	}
27025
27026	/*
27027	 * If the request sense buffer (and packet) is not in use,
27028	 * let's set un_sense_isbusy and send our packet.
27029	 */
27030	un->un_sense_isbusy 	= 1;
27031	rqs_pktp->pkt_resid  	= 0;
27032	rqs_pktp->pkt_reason 	= 0;
27033	rqs_pktp->pkt_flags |= FLAG_NOINTR;
27034	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
27035
27036	mutex_exit(SD_MUTEX(un));
27037
27038	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
27039	    " 0x%p\n", rqs_bp->b_un.b_addr);
27040
27041	/*
27042	 * Can't send this to sd_scsi_poll; we would wrap ourselves around the
27043	 * axle - it has a call into us!
27044	 */
27045	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
27046		SD_INFO(SD_LOG_COMMON, un,
27047		    "sd_send_polled_RQS: RQS failed\n");
27048	}
27049
27050	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
27051	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
27052
27053	mutex_enter(SD_MUTEX(un));
27054	un->un_sense_isbusy = 0;
27055	mutex_exit(SD_MUTEX(un));
27056
27057	return (ret_val);
27058}
27059
27060/*
27061 * Defines needed for localized version of the scsi_poll routine.
27062 */
27063#define	SD_CSEC		10000			/* usecs */
27064#define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
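/*
 * With SD_CSEC at 10000 usec (10 msec) per polling interval,
 * SD_SEC_TO_CSEC works out to 100; a default 60 second pkt_time thus
 * allows up to 6000 iterations of the polling loop in sd_ddi_scsi_poll().
 */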
27065
27066
27067/*
27068 *    Function: sd_ddi_scsi_poll()
27069 *
27070 * Description: Localized version of the scsi_poll routine.  The purpose is to
27071 *		send a scsi_pkt to a device as a polled command.  This version
27072 *		is to ensure more robust handling of transport errors.
27073 *		Specifically, this routine cures the "not ready, coming
27074 *		ready" transition during power-up and reset of Sonoma LUNs.
27075 *		This can take up to 45 seconds for power-on and 20 seconds
27076 *		for reset of a Sonoma LUN.
27077 *
27078 *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
27079 *
27080 * Return Code: 0 - Command completed successfully with good status
27081 *             -1 - Command failed.
27082 *
27083 */
27084
27085static int
27086sd_ddi_scsi_poll(struct scsi_pkt *pkt)
27087{
27088	int busy_count;
27089	int timeout;
27090	int rval = SD_FAILURE;
27091	int savef;
27092	uint8_t *sensep;
27093	long savet;
27094	void (*savec)();
27095	/*
27096	 * The following is defined in machdep.c and is used in determining if
27097	 * the scsi transport system will do polled I/O instead of interrupt
27098	 * I/O when called from xx_dump().
27099	 */
27100	extern int do_polled_io;
27101
27102	/*
27103	 * save old flags in pkt, to restore at end
27104	 */
27105	savef = pkt->pkt_flags;
27106	savec = pkt->pkt_comp;
27107	savet = pkt->pkt_time;
27108
27109	pkt->pkt_flags |= FLAG_NOINTR;
27110
27111	/*
27112	 * XXX there is nothing in the SCSA spec that states that we should not
27113	 * do a callback for polled cmds; however, removing this will break sd
27114	 * and probably other target drivers
27115	 */
27116	pkt->pkt_comp = NULL;
27117
27118	/*
27119	 * We don't like a polled command without a timeout;
27120	 * 60 seconds seems long enough.
27121	 */
27122	if (pkt->pkt_time == 0) {
27123		pkt->pkt_time = SCSI_POLL_TIMEOUT;
27124	}
27125
27126	/*
27127	 * Send polled cmd.
27128	 *
27129	 * We do some error recovery for various errors.  Tran_busy,
27130	 * queue full, and non-dispatched commands are retried every 10 msec.
27131	 * queue full, and non-dispatched commands are retried every 10 msec,
27132	 * as they are typically transient failures.  Busy status and Not
27133	 * change.  Unit attention is retried for pkt_time (60) times
27134	 * with no delay.
27135	 */
27136	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
27137
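	/*
	 * busy_count is kept in SD_CSEC (10 msec) units; retries that
	 * sleep for a full second charge an extra (SD_SEC_TO_CSEC - 1)
	 * units below so the overall pkt_time budget is still honored.
	 */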
27138	for (busy_count = 0; busy_count < timeout; busy_count++) {
27139		int rc;
27140		int poll_delay;
27141
27142		/*
27143		 * Initialize pkt status variables.
27144		 */
27145		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
27146
27147		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
27148			if (rc != TRAN_BUSY) {
27149				/* Transport failed - give up. */
27150				break;
27151			} else {
27152				/* Transport busy - try again. */
27153				poll_delay = 1 * SD_CSEC; /* 10 msec */
27154			}
27155		} else {
27156			/*
27157			 * Transport accepted - check pkt status.
27158			 */
27159			rc = (*pkt->pkt_scbp) & STATUS_MASK;
27160			if (pkt->pkt_reason == CMD_CMPLT &&
27161			    rc == STATUS_CHECK &&
27162			    pkt->pkt_state & STATE_ARQ_DONE) {
27163				struct scsi_arq_status *arqstat =
27164				    (struct scsi_arq_status *)(pkt->pkt_scbp);
27165
27166				sensep = (uint8_t *)&arqstat->sts_sensedata;
27167			} else {
27168				sensep = NULL;
27169			}
27170
27171			if ((pkt->pkt_reason == CMD_CMPLT) &&
27172			    (rc == STATUS_GOOD)) {
27173				/* No error - we're done */
27174				rval = SD_SUCCESS;
27175				break;
27176
27177			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
27178				/* Lost connection - give up */
27179				break;
27180
27181			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
27182			    (pkt->pkt_state == 0)) {
27183				/* Pkt not dispatched - try again. */
27184				poll_delay = 1 * SD_CSEC; /* 10 msec. */
27185
27186			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
27187			    (rc == STATUS_QFULL)) {
27188				/* Queue full - try again. */
27189				poll_delay = 1 * SD_CSEC; /* 10 msec. */
27190
27191			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
27192			    (rc == STATUS_BUSY)) {
27193				/* Busy - try again. */
27194				poll_delay = 100 * SD_CSEC; /* 1 sec. */
27195				busy_count += (SD_SEC_TO_CSEC - 1);
27196
27197			} else if ((sensep != NULL) &&
27198			    (scsi_sense_key(sensep) ==
27199				KEY_UNIT_ATTENTION)) {
27200				/* Unit Attention - try again */
27201				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
27202				continue;
27203
27204			} else if ((sensep != NULL) &&
27205			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
27206			    (scsi_sense_asc(sensep) == 0x04) &&
27207			    (scsi_sense_ascq(sensep) == 0x01)) {
27208				/* Not ready -> ready - try again. */
27209				poll_delay = 100 * SD_CSEC; /* 1 sec. */
27210				busy_count += (SD_SEC_TO_CSEC - 1);
27211
27212			} else {
27213				/* BAD status - give up. */
27214				break;
27215			}
27216		}
27217
27218		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
27219		    !do_polled_io) {
27220			delay(drv_usectohz(poll_delay));
27221		} else {
27222			/* we busy wait during cpr_dump or interrupt threads */
27223			drv_usecwait(poll_delay);
27224		}
27225	}
27226
27227	pkt->pkt_flags = savef;
27228	pkt->pkt_comp = savec;
27229	pkt->pkt_time = savet;
27230	return (rval);
27231}
27232
27233
27234/*
27235 *    Function: sd_persistent_reservation_in_read_keys
27236 *
27237 * Description: This routine is the driver entry point for handling
27238 *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
27239 *		by sending the SCSI-3 PRIN commands to the device.
27240 *		Processes the read keys command response by copying the
27241 *		reservation key information into the user provided buffer.
27242 *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
27243 *
27244 *   Arguments: un   -  Pointer to soft state struct for the target.
27245 *		usrp -	user provided pointer to multihost Persistent In Read
27246 *			Keys structure (mhioc_inkeys_t)
27247 *		flag -	this argument is a pass through to ddi_copyxxx()
27248 *			directly from the mode argument of ioctl().
27249 *
27250 * Return Code: 0   - Success
27251 *		EACCES
27252 *		ENOTSUP
27253 *		errno return code from sd_send_scsi_cmd()
27254 *
27255 *     Context: Can sleep. Does not return until command is completed.
27256 */
27257
27258static int
27259sd_persistent_reservation_in_read_keys(struct sd_lun *un,
27260    mhioc_inkeys_t *usrp, int flag)
27261{
27262#ifdef _MULTI_DATAMODEL
27263	struct mhioc_key_list32	li32;
27264#endif
27265	sd_prin_readkeys_t	*in;
27266	mhioc_inkeys_t		*ptr;
27267	mhioc_key_list_t	li;
27268	uchar_t			*data_bufp;
27269	int 			data_len;
27270	int			rval;
27271	size_t			copysz;
27272
27273	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
27274		return (EINVAL);
27275	}
27276	bzero(&li, sizeof (mhioc_key_list_t));
27277
27278	/*
27279	 * Get the listsize from user
27280	 */
27281#ifdef _MULTI_DATAMODEL
27282
27283	switch (ddi_model_convert_from(flag & FMODELS)) {
27284	case DDI_MODEL_ILP32:
27285		copysz = sizeof (struct mhioc_key_list32);
27286		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
27287			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27288			    "sd_persistent_reservation_in_read_keys: "
27289			    "failed ddi_copyin: mhioc_key_list32_t\n");
27290			rval = EFAULT;
27291			goto done;
27292		}
27293		li.listsize = li32.listsize;
27294		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
27295		break;
27296
27297	case DDI_MODEL_NONE:
27298		copysz = sizeof (mhioc_key_list_t);
27299		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
27300			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27301			    "sd_persistent_reservation_in_read_keys: "
27302			    "failed ddi_copyin: mhioc_key_list_t\n");
27303			rval = EFAULT;
27304			goto done;
27305		}
27306		break;
27307	}
27308
27309#else /* ! _MULTI_DATAMODEL */
27310	copysz = sizeof (mhioc_key_list_t);
27311	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
27312		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27313		    "sd_persistent_reservation_in_read_keys: "
27314		    "failed ddi_copyin: mhioc_key_list_t\n");
27315		rval = EFAULT;
27316		goto done;
27317	}
27318#endif
27319
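	/*
	 * Allocate space for the PRIN READ KEYS header plus listsize keys.
	 * The sizeof (caddr_t) term appears to back out the keylist
	 * placeholder member at the end of sd_prin_readkeys_t, which is
	 * presumably why it is subtracted here.
	 */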
27320	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
27321	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
27322	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27323
27324	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
27325	    data_len, data_bufp)) != 0) {
27326		goto done;
27327	}
27328	in = (sd_prin_readkeys_t *)data_bufp;
27329	ptr->generation = BE_32(in->generation);
27330	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
27331
27332	/*
27333	 * Return the min(listsize, listlen) keys
27334	 */
27335#ifdef _MULTI_DATAMODEL
27336
27337	switch (ddi_model_convert_from(flag & FMODELS)) {
27338	case DDI_MODEL_ILP32:
27339		li32.listlen = li.listlen;
27340		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
27341			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27342			    "sd_persistent_reservation_in_read_keys: "
27343			    "failed ddi_copyout: mhioc_key_list32_t\n");
27344			rval = EFAULT;
27345			goto done;
27346		}
27347		break;
27348
27349	case DDI_MODEL_NONE:
27350		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27351			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27352			    "sd_persistent_reservation_in_read_keys: "
27353			    "failed ddi_copyout: mhioc_key_list_t\n");
27354			rval = EFAULT;
27355			goto done;
27356		}
27357		break;
27358	}
27359
27360#else /* ! _MULTI_DATAMODEL */
27361
27362	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27363		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27364		    "sd_persistent_reservation_in_read_keys: "
27365		    "failed ddi_copyout: mhioc_key_list_t\n");
27366		rval = EFAULT;
27367		goto done;
27368	}
27369
27370#endif /* _MULTI_DATAMODEL */
27371
27372	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
27373	    li.listsize * MHIOC_RESV_KEY_SIZE);
27374	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
27375		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27376		    "sd_persistent_reservation_in_read_keys: "
27377		    "failed ddi_copyout: keylist\n");
27378		rval = EFAULT;
27379	}
27380done:
27381	kmem_free(data_bufp, data_len);
27382	return (rval);
27383}
27384
27385
27386/*
27387 *    Function: sd_persistent_reservation_in_read_resv
27388 *
27389 * Description: This routine is the driver entry point for handling
27390 *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
27391 *		by sending the SCSI-3 PRIN commands to the device.
27392 *		Process the read persistent reservations command response by
27393 *		copying the reservation information into the user provided
27394 *		buffer. Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
27395 *
27396 *   Arguments: un   -  Pointer to soft state struct for the target.
27397 *		usrp -	user provided pointer to multihost Persistent In Read
27398 *			Reservations structure (mhioc_inresvs_t)
27399 *		flag -	this argument is a pass through to ddi_copyxxx()
27400 *			directly from the mode argument of ioctl().
27401 *
27402 * Return Code: 0   - Success
27403 *		EACCES
27404 *		ENOTSUP
27405 *		errno return code from sd_send_scsi_cmd()
27406 *
27407 *     Context: Can sleep. Does not return until command is completed.
27408 */
27409
27410static int
27411sd_persistent_reservation_in_read_resv(struct sd_lun *un,
27412    mhioc_inresvs_t *usrp, int flag)
27413{
27414#ifdef _MULTI_DATAMODEL
27415	struct mhioc_resv_desc_list32 resvlist32;
27416#endif
27417	sd_prin_readresv_t	*in;
27418	mhioc_inresvs_t		*ptr;
27419	sd_readresv_desc_t	*readresv_ptr;
27420	mhioc_resv_desc_list_t	resvlist;
27421	mhioc_resv_desc_t 	resvdesc;
27422	uchar_t			*data_bufp;
27423	int 			data_len;
27424	int			rval;
27425	int			i;
27426	size_t			copysz;
27427	mhioc_resv_desc_t	*bufp;
27428
27429	if ((ptr = usrp) == NULL) {
27430		return (EINVAL);
27431	}
27432
27433	/*
27434	 * Get the listsize from user
27435	 */
27436#ifdef _MULTI_DATAMODEL
27437	switch (ddi_model_convert_from(flag & FMODELS)) {
27438	case DDI_MODEL_ILP32:
27439		copysz = sizeof (struct mhioc_resv_desc_list32);
27440		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
27441			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27442			    "sd_persistent_reservation_in_read_resv: "
27443			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27444			rval = EFAULT;
27445			goto done;
27446		}
27447		resvlist.listsize = resvlist32.listsize;
27448		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
27449		break;
27450
27451	case DDI_MODEL_NONE:
27452		copysz = sizeof (mhioc_resv_desc_list_t);
27453		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27454			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27455			    "sd_persistent_reservation_in_read_resv: "
27456			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27457			rval = EFAULT;
27458			goto done;
27459		}
27460		break;
27461	}
27462#else /* ! _MULTI_DATAMODEL */
27463	copysz = sizeof (mhioc_resv_desc_list_t);
27464	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27465		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27466		    "sd_persistent_reservation_in_read_resv: "
27467		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27468		rval = EFAULT;
27469		goto done;
27470	}
27471#endif /* ! _MULTI_DATAMODEL */
27472
27473	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
27474	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
27475	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27476
27477	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
27478	    data_len, data_bufp)) != 0) {
27479		goto done;
27480	}
27481	in = (sd_prin_readresv_t *)data_bufp;
27482	ptr->generation = BE_32(in->generation);
27483	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
27484
27485	/*
27486	 * Return the min(listsize, listlen) keys
27487	 */
27488#ifdef _MULTI_DATAMODEL
27489
27490	switch (ddi_model_convert_from(flag & FMODELS)) {
27491	case DDI_MODEL_ILP32:
27492		resvlist32.listlen = resvlist.listlen;
27493		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27494			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27495			    "sd_persistent_reservation_in_read_resv: "
27496			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27497			rval = EFAULT;
27498			goto done;
27499		}
27500		break;
27501
27502	case DDI_MODEL_NONE:
27503		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27504			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27505			    "sd_persistent_reservation_in_read_resv: "
27506			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27507			rval = EFAULT;
27508			goto done;
27509		}
27510		break;
27511	}
27512
27513#else /* ! _MULTI_DATAMODEL */
27514
27515	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27516		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27517		    "sd_persistent_reservation_in_read_resv: "
27518		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27519		rval = EFAULT;
27520		goto done;
27521	}
27522
27523#endif /* ! _MULTI_DATAMODEL */
27524
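	/*
	 * Walk the returned reservation descriptors, converting the
	 * big-endian scope_specific_addr field and copying each
	 * descriptor out to the user's list one entry at a time.
	 */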
27525	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27526	bufp = resvlist.list;
27527	copysz = sizeof (mhioc_resv_desc_t);
27528	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27529	    i++, readresv_ptr++, bufp++) {
27530
27531		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27532		    MHIOC_RESV_KEY_SIZE);
27533		resvdesc.type  = readresv_ptr->type;
27534		resvdesc.scope = readresv_ptr->scope;
27535		resvdesc.scope_specific_addr =
27536		    BE_32(readresv_ptr->scope_specific_addr);
27537
27538		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27539			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27540			    "sd_persistent_reservation_in_read_resv: "
27541			    "failed ddi_copyout: resvlist\n");
27542			rval = EFAULT;
27543			goto done;
27544		}
27545	}
27546done:
27547	kmem_free(data_bufp, data_len);
27548	return (rval);
27549}
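
/*
 * Illustrative usage sketch (not part of the driver): this entry point
 * services the MHIOCGRPINRESV ioctl. A userland caller might set it up
 * as follows, assuming 'fd' is an open descriptor for the device
 * (hypothetical; error handling omitted):
 *
 *	mhioc_resv_desc_t	descs[4];
 *	mhioc_resv_desc_list_t	list;
 *	mhioc_inresvs_t		inresvs;
 *
 *	list.listsize = 4;
 *	list.list     = descs;
 *	inresvs.li    = &list;
 *	(void) ioctl(fd, MHIOCGRPINRESV, &inresvs);
 *
 * On return list.listlen holds the reservation count reported by the
 * device, and min(listsize, listlen) descriptors are in descs[].
 */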
27550
27551
27552/*
27553 *    Function: sr_change_blkmode()
27554 *
27555 * Description: This routine is the driver entry point for handling CD-ROM
27556 *		block mode ioctl requests. Support for returning and changing
27557 *		the current block size in use by the device is implemented. The
27558 *		LBA size is changed via a MODE SELECT Block Descriptor.
27559 *
27560 *		This routine issues a mode sense with an allocation length of
27561 *		12 bytes for the mode page header and a single block descriptor.
27562 *
27563 *   Arguments: dev - the device 'dev_t'
27564 *		cmd - the request type; one of CDROMGBLKMODE (get) or
27565 *		      CDROMSBLKMODE (set)
27566 *		data - current block size or requested block size
27567 *		flag - this argument is a pass through to ddi_copyxxx() directly
27568 *		       from the mode argument of ioctl().
27569 *
27570 * Return Code: the code returned by sd_send_scsi_cmd()
27571 *		EINVAL if invalid arguments are provided
27572 *		EFAULT if ddi_copyxxx() fails
27573 *		ENXIO if ddi_get_soft_state() fails
27574 *		EIO if invalid mode sense block descriptor length
27575 *
27576 */
27577
27578static int
27579sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
27580{
27581	struct sd_lun			*un = NULL;
27582	struct mode_header		*sense_mhp, *select_mhp;
27583	struct block_descriptor		*sense_desc, *select_desc;
27584	int				current_bsize;
27585	int				rval = EINVAL;
27586	uchar_t				*sense = NULL;
27587	uchar_t				*select = NULL;
27588
27589	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
27590
27591	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27592		return (ENXIO);
27593	}
27594
27595	/*
27596	 * The block length is changed via the Mode Select block descriptor; the
27597	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
27598	 * required by this routine. Therefore the mode sense allocation
27599	 * length is specified to be the length of a mode page header and a
27600	 * block descriptor.
27601	 */
27602	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27603
27604	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27605	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
27606		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27607		    "sr_change_blkmode: Mode Sense Failed\n");
27608		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27609		return (rval);
27610	}
27611
27612	/* Check the block descriptor len to handle only 1 block descriptor */
27613	sense_mhp = (struct mode_header *)sense;
27614	if ((sense_mhp->bdesc_length == 0) ||
27615	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
27616		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27617		    "sr_change_blkmode: Mode Sense returned invalid block"
27618		    " descriptor length\n");
27619		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27620		return (EIO);
27621	}
27622	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
27623	current_bsize = ((sense_desc->blksize_hi << 16) |
27624	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
27625
27626	/* Process command */
27627	switch (cmd) {
27628	case CDROMGBLKMODE:
27629		/* Return the block size obtained during the mode sense */
27630		if (ddi_copyout(&current_bsize, (void *)data,
27631		    sizeof (int), flag) != 0)
27632			rval = EFAULT;
27633		break;
27634	case CDROMSBLKMODE:
27635		/* Validate the requested block size */
27636		switch (data) {
27637		case CDROM_BLK_512:
27638		case CDROM_BLK_1024:
27639		case CDROM_BLK_2048:
27640		case CDROM_BLK_2056:
27641		case CDROM_BLK_2336:
27642		case CDROM_BLK_2340:
27643		case CDROM_BLK_2352:
27644		case CDROM_BLK_2368:
27645		case CDROM_BLK_2448:
27646		case CDROM_BLK_2646:
27647		case CDROM_BLK_2647:
27648			break;
27649		default:
27650			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27651			    "sr_change_blkmode: "
27652			    "Block Size '%ld' Not Supported\n", data);
27653			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27654			return (EINVAL);
27655		}
27656
27657		/*
27658		 * The current block size matches the requested block size so
27659		 * there is no need to send the mode select to change the size
27660		 */
27661		if (current_bsize == data) {
27662			break;
27663		}
27664
27665		/* Build the select data for the requested block size */
27666		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27667		select_mhp = (struct mode_header *)select;
27668		select_desc =
27669		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
27670		/*
27671		 * The LBA size is changed via the block descriptor, so the
27672		 * descriptor is built according to the user data
27673		 */
27674		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
27675		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
27676		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
27677		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
27678
27679		/* Send the mode select for the requested block size */
27680		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27681		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27682		    SD_PATH_STANDARD)) != 0) {
27683			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27684			    "sr_change_blkmode: Mode Select Failed\n");
27685			/*
27686			 * The mode select failed for the requested block size,
27687			 * so reset the data for the original block size and
27688			 * send it to the target. The error is indicated by the
27689			 * return value for the failed mode select.
27690			 */
27691			select_desc->blksize_hi  = sense_desc->blksize_hi;
27692			select_desc->blksize_mid = sense_desc->blksize_mid;
27693			select_desc->blksize_lo  = sense_desc->blksize_lo;
27694			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27695			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27696			    SD_PATH_STANDARD);
27697		} else {
27698			ASSERT(!mutex_owned(SD_MUTEX(un)));
27699			mutex_enter(SD_MUTEX(un));
27700			sd_update_block_info(un, (uint32_t)data, 0);
27701
27702			mutex_exit(SD_MUTEX(un));
27703		}
27704		break;
27705	default:
27706		/* should not reach here, but check anyway */
27707		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27708		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
27709		rval = EINVAL;
27710		break;
27711	}
27712
27713	if (select) {
27714		kmem_free(select, BUFLEN_CHG_BLK_MODE);
27715	}
27716	if (sense) {
27717		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27718	}
27719	return (rval);
27720}
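
/*
 * Illustrative usage sketch (not part of the driver), assuming 'fd' is
 * an open descriptor for the raw CD device; note that CDROMGBLKMODE
 * takes a pointer while CDROMSBLKMODE passes the size by value:
 *
 *	int bsize;
 *
 *	(void) ioctl(fd, CDROMGBLKMODE, &bsize);
 *	(void) ioctl(fd, CDROMSBLKMODE, CDROM_BLK_2048);
 */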
27721
27722
27723/*
27724 * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27725 * implement driver support for getting and setting the CD speed. The command
27726 * set used will be based on the device type. If the device has not been
27727 * identified as MMC the Toshiba vendor specific mode page will be used. If
27728 * the device is MMC but does not support the Real Time Streaming feature
27729 * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27730 * be used to read the speed.
27731 */
27732
27733/*
27734 *    Function: sr_change_speed()
27735 *
27736 * Description: This routine is the driver entry point for handling CD-ROM
27737 *		drive speed ioctl requests for devices supporting the Toshiba
27738 *		vendor specific drive speed mode page. Support for returning
27739 *		and changing the current drive speed in use by the device is
27740 *		implemented.
27741 *
27742 *   Arguments: dev - the device 'dev_t'
27743 *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27744 *		      CDROMSDRVSPEED (set)
27745 *		data - current drive speed or requested drive speed
27746 *		flag - this argument is a pass through to ddi_copyxxx() directly
27747 *		       from the mode argument of ioctl().
27748 *
27749 * Return Code: the code returned by sd_send_scsi_cmd()
27750 *		EINVAL if invalid arguments are provided
27751 *		EFAULT if ddi_copyxxx() fails
27752 *		ENXIO if ddi_get_soft_state() fails
27753 *		EIO if invalid mode sense block descriptor length
27754 */
27755
27756static int
27757sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27758{
27759	struct sd_lun			*un = NULL;
27760	struct mode_header		*sense_mhp, *select_mhp;
27761	struct mode_speed		*sense_page, *select_page;
27762	int				current_speed;
27763	int				rval = EINVAL;
27764	int				bd_len;
27765	uchar_t				*sense = NULL;
27766	uchar_t				*select = NULL;
27767
27768	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27769	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27770		return (ENXIO);
27771	}
27772
27773	/*
27774	 * Note: The drive speed is being modified here according to a Toshiba
27775	 * vendor specific mode page (0x31).
27776	 */
27777	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27778
27779	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27780	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27781	    SD_PATH_STANDARD)) != 0) {
27782		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27783		    "sr_change_speed: Mode Sense Failed\n");
27784		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27785		return (rval);
27786	}
27787	sense_mhp  = (struct mode_header *)sense;
27788
27789	/* Check the block descriptor len to handle only 1 block descriptor */
27790	bd_len = sense_mhp->bdesc_length;
27791	if (bd_len > MODE_BLK_DESC_LENGTH) {
27792		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27793		    "sr_change_speed: Mode Sense returned invalid block "
27794		    "descriptor length\n");
27795		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27796		return (EIO);
27797	}
27798
27799	sense_page = (struct mode_speed *)
27800	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27801	current_speed = sense_page->speed;
27802
27803	/* Process command */
27804	switch (cmd) {
27805	case CDROMGDRVSPEED:
27806		/* Return the drive speed obtained during the mode sense */
27807		if (current_speed == 0x2) {
27808			current_speed = CDROM_TWELVE_SPEED;
27809		}
27810		if (ddi_copyout(&current_speed, (void *)data,
27811		    sizeof (int), flag) != 0) {
27812			rval = EFAULT;
27813		}
27814		break;
27815	case CDROMSDRVSPEED:
27816		/* Validate the requested drive speed */
27817		switch ((uchar_t)data) {
27818		case CDROM_TWELVE_SPEED:
27819			data = 0x2;
27820			/*FALLTHROUGH*/
27821		case CDROM_NORMAL_SPEED:
27822		case CDROM_DOUBLE_SPEED:
27823		case CDROM_QUAD_SPEED:
27824		case CDROM_MAXIMUM_SPEED:
27825			break;
27826		default:
27827			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27828			    "sr_change_speed: "
27829			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27830			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27831			return (EINVAL);
27832		}
27833
27834		/*
27835		 * The current drive speed matches the requested drive speed so
27836		 * there is no need to send the mode select to change the speed
27837		 */
27838		if (current_speed == data) {
27839			break;
27840		}
27841
27842		/* Build the select data for the requested drive speed */
27843		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27844		select_mhp = (struct mode_header *)select;
27845		select_mhp->bdesc_length = 0;
27846		select_page =
27847		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27850		select_page->mode_page.code = CDROM_MODE_SPEED;
27851		select_page->mode_page.length = 2;
27852		select_page->speed = (uchar_t)data;
27853
27854		/* Send the mode select for the requested drive speed */
27855		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27856		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27857		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27858			/*
27859			 * The mode select failed for the requested drive speed,
27860			 * so reset the data for the original drive speed and
27861			 * send it to the target. The error is indicated by the
27862			 * return value for the failed mode select.
27863			 */
27864			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27865			    "sr_drive_speed: Mode Select Failed\n");
27866			select_page->speed = sense_page->speed;
27867			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27868			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27869			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27870		}
27871		break;
27872	default:
27873		/* should not reach here, but check anyway */
27874		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27875		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27876		rval = EINVAL;
27877		break;
27878	}
27879
27880	if (select) {
27881		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27882	}
27883	if (sense) {
27884		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27885	}
27886
27887	return (rval);
27888}
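
/*
 * Illustrative usage sketch (not part of the driver), assuming 'fd' is
 * an open descriptor for the raw CD device; as with the block mode
 * ioctls, the get variant takes a pointer and the set variant passes
 * the speed code by value:
 *
 *	int speed;
 *
 *	(void) ioctl(fd, CDROMGDRVSPEED, &speed);
 *	(void) ioctl(fd, CDROMSDRVSPEED, CDROM_DOUBLE_SPEED);
 */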
27889
27890
27891/*
27892 *    Function: sr_atapi_change_speed()
27893 *
27894 * Description: This routine is the driver entry point for handling CD-ROM
27895 *		drive speed ioctl requests for MMC devices that do not support
27896 *		the Real Time Streaming feature (0x107).
27897 *
27898 *		Note: This routine will use the SET SPEED command which may not
27899 *		be supported by all devices.
27900 *
27901 *   Arguments: dev- the device 'dev_t'
27902 *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27903 *		     CDROMSDRVSPEED (set)
27904 *		data- current drive speed or requested drive speed
27905 *		flag- this argument is a pass through to ddi_copyxxx() directly
27906 *		      from the mode argument of ioctl().
27907 *
27908 * Return Code: the code returned by sd_send_scsi_cmd()
27909 *		EINVAL if invalid arguments are provided
27910 *		EFAULT if ddi_copyxxx() fails
27911 *		ENXIO if ddi_get_soft_state() fails
27912 *		EIO if invalid mode sense block descriptor length
27913 */
27914
27915static int
27916sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27917{
27918	struct sd_lun			*un;
27919	struct uscsi_cmd		*com = NULL;
27920	struct mode_header_grp2		*sense_mhp;
27921	uchar_t				*sense_page;
27922	uchar_t				*sense = NULL;
27923	char				cdb[CDB_GROUP5];
27924	int				bd_len;
27925	int				current_speed = 0;
27926	int				max_speed = 0;
27927	int				rval;
27928
27929	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27930
27931	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27932		return (ENXIO);
27933	}
27934
27935	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27936
27937	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
27938	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27939	    SD_PATH_STANDARD)) != 0) {
27940		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27941		    "sr_atapi_change_speed: Mode Sense Failed\n");
27942		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27943		return (rval);
27944	}
27945
27946	/* Check the block descriptor len to handle only 1 block descriptor */
27947	sense_mhp = (struct mode_header_grp2 *)sense;
27948	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27949	if (bd_len > MODE_BLK_DESC_LENGTH) {
27950		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27951		    "sr_atapi_change_speed: Mode Sense returned invalid "
27952		    "block descriptor length\n");
27953		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27954		return (EIO);
27955	}
27956
27957	/* Calculate the current and maximum drive speeds */
27958	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27959	current_speed = (sense_page[14] << 8) | sense_page[15];
27960	max_speed = (sense_page[8] << 8) | sense_page[9];
27961
27962	/* Process the command */
27963	switch (cmd) {
27964	case CDROMGDRVSPEED:
27965		current_speed /= SD_SPEED_1X;
27966		if (ddi_copyout(&current_speed, (void *)data,
27967		    sizeof (int), flag) != 0)
27968			rval = EFAULT;
27969		break;
27970	case CDROMSDRVSPEED:
27971		/* Convert the speed code to KB/sec */
27972		switch ((uchar_t)data) {
27973		case CDROM_NORMAL_SPEED:
27974			current_speed = SD_SPEED_1X;
27975			break;
27976		case CDROM_DOUBLE_SPEED:
27977			current_speed = 2 * SD_SPEED_1X;
27978			break;
27979		case CDROM_QUAD_SPEED:
27980			current_speed = 4 * SD_SPEED_1X;
27981			break;
27982		case CDROM_TWELVE_SPEED:
27983			current_speed = 12 * SD_SPEED_1X;
27984			break;
27985		case CDROM_MAXIMUM_SPEED:
27986			current_speed = 0xffff;
27987			break;
27988		default:
27989			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27990			    "sr_atapi_change_speed: invalid drive speed %d\n",
27991			    (uchar_t)data);
27992			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27993			return (EINVAL);
27994		}
27995
27996		/* Check the request against the drive's max speed. */
27997		if (current_speed != 0xffff) {
27998			if (current_speed > max_speed) {
27999				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
28000				return (EINVAL);
28001			}
28002		}
28003
28004		/*
28005		 * Build and send the SET SPEED command
28006		 *
28007		 * Note: The SET SPEED (0xBB) command used in this routine is
28008		 * obsolete per the SCSI MMC spec but still supported in the
28009		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI,
28010		 * therefore the command is still implemented in this routine.
28011		 */
28012		bzero(cdb, sizeof (cdb));
28013		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
28014		cdb[2] = (uchar_t)(current_speed >> 8);
28015		cdb[3] = (uchar_t)current_speed;
28016		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28017		com->uscsi_cdb	   = (caddr_t)cdb;
28018		com->uscsi_cdblen  = CDB_GROUP5;
28019		com->uscsi_bufaddr = NULL;
28020		com->uscsi_buflen  = 0;
28021		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
28022		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
28023		    UIO_SYSSPACE, SD_PATH_STANDARD);
28024		break;
28025	default:
28026		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28027		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
28028		rval = EINVAL;
28029	}
28030
28031	if (sense) {
28032		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
28033	}
28034	if (com) {
28035		kmem_free(com, sizeof (*com));
28036	}
28037	return (rval);
28038}
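
/*
 * Worked example (illustrative): assuming SD_SPEED_1X is 176 (KB/sec,
 * the 1x CD-ROM transfer rate), a CDROMSDRVSPEED request for
 * CDROM_QUAD_SPEED converts to 704 KB/sec (0x02C0), so the SET SPEED
 * CDB built above would contain:
 *
 *	cdb[0] = 0xBB	(SCMD_SET_CDROM_SPEED)
 *	cdb[2] = 0x02	(speed, MSB)
 *	cdb[3] = 0xC0	(speed, LSB)
 */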
28039
28040
28041/*
28042 *    Function: sr_pause_resume()
28043 *
28044 * Description: This routine is the driver entry point for handling CD-ROM
28045 *		pause/resume ioctl requests. This only affects the audio play
28046 *		operation.
28047 *
28048 *   Arguments: dev - the device 'dev_t'
28049 *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
28050 *		      for setting the resume bit of the cdb.
28051 *
28052 * Return Code: the code returned by sd_send_scsi_cmd()
28053 *		EINVAL if invalid mode specified
28054 *
28055 */
28056
28057static int
28058sr_pause_resume(dev_t dev, int cmd)
28059{
28060	struct sd_lun		*un;
28061	struct uscsi_cmd	*com;
28062	char			cdb[CDB_GROUP1];
28063	int			rval;
28064
28065	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28066		return (ENXIO);
28067	}
28068
28069	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28070	bzero(cdb, CDB_GROUP1);
28071	cdb[0] = SCMD_PAUSE_RESUME;
28072	switch (cmd) {
28073	case CDROMRESUME:
28074		cdb[8] = 1;
28075		break;
28076	case CDROMPAUSE:
28077		cdb[8] = 0;
28078		break;
28079	default:
28080		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
28081		    " Command '%x' Not Supported\n", cmd);
28082		rval = EINVAL;
28083		goto done;
28084	}
28085
28086	com->uscsi_cdb    = cdb;
28087	com->uscsi_cdblen = CDB_GROUP1;
28088	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
28089
28090	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28091	    UIO_SYSSPACE, SD_PATH_STANDARD);
28092
28093done:
28094	kmem_free(com, sizeof (*com));
28095	return (rval);
28096}
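
/*
 * Illustrative usage sketch (not part of the driver), assuming 'fd' is
 * an open descriptor for the raw CD device; no argument is consumed
 * beyond the request code itself:
 *
 *	(void) ioctl(fd, CDROMPAUSE, 0);
 *	(void) ioctl(fd, CDROMRESUME, 0);
 */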
28097
28098
28099/*
28100 *    Function: sr_play_msf()
28101 *
28102 * Description: This routine is the driver entry point for handling CD-ROM
28103 *		ioctl requests to output the audio signals at the specified
28104 *		starting address and continue the audio play until the specified
28105 *		ending address (CDROMPLAYMSF). The address is in Minute Second
28106 *		Frame (MSF) format.
28107 *
28108 *   Arguments: dev	- the device 'dev_t'
28109 *		data	- pointer to user provided audio msf structure,
28110 *		          specifying start/end addresses.
28111 *		flag	- this argument is a pass through to ddi_copyxxx()
28112 *		          directly from the mode argument of ioctl().
28113 *
28114 * Return Code: the code returned by sd_send_scsi_cmd()
28115 *		EFAULT if ddi_copyxxx() fails
28116 *		ENXIO if ddi_get_soft_state() fails
28117 *		EINVAL if data pointer is NULL
28118 */
28119
28120static int
28121sr_play_msf(dev_t dev, caddr_t data, int flag)
28122{
28123	struct sd_lun		*un;
28124	struct uscsi_cmd	*com;
28125	struct cdrom_msf	msf_struct;
28126	struct cdrom_msf	*msf = &msf_struct;
28127	char			cdb[CDB_GROUP1];
28128	int			rval;
28129
28130	if (data == NULL) {
28131		return (EINVAL);
28132	}
28133
28134	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28135		return (ENXIO);
28136	}
28137
28138	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
28139		return (EFAULT);
28140	}
28141
28142	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28143	bzero(cdb, CDB_GROUP1);
28144	cdb[0] = SCMD_PLAYAUDIO_MSF;
28145	if (un->un_f_cfg_playmsf_bcd == TRUE) {
28146		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
28147		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
28148		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
28149		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
28150		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
28151		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
28152	} else {
28153		cdb[3] = msf->cdmsf_min0;
28154		cdb[4] = msf->cdmsf_sec0;
28155		cdb[5] = msf->cdmsf_frame0;
28156		cdb[6] = msf->cdmsf_min1;
28157		cdb[7] = msf->cdmsf_sec1;
28158		cdb[8] = msf->cdmsf_frame1;
28159	}
28160	com->uscsi_cdb    = cdb;
28161	com->uscsi_cdblen = CDB_GROUP1;
28162	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
28163	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28164	    UIO_SYSSPACE, SD_PATH_STANDARD);
28165	kmem_free(com, sizeof (*com));
28166	return (rval);
28167}
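
/*
 * Illustrative usage sketch (not part of the driver): play from
 * 00:02:00 through 05:30:00, assuming 'fd' is an open descriptor for
 * the raw CD device (hypothetical values; error handling omitted):
 *
 *	struct cdrom_msf msf;
 *
 *	msf.cdmsf_min0 = 0;	msf.cdmsf_sec0 = 2;	msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 5;	msf.cdmsf_sec1 = 30;	msf.cdmsf_frame1 = 0;
 *	(void) ioctl(fd, CDROMPLAYMSF, &msf);
 */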
28168
28169
28170/*
28171 *    Function: sr_play_trkind()
28172 *
28173 * Description: This routine is the driver entry point for handling CD-ROM
28174 *		ioctl requests to output the audio signals at the specified
28175 *		starting address and continue the audio play until the specified
28176 *		ending address (CDROMPLAYTRKIND). The address is in Track Index
28177 *		format.
28178 *
28179 *   Arguments: dev	- the device 'dev_t'
28180 *		data	- pointer to user provided audio track/index structure,
28181 *		          specifying start/end addresses.
28182 *		flag	- this argument is a pass through to ddi_copyxxx()
28183 *		          directly from the mode argument of ioctl().
28184 *
28185 * Return Code: the code returned by sd_send_scsi_cmd()
28186 *		EFAULT if ddi_copyxxx() fails
28187 *		ENXIO if ddi_get_soft_state() fails
28188 *		EINVAL if data pointer is NULL
28189 */
28190
28191static int
28192sr_play_trkind(dev_t dev, caddr_t data, int flag)
28193{
28194	struct cdrom_ti		ti_struct;
28195	struct cdrom_ti		*ti = &ti_struct;
28196	struct uscsi_cmd	*com = NULL;
28197	char			cdb[CDB_GROUP1];
28198	int			rval;
28199
28200	if (data == NULL) {
28201		return (EINVAL);
28202	}
28203
28204	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
28205		return (EFAULT);
28206	}
28207
28208	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28209	bzero(cdb, CDB_GROUP1);
28210	cdb[0] = SCMD_PLAYAUDIO_TI;
28211	cdb[4] = ti->cdti_trk0;
28212	cdb[5] = ti->cdti_ind0;
28213	cdb[7] = ti->cdti_trk1;
28214	cdb[8] = ti->cdti_ind1;
28215	com->uscsi_cdb    = cdb;
28216	com->uscsi_cdblen = CDB_GROUP1;
28217	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
28218	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28219	    UIO_SYSSPACE, SD_PATH_STANDARD);
28220	kmem_free(com, sizeof (*com));
28221	return (rval);
28222}
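
/*
 * Illustrative usage sketch (not part of the driver): play from track 1
 * index 1 through track 3 index 1, assuming 'fd' is an open descriptor
 * for the raw CD device (hypothetical values):
 *
 *	struct cdrom_ti ti;
 *
 *	ti.cdti_trk0 = 1;	ti.cdti_ind0 = 1;
 *	ti.cdti_trk1 = 3;	ti.cdti_ind1 = 1;
 *	(void) ioctl(fd, CDROMPLAYTRKIND, &ti);
 */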
28223
28224
28225/*
28226 *    Function: sr_read_all_subcodes()
28227 *
28228 * Description: This routine is the driver entry point for handling CD-ROM
28229 *		ioctl requests to return raw subcode data while the target is
28230 *		playing audio (CDROMSUBCODE).
28231 *
28232 *   Arguments: dev	- the device 'dev_t'
28233 *		data	- pointer to user provided cdrom subcode structure,
28234 *		          specifying the transfer length and address.
28235 *		flag	- this argument is a pass through to ddi_copyxxx()
28236 *		          directly from the mode argument of ioctl().
28237 *
28238 * Return Code: the code returned by sd_send_scsi_cmd()
28239 *		EFAULT if ddi_copyxxx() fails
28240 *		ENXIO if ddi_get_soft_state() fails
28241 *		EINVAL if data pointer is NULL
28242 */
28243
28244static int
28245sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
28246{
28247	struct sd_lun		*un = NULL;
28248	struct uscsi_cmd	*com = NULL;
28249	struct cdrom_subcode	*subcode = NULL;
28250	int			rval;
28251	size_t			buflen;
28252	char			cdb[CDB_GROUP5];
28253
28254#ifdef _MULTI_DATAMODEL
28255	/* To support ILP32 applications in an LP64 world */
28256	struct cdrom_subcode32		cdrom_subcode32;
28257	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
28258#endif
28259	if (data == NULL) {
28260		return (EINVAL);
28261	}
28262
28263	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28264		return (ENXIO);
28265	}
28266
28267	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
28268
28269#ifdef _MULTI_DATAMODEL
28270	switch (ddi_model_convert_from(flag & FMODELS)) {
28271	case DDI_MODEL_ILP32:
28272		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
28273			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28274			    "sr_read_all_subcodes: ddi_copyin Failed\n");
28275			kmem_free(subcode, sizeof (struct cdrom_subcode));
28276			return (EFAULT);
28277		}
28278		/* Convert the ILP32 uscsi data from the application to LP64 */
28279		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
28280		break;
28281	case DDI_MODEL_NONE:
28282		if (ddi_copyin(data, subcode,
28283		    sizeof (struct cdrom_subcode), flag)) {
28284			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28285			    "sr_read_all_subcodes: ddi_copyin Failed\n");
28286			kmem_free(subcode, sizeof (struct cdrom_subcode));
28287			return (EFAULT);
28288		}
28289		break;
28290	}
28291#else /* ! _MULTI_DATAMODEL */
28292	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
28293		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28294		    "sr_read_all_subcodes: ddi_copyin Failed\n");
28295		kmem_free(subcode, sizeof (struct cdrom_subcode));
28296		return (EFAULT);
28297	}
28298#endif /* _MULTI_DATAMODEL */
28299
28300	/*
28301	 * MMC-2 allows at most 3 bytes for the transfer length, so check
28302	 * that the requested length fits in 3 bytes
28303	 */
28304	if ((subcode->cdsc_length & 0xFF000000) != 0) {
28305		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28306		    "sr_read_all_subcodes: "
28307		    "cdrom transfer length too large: %d (limit %d)\n",
28308		    subcode->cdsc_length, 0xFFFFFF);
28309		kmem_free(subcode, sizeof (struct cdrom_subcode));
28310		return (EINVAL);
28311	}
28312
28313	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
28314	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28315	bzero(cdb, CDB_GROUP5);
28316
28317	if (un->un_f_mmc_cap == TRUE) {
28318		cdb[0] = (char)SCMD_READ_CD;
28319		cdb[2] = (char)0xff;
28320		cdb[3] = (char)0xff;
28321		cdb[4] = (char)0xff;
28322		cdb[5] = (char)0xff;
28323		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28324		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28325		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
28326		cdb[10] = 1;
28327	} else {
28328		/*
28329		 * Note: A vendor specific command (0xDF) is being used here to
28330		 * request a read of all subcodes.
28331		 */
28332		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
28333		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
28334		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28335		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28336		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
28337	}
28338	com->uscsi_cdb	   = cdb;
28339	com->uscsi_cdblen  = CDB_GROUP5;
28340	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
28341	com->uscsi_buflen  = buflen;
28342	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28343	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28344	    UIO_SYSSPACE, SD_PATH_STANDARD);
28345	kmem_free(subcode, sizeof (struct cdrom_subcode));
28346	kmem_free(com, sizeof (*com));
28347	return (rval);
28348}
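
/*
 * Illustrative usage sketch (not part of the driver): the caller
 * supplies a transfer length in subcode blocks and a buffer of
 * CDROM_BLK_SUBCODE bytes per block, assuming 'fd' is an open
 * descriptor for the raw CD device and 'buf'/'nblks' are caller
 * supplied (hypothetical):
 *
 *	struct cdrom_subcode sc;
 *
 *	sc.cdsc_length = nblks;
 *	sc.cdsc_addr   = buf;
 *	(void) ioctl(fd, CDROMSUBCODE, &sc);
 */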
28349
28350
28351/*
28352 *    Function: sr_read_subchannel()
28353 *
28354 * Description: This routine is the driver entry point for handling CD-ROM
28355 *		ioctl requests to return the Q sub-channel data of the CD
28356 *		current position block (CDROMSUBCHNL). The data includes the
28357 *		track number, index number, absolute CD-ROM address (LBA or MSF
28358 *		format per the user), track relative CD-ROM address (LBA or MSF
28359 *		format per the user), control data and audio status.
28360 *
28361 *   Arguments: dev	- the device 'dev_t'
28362 *		data	- pointer to user provided cdrom sub-channel structure
28363 *		flag	- this argument is a pass through to ddi_copyxxx()
28364 *		          directly from the mode argument of ioctl().
28365 *
28366 * Return Code: the code returned by sd_send_scsi_cmd()
28367 *		EFAULT if ddi_copyxxx() fails
28368 *		ENXIO if ddi_get_soft_state() fails
28369 *		EINVAL if data pointer is NULL
28370 */
28371
28372static int
28373sr_read_subchannel(dev_t dev, caddr_t data, int flag)
28374{
28375	struct sd_lun		*un;
28376	struct uscsi_cmd	*com;
28377	struct cdrom_subchnl	subchannel;
28378	struct cdrom_subchnl	*subchnl = &subchannel;
28379	char			cdb[CDB_GROUP1];
28380	caddr_t			buffer;
28381	int			rval;
28382
28383	if (data == NULL) {
28384		return (EINVAL);
28385	}
28386
28387	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28388	    (un->un_state == SD_STATE_OFFLINE)) {
28389		return (ENXIO);
28390	}
28391
28392	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
28393		return (EFAULT);
28394	}
28395
28396	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
28397	bzero(cdb, CDB_GROUP1);
28398	cdb[0] = SCMD_READ_SUBCHANNEL;
28399	/* Set the MSF bit based on the user requested address format */
28400	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
28401	/*
28402	 * Set the Q bit in byte 2 to indicate that the Q sub-channel data
28403	 * should be returned.
28404	 */
28405	cdb[2] = 0x40;
28406	/*
28407	 * Set byte 3 to specify the return data format. A value of 0x01
28408	 * indicates that the CD-ROM current position should be returned.
28409	 */
28410	cdb[3] = 0x01;
28411	cdb[8] = 0x10;
28412	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28413	com->uscsi_cdb	   = cdb;
28414	com->uscsi_cdblen  = CDB_GROUP1;
28415	com->uscsi_bufaddr = buffer;
28416	com->uscsi_buflen  = 16;
28417	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28418	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28419	    UIO_SYSSPACE, SD_PATH_STANDARD);
28420	if (rval != 0) {
28421		kmem_free(buffer, 16);
28422		kmem_free(com, sizeof (*com));
28423		return (rval);
28424	}
28425
28426	/* Process the returned Q sub-channel data */
28427	subchnl->cdsc_audiostatus = buffer[1];
28428	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
28429	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
28430	subchnl->cdsc_trk	= buffer[6];
28431	subchnl->cdsc_ind	= buffer[7];
28432	if (subchnl->cdsc_format & CDROM_LBA) {
28433		subchnl->cdsc_absaddr.lba =
28434		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28435		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28436		subchnl->cdsc_reladdr.lba =
28437		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
28438		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
28439	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
28440		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
28441		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
28442		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
28443		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
28444		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
28445		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
28446	} else {
28447		subchnl->cdsc_absaddr.msf.minute = buffer[9];
28448		subchnl->cdsc_absaddr.msf.second = buffer[10];
28449		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
28450		subchnl->cdsc_reladdr.msf.minute = buffer[13];
28451		subchnl->cdsc_reladdr.msf.second = buffer[14];
28452		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
28453	}
28454	kmem_free(buffer, 16);
28455	kmem_free(com, sizeof (*com));
28456	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
28457	    != 0) {
28458		return (EFAULT);
28459	}
28460	return (rval);
28461}
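
/*
 * Illustrative usage sketch (not part of the driver): only cdsc_format
 * is an input; the remaining fields are filled in on return, assuming
 * 'fd' is an open descriptor for the raw CD device:
 *
 *	struct cdrom_subchnl sc;
 *
 *	sc.cdsc_format = CDROM_MSF;	(or CDROM_LBA)
 *	(void) ioctl(fd, CDROMSUBCHNL, &sc);
 *
 * On return sc.cdsc_audiostatus, sc.cdsc_trk, sc.cdsc_ind,
 * sc.cdsc_absaddr and sc.cdsc_reladdr describe the current position.
 */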
28462
28463
28464/*
28465 *    Function: sr_read_tocentry()
28466 *
28467 * Description: This routine is the driver entry point for handling CD-ROM
28468 *		ioctl requests to read from the Table of Contents (TOC)
28469 *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
28470 *		fields, the starting address (LBA or MSF format per the user)
28471 *		and the data mode if the user specified track is a data track.
28472 *
28473 *		Note: The READ HEADER (0x44) command used in this routine is
28474 *		obsolete per the SCSI MMC spec but still supported in the
28475 *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI,
28476 *		therefore the command is still implemented in this routine.
28477 *
28478 *   Arguments: dev	- the device 'dev_t'
28479 *		data	- pointer to user provided toc entry structure,
28480 *			  specifying the track # and the address format
28481 *			  (LBA or MSF).
28482 *		flag	- this argument is a pass through to ddi_copyxxx()
28483 *		          directly from the mode argument of ioctl().
28484 *
28485 * Return Code: the code returned by sd_send_scsi_cmd()
28486 *		EFAULT if ddi_copyxxx() fails
28487 *		ENXIO if fail ddi_get_soft_state
28488 *		EINVAL if data pointer is NULL
28489 */
28490
28491static int
28492sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28493{
28494	struct sd_lun		*un = NULL;
28495	struct uscsi_cmd	*com;
28496	struct cdrom_tocentry	toc_entry;
28497	struct cdrom_tocentry	*entry = &toc_entry;
28498	caddr_t			buffer;
28499	int			rval;
28500	char			cdb[CDB_GROUP1];
28501
28502	if (data == NULL) {
28503		return (EINVAL);
28504	}
28505
28506	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28507	    (un->un_state == SD_STATE_OFFLINE)) {
28508		return (ENXIO);
28509	}
28510
28511	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28512		return (EFAULT);
28513	}
28514
28515	/* Validate the requested track and address format */
28516	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28517		return (EINVAL);
28518	}
28519
28520	if (entry->cdte_track == 0) {
28521		return (EINVAL);
28522	}
28523
28524	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28525	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28526	bzero(cdb, CDB_GROUP1);
28527
28528	cdb[0] = SCMD_READ_TOC;
28529	/* Set the MSF bit based on the user requested address format  */
28530	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28531	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28532		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28533	} else {
28534		cdb[6] = entry->cdte_track;
28535	}
28536
28537	/*
28538	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28539	 * (4 byte TOC response header + 8 byte track descriptor)
28540	 */
28541	cdb[8] = 12;
28542	com->uscsi_cdb	   = cdb;
28543	com->uscsi_cdblen  = CDB_GROUP1;
28544	com->uscsi_bufaddr = buffer;
28545	com->uscsi_buflen  = 0x0C;
28546	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28547	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28548	    UIO_SYSSPACE, SD_PATH_STANDARD);
28549	if (rval != 0) {
28550		kmem_free(buffer, 12);
28551		kmem_free(com, sizeof (*com));
28552		return (rval);
28553	}
28554
28555	/* Process the toc entry */
28556	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28557	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28558	if (entry->cdte_format & CDROM_LBA) {
28559		entry->cdte_addr.lba =
28560		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28561		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28562	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28563		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28564		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28565		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28566		/*
28567		 * Send a READ TOC command using the LBA address format to get
28568		 * the LBA for the track requested so it can be used in the
28569		 * READ HEADER request
28570		 *
28571		 * Note: The MSF bit of the READ HEADER command specifies the
28572		 * output format. The block address specified in that command
28573		 * must be in LBA format.
28574		 */
28575		cdb[1] = 0;
28576		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28577		    UIO_SYSSPACE, SD_PATH_STANDARD);
28578		if (rval != 0) {
28579			kmem_free(buffer, 12);
28580			kmem_free(com, sizeof (*com));
28581			return (rval);
28582		}
28583	} else {
28584		entry->cdte_addr.msf.minute	= buffer[9];
28585		entry->cdte_addr.msf.second	= buffer[10];
28586		entry->cdte_addr.msf.frame	= buffer[11];
28587		/*
28588		 * Send a READ TOC command using the LBA address format to get
28589		 * the LBA for the track requested so it can be used in the
28590		 * READ HEADER request
28591		 *
28592		 * Note: The MSF bit of the READ HEADER command specifies the
28593		 * output format. The block address specified in that command
28594		 * must be in LBA format.
28595		 */
28596		cdb[1] = 0;
28597		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28598		    UIO_SYSSPACE, SD_PATH_STANDARD);
28599		if (rval != 0) {
28600			kmem_free(buffer, 12);
28601			kmem_free(com, sizeof (*com));
28602			return (rval);
28603		}
28604	}
28605
28606	/*
28607	 * Build and send the READ HEADER command to determine the data mode of
28608	 * the user specified track.
28609	 */
28610	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28611	    (entry->cdte_track != CDROM_LEADOUT)) {
28612		bzero(cdb, CDB_GROUP1);
28613		cdb[0] = SCMD_READ_HEADER;
28614		cdb[2] = buffer[8];
28615		cdb[3] = buffer[9];
28616		cdb[4] = buffer[10];
28617		cdb[5] = buffer[11];
28618		cdb[8] = 0x08;
28619		com->uscsi_buflen = 0x08;
28620		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28621		    UIO_SYSSPACE, SD_PATH_STANDARD);
28622		if (rval == 0) {
28623			entry->cdte_datamode = buffer[0];
28624		} else {
28625			/*
28626			 * The READ HEADER command failed. Since it is
28627			 * obsoleted in one spec, it's better to return
28628			 * -1 for an invalid track so that we can still
28629			 * receive the rest of the TOC data.
28630			 */
28631			entry->cdte_datamode = (uchar_t)-1;
28632		}
28633	} else {
28634		entry->cdte_datamode = (uchar_t)-1;
28635	}
28636
28637	kmem_free(buffer, 12);
28638	kmem_free(com, sizeof (*com));
28639	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28640		return (EFAULT);
28641
28642	return (rval);
28643}
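
/*
 * Illustrative usage sketch (not part of the driver): the track number
 * and address format are inputs; ADR, CTRL, the starting address and
 * the data mode are outputs, assuming 'fd' is an open descriptor for
 * the raw CD device:
 *
 *	struct cdrom_tocentry te;
 *
 *	te.cdte_track  = 1;		(or CDROM_LEADOUT)
 *	te.cdte_format = CDROM_MSF;	(or CDROM_LBA)
 *	(void) ioctl(fd, CDROMREADTOCENTRY, &te);
 */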
28644
28645
28646/*
28647 *    Function: sr_read_tochdr()
28648 *
28649 * Description: This routine is the driver entry point for handling CD-ROM
28650 *		ioctl requests to read the Table of Contents (TOC) header
28651 *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28652 *		and ending track numbers.
28653 *
28654 *   Arguments: dev	- the device 'dev_t'
28655 *		data	- pointer to user provided toc header structure,
28656 *			  specifying the starting and ending track numbers.
28657 *		flag	- this argument is a pass through to ddi_copyxxx()
28658 *			  directly from the mode argument of ioctl().
28659 *
28660 * Return Code: the code returned by sd_send_scsi_cmd()
28661 *		EFAULT if ddi_copyxxx() fails
28662 *		ENXIO if ddi_get_soft_state() fails
28663 *		EINVAL if data pointer is NULL
28664 */
28665
28666static int
28667sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28668{
28669	struct sd_lun		*un;
28670	struct uscsi_cmd	*com;
28671	struct cdrom_tochdr	toc_header;
28672	struct cdrom_tochdr	*hdr = &toc_header;
28673	char			cdb[CDB_GROUP1];
28674	int			rval;
28675	caddr_t			buffer;
28676
28677	if (data == NULL) {
28678		return (EINVAL);
28679	}
28680
28681	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28682	    (un->un_state == SD_STATE_OFFLINE)) {
28683		return (ENXIO);
28684	}
28685
28686	buffer = kmem_zalloc(4, KM_SLEEP);
28687	bzero(cdb, CDB_GROUP1);
28688	cdb[0] = SCMD_READ_TOC;
28689	/*
28690	 * Specifying a track number of 0x00 in the READ TOC command indicates
28691	 * that the TOC header should be returned
28692	 */
28693	cdb[6] = 0x00;
28694	/*
28695	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28696	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28697	 */
28698	cdb[8] = 0x04;
28699	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28700	com->uscsi_cdb	   = cdb;
28701	com->uscsi_cdblen  = CDB_GROUP1;
28702	com->uscsi_bufaddr = buffer;
28703	com->uscsi_buflen  = 0x04;
28704	com->uscsi_timeout = 300;
28705	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28706
28707	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28708	    UIO_SYSSPACE, SD_PATH_STANDARD);
28709	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28710		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28711		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28712	} else {
28713		hdr->cdth_trk0 = buffer[2];
28714		hdr->cdth_trk1 = buffer[3];
28715	}
28716	kmem_free(buffer, 4);
28717	kmem_free(com, sizeof (*com));
28718	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28719		return (EFAULT);
28720	}
28721	return (rval);
28722}
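
/*
 * Illustrative usage sketch (not part of the driver): a caller would
 * typically read the header first and then iterate the track range,
 * assuming 'fd' is an open descriptor for the raw CD device:
 *
 *	struct cdrom_tochdr th;
 *	int trk;
 *
 *	(void) ioctl(fd, CDROMREADTOCHDR, &th);
 *	for (trk = th.cdth_trk0; trk <= th.cdth_trk1; trk++) {
 *		(read the TOC entry for 'trk' via CDROMREADTOCENTRY)
 *	}
 */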
28723
28724
28725/*
28726 * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28727 * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28728 * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28729 * digital audio and extended architecture digital audio. These modes are
28730 * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28731 * MMC specs.
28732 *
28733 * In addition to support for the various data formats these routines also
28734 * include support for devices that implement only the direct access READ
28735 * commands (0x08, 0x28), devices that implement the READ_CD commands
28736 * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28737 * READ CDXA commands (0xD8, 0xDB)
28738 */
28739
28740/*
28741 *    Function: sr_read_mode1()
28742 *
28743 * Description: This routine is the driver entry point for handling CD-ROM
28744 *		ioctl read mode1 requests (CDROMREADMODE1).
28745 *
28746 *   Arguments: dev	- the device 'dev_t'
28747 *		data	- pointer to user provided cd read structure specifying
28748 *			  the lba buffer address and length.
28749 *		flag	- this argument is a pass through to ddi_copyxxx()
28750 *			  directly from the mode argument of ioctl().
28751 *
28752 * Return Code: the code returned by sd_send_scsi_cmd()
28753 *		EFAULT if ddi_copyxxx() fails
28754 *		ENXIO if ddi_get_soft_state() fails
28755 *		EINVAL if data pointer is NULL
28756 */
28757
28758static int
28759sr_read_mode1(dev_t dev, caddr_t data, int flag)
28760{
28761	struct sd_lun		*un;
28762	struct cdrom_read	mode1_struct;
28763	struct cdrom_read	*mode1 = &mode1_struct;
28764	int			rval;
28765#ifdef _MULTI_DATAMODEL
28766	/* To support ILP32 applications in an LP64 world */
28767	struct cdrom_read32	cdrom_read32;
28768	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28769#endif /* _MULTI_DATAMODEL */
28770
28771	if (data == NULL) {
28772		return (EINVAL);
28773	}
28774
28775	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28776	    (un->un_state == SD_STATE_OFFLINE)) {
28777		return (ENXIO);
28778	}
28779
28780	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28781	    "sd_read_mode1: entry: un:0x%p\n", un);
28782
28783#ifdef _MULTI_DATAMODEL
28784	switch (ddi_model_convert_from(flag & FMODELS)) {
28785	case DDI_MODEL_ILP32:
28786		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28787			return (EFAULT);
28788		}
28789		/* Convert the ILP32 uscsi data from the application to LP64 */
28790		cdrom_read32tocdrom_read(cdrd32, mode1);
28791		break;
28792	case DDI_MODEL_NONE:
28793		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28794			return (EFAULT);
28795		}
28796	}
28797#else /* ! _MULTI_DATAMODEL */
28798	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28799		return (EFAULT);
28800	}
28801#endif /* _MULTI_DATAMODEL */
28802
28803	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28804	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28805
28806	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28807	    "sd_read_mode1: exit: un:0x%p\n", un);
28808
28809	return (rval);
28810}
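
/*
 * Illustrative usage sketch (not part of the driver), assuming 'fd' is
 * an open descriptor for the raw CD device and 'buf'/'len'/'lba' are
 * caller supplied (hypothetical):
 *
 *	struct cdrom_read cr;
 *
 *	cr.cdread_lba     = lba;
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen  = len;
 *	(void) ioctl(fd, CDROMREADMODE1, &cr);
 */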
28811
28812
28813/*
28814 *    Function: sr_read_cd_mode2()
28815 *
28816 * Description: This routine is the driver entry point for handling CD-ROM
28817 *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28818 *		support the READ CD (0xBE) command or the 1st generation
28819 *		READ CD (0xD4) command.
28820 *
28821 *   Arguments: dev	- the device 'dev_t'
28822 *		data	- pointer to user provided cd read structure specifying
28823 *			  the lba buffer address and length.
28824 *		flag	- this argument is a pass through to ddi_copyxxx()
28825 *			  directly from the mode argument of ioctl().
28826 *
28827 * Return Code: the code returned by sd_send_scsi_cmd()
28828 *		EFAULT if ddi_copyxxx() fails
28829 *		ENXIO if ddi_get_soft_state() fails
28830 *		EINVAL if data pointer is NULL
28831 */
28832
28833static int
28834sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28835{
28836	struct sd_lun		*un;
28837	struct uscsi_cmd	*com;
28838	struct cdrom_read	mode2_struct;
28839	struct cdrom_read	*mode2 = &mode2_struct;
28840	uchar_t			cdb[CDB_GROUP5];
28841	int			nblocks;
28842	int			rval;
28843#ifdef _MULTI_DATAMODEL
28844	/*  To support ILP32 applications in an LP64 world */
28845	struct cdrom_read32	cdrom_read32;
28846	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28847#endif /* _MULTI_DATAMODEL */
28848
28849	if (data == NULL) {
28850		return (EINVAL);
28851	}
28852
28853	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28854	    (un->un_state == SD_STATE_OFFLINE)) {
28855		return (ENXIO);
28856	}
28857
28858#ifdef _MULTI_DATAMODEL
28859	switch (ddi_model_convert_from(flag & FMODELS)) {
28860	case DDI_MODEL_ILP32:
28861		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28862			return (EFAULT);
28863		}
28864		/* Convert the ILP32 uscsi data from the application to LP64 */
28865		cdrom_read32tocdrom_read(cdrd32, mode2);
28866		break;
28867	case DDI_MODEL_NONE:
28868		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28869			return (EFAULT);
28870		}
28871		break;
28872	}
28873
28874#else /* ! _MULTI_DATAMODEL */
28875	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28876		return (EFAULT);
28877	}
28878#endif /* _MULTI_DATAMODEL */
28879
28880	bzero(cdb, sizeof (cdb));
28881	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28882		/* Read command supported by 1st generation atapi drives */
28883		cdb[0] = SCMD_READ_CDD4;
28884	} else {
28885		/* Universal CD Access Command */
28886		cdb[0] = SCMD_READ_CD;
28887	}
28888
28889	/*
28890	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28891	 */
28892	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28893
28894	/* set the start address */
28895	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28896	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28897	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28898	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28899
28900	/* set the transfer length */
28901	nblocks = mode2->cdread_buflen / 2336;
28902	cdb[6] = (uchar_t)(nblocks >> 16);
28903	cdb[7] = (uchar_t)(nblocks >> 8);
28904	cdb[8] = (uchar_t)nblocks;
28905
28906	/* set the filter bits */
28907	cdb[9] = CDROM_READ_CD_USERDATA;
28908
28909	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28910	com->uscsi_cdb = (caddr_t)cdb;
28911	com->uscsi_cdblen = sizeof (cdb);
28912	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28913	com->uscsi_buflen = mode2->cdread_buflen;
28914	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28915
28916	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28917	    UIO_SYSSPACE, SD_PATH_STANDARD);
28918	kmem_free(com, sizeof (*com));
28919	return (rval);
28920}
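
/*
 * Worked example (illustrative): for a request with cdread_lba 0x100
 * and cdread_buflen 4672 (two 2336-byte sectors), the CDB built above
 * would contain:
 *
 *	cdb[0]    = 0xBE (SCMD_READ_CD; 0xD4 for 1st generation atapi)
 *	cdb[1]    = CDROM_SECTOR_TYPE_MODE2
 *	cdb[2..5] = 0x00 0x00 0x01 0x00		(starting LBA, big-endian)
 *	cdb[6..8] = 0x00 0x00 0x02		(transfer length in sectors)
 *	cdb[9]    = CDROM_READ_CD_USERDATA	(return user data only)
 */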
28921
28922
28923/*
28924 *    Function: sr_read_mode2()
28925 *
28926 * Description: This routine is the driver entry point for handling CD-ROM
28927 *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28928 *		do not support the READ CD (0xBE) command.
28929 *
28930 *   Arguments: dev	- the device 'dev_t'
28931 *		data	- pointer to user provided cd read structure specifying
28932 *			  the lba buffer address and length.
28933 *		flag	- this argument is a pass through to ddi_copyxxx()
28934 *			  directly from the mode argument of ioctl().
28935 *
28936 * Return Code: the code returned by sd_send_scsi_cmd()
28937 *		EFAULT if ddi_copyxxx() fails
28938 *		ENXIO if ddi_get_soft_state() fails
28939 *		EINVAL if data pointer is NULL
28940 *		EIO if fail to reset block size
28941 *		EAGAIN if commands are in progress in the driver
28942 */
28943
28944static int
28945sr_read_mode2(dev_t dev, caddr_t data, int flag)
28946{
28947	struct sd_lun		*un;
28948	struct cdrom_read	mode2_struct;
28949	struct cdrom_read	*mode2 = &mode2_struct;
28950	int			rval;
28951	uint32_t		restore_blksize;
28952	struct uscsi_cmd	*com;
28953	uchar_t			cdb[CDB_GROUP0];
28954	int			nblocks;
28955
28956#ifdef _MULTI_DATAMODEL
28957	/* To support ILP32 applications in an LP64 world */
28958	struct cdrom_read32	cdrom_read32;
28959	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28960#endif /* _MULTI_DATAMODEL */
28961
28962	if (data == NULL) {
28963		return (EINVAL);
28964	}
28965
28966	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28967	    (un->un_state == SD_STATE_OFFLINE)) {
28968		return (ENXIO);
28969	}
28970
28971	/*
28972	 * Because this routine will update the device and driver block size
28973	 * being used we want to make sure there are no commands in progress.
28974	 * If commands are in progress the user will have to try again.
28975	 *
28976	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28977	 * in sdioctl to protect commands from sdioctl through to the top of
28978	 * sd_uscsi_strategy. See sdioctl for details.
28979	 */
28980	mutex_enter(SD_MUTEX(un));
28981	if (un->un_ncmds_in_driver != 1) {
28982		mutex_exit(SD_MUTEX(un));
28983		return (EAGAIN);
28984	}
28985	mutex_exit(SD_MUTEX(un));
28986
28987	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28988	    "sd_read_mode2: entry: un:0x%p\n", un);
28989
28990#ifdef _MULTI_DATAMODEL
28991	switch (ddi_model_convert_from(flag & FMODELS)) {
28992	case DDI_MODEL_ILP32:
28993		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28994			return (EFAULT);
28995		}
28996		/* Convert the ILP32 uscsi data from the application to LP64 */
28997		cdrom_read32tocdrom_read(cdrd32, mode2);
28998		break;
28999	case DDI_MODEL_NONE:
29000		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
29001			return (EFAULT);
29002		}
29003		break;
29004	}
29005#else /* ! _MULTI_DATAMODEL */
29006	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
29007		return (EFAULT);
29008	}
29009#endif /* _MULTI_DATAMODEL */
29010
29011	/* Store the current target block size for restoration later */
29012	restore_blksize = un->un_tgt_blocksize;
29013
29014	/* Change the device and soft state target block size to 2336 */
29015	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
29016		rval = EIO;
29017		goto done;
29018	}
29019
29020
29021	bzero(cdb, sizeof (cdb));
29022
29023	/* set READ operation */
29024	cdb[0] = SCMD_READ;
29025
29026	/* adjust lba for 2kbyte blocks from 512 byte blocks */
29027	mode2->cdread_lba >>= 2;
29028
29029	/* set the start address */
29030	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
29031	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
29032	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
29033
29034	/* set the transfer length */
29035	nblocks = mode2->cdread_buflen / 2336;
29036	cdb[4] = (uchar_t)nblocks & 0xFF;
29037
29038	/* build command */
29039	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29040	com->uscsi_cdb = (caddr_t)cdb;
29041	com->uscsi_cdblen = sizeof (cdb);
29042	com->uscsi_bufaddr = mode2->cdread_bufaddr;
29043	com->uscsi_buflen = mode2->cdread_buflen;
29044	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29045
29046	/*
29047	 * Issue SCSI command with user space address for read buffer.
29048	 *
29049	 * This sends the command through main channel in the driver.
29050	 *
29051	 * Since this is accessed via an IOCTL call, we go through the
29052	 * standard path, so that if the device was powered down, then
29053	 * it would be 'awakened' to handle the command.
29054	 */
29055	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
29056	    UIO_SYSSPACE, SD_PATH_STANDARD);
29057
29058	kmem_free(com, sizeof (*com));
29059
29060	/* Restore the device and soft state target block size */
29061	if (sr_sector_mode(dev, restore_blksize) != 0) {
29062		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29063		    "can't do switch back to mode 1\n");
29064		/*
29065		 * If sd_send_scsi_READ succeeded we still need to report
29066		 * an error because we failed to reset the block size
29067		 */
29068		if (rval == 0) {
29069			rval = EIO;
29070		}
29071	}
29072
29073done:
29074	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
29075	    "sd_read_mode2: exit: un:0x%p\n", un);
29076
29077	return (rval);
29078}
29079
29080
29081/*
29082 *    Function: sr_sector_mode()
29083 *
29084 * Description: This utility function is used by sr_read_mode2 to set the
29085 *		target block size based on the user specified size. This is a
29086 *		legacy implementation based upon a vendor specific mode page.
29087 *
29088 *   Arguments: dev	- the device 'dev_t'
29089 *		blksize	- the block size to be set; either 2336 (for mode 2
29090 *			  reads) or 512.
29091 *
29092 * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
29093 *		sd_send_scsi_MODE_SELECT()
29094 *		ENXIO if ddi_get_soft_state() fails
29095 *
29096 */
29097
29098static int
29099sr_sector_mode(dev_t dev, uint32_t blksize)
29100{
29101	struct sd_lun	*un;
29102	uchar_t		*sense;
29103	uchar_t		*select;
29104	int		rval;
29105
29106	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29107	    (un->un_state == SD_STATE_OFFLINE)) {
29108		return (ENXIO);
29109	}
29110
29111	sense = kmem_zalloc(20, KM_SLEEP);
29112
29113	/* Note: This is a vendor specific mode page (0x81) */
29114	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
29115	    SD_PATH_STANDARD)) != 0) {
29116		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29117		    "sr_sector_mode: Mode Sense failed\n");
29118		kmem_free(sense, 20);
29119		return (rval);
29120	}
29121	select = kmem_zalloc(20, KM_SLEEP);
29122	select[3] = 0x08;
29123	select[10] = ((blksize >> 8) & 0xff);
29124	select[11] = (blksize & 0xff);
29125	select[12] = 0x01;
29126	select[13] = 0x06;
29127	select[14] = sense[14];
29128	select[15] = sense[15];
29129	if (blksize == SD_MODE2_BLKSIZE) {
29130		select[14] |= 0x01;
29131	}
29132
29133	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
29134	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
29135		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29136		    "sr_sector_mode: Mode Select failed\n");
29137	} else {
29138		/*
29139		 * Only update the softstate block size if we successfully
29140		 * changed the device block mode.
29141		 */
29142		mutex_enter(SD_MUTEX(un));
29143		sd_update_block_info(un, blksize, 0);
29144		mutex_exit(SD_MUTEX(un));
29145	}
29146	kmem_free(sense, 20);
29147	kmem_free(select, 20);
29148	return (rval);
29149}
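
/*
 * Illustrative layout of the 20-byte mode select buffer built above
 * (vendor specific page 0x81; offsets as used by the code, not a
 * published spec):
 *
 *	select[3]       = 0x08		block descriptor length
 *	select[10..11]  = blksize	block size, big-endian
 *	select[12..13]  = 0x01, 0x06	page code and page length
 *	select[14] bit0 = 1		set when switching to 2336-byte mode
 */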
29150
29151
29152/*
29153 *    Function: sr_read_cdda()
29154 *
29155 * Description: This routine is the driver entry point for handling CD-ROM
29156 *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
29157 *		the target supports CDDA these requests are handled via a vendor
29158 *		specific command (0xD8). If the target does not support CDDA
29159 *		these requests are handled via the READ CD command (0xBE).
29160 *
29161 *   Arguments: dev	- the device 'dev_t'
29162 *		data	- pointer to user provided CD-DA structure specifying
29163 *			  the track starting address, transfer length, and
29164 *			  subcode options.
29165 *		flag	- this argument is a pass through to ddi_copyxxx()
29166 *			  directly from the mode argument of ioctl().
29167 *
29168 * Return Code: the code returned by sd_send_scsi_cmd()
29169 *		EFAULT if ddi_copyxxx() fails
29170 *		ENXIO if ddi_get_soft_state() fails
29171 *		EINVAL if invalid arguments are provided
29172 *		ENOTTY if the subcode cannot be returned via READ CD
29173 */
29174
29175static int
29176sr_read_cdda(dev_t dev, caddr_t data, int flag)
29177{
29178	struct sd_lun			*un;
29179	struct uscsi_cmd		*com;
29180	struct cdrom_cdda		*cdda;
29181	int				rval;
29182	size_t				buflen;
29183	char				cdb[CDB_GROUP5];
29184
29185#ifdef _MULTI_DATAMODEL
29186	/* To support ILP32 applications in an LP64 world */
29187	struct cdrom_cdda32	cdrom_cdda32;
29188	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
29189#endif /* _MULTI_DATAMODEL */
29190
29191	if (data == NULL) {
29192		return (EINVAL);
29193	}
29194
29195	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29196		return (ENXIO);
29197	}
29198
29199	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
29200
29201#ifdef _MULTI_DATAMODEL
29202	switch (ddi_model_convert_from(flag & FMODELS)) {
29203	case DDI_MODEL_ILP32:
29204		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
29205			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29206			    "sr_read_cdda: ddi_copyin Failed\n");
29207			kmem_free(cdda, sizeof (struct cdrom_cdda));
29208			return (EFAULT);
29209		}
29210		/* Convert the ILP32 uscsi data from the application to LP64 */
29211		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
29212		break;
29213	case DDI_MODEL_NONE:
29214		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
29215			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29216			    "sr_read_cdda: ddi_copyin Failed\n");
29217			kmem_free(cdda, sizeof (struct cdrom_cdda));
29218			return (EFAULT);
29219		}
29220		break;
29221	}
29222#else /* ! _MULTI_DATAMODEL */
29223	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
29224		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29225		    "sr_read_cdda: ddi_copyin Failed\n");
29226		kmem_free(cdda, sizeof (struct cdrom_cdda));
29227		return (EFAULT);
29228	}
29229#endif /* _MULTI_DATAMODEL */
29230
29231	/*
29232	 * Since MMC-2 expects max 3 bytes for length, check if the
29233	 * length input is greater than 3 bytes
29234	 */
29235	if ((cdda->cdda_length & 0xFF000000) != 0) {
29236		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
29237		    "cdrom transfer length too large: %d (limit %d)\n",
29238		    cdda->cdda_length, 0xFFFFFF);
29239		kmem_free(cdda, sizeof (struct cdrom_cdda));
29240		return (EINVAL);
29241	}
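
	/*
	 * For illustration: since only a 24-bit length is accepted, the
	 * largest legal request is 0xFFFFFF (16777215) blocks; e.g. a
	 * cdda_length of 0x01000000 has a bit set in the top byte and
	 * is rejected by the mask test above.
	 */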
29242
29243	switch (cdda->cdda_subcode) {
29244	case CDROM_DA_NO_SUBCODE:
29245		buflen = CDROM_BLK_2352 * cdda->cdda_length;
29246		break;
29247	case CDROM_DA_SUBQ:
29248		buflen = CDROM_BLK_2368 * cdda->cdda_length;
29249		break;
29250	case CDROM_DA_ALL_SUBCODE:
29251		buflen = CDROM_BLK_2448 * cdda->cdda_length;
29252		break;
29253	case CDROM_DA_SUBCODE_ONLY:
29254		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
29255		break;
29256	default:
29257		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29258		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
29259		    cdda->cdda_subcode);
29260		kmem_free(cdda, sizeof (struct cdrom_cdda));
29261		return (EINVAL);
29262	}
29263
29264	/* Build and send the command */
29265	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29266	bzero(cdb, CDB_GROUP5);
29267
29268	if (un->un_f_cfg_cdda == TRUE) {
29269		cdb[0] = (char)SCMD_READ_CD;
29270		cdb[1] = 0x04;
29271		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
29272		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
29273		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
29274		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
29275		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
29276		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
29277		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
29278		cdb[9] = 0x10;
29279		switch (cdda->cdda_subcode) {
29280		case CDROM_DA_NO_SUBCODE :
29281			cdb[10] = 0x0;
29282			break;
29283		case CDROM_DA_SUBQ :
29284			cdb[10] = 0x2;
29285			break;
29286		case CDROM_DA_ALL_SUBCODE :
29287			cdb[10] = 0x1;
29288			break;
29289		case CDROM_DA_SUBCODE_ONLY :
29290			/* FALLTHROUGH */
29291		default :
29292			kmem_free(cdda, sizeof (struct cdrom_cdda));
29293			kmem_free(com, sizeof (*com));
29294			return (ENOTTY);
29295		}
29296	} else {
29297		cdb[0] = (char)SCMD_READ_CDDA;
29298		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
29299		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
29300		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
29301		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
29302		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
29303		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
29304		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
29305		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
29306		cdb[10] = cdda->cdda_subcode;
29307	}
29308
29309	com->uscsi_cdb = cdb;
29310	com->uscsi_cdblen = CDB_GROUP5;
29311	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
29312	com->uscsi_buflen = buflen;
29313	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29314
29315	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
29316	    UIO_SYSSPACE, SD_PATH_STANDARD);
29317
29318	kmem_free(cdda, sizeof (struct cdrom_cdda));
29319	kmem_free(com, sizeof (*com));
29320	return (rval);
29321}
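
/*
 * For reference, a minimal (hypothetical) sketch of how a userland
 * caller reaches the entry point above; the structure fields are those
 * used by the copyin logic in sr_read_cdda():
 *
 *	struct cdrom_cdda c;
 *
 *	c.cdda_addr = lba;			starting address
 *	c.cdda_length = nblks;			at most 0xFFFFFF blocks
 *	c.cdda_subcode = CDROM_DA_NO_SUBCODE;	2352 bytes per block
 *	c.cdda_data = (caddr_t)buf;		nblks * 2352 byte buffer
 *	(void) ioctl(fd, CDROMCDDA, &c);
 */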
29322
29323
29324/*
29325 *    Function: sr_read_cdxa()
29326 *
29327 * Description: This routine is the driver entry point for handling CD-ROM
29328 *		ioctl requests to return CD-XA (Extended Architecture) data.
29329 *		(CDROMCDXA).
29330 *
29331 *   Arguments: dev	- the device 'dev_t'
29332 *		data	- pointer to user provided CD-XA structure specifying
29333 *			  the data starting address, transfer length, and format
29334 *		flag	- this argument is a pass through to ddi_copyxxx()
29335 *			  directly from the mode argument of ioctl().
29336 *
29337 * Return Code: the code returned by sd_send_scsi_cmd()
29338 *		EFAULT if ddi_copyxxx() fails
29339 *		ENXIO if ddi_get_soft_state() fails
29340 *		EINVAL if the data pointer is NULL or arguments are invalid
29341 */
29342
29343static int
29344sr_read_cdxa(dev_t dev, caddr_t data, int flag)
29345{
29346	struct sd_lun		*un;
29347	struct uscsi_cmd	*com;
29348	struct cdrom_cdxa	*cdxa;
29349	int			rval;
29350	size_t			buflen;
29351	char			cdb[CDB_GROUP5];
29352	uchar_t			read_flags;
29353
29354#ifdef _MULTI_DATAMODEL
29355	/* To support ILP32 applications in an LP64 world */
29356	struct cdrom_cdxa32		cdrom_cdxa32;
29357	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
29358#endif /* _MULTI_DATAMODEL */
29359
29360	if (data == NULL) {
29361		return (EINVAL);
29362	}
29363
29364	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29365		return (ENXIO);
29366	}
29367
29368	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
29369
29370#ifdef _MULTI_DATAMODEL
29371	switch (ddi_model_convert_from(flag & FMODELS)) {
29372	case DDI_MODEL_ILP32:
29373		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
29374			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29375			return (EFAULT);
29376		}
29377		/*
29378		 * Convert the ILP32 uscsi data from the
29379		 * application to LP64 for internal use.
29380		 */
29381		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
29382		break;
29383	case DDI_MODEL_NONE:
29384		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29385			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29386			return (EFAULT);
29387		}
29388		break;
29389	}
29390#else /* ! _MULTI_DATAMODEL */
29391	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29392		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29393		return (EFAULT);
29394	}
29395#endif /* _MULTI_DATAMODEL */
29396
29397	/*
29398	 * Since MMC-2 expects max 3 bytes for length, check if the
29399	 * length input is greater than 3 bytes
29400	 */
29401	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
29402		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
29403		    "cdrom transfer length too large: %d (limit %d)\n",
29404		    cdxa->cdxa_length, 0xFFFFFF);
29405		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29406		return (EINVAL);
29407	}
29408
29409	switch (cdxa->cdxa_format) {
29410	case CDROM_XA_DATA:
29411		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
29412		read_flags = 0x10;
29413		break;
29414	case CDROM_XA_SECTOR_DATA:
29415		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
29416		read_flags = 0xf8;
29417		break;
29418	case CDROM_XA_DATA_W_ERROR:
29419		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
29420		read_flags = 0xfc;
29421		break;
29422	default:
29423		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29424		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
29425		    cdxa->cdxa_format);
29426		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29427		return (EINVAL);
29428	}
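
	/*
	 * Note: read_flags becomes byte 9 of the READ CD CDB below. The
	 * values chosen above request, respectively: user data only
	 * (0x10), the full 2352-byte raw sector (0xf8), and the raw
	 * sector plus C2 error information (0xfc), matching the buffer
	 * sizes computed for each format.
	 */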
29429
29430	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29431	bzero(cdb, CDB_GROUP5);
29432	if (un->un_f_mmc_cap == TRUE) {
29433		cdb[0] = (char)SCMD_READ_CD;
29434		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29435		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29436		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29437		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29438		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29439		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29440		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
29441		cdb[9] = (char)read_flags;
29442	} else {
29443		/*
29444		 * Note: A vendor specific command (0xDB) is being used here to
29445		 * request a read of all subcodes.
29446		 */
29447		cdb[0] = (char)SCMD_READ_CDXA;
29448		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29449		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29450		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29451		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29452		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
29453		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29454		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29455		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
29456		cdb[10] = cdxa->cdxa_format;
29457	}
29458	com->uscsi_cdb	   = cdb;
29459	com->uscsi_cdblen  = CDB_GROUP5;
29460	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
29461	com->uscsi_buflen  = buflen;
29462	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29463	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
29464	    UIO_SYSSPACE, SD_PATH_STANDARD);
29465	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29466	kmem_free(com, sizeof (*com));
29467	return (rval);
29468}
29469
29470
29471/*
29472 *    Function: sr_eject()
29473 *
29474 * Description: This routine is the driver entry point for handling CD-ROM
29475 *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
29476 *
29477 *   Arguments: dev	- the device 'dev_t'
29478 *
29479 * Return Code: the code returned by sd_send_scsi_cmd()
29480 */
29481
29482static int
29483sr_eject(dev_t dev)
29484{
29485	struct sd_lun	*un;
29486	int		rval;
29487
29488	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29489	    (un->un_state == SD_STATE_OFFLINE)) {
29490		return (ENXIO);
29491	}
29492	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
29493	    SD_PATH_STANDARD)) != 0) {
29494		return (rval);
29495	}
29496
29497	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
29498	    SD_PATH_STANDARD);
29499
29500	if (rval == 0) {
29501		mutex_enter(SD_MUTEX(un));
29502		sr_ejected(un);
29503		un->un_mediastate = DKIO_EJECTED;
29504		cv_broadcast(&un->un_state_cv);
29505		mutex_exit(SD_MUTEX(un));
29506	}
29507	return (rval);
29508}
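
/*
 * Note on the sequence above: the drive must first be told to allow
 * medium removal (DOORLOCK) or it may reject the eject; SD_TARGET_EJECT
 * then presumably maps to a START STOP UNIT with the load/eject bit set
 * and the start bit clear, i.e. "unload the medium".
 */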
29509
29510
29511/*
29512 *    Function: sr_ejected()
29513 *
29514 * Description: This routine updates the soft state structure to invalidate the
29515 *		geometry information after the media has been ejected or a
29516 *		media eject has been detected.
29517 *
29518 *   Arguments: un - driver soft state (unit) structure
29519 */
29520
29521static void
29522sr_ejected(struct sd_lun *un)
29523{
29524	struct sd_errstats *stp;
29525
29526	ASSERT(un != NULL);
29527	ASSERT(mutex_owned(SD_MUTEX(un)));
29528
29529	un->un_f_blockcount_is_valid	= FALSE;
29530	un->un_f_tgt_blocksize_is_valid	= FALSE;
29531	un->un_f_geometry_is_valid	= FALSE;
29532
29533	if (un->un_errstats != NULL) {
29534		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29535		stp->sd_capacity.value.ui64 = 0;
29536	}
29537}
29538
29539
29540/*
29541 *    Function: sr_check_wp()
29542 *
29543 * Description: This routine checks the write protection of a removable
29544 *      media disk and hotpluggable devices via the write protect bit of
29545 *      the Mode Page Header device specific field. Some devices choke
29546 *      on an unsupported mode page. To work around this issue, this
29547 *      routine uses the 0x3f mode page (request for all pages) for
29548 *      all device types.
29549 *
29550 *   Arguments: dev		- the device 'dev_t'
29551 *
29552 * Return Code: int indicating if the device is write protected (1) or not (0)
29553 *
29554 *     Context: Kernel thread.
29555 *
29556 */
29557
29558static int
29559sr_check_wp(dev_t dev)
29560{
29561	struct sd_lun	*un;
29562	uchar_t		device_specific;
29563	uchar_t		*sense;
29564	int		hdrlen;
29565	int		rval = FALSE;
29566
29567	/*
29568	 * Note: The return codes for this routine should be reworked to
29569	 * properly handle the case of a NULL softstate.
29570	 */
29571	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29572		return (FALSE);
29573	}
29574
29575	if (un->un_f_cfg_is_atapi == TRUE) {
29576		/*
29577		 * The mode page contents are not required; set the allocation
29578		 * length for the mode page header only
29579		 */
29580		hdrlen = MODE_HEADER_LENGTH_GRP2;
29581		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29582		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
29583		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29584			goto err_exit;
29585		device_specific =
29586		    ((struct mode_header_grp2 *)sense)->device_specific;
29587	} else {
29588		hdrlen = MODE_HEADER_LENGTH;
29589		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29590		if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
29591		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD) != 0)
29592			goto err_exit;
29593		device_specific =
29594		    ((struct mode_header *)sense)->device_specific;
29595	}
29596
29597	/*
29598	 * Not all disks understand the mode sense query above; if it
29599	 * failed we jumped to err_exit and will return FALSE, treating
29600	 * the device as writable. Otherwise report the write protect bit.
29601	 */
29602	if (device_specific & WRITE_PROTECT) {
29603		rval = TRUE;
29604	}
29605
29606err_exit:
29607	kmem_free(sense, hdrlen);
29608	return (rval);
29609}
29610
29611/*
29612 *    Function: sr_volume_ctrl()
29613 *
29614 * Description: This routine is the driver entry point for handling CD-ROM
29615 *		audio output volume ioctl requests. (CDROMVOLCTRL)
29616 *
29617 *   Arguments: dev	- the device 'dev_t'
29618 *		data	- pointer to user audio volume control structure
29619 *		flag	- this argument is a pass through to ddi_copyxxx()
29620 *			  directly from the mode argument of ioctl().
29621 *
29622 * Return Code: the code returned by sd_send_scsi_cmd()
29623 *		EFAULT if ddi_copyxxx() fails
29624 *		ENXIO if ddi_get_soft_state() fails
29625 *		EINVAL if data pointer is NULL
29626 *
29627 */
29628
29629static int
29630sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
29631{
29632	struct sd_lun		*un;
29633	struct cdrom_volctrl    volume;
29634	struct cdrom_volctrl    *vol = &volume;
29635	uchar_t			*sense_page;
29636	uchar_t			*select_page;
29637	uchar_t			*sense;
29638	uchar_t			*select;
29639	int			sense_buflen;
29640	int			select_buflen;
29641	int			rval;
29642
29643	if (data == NULL) {
29644		return (EINVAL);
29645	}
29646
29647	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29648	    (un->un_state == SD_STATE_OFFLINE)) {
29649		return (ENXIO);
29650	}
29651
29652	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
29653		return (EFAULT);
29654	}
29655
29656	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29657		struct mode_header_grp2		*sense_mhp;
29658		struct mode_header_grp2		*select_mhp;
29659		int				bd_len;
29660
29661		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
29662		select_buflen = MODE_HEADER_LENGTH_GRP2 +
29663		    MODEPAGE_AUDIO_CTRL_LEN;
29664		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29665		select = kmem_zalloc(select_buflen, KM_SLEEP);
29666		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
29667		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29668		    SD_PATH_STANDARD)) != 0) {
29669			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29670			    "sr_volume_ctrl: Mode Sense Failed\n");
29671			kmem_free(sense, sense_buflen);
29672			kmem_free(select, select_buflen);
29673			return (rval);
29674		}
29675		sense_mhp = (struct mode_header_grp2 *)sense;
29676		select_mhp = (struct mode_header_grp2 *)select;
29677		bd_len = (sense_mhp->bdesc_length_hi << 8) |
29678		    sense_mhp->bdesc_length_lo;
29679		if (bd_len > MODE_BLK_DESC_LENGTH) {
29680			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29681			    "sr_volume_ctrl: Mode Sense returned invalid "
29682			    "block descriptor length\n");
29683			kmem_free(sense, sense_buflen);
29684			kmem_free(select, select_buflen);
29685			return (EIO);
29686		}
29687		sense_page = (uchar_t *)
29688		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
29689		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
29690		select_mhp->length_msb = 0;
29691		select_mhp->length_lsb = 0;
29692		select_mhp->bdesc_length_hi = 0;
29693		select_mhp->bdesc_length_lo = 0;
29694	} else {
29695		struct mode_header		*sense_mhp, *select_mhp;
29696
29697		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29698		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29699		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29700		select = kmem_zalloc(select_buflen, KM_SLEEP);
29701		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
29702		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29703		    SD_PATH_STANDARD)) != 0) {
29704			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29705			    "sr_volume_ctrl: Mode Sense Failed\n");
29706			kmem_free(sense, sense_buflen);
29707			kmem_free(select, select_buflen);
29708			return (rval);
29709		}
29710		sense_mhp  = (struct mode_header *)sense;
29711		select_mhp = (struct mode_header *)select;
29712		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29713			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29714			    "sr_volume_ctrl: Mode Sense returned invalid "
29715			    "block descriptor length\n");
29716			kmem_free(sense, sense_buflen);
29717			kmem_free(select, select_buflen);
29718			return (EIO);
29719		}
29720		sense_page = (uchar_t *)
29721		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29722		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29723		select_mhp->length = 0;
29724		select_mhp->bdesc_length = 0;
29725	}
29726	/*
29727	 * Note: An audio control data structure could be created and overlaid
29728	 * on the following in place of the array indexing method implemented;
29729	 * a hypothetical layout is sketched below.
29729	 */
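	/*
	 * A hypothetical overlay (not defined anywhere in this driver),
	 * shown only to document the indices used below:
	 *
	 *	struct audio_page {
	 *		uchar_t	ap_code;	[0]  MODEPAGE_AUDIO_CTRL
	 *		uchar_t	ap_len;		[1]  page length (0xE)
	 *		uchar_t	ap_flags;	[2]  IMMED and friends
	 *		uchar_t	ap_resv[2];	[3]  reserved
	 *		uchar_t	ap_info[3];	[5]  from sense data
	 *		uchar_t	ap_ch0_sel;	[8]  0x01
	 *		uchar_t	ap_ch0_vol;	[9]  channel 0 volume
	 *		uchar_t	ap_ch1_sel;	[10] 0x02
	 *		uchar_t	ap_ch1_vol;	[11] channel 1 volume
	 *		uchar_t	ap_ch23[4];	[12] from sense data
	 *	};
	 */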
29730
29731	/* Build the select data for the user volume data */
29732	select_page[0] = MODEPAGE_AUDIO_CTRL;
29733	select_page[1] = 0xE;
29734	/* Set the immediate bit */
29735	select_page[2] = 0x04;
29736	/* Zero out reserved fields */
29737	select_page[3] = 0x00;
29738	select_page[4] = 0x00;
29739	/* Return sense data for fields not to be modified */
29740	select_page[5] = sense_page[5];
29741	select_page[6] = sense_page[6];
29742	select_page[7] = sense_page[7];
29743	/* Set the user specified volume levels for channel 0 and 1 */
29744	select_page[8] = 0x01;
29745	select_page[9] = vol->channel0;
29746	select_page[10] = 0x02;
29747	select_page[11] = vol->channel1;
29748	/* Channels 2 and 3 are currently unsupported so return the sense data */
29749	select_page[12] = sense_page[12];
29750	select_page[13] = sense_page[13];
29751	select_page[14] = sense_page[14];
29752	select_page[15] = sense_page[15];
29753
29754	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29755		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
29756		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29757	} else {
29758		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
29759		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29760	}
29761
29762	kmem_free(sense, sense_buflen);
29763	kmem_free(select, select_buflen);
29764	return (rval);
29765}
29766
29767
29768/*
29769 *    Function: sr_read_sony_session_offset()
29770 *
29771 * Description: This routine is the driver entry point for handling CD-ROM
29772 *		ioctl requests for session offset information. (CDROMREADOFFSET)
29773 *		The address of the first track in the last session of a
29774 *		multi-session CD-ROM is returned
29775 *
29776 *		Note: This routine uses a vendor specific key value in the
29777 *		command control field without implementing any vendor check here
29778 *		or in the ioctl routine.
29779 *
29780 *   Arguments: dev	- the device 'dev_t'
29781 *		data	- pointer to an int to hold the requested address
29782 *		flag	- this argument is a pass through to ddi_copyxxx()
29783 *			  directly from the mode argument of ioctl().
29784 *
29785 * Return Code: the code returned by sd_send_scsi_cmd()
29786 *		EFAULT if ddi_copyxxx() fails
29787 *		ENXIO if ddi_get_soft_state() fails
29788 *		EINVAL if data pointer is NULL
29789 */
29790
29791static int
29792sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29793{
29794	struct sd_lun		*un;
29795	struct uscsi_cmd	*com;
29796	caddr_t			buffer;
29797	char			cdb[CDB_GROUP1];
29798	int			session_offset = 0;
29799	int			rval;
29800
29801	if (data == NULL) {
29802		return (EINVAL);
29803	}
29804
29805	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29806	    (un->un_state == SD_STATE_OFFLINE)) {
29807		return (ENXIO);
29808	}
29809
29810	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29811	bzero(cdb, CDB_GROUP1);
29812	cdb[0] = SCMD_READ_TOC;
29813	/*
29814	 * Bytes 7 & 8 hold the allocation length: 12 bytes for a single
29815	 * entry (4 byte TOC response header + 8 bytes of response data).
29816	 */
29817	cdb[8] = SONY_SESSION_OFFSET_LEN;
29818	/* Byte 9 is the control byte. A vendor specific value is used */
29819	cdb[9] = SONY_SESSION_OFFSET_KEY;
29820	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29821	com->uscsi_cdb = cdb;
29822	com->uscsi_cdblen = CDB_GROUP1;
29823	com->uscsi_bufaddr = buffer;
29824	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29825	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29826
29827	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29828	    UIO_SYSSPACE, SD_PATH_STANDARD);
29829	if (rval != 0) {
29830		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29831		kmem_free(com, sizeof (*com));
29832		return (rval);
29833	}
29834	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29835		session_offset =
29836		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29837		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29838		/*
29839		 * The drive returns the offset in units of the current
29840		 * lbasize blocks. Convert it to 2K blocks for the user.
29841		 */
29842		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29843			session_offset >>= 2;
29844		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29845			session_offset >>= 1;
29846		}
29847	}
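
	/*
	 * Worked example of the conversion above: a drive using 512 byte
	 * blocks that reports an offset of 100 yields 100 >> 2 == 25 in
	 * 2K blocks, since 2048 / 512 == 4.
	 */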
29848
29849	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29850		rval = EFAULT;
29851	}
29852
29853	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29854	kmem_free(com, sizeof (*com));
29855	return (rval);
29856}
29857
29858
29859/*
29860 *    Function: sd_wm_cache_constructor()
29861 *
29862 * Description: Cache Constructor for the wmap cache for the read/modify/write
29863 * 		devices.
29864 *
29865 *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29866 *		un	- sd_lun structure for the device.
29867 *		flag	- the km flags passed to constructor
29868 *
29869 * Return Code: 0 on success.
29870 *		-1 on failure.
29871 */
29872
29873/*ARGSUSED*/
29874static int
29875sd_wm_cache_constructor(void *wm, void *un, int flags)
29876{
29877	bzero(wm, sizeof (struct sd_w_map));
29878	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29879	return (0);
29880}
29881
29882
29883/*
29884 *    Function: sd_wm_cache_destructor()
29885 *
29886 * Description: Cache destructor for the wmap cache for the read/modify/write
29887 * 		devices.
29888 *
29889 *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
29890 *		un	- sd_lun structure for the device.
29891 */
29892/*ARGSUSED*/
29893static void
29894sd_wm_cache_destructor(void *wm, void *un)
29895{
29896	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29897}
29898
29899
29900/*
29901 *    Function: sd_range_lock()
29902 *
29903 * Description: Lock the specified range of blocks to ensure that a
29904 *		read-modify-write is atomic and that no other I/O writes
29905 *		to the same location. The range is specified in terms
29906 *		of start and end blocks. Block numbers are the actual
29907 *		media block numbers, not system block numbers.
29908 *
29909 *   Arguments: un	- sd_lun structure for the device.
29910 *		startb - The starting block number
29911 *		endb - The end block number
29912 *		typ - type of i/o - simple/read_modify_write
29913 *
29914 * Return Code: wm  - pointer to the wmap structure.
29915 *
29916 *     Context: This routine can sleep.
29917 */
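
/*
 * A rough sketch of the state machine implemented below:
 *
 *	SD_WM_CHK_LIST   --(no conflict)-->    SD_WM_LOCK_RANGE
 *	SD_WM_CHK_LIST   --(range busy)-->     SD_WM_WAIT_MAP
 *	SD_WM_LOCK_RANGE --(got wmap)-->       SD_WM_DONE
 *	SD_WM_LOCK_RANGE --(slept in alloc)--> SD_WM_CHK_LIST
 *	SD_WM_WAIT_MAP   --(woken up)-->       SD_WM_CHK_LIST
 */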
29918
29919static struct sd_w_map *
29920sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29921{
29922	struct sd_w_map *wmp = NULL;
29923	struct sd_w_map *sl_wmp = NULL;
29924	struct sd_w_map *tmp_wmp;
29925	wm_state state = SD_WM_CHK_LIST;
29926
29927
29928	ASSERT(un != NULL);
29929	ASSERT(!mutex_owned(SD_MUTEX(un)));
29930
29931	mutex_enter(SD_MUTEX(un));
29932
29933	while (state != SD_WM_DONE) {
29934
29935		switch (state) {
29936		case SD_WM_CHK_LIST:
29937			/*
29938			 * This is the starting state. Check the wmap list
29939			 * to see if the range is currently available.
29940			 */
29941			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29942				/*
29943				 * If this is a simple write and no rmw
29944				 * i/o is pending then try to lock the
29945				 * range as the range should be available.
29946				 */
29947				state = SD_WM_LOCK_RANGE;
29948			} else {
29949				tmp_wmp = sd_get_range(un, startb, endb);
29950				if (tmp_wmp != NULL) {
29951					if ((wmp != NULL) && ONLIST(un, wmp)) {
29952						/*
29953						 * Should not keep onlist wmps
29954						 * while waiting; this macro
29955						 * also sets wmp = NULL.
29956						 */
29957						FREE_ONLIST_WMAP(un, wmp);
29958					}
29959					/*
29960					 * sl_wmp is the wmap on which the wait
29961					 * is done. Since tmp_wmp points to the
29962					 * in-use wmap, set sl_wmp to tmp_wmp
29963					 * and change the state to wait.
29964					 */
29965					sl_wmp = tmp_wmp;
29966					state = SD_WM_WAIT_MAP;
29967				} else {
29968					state = SD_WM_LOCK_RANGE;
29969				}
29970
29971			}
29972			break;
29973
29974		case SD_WM_LOCK_RANGE:
29975			ASSERT(un->un_wm_cache);
29976			/*
29977			 * The range needs to be locked; try to get a wmap.
29978			 * First attempt it with KM_NOSLEEP, since we want to
29979			 * avoid sleeping if possible, as we would have to
29980			 * release the sd mutex in order to sleep.
29981			 */
29982			if (wmp == NULL)
29983				wmp = kmem_cache_alloc(un->un_wm_cache,
29984				    KM_NOSLEEP);
29985			if (wmp == NULL) {
29986				mutex_exit(SD_MUTEX(un));
29987				_NOTE(DATA_READABLE_WITHOUT_LOCK
29988				    (sd_lun::un_wm_cache))
29989				wmp = kmem_cache_alloc(un->un_wm_cache,
29990				    KM_SLEEP);
29991				mutex_enter(SD_MUTEX(un));
29992				/*
29993				 * We released the mutex, so recheck and go
29994				 * to the check-list state.
29995				 */
29996				state = SD_WM_CHK_LIST;
29997			} else {
29998				/*
29999				 * We exit the state machine since we have
30000				 * the wmap. Do the housekeeping first:
30001				 * place the wmap on the wmap list if it is
30002				 * not already on it, then set the state to done.
30003				 */
30004				wmp->wm_start = startb;
30005				wmp->wm_end = endb;
30006				wmp->wm_flags = typ | SD_WM_BUSY;
30007				if (typ & SD_WTYPE_RMW) {
30008					un->un_rmw_count++;
30009				}
30010				/*
30011				 * If not already on the list then link
30012				 */
30013				if (!ONLIST(un, wmp)) {
30014					wmp->wm_next = un->un_wm;
30015					wmp->wm_prev = NULL;
30016					if (wmp->wm_next)
30017						wmp->wm_next->wm_prev = wmp;
30018					un->un_wm = wmp;
30019				}
30020				state = SD_WM_DONE;
30021			}
30022			break;
30023
30024		case SD_WM_WAIT_MAP:
30025			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
30026			/*
30027			 * Wait is done on sl_wmp, which is set in the
30028			 * check_list state.
30029			 */
30030			sl_wmp->wm_wanted_count++;
30031			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
30032			sl_wmp->wm_wanted_count--;
30033			/*
30034			 * We can reuse the memory from the completed sl_wmp
30035			 * lock range for our new lock, but only if no one is
30036			 * waiting for it.
30037			 */
30038			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
30039			if (sl_wmp->wm_wanted_count == 0) {
30040				if (wmp != NULL)
30041					CHK_N_FREEWMP(un, wmp);
30042				wmp = sl_wmp;
30043			}
30044			sl_wmp = NULL;
30045			/*
30046			 * After waking up, need to recheck for availability of
30047			 * range.
30048			 */
30049			state = SD_WM_CHK_LIST;
30050			break;
30051
30052		default:
30053			panic("sd_range_lock: "
30054			    "Unknown state %d in sd_range_lock", state);
30055			/*NOTREACHED*/
30056		} /* switch(state) */
30057
30058	} /* while(state != SD_WM_DONE) */
30059
30060	mutex_exit(SD_MUTEX(un));
30061
30062	ASSERT(wmp != NULL);
30063
30064	return (wmp);
30065}
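
/*
 * A minimal usage sketch (an assumed caller pattern; the real callers
 * are the read-modify-write paths elsewhere in this file):
 *
 *	struct sd_w_map *wm;
 *
 *	wm = sd_range_lock(un, start_blk, end_blk, SD_WTYPE_RMW);
 *	...do the read, modify and write phases...
 *	sd_range_unlock(un, wm);
 */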
30066
30067
30068/*
30069 *    Function: sd_get_range()
30070 *
30071 * Description: Find whether there is any I/O overlapping with this one.
30072 *		Returns the write map of the first such I/O, NULL otherwise.
30073 *
30074 *   Arguments: un	- sd_lun structure for the device.
30075 *		startb - The starting block number
30076 *		endb - The end block number
30077 *
30078 * Return Code: wm  - pointer to the wmap structure.
30079 */
30080
30081static struct sd_w_map *
30082sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
30083{
30084	struct sd_w_map *wmp;
30085
30086	ASSERT(un != NULL);
30087
30088	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
30089		if (!(wmp->wm_flags & SD_WM_BUSY)) {
30090			continue;
30091		}
30092		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
30093			break;
30094		}
30095		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
30096			break;
30097		}
30098	}
30099
30100	return (wmp);
30101}
30102
30103
30104/*
30105 *    Function: sd_free_inlist_wmap()
30106 *
30107 * Description: Unlink and free a write map struct.
30108 *
30109 *   Arguments: un      - sd_lun structure for the device.
30110 *		wmp	- sd_w_map which needs to be unlinked.
30111 */
30112
30113static void
30114sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
30115{
30116	ASSERT(un != NULL);
30117
30118	if (un->un_wm == wmp) {
30119		un->un_wm = wmp->wm_next;
30120	} else {
30121		wmp->wm_prev->wm_next = wmp->wm_next;
30122	}
30123
30124	if (wmp->wm_next) {
30125		wmp->wm_next->wm_prev = wmp->wm_prev;
30126	}
30127
30128	wmp->wm_next = wmp->wm_prev = NULL;
30129
30130	kmem_cache_free(un->un_wm_cache, wmp);
30131}
30132
30133
30134/*
30135 *    Function: sd_range_unlock()
30136 *
30137 * Description: Unlock the range locked by wm.
30138 *		Free write map if nobody else is waiting on it.
30139 *
30140 *   Arguments: un      - sd_lun structure for the device.
30141 *              wm      - sd_w_map whose range is to be unlocked.
30142 */
30143
30144static void
30145sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
30146{
30147	ASSERT(un != NULL);
30148	ASSERT(wm != NULL);
30149	ASSERT(!mutex_owned(SD_MUTEX(un)));
30150
30151	mutex_enter(SD_MUTEX(un));
30152
30153	if (wm->wm_flags & SD_WTYPE_RMW) {
30154		un->un_rmw_count--;
30155	}
30156
30157	if (wm->wm_wanted_count) {
30158		wm->wm_flags = 0;
30159		/*
30160		 * Broadcast that the wmap is available now.
30161		 */
30162		cv_broadcast(&wm->wm_avail);
30163	} else {
30164		/*
30165		 * If no one is waiting on the map, it should be freed.
30166		 */
30167		sd_free_inlist_wmap(un, wm);
30168	}
30169
30170	mutex_exit(SD_MUTEX(un));
30171}
30172
30173
30174/*
30175 *    Function: sd_read_modify_write_task
30176 *
30177 * Description: Called from a taskq thread to initiate the write phase of
30178 *		a read-modify-write request.  This is used for targets where
30179 *		un->un_sys_blocksize != un->un_tgt_blocksize.
30180 *
30181 *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
30182 *
30183 *     Context: Called under taskq thread context.
30184 */
30185
30186static void
30187sd_read_modify_write_task(void *arg)
30188{
30189	struct sd_mapblocksize_info	*bsp;
30190	struct buf	*bp;
30191	struct sd_xbuf	*xp;
30192	struct sd_lun	*un;
30193
30194	bp = arg;	/* The bp is given in arg */
30195	ASSERT(bp != NULL);
30196
30197	/* Get the pointer to the layer-private data struct */
30198	xp = SD_GET_XBUF(bp);
30199	ASSERT(xp != NULL);
30200	bsp = xp->xb_private;
30201	ASSERT(bsp != NULL);
30202
30203	un = SD_GET_UN(bp);
30204	ASSERT(un != NULL);
30205	ASSERT(!mutex_owned(SD_MUTEX(un)));
30206
30207	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
30208	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
30209
30210	/*
30211	 * This is the write phase of a read-modify-write request, called
30212	 * under the context of a taskq thread in response to the read
30213	 * portion of the rmw request completing under interrupt
30214	 * context. The write request must be sent from here down the iostart
30215	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
30216	 * we use the layer index saved in the layer-private data area.
30217	 */
30218	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
30219
30220	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
30221	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
30222}
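
/*
 * A sketch of how this task is presumably queued by the read-completion
 * path; sd_wmr_tq is assumed to be the taskq created for this purpose
 * elsewhere in this file (see _init for the authoritative name):
 *
 *	if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
 *	    KM_NOSLEEP) == 0) {
 *		...fall back or fail the command...
 *	}
 */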
30223
30224
30225/*
30226 *    Function: sddump_do_read_of_rmw()
30227 *
30228 * Description: This routine is called from sddump. If sddump is called
30229 *		with an I/O that is not aligned on a device blocksize
30230 *		boundary, the write has to be converted to a
30231 *		read-modify-write. Do the read part here in order to keep
30232 *		sddump simple. Note that the sd_mutex is held across the
30233 *		call to this routine.
30234 *
30235 *   Arguments: un	- sd_lun
30236 *		blkno	- block number in terms of media block size.
30237 *		nblk	- number of blocks.
30238 *		bpp	- pointer to pointer to the buf structure. On return
30239 *			from this function, *bpp points to the valid buffer
30240 *			to which the write has to be done.
30241 *
30242 * Return Code: 0 for success or errno-type return code
30243 */
30244
30245static int
30246sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
30247	struct buf **bpp)
30248{
30249	int err;
30250	int i;
30251	int rval;
30252	struct buf *bp;
30253	struct scsi_pkt *pkt = NULL;
30254	uint32_t target_blocksize;
30255
30256	ASSERT(un != NULL);
30257	ASSERT(mutex_owned(SD_MUTEX(un)));
30258
30259	target_blocksize = un->un_tgt_blocksize;
30260
30261	mutex_exit(SD_MUTEX(un));
30262
30263	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
30264	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
30265	if (bp == NULL) {
30266		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
30267		    "no resources for dumping; giving up");
30268		err = ENOMEM;
30269		goto done;
30270	}
30271
30272	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
30273	    blkno, nblk);
30274	if (rval != 0) {
30275		scsi_free_consistent_buf(bp);
30276		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
30277		    "no resources for dumping; giving up");
30278		err = ENOMEM;
30279		goto done;
30280	}
30281
30282	pkt->pkt_flags |= FLAG_NOINTR;
30283
30284	err = EIO;
30285	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
30286
30287		/*
30288		 * Scsi_poll returns 0 (success) if the command completes and
30289		 * the status block is STATUS_GOOD.  We should only check
30290		 * errors if this condition is not true.  Even then we should
30291		 * send our own request sense packet only if we have a check
30292		 * condition and auto request sense has not been performed by
30293		 * the hba.
30294		 */
30295		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
30296
30297		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
30298			err = 0;
30299			break;
30300		}
30301
30302		/*
30303		 * Check CMD_DEV_GONE first; give up if the device is gone,
30304		 * no need to read RQS data.
30305		 */
30306		if (pkt->pkt_reason == CMD_DEV_GONE) {
30307			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
30308			    "Device is gone\n");
30309			break;
30310		}
30311
30312		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
30313			SD_INFO(SD_LOG_DUMP, un,
30314			    "sddump: read failed with CHECK, try # %d\n", i);
30315			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
30316				(void) sd_send_polled_RQS(un);
30317			}
30318
30319			continue;
30320		}
30321
30322		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
30323			int reset_retval = 0;
30324
30325			SD_INFO(SD_LOG_DUMP, un,
30326			    "sddump: read failed with BUSY, try # %d\n", i);
30327
30328			if (un->un_f_lun_reset_enabled == TRUE) {
30329				reset_retval = scsi_reset(SD_ADDRESS(un),
30330				    RESET_LUN);
30331			}
30332			if (reset_retval == 0) {
30333				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
30334			}
30335			(void) sd_send_polled_RQS(un);
30336
30337		} else {
30338			SD_INFO(SD_LOG_DUMP, un,
30339			    "sddump: read failed with 0x%x, try # %d\n",
30340			    SD_GET_PKT_STATUS(pkt), i);
30341			mutex_enter(SD_MUTEX(un));
30342			sd_reset_target(un, pkt);
30343			mutex_exit(SD_MUTEX(un));
30344		}
30345
30346		/*
30347		 * If we are not getting anywhere with lun/target resets,
30348		 * let's reset the bus.
30349		 */
30350		if (i > SD_NDUMP_RETRIES/2) {
30351			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
30352			(void) sd_send_polled_RQS(un);
30353		}
30354
30355	}
30356	scsi_destroy_pkt(pkt);
30357
30358	if (err != 0) {
30359		scsi_free_consistent_buf(bp);
30360		*bpp = NULL;
30361	} else {
30362		*bpp = bp;
30363	}
30364
30365done:
30366	mutex_enter(SD_MUTEX(un));
30367	return (err);
30368}
30369
30370
30371/*
30372 *    Function: sd_failfast_flushq
30373 *
30374 * Description: Take all bp's on the wait queue that have B_FAILFAST set
30375 *		in b_flags and move them onto the failfast queue, then kick
30376 *		off a thread to return all bp's on the failfast queue to
30377 *		their owners with an error set.
30378 *
30379 *   Arguments: un - pointer to the soft state struct for the instance.
30380 *
30381 *     Context: may execute in interrupt context.
30382 */
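
/*
 * Queue manipulation sketch for the SD_FAILFAST_FLUSH_ALL_BUFS case
 * handled below, where the whole wait queue is spliced onto the tail
 * of the failfast queue in one step:
 *
 *	failfast: H -> ... -> T        waitq: h -> ... -> t
 *
 * becomes
 *
 *	failfast: H -> ... -> T -> h -> ... -> t        waitq: empty
 */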
30383
30384static void
30385sd_failfast_flushq(struct sd_lun *un)
30386{
30387	struct buf *bp;
30388	struct buf *next_waitq_bp;
30389	struct buf *prev_waitq_bp = NULL;
30390
30391	ASSERT(un != NULL);
30392	ASSERT(mutex_owned(SD_MUTEX(un)));
30393	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
30394	ASSERT(un->un_failfast_bp == NULL);
30395
30396	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30397	    "sd_failfast_flushq: entry: un:0x%p\n", un);
30398
30399	/*
30400	 * Check if we should flush all bufs when entering failfast state, or
30401	 * just those with B_FAILFAST set.
30402	 */
30403	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
30404		/*
30405		 * Move *all* bp's on the wait queue to the failfast flush
30406		 * queue, including those that do NOT have B_FAILFAST set.
30407		 */
30408		if (un->un_failfast_headp == NULL) {
30409			ASSERT(un->un_failfast_tailp == NULL);
30410			un->un_failfast_headp = un->un_waitq_headp;
30411		} else {
30412			ASSERT(un->un_failfast_tailp != NULL);
30413			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
30414		}
30415
30416		un->un_failfast_tailp = un->un_waitq_tailp;
30417
30418		/* update kstat for each bp moved out of the waitq */
30419		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
30420			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30421		}
30422
30423		/* empty the waitq */
30424		un->un_waitq_headp = un->un_waitq_tailp = NULL;
30425
30426	} else {
30427		/*
30428		 * Go thru the wait queue, pick off all entries with
30429		 * B_FAILFAST set, and move these onto the failfast queue.
30430		 */
30431		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
30432			/*
30433			 * Save the pointer to the next bp on the wait queue,
30434			 * so we get to it on the next iteration of this loop.
30435			 */
30436			next_waitq_bp = bp->av_forw;
30437
30438			/*
30439			 * If this bp from the wait queue does NOT have
30440			 * B_FAILFAST set, just move on to the next element
30441			 * in the wait queue. Note, this is the only place
30442			 * where it is correct to set prev_waitq_bp.
30443			 */
30444			if ((bp->b_flags & B_FAILFAST) == 0) {
30445				prev_waitq_bp = bp;
30446				continue;
30447			}
30448
30449			/*
30450			 * Remove the bp from the wait queue.
30451			 */
30452			if (bp == un->un_waitq_headp) {
30453				/* The bp is the first element of the waitq. */
30454				un->un_waitq_headp = next_waitq_bp;
30455				if (un->un_waitq_headp == NULL) {
30456					/* The wait queue is now empty */
30457					un->un_waitq_tailp = NULL;
30458				}
30459			} else {
30460				/*
30461				 * The bp is either somewhere in the middle
30462				 * or at the end of the wait queue.
30463				 */
30464				ASSERT(un->un_waitq_headp != NULL);
30465				ASSERT(prev_waitq_bp != NULL);
30466				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
30467				    == 0);
30468				if (bp == un->un_waitq_tailp) {
30469					/* bp is the last entry on the waitq. */
30470					ASSERT(next_waitq_bp == NULL);
30471					un->un_waitq_tailp = prev_waitq_bp;
30472				}
30473				prev_waitq_bp->av_forw = next_waitq_bp;
30474			}
30475			bp->av_forw = NULL;
30476
30477			/*
30478			 * update kstat since the bp is moved out of
30479			 * the waitq
30480			 */
30481			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30482
30483			/*
30484			 * Now put the bp onto the failfast queue.
30485			 */
30486			if (un->un_failfast_headp == NULL) {
30487				/* failfast queue is currently empty */
30488				ASSERT(un->un_failfast_tailp == NULL);
30489				un->un_failfast_headp =
30490				    un->un_failfast_tailp = bp;
30491			} else {
30492				/* Add the bp to the end of the failfast q */
30493				ASSERT(un->un_failfast_tailp != NULL);
30494				ASSERT(un->un_failfast_tailp->b_flags &
30495				    B_FAILFAST);
30496				un->un_failfast_tailp->av_forw = bp;
30497				un->un_failfast_tailp = bp;
30498			}
30499		}
30500	}
30501
30502	/*
30503	 * Now return all bp's on the failfast queue to their owners.
30504	 */
30505	while ((bp = un->un_failfast_headp) != NULL) {
30506
30507		un->un_failfast_headp = bp->av_forw;
30508		if (un->un_failfast_headp == NULL) {
30509			un->un_failfast_tailp = NULL;
30510		}
30511
30512		/*
30513		 * We want to return the bp with a failure error code, but
30514		 * we do not want a call to sd_start_cmds() to occur here,
30515		 * so use sd_return_failed_command_no_restart() instead of
30516		 * sd_return_failed_command().
30517		 */
30518		sd_return_failed_command_no_restart(un, bp, EIO);
30519	}
30520
30521	/* Flush the xbuf queues if required. */
30522	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
30523		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
30524	}
30525
30526	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30527	    "sd_failfast_flushq: exit: un:0x%p\n", un);
30528}
30529
30530
30531/*
30532 *    Function: sd_failfast_flushq_callback
30533 *
30534 * Description: Return TRUE if the given bp meets the criteria for failfast
30535 *		flushing. Used with ddi_xbuf_flushq(9F).
30536 *
30537 *   Arguments: bp - ptr to buf struct to be examined.
30538 *
30539 *     Context: Any
30540 */
30541
30542static int
30543sd_failfast_flushq_callback(struct buf *bp)
30544{
30545	/*
30546	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30547	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30548	 */
30549	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30550	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30551}
30552
30553
30554
30555#if defined(__i386) || defined(__amd64)
30556/*
30557 * Function: sd_setup_next_xfer
30558 *
30559 * Description: Prepare next I/O operation using DMA_PARTIAL
30560 *
30561 */
30562
30563static int
30564sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30565    struct scsi_pkt *pkt, struct sd_xbuf *xp)
30566{
30567	ssize_t	num_blks_not_xfered;
30568	daddr_t	strt_blk_num;
30569	ssize_t	bytes_not_xfered;
30570	int	rval;
30571
30572	ASSERT(pkt->pkt_resid == 0);
30573
30574	/*
30575	 * Calculate next block number and amount to be transferred.
30576	 *
30577	 * How much data has NOT been transferred to the HBA yet.
30578	 */
30579	bytes_not_xfered = xp->xb_dma_resid;
30580
30581	/*
30582	 * figure how many blocks have NOT been transferred to the HBA yet.
30583	 */
30584	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30585
30586	/*
30587	 * set starting block number to the end of what WAS transferred.
30588	 */
30589	strt_blk_num = xp->xb_blkno +
30590	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
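
	/*
	 * Worked example for the arithmetic above: with a 512 byte
	 * target block size, a b_bcount of 1 MB and an xb_dma_resid of
	 * 256 KB, 768 KB has already been transferred, so the next
	 * chunk starts at xb_blkno + 1536 and covers 512 blocks.
	 */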
30591
30592	/*
30593	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30594	 * will call scsi_init_pkt with NULL_FUNC so we do not have to release
30595	 * the disk mutex here.
30596	 */
30597	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30598	    strt_blk_num, num_blks_not_xfered);
30599
30600	if (rval == 0) {
30601
30602		/*
30603		 * Success.
30604		 *
30605		 * Adjust things if there are still more blocks to be
30606		 * transferred.
30607		 */
30608		xp->xb_dma_resid = pkt->pkt_resid;
30609		pkt->pkt_resid = 0;
30610
30611		return (1);
30612	}
30613
30614	/*
30615	 * There's really only one possible failure return value from
30616	 * sd_setup_next_rw_pkt, which occurs when scsi_init_pkt
30617	 * returns NULL.
30618	 */
30619	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30620
30621	bp->b_resid = bp->b_bcount;
30622	bp->b_flags |= B_ERROR;
30623
30624	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30625	    "Error setting up next portion of DMA transfer\n");
30626
30627	return (0);
30628}
30629#endif
30630
30631/*
30632 *    Function: sd_panic_for_res_conflict
30633 *
30634 * Description: Call panic with a string formatted with "Reservation Conflict"
30635 *		and a human readable identifier indicating the SD instance
30636 *		that experienced the reservation conflict.
30637 *
30638 *   Arguments: un - pointer to the soft state struct for the instance.
30639 *
30640 *     Context: may execute in interrupt context.
30641 */
30642
30643#define	SD_RESV_CONFLICT_FMT_LEN 40
30644void
30645sd_panic_for_res_conflict(struct sd_lun *un)
30646{
30647	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30648	char path_str[MAXPATHLEN];
30649
30650	(void) snprintf(panic_str, sizeof (panic_str),
30651	    "Reservation Conflict\nDisk: %s",
30652	    ddi_pathname(SD_DEVINFO(un), path_str));
30653
30654	panic(panic_str);
30655}
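
/*
 * For example, the resulting panic string looks like (device path is
 * illustrative only):
 *
 *	Reservation Conflict
 *	Disk: /pci@1f,4000/scsi@3/disk@1,0
 */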
30656
30657/*
30658 * Note: The following sd_faultinjection_ioctl( ) routines implement
30659 * driver support for handling fault injection for error analysis,
30660 * injecting faults into multiple layers of the driver.
30661 *
30662 */
30663
30664#ifdef SD_FAULT_INJECTION
30665static uint_t   sd_fault_injection_on = 0;
30666
30667/*
30668 *    Function: sd_faultinjection_ioctl()
30669 *
30670 * Description: This routine is the driver entry point for handling
30671 *              faultinjection ioctls to inject errors into the
30672 *              layer model
30673 *
30674 *   Arguments: cmd	- the ioctl cmd received
30675 *		arg	- the user argument, also used to return data
30676 */
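
/*
 * The injection FIFO consists of parallel arrays (pkt, xb, un, arq)
 * indexed modulo SD_FI_MAX_ERROR: the SDIOCINSERT* cases store at
 * sd_fi_fifo_end and sd_faultinjection() consumes at sd_fi_fifo_start,
 * e.g.:
 *
 *	i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;	(insert side)
 *	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;	(consume side)
 */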
30677
30678static void
30679sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un) {
30680
30681	uint_t i;
30682	uint_t rval;
30683
30684	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30685
30686	mutex_enter(SD_MUTEX(un));
30687
30688	switch (cmd) {
30689	case SDIOCRUN:
30690		/* Allow pushed faults to be injected */
30691		SD_INFO(SD_LOG_SDTEST, un,
30692		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30693
30694		sd_fault_injection_on = 1;
30695
30696		SD_INFO(SD_LOG_IOERR, un,
30697		    "sd_faultinjection_ioctl: run finished\n");
30698		break;
30699
30700	case SDIOCSTART:
30701		/* Start Injection Session */
30702		SD_INFO(SD_LOG_SDTEST, un,
30703		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30704
30705		sd_fault_injection_on = 0;
30706		un->sd_injection_mask = 0xFFFFFFFF;
30707		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30708			un->sd_fi_fifo_pkt[i] = NULL;
30709			un->sd_fi_fifo_xb[i] = NULL;
30710			un->sd_fi_fifo_un[i] = NULL;
30711			un->sd_fi_fifo_arq[i] = NULL;
30712		}
30713		un->sd_fi_fifo_start = 0;
30714		un->sd_fi_fifo_end = 0;
30715
30716		mutex_enter(&(un->un_fi_mutex));
30717		un->sd_fi_log[0] = '\0';
30718		un->sd_fi_buf_len = 0;
30719		mutex_exit(&(un->un_fi_mutex));
30720
30721		SD_INFO(SD_LOG_IOERR, un,
30722		    "sd_faultinjection_ioctl: start finished\n");
30723		break;
30724
30725	case SDIOCSTOP:
30726		/* Stop Injection Session */
30727		SD_INFO(SD_LOG_SDTEST, un,
30728		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30729		sd_fault_injection_on = 0;
30730		un->sd_injection_mask = 0x0;
30731
30732		/* Empty stray or unused structs from fifo */
30733		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30734			if (un->sd_fi_fifo_pkt[i] != NULL) {
30735				kmem_free(un->sd_fi_fifo_pkt[i],
30736				    sizeof (struct sd_fi_pkt));
30737			}
30738			if (un->sd_fi_fifo_xb[i] != NULL) {
30739				kmem_free(un->sd_fi_fifo_xb[i],
30740				    sizeof (struct sd_fi_xb));
30741			}
30742			if (un->sd_fi_fifo_un[i] != NULL) {
30743				kmem_free(un->sd_fi_fifo_un[i],
30744				    sizeof (struct sd_fi_un));
30745			}
30746			if (un->sd_fi_fifo_arq[i] != NULL) {
30747				kmem_free(un->sd_fi_fifo_arq[i],
30748				    sizeof (struct sd_fi_arq));
30749			}
30750			un->sd_fi_fifo_pkt[i] = NULL;
30751			un->sd_fi_fifo_un[i] = NULL;
30752			un->sd_fi_fifo_xb[i] = NULL;
30753			un->sd_fi_fifo_arq[i] = NULL;
30754		}
30755		un->sd_fi_fifo_start = 0;
30756		un->sd_fi_fifo_end = 0;
30757
30758		SD_INFO(SD_LOG_IOERR, un,
30759		    "sd_faultinjection_ioctl: stop finished\n");
30760		break;
30761
30762	case SDIOCINSERTPKT:
30763		/* Store a packet struct to be pushed onto fifo */
30764		SD_INFO(SD_LOG_SDTEST, un,
30765		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30766
30767		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30768
30769		sd_fault_injection_on = 0;
30770
30771		/* No more than SD_FI_MAX_ERROR allowed in Queue */
30772		if (un->sd_fi_fifo_pkt[i] != NULL) {
30773			kmem_free(un->sd_fi_fifo_pkt[i],
30774			    sizeof (struct sd_fi_pkt));
30775		}
30776		if (arg != NULL) {
30777			un->sd_fi_fifo_pkt[i] =
30778			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30779			if (un->sd_fi_fifo_pkt[i] == NULL) {
30780				/* Alloc failed don't store anything */
30781				break;
30782			}
30783			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30784			    sizeof (struct sd_fi_pkt), 0);
30785			if (rval == -1) {
30786				kmem_free(un->sd_fi_fifo_pkt[i],
30787				    sizeof (struct sd_fi_pkt));
30788				un->sd_fi_fifo_pkt[i] = NULL;
30789			}
30790		} else {
30791			SD_INFO(SD_LOG_IOERR, un,
30792			    "sd_faultinjection_ioctl: pkt null\n");
30793		}
30794		break;
30795
30796	case SDIOCINSERTXB:
30797		/* Store a xb struct to be pushed onto fifo */
30798		SD_INFO(SD_LOG_SDTEST, un,
30799		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30800
30801		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30802
30803		sd_fault_injection_on = 0;
30804
30805		if (un->sd_fi_fifo_xb[i] != NULL) {
30806			kmem_free(un->sd_fi_fifo_xb[i],
30807			    sizeof (struct sd_fi_xb));
30808			un->sd_fi_fifo_xb[i] = NULL;
30809		}
30810		if (arg != NULL) {
30811			un->sd_fi_fifo_xb[i] =
30812			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30813			if (un->sd_fi_fifo_xb[i] == NULL) {
30814				/* Alloc failed don't store anything */
30815				break;
30816			}
30817			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30818			    sizeof (struct sd_fi_xb), 0);
30819
30820			if (rval == -1) {
30821				kmem_free(un->sd_fi_fifo_xb[i],
30822				    sizeof (struct sd_fi_xb));
30823				un->sd_fi_fifo_xb[i] = NULL;
30824			}
30825		} else {
30826			SD_INFO(SD_LOG_IOERR, un,
30827			    "sd_faultinjection_ioctl: xb null\n");
30828		}
30829		break;
30830
30831	case SDIOCINSERTUN:
30832		/* Store a un struct to be pushed onto fifo */
30833		SD_INFO(SD_LOG_SDTEST, un,
30834		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30835
30836		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30837
30838		sd_fault_injection_on = 0;
30839
30840		if (un->sd_fi_fifo_un[i] != NULL) {
30841			kmem_free(un->sd_fi_fifo_un[i],
30842			    sizeof (struct sd_fi_un));
30843			un->sd_fi_fifo_un[i] = NULL;
30844		}
30845		if (arg != NULL) {
30846			un->sd_fi_fifo_un[i] =
30847			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30848			if (un->sd_fi_fifo_un[i] == NULL) {
30849				/* Alloc failed don't store anything */
30850				break;
30851			}
30852			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30853			    sizeof (struct sd_fi_un), 0);
30854			if (rval == -1) {
30855				kmem_free(un->sd_fi_fifo_un[i],
30856				    sizeof (struct sd_fi_un));
30857				un->sd_fi_fifo_un[i] = NULL;
30858			}
30859
30860		} else {
30861			SD_INFO(SD_LOG_IOERR, un,
30862			    "sd_faultinjection_ioctl: un null\n");
30863		}
30864
30865		break;
30866
30867	case SDIOCINSERTARQ:
30868		/* Store an arq struct to be pushed onto fifo */
30869		SD_INFO(SD_LOG_SDTEST, un,
30870		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30871		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30872
30873		sd_fault_injection_on = 0;
30874
30875		if (un->sd_fi_fifo_arq[i] != NULL) {
30876			kmem_free(un->sd_fi_fifo_arq[i],
30877			    sizeof (struct sd_fi_arq));
30878			un->sd_fi_fifo_arq[i] = NULL;
30879		}
30880		if (arg != NULL) {
30881			un->sd_fi_fifo_arq[i] =
30882			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30883			if (un->sd_fi_fifo_arq[i] == NULL) {
30884				/* Alloc failed don't store anything */
30885				break;
30886			}
30887			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30888			    sizeof (struct sd_fi_arq), 0);
30889			if (rval == -1) {
30890				kmem_free(un->sd_fi_fifo_arq[i],
30891				    sizeof (struct sd_fi_arq));
30892				un->sd_fi_fifo_arq[i] = NULL;
30893			}
30894
30895		} else {
30896			SD_INFO(SD_LOG_IOERR, un,
30897			    "sd_faultinjection_ioctl: arq null\n");
30898		}
30899
30900		break;
30901
30902	case SDIOCPUSH:
30903		/* Push stored xb, pkt, un, and arq onto fifo */
30904		sd_fault_injection_on = 0;
30905
30906		if (arg != NULL) {
30907			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30908			if (rval != -1 &&
30909			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30910				un->sd_fi_fifo_end += i;
30911			}
30912		} else {
30913			SD_INFO(SD_LOG_IOERR, un,
30914			    "sd_faultinjection_ioctl: push arg null\n");
30915			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
30916				un->sd_fi_fifo_end++;
30917			}
30918		}
30919		SD_INFO(SD_LOG_IOERR, un,
30920		    "sd_faultinjection_ioctl: push to end=%d\n",
30921		    un->sd_fi_fifo_end);
30922		break;
30923
30924	case SDIOCRETRIEVE:
30925		/* Return buffer of log from Injection session */
30926		SD_INFO(SD_LOG_SDTEST, un,
30927		    "sd_faultinjection_ioctl: Injecting Fault Retrieve");
30928
30929		sd_fault_injection_on = 0;
30930
30931		mutex_enter(&(un->un_fi_mutex));
30932		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30933		    un->sd_fi_buf_len+1, 0);
30934		mutex_exit(&(un->un_fi_mutex));
30935
30936		if (rval == -1) {
30937			/*
30938			 * arg is possibly invalid; set
30939			 * it to NULL for return.
30940			 */
30941			arg = NULL;
30942		}
30943		break;
30944	}
30945
30946	mutex_exit(SD_MUTEX(un));
30947	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
30949}
30950
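/*
 * For illustration only: a hedged sketch of how a userland test harness
 * might drive the fault-injection ioctls handled above. This is not part
 * of the driver. It assumes the SDIOC* command codes and the sd_fi_*
 * structures from <sys/scsi/targets/sddef.h> are visible, that fd is an
 * open descriptor on the target's raw device, that an SDIOCSTART-style
 * enable command (not shown in this excerpt) re-arms injection after
 * staging, and that error handling is elided:
 *
 *	struct sd_fi_arq arq;
 *	uint_t push_cnt = 1;
 *	char log[SD_FI_MAX_BUF];
 *
 *	bzero(&arq, sizeof (struct sd_fi_arq));
 *	(fill in the desired status and sense fields of arq here)
 *
 *	(void) ioctl(fd, SDIOCINSERTARQ, &arq);	(stage an arq entry)
 *	(void) ioctl(fd, SDIOCPUSH, &push_cnt);	(push it onto the fifo)
 *	(issue I/O against the device, then...)
 *	(void) ioctl(fd, SDIOCRETRIEVE, log);	(read back the injection log)
 */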
30951
30952/*
30953 *    Function: sd_injection_log()
30954 *
30955 * Description: This routine appends buf to the existing injection log, for
30956 *              retrieval via sd_faultinjection_ioctl() in fault detection
30957 *              and recovery testing.
30958 *
30959 *   Arguments: buf - the string to add to the log; un - the target unit
30960 */
30961
30962static void
30963sd_injection_log(char *buf, struct sd_lun *un)
30964{
30965	uint_t len;
30966
30967	ASSERT(un != NULL);
30968	ASSERT(buf != NULL);
30969
30970	mutex_enter(&(un->un_fi_mutex));
30971
30972	len = min(strlen(buf), 255);
30973	/* Add logged value to Injection log to be returned later */
30974	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30975		uint_t	offset = strlen((char *)un->sd_fi_log);
30976		char *destp = (char *)un->sd_fi_log + offset;
30977		int i;
30978		for (i = 0; i < len; i++) {
30979			*destp++ = *buf++;
30980		}
30981		un->sd_fi_buf_len += len;
30982		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30983	}
30984
30985	mutex_exit(&(un->un_fi_mutex));
30986}
30987
30988
30989/*
30990 *    Function: sd_faultinjection()
30991 *
30992 * Description: This routine takes the pkt and changes its
30993 *		content based on the error injection scenario.
30994 *
30995 *   Arguments: pktp	- packet to be changed
30996 */
30997
30998static void
30999sd_faultinjection(struct scsi_pkt *pktp)
31000{
31001	uint_t i;
31002	struct sd_fi_pkt *fi_pkt;
31003	struct sd_fi_xb *fi_xb;
31004	struct sd_fi_un *fi_un;
31005	struct sd_fi_arq *fi_arq;
31006	struct buf *bp;
31007	struct sd_xbuf *xb;
31008	struct sd_lun *un;
31009
31010	ASSERT(pktp != NULL);
31011
31012	/* pull bp, xb, and un from pktp */
31013	bp = (struct buf *)pktp->pkt_private;
31014	xb = SD_GET_XBUF(bp);
31015	un = SD_GET_UN(bp);
31016
31017	ASSERT(un != NULL);
31018
31019	mutex_enter(SD_MUTEX(un));
31020
31021	SD_TRACE(SD_LOG_SDTEST, un,
31022	    "sd_faultinjection: entry Injection from sdintr\n");
31023
31024	/* if injection is off return */
31025	if (sd_fault_injection_on == 0 ||
31026	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
31027		mutex_exit(SD_MUTEX(un));
31028		return;
31029	}
31030
31031
31032	/* take next set off fifo */
31033	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
31034
31035	fi_pkt = un->sd_fi_fifo_pkt[i];
31036	fi_xb = un->sd_fi_fifo_xb[i];
31037	fi_un = un->sd_fi_fifo_un[i];
31038	fi_arq = un->sd_fi_fifo_arq[i];
31039
31040
31041	/* set variables accordingly */
31042	/* set pkt if it was on fifo */
31043	if (fi_pkt != NULL) {
31044		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
31045		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
31046		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
31047		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
31048		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
31049		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
31050
31051	}
31052
31053	/* set xb if it was on fifo */
31054	if (fi_xb != NULL) {
31055		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
31056		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
31057		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
31058		SD_CONDSET(xb, xb, xb_victim_retry_count,
31059		    "xb_victim_retry_count");
31060		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
31061		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
31062		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
31063
31064		/* copy in staged sense data, if any */
31065		if (fi_xb->xb_sense_data[0] != -1) {
31066			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
31067			    SENSE_LENGTH);
31068		}
31069
31070		/* copy in extended sense codes */
31071		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
31072		    "es_code");
31073		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
31074		    "es_key");
31075		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
31076		    "es_add_code");
31077		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
31078		    es_qual_code, "es_qual_code");
31079	}
31080
31081	/* set un if it was on fifo */
31082	if (fi_un != NULL) {
31083		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
31084		SD_CONDSET(un, un, un_ctype, "un_ctype");
31085		SD_CONDSET(un, un, un_reset_retry_count,
31086		    "un_reset_retry_count");
31087		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
31088		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
31089		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
31090		SD_CONDSET(un, un, un_f_geometry_is_valid,
31091		    "un_f_geometry_is_valid");
31092		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
31093		    "un_f_allow_bus_device_reset");
31094		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
31095
31096	}
31097
31098	/* copy in auto request sense if it was on fifo */
31099	if (fi_arq != NULL) {
31100		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
31101	}
31102
31103	/* free structs */
31104	if (un->sd_fi_fifo_pkt[i] != NULL) {
31105		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
31106	}
31107	if (un->sd_fi_fifo_xb[i] != NULL) {
31108		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
31109	}
31110	if (un->sd_fi_fifo_un[i] != NULL) {
31111		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
31112	}
31113	if (un->sd_fi_fifo_arq[i] != NULL) {
31114		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
31115	}
31116
31117	/*
31118	 * kmem_free() does not guarantee to set the pointer to NULL.
31119	 * Since these pointers are used to determine whether values
31120	 * were staged, explicitly reset them to NULL after each
31121	 * free.
31122	 */
31123	un->sd_fi_fifo_pkt[i] = NULL;
31124	un->sd_fi_fifo_un[i] = NULL;
31125	un->sd_fi_fifo_xb[i] = NULL;
31126	un->sd_fi_fifo_arq[i] = NULL;
31127
31128	un->sd_fi_fifo_start++;
31129
31130	mutex_exit(SD_MUTEX(un));
31131
31132	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
31133}
31134
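/*
 * For illustration only: a hedged userland-side sketch of staging an
 * sd_fi_xb entry that the routine above would splice into the next
 * command completion. The field names come from the SD_CONDSET calls
 * above; the exact types, the SDIOCINSERTXB command code, and the
 * 0xFF/-1 "field not set" sentinel convention live in sddef.h and are
 * assumptions here, not guarantees:
 *
 *	struct sd_fi_xb xb;
 *
 *	(void) memset(&xb, 0xFF, sizeof (struct sd_fi_xb));
 *	xb.xb_sense_status = STATUS_CHECK;
 *	xb.es_key = KEY_MEDIUM_ERROR;
 *	(void) ioctl(fd, SDIOCINSERTXB, &xb);
 *
 * The intent is that fields left at the sentinel are skipped, so only
 * the sense status and sense key of the victim command are overridden.
 */
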
31135#endif /* SD_FAULT_INJECTION */
31136
31137/*
31138 * This routine is invoked from sd_unit_attach(). Before it is called, the
31139 * properties in the conf file, including the "hotpluggable" property, must
31140 * already have been processed.
31141 *
31142 * The sd driver distinguishes three types of devices: removable media,
31143 * non-removable media, and hotpluggable. The differences are defined below:
31144 *
31145 * 1. Device ID
31146 *
31147 *     The device ID of a device is used to identify this device. Refer to
31148 *     ddi_devid_register(9F).
31149 *
31150 *     For a non-removable media disk device that can provide a 0x80 or 0x83
31151 *     VPD page (refer to the INQUIRY command in the SCSI SPC specification),
31152 *     a unique device ID is created to identify the device. For other
31153 *     non-removable media devices, a default device ID is created only if
31154 *     the device has at least 2 alternate cylinders; else it has no devid.
31155 *
31156 *     -------------------------------------------------------
31157 *     removable media   hotpluggable  | Can Have Device ID
31158 *     -------------------------------------------------------
31159 *         false             false     |     Yes
31160 *         false             true      |     Yes
31161 *         true                x       |     No
31162 *     ------------------------------------------------------
31163 *
31164 *
31165 * 2. SCSI group 4 commands
31166 *
31167 *     In the SCSI specs, only some commands in the group 4 command set
31168 *     carry 8-byte block addresses, which are required to access storage
31169 *     beyond 2TB; other commands have no such capability. Without group 4
31170 *     support, it is impossible to make full use of the storage space of
31171 *     a disk with a capacity larger than 2TB.
31172 *
31173 *     -----------------------------------------------
31174 *     removable media   hotpluggable   LP64  |  Group
31175 *     -----------------------------------------------
31176 *           false          false       false |   1
31177 *           false          false       true  |   4
31178 *           false          true        false |   1
31179 *           false          true        true  |   4
31180 *           true             x           x   |   5
31181 *     -----------------------------------------------
31182 *
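 *     For illustration: a SCSI opcode's group is its top three bits, so
 *     group 4 spans opcodes 0x80-0x9f (the 16-byte CDBs); e.g. READ(16)
 *     is opcode 0x88 and carries an 8-byte LBA:
 *
 *		uchar_t op = 0x88;
 *		int group = (op >> 5) & 0x7;	(evaluates to 4)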
31183 *
31184 * 3. Check for VTOC Label
31185 *
31186 *     If a direct-access disk has no EFI label, sd will check if it has a
31187 *     valid VTOC label. Now, sd also does that check for removable media
31188 *     and hotpluggable devices.
31189 *
31190 *     --------------------------------------------------------------
31191 *     Direct-Access   removable media    hotpluggable |  Check Label
31192 *     -------------------------------------------------------------
31193 *         false          false           false        |   No
31194 *         false          false           true         |   No
31195 *         false          true            false        |   Yes
31196 *         false          true            true         |   Yes
31197 *         true            x                x          |   Yes
31198 *     --------------------------------------------------------------
31199 *
31200 *
31201 * 4. Building default VTOC label
31202 *
31203 *     As section 3 says, sd checks whether certain devices have a VTOC
31204 *     label. If a device has no valid VTOC label, sd(7d) attempts to
31205 *     create a default VTOC for it. Currently sd creates a default VTOC
31206 *     label for all devices on the x86 platform (VTOC_16), but only for
31207 *     removable media and hotpluggable devices on SPARC (VTOC_8).
31208 *
31209 *     -----------------------------------------------------------
31210 *       removable media hotpluggable platform   |   Default Label
31211 *     -----------------------------------------------------------
31212 *             false          false    sparc     |     No
31213 *             false          true      x86      |     Yes
31214 *             false          true     sparc     |     Yes
31215 *             true             x        x       |     Yes
31216 *     ----------------------------------------------------------
31217 *
31218 *
31219 * 5. Supported blocksizes of target devices
31220 *
31221 *     Sd supports a non-512-byte blocksize for removable media devices only.
31222 *     For other devices, only a 512-byte blocksize is supported. This may
31223 *     change in the near future because some RAID devices require a
31224 *     non-512-byte blocksize.
31225 *
31226 *     -----------------------------------------------------------
31227 *     removable media    hotpluggable    | non-512-byte blocksize
31228 *     -----------------------------------------------------------
31229 *           false          false         |   No
31230 *           false          true          |   No
31231 *           true             x           |   Yes
31232 *     -----------------------------------------------------------
31233 *
31234 *
31235 * 6. Automatic mount & unmount
31236 *
31237 *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which queries
31238 *     whether a device is a removable media device. It returns 1 for
31239 *     removable media devices and 0 for others (see the sketch below).
31240 *
31241 *     The automatic mounting subsystem should distinguish between the types
31242 *     of devices and apply automounting policies to each.
31243 *
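 *     For illustration (assuming <sys/dkio.h>, an open descriptor fd on
 *     the raw device, and elided error handling):
 *
 *		int removable = 0;
 *
 *		if (ioctl(fd, DKIOCREMOVABLE, &removable) == 0 &&
 *		    removable != 0) {
 *			(void) printf("apply removable-media policy\n");
 *		}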
31244 *
31245 * 7. fdisk partition management
31246 *
31247 *     Fdisk is the traditional partitioning method on the x86 platform, and
31248 *     the sd(7d) driver supports fdisk partitions on x86 only. On the SPARC
31249 *     platform, sd doesn't support fdisk partitions at all. Note: pcfs(7fs)
31250 *     can recognize fdisk partitions on both x86 and SPARC platforms.
31251 *
31252 *     -----------------------------------------------------------
31253 *       platform   removable media  USB/1394  |  fdisk supported
31254 *     -----------------------------------------------------------
31255 *        x86         X               X        |       true
31256 *     ------------------------------------------------------------
31257 *        sparc       X               X        |       false
31258 *     ------------------------------------------------------------
31259 *
31260 *
31261 * 8. MBOOT/MBR
31262 *
31263 *     Although sd(7d) doesn't support fdisk on SPARC, it does support
31264 *     reading/writing the mboot for removable media and USB/1394 devices.
31265 *
31266 *     -----------------------------------------------------------
31267 *       platform   removable media  USB/1394  |  mboot supported
31268 *     -----------------------------------------------------------
31269 *        x86         X               X        |       true
31270 *     ------------------------------------------------------------
31271 *        sparc      false           false     |       false
31272 *        sparc      false           true      |       true
31273 *        sparc      true            false     |       true
31274 *        sparc      true            true      |       true
31275 *     ------------------------------------------------------------
31276 *
31277 *
31278 * 9. Error handling during device open
31279 *
31280 *     If opening a disk device fails, an errno is returned. For some kinds
31281 *     of errors, the errno returned depends on whether the device is a
31282 *     removable media device. This brings USB/1394 hard disks in line with
31283 *     expected hard disk behavior. It is not expected that this breaks any
31284 *     application.
31285 *
31286 *     ------------------------------------------------------
31287 *       removable media    hotpluggable   |  errno
31288 *     ------------------------------------------------------
31289 *             false          false        |   EIO
31290 *             false          true         |   EIO
31291 *             true             x          |   ENXIO
31292 *     ------------------------------------------------------
31293 *
31294 *
31295 * 10. ioctls: DKIOCEJECT, CDROMEJECT
31296 *
31297 *     These IOCTLs are applicable only to removable media devices.
31298 *
31299 *     -----------------------------------------------------------
31300 *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
31301 *     -----------------------------------------------------------
31302 *             false          false        |     No
31303 *             false          true         |     No
31304 *             true            x           |     Yes
31305 *     -----------------------------------------------------------
31306 *
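 *     For illustration (assuming <sys/dkio.h> and an open descriptor fd;
 *     the ioctl argument is ignored here):
 *
 *		if (ioctl(fd, DKIOCEJECT, 0) != 0)
 *			perror("eject");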
31307 *
31308 * 11. Kstats for partitions
31309 *
31310 *     sd creates partition kstats for non-removable media devices, so USB
31311 *     and FireWire hard disks now have partition kstats.
31312 *
31313 *      ------------------------------------------------------
31314 *       removable media    hotpluggable   |   kstat
31315 *      ------------------------------------------------------
31316 *             false          false        |    Yes
31317 *             false          true         |    Yes
31318 *             true             x          |    No
31319 *       ------------------------------------------------------
31320 *
31321 *
31322 * 12. Removable media & hotpluggable properties
31323 *
31324 *     The sd driver creates a "removable-media" property for removable
31325 *     media devices. A parent nexus driver creates a "hotpluggable"
31326 *     property if it supports hotplugging.
31327 *
31328 *     ---------------------------------------------------------------------
31329 *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
31330 *     ---------------------------------------------------------------------
31331 *       false            false       |    No                   No
31332 *       false            true        |    No                   Yes
31333 *       true             false       |    Yes                  No
31334 *       true             true        |    Yes                  Yes
31335 *     ---------------------------------------------------------------------
31336 *
31337 *
31338 * 13. Power Management
31339 *
31340 *     sd only power manages removable media devices, devices that support
31341 *     LOG_SENSE, and devices with a "pm-capable" property (PSARC/2002/250).
31342 *
31343 *     A parent nexus that supports hotplugging can also set "pm-capable"
31344 *     if the disk can be power managed.
31345 *
31346 *     ------------------------------------------------------------
31347 *       removable media hotpluggable pm-capable  |   power manage
31348 *     ------------------------------------------------------------
31349 *             false          false     false     |     No
31350 *             false          false     true      |     Yes
31351 *             false          true      false     |     No
31352 *             false          true      true      |     Yes
31353 *             true             x        x        |     Yes
31354 *     ------------------------------------------------------------
31355 *
31356 *      USB and FireWire hard disks can now be power managed independently
31357 *      of the framebuffer.
31358 *
31359 *
31360 * 14. Support for USB disks with capacity larger than 1TB
31361 *
31362 *     Currently, sd doesn't permit a fixed disk device with a capacity
31363 *     larger than 1TB to be used in a 32-bit operating system environment.
31364 *     However, sd doesn't enforce this for removable media devices; it
31365 *     instead assumes that removable media devices cannot have a capacity
31366 *     larger than 1TB. Therefore, using such devices on a 32-bit system is
31367 *     only partially supported, which can cause unexpected results.
31368 *
31369 *     ---------------------------------------------------------------------
31370 *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
31371 *     ---------------------------------------------------------------------
31372 *             false          false  |   true         |     No
31373 *             false          true   |   true         |     No
31374 *             true           false  |   true         |     Yes
31375 *             true           true   |   true         |     Yes
31376 *     ---------------------------------------------------------------------
31377 *
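 *     (For illustration, presumably the arithmetic behind the 1TB limit:
 *     a signed 32-bit block number times a 512-byte block size addresses
 *     at most 2^31 * 512 bytes = 2^40 bytes = 1TB.)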
31378 *
31379 * 15. Check write-protection at open time
31380 *
31381 *     When a removable media device is opened for writing without the
31382 *     NDELAY flag, sd checks whether the device is writable. If the
31383 *     device is write-protected, the open aborts.
31384 *
31385 *     ------------------------------------------------------------
31386 *       removable media    USB/1394   |   WP Check
31387 *     ------------------------------------------------------------
31388 *             false          false    |     No
31389 *             false          true     |     No
31390 *             true           false    |     Yes
31391 *             true           true     |     Yes
31392 *     ------------------------------------------------------------
31393 *
31394 *
31395 * 16. Syslog when a corrupted VTOC is encountered
31396 *
31397 *      Currently, if an invalid VTOC is encountered, sd prints a syslog
31398 *      message only for fixed SCSI disks.
31399 *     ------------------------------------------------------------
31400 *       removable media    USB/1394   |   print syslog
31401 *     ------------------------------------------------------------
31402 *             false          false    |     Yes
31403 *             false          true     |     No
31404 *             true           false    |     No
31405 *             true           true     |     No
31406 *     ------------------------------------------------------------
31407 */
31408static void
31409sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
31410{
31411	int	pm_capable_prop;
31412
31413	ASSERT(un->un_sd);
31414	ASSERT(un->un_sd->sd_inq);
31415
31416#if defined(_SUNOS_VTOC_16)
31417	/*
31418	 * For VTOC_16 devices, the default label will be created for all
31419	 * devices. (see sd_build_default_label)
31420	 */
31421	un->un_f_default_vtoc_supported = TRUE;
31422#endif
31423
31424	if (un->un_sd->sd_inq->inq_rmb) {
31425		/*
31426		 * The media of this device is removable, and the medium can
31427		 * be changed after the device has been opened, so medium
31428		 * changes must be supported.
31429		 */
31430		un->un_f_has_removable_media = TRUE;
31431
31432#if defined(_SUNOS_VTOC_8)
31433		/*
31434		 * Note: currently, for VTOC_8 devices, default label is
31435		 * created for removable and hotpluggable devices only.
31436		 */
31437		un->un_f_default_vtoc_supported = TRUE;
31438#endif
31439		/*
31440		 * support non-512-byte blocksize of removable media devices
31441		 */
31442		un->un_f_non_devbsize_supported = TRUE;
31443
31444		/*
31445		 * Assume that all removable media devices support DOOR_LOCK
31446		 */
31447		un->un_f_doorlock_supported = TRUE;
31448
31449		/*
31450		 * A removable media device may be opened with the NDELAY flag
31451		 * when there is no media in the drive; in that case we don't
31452		 * care whether the device is writable. Without the NDELAY
31453		 * flag, we must check whether the media is write-protected.
31454		 */
31455		un->un_f_chk_wp_open = TRUE;
31456
31457		/*
31458		 * Need to start a SCSI watch thread to monitor the media
31459		 * state; when media is inserted or ejected, notify syseventd.
31460		 */
31461		un->un_f_monitor_media_state = TRUE;
31462
31463		/*
31464		 * Some devices don't support the START_STOP_UNIT command.
31465		 * Therefore, check whether a device supports it before
31466		 * sending it.
31467		 */
31468		un->un_f_check_start_stop = TRUE;
31469
31470		/*
31471		 * support eject media ioctl:
31472		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31473		 */
31474		un->un_f_eject_media_supported = TRUE;
31475
31476		/*
31477		 * Because many removable-media devices don't support
31478		 * LOG_SENSE, we cannot use that command to check whether
31479		 * a removable media device supports power management.
31480		 * We assume that they support power management via the
31481		 * START_STOP_UNIT command and can be spun up and down
31482		 * without limitations.
31483		 */
31484		un->un_f_pm_supported = TRUE;
31485
31486		/*
31487		 * Need to create a zero-length (Boolean) property
31488		 * "removable-media" for removable media devices.
31489		 * Note that the return value is not checked: if the
31490		 * property cannot be created, we do not want the attach
31491		 * to fail altogether. This is consistent with other
31492		 * property creation in attach.
31493		 */
31494		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31495		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31496
31497	} else {
31498		/*
31499		 * create device ID for device
31500		 */
31501		un->un_f_devid_supported = TRUE;
31502
31503		/*
31504		 * Spin up non-removable media devices once they are attached.
31505		 */
31506		un->un_f_attach_spinup = TRUE;
31507
31508		/*
31509		 * According to the SCSI specification, sense data comes in
31510		 * two formats: fixed format and descriptor format. At present,
31511		 * we don't support descriptor format sense data for removable
31512		 * media.
31513		 */
31514		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31515			un->un_f_descr_format_supported = TRUE;
31516		}
31517
31518		/*
31519		 * kstats are created only for non-removable media devices.
31520		 *
31521		 * Set this in sd.conf to 0 in order to disable kstats.  The
31522		 * default is 1, so they are enabled by default.
31523		 */
31524		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31525		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31526		    "enable-partition-kstats", 1));
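		/*
		 * For example, disabling partition kstats would take an
		 * sd.conf line like this (illustrative, not a shipped
		 * default):
		 *
		 *	enable-partition-kstats=0;
		 */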
31527
31528		/*
31529		 * Check if HBA has set the "pm-capable" property.
31530		 * If "pm-capable" exists and is non-zero then we can
31531		 * power manage the device without checking the start/stop
31532		 * cycle count log sense page.
31533		 *
31534		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
31535		 * then we should not power manage the device.
31536		 *
31537		 * If "pm-capable" doesn't exist then pm_capable_prop will
31538		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31539		 * sd will check the start/stop cycle count log sense page
31540		 * and power manage the device if the cycle count limit has
31541		 * not been exceeded.
31542		 */
31543		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31544		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31545		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
31546			un->un_f_log_sense_supported = TRUE;
31547		} else {
31548			/*
31549			 * pm-capable property exists.
31550			 *
31551			 * Convert "TRUE" values for pm_capable_prop to
31552			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
31553			 * later. "TRUE" values are any values except
31554			 * SD_PM_CAPABLE_FALSE (0) and
31555			 * SD_PM_CAPABLE_UNDEFINED (-1)
31556			 */
31557			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
31558				un->un_f_log_sense_supported = FALSE;
31559			} else {
31560				un->un_f_pm_supported = TRUE;
31561			}
31562
31563			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31564			    "sd_unit_attach: un:0x%p pm-capable "
31565			    "property set to %d.\n", un, un->un_f_pm_supported);
31566		}
31567	}
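
	/*
	 * For illustration: the "pm-capable" property consumed above is
	 * expected to be created by the parent HBA driver against the
	 * child device node. A hedged sketch of that side (child_dip is
	 * a hypothetical name; this is not sd code):
	 *
	 *	(void) ddi_prop_update_int(DDI_DEV_T_NONE, child_dip,
	 *	    "pm-capable", 1);
	 *
	 * Any value other than 0 and -1 is treated as pm-capable true,
	 * 0 disables power management, and an absent property causes sd
	 * to consult the start/stop cycle counter log sense page instead.
	 */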
31568
31569	if (un->un_f_is_hotpluggable) {
31570#if defined(_SUNOS_VTOC_8)
31571		/*
31572		 * Note: currently, for VTOC_8 devices, default label is
31573		 * created for removable and hotpluggable devices only.
31574		 */
31575		un->un_f_default_vtoc_supported = TRUE;
31576#endif
31577
31578		/*
31579		 * Have to watch hotpluggable devices as well, since
31580		 * that's the only way for userland applications to
31581		 * detect hot removal while device is busy/mounted.
31582		 */
31583		un->un_f_monitor_media_state = TRUE;
31584
31585		un->un_f_check_start_stop = TRUE;
31586
31587	}
31588
31589	/*
31590	 * By default, only DIRECT ACCESS devices and CDs will have Sun
31591	 * labels.
31592	 */
31593	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
31594	    (un->un_sd->sd_inq->inq_rmb)) {
31595		/*
31596		 * Direct access and removable media devices have a disk label.
31597		 */
31598		un->un_f_vtoc_label_supported = TRUE;
31599	}
31600
31601	/*
31602	 * Fdisk partitions are supported for all direct access devices on
31603	 * the x86 platform, but only for removable media and hotpluggable
31604	 * devices on the SPARC platform. Below, the flag is reset to FALSE
31605	 * if the current device is neither removable media nor hotpluggable
31606	 * and sd is running on the SPARC platform.
31607	 */
31608	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31609		un->un_f_mboot_supported = TRUE;
31610	}
31611
31612	if (!un->un_f_is_hotpluggable &&
31613	    !un->un_sd->sd_inq->inq_rmb) {
31614
31615#if defined(_SUNOS_VTOC_8)
31616		/*
31617		 * Don't support fdisk on fixed disk
31618		 */
31619		un->un_f_mboot_supported = FALSE;
31620#endif
31621
31622		/*
31623		 * Fixed disks support SYNC CACHE.
31624		 */
31625		un->un_f_sync_cache_supported = TRUE;
31626
31627		/*
31628		 * For a fixed disk, if its VTOC is not valid, write an
31629		 * error log entry to the system log.
31630		 */
31631		if (un->un_f_vtoc_label_supported)
31632			un->un_f_vtoc_errlog_supported = TRUE;
31633	}
31634}
31635