1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
5 * Copyright (c) 2012 NetApp Inc.
6 * Copyright (c) 2012 Citrix Inc.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/**
32 * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
33 * to the Comman Access Method (CAM) layer.  CAM control blocks (CCBs) are
34 * converted into VSCSI protocol messages which are delivered to the parent
35 * partition StorVSP driver over the Hyper-V VMBUS.
36 */
37
38#include <sys/param.h>
39#include <sys/proc.h>
40#include <sys/condvar.h>
41#include <sys/time.h>
42#include <sys/systm.h>
43#include <sys/sysctl.h>
44#include <sys/sockio.h>
45#include <sys/mbuf.h>
46#include <sys/malloc.h>
47#include <sys/module.h>
48#include <sys/kernel.h>
49#include <sys/queue.h>
50#include <sys/lock.h>
51#include <sys/sx.h>
52#include <sys/taskqueue.h>
53#include <sys/bus.h>
54#include <sys/mutex.h>
55#include <sys/callout.h>
56#include <sys/smp.h>
57#include <vm/vm.h>
58#include <vm/pmap.h>
59#include <vm/uma.h>
60#include <sys/lock.h>
61#include <sys/sema.h>
62#include <sys/eventhandler.h>
63#include <machine/bus.h>
64
65#include <cam/cam.h>
66#include <cam/cam_ccb.h>
67#include <cam/cam_periph.h>
68#include <cam/cam_sim.h>
69#include <cam/cam_xpt_sim.h>
70#include <cam/cam_xpt_internal.h>
71#include <cam/cam_debug.h>
72#include <cam/scsi/scsi_all.h>
73#include <cam/scsi/scsi_message.h>
74
75#include <dev/hyperv/include/hyperv.h>
76#include <dev/hyperv/include/vmbus.h>
77#include "hv_vstorage.h"
78#include "vmbus_if.h"
79
80#define STORVSC_MAX_LUNS_PER_TARGET	(64)
81#define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
82#define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
83#define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
84#define STORVSC_MAX_TARGETS		(2)
85
86#define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
87
88/*
89 * 33 segments are needed to allow 128KB maxio, in case the data
90 * in the first page is _not_ PAGE_SIZE aligned, e.g.
91 *
92 *     |<----------- 128KB ----------->|
93 *     |                               |
94 *  0  2K 4K    8K   16K   124K  128K  130K
95 *  |  |  |     |     |       |     |  |
96 *  +--+--+-----+-----+.......+-----+--+--+
97 *  |  |  |     |     |       |     |  |  | DATA
98 *  |  |  |     |     |       |     |  |  |
99 *  +--+--+-----+-----+.......------+--+--+
100 *     |  |                         |  |
101 *     | 1|            31           | 1| ...... # of segments
102 */
103#define STORVSC_DATA_SEGCNT_MAX		33
104#define STORVSC_DATA_SEGSZ_MAX		PAGE_SIZE
105#define STORVSC_DATA_SIZE_MAX		\
106	((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX)
107
108struct storvsc_softc;
109
110struct hv_sglist {
111	struct iovec sg_iov[STORVSC_DATA_SEGCNT_MAX];
112	u_short	sg_nseg;
113	u_short	sg_maxseg;
114};
115
116struct hv_sgl_node {
117	LIST_ENTRY(hv_sgl_node) link;
118	struct hv_sglist *sgl_data;
119};
120
121struct hv_sgl_page_pool{
122	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
123	LIST_HEAD(, hv_sgl_node) free_sgl_list;
124	boolean_t                is_init;
125} g_hv_sgl_page_pool;
126
127enum storvsc_request_type {
128	WRITE_TYPE,
129	READ_TYPE,
130	UNKNOWN_TYPE
131};
132
133SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
134	"Hyper-V storage interface");
135
136static u_int hv_storvsc_use_win8ext_flags = 1;
137SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW,
138	&hv_storvsc_use_win8ext_flags, 0,
139	"Use win8 extension flags or not");
140
141static u_int hv_storvsc_use_pim_unmapped = 1;
142SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN,
143	&hv_storvsc_use_pim_unmapped, 0,
144	"Optimize storvsc by using unmapped I/O");
145
146static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE);
147SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN,
148	&hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size");
149
150static u_int hv_storvsc_max_io = 512;
151SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN,
152	&hv_storvsc_max_io, 0, "Hyper-V storage max io limit");
153
154static int hv_storvsc_chan_cnt = 0;
155SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
156	&hv_storvsc_chan_cnt, 0, "# of channels to use");
157#ifdef DIAGNOSTIC
158static int hv_storvsc_srb_status = -1;
159SYSCTL_INT(_hw_storvsc, OID_AUTO, srb_status,  CTLFLAG_RW,
160	&hv_storvsc_srb_status, 0, "srb_status to inject");
161TUNABLE_INT("hw_storvsc.srb_status", &hv_storvsc_srb_status);
162#endif /* DIAGNOSTIC */
163
164#define STORVSC_MAX_IO						\
165	vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size,	\
166	   STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE)
167
168struct hv_storvsc_sysctl {
169	u_long		data_bio_cnt;
170	u_long		data_vaddr_cnt;
171	u_long		data_sg_cnt;
172	u_long		chan_send_cnt[MAXCPU];
173};
174
175struct storvsc_gpa_range {
176	struct vmbus_gpa_range	gpa_range;
177	uint64_t		gpa_page[STORVSC_DATA_SEGCNT_MAX];
178} __packed;
179
180struct hv_storvsc_request {
181	LIST_ENTRY(hv_storvsc_request)	link;
182	struct vstor_packet		vstor_packet;
183	int				prp_cnt;
184	struct storvsc_gpa_range	prp_list;
185	void				*sense_data;
186	uint8_t				sense_info_len;
187	uint8_t				retries;
188	union ccb			*ccb;
189	struct storvsc_softc		*softc;
190	struct callout			callout;
191	struct sema			synch_sema; /*Synchronize the request/response if needed */
192	struct hv_sglist		*bounce_sgl;
193	unsigned int			bounce_sgl_count;
194	uint64_t			not_aligned_seg_bits;
195	bus_dmamap_t			data_dmap;
196};
197
198struct storvsc_softc {
199	struct vmbus_channel		*hs_chan;
200	LIST_HEAD(, hv_storvsc_request)	hs_free_list;
201	struct mtx			hs_lock;
202	struct storvsc_driver_props	*hs_drv_props;
203	int 				hs_unit;
204	uint32_t			hs_frozen;
205	struct cam_sim			*hs_sim;
206	struct cam_path 		*hs_path;
207	uint32_t			hs_num_out_reqs;
208	boolean_t			hs_destroy;
209	boolean_t			hs_drain_notify;
210	struct sema 			hs_drain_sema;
211	struct hv_storvsc_request	hs_init_req;
212	struct hv_storvsc_request	hs_reset_req;
213	device_t			hs_dev;
214	bus_dma_tag_t			storvsc_req_dtag;
215	struct hv_storvsc_sysctl	sysctl_data;
216	uint32_t			hs_nchan;
217	struct vmbus_channel		*hs_sel_chan[MAXCPU];
218};
219
220static eventhandler_tag storvsc_handler_tag;
221/*
222 * The size of the vmscsi_request has changed in win8. The
223 * additional size is for the newly added elements in the
224 * structure. These elements are valid only when we are talking
225 * to a win8 host.
226 * Track the correct size we need to apply.
227 */
228static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
229
230/**
231 * HyperV storvsc timeout testing cases:
232 * a. IO returned after first timeout;
233 * b. IO returned after second timeout and queue freeze;
234 * c. IO returned while timer handler is running
235 * The first can be tested by "sg_senddiag -vv /dev/daX",
236 * and the second and third can be done by
237 * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
238 */
239#define HVS_TIMEOUT_TEST 0
240
241/*
242 * Bus/adapter reset functionality on the Hyper-V host is
243 * buggy and it will be disabled until
244 * it can be further tested.
245 */
246#define HVS_HOST_RESET 0
247
248struct storvsc_driver_props {
249	char		*drv_name;
250	char		*drv_desc;
251	uint8_t		drv_max_luns_per_target;
252	uint32_t	drv_max_ios_per_target;
253	uint32_t	drv_ringbuffer_size;
254};
255
256enum hv_storage_type {
257	DRIVER_BLKVSC,
258	DRIVER_STORVSC,
259	DRIVER_UNKNOWN
260};
261
262#define HS_MAX_ADAPTERS 10
263
264#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
265
266/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
267static const struct hyperv_guid gStorVscDeviceType={
268	.hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
269		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
270};
271
272/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
273static const struct hyperv_guid gBlkVscDeviceType={
274	.hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
275		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
276};
277
278static struct storvsc_driver_props g_drv_props_table[] = {
279	{"blkvsc", "Hyper-V IDE",
280	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
281	 20*PAGE_SIZE},
282	{"storvsc", "Hyper-V SCSI",
283	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
284	 20*PAGE_SIZE}
285};
286
287/*
288 * Sense buffer size changed in win8; have a run-time
289 * variable to track the size we should use.
290 */
291static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
292
293/*
294 * The storage protocol version is determined during the
295 * initial exchange with the host.  It will indicate which
296 * storage functionality is available in the host.
297*/
298static int vmstor_proto_version;
299
300struct vmstor_proto {
301        int proto_version;
302        int sense_buffer_size;
303        int vmscsi_size_delta;
304};
305
306static const struct vmstor_proto vmstor_proto_list[] = {
307        {
308                VMSTOR_PROTOCOL_VERSION_WIN10,
309                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
310                0
311        },
312        {
313                VMSTOR_PROTOCOL_VERSION_WIN8_1,
314                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
315                0
316        },
317        {
318                VMSTOR_PROTOCOL_VERSION_WIN8,
319                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
320                0
321        },
322        {
323                VMSTOR_PROTOCOL_VERSION_WIN7,
324                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
325                sizeof(struct vmscsi_win8_extension),
326        },
327        {
328                VMSTOR_PROTOCOL_VERSION_WIN6,
329                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
330                sizeof(struct vmscsi_win8_extension),
331        }
332};
333
334/* static functions */
335static int storvsc_probe(device_t dev);
336static int storvsc_attach(device_t dev);
337static int storvsc_detach(device_t dev);
338static void storvsc_poll(struct cam_sim * sim);
339static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
340static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
341static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
342static enum hv_storage_type storvsc_get_storage_type(device_t dev);
343static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
344static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc);
345static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
346					struct vstor_packet *vstor_packet,
347					struct hv_storvsc_request *request);
348static int hv_storvsc_connect_vsp(struct storvsc_softc *);
349static void storvsc_io_done(struct hv_storvsc_request *reqp);
350static void storvsc_copy_sgl_to_bounce_buf(struct hv_sglist *bounce_sgl,
351				bus_dma_segment_t *orig_sgl,
352				unsigned int orig_sgl_count,
353				uint64_t seg_bits);
354void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
355				unsigned int dest_sgl_count,
356				struct hv_sglist *src_sgl,
357				uint64_t seg_bits);
358
359static device_method_t storvsc_methods[] = {
360	/* Device interface */
361	DEVMETHOD(device_probe,		storvsc_probe),
362	DEVMETHOD(device_attach,	storvsc_attach),
363	DEVMETHOD(device_detach,	storvsc_detach),
364	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
365	DEVMETHOD_END
366};
367
368static driver_t storvsc_driver = {
369	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
370};
371
372DRIVER_MODULE(storvsc, vmbus, storvsc_driver, 0, 0);
373MODULE_VERSION(storvsc, 1);
374MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
375
376static void
377storvsc_subchan_attach(struct storvsc_softc *sc,
378    struct vmbus_channel *new_channel)
379{
380	struct vmstor_chan_props props;
381
382	memset(&props, 0, sizeof(props));
383
384	vmbus_chan_cpu_rr(new_channel);
385	vmbus_chan_open(new_channel,
386	    sc->hs_drv_props->drv_ringbuffer_size,
387  	    sc->hs_drv_props->drv_ringbuffer_size,
388	    (void *)&props,
389	    sizeof(struct vmstor_chan_props),
390	    hv_storvsc_on_channel_callback, sc);
391}
392
393/**
394 * @brief Send multi-channel creation request to host
395 *
396 * @param device  a Hyper-V device pointer
397 * @param max_chans  the max channels supported by vmbus
398 */
399static void
400storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch)
401{
402	struct vmbus_channel **subchan;
403	struct hv_storvsc_request *request;
404	struct vstor_packet *vstor_packet;
405	int request_subch;
406	int i;
407
408	/* get sub-channel count that need to create */
409	request_subch = MIN(max_subch, mp_ncpus - 1);
410
411	request = &sc->hs_init_req;
412
413	/* request the host to create multi-channel */
414	memset(request, 0, sizeof(struct hv_storvsc_request));
415
416	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
417
418	vstor_packet = &request->vstor_packet;
419
420	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
421	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
422	vstor_packet->u.multi_channels_cnt = request_subch;
423
424	vmbus_chan_send(sc->hs_chan,
425	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
426	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
427
428	sema_wait(&request->synch_sema);
429
430	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
431	    vstor_packet->status != 0) {
432		printf("Storvsc_error: create multi-channel invalid operation "
433		    "(%d) or statue (%u)\n",
434		    vstor_packet->operation, vstor_packet->status);
435		return;
436	}
437
438	/* Update channel count */
439	sc->hs_nchan = request_subch + 1;
440
441	/* Wait for sub-channels setup to complete. */
442	subchan = vmbus_subchan_get(sc->hs_chan, request_subch);
443
444	/* Attach the sub-channels. */
445	for (i = 0; i < request_subch; ++i)
446		storvsc_subchan_attach(sc, subchan[i]);
447
448	/* Release the sub-channels. */
449	vmbus_subchan_rel(subchan, request_subch);
450
451	if (bootverbose)
452		printf("Storvsc create multi-channel success!\n");
453}
454
455/**
456 * @brief initialize channel connection to parent partition
457 *
458 * @param dev  a Hyper-V device pointer
459 * @returns  0 on success, non-zero error on failure
460 */
461static int
462hv_storvsc_channel_init(struct storvsc_softc *sc)
463{
464	int ret = 0, i;
465	struct hv_storvsc_request *request;
466	struct vstor_packet *vstor_packet;
467	uint16_t max_subch;
468	boolean_t support_multichannel;
469	uint32_t version;
470
471	max_subch = 0;
472	support_multichannel = FALSE;
473
474	request = &sc->hs_init_req;
475	memset(request, 0, sizeof(struct hv_storvsc_request));
476	vstor_packet = &request->vstor_packet;
477	request->softc = sc;
478
479	/**
480	 * Initiate the vsc/vsp initialization protocol on the open channel
481	 */
482	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
483
484	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
485	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
486
487
488	ret = vmbus_chan_send(sc->hs_chan,
489	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
490	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
491
492	if (ret != 0)
493		goto cleanup;
494
495	sema_wait(&request->synch_sema);
496
497	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
498		vstor_packet->status != 0) {
499		goto cleanup;
500	}
501
502	for (i = 0; i < nitems(vmstor_proto_list); i++) {
503		/* reuse the packet for version range supported */
504
505		memset(vstor_packet, 0, sizeof(struct vstor_packet));
506		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
507		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
508
509		vstor_packet->u.version.major_minor =
510			vmstor_proto_list[i].proto_version;
511
512		/* revision is only significant for Windows guests */
513		vstor_packet->u.version.revision = 0;
514
515		ret = vmbus_chan_send(sc->hs_chan,
516		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
517		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
518
519		if (ret != 0)
520			goto cleanup;
521
522		sema_wait(&request->synch_sema);
523
524		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
525			ret = EINVAL;
526			goto cleanup;
527		}
528		if (vstor_packet->status == 0) {
529			vmstor_proto_version =
530				vmstor_proto_list[i].proto_version;
531			sense_buffer_size =
532				vmstor_proto_list[i].sense_buffer_size;
533			vmscsi_size_delta =
534				vmstor_proto_list[i].vmscsi_size_delta;
535			break;
536		}
537	}
538
539	if (vstor_packet->status != 0) {
540		ret = EINVAL;
541		goto cleanup;
542	}
543	/**
544	 * Query channel properties
545	 */
546	memset(vstor_packet, 0, sizeof(struct vstor_packet));
547	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
548	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
549
550	ret = vmbus_chan_send(sc->hs_chan,
551	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
552	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
553
554	if ( ret != 0)
555		goto cleanup;
556
557	sema_wait(&request->synch_sema);
558
559	/* TODO: Check returned version */
560	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
561	    vstor_packet->status != 0) {
562		goto cleanup;
563	}
564
565	max_subch = vstor_packet->u.chan_props.max_channel_cnt;
566	if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1))
567		max_subch = hv_storvsc_chan_cnt - 1;
568
569	/* multi-channels feature is supported by WIN8 and above version */
570	version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev);
571	if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 &&
572	    (vstor_packet->u.chan_props.flags &
573	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
574		support_multichannel = TRUE;
575	}
576	if (bootverbose) {
577		device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1,
578		    support_multichannel ? ", multi-chan capable" : "");
579	}
580
581	memset(vstor_packet, 0, sizeof(struct vstor_packet));
582	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
583	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
584
585	ret = vmbus_chan_send(sc->hs_chan,
586	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
587	    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
588
589	if (ret != 0) {
590		goto cleanup;
591	}
592
593	sema_wait(&request->synch_sema);
594
595	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
596	    vstor_packet->status != 0)
597		goto cleanup;
598
599	/*
600	 * If multi-channel is supported, send multichannel create
601	 * request to host.
602	 */
603	if (support_multichannel && max_subch > 0)
604		storvsc_send_multichannel_request(sc, max_subch);
605cleanup:
606	sema_destroy(&request->synch_sema);
607	return (ret);
608}
609
610/**
611 * @brief Open channel connection to paraent partition StorVSP driver
612 *
613 * Open and initialize channel connection to parent partition StorVSP driver.
614 *
615 * @param pointer to a Hyper-V device
616 * @returns 0 on success, non-zero error on failure
617 */
618static int
619hv_storvsc_connect_vsp(struct storvsc_softc *sc)
620{
621	int ret = 0;
622	struct vmstor_chan_props props;
623
624	memset(&props, 0, sizeof(struct vmstor_chan_props));
625
626	/*
627	 * Open the channel
628	 */
629	vmbus_chan_cpu_rr(sc->hs_chan);
630	ret = vmbus_chan_open(
631		sc->hs_chan,
632		sc->hs_drv_props->drv_ringbuffer_size,
633		sc->hs_drv_props->drv_ringbuffer_size,
634		(void *)&props,
635		sizeof(struct vmstor_chan_props),
636		hv_storvsc_on_channel_callback, sc);
637
638	if (ret != 0) {
639		return ret;
640	}
641
642	ret = hv_storvsc_channel_init(sc);
643	return (ret);
644}
645
646#if HVS_HOST_RESET
647static int
648hv_storvsc_host_reset(struct storvsc_softc *sc)
649{
650	int ret = 0;
651
652	struct hv_storvsc_request *request;
653	struct vstor_packet *vstor_packet;
654
655	request = &sc->hs_reset_req;
656	request->softc = sc;
657	vstor_packet = &request->vstor_packet;
658
659	sema_init(&request->synch_sema, 0, "stor synch sema");
660
661	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
662	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
663
664	ret = vmbus_chan_send(dev->channel,
665	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
666	    vstor_packet, VSTOR_PKT_SIZE,
667	    (uint64_t)(uintptr_t)&sc->hs_reset_req);
668
669	if (ret != 0) {
670		goto cleanup;
671	}
672
673	sema_wait(&request->synch_sema);
674
675	/*
676	 * At this point, all outstanding requests in the adapter
677	 * should have been flushed out and return to us
678	 */
679
680cleanup:
681	sema_destroy(&request->synch_sema);
682	return (ret);
683}
684#endif /* HVS_HOST_RESET */
685
686/**
687 * @brief Function to initiate an I/O request
688 *
689 * @param device Hyper-V device pointer
690 * @param request pointer to a request structure
691 * @returns 0 on success, non-zero error on failure
692 */
693static int
694hv_storvsc_io_request(struct storvsc_softc *sc,
695					  struct hv_storvsc_request *request)
696{
697	struct vstor_packet *vstor_packet = &request->vstor_packet;
698	struct vmbus_channel* outgoing_channel = NULL;
699	int ret = 0, ch_sel;
700
701	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
702
703	vstor_packet->u.vm_srb.length =
704	    sizeof(struct vmscsi_req) - vmscsi_size_delta;
705
706	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
707
708	vstor_packet->u.vm_srb.transfer_len =
709	    request->prp_list.gpa_range.gpa_len;
710
711	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
712
713	ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan;
714	/*
715	 * If we are panic'ing, then we are dumping core. Since storvsc_polls
716	 * always uses sc->hs_chan, then we must send to that channel or a poll
717	 * timeout will occur.
718	 */
719	if (KERNEL_PANICKED()) {
720		outgoing_channel = sc->hs_chan;
721	} else {
722		outgoing_channel = sc->hs_sel_chan[ch_sel];
723	}
724
725	mtx_unlock(&request->softc->hs_lock);
726	if (request->prp_list.gpa_range.gpa_len) {
727		ret = vmbus_chan_send_prplist(outgoing_channel,
728		    &request->prp_list.gpa_range, request->prp_cnt,
729		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
730	} else {
731		ret = vmbus_chan_send(outgoing_channel,
732		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
733		    vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request);
734	}
735	/* statistic for successful request sending on each channel */
736	if (!ret) {
737		sc->sysctl_data.chan_send_cnt[ch_sel]++;
738	}
739	mtx_lock(&request->softc->hs_lock);
740
741	if (ret != 0) {
742		printf("Unable to send packet %p ret %d", vstor_packet, ret);
743	} else {
744		atomic_add_int(&sc->hs_num_out_reqs, 1);
745	}
746
747	return (ret);
748}
749
750
751/**
752 * Process IO_COMPLETION_OPERATION and ready
753 * the result to be completed for upper layer
754 * processing by the CAM layer.
755 */
756static void
757hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
758			   struct vstor_packet *vstor_packet,
759			   struct hv_storvsc_request *request)
760{
761	struct vmscsi_req *vm_srb;
762
763	vm_srb = &vstor_packet->u.vm_srb;
764
765	/*
766	 * Copy some fields of the host's response into the request structure,
767	 * because the fields will be used later in storvsc_io_done().
768	 */
769	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
770	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
771	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
772
773	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
774			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
775		/* Autosense data available */
776
777		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
778				("vm_srb->sense_info_len <= "
779				 "request->sense_info_len"));
780
781		memcpy(request->sense_data, vm_srb->u.sense_data,
782			vm_srb->sense_info_len);
783
784		request->sense_info_len = vm_srb->sense_info_len;
785	}
786
787	/* Complete request by passing to the CAM layer */
788	storvsc_io_done(request);
789	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
790	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
791		sema_post(&sc->hs_drain_sema);
792	}
793}
794
795static void
796hv_storvsc_rescan_target(struct storvsc_softc *sc)
797{
798	path_id_t pathid;
799	target_id_t targetid;
800	union ccb *ccb;
801
802	pathid = cam_sim_path(sc->hs_sim);
803	targetid = CAM_TARGET_WILDCARD;
804
805	/*
806	 * Allocate a CCB and schedule a rescan.
807	 */
808	ccb = xpt_alloc_ccb_nowait();
809	if (ccb == NULL) {
810		printf("unable to alloc CCB for rescan\n");
811		return;
812	}
813
814	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
815	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
816		printf("unable to create path for rescan, pathid: %u,"
817		    "targetid: %u\n", pathid, targetid);
818		xpt_free_ccb(ccb);
819		return;
820	}
821
822	if (targetid == CAM_TARGET_WILDCARD)
823		ccb->ccb_h.func_code = XPT_SCAN_BUS;
824	else
825		ccb->ccb_h.func_code = XPT_SCAN_TGT;
826
827	xpt_rescan(ccb);
828}
829
830static void
831hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc)
832{
833	int ret = 0;
834	struct storvsc_softc *sc = xsc;
835	uint32_t bytes_recvd;
836	uint64_t request_id;
837	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
838	struct hv_storvsc_request *request;
839	struct vstor_packet *vstor_packet;
840
841	bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8);
842	ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id);
843	KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough"));
844	/* XXX check bytes_recvd to make sure that it contains enough data */
845
846	while ((ret == 0) && (bytes_recvd > 0)) {
847		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
848
849		if ((request == &sc->hs_init_req) ||
850			(request == &sc->hs_reset_req)) {
851			memcpy(&request->vstor_packet, packet,
852				   sizeof(struct vstor_packet));
853			sema_post(&request->synch_sema);
854		} else {
855			vstor_packet = (struct vstor_packet *)packet;
856			switch(vstor_packet->operation) {
857			case VSTOR_OPERATION_COMPLETEIO:
858				if (request == NULL)
859					panic("VMBUS: storvsc received a "
860					    "packet with NULL request id in "
861					    "COMPLETEIO operation.");
862
863				hv_storvsc_on_iocompletion(sc,
864							vstor_packet, request);
865				break;
866			case VSTOR_OPERATION_REMOVEDEVICE:
867				printf("VMBUS: storvsc operation %d not "
868				    "implemented.\n", vstor_packet->operation);
869				/* TODO: implement */
870				break;
871			case VSTOR_OPERATION_ENUMERATE_BUS:
872				hv_storvsc_rescan_target(sc);
873				break;
874			default:
875				break;
876			}
877		}
878
879		bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8),
880		ret = vmbus_chan_recv(channel, packet, &bytes_recvd,
881		    &request_id);
882		KASSERT(ret != ENOBUFS,
883		    ("storvsc recvbuf is not large enough"));
884		/*
885		 * XXX check bytes_recvd to make sure that it contains
886		 * enough data
887		 */
888	}
889}
890
891/**
892 * @brief StorVSC probe function
893 *
894 * Device probe function.  Returns 0 if the input device is a StorVSC
895 * device.  Otherwise, a ENXIO is returned.  If the input device is
896 * for BlkVSC (paravirtual IDE) device and this support is disabled in
897 * favor of the emulated ATA/IDE device, return ENXIO.
898 *
899 * @param a device
900 * @returns 0 on success, ENXIO if not a matcing StorVSC device
901 */
902static int
903storvsc_probe(device_t dev)
904{
905	int ret	= ENXIO;
906
907	switch (storvsc_get_storage_type(dev)) {
908	case DRIVER_BLKVSC:
909		if(bootverbose)
910			device_printf(dev,
911			    "Enlightened ATA/IDE detected\n");
912		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
913		ret = BUS_PROBE_DEFAULT;
914		break;
915	case DRIVER_STORVSC:
916		if(bootverbose)
917			device_printf(dev, "Enlightened SCSI device detected\n");
918		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
919		ret = BUS_PROBE_DEFAULT;
920		break;
921	default:
922		ret = ENXIO;
923	}
924	return (ret);
925}
926
927static void
928storvsc_create_chan_sel(struct storvsc_softc *sc)
929{
930	struct vmbus_channel **subch;
931	int i, nsubch;
932
933	sc->hs_sel_chan[0] = sc->hs_chan;
934	nsubch = sc->hs_nchan - 1;
935	if (nsubch == 0)
936		return;
937
938	subch = vmbus_subchan_get(sc->hs_chan, nsubch);
939	for (i = 0; i < nsubch; i++)
940		sc->hs_sel_chan[i + 1] = subch[i];
941	vmbus_subchan_rel(subch, nsubch);
942}
943
944static int
945storvsc_init_requests(device_t dev)
946{
947	struct storvsc_softc *sc = device_get_softc(dev);
948	struct hv_storvsc_request *reqp;
949	int error, i;
950
951	LIST_INIT(&sc->hs_free_list);
952
953	error = bus_dma_tag_create(
954		bus_get_dma_tag(dev),		/* parent */
955		1,				/* alignment */
956		PAGE_SIZE,			/* boundary */
957		BUS_SPACE_MAXADDR,		/* lowaddr */
958		BUS_SPACE_MAXADDR,		/* highaddr */
959		NULL, NULL,			/* filter, filterarg */
960		STORVSC_DATA_SIZE_MAX,		/* maxsize */
961		STORVSC_DATA_SEGCNT_MAX,	/* nsegments */
962		STORVSC_DATA_SEGSZ_MAX,		/* maxsegsize */
963		0,				/* flags */
964		NULL,				/* lockfunc */
965		NULL,				/* lockfuncarg */
966		&sc->storvsc_req_dtag);
967	if (error) {
968		device_printf(dev, "failed to create storvsc dma tag\n");
969		return (error);
970	}
971
972	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
973		reqp = malloc(sizeof(struct hv_storvsc_request),
974				 M_DEVBUF, M_WAITOK|M_ZERO);
975		reqp->softc = sc;
976		error = bus_dmamap_create(sc->storvsc_req_dtag, 0,
977				&reqp->data_dmap);
978		if (error) {
979			device_printf(dev, "failed to allocate storvsc "
980			    "data dmamap\n");
981			goto cleanup;
982		}
983		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
984	}
985	return (0);
986
987cleanup:
988	while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) {
989		LIST_REMOVE(reqp, link);
990		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
991		free(reqp, M_DEVBUF);
992	}
993	return (error);
994}
995
996static void
997storvsc_sysctl(device_t dev)
998{
999	struct sysctl_oid_list *child;
1000	struct sysctl_ctx_list *ctx;
1001	struct sysctl_oid *ch_tree, *chid_tree;
1002	struct storvsc_softc *sc;
1003	char name[16];
1004	int i;
1005
1006	sc = device_get_softc(dev);
1007	ctx = device_get_sysctl_ctx(dev);
1008	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
1009
1010	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt",
1011		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_bio_cnt,
1012		"# of bio data block");
1013	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt",
1014		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_vaddr_cnt,
1015		"# of vaddr data block");
1016	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt",
1017		CTLFLAG_RW | CTLFLAG_STATS, &sc->sysctl_data.data_sg_cnt,
1018		"# of sg data block");
1019
1020	/* dev.storvsc.UNIT.channel */
1021	ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel",
1022		CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1023	if (ch_tree == NULL)
1024		return;
1025
1026	for (i = 0; i < sc->hs_nchan; i++) {
1027		uint32_t ch_id;
1028
1029		ch_id = vmbus_chan_id(sc->hs_sel_chan[i]);
1030		snprintf(name, sizeof(name), "%d", ch_id);
1031		/* dev.storvsc.UNIT.channel.CHID */
1032		chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree),
1033			OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1034		if (chid_tree == NULL)
1035			return;
1036		/* dev.storvsc.UNIT.channel.CHID.send_req */
1037		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO,
1038			"send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i],
1039			"# of request sending from this channel");
1040	}
1041}
1042
1043/**
1044 * @brief StorVSC attach function
1045 *
1046 * Function responsible for allocating per-device structures,
1047 * setting up CAM interfaces and scanning for available LUNs to
1048 * be used for SCSI device peripherals.
1049 *
1050 * @param a device
1051 * @returns 0 on success or an error on failure
1052 */
1053static int
1054storvsc_attach(device_t dev)
1055{
1056	enum hv_storage_type stor_type;
1057	struct storvsc_softc *sc;
1058	struct cam_devq *devq;
1059	int ret, i, j;
1060	struct hv_storvsc_request *reqp;
1061	struct root_hold_token *root_mount_token = NULL;
1062	struct hv_sgl_node *sgl_node = NULL;
1063	void *tmp_buff = NULL;
1064
1065	/*
1066	 * We need to serialize storvsc attach calls.
1067	 */
1068	root_mount_token = root_mount_hold("storvsc");
1069
1070	sc = device_get_softc(dev);
1071	sc->hs_nchan = 1;
1072	sc->hs_chan = vmbus_get_channel(dev);
1073
1074	stor_type = storvsc_get_storage_type(dev);
1075
1076	if (stor_type == DRIVER_UNKNOWN) {
1077		ret = ENODEV;
1078		goto cleanup;
1079	}
1080
1081	/* fill in driver specific properties */
1082	sc->hs_drv_props = &g_drv_props_table[stor_type];
1083	sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size;
1084	sc->hs_drv_props->drv_max_ios_per_target =
1085		MIN(STORVSC_MAX_IO, hv_storvsc_max_io);
1086	if (bootverbose) {
1087		printf("storvsc ringbuffer size: %d, max_io: %d\n",
1088			sc->hs_drv_props->drv_ringbuffer_size,
1089			sc->hs_drv_props->drv_max_ios_per_target);
1090	}
1091	/* fill in device specific properties */
1092	sc->hs_unit	= device_get_unit(dev);
1093	sc->hs_dev	= dev;
1094
1095	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1096
1097	ret = storvsc_init_requests(dev);
1098	if (ret != 0)
1099		goto cleanup;
1100
1101	/* create sg-list page pool */
1102	if (FALSE == g_hv_sgl_page_pool.is_init) {
1103		g_hv_sgl_page_pool.is_init = TRUE;
1104		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1105		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1106
1107		/*
1108		 * Pre-create SG list, each SG list with
1109		 * STORVSC_DATA_SEGCNT_MAX segments, each
1110		 * segment has one page buffer
1111		 */
1112		for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) {
1113	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
1114			    M_DEVBUF, M_WAITOK|M_ZERO);
1115
1116			sgl_node->sgl_data = malloc(sizeof(struct hv_sglist),
1117			    M_DEVBUF, M_WAITOK|M_ZERO);
1118
1119			for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1120				tmp_buff = malloc(PAGE_SIZE,
1121				    M_DEVBUF, M_WAITOK|M_ZERO);
1122
1123				sgl_node->sgl_data->sg_iov[j].iov_base =
1124				    tmp_buff;
1125			}
1126
1127			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1128			    sgl_node, link);
1129		}
1130	}
1131
1132	sc->hs_destroy = FALSE;
1133	sc->hs_drain_notify = FALSE;
1134	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1135
1136	ret = hv_storvsc_connect_vsp(sc);
1137	if (ret != 0) {
1138		goto cleanup;
1139	}
1140
1141	/* Construct cpu to channel mapping */
1142	storvsc_create_chan_sel(sc);
1143
1144	/*
1145	 * Create the device queue.
1146	 * Hyper-V maps each target to one SCSI HBA
1147	 */
1148	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1149	if (devq == NULL) {
1150		device_printf(dev, "Failed to alloc device queue\n");
1151		ret = ENOMEM;
1152		goto cleanup;
1153	}
1154
1155	sc->hs_sim = cam_sim_alloc(storvsc_action,
1156				storvsc_poll,
1157				sc->hs_drv_props->drv_name,
1158				sc,
1159				sc->hs_unit,
1160				&sc->hs_lock, 1,
1161				sc->hs_drv_props->drv_max_ios_per_target,
1162				devq);
1163
1164	if (sc->hs_sim == NULL) {
1165		device_printf(dev, "Failed to alloc sim\n");
1166		cam_simq_free(devq);
1167		ret = ENOMEM;
1168		goto cleanup;
1169	}
1170
1171	mtx_lock(&sc->hs_lock);
1172	/* bus_id is set to 0, need to get it from VMBUS channel query? */
1173	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1174		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1175		mtx_unlock(&sc->hs_lock);
1176		device_printf(dev, "Unable to register SCSI bus\n");
1177		ret = ENXIO;
1178		goto cleanup;
1179	}
1180
1181	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1182		 cam_sim_path(sc->hs_sim),
1183		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1184		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1185		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1186		mtx_unlock(&sc->hs_lock);
1187		device_printf(dev, "Unable to create path\n");
1188		ret = ENXIO;
1189		goto cleanup;
1190	}
1191
1192	mtx_unlock(&sc->hs_lock);
1193
1194	storvsc_sysctl(dev);
1195
1196	root_mount_rel(root_mount_token);
1197	return (0);
1198
1199
1200cleanup:
1201	root_mount_rel(root_mount_token);
1202	while (!LIST_EMPTY(&sc->hs_free_list)) {
1203		reqp = LIST_FIRST(&sc->hs_free_list);
1204		LIST_REMOVE(reqp, link);
1205		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1206		free(reqp, M_DEVBUF);
1207	}
1208
1209	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1210		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1211		LIST_REMOVE(sgl_node, link);
1212		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) {
1213			free(sgl_node->sgl_data->sg_iov[j].iov_base, M_DEVBUF);
1214		}
1215		free(sgl_node->sgl_data, M_DEVBUF);
1216		free(sgl_node, M_DEVBUF);
1217	}
1218
1219	return (ret);
1220}
1221
1222/**
1223 * @brief StorVSC device detach function
1224 *
1225 * This function is responsible for safely detaching a
1226 * StorVSC device.  This includes waiting for inbound responses
1227 * to complete and freeing associated per-device structures.
1228 *
1229 * @param dev a device
1230 * returns 0 on success
1231 */
1232static int
1233storvsc_detach(device_t dev)
1234{
1235	struct storvsc_softc *sc = device_get_softc(dev);
1236	struct hv_storvsc_request *reqp = NULL;
1237	struct hv_sgl_node *sgl_node = NULL;
1238	int j = 0;
1239
1240	sc->hs_destroy = TRUE;
1241
1242	/*
1243	 * At this point, all outbound traffic should be disabled. We
1244	 * only allow inbound traffic (responses) to proceed so that
1245	 * outstanding requests can be completed.
1246	 */
1247
1248	sc->hs_drain_notify = TRUE;
1249	sema_wait(&sc->hs_drain_sema);
1250	sc->hs_drain_notify = FALSE;
1251
1252	/*
1253	 * Since we have already drained, we don't need to busy wait.
1254	 * The call to close the channel will reset the callback
1255	 * under the protection of the incoming channel lock.
1256	 */
1257
1258	vmbus_chan_close(sc->hs_chan);
1259
1260	mtx_lock(&sc->hs_lock);
1261	while (!LIST_EMPTY(&sc->hs_free_list)) {
1262		reqp = LIST_FIRST(&sc->hs_free_list);
1263		LIST_REMOVE(reqp, link);
1264		bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap);
1265		free(reqp, M_DEVBUF);
1266	}
1267	mtx_unlock(&sc->hs_lock);
1268
1269	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1270		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1271		LIST_REMOVE(sgl_node, link);
1272		for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){
1273			free(sgl_node->sgl_data->sg_iov[j].iov_base, M_DEVBUF);
1274		}
1275		free(sgl_node->sgl_data, M_DEVBUF);
1276		free(sgl_node, M_DEVBUF);
1277	}
1278
1279	return (0);
1280}
1281
1282#if HVS_TIMEOUT_TEST
1283/**
1284 * @brief unit test for timed out operations
1285 *
1286 * This function provides unit testing capability to simulate
1287 * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1288 * is required.
1289 *
1290 * @param reqp pointer to a request structure
1291 * @param opcode SCSI operation being performed
1292 * @param wait if 1, wait for I/O to complete
1293 */
1294static void
1295storvsc_timeout_test(struct hv_storvsc_request *reqp,
1296		uint8_t opcode, int wait)
1297{
1298	int ret;
1299	union ccb *ccb = reqp->ccb;
1300	struct storvsc_softc *sc = reqp->softc;
1301
1302	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
1303		return;
1304	}
1305
1306	if (wait) {
1307		mtx_lock(&reqp->event.mtx);
1308	}
1309	ret = hv_storvsc_io_request(sc, reqp);
1310	if (ret != 0) {
1311		if (wait) {
1312			mtx_unlock(&reqp->event.mtx);
1313		}
1314		printf("%s: io_request failed with %d.\n",
1315				__func__, ret);
1316		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1317		mtx_lock(&sc->hs_lock);
1318		storvsc_free_request(sc, reqp);
1319		xpt_done(ccb);
1320		mtx_unlock(&sc->hs_lock);
1321		return;
1322	}
1323
1324	if (wait) {
1325		xpt_print(ccb->ccb_h.path,
1326				"%u: %s: waiting for IO return.\n",
1327				ticks, __func__);
1328		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
1329		mtx_unlock(&reqp->event.mtx);
1330		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
1331				ticks, __func__, (ret == 0)?
1332				"IO return detected" :
1333				"IO return not detected");
1334		/*
1335		 * Now both the timer handler and io done are running
1336		 * simultaneously. We want to confirm the io done always
1337		 * finishes after the timer handler exits. So reqp used by
1338		 * timer handler is not freed or stale. Do busy loop for
1339		 * another 1/10 second to make sure io done does
1340		 * wait for the timer handler to complete.
1341		 */
1342		DELAY(100*1000);
1343		mtx_lock(&sc->hs_lock);
1344		xpt_print(ccb->ccb_h.path,
1345				"%u: %s: finishing, queue frozen %d, "
1346				"ccb status 0x%x scsi_status 0x%x.\n",
1347				ticks, __func__, sc->hs_frozen,
1348				ccb->ccb_h.status,
1349				ccb->csio.scsi_status);
1350		mtx_unlock(&sc->hs_lock);
1351	}
1352}
1353#endif /* HVS_TIMEOUT_TEST */
1354
1355#ifdef notyet
1356/**
1357 * @brief timeout handler for requests
1358 *
1359 * This function is called as a result of a callout expiring.
1360 *
1361 * @param arg pointer to a request
1362 */
1363static void
1364storvsc_timeout(void *arg)
1365{
1366	struct hv_storvsc_request *reqp = arg;
1367	struct storvsc_softc *sc = reqp->softc;
1368	union ccb *ccb = reqp->ccb;
1369
1370	if (reqp->retries == 0) {
1371		mtx_lock(&sc->hs_lock);
1372		xpt_print(ccb->ccb_h.path,
1373		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
1374		    ticks, reqp, ccb->ccb_h.timeout / 1000);
1375		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
1376		mtx_unlock(&sc->hs_lock);
1377
1378		reqp->retries++;
1379		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
1380		    0, storvsc_timeout, reqp, 0);
1381#if HVS_TIMEOUT_TEST
1382		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
1383#endif
1384		return;
1385	}
1386
1387	mtx_lock(&sc->hs_lock);
1388	xpt_print(ccb->ccb_h.path,
1389		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
1390		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
1391		(sc->hs_frozen == 0)?
1392		"freezing the queue" : "the queue is already frozen");
1393	if (sc->hs_frozen == 0) {
1394		sc->hs_frozen = 1;
1395		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
1396	}
1397	mtx_unlock(&sc->hs_lock);
1398
1399#if HVS_TIMEOUT_TEST
1400	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
1401#endif
1402}
1403#endif
1404
1405/**
1406 * @brief StorVSC device poll function
1407 *
1408 * This function is responsible for servicing requests when
1409 * interrupts are disabled (i.e when we are dumping core.)
1410 *
1411 * @param sim a pointer to a CAM SCSI interface module
1412 */
1413static void
1414storvsc_poll(struct cam_sim *sim)
1415{
1416	struct storvsc_softc *sc = cam_sim_softc(sim);
1417
1418	mtx_assert(&sc->hs_lock, MA_OWNED);
1419	mtx_unlock(&sc->hs_lock);
1420	hv_storvsc_on_channel_callback(sc->hs_chan, sc);
1421	mtx_lock(&sc->hs_lock);
1422}
1423
1424/**
1425 * @brief StorVSC device action function
1426 *
1427 * This function is responsible for handling SCSI operations which
1428 * are passed from the CAM layer.  The requests are in the form of
1429 * CAM control blocks which indicate the action being performed.
1430 * Not all actions require converting the request to a VSCSI protocol
1431 * message - these actions can be responded to by this driver.
1432 * Requests which are destined for a backend storage device are converted
1433 * to a VSCSI protocol message and sent on the channel connection associated
1434 * with this device.
1435 *
1436 * @param sim pointer to a CAM SCSI interface module
1437 * @param ccb pointer to a CAM control block
1438 */
1439static void
1440storvsc_action(struct cam_sim *sim, union ccb *ccb)
1441{
1442	struct storvsc_softc *sc = cam_sim_softc(sim);
1443	int res;
1444
1445	mtx_assert(&sc->hs_lock, MA_OWNED);
1446	switch (ccb->ccb_h.func_code) {
1447	case XPT_PATH_INQ: {
1448		struct ccb_pathinq *cpi = &ccb->cpi;
1449
1450		cpi->version_num = 1;
1451		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
1452		cpi->target_sprt = 0;
1453		cpi->hba_misc = PIM_NOBUSRESET;
1454		if (hv_storvsc_use_pim_unmapped)
1455			cpi->hba_misc |= PIM_UNMAPPED;
1456		cpi->maxio = STORVSC_DATA_SIZE_MAX;
1457		cpi->hba_eng_cnt = 0;
1458		cpi->max_target = STORVSC_MAX_TARGETS;
1459		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
1460		cpi->initiator_id = cpi->max_target;
1461		cpi->bus_id = cam_sim_bus(sim);
1462		cpi->base_transfer_speed = 300000;
1463		cpi->transport = XPORT_SAS;
1464		cpi->transport_version = 0;
1465		cpi->protocol = PROTO_SCSI;
1466		cpi->protocol_version = SCSI_REV_SPC2;
1467		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
1468		strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
1469		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
1470		cpi->unit_number = cam_sim_unit(sim);
1471
1472		ccb->ccb_h.status = CAM_REQ_CMP;
1473		xpt_done(ccb);
1474		return;
1475	}
1476	case XPT_GET_TRAN_SETTINGS: {
1477		struct  ccb_trans_settings *cts = &ccb->cts;
1478
1479		cts->transport = XPORT_SAS;
1480		cts->transport_version = 0;
1481		cts->protocol = PROTO_SCSI;
1482		cts->protocol_version = SCSI_REV_SPC2;
1483
1484		/* enable tag queuing and disconnected mode */
1485		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
1486		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
1487		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
1488		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
1489		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
1490
1491		ccb->ccb_h.status = CAM_REQ_CMP;
1492		xpt_done(ccb);
1493		return;
1494	}
1495	case XPT_SET_TRAN_SETTINGS:	{
1496		ccb->ccb_h.status = CAM_REQ_CMP;
1497		xpt_done(ccb);
1498		return;
1499	}
1500	case XPT_CALC_GEOMETRY:{
1501		cam_calc_geometry(&ccb->ccg, 1);
1502		xpt_done(ccb);
1503		return;
1504	}
1505	case  XPT_RESET_BUS:
1506	case  XPT_RESET_DEV:{
1507#if HVS_HOST_RESET
1508		if ((res = hv_storvsc_host_reset(sc)) != 0) {
1509			xpt_print(ccb->ccb_h.path,
1510				"hv_storvsc_host_reset failed with %d\n", res);
1511			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1512			xpt_done(ccb);
1513			return;
1514		}
1515		ccb->ccb_h.status = CAM_REQ_CMP;
1516		xpt_done(ccb);
1517		return;
1518#else
1519		xpt_print(ccb->ccb_h.path,
1520				  "%s reset not supported.\n",
1521				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
1522				  "bus" : "dev");
1523		ccb->ccb_h.status = CAM_REQ_INVALID;
1524		xpt_done(ccb);
1525		return;
1526#endif	/* HVS_HOST_RESET */
1527	}
1528	case XPT_SCSI_IO:
1529	case XPT_IMMED_NOTIFY: {
1530		struct hv_storvsc_request *reqp = NULL;
1531		bus_dmamap_t dmap_saved;
1532
1533		if (ccb->csio.cdb_len == 0) {
1534			panic("cdl_len is 0\n");
1535		}
1536
1537		if (LIST_EMPTY(&sc->hs_free_list)) {
1538			ccb->ccb_h.status = CAM_REQUEUE_REQ;
1539			if (sc->hs_frozen == 0) {
1540				sc->hs_frozen = 1;
1541				xpt_freeze_simq(sim, /* count*/1);
1542			}
1543			xpt_done(ccb);
1544			return;
1545		}
1546
1547		reqp = LIST_FIRST(&sc->hs_free_list);
1548		LIST_REMOVE(reqp, link);
1549
1550		/* Save the data_dmap before reset request */
1551		dmap_saved = reqp->data_dmap;
1552
1553		/* XXX this is ugly */
1554		bzero(reqp, sizeof(struct hv_storvsc_request));
1555
1556		/* Restore necessary bits */
1557		reqp->data_dmap = dmap_saved;
1558		reqp->softc = sc;
1559
1560		ccb->ccb_h.status |= CAM_SIM_QUEUED;
1561		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
1562			ccb->ccb_h.status = CAM_REQ_INVALID;
1563			xpt_done(ccb);
1564			return;
1565		}
1566
1567#ifdef notyet
1568		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
1569			callout_init(&reqp->callout, 1);
1570			callout_reset_sbt(&reqp->callout,
1571			    SBT_1MS * ccb->ccb_h.timeout, 0,
1572			    storvsc_timeout, reqp, 0);
1573#if HVS_TIMEOUT_TEST
1574			cv_init(&reqp->event.cv, "storvsc timeout cv");
1575			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
1576					NULL, MTX_DEF);
1577			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
1578				case MODE_SELECT_10:
1579				case SEND_DIAGNOSTIC:
1580					/* To have timer send the request. */
1581					return;
1582				default:
1583					break;
1584			}
1585#endif /* HVS_TIMEOUT_TEST */
1586		}
1587#endif
1588
1589		if ((res = hv_storvsc_io_request(sc, reqp)) != 0) {
1590			xpt_print(ccb->ccb_h.path,
1591				"hv_storvsc_io_request failed with %d\n", res);
1592			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
1593			storvsc_free_request(sc, reqp);
1594			xpt_done(ccb);
1595			return;
1596		}
1597		return;
1598	}
1599
1600	default:
1601		ccb->ccb_h.status = CAM_REQ_INVALID;
1602		xpt_done(ccb);
1603		return;
1604	}
1605}
1606
1607/**
1608 * @brief destroy bounce buffer
1609 *
1610 * This function is responsible for destroy a Scatter/Gather list
1611 * that create by storvsc_create_bounce_buffer()
1612 *
1613 * @param sgl- the Scatter/Gather need be destroy
1614 * @param sg_count- page count of the SG list.
1615 *
1616 */
1617static void
1618storvsc_destroy_bounce_buffer(struct hv_sglist *sgl)
1619{
1620	struct hv_sgl_node *sgl_node = NULL;
1621	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
1622		printf("storvsc error: not enough in use sgl\n");
1623		return;
1624	}
1625	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1626	LIST_REMOVE(sgl_node, link);
1627	sgl_node->sgl_data = sgl;
1628	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1629}
1630
1631/**
1632 * @brief create bounce buffer
1633 *
1634 * This function is responsible for create a Scatter/Gather list,
1635 * which hold several pages that can be aligned with page size.
1636 *
1637 * @param seg_count- SG-list segments count
1638 * @param write - if WRITE_TYPE, set SG list page used size to 0,
1639 * otherwise set used size to page size.
1640 *
1641 * return NULL if create failed
1642 */
1643static struct hv_sglist *
1644storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1645{
1646	int i = 0;
1647	struct hv_sglist *bounce_sgl = NULL;
1648	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1649	struct hv_sgl_node *sgl_node = NULL;
1650
1651	/* get struct hv_sglist from free_sgl_list */
1652	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1653		printf("storvsc error: not enough free sgl\n");
1654		return NULL;
1655	}
1656	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1657	LIST_REMOVE(sgl_node, link);
1658	bounce_sgl = sgl_node->sgl_data;
1659	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1660
1661	bounce_sgl->sg_maxseg = seg_count;
1662
1663	if (write == WRITE_TYPE)
1664		bounce_sgl->sg_nseg = 0;
1665	else
1666		bounce_sgl->sg_nseg = seg_count;
1667
1668	for (i = 0; i < seg_count; i++)
1669	        bounce_sgl->sg_iov[i].iov_len = buf_len;
1670
1671	return bounce_sgl;
1672}
1673
1674/**
1675 * @brief copy data from SG list to bounce buffer
1676 *
1677 * This function is responsible for copy data from one SG list's segments
1678 * to another SG list which used as bounce buffer.
1679 *
1680 * @param bounce_sgl - the destination SG list
1681 * @param orig_sgl - the segment of the source SG list.
1682 * @param orig_sgl_count - the count of segments.
1683 * @param orig_sgl_count - indicate which segment need bounce buffer,
1684 *  set 1 means need.
1685 *
1686 */
1687static void
1688storvsc_copy_sgl_to_bounce_buf(struct hv_sglist *bounce_sgl,
1689			       bus_dma_segment_t *orig_sgl,
1690			       unsigned int orig_sgl_count,
1691			       uint64_t seg_bits)
1692{
1693	int src_sgl_idx = 0;
1694
1695	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1696		if (seg_bits & (1 << src_sgl_idx)) {
1697			memcpy(bounce_sgl->sg_iov[src_sgl_idx].iov_base,
1698			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1699			    orig_sgl[src_sgl_idx].ds_len);
1700
1701			bounce_sgl->sg_iov[src_sgl_idx].iov_len =
1702			    orig_sgl[src_sgl_idx].ds_len;
1703		}
1704	}
1705}
1706
1707/**
1708 * @brief copy data from SG list which used as bounce to another SG list
1709 *
1710 * This function is responsible for copy data from one SG list with bounce
1711 * buffer to another SG list's segments.
1712 *
1713 * @param dest_sgl - the destination SG list's segments
1714 * @param dest_sgl_count - the count of destination SG list's segment.
1715 * @param src_sgl - the source SG list.
1716 * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1717 *
1718 */
1719void
1720storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1721				    unsigned int dest_sgl_count,
1722				    struct hv_sglist* src_sgl,
1723				    uint64_t seg_bits)
1724{
1725	int sgl_idx = 0;
1726
1727	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1728		if (seg_bits & (1 << sgl_idx)) {
1729			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1730			    src_sgl->sg_iov[sgl_idx].iov_base,
1731			    src_sgl->sg_iov[sgl_idx].iov_len);
1732		}
1733	}
1734}
1735
1736/**
1737 * @brief check SG list with bounce buffer or not
1738 *
1739 * This function is responsible for check if need bounce buffer for SG list.
1740 *
1741 * @param sgl - the SG list's segments
1742 * @param sg_count - the count of SG list's segment.
1743 * @param bits - segmengs number that need bounce buffer
1744 *
1745 * return -1 if SG list needless bounce buffer
1746 */
1747static int
1748storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1749				unsigned int sg_count,
1750				uint64_t *bits)
1751{
1752	int i = 0;
1753	int offset = 0;
1754	uint64_t phys_addr = 0;
1755	uint64_t tmp_bits = 0;
1756	boolean_t found_hole = FALSE;
1757	boolean_t pre_aligned = TRUE;
1758
1759	if (sg_count < 2){
1760		return -1;
1761	}
1762
1763	*bits = 0;
1764
1765	phys_addr = vtophys(sgl[0].ds_addr);
1766	offset =  phys_addr - trunc_page(phys_addr);
1767
1768	if (offset != 0) {
1769		pre_aligned = FALSE;
1770		tmp_bits |= 1;
1771	}
1772
1773	for (i = 1; i < sg_count; i++) {
1774		phys_addr = vtophys(sgl[i].ds_addr);
1775		offset =  phys_addr - trunc_page(phys_addr);
1776
1777		if (offset == 0) {
1778			if (FALSE == pre_aligned){
1779				/*
1780				 * This segment is aligned, if the previous
1781				 * one is not aligned, find a hole
1782				 */
1783				found_hole = TRUE;
1784			}
1785			pre_aligned = TRUE;
1786		} else {
1787			tmp_bits |= 1ULL << i;
1788			if (!pre_aligned) {
1789				if (phys_addr != vtophys(sgl[i-1].ds_addr +
1790				    sgl[i-1].ds_len)) {
1791					/*
1792					 * Check whether connect to previous
1793					 * segment,if not, find the hole
1794					 */
1795					found_hole = TRUE;
1796				}
1797			} else {
1798				found_hole = TRUE;
1799			}
1800			pre_aligned = FALSE;
1801		}
1802	}
1803
1804	if (!found_hole) {
1805		return (-1);
1806	} else {
1807		*bits = tmp_bits;
1808		return 0;
1809	}
1810}
1811
1812/**
1813 * Copy bus_dma segments to multiple page buffer, which requires
1814 * the pages are compact composed except for the 1st and last pages.
1815 */
1816static void
1817storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1818{
1819	struct hv_storvsc_request *reqp = arg;
1820	union ccb *ccb = reqp->ccb;
1821	struct ccb_scsiio *csio = &ccb->csio;
1822	struct storvsc_gpa_range *prplist;
1823	int i;
1824
1825	prplist = &reqp->prp_list;
1826	prplist->gpa_range.gpa_len = csio->dxfer_len;
1827	prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK;
1828
1829	for (i = 0; i < nsegs; i++) {
1830#ifdef INVARIANTS
1831#if !defined(__aarch64__)
1832		if (nsegs > 1) {
1833			if (i == 0) {
1834				KASSERT((segs[i].ds_addr & PAGE_MASK) +
1835				    segs[i].ds_len == PAGE_SIZE,
1836				    ("invalid 1st page, ofs 0x%jx, len %zu",
1837				     (uintmax_t)segs[i].ds_addr,
1838				     segs[i].ds_len));
1839			} else if (i == nsegs - 1) {
1840				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0,
1841				    ("invalid last page, ofs 0x%jx",
1842				     (uintmax_t)segs[i].ds_addr));
1843			} else {
1844				KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 &&
1845				    segs[i].ds_len == PAGE_SIZE,
1846				    ("not a full page, ofs 0x%jx, len %zu",
1847				     (uintmax_t)segs[i].ds_addr,
1848				     segs[i].ds_len));
1849			}
1850		}
1851#endif
1852#endif
1853		prplist->gpa_page[i] = atop(segs[i].ds_addr);
1854	}
1855	reqp->prp_cnt = nsegs;
1856
1857	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
1858		bus_dmasync_op_t op;
1859
1860		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN)
1861			op = BUS_DMASYNC_PREREAD;
1862		else
1863			op = BUS_DMASYNC_PREWRITE;
1864
1865		bus_dmamap_sync(reqp->softc->storvsc_req_dtag,
1866		    reqp->data_dmap, op);
1867	}
1868}
1869
1870/**
1871 * @brief Fill in a request structure based on a CAM control block
1872 *
1873 * Fills in a request structure based on the contents of a CAM control
1874 * block.  The request structure holds the payload information for
1875 * VSCSI protocol request.
1876 *
1877 * @param ccb pointer to a CAM contorl block
1878 * @param reqp pointer to a request structure
1879 */
1880static int
1881create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1882{
1883	struct ccb_scsiio *csio = &ccb->csio;
1884	uint64_t phys_addr;
1885	uint32_t pfn;
1886	uint64_t not_aligned_seg_bits = 0;
1887	int error;
1888
1889	/* refer to struct vmscsi_req for meanings of these two fields */
1890	reqp->vstor_packet.u.vm_srb.port =
1891		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1892	reqp->vstor_packet.u.vm_srb.path_id =
1893		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1894
1895	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1896	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1897
1898	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1899	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1900		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1901			csio->cdb_len);
1902	} else {
1903		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1904			csio->cdb_len);
1905	}
1906
1907	if (hv_storvsc_use_win8ext_flags) {
1908		reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60;
1909		reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1910			SRB_FLAGS_DISABLE_SYNCH_TRANSFER;
1911	}
1912	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1913	case CAM_DIR_OUT:
1914		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1915		if (hv_storvsc_use_win8ext_flags) {
1916			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1917				SRB_FLAGS_DATA_OUT;
1918		}
1919		break;
1920	case CAM_DIR_IN:
1921		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1922		if (hv_storvsc_use_win8ext_flags) {
1923			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1924				SRB_FLAGS_DATA_IN;
1925		}
1926		break;
1927	case CAM_DIR_NONE:
1928		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1929		if (hv_storvsc_use_win8ext_flags) {
1930			reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |=
1931				SRB_FLAGS_NO_DATA_TRANSFER;
1932		}
1933		break;
1934	default:
1935		printf("Error: unexpected data direction: 0x%x\n",
1936			ccb->ccb_h.flags & CAM_DIR_MASK);
1937		return (EINVAL);
1938	}
1939
1940	reqp->sense_data     = &csio->sense_data;
1941	reqp->sense_info_len = csio->sense_len;
1942
1943	reqp->ccb = ccb;
1944	ccb->ccb_h.spriv_ptr0 = reqp;
1945
1946	if (0 == csio->dxfer_len) {
1947		return (0);
1948	}
1949
1950	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1951	case CAM_DATA_BIO:
1952	case CAM_DATA_VADDR:
1953		error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag,
1954		    reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp,
1955		    BUS_DMA_NOWAIT);
1956		if (error) {
1957			xpt_print(ccb->ccb_h.path,
1958			    "bus_dmamap_load_ccb failed: %d\n", error);
1959			return (error);
1960		}
1961		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
1962			reqp->softc->sysctl_data.data_bio_cnt++;
1963		else
1964			reqp->softc->sysctl_data.data_vaddr_cnt++;
1965		break;
1966
1967	case CAM_DATA_SG:
1968	{
1969		struct storvsc_gpa_range *prplist;
1970		int i = 0;
1971		int offset = 0;
1972		int ret;
1973
1974		bus_dma_segment_t *storvsc_sglist =
1975		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1976		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1977
1978		prplist = &reqp->prp_list;
1979		prplist->gpa_range.gpa_len = csio->dxfer_len;
1980
1981		printf("Storvsc: get SG I/O operation, %d\n",
1982		    reqp->vstor_packet.u.vm_srb.data_in);
1983
1984		if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){
1985			printf("Storvsc: %d segments is too much, "
1986			    "only support %d segments\n",
1987			    storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX);
1988			return (EINVAL);
1989		}
1990
1991		/*
1992		 * We create our own bounce buffer function currently. Idealy
1993		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
1994		 * code there is no callback API to check the page alignment of
1995		 * middle segments before busdma can decide if a bounce buffer
1996		 * is needed for particular segment. There is callback,
1997		 * "bus_dma_filter_t *filter", but the parrameters are not
1998		 * sufficient for storvsc driver.
1999		 * TODO:
2000		 *	Add page alignment check in BUS_DMA(9) callback. Once
2001		 *	this is complete, switch the following code to use
2002		 *	BUS_DMA(9) for storvsc bounce buffer support.
2003		 */
2004		/* check if we need to create bounce buffer */
2005		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
2006		    storvsc_sg_count, &not_aligned_seg_bits);
2007		if (ret != -1) {
2008			reqp->bounce_sgl =
2009			    storvsc_create_bounce_buffer(storvsc_sg_count,
2010			    reqp->vstor_packet.u.vm_srb.data_in);
2011			if (NULL == reqp->bounce_sgl) {
2012				printf("Storvsc_error: "
2013				    "create bounce buffer failed.\n");
2014				return (ENOMEM);
2015			}
2016
2017			reqp->bounce_sgl_count = storvsc_sg_count;
2018			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
2019
2020			/*
2021			 * if it is write, we need copy the original data
2022			 *to bounce buffer
2023			 */
2024			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2025				storvsc_copy_sgl_to_bounce_buf(
2026				    reqp->bounce_sgl,
2027				    storvsc_sglist,
2028				    storvsc_sg_count,
2029				    reqp->not_aligned_seg_bits);
2030			}
2031
2032			/* transfer virtual address to physical frame number */
2033			if (reqp->not_aligned_seg_bits & 0x1){
2034 				phys_addr =
2035				    vtophys(reqp->bounce_sgl->sg_iov[0].iov_base);
2036			}else{
2037 				phys_addr =
2038					vtophys(storvsc_sglist[0].ds_addr);
2039			}
2040			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2041
2042			pfn = phys_addr >> PAGE_SHIFT;
2043			prplist->gpa_page[0] = pfn;
2044
2045			for (i = 1; i < storvsc_sg_count; i++) {
2046				if (reqp->not_aligned_seg_bits & (1 << i)) {
2047					phys_addr =
2048					    vtophys(reqp->bounce_sgl->sg_iov[i].iov_base);
2049				} else {
2050					phys_addr =
2051					    vtophys(storvsc_sglist[i].ds_addr);
2052				}
2053
2054				pfn = phys_addr >> PAGE_SHIFT;
2055				prplist->gpa_page[i] = pfn;
2056			}
2057			reqp->prp_cnt = i;
2058		} else {
2059			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
2060
2061			prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK;
2062
2063			for (i = 0; i < storvsc_sg_count; i++) {
2064				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
2065				pfn = phys_addr >> PAGE_SHIFT;
2066				prplist->gpa_page[i] = pfn;
2067			}
2068			reqp->prp_cnt = i;
2069
2070			/* check the last segment cross boundary or not */
2071			offset = phys_addr & PAGE_MASK;
2072			if (offset) {
2073				/* Add one more PRP entry */
2074				phys_addr =
2075				    vtophys(storvsc_sglist[i-1].ds_addr +
2076				    PAGE_SIZE - offset);
2077				pfn = phys_addr >> PAGE_SHIFT;
2078				prplist->gpa_page[i] = pfn;
2079				reqp->prp_cnt++;
2080			}
2081
2082			reqp->bounce_sgl_count = 0;
2083		}
2084		reqp->softc->sysctl_data.data_sg_cnt++;
2085		break;
2086	}
2087	default:
2088		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
2089		return(EINVAL);
2090	}
2091
2092	return(0);
2093}
2094
2095static uint32_t
2096is_scsi_valid(const struct scsi_inquiry_data *inq_data)
2097{
2098	u_int8_t type;
2099
2100	type = SID_TYPE(inq_data);
2101	if (type == T_NODEVICE)
2102		return (0);
2103	if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
2104		return (0);
2105	return (1);
2106}
2107
2108/**
2109 * @brief completion function before returning to CAM
2110 *
2111 * I/O process has been completed and the result needs
2112 * to be passed to the CAM layer.
2113 * Free resources related to this request.
2114 *
2115 * @param reqp pointer to a request structure
2116 */
2117static void
2118storvsc_io_done(struct hv_storvsc_request *reqp)
2119{
2120	union ccb *ccb = reqp->ccb;
2121	struct ccb_scsiio *csio = &ccb->csio;
2122	struct storvsc_softc *sc = reqp->softc;
2123	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
2124	bus_dma_segment_t *ori_sglist = NULL;
2125	int ori_sg_count = 0;
2126	const struct scsi_generic *cmd;
2127
2128	if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) {
2129		bus_dmasync_op_t op;
2130
2131		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN)
2132			op = BUS_DMASYNC_POSTREAD;
2133		else
2134			op = BUS_DMASYNC_POSTWRITE;
2135
2136		bus_dmamap_sync(reqp->softc->storvsc_req_dtag,
2137		    reqp->data_dmap, op);
2138		bus_dmamap_unload(sc->storvsc_req_dtag, reqp->data_dmap);
2139	}
2140
2141	/* destroy bounce buffer if it is used */
2142	if (reqp->bounce_sgl_count) {
2143		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
2144		ori_sg_count = ccb->csio.sglist_cnt;
2145
2146		/*
2147		 * If it is READ operation, we should copy back the data
2148		 * to original SG list.
2149		 */
2150		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2151			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
2152			    ori_sg_count,
2153			    reqp->bounce_sgl,
2154			    reqp->not_aligned_seg_bits);
2155		}
2156
2157		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
2158		reqp->bounce_sgl_count = 0;
2159	}
2160
2161	if (reqp->retries > 0) {
2162		mtx_lock(&sc->hs_lock);
2163#if HVS_TIMEOUT_TEST
2164		xpt_print(ccb->ccb_h.path,
2165			"%u: IO returned after timeout, "
2166			"waking up timer handler if any.\n", ticks);
2167		mtx_lock(&reqp->event.mtx);
2168		cv_signal(&reqp->event.cv);
2169		mtx_unlock(&reqp->event.mtx);
2170#endif
2171		reqp->retries = 0;
2172		xpt_print(ccb->ccb_h.path,
2173			"%u: IO returned after timeout, "
2174			"stopping timer if any.\n", ticks);
2175		mtx_unlock(&sc->hs_lock);
2176	}
2177
2178#ifdef notyet
2179	/*
2180	 * callout_drain() will wait for the timer handler to finish
2181	 * if it is running. So we don't need any lock to synchronize
2182	 * between this routine and the timer handler.
2183	 * Note that we need to make sure reqp is not freed when timer
2184	 * handler is using or will use it.
2185	 */
2186	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
2187		callout_drain(&reqp->callout);
2188	}
2189#endif
2190	cmd = (const struct scsi_generic *)
2191	    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
2192	     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
2193
2194	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
2195	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
2196	int srb_status = SRB_STATUS(vm_srb->srb_status);
2197#ifdef DIAGNOSTIC
2198	if (hv_storvsc_srb_status != -1) {
2199		srb_status = SRB_STATUS(hv_storvsc_srb_status & 0x3f);
2200		hv_storvsc_srb_status = -1;
2201	}
2202#endif /* DIAGNOSTIC */
2203	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
2204		if (srb_status != SRB_STATUS_SUCCESS) {
2205			bool log_error = true;
2206			switch (srb_status) {
2207				case SRB_STATUS_PENDING:
2208					/* We should never get this */
2209					panic("storvsc_io_done: SRB_STATUS_PENDING");
2210					break;
2211				case SRB_STATUS_ABORTED:
2212					/*
2213					 * storvsc doesn't support aborts yet
2214					 * but if we ever get this status
2215					 * the I/O is complete - treat it as a
2216					 * timeout
2217					 */
2218					ccb->ccb_h.status |= CAM_CMD_TIMEOUT;
2219					break;
2220				case SRB_STATUS_ABORT_FAILED:
2221					/* We should never get this */
2222					panic("storvsc_io_done: SRB_STATUS_ABORT_FAILED");
2223					break;
2224				case SRB_STATUS_ERROR:
2225					/*
2226					 * We should never get this.
2227					 * Treat it as a CAM_UNREC_HBA_ERROR.
2228					 * It will be retried
2229					 */
2230					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2231					break;
2232				case SRB_STATUS_BUSY:
2233					/* Host is busy. Delay and retry */
2234					ccb->ccb_h.status |= CAM_BUSY;
2235					break;
2236				case SRB_STATUS_INVALID_REQUEST:
2237				case SRB_STATUS_INVALID_PATH_ID:
2238				case SRB_STATUS_NO_DEVICE:
2239				case SRB_STATUS_INVALID_TARGET_ID:
2240					/*
2241					 * These indicate an invalid address
2242					 * and really should never be seen.
2243					 * A CAM_PATH_INVALID could be
2244					 * used here but I want to run
2245					 * down retries.  Do a CAM_BUSY
2246					 * since the host might be having issues.
2247					 */
2248					ccb->ccb_h.status |= CAM_BUSY;
2249					break;
2250				case SRB_STATUS_TIMEOUT:
2251				case SRB_STATUS_COMMAND_TIMEOUT:
2252					/* The backend has timed this out */
2253					ccb->ccb_h.status |= CAM_BUSY;
2254					break;
2255				/* Some old pSCSI errors below */
2256				case SRB_STATUS_SELECTION_TIMEOUT:
2257				case SRB_STATUS_MESSAGE_REJECTED:
2258				case SRB_STATUS_PARITY_ERROR:
2259				case SRB_STATUS_NO_HBA:
2260				case SRB_STATUS_DATA_OVERRUN:
2261				case SRB_STATUS_UNEXPECTED_BUS_FREE:
2262				case SRB_STATUS_PHASE_SEQUENCE_FAILURE:
2263					/*
2264					 * Old pSCSI responses, should never get.
2265					 * If we do treat as a CAM_UNREC_HBA_ERROR
2266					 * which will be retried
2267					 */
2268					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2269					break;
2270				case SRB_STATUS_BUS_RESET:
2271					ccb->ccb_h.status |= CAM_SCSI_BUS_RESET;
2272					break;
2273				case SRB_STATUS_BAD_SRB_BLOCK_LENGTH:
2274					/*
2275					 * The request block is malformed and
2276					 * I doubt it is from the guest. Just retry.
2277					 */
2278					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2279					break;
2280				/* Not used statuses just retry */
2281				case SRB_STATUS_REQUEST_FLUSHED:
2282				case SRB_STATUS_BAD_FUNCTION:
2283				case SRB_STATUS_NOT_POWERED:
2284					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2285					break;
2286				case SRB_STATUS_INVALID_LUN:
2287					/*
2288					 * Don't log an EMS for this response since
2289					 * there is no device at this LUN. This is a
2290					 * normal and expected response when a device
2291					 * is detached.
2292					 */
2293					ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
2294					log_error = false;
2295					break;
2296				case SRB_STATUS_ERROR_RECOVERY:
2297				case SRB_STATUS_LINK_DOWN:
2298					/*
2299					 * I don't ever expect these from
2300					 * the host but if we ever get
2301					 * retry after a delay
2302					 */
2303					ccb->ccb_h.status |= CAM_BUSY;
2304					break;
2305				default:
2306					/*
2307					 * An undefined response assert on
2308					 * on debug builds else retry
2309					 */
2310					ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR;
2311					KASSERT(srb_status <= SRB_STATUS_LINK_DOWN,
2312					    ("storvsc: %s, unexpected srb_status of 0x%x",
2313					    __func__, srb_status));
2314					break;
2315			}
2316			if (log_error) {
2317				xpt_print(ccb->ccb_h.path, "The hypervisor's I/O adapter "
2318					"driver received an unexpected response code 0x%x "
2319					"for operation: %s. If this continues to occur, "
2320					"report the condition to your hypervisor vendor so "
2321					"they can rectify the issue.\n", srb_status,
2322					scsi_op_desc(cmd->opcode, NULL));
2323			}
2324		} else {
2325			ccb->ccb_h.status |= CAM_REQ_CMP;
2326		}
2327
2328		if (cmd->opcode == INQUIRY &&
2329		    srb_status == SRB_STATUS_SUCCESS) {
2330			int resp_xfer_len, resp_buf_len, data_len;
2331			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
2332			struct scsi_inquiry_data *inq_data =
2333			    (struct scsi_inquiry_data *)csio->data_ptr;
2334
2335			/* Get the buffer length reported by host */
2336			resp_xfer_len = vm_srb->transfer_len;
2337
2338			/* Get the available buffer length */
2339			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
2340			data_len = (resp_buf_len < resp_xfer_len) ?
2341			    resp_buf_len : resp_xfer_len;
2342			if (bootverbose && data_len >= 5) {
2343				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
2344				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
2345				    resp_buf[0], resp_buf[1], resp_buf[2],
2346				    resp_buf[3], resp_buf[4]);
2347			}
2348			/*
2349			 * XXX: Hyper-V (since win2012r2) responses inquiry with
2350			 * unknown version (0) for GEN-2 DVD device.
2351			 * Manually set the version number to SPC3 in order to
2352			 * ask CAM to continue probing with "PROBE_REPORT_LUNS".
2353			 * see probedone() in scsi_xpt.c
2354			 */
2355			if (SID_TYPE(inq_data) == T_CDROM &&
2356			    inq_data->version == 0 &&
2357			    (vmstor_proto_version >= VMSTOR_PROTOCOL_VERSION_WIN8)) {
2358				inq_data->version = SCSI_REV_SPC3;
2359				if (bootverbose) {
2360					xpt_print(ccb->ccb_h.path,
2361					    "set version from 0 to %d\n",
2362					    inq_data->version);
2363				}
2364			}
2365			/*
2366			 * XXX: Manually fix the wrong response returned from WS2012
2367			 */
2368			if (!is_scsi_valid(inq_data) &&
2369			    (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2370			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
2371			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
2372				if (data_len >= 4 &&
2373				    (resp_buf[2] == 0 || resp_buf[3] == 0)) {
2374					resp_buf[2] = SCSI_REV_SPC3;
2375					resp_buf[3] = 2; // resp fmt must be 2
2376					if (bootverbose)
2377						xpt_print(ccb->ccb_h.path,
2378						    "fix version and resp fmt for 0x%x\n",
2379						    vmstor_proto_version);
2380				}
2381			} else if (data_len >= SHORT_INQUIRY_LENGTH) {
2382				char vendor[16];
2383
2384				cam_strvis(vendor, inq_data->vendor,
2385				    sizeof(inq_data->vendor), sizeof(vendor));
2386				/*
2387				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
2388				 * WIN2012 R2 in order to support UNMAP feature.
2389				 */
2390				if (!strncmp(vendor, "Msft", 4) &&
2391				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
2392				    (vmstor_proto_version ==
2393				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2394				     vmstor_proto_version ==
2395				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
2396					inq_data->version = SCSI_REV_SPC3;
2397					if (bootverbose) {
2398						xpt_print(ccb->ccb_h.path,
2399						    "storvsc upgrades "
2400						    "SPC2 to SPC3\n");
2401					}
2402				}
2403			}
2404		}
2405	} else {
2406		/**
2407		 * On Some Windows hosts TEST_UNIT_READY command can return
2408		 * SRB_STATUS_ERROR and sense data, for example, asc=0x3a,1
2409		 * "(Medium not present - tray closed)". This error can be
2410		 * ignored since it will be sent to host periodically.
2411		 */
2412		boolean_t unit_not_ready = \
2413		    vm_srb->scsi_status == SCSI_STATUS_CHECK_COND &&
2414		    cmd->opcode == TEST_UNIT_READY &&
2415		    srb_status == SRB_STATUS_ERROR;
2416		if (!unit_not_ready && bootverbose) {
2417			mtx_lock(&sc->hs_lock);
2418			xpt_print(ccb->ccb_h.path,
2419				"storvsc scsi_status = %d, srb_status = %d\n",
2420				vm_srb->scsi_status, srb_status);
2421			mtx_unlock(&sc->hs_lock);
2422		}
2423		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
2424	}
2425
2426	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
2427	if (srb_status == SRB_STATUS_SUCCESS ||
2428	    srb_status == SRB_STATUS_DATA_OVERRUN)
2429		ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
2430	else
2431		ccb->csio.resid = ccb->csio.dxfer_len;
2432
2433	if ((vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) != 0 &&
2434	    reqp->sense_info_len != 0) {
2435		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
2436		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
2437	}
2438
2439	mtx_lock(&sc->hs_lock);
2440	if (reqp->softc->hs_frozen == 1) {
2441		xpt_print(ccb->ccb_h.path,
2442			"%u: storvsc unfreezing softc 0x%p.\n",
2443			ticks, reqp->softc);
2444		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
2445		reqp->softc->hs_frozen = 0;
2446	}
2447	storvsc_free_request(sc, reqp);
2448	mtx_unlock(&sc->hs_lock);
2449
2450	xpt_done_direct(ccb);
2451}
2452
2453/**
2454 * @brief Free a request structure
2455 *
2456 * Free a request structure by returning it to the free list
2457 *
2458 * @param sc pointer to a softc
2459 * @param reqp pointer to a request structure
2460 */
2461static void
2462storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2463{
2464
2465	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2466}
2467
2468/**
2469 * @brief Determine type of storage device from GUID
2470 *
2471 * Using the type GUID, determine if this is a StorVSC (paravirtual
2472 * SCSI or BlkVSC (paravirtual IDE) device.
2473 *
2474 * @param dev a device
2475 * returns an enum
2476 */
2477static enum hv_storage_type
2478storvsc_get_storage_type(device_t dev)
2479{
2480	device_t parent = device_get_parent(dev);
2481
2482	if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0)
2483		return DRIVER_BLKVSC;
2484	if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0)
2485		return DRIVER_STORVSC;
2486	return DRIVER_UNKNOWN;
2487}
2488
2489#define	PCI_VENDOR_INTEL	0x8086
2490#define	PCI_PRODUCT_PIIX4	0x7111
2491
2492static void
2493storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2494    struct ata_params *ident_buf __unused, int *veto)
2495{
2496
2497	/*
2498	 * The ATA disks are shared with the controllers managed
2499	 * by this driver, so veto the ATA disks' attachment; the
2500	 * ATA disks will be attached as SCSI disks once this driver
2501	 * attached.
2502	 */
2503	if (path->device->protocol == PROTO_ATA) {
2504		struct ccb_pathinq cpi;
2505
2506		xpt_path_inq(&cpi, path);
2507		if (cpi.ccb_h.status == CAM_REQ_CMP &&
2508		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
2509		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
2510			(*veto)++;
2511			if (bootverbose) {
2512				xpt_print(path,
2513				    "Disable ATA disks on "
2514				    "simulated ATA controller (0x%04x%04x)\n",
2515				    cpi.hba_device, cpi.hba_vendor);
2516			}
2517		}
2518	}
2519}
2520
2521static void
2522storvsc_sysinit(void *arg __unused)
2523{
2524	if (vm_guest == VM_GUEST_HV) {
2525		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2526		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2527	}
2528}
2529SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2530    NULL);
2531
2532static void
2533storvsc_sysuninit(void *arg __unused)
2534{
2535	if (storvsc_handler_tag != NULL)
2536		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
2537}
2538SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
2539    storvsc_sysuninit, NULL);
2540