1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2012 NetApp Inc.
4 * Copyright (c) 2012 Citrix Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/**
30 * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
 * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
32 * converted into VSCSI protocol messages which are delivered to the parent
33 * partition StorVSP driver over the Hyper-V VMBUS.
34 */
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: releng/10.3/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c 320912 2017-07-12 08:07:55Z delphij $");
37
38#include <sys/param.h>
39#include <sys/proc.h>
40#include <sys/condvar.h>
41#include <sys/time.h>
42#include <sys/systm.h>
43#include <sys/sockio.h>
44#include <sys/mbuf.h>
45#include <sys/malloc.h>
46#include <sys/module.h>
47#include <sys/kernel.h>
48#include <sys/queue.h>
49#include <sys/lock.h>
50#include <sys/sx.h>
51#include <sys/taskqueue.h>
52#include <sys/bus.h>
53#include <sys/mutex.h>
54#include <sys/callout.h>
55#include <vm/vm.h>
56#include <vm/pmap.h>
57#include <vm/uma.h>
58#include <sys/lock.h>
59#include <sys/sema.h>
60#include <sys/sglist.h>
61#include <sys/eventhandler.h>
62#include <machine/bus.h>
63#include <sys/bus_dma.h>
64
65#include <cam/cam.h>
66#include <cam/cam_ccb.h>
67#include <cam/cam_periph.h>
68#include <cam/cam_sim.h>
69#include <cam/cam_xpt_sim.h>
70#include <cam/cam_xpt_internal.h>
71#include <cam/cam_debug.h>
72#include <cam/scsi/scsi_all.h>
73#include <cam/scsi/scsi_message.h>
74
75#include <dev/hyperv/include/hyperv.h>
76#include "hv_vstorage.h"
77
78#define STORVSC_RINGBUFFER_SIZE		(20*PAGE_SIZE)
79#define STORVSC_MAX_LUNS_PER_TARGET	(64)
80#define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
81#define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
82#define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
83#define STORVSC_MAX_TARGETS		(2)
84
85#define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
86
87#define HV_ALIGN(x, a) roundup2(x, a)
88
89struct storvsc_softc;
90
/* One pre-allocated scatter/gather list, linkable into a pool list. */
struct hv_sgl_node {
	LIST_ENTRY(hv_sgl_node) link;
	struct sglist *sgl_data;	/* sg list backed by private pages */
};

/*
 * Global pool of pre-allocated sg lists used to bounce I/O buffers
 * that are not page-aligned.  Created once, on the first attach
 * (guarded by is_init); shared by all storvsc adapters.
 */
struct hv_sgl_page_pool{
	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
	LIST_HEAD(, hv_sgl_node) free_sgl_list;
	boolean_t                is_init;
} g_hv_sgl_page_pool;
101
/*
 * Upper bound on pages held by the bounce sg-list pool.  Parenthesized
 * so the macro expands safely inside larger arithmetic expressions.
 */
#define STORVSC_MAX_SG_PAGE_CNT	(STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT)
103
/* Direction of a storvsc I/O request, as seen from the guest. */
enum storvsc_request_type {
	WRITE_TYPE,
	READ_TYPE,
	UNKNOWN_TYPE
};
109
/*
 * Per-request driver state: embeds the VSCSI wire packet plus the
 * bookkeeping needed for bounce buffering, timeouts and synchronous
 * request/response waits.
 */
struct hv_storvsc_request {
	LIST_ENTRY(hv_storvsc_request) link;	/* hs_free_list linkage */
	struct vstor_packet	vstor_packet;	/* packet exchanged with host */
	hv_vmbus_multipage_buffer data_buf;	/* data pages for this I/O */
	void *sense_data;			/* destination for autosense */
	uint8_t sense_info_len;
	uint8_t retries;
	union ccb *ccb;				/* originating CAM CCB */
	struct storvsc_softc *softc;
	struct callout callout;			/* per-request timeout timer */
	struct sema synch_sema; /*Synchronize the request/response if needed */
	struct sglist *bounce_sgl;		/* bounce sg list, if any segment unaligned */
	unsigned int bounce_sgl_count;
	uint64_t not_aligned_seg_bits;		/* bitmap of unaligned segments */
};
125
/* Per-adapter (per-VMBUS device) state. */
struct storvsc_softc {
	struct hv_device		*hs_dev;	/* backing VMBUS device */
	LIST_HEAD(, hv_storvsc_request)	hs_free_list;	/* pre-allocated requests */
	struct mtx			hs_lock;
	struct storvsc_driver_props	*hs_drv_props;	/* blkvsc vs storvsc props */
	int 				hs_unit;
	uint32_t			hs_frozen;	/* CAM queue freeze flag */
	struct cam_sim			*hs_sim;
	struct cam_path 		*hs_path;
	uint32_t			hs_num_out_reqs;	/* outstanding I/O count */
	boolean_t			hs_destroy;	/* detach in progress */
	boolean_t			hs_drain_notify;	/* waiter wants drain wakeup */
	boolean_t			hs_open_multi_channel;
	struct sema 			hs_drain_sema;	/* posted when last req completes */
	struct hv_storvsc_request	hs_init_req;	/* reserved for init protocol */
	struct hv_storvsc_request	hs_reset_req;	/* reserved for bus reset */
};
143
144
145/**
146 * HyperV storvsc timeout testing cases:
147 * a. IO returned after first timeout;
148 * b. IO returned after second timeout and queue freeze;
149 * c. IO returned while timer handler is running
150 * The first can be tested by "sg_senddiag -vv /dev/daX",
151 * and the second and third can be done by
152 * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
153 */
154#define HVS_TIMEOUT_TEST 0
155
156/*
157 * Bus/adapter reset functionality on the Hyper-V host is
158 * buggy and it will be disabled until
159 * it can be further tested.
160 */
161#define HVS_HOST_RESET 0
162
/* Static per-flavor (blkvsc/storvsc) configuration; see g_drv_props_table. */
struct storvsc_driver_props {
	char		*drv_name;
	char		*drv_desc;
	uint8_t		drv_max_luns_per_target;
	uint8_t		drv_max_ios_per_target;
	uint32_t	drv_ringbuffer_size;
};
170
/*
 * Flavor of the paravirtual storage device; also used as the index
 * into g_drv_props_table, so the enumerator order must match it.
 */
enum hv_storage_type {
	DRIVER_BLKVSC,
	DRIVER_STORVSC,
	DRIVER_UNKNOWN
};
176
177#define HS_MAX_ADAPTERS 10
178
179#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
180
/*
 * VMBUS device type GUIDs, in the on-the-wire byte order (mixed-endian
 * GUID layout: first three groups little-endian).
 */
/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
static const hv_guid gStorVscDeviceType={
	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
};

/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
static const hv_guid gBlkVscDeviceType={
	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
};

/* Indexed by enum hv_storage_type (DRIVER_BLKVSC, DRIVER_STORVSC). */
static struct storvsc_driver_props g_drv_props_table[] = {
	{"blkvsc", "Hyper-V IDE Storage Interface",
	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
	 STORVSC_RINGBUFFER_SIZE},
	{"storvsc", "Hyper-V SCSI Storage Interface",
	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
	 STORVSC_RINGBUFFER_SIZE}
};
201
202static eventhandler_tag storvsc_handler_tag;
203/*
204 * Sense buffer size changed in win8; have a run-time
205 * variable to track the size we should use.
206 */
207static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
208
209/*
210 * The size of the vmscsi_request has changed in win8. The
211 * additional size is for the newly added elements in the
212 * structure. These elements are valid only when we are talking
213 * to a win8 host.
214 * Track the correct size we need to apply.
215 */
216static int vmscsi_size_delta;
217/*
218 * The storage protocol version is determined during the
219 * initial exchange with the host.  It will indicate which
220 * storage functionality is available in the host.
221*/
222static int vmstor_proto_version;
223
/* One negotiable protocol version and its size parameters. */
struct vmstor_proto {
        int proto_version;
        int sense_buffer_size;
        int vmscsi_size_delta;
};

/*
 * Versions the driver can negotiate, newest first.  The ordering
 * matters: hv_storvsc_channel_init() walks this table in order and
 * settles on the first version the host accepts.
 */
static const struct vmstor_proto vmstor_proto_list[] = {
        {
                VMSTOR_PROTOCOL_VERSION_WIN10,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN8_1,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN8,
                POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                0
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN7,
                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
                sizeof(struct vmscsi_win8_extension),
        },
        {
                VMSTOR_PROTOCOL_VERSION_WIN6,
                PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
                sizeof(struct vmscsi_win8_extension),
        }
};
257
258/* static functions */
259static int storvsc_probe(device_t dev);
260static int storvsc_attach(device_t dev);
261static int storvsc_detach(device_t dev);
262static void storvsc_poll(struct cam_sim * sim);
263static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
264static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
265static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
266static enum hv_storage_type storvsc_get_storage_type(device_t dev);
267static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
268static void hv_storvsc_on_channel_callback(void *context);
269static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
270					struct vstor_packet *vstor_packet,
271					struct hv_storvsc_request *request);
272static int hv_storvsc_connect_vsp(struct hv_device *device);
273static void storvsc_io_done(struct hv_storvsc_request *reqp);
274static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
275				bus_dma_segment_t *orig_sgl,
276				unsigned int orig_sgl_count,
277				uint64_t seg_bits);
278void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
279				unsigned int dest_sgl_count,
280				struct sglist* src_sgl,
281				uint64_t seg_bits);
282
283static device_method_t storvsc_methods[] = {
284	/* Device interface */
285	DEVMETHOD(device_probe,		storvsc_probe),
286	DEVMETHOD(device_attach,	storvsc_attach),
287	DEVMETHOD(device_detach,	storvsc_detach),
288	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
289	DEVMETHOD_END
290};
291
292static driver_t storvsc_driver = {
293	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
294};
295
296static devclass_t storvsc_devclass;
297DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
298MODULE_VERSION(storvsc, 1);
299MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
300
301
302/**
303 * The host is capable of sending messages to us that are
304 * completely unsolicited. So, we need to address the race
305 * condition where we may be in the process of unloading the
306 * driver when the host may send us an unsolicited message.
307 * We address this issue by implementing a sequentially
308 * consistent protocol:
309 *
 * 1. Channel callback is invoked while holding the channel lock
311 *    and an unloading driver will reset the channel callback under
312 *    the protection of this channel lock.
313 *
314 * 2. To ensure bounded wait time for unloading a driver, we don't
315 *    permit outgoing traffic once the device is marked as being
316 *    destroyed.
317 *
318 * 3. Once the device is marked as being destroyed, we only
319 *    permit incoming traffic to properly account for
320 *    packets already sent out.
321 */
322static inline struct storvsc_softc *
323get_stor_device(struct hv_device *device,
324				boolean_t outbound)
325{
326	struct storvsc_softc *sc;
327
328	sc = device_get_softc(device->device);
329	if (sc == NULL) {
330		return NULL;
331	}
332
333	if (outbound) {
334		/*
335		 * Here we permit outgoing I/O only
336		 * if the device is not being destroyed.
337		 */
338
339		if (sc->hs_destroy) {
340			sc = NULL;
341		}
342	} else {
343		/*
344		 * inbound case; if being destroyed
345		 * only permit to account for
346		 * messages already sent out.
347		 */
348		if (sc->hs_destroy && (sc->hs_num_out_reqs == 0)) {
349			sc = NULL;
350		}
351	}
352	return sc;
353}
354
355/**
 * @brief Callback handler, invoked when a multi-channel offer is received
357 *
358 * @param context  new multi-channel
359 */
360static void
361storvsc_handle_sc_creation(void *context)
362{
363	hv_vmbus_channel *new_channel;
364	struct hv_device *device;
365	struct storvsc_softc *sc;
366	struct vmstor_chan_props props;
367	int ret = 0;
368
369	new_channel = (hv_vmbus_channel *)context;
370	device = new_channel->primary_channel->device;
371	sc = get_stor_device(device, TRUE);
372	if (sc == NULL)
373		return;
374
375	if (FALSE == sc->hs_open_multi_channel)
376		return;
377
378	memset(&props, 0, sizeof(props));
379
380	ret = hv_vmbus_channel_open(new_channel,
381	    sc->hs_drv_props->drv_ringbuffer_size,
382  	    sc->hs_drv_props->drv_ringbuffer_size,
383	    (void *)&props,
384	    sizeof(struct vmstor_chan_props),
385	    hv_storvsc_on_channel_callback,
386	    new_channel);
387
388	return;
389}
390
391/**
392 * @brief Send multi-channel creation request to host
393 *
 * @param dev  a Hyper-V device pointer
395 * @param max_chans  the max channels supported by vmbus
396 */
397static void
398storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
399{
400	struct storvsc_softc *sc;
401	struct hv_storvsc_request *request;
402	struct vstor_packet *vstor_packet;
403	int request_channels_cnt = 0;
404	int ret;
405
406	/* get multichannels count that need to create */
407	request_channels_cnt = MIN(max_chans, mp_ncpus);
408
409	sc = get_stor_device(dev, TRUE);
410	if (sc == NULL) {
411		printf("Storvsc_error: get sc failed while send mutilchannel "
412		    "request\n");
413		return;
414	}
415
416	request = &sc->hs_init_req;
417
418	/* Establish a handler for multi-channel */
419	dev->channel->sc_creation_callback = storvsc_handle_sc_creation;
420
421	/* request the host to create multi-channel */
422	memset(request, 0, sizeof(struct hv_storvsc_request));
423
424	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
425
426	vstor_packet = &request->vstor_packet;
427
428	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
429	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
430	vstor_packet->u.multi_channels_cnt = request_channels_cnt;
431
432	ret = hv_vmbus_channel_send_packet(
433	    dev->channel,
434	    vstor_packet,
435	    VSTOR_PKT_SIZE,
436	    (uint64_t)(uintptr_t)request,
437	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
438	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
439
440	/* wait for 5 seconds */
441	ret = sema_timedwait(&request->synch_sema, 5 * hz);
442	if (ret != 0) {
443		printf("Storvsc_error: create multi-channel timeout, %d\n",
444		    ret);
445		return;
446	}
447
448	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
449	    vstor_packet->status != 0) {
450		printf("Storvsc_error: create multi-channel invalid operation "
451		    "(%d) or statue (%u)\n",
452		    vstor_packet->operation, vstor_packet->status);
453		return;
454	}
455
456	sc->hs_open_multi_channel = TRUE;
457
458	if (bootverbose)
459		printf("Storvsc create multi-channel success!\n");
460}
461
462/**
463 * @brief initialize channel connection to parent partition
464 *
465 * @param dev  a Hyper-V device pointer
466 * @returns  0 on success, non-zero error on failure
467 */
static int
hv_storvsc_channel_init(struct hv_device *dev)
{
	int ret = 0, i;
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;
	struct storvsc_softc *sc;
	uint16_t max_chans = 0;
	boolean_t support_multichannel = FALSE;

	max_chans = 0;
	support_multichannel = FALSE;

	sc = get_stor_device(dev, TRUE);
	if (sc == NULL)
		return (ENODEV);

	/*
	 * The dedicated hs_init_req is (re)used for every step of the
	 * handshake; the channel callback posts synch_sema when the
	 * host's reply for this request id arrives.
	 */
	request = &sc->hs_init_req;
	memset(request, 0, sizeof(struct hv_storvsc_request));
	vstor_packet = &request->vstor_packet;
	request->softc = sc;

	/**
	 * Initiate the vsc/vsp initialization protocol on the open channel
	 */
	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));

	/* Step 1: BEGIN_INITIALIZATION. */
	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;


	ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0)
		goto cleanup;

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);
	if (ret != 0)
		goto cleanup;

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
		vstor_packet->status != 0) {
		goto cleanup;
	}

	/*
	 * Step 2: negotiate the protocol version.  Walk the table newest
	 * first and settle on the first version the host accepts
	 * (status == 0).
	 */
	for (i = 0; i < nitems(vmstor_proto_list); i++) {
		/* reuse the packet for version range supported */

		memset(vstor_packet, 0, sizeof(struct vstor_packet));
		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
		vstor_packet->flags = REQUEST_COMPLETION_FLAG;

		vstor_packet->u.version.major_minor =
			vmstor_proto_list[i].proto_version;

		/* revision is only significant for Windows guests */
		vstor_packet->u.version.revision = 0;

		ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

		if (ret != 0)
			goto cleanup;

		/* wait 5 seconds */
		ret = sema_timedwait(&request->synch_sema, 5 * hz);

		if (ret)
			goto cleanup;

		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
			ret = EINVAL;
			goto cleanup;
		}
		if (vstor_packet->status == 0) {
			/*
			 * Version accepted: record the sizes this protocol
			 * revision implies (see vmstor_proto_list).
			 */
			vmstor_proto_version =
				vmstor_proto_list[i].proto_version;
			sense_buffer_size =
				vmstor_proto_list[i].sense_buffer_size;
			vmscsi_size_delta =
				vmstor_proto_list[i].vmscsi_size_delta;
			break;
		}
	}

	/* Host rejected every version we offered. */
	if (vstor_packet->status != 0) {
		ret = EINVAL;
		goto cleanup;
	}
	/**
	 * Query channel properties
	 */
	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = hv_vmbus_channel_send_packet(
				dev->channel,
				vstor_packet,
				VSTOR_PKT_SIZE,
				(uint64_t)(uintptr_t)request,
				HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
				HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if ( ret != 0)
		goto cleanup;

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);

	if (ret != 0)
		goto cleanup;

	/* TODO: Check returned version */
	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0) {
		goto cleanup;
	}

	/* multi-channels feature is supported by WIN8 and above version */
	max_chans = vstor_packet->u.chan_props.max_channel_cnt;
	if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) &&
	    (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) &&
	    (vstor_packet->u.chan_props.flags &
	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
		support_multichannel = TRUE;
	}

	/* Step 4: END_INITIALIZATION completes the handshake. */
	memset(vstor_packet, 0, sizeof(struct vstor_packet));
	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	ret = hv_vmbus_channel_send_packet(
			dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0) {
		goto cleanup;
	}

	/* wait 5 seconds */
	ret = sema_timedwait(&request->synch_sema, 5 * hz);

	if (ret != 0)
		goto cleanup;

	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
	    vstor_packet->status != 0)
		goto cleanup;

	/*
	 * If multi-channel is supported, send multichannel create
	 * request to host.
	 */
	if (support_multichannel)
		storvsc_send_multichannel_request(dev, max_chans);

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
645
646/**
 * @brief Open channel connection to parent partition StorVSP driver
648 *
649 * Open and initialize channel connection to parent partition StorVSP driver.
650 *
651 * @param pointer to a Hyper-V device
652 * @returns 0 on success, non-zero error on failure
653 */
654static int
655hv_storvsc_connect_vsp(struct hv_device *dev)
656{
657	int ret = 0;
658	struct vmstor_chan_props props;
659	struct storvsc_softc *sc;
660
661	sc = device_get_softc(dev->device);
662
663	memset(&props, 0, sizeof(struct vmstor_chan_props));
664
665	/*
666	 * Open the channel
667	 */
668
669	ret = hv_vmbus_channel_open(
670		dev->channel,
671		sc->hs_drv_props->drv_ringbuffer_size,
672		sc->hs_drv_props->drv_ringbuffer_size,
673		(void *)&props,
674		sizeof(struct vmstor_chan_props),
675		hv_storvsc_on_channel_callback,
676		dev->channel);
677
678	if (ret != 0) {
679		return ret;
680	}
681
682	ret = hv_storvsc_channel_init(dev);
683
684	return (ret);
685}
686
#if HVS_HOST_RESET
/*
 * Issue a bus/adapter reset to the host and wait (up to 5 s) for its
 * completion.  Compiled out by default (HVS_HOST_RESET == 0) because
 * host-side reset support is buggy; see the comment at HVS_HOST_RESET.
 */
static int
hv_storvsc_host_reset(struct hv_device *dev)
{
	int ret = 0;
	struct storvsc_softc *sc;

	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	sc = get_stor_device(dev, TRUE);
	if (sc == NULL) {
		return ENODEV;
	}

	/* The dedicated hs_reset_req is reserved for this purpose. */
	request = &sc->hs_reset_req;
	request->softc = sc;
	vstor_packet = &request->vstor_packet;

	sema_init(&request->synch_sema, 0, "stor synch sema");

	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
	vstor_packet->flags = REQUEST_COMPLETION_FLAG;

	/* Request id is hs_reset_req, so the channel callback posts synch_sema. */
	ret = hv_vmbus_channel_send_packet(dev->channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)&sc->hs_reset_req,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0) {
		goto cleanup;
	}

	ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */

	if (ret) {
		goto cleanup;
	}


	/*
	 * At this point, all outstanding requests in the adapter
	 * should have been flushed out and return to us
	 */

cleanup:
	sema_destroy(&request->synch_sema);
	return (ret);
}
#endif /* HVS_HOST_RESET */
739
740/**
741 * @brief Function to initiate an I/O request
742 *
743 * @param device Hyper-V device pointer
744 * @param request pointer to a request structure
745 * @returns 0 on success, non-zero error on failure
746 */
static int
hv_storvsc_io_request(struct hv_device *device,
					  struct hv_storvsc_request *request)
{
	struct storvsc_softc *sc;
	struct vstor_packet *vstor_packet = &request->vstor_packet;
	struct hv_vmbus_channel* outgoing_channel = NULL;
	int ret = 0;

	sc = get_stor_device(device, TRUE);

	if (sc == NULL) {
		/* Device is being destroyed; refuse new outbound I/O. */
		return ENODEV;
	}

	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;

	vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE;

	/* Tell the host how much autosense data we can accept back. */
	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;

	vstor_packet->u.vm_srb.transfer_len = request->data_buf.length;

	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;

	/* Spread I/O across the primary channel and any open subchannels. */
	outgoing_channel = vmbus_select_outgoing_channel(device->channel);

	/*
	 * Drop hs_lock around the VMBUS send; the caller holds it and the
	 * send path must not be entered with it held.  Reacquired below.
	 */
	mtx_unlock(&request->softc->hs_lock);
	if (request->data_buf.length) {
		/* Data-bearing request: ship the page list with the packet. */
		ret = hv_vmbus_channel_send_packet_multipagebuffer(
				outgoing_channel,
				&request->data_buf,
				vstor_packet,
				VSTOR_PKT_SIZE,
				(uint64_t)(uintptr_t)request);

	} else {
		ret = hv_vmbus_channel_send_packet(
			outgoing_channel,
			vstor_packet,
			VSTOR_PKT_SIZE,
			(uint64_t)(uintptr_t)request,
			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	}
	mtx_lock(&request->softc->hs_lock);

	if (ret != 0) {
		printf("Unable to send packet %p ret %d", vstor_packet, ret);
	} else {
		/* Counted down again in hv_storvsc_on_iocompletion(). */
		atomic_add_int(&sc->hs_num_out_reqs, 1);
	}

	return (ret);
}
802
803
804/**
805 * Process IO_COMPLETION_OPERATION and ready
806 * the result to be completed for upper layer
807 * processing by the CAM layer.
808 */
static void
hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
			   struct vstor_packet *vstor_packet,
			   struct hv_storvsc_request *request)
{
	struct vmscsi_req *vm_srb;

	vm_srb = &vstor_packet->u.vm_srb;

	/*
	 * Copy some fields of the host's response into the request structure,
	 * because the fields will be used later in storvsc_io_done().
	 */
	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;

	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
		/* Autosense data available */

		/* request->sense_data must be large enough for the host's copy */
		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
				("vm_srb->sense_info_len <= "
				 "request->sense_info_len"));

		memcpy(request->sense_data, vm_srb->u.sense_data,
			vm_srb->sense_info_len);

		request->sense_info_len = vm_srb->sense_info_len;
	}

	/* Complete request by passing to the CAM layer */
	storvsc_io_done(request);
	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
	/* Wake up a detach waiting for outstanding I/O to drain. */
	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
		sema_post(&sc->hs_drain_sema);
	}
}
847
848static void
849hv_storvsc_rescan_target(struct storvsc_softc *sc)
850{
851	path_id_t pathid;
852	target_id_t targetid;
853	union ccb *ccb;
854
855	pathid = cam_sim_path(sc->hs_sim);
856	targetid = CAM_TARGET_WILDCARD;
857
858	/*
859	 * Allocate a CCB and schedule a rescan.
860	 */
861	ccb = xpt_alloc_ccb_nowait();
862	if (ccb == NULL) {
863		printf("unable to alloc CCB for rescan\n");
864		return;
865	}
866
867	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
868	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
869		printf("unable to create path for rescan, pathid: %d,"
870		    "targetid: %d\n", pathid, targetid);
871		xpt_free_ccb(ccb);
872		return;
873	}
874
875	if (targetid == CAM_TARGET_WILDCARD)
876		ccb->ccb_h.func_code = XPT_SCAN_BUS;
877	else
878		ccb->ccb_h.func_code = XPT_SCAN_TGT;
879
880	xpt_rescan(ccb);
881}
882
/*
 * VMBUS channel callback: drain all pending inbound packets from the
 * given (sub)channel and dispatch each by its request id / operation.
 */
static void
hv_storvsc_on_channel_callback(void *context)
{
	int ret = 0;
	hv_vmbus_channel *channel = (hv_vmbus_channel *)context;
	struct hv_device *device = NULL;
	struct storvsc_softc *sc;
	uint32_t bytes_recvd;
	uint64_t request_id;
	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
	struct hv_storvsc_request *request;
	struct vstor_packet *vstor_packet;

	/* Subchannels carry no device pointer; use the primary's. */
	if (channel->primary_channel != NULL){
		device = channel->primary_channel->device;
	} else {
		device = channel->device;
	}

	KASSERT(device, ("device is NULL"));

	/* Inbound lookup: allowed even mid-destroy while I/O is outstanding. */
	sc = get_stor_device(device, FALSE);
	if (sc == NULL) {
		printf("Storvsc_error: get stor device failed.\n");
		return;
	}

	ret = hv_vmbus_channel_recv_packet(
			channel,
			packet,
			roundup2(VSTOR_PKT_SIZE, 8),
			&bytes_recvd,
			&request_id);

	while ((ret == 0) && (bytes_recvd > 0)) {
		/* request_id round-trips the pointer passed at send time. */
		request = (struct hv_storvsc_request *)(uintptr_t)request_id;

		if ((request == &sc->hs_init_req) ||
			(request == &sc->hs_reset_req)) {
			/*
			 * Synchronous init/reset request: hand the reply
			 * back and wake the waiter.
			 */
			memcpy(&request->vstor_packet, packet,
				   sizeof(struct vstor_packet));
			sema_post(&request->synch_sema);
		} else {
			vstor_packet = (struct vstor_packet *)packet;
			switch(vstor_packet->operation) {
			case VSTOR_OPERATION_COMPLETEIO:
				if (request == NULL)
					panic("VMBUS: storvsc received a "
					    "packet with NULL request id in "
					    "COMPLETEIO operation.");

				hv_storvsc_on_iocompletion(sc,
							vstor_packet, request);
				break;
			case VSTOR_OPERATION_REMOVEDEVICE:
				printf("VMBUS: storvsc operation %d not "
				    "implemented.\n", vstor_packet->operation);
				/* TODO: implement */
				break;
			case VSTOR_OPERATION_ENUMERATE_BUS:
				/* Host reports a disk was added/removed. */
				hv_storvsc_rescan_target(sc);
				break;
			default:
				break;
			}
		}
		ret = hv_vmbus_channel_recv_packet(
				channel,
				packet,
				roundup2(VSTOR_PKT_SIZE, 8),
				&bytes_recvd,
				&request_id);
	}
}
957
958/**
959 * @brief StorVSC probe function
960 *
961 * Device probe function.  Returns 0 if the input device is a StorVSC
962 * device.  Otherwise, a ENXIO is returned.  If the input device is
963 * for BlkVSC (paravirtual IDE) device and this support is disabled in
964 * favor of the emulated ATA/IDE device, return ENXIO.
965 *
966 * @param a device
 * @returns 0 on success, ENXIO if not a matching StorVSC device
968 */
969static int
970storvsc_probe(device_t dev)
971{
972	int ret	= ENXIO;
973
974	switch (storvsc_get_storage_type(dev)) {
975	case DRIVER_BLKVSC:
976		if(bootverbose)
977			device_printf(dev, "Enlightened ATA/IDE detected\n");
978		ret = BUS_PROBE_DEFAULT;
979		break;
980	case DRIVER_STORVSC:
981		if(bootverbose)
982			device_printf(dev, "Enlightened SCSI device detected\n");
983		ret = BUS_PROBE_DEFAULT;
984		break;
985	default:
986		ret = ENXIO;
987	}
988	return (ret);
989}
990
991/**
992 * @brief StorVSC attach function
993 *
994 * Function responsible for allocating per-device structures,
995 * setting up CAM interfaces and scanning for available LUNs to
996 * be used for SCSI device peripherals.
997 *
998 * @param a device
999 * @returns 0 on success or an error on failure
1000 */
1001static int
1002storvsc_attach(device_t dev)
1003{
1004	struct hv_device *hv_dev = vmbus_get_devctx(dev);
1005	enum hv_storage_type stor_type;
1006	struct storvsc_softc *sc;
1007	struct cam_devq *devq;
1008	int ret, i, j;
1009	struct hv_storvsc_request *reqp;
1010	struct root_hold_token *root_mount_token = NULL;
1011	struct hv_sgl_node *sgl_node = NULL;
1012	void *tmp_buff = NULL;
1013
1014	/*
1015	 * We need to serialize storvsc attach calls.
1016	 */
1017	root_mount_token = root_mount_hold("storvsc");
1018
1019	sc = device_get_softc(dev);
1020	if (sc == NULL) {
1021		ret = ENOMEM;
1022		goto cleanup;
1023	}
1024
1025	stor_type = storvsc_get_storage_type(dev);
1026
1027	if (stor_type == DRIVER_UNKNOWN) {
1028		ret = ENODEV;
1029		goto cleanup;
1030	}
1031
1032	bzero(sc, sizeof(struct storvsc_softc));
1033
1034	/* fill in driver specific properties */
1035	sc->hs_drv_props = &g_drv_props_table[stor_type];
1036
1037	/* fill in device specific properties */
1038	sc->hs_unit	= device_get_unit(dev);
1039	sc->hs_dev	= hv_dev;
1040	device_set_desc(dev, g_drv_props_table[stor_type].drv_desc);
1041
1042	LIST_INIT(&sc->hs_free_list);
1043	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
1044
1045	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
1046		reqp = malloc(sizeof(struct hv_storvsc_request),
1047				 M_DEVBUF, M_WAITOK|M_ZERO);
1048		reqp->softc = sc;
1049
1050		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
1051	}
1052
1053	/* create sg-list page pool */
1054	if (FALSE == g_hv_sgl_page_pool.is_init) {
1055		g_hv_sgl_page_pool.is_init = TRUE;
1056		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
1057		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
1058
1059		/*
1060		 * Pre-create SG list, each SG list with
1061		 * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each
1062		 * segment has one page buffer
1063		 */
1064		for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
1065	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
1066			    M_DEVBUF, M_WAITOK|M_ZERO);
1067
1068			sgl_node->sgl_data =
1069			    sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT,
1070			    M_WAITOK|M_ZERO);
1071
1072			for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
1073				tmp_buff = malloc(PAGE_SIZE,
1074				    M_DEVBUF, M_WAITOK|M_ZERO);
1075
1076				sgl_node->sgl_data->sg_segs[j].ss_paddr =
1077				    (vm_paddr_t)tmp_buff;
1078			}
1079
1080			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
1081			    sgl_node, link);
1082		}
1083	}
1084
1085	sc->hs_destroy = FALSE;
1086	sc->hs_drain_notify = FALSE;
1087	sc->hs_open_multi_channel = FALSE;
1088	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
1089
1090	ret = hv_storvsc_connect_vsp(hv_dev);
1091	if (ret != 0) {
1092		goto cleanup;
1093	}
1094
1095	/*
1096	 * Create the device queue.
1097	 * Hyper-V maps each target to one SCSI HBA
1098	 */
1099	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
1100	if (devq == NULL) {
1101		device_printf(dev, "Failed to alloc device queue\n");
1102		ret = ENOMEM;
1103		goto cleanup;
1104	}
1105
1106	sc->hs_sim = cam_sim_alloc(storvsc_action,
1107				storvsc_poll,
1108				sc->hs_drv_props->drv_name,
1109				sc,
1110				sc->hs_unit,
1111				&sc->hs_lock, 1,
1112				sc->hs_drv_props->drv_max_ios_per_target,
1113				devq);
1114
1115	if (sc->hs_sim == NULL) {
1116		device_printf(dev, "Failed to alloc sim\n");
1117		cam_simq_free(devq);
1118		ret = ENOMEM;
1119		goto cleanup;
1120	}
1121
1122	mtx_lock(&sc->hs_lock);
1123	/* bus_id is set to 0, need to get it from VMBUS channel query? */
1124	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
1125		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1126		mtx_unlock(&sc->hs_lock);
1127		device_printf(dev, "Unable to register SCSI bus\n");
1128		ret = ENXIO;
1129		goto cleanup;
1130	}
1131
1132	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
1133		 cam_sim_path(sc->hs_sim),
1134		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
1135		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
1136		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
1137		mtx_unlock(&sc->hs_lock);
1138		device_printf(dev, "Unable to create path\n");
1139		ret = ENXIO;
1140		goto cleanup;
1141	}
1142
1143	mtx_unlock(&sc->hs_lock);
1144
1145	root_mount_rel(root_mount_token);
1146	return (0);
1147
1148
1149cleanup:
1150	root_mount_rel(root_mount_token);
1151	while (!LIST_EMPTY(&sc->hs_free_list)) {
1152		reqp = LIST_FIRST(&sc->hs_free_list);
1153		LIST_REMOVE(reqp, link);
1154		free(reqp, M_DEVBUF);
1155	}
1156
1157	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
1158		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1159		LIST_REMOVE(sgl_node, link);
1160		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
1161			if (NULL !=
1162			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
1163				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
1164			}
1165		}
1166		sglist_free(sgl_node->sgl_data);
1167		free(sgl_node, M_DEVBUF);
1168	}
1169
1170	return (ret);
1171}
1172
1173/**
1174 * @brief StorVSC device detach function
1175 *
1176 * This function is responsible for safely detaching a
1177 * StorVSC device.  This includes waiting for inbound responses
1178 * to complete and freeing associated per-device structures.
1179 *
1180 * @param dev a device
1181 * returns 0 on success
1182 */
static int
storvsc_detach(device_t dev)
{
	struct storvsc_softc *sc = device_get_softc(dev);
	struct hv_storvsc_request *reqp = NULL;
	struct hv_device *hv_device = vmbus_get_devctx(dev);
	struct hv_sgl_node *sgl_node = NULL;
	int j = 0;

	/*
	 * Mark the device as going away under the channel's inbound lock
	 * so the channel callback observes the flag consistently.
	 */
	mtx_lock(&hv_device->channel->inbound_lock);
	sc->hs_destroy = TRUE;
	mtx_unlock(&hv_device->channel->inbound_lock);

	/*
	 * At this point, all outbound traffic should be disabled. We
	 * only allow inbound traffic (responses) to proceed so that
	 * outstanding requests can be completed.
	 */

	sc->hs_drain_notify = TRUE;
	/* Block until the completion path posts hs_drain_sema. */
	sema_wait(&sc->hs_drain_sema);
	sc->hs_drain_notify = FALSE;

	/*
	 * Since we have already drained, we don't need to busy wait.
	 * The call to close the channel will reset the callback
	 * under the protection of the incoming channel lock.
	 */

	hv_vmbus_channel_close(hv_device->channel);

	/* Release every pre-allocated request structure on the free list. */
	mtx_lock(&sc->hs_lock);
	while (!LIST_EMPTY(&sc->hs_free_list)) {
		reqp = LIST_FIRST(&sc->hs_free_list);
		LIST_REMOVE(reqp, link);

		free(reqp, M_DEVBUF);
	}
	mtx_unlock(&sc->hs_lock);

	/*
	 * Tear down the bounce-buffer page pool: free each page backing
	 * the SG segments (ss_paddr holds the malloc'ed kernel VA here),
	 * then the sglist itself and its pool node.
	 * NOTE(review): the pool is global but is destroyed on any single
	 * device detach -- confirm this is safe with multiple storvsc
	 * devices attached.
	 */
	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
		LIST_REMOVE(sgl_node, link);
		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){
			if (NULL !=
			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
			}
		}
		sglist_free(sgl_node->sgl_data);
		free(sgl_node, M_DEVBUF);
	}

	return (0);
}
1238
1239#if HVS_TIMEOUT_TEST
1240/**
1241 * @brief unit test for timed out operations
1242 *
1243 * This function provides unit testing capability to simulate
1244 * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
1245 * is required.
1246 *
1247 * @param reqp pointer to a request structure
1248 * @param opcode SCSI operation being performed
1249 * @param wait if 1, wait for I/O to complete
1250 */
static void
storvsc_timeout_test(struct hv_storvsc_request *reqp,
		uint8_t opcode, int wait)
{
	int ret;
	union ccb *ccb = reqp->ccb;
	struct storvsc_softc *sc = reqp->softc;

	/* Only exercise requests carrying the opcode under test. */
	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
		return;
	}

	if (wait) {
		/*
		 * Take the event mutex before issuing the I/O so the
		 * cv_timedwait() below cannot miss the completion signal.
		 */
		mtx_lock(&reqp->event.mtx);
	}
	ret = hv_storvsc_io_request(sc->hs_dev, reqp);
	if (ret != 0) {
		if (wait) {
			mtx_unlock(&reqp->event.mtx);
		}
		printf("%s: io_request failed with %d.\n",
				__func__, ret);
		/* Fail the CCB back to CAM and recycle the request. */
		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
		mtx_lock(&sc->hs_lock);
		storvsc_free_request(sc, reqp);
		xpt_done(ccb);
		mtx_unlock(&sc->hs_lock);
		return;
	}

	if (wait) {
		xpt_print(ccb->ccb_h.path,
				"%u: %s: waiting for IO return.\n",
				ticks, __func__);
		/* Wait up to 60 seconds for the completion path to signal. */
		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
		mtx_unlock(&reqp->event.mtx);
		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
				ticks, __func__, (ret == 0)?
				"IO return detected" :
				"IO return not detected");
		/*
		 * Now both the timer handler and io done are running
		 * simultaneously. We want to confirm the io done always
		 * finishes after the timer handler exits. So reqp used by
		 * timer handler is not freed or stale. Do busy loop for
		 * another 1/10 second to make sure io done does
		 * wait for the timer handler to complete.
		 */
		DELAY(100*1000);
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
				"%u: %s: finishing, queue frozen %d, "
				"ccb status 0x%x scsi_status 0x%x.\n",
				ticks, __func__, sc->hs_frozen,
				ccb->ccb_h.status,
				ccb->csio.scsi_status);
		mtx_unlock(&sc->hs_lock);
	}
}
1310#endif /* HVS_TIMEOUT_TEST */
1311
1312#ifdef notyet
1313/**
1314 * @brief timeout handler for requests
1315 *
1316 * This function is called as a result of a callout expiring.
1317 *
1318 * @param arg pointer to a request
1319 */
static void
storvsc_timeout(void *arg)
{
	struct hv_storvsc_request *reqp = arg;
	struct storvsc_softc *sc = reqp->softc;
	union ccb *ccb = reqp->ccb;

	/*
	 * First expiry: log the stall, re-arm the callout once, and give
	 * the I/O another full timeout period before escalating.
	 */
	if (reqp->retries == 0) {
		mtx_lock(&sc->hs_lock);
		xpt_print(ccb->ccb_h.path,
		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
		    ticks, reqp, ccb->ccb_h.timeout / 1000);
		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
		mtx_unlock(&sc->hs_lock);

		reqp->retries++;
		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
		    0, storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
#endif
		return;
	}

	/*
	 * Second expiry: freeze the SIM queue (if not already frozen) so
	 * no new I/O is dispatched while this request remains stuck.
	 */
	mtx_lock(&sc->hs_lock);
	xpt_print(ccb->ccb_h.path,
		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
		(sc->hs_frozen == 0)?
		"freezing the queue" : "the queue is already frozen");
	if (sc->hs_frozen == 0) {
		sc->hs_frozen = 1;
		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
	}
	mtx_unlock(&sc->hs_lock);

#if HVS_TIMEOUT_TEST
	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
#endif
}
1360#endif
1361
1362/**
1363 * @brief StorVSC device poll function
1364 *
1365 * This function is responsible for servicing requests when
1366 * interrupts are disabled (i.e when we are dumping core.)
1367 *
1368 * @param sim a pointer to a CAM SCSI interface module
1369 */
1370static void
1371storvsc_poll(struct cam_sim *sim)
1372{
1373	struct storvsc_softc *sc = cam_sim_softc(sim);
1374
1375	mtx_assert(&sc->hs_lock, MA_OWNED);
1376	mtx_unlock(&sc->hs_lock);
1377	hv_storvsc_on_channel_callback(sc->hs_dev->channel);
1378	mtx_lock(&sc->hs_lock);
1379}
1380
1381/**
1382 * @brief StorVSC device action function
1383 *
1384 * This function is responsible for handling SCSI operations which
1385 * are passed from the CAM layer.  The requests are in the form of
1386 * CAM control blocks which indicate the action being performed.
1387 * Not all actions require converting the request to a VSCSI protocol
1388 * message - these actions can be responded to by this driver.
1389 * Requests which are destined for a backend storage device are converted
1390 * to a VSCSI protocol message and sent on the channel connection associated
1391 * with this device.
1392 *
1393 * @param sim pointer to a CAM SCSI interface module
1394 * @param ccb pointer to a CAM control block
1395 */
static void
storvsc_action(struct cam_sim *sim, union ccb *ccb)
{
	struct storvsc_softc *sc = cam_sim_softc(sim);
	int res;

	mtx_assert(&sc->hs_lock, MA_OWNED);
	switch (ccb->ccb_h.func_code) {
	case XPT_PATH_INQ: {
		/* Report HBA capabilities and addressing limits to CAM. */
		struct ccb_pathinq *cpi = &ccb->cpi;

		cpi->version_num = 1;
		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
		cpi->target_sprt = 0;
		cpi->hba_misc = PIM_NOBUSRESET;
		cpi->hba_eng_cnt = 0;
		cpi->max_target = STORVSC_MAX_TARGETS;
		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
		/* Initiator claims the ID just past the last target. */
		cpi->initiator_id = cpi->max_target;
		cpi->bus_id = cam_sim_bus(sim);
		cpi->base_transfer_speed = 300000;
		cpi->transport = XPORT_SAS;
		cpi->transport_version = 0;
		cpi->protocol = PROTO_SCSI;
		cpi->protocol_version = SCSI_REV_SPC2;
		strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
		strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
		strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
		cpi->unit_number = cam_sim_unit(sim);

		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
	}
	case XPT_GET_TRAN_SETTINGS: {
		struct  ccb_trans_settings *cts = &ccb->cts;

		cts->transport = XPORT_SAS;
		cts->transport_version = 0;
		cts->protocol = PROTO_SCSI;
		cts->protocol_version = SCSI_REV_SPC2;

		/* enable tag queuing and disconnected mode */
		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
		/*
		 * NOTE(review): SPI-specific fields are filled although the
		 * transport is declared XPORT_SAS above -- confirm intent.
		 */
		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;

		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
	}
	case XPT_SET_TRAN_SETTINGS:	{
		/* Settings are fixed by the host; accept without change. */
		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
	}
	case XPT_CALC_GEOMETRY:{
		cam_calc_geometry(&ccb->ccg, 1);
		xpt_done(ccb);
		return;
	}
	case  XPT_RESET_BUS:
	case  XPT_RESET_DEV:{
#if HVS_HOST_RESET
		if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) {
			xpt_print(ccb->ccb_h.path,
				"hv_storvsc_host_reset failed with %d\n", res);
			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
			xpt_done(ccb);
			return;
		}
		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
#else
		xpt_print(ccb->ccb_h.path,
				  "%s reset not supported.\n",
				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
				  "bus" : "dev");
		ccb->ccb_h.status = CAM_REQ_INVALID;
		xpt_done(ccb);
		return;
#endif	/* HVS_HOST_RESET */
	}
	case XPT_SCSI_IO:
	case XPT_IMMED_NOTIFY: {
		struct hv_storvsc_request *reqp = NULL;

		if (ccb->csio.cdb_len == 0) {
			panic("cdl_len is 0\n");
		}

		/*
		 * No free request slots: ask CAM to requeue and freeze the
		 * SIM queue until a request completes.
		 */
		if (LIST_EMPTY(&sc->hs_free_list)) {
			ccb->ccb_h.status = CAM_REQUEUE_REQ;
			if (sc->hs_frozen == 0) {
				sc->hs_frozen = 1;
				xpt_freeze_simq(sim, /* count*/1);
			}
			xpt_done(ccb);
			return;
		}

		reqp = LIST_FIRST(&sc->hs_free_list);
		LIST_REMOVE(reqp, link);

		bzero(reqp, sizeof(struct hv_storvsc_request));
		reqp->softc = sc;

		ccb->ccb_h.status |= CAM_SIM_QUEUED;
		/* Translate the CCB into a VSCSI request packet. */
		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
			ccb->ccb_h.status = CAM_REQ_INVALID;
			xpt_done(ccb);
			return;
		}

#ifdef notyet
		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
			callout_init(&reqp->callout, CALLOUT_MPSAFE);
			callout_reset_sbt(&reqp->callout,
			    SBT_1MS * ccb->ccb_h.timeout, 0,
			    storvsc_timeout, reqp, 0);
#if HVS_TIMEOUT_TEST
			cv_init(&reqp->event.cv, "storvsc timeout cv");
			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
					NULL, MTX_DEF);
			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
				case MODE_SELECT_10:
				case SEND_DIAGNOSTIC:
					/* To have timer send the request. */
					return;
				default:
					break;
			}
#endif /* HVS_TIMEOUT_TEST */
		}
#endif

		/* Hand the request to the host over the VMBUS channel. */
		if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) {
			xpt_print(ccb->ccb_h.path,
				"hv_storvsc_io_request failed with %d\n", res);
			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
			storvsc_free_request(sc, reqp);
			xpt_done(ccb);
			return;
		}
		return;
	}

	default:
		ccb->ccb_h.status = CAM_REQ_INVALID;
		xpt_done(ccb);
		return;
	}
}
1552
1553/**
1554 * @brief destroy bounce buffer
1555 *
1556 * This function is responsible for destroy a Scatter/Gather list
1557 * that create by storvsc_create_bounce_buffer()
1558 *
1559 * @param sgl- the Scatter/Gather need be destroy
1560 * @param sg_count- page count of the SG list.
1561 *
1562 */
1563static void
1564storvsc_destroy_bounce_buffer(struct sglist *sgl)
1565{
1566	struct hv_sgl_node *sgl_node = NULL;
1567
1568	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
1569	LIST_REMOVE(sgl_node, link);
1570	if (NULL == sgl_node) {
1571		printf("storvsc error: not enough in use sgl\n");
1572		return;
1573	}
1574	sgl_node->sgl_data = sgl;
1575	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
1576}
1577
1578/**
1579 * @brief create bounce buffer
1580 *
1581 * This function is responsible for create a Scatter/Gather list,
1582 * which hold several pages that can be aligned with page size.
1583 *
1584 * @param seg_count- SG-list segments count
1585 * @param write - if WRITE_TYPE, set SG list page used size to 0,
1586 * otherwise set used size to page size.
1587 *
1588 * return NULL if create failed
1589 */
1590static struct sglist *
1591storvsc_create_bounce_buffer(uint16_t seg_count, int write)
1592{
1593	int i = 0;
1594	struct sglist *bounce_sgl = NULL;
1595	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
1596	struct hv_sgl_node *sgl_node = NULL;
1597
1598	/* get struct sglist from free_sgl_list */
1599	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
1600	LIST_REMOVE(sgl_node, link);
1601	if (NULL == sgl_node) {
1602		printf("storvsc error: not enough free sgl\n");
1603		return NULL;
1604	}
1605	bounce_sgl = sgl_node->sgl_data;
1606	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
1607
1608	bounce_sgl->sg_maxseg = seg_count;
1609
1610	if (write == WRITE_TYPE)
1611		bounce_sgl->sg_nseg = 0;
1612	else
1613		bounce_sgl->sg_nseg = seg_count;
1614
1615	for (i = 0; i < seg_count; i++)
1616	        bounce_sgl->sg_segs[i].ss_len = buf_len;
1617
1618	return bounce_sgl;
1619}
1620
1621/**
1622 * @brief copy data from SG list to bounce buffer
1623 *
1624 * This function is responsible for copy data from one SG list's segments
1625 * to another SG list which used as bounce buffer.
1626 *
1627 * @param bounce_sgl - the destination SG list
1628 * @param orig_sgl - the segment of the source SG list.
1629 * @param orig_sgl_count - the count of segments.
1630 * @param orig_sgl_count - indicate which segment need bounce buffer,
1631 *  set 1 means need.
1632 *
1633 */
1634static void
1635storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
1636			       bus_dma_segment_t *orig_sgl,
1637			       unsigned int orig_sgl_count,
1638			       uint64_t seg_bits)
1639{
1640	int src_sgl_idx = 0;
1641
1642	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
1643		if (seg_bits & (1 << src_sgl_idx)) {
1644			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
1645			    (void*)orig_sgl[src_sgl_idx].ds_addr,
1646			    orig_sgl[src_sgl_idx].ds_len);
1647
1648			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
1649			    orig_sgl[src_sgl_idx].ds_len;
1650		}
1651	}
1652}
1653
1654/**
1655 * @brief copy data from SG list which used as bounce to another SG list
1656 *
1657 * This function is responsible for copy data from one SG list with bounce
1658 * buffer to another SG list's segments.
1659 *
1660 * @param dest_sgl - the destination SG list's segments
1661 * @param dest_sgl_count - the count of destination SG list's segment.
1662 * @param src_sgl - the source SG list.
1663 * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
1664 *
1665 */
1666void
1667storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
1668				    unsigned int dest_sgl_count,
1669				    struct sglist* src_sgl,
1670				    uint64_t seg_bits)
1671{
1672	int sgl_idx = 0;
1673
1674	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
1675		if (seg_bits & (1 << sgl_idx)) {
1676			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
1677			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
1678			    src_sgl->sg_segs[sgl_idx].ss_len);
1679		}
1680	}
1681}
1682
1683/**
1684 * @brief check SG list with bounce buffer or not
1685 *
1686 * This function is responsible for check if need bounce buffer for SG list.
1687 *
1688 * @param sgl - the SG list's segments
1689 * @param sg_count - the count of SG list's segment.
1690 * @param bits - segmengs number that need bounce buffer
1691 *
1692 * return -1 if SG list needless bounce buffer
1693 */
1694static int
1695storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
1696				unsigned int sg_count,
1697				uint64_t *bits)
1698{
1699	int i = 0;
1700	int offset = 0;
1701	uint64_t phys_addr = 0;
1702	uint64_t tmp_bits = 0;
1703	boolean_t found_hole = FALSE;
1704	boolean_t pre_aligned = TRUE;
1705
1706	if (sg_count < 2){
1707		return -1;
1708	}
1709
1710	*bits = 0;
1711
1712	phys_addr = vtophys(sgl[0].ds_addr);
1713	offset =  phys_addr - trunc_page(phys_addr);
1714
1715	if (offset != 0) {
1716		pre_aligned = FALSE;
1717		tmp_bits |= 1;
1718	}
1719
1720	for (i = 1; i < sg_count; i++) {
1721		phys_addr = vtophys(sgl[i].ds_addr);
1722		offset =  phys_addr - trunc_page(phys_addr);
1723
1724		if (offset == 0) {
1725			if (FALSE == pre_aligned){
1726				/*
1727				 * This segment is aligned, if the previous
1728				 * one is not aligned, find a hole
1729				 */
1730				found_hole = TRUE;
1731			}
1732			pre_aligned = TRUE;
1733		} else {
1734			tmp_bits |= 1 << i;
1735			if (!pre_aligned) {
1736				if (phys_addr != vtophys(sgl[i-1].ds_addr +
1737				    sgl[i-1].ds_len)) {
1738					/*
1739					 * Check whether connect to previous
1740					 * segment,if not, find the hole
1741					 */
1742					found_hole = TRUE;
1743				}
1744			} else {
1745				found_hole = TRUE;
1746			}
1747			pre_aligned = FALSE;
1748		}
1749	}
1750
1751	if (!found_hole) {
1752		return (-1);
1753	} else {
1754		*bits = tmp_bits;
1755		return 0;
1756	}
1757}
1758
1759/**
1760 * @brief Fill in a request structure based on a CAM control block
1761 *
1762 * Fills in a request structure based on the contents of a CAM control
1763 * block.  The request structure holds the payload information for
1764 * VSCSI protocol request.
1765 *
1766 * @param ccb pointer to a CAM contorl block
1767 * @param reqp pointer to a request structure
1768 */
1769static int
1770create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
1771{
1772	struct ccb_scsiio *csio = &ccb->csio;
1773	uint64_t phys_addr;
1774	uint32_t bytes_to_copy = 0;
1775	uint32_t pfn_num = 0;
1776	uint32_t pfn;
1777	uint64_t not_aligned_seg_bits = 0;
1778
1779	/* refer to struct vmscsi_req for meanings of these two fields */
1780	reqp->vstor_packet.u.vm_srb.port =
1781		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
1782	reqp->vstor_packet.u.vm_srb.path_id =
1783		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
1784
1785	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
1786	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
1787
1788	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
1789	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
1790		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
1791			csio->cdb_len);
1792	} else {
1793		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
1794			csio->cdb_len);
1795	}
1796
1797	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
1798	case CAM_DIR_OUT:
1799		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
1800		break;
1801	case CAM_DIR_IN:
1802		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
1803		break;
1804	case CAM_DIR_NONE:
1805		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1806		break;
1807	default:
1808		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
1809		break;
1810	}
1811
1812	reqp->sense_data     = &csio->sense_data;
1813	reqp->sense_info_len = csio->sense_len;
1814
1815	reqp->ccb = ccb;
1816
1817	if (0 == csio->dxfer_len) {
1818		return (0);
1819	}
1820
1821	reqp->data_buf.length = csio->dxfer_len;
1822
1823	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
1824	case CAM_DATA_VADDR:
1825	{
1826		bytes_to_copy = csio->dxfer_len;
1827		phys_addr = vtophys(csio->data_ptr);
1828		reqp->data_buf.offset = phys_addr & PAGE_MASK;
1829
1830		while (bytes_to_copy != 0) {
1831			int bytes, page_offset;
1832			phys_addr =
1833			    vtophys(&csio->data_ptr[reqp->data_buf.length -
1834			    bytes_to_copy]);
1835			pfn = phys_addr >> PAGE_SHIFT;
1836			reqp->data_buf.pfn_array[pfn_num] = pfn;
1837			page_offset = phys_addr & PAGE_MASK;
1838
1839			bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
1840
1841			bytes_to_copy -= bytes;
1842			pfn_num++;
1843		}
1844		break;
1845	}
1846
1847	case CAM_DATA_SG:
1848	{
1849		int i = 0;
1850		int offset = 0;
1851		int ret;
1852
1853		bus_dma_segment_t *storvsc_sglist =
1854		    (bus_dma_segment_t *)ccb->csio.data_ptr;
1855		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
1856
1857		printf("Storvsc: get SG I/O operation, %d\n",
1858		    reqp->vstor_packet.u.vm_srb.data_in);
1859
1860		if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){
1861			printf("Storvsc: %d segments is too much, "
1862			    "only support %d segments\n",
1863			    storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT);
1864			return (EINVAL);
1865		}
1866
1867		/*
1868		 * We create our own bounce buffer function currently. Idealy
1869		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
1870		 * code there is no callback API to check the page alignment of
1871		 * middle segments before busdma can decide if a bounce buffer
1872		 * is needed for particular segment. There is callback,
1873		 * "bus_dma_filter_t *filter", but the parrameters are not
1874		 * sufficient for storvsc driver.
1875		 * TODO:
1876		 *	Add page alignment check in BUS_DMA(9) callback. Once
1877		 *	this is complete, switch the following code to use
1878		 *	BUS_DMA(9) for storvsc bounce buffer support.
1879		 */
1880		/* check if we need to create bounce buffer */
1881		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
1882		    storvsc_sg_count, &not_aligned_seg_bits);
1883		if (ret != -1) {
1884			reqp->bounce_sgl =
1885			    storvsc_create_bounce_buffer(storvsc_sg_count,
1886			    reqp->vstor_packet.u.vm_srb.data_in);
1887			if (NULL == reqp->bounce_sgl) {
1888				printf("Storvsc_error: "
1889				    "create bounce buffer failed.\n");
1890				return (ENOMEM);
1891			}
1892
1893			reqp->bounce_sgl_count = storvsc_sg_count;
1894			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
1895
1896			/*
1897			 * if it is write, we need copy the original data
1898			 *to bounce buffer
1899			 */
1900			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
1901				storvsc_copy_sgl_to_bounce_buf(
1902				    reqp->bounce_sgl,
1903				    storvsc_sglist,
1904				    storvsc_sg_count,
1905				    reqp->not_aligned_seg_bits);
1906			}
1907
1908			/* transfer virtual address to physical frame number */
1909			if (reqp->not_aligned_seg_bits & 0x1){
1910 				phys_addr =
1911				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
1912			}else{
1913 				phys_addr =
1914					vtophys(storvsc_sglist[0].ds_addr);
1915			}
1916			reqp->data_buf.offset = phys_addr & PAGE_MASK;
1917
1918			pfn = phys_addr >> PAGE_SHIFT;
1919			reqp->data_buf.pfn_array[0] = pfn;
1920
1921			for (i = 1; i < storvsc_sg_count; i++) {
1922				if (reqp->not_aligned_seg_bits & (1 << i)) {
1923					phys_addr =
1924					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
1925				} else {
1926					phys_addr =
1927					    vtophys(storvsc_sglist[i].ds_addr);
1928				}
1929
1930				pfn = phys_addr >> PAGE_SHIFT;
1931				reqp->data_buf.pfn_array[i] = pfn;
1932			}
1933		} else {
1934			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
1935
1936			reqp->data_buf.offset = phys_addr & PAGE_MASK;
1937
1938			for (i = 0; i < storvsc_sg_count; i++) {
1939				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
1940				pfn = phys_addr >> PAGE_SHIFT;
1941				reqp->data_buf.pfn_array[i] = pfn;
1942			}
1943
1944			/* check the last segment cross boundary or not */
1945			offset = phys_addr & PAGE_MASK;
1946			if (offset) {
1947				phys_addr =
1948				    vtophys(storvsc_sglist[i-1].ds_addr +
1949				    PAGE_SIZE - offset);
1950				pfn = phys_addr >> PAGE_SHIFT;
1951				reqp->data_buf.pfn_array[i] = pfn;
1952			}
1953
1954			reqp->bounce_sgl_count = 0;
1955		}
1956		break;
1957	}
1958	default:
1959		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
1960		return(EINVAL);
1961	}
1962
1963	return(0);
1964}
1965
1966static uint32_t
1967is_scsi_valid(const struct scsi_inquiry_data *inq_data)
1968{
1969	u_int8_t type;
1970	type = SID_TYPE(inq_data);
1971	if (type == T_NODEVICE)
1972		return (0);
1973	if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU)
1974		return (0);
1975	return (1);
1976}
1977/**
1978 * @brief completion function before returning to CAM
1979 *
1980 * I/O process has been completed and the result needs
1981 * to be passed to the CAM layer.
1982 * Free resources related to this request.
1983 *
1984 * @param reqp pointer to a request structure
1985 */
1986static void
1987storvsc_io_done(struct hv_storvsc_request *reqp)
1988{
1989	union ccb *ccb = reqp->ccb;
1990	struct ccb_scsiio *csio = &ccb->csio;
1991	struct storvsc_softc *sc = reqp->softc;
1992	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
1993	bus_dma_segment_t *ori_sglist = NULL;
1994	int ori_sg_count = 0;
1995
1996	/* destroy bounce buffer if it is used */
1997	if (reqp->bounce_sgl_count) {
1998		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
1999		ori_sg_count = ccb->csio.sglist_cnt;
2000
2001		/*
2002		 * If it is READ operation, we should copy back the data
2003		 * to original SG list.
2004		 */
2005		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
2006			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
2007			    ori_sg_count,
2008			    reqp->bounce_sgl,
2009			    reqp->not_aligned_seg_bits);
2010		}
2011
2012		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
2013		reqp->bounce_sgl_count = 0;
2014	}
2015
2016	if (reqp->retries > 0) {
2017		mtx_lock(&sc->hs_lock);
2018#if HVS_TIMEOUT_TEST
2019		xpt_print(ccb->ccb_h.path,
2020			"%u: IO returned after timeout, "
2021			"waking up timer handler if any.\n", ticks);
2022		mtx_lock(&reqp->event.mtx);
2023		cv_signal(&reqp->event.cv);
2024		mtx_unlock(&reqp->event.mtx);
2025#endif
2026		reqp->retries = 0;
2027		xpt_print(ccb->ccb_h.path,
2028			"%u: IO returned after timeout, "
2029			"stopping timer if any.\n", ticks);
2030		mtx_unlock(&sc->hs_lock);
2031	}
2032
2033#ifdef notyet
2034	/*
2035	 * callout_drain() will wait for the timer handler to finish
2036	 * if it is running. So we don't need any lock to synchronize
2037	 * between this routine and the timer handler.
2038	 * Note that we need to make sure reqp is not freed when timer
2039	 * handler is using or will use it.
2040	 */
2041	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
2042		callout_drain(&reqp->callout);
2043	}
2044#endif
2045	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
2046	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
2047	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
2048		const struct scsi_generic *cmd;
2049		cmd = (const struct scsi_generic *)
2050		    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
2051		     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
2052		if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
2053			/*
2054			 * If there are errors, for example, invalid LUN,
2055			 * host will inform VM through SRB status.
2056			 */
2057			if (bootverbose) {
2058				if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
2059					xpt_print(ccb->ccb_h.path,
2060					    "invalid LUN %d for op: %s\n",
2061					    vm_srb->lun,
2062					    scsi_op_desc(cmd->opcode, NULL));
2063				} else {
2064					xpt_print(ccb->ccb_h.path,
2065					    "Unknown SRB flag: %d for op: %s\n",
2066					    vm_srb->srb_status,
2067					    scsi_op_desc(cmd->opcode, NULL));
2068				}
2069			}
2070
2071			/*
2072			 * XXX For a selection timeout, all of the LUNs
2073			 * on the target will be gone.  It works for SCSI
2074			 * disks, but does not work for IDE disks.
2075			 *
2076			 * For CAM_DEV_NOT_THERE, CAM will only get
2077			 * rid of the device(s) specified by the path.
2078			 */
2079			if (storvsc_get_storage_type(sc->hs_dev->device) ==
2080			    DRIVER_STORVSC)
2081				ccb->ccb_h.status |= CAM_SEL_TIMEOUT;
2082			else
2083				ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
2084		} else {
2085			ccb->ccb_h.status |= CAM_REQ_CMP;
2086		}
2087
2088		if (cmd->opcode == INQUIRY &&
2089		    vm_srb->srb_status == SRB_STATUS_SUCCESS) {
2090			int resp_xfer_len, resp_buf_len, data_len;
2091			struct scsi_inquiry_data *inq_data =
2092			    (struct scsi_inquiry_data *)csio->data_ptr;
2093			/* Get the buffer length reported by host */
2094			resp_xfer_len = vm_srb->transfer_len;
2095			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
2096
2097			/* Get the available buffer length */
2098			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
2099			data_len = (resp_buf_len < resp_xfer_len) ?
2100			    resp_buf_len : resp_xfer_len;
2101			if (bootverbose && data_len >= 5) {
2102				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
2103				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
2104				    resp_buf[0], resp_buf[1], resp_buf[2],
2105				    resp_buf[3], resp_buf[4]);
2106			}
2107			/*
2108			 * XXX: Manually fix the wrong response returned from WS2012
2109			 */
2110			if (!is_scsi_valid(inq_data) &&
2111			    (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2112			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 ||
2113			    vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) {
2114				if (data_len >= 4 &&
2115				    (resp_buf[2] == 0 || resp_buf[3] == 0)) {
2116					resp_buf[2] = 5; // verion=5 means SPC-3
2117					resp_buf[3] = 2; // resp fmt must be 2
2118					if (bootverbose)
2119						xpt_print(ccb->ccb_h.path,
2120						    "fix version and resp fmt for 0x%x\n",
2121						    vmstor_proto_version);
2122				}
2123			} else if (data_len >= SHORT_INQUIRY_LENGTH) {
2124				char vendor[16];
2125
2126				cam_strvis(vendor, inq_data->vendor,
2127				    sizeof(inq_data->vendor), sizeof(vendor));
2128				/*
2129				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
2130				 * WIN2012 R2 in order to support UNMAP feature.
2131				 */
2132				if (!strncmp(vendor, "Msft", 4) &&
2133				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
2134				    (vmstor_proto_version ==
2135				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
2136				     vmstor_proto_version ==
2137				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
2138					inq_data->version = SCSI_REV_SPC3;
2139					if (bootverbose) {
2140						xpt_print(ccb->ccb_h.path,
2141						    "storvsc upgrades "
2142						    "SPC2 to SPC3\n");
2143					}
2144				}
2145			}
2146		}
2147	} else {
2148		mtx_lock(&sc->hs_lock);
2149		xpt_print(ccb->ccb_h.path,
2150			"storvsc scsi_status = %d\n",
2151			vm_srb->scsi_status);
2152		mtx_unlock(&sc->hs_lock);
2153		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
2154	}
2155
2156	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
2157	ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
2158
2159	if (reqp->sense_info_len != 0) {
2160		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
2161		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
2162	}
2163
2164	mtx_lock(&sc->hs_lock);
2165	if (reqp->softc->hs_frozen == 1) {
2166		xpt_print(ccb->ccb_h.path,
2167			"%u: storvsc unfreezing softc 0x%p.\n",
2168			ticks, reqp->softc);
2169		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
2170		reqp->softc->hs_frozen = 0;
2171	}
2172	storvsc_free_request(sc, reqp);
2173	xpt_done(ccb);
2174	mtx_unlock(&sc->hs_lock);
2175}
2176
2177/**
2178 * @brief Free a request structure
2179 *
2180 * Free a request structure by returning it to the free list
2181 *
2182 * @param sc pointer to a softc
2183 * @param reqp pointer to a request structure
2184 */
2185static void
2186storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
2187{
2188
2189	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
2190}
2191
2192/**
2193 * @brief Determine type of storage device from GUID
2194 *
2195 * Using the type GUID, determine if this is a StorVSC (paravirtual
2196 * SCSI or BlkVSC (paravirtual IDE) device.
2197 *
2198 * @param dev a device
2199 * returns an enum
2200 */
2201static enum hv_storage_type
2202storvsc_get_storage_type(device_t dev)
2203{
2204	const char *p = vmbus_get_type(dev);
2205
2206	if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) {
2207		return DRIVER_BLKVSC;
2208	} else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) {
2209		return DRIVER_STORVSC;
2210	}
2211	return (DRIVER_UNKNOWN);
2212}
2213
2214#define	PCI_VENDOR_INTEL	0x8086
2215#define	PCI_PRODUCT_PIIX4	0x7111
2216
2217static void
2218storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path,
2219    struct ata_params *ident_buf __unused, int *veto)
2220{
2221	/*
2222	 * Hyper-V should ignore ATA
2223	 */
2224	if (path->device->protocol == PROTO_ATA) {
2225		struct ccb_pathinq cpi;
2226
2227		bzero(&cpi, sizeof(cpi));
2228		xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE);
2229		cpi.ccb_h.func_code = XPT_PATH_INQ;
2230		xpt_action((union ccb *)&cpi);
2231		if (cpi.ccb_h.status == CAM_REQ_CMP &&
2232		    cpi.hba_vendor == PCI_VENDOR_INTEL &&
2233		    cpi.hba_device == PCI_PRODUCT_PIIX4) {
2234			(*veto)++;
2235			xpt_print(path,
2236			    "Disable ATA for vendor: %x, device: %x\n",
2237			    cpi.hba_vendor, cpi.hba_device);
2238		}
2239	}
2240}
2241
2242static void
2243storvsc_sysinit(void *arg __unused)
2244{
2245	if (vm_guest == VM_GUEST_HV) {
2246		storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto,
2247		    storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY);
2248	}
2249}
2250SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit,
2251    NULL);
2252
2253static void
2254storvsc_sysuninit(void *arg __unused)
2255{
2256	if (storvsc_handler_tag != NULL) {
2257		EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag);
2258	}
2259}
2260SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND,
2261    storvsc_sysuninit, NULL);
2262